src/ispell/lookup.cpp

   1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
   2 /*
   3  * lookup.c - see if a word appears in the dictionary
   4  *
   5  * Pace Willisson, 1983
   6  *
   7  * Copyright 1987, 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA
   8  * All rights reserved.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  *
  14  * 1. Redistributions of source code must retain the above copyright
  15  *    notice, this list of conditions and the following disclaimer.
  16  * 2. Redistributions in binary form must reproduce the above copyright
  17  *    notice, this list of conditions and the following disclaimer in the
  18  *    documentation and/or other materials provided with the distribution.
  19  * 3. All modifications to the source code must be clearly marked as
  20  *    such.  Binary redistributions based on modified source code
  21  *    must be clearly marked as modified versions in the documentation
  22  *    and/or other materials provided with the distribution.
  23  * 4. All advertising materials mentioning features or use of this software
  24  *    must display the following acknowledgment:
  25  *      This product includes software developed by Geoff Kuenning and
  26  *      other unpaid contributors.
  27  * 5. The name of Geoff Kuenning may not be used to endorse or promote
  28  *    products derived from this software without specific prior
  29  *    written permission.
  30  *
  31  * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
  32  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  33  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  34  * ARE DISCLAIMED.  IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
  35  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  36  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  37  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  38  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  39  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  40  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  41  * SUCH DAMAGE.
  42  */
  43
  44 /*
  45  * $Log$
  46  * Revision 1.7  2003/09/25 02:44:48  dom
  47  * bug 5813
  48  *
  49  * Revision 1.6  2003/08/26 13:20:40  dom
  50  * ispell crasher fix, implement enchant_dictionary_release
  51  *
  52  * Revision 1.5  2003/08/26 13:08:03  uwog
  53  * Fix segfault when the requested dictionary couldn't be found.
  54  *
  55  * Revision 1.4  2003/08/14 16:27:36  dom
  56  * update some documentation
  57  *
  58  * Revision 1.3  2003/07/28 20:40:27  dom
  59  * fix up the license clause, further win32-registry proof some directory getting functions
  60  *
  61  * Revision 1.2  2003/07/16 22:52:47  dom
  62  * LGPL + exception license
  63  *
  64  * Revision 1.1  2003/07/15 01:15:07  dom
  65  * ispell enchant backend
  66  *
  67  * Revision 1.3  2003/01/29 05:50:12  hippietrail
  68  *
  69  * Fixed my mess in EncodingManager.
  70  * Changed many C casts to C++ casts.
  71  *
  72  * Revision 1.2  2003/01/25 03:16:05  hippietrail
  73  *
  74  * An UT_ICONV_INVALID fix which escaped the last commit.
  75  *
  76  * Revision 1.1  2003/01/24 05:52:34  hippietrail
  77  *
  78  * Refactored ispell code. Old ispell global variables had been put into
  79  * an allocated structure, a pointer to which was passed to many functions.
  80  * I have now made all such functions and variables private members of the
  81  * ISpellChecker class. It was C OO, now it's C++ OO.
  82  *
  83  * I've fixed the makefiles and tested compilation but am unable to test
  84  * operation. Please back out my changes if they cause problems which
  85  * are not obvious or easy to fix.
  86  *
  87  * Revision 1.12  2003/01/06 18:48:39  dom
  88  * ispell cleanup, start of using new 'add' save features
  89  *
  90  * Revision 1.11  2002/09/19 05:31:17  hippietrail
  91  *
  92  * More Ispell cleanup.  Conditional globals and DEREF macros are removed.
  93  * K&R function declarations removed, converted to Doxygen style comments
  94  * where possible.  No code has been changed (I hope).  Compiles for me but
  95  * unable to test.
  96  *
  97  * Revision 1.10  2002/09/17 03:03:30  hippietrail
  98  *
  99  * After seeking permission on the developer list I've reformatted all the
 100  * spelling source which seemed to have parts which used 2, 3, 4, and 8
 101  * spaces for tabs.  It should all look good with our standard 4-space
 102  * tabs now.
 103  * I've concentrated just on indentation in the actual code.  More prettying
 104  * could be done.
 105  * * NO code changes were made *
 106  *
 107  * Revision 1.9  2002/09/13 17:20:13  mpritchett
 108  * Fix more warnings for Linux build
 109  *
 110  * Revision 1.8  2002/05/03 09:49:43  fjfranklin
 111  * o hash downloader update (Gabriel Gerhardsson)
 112  * - Comment out the "Can't open <dictionary>" printf.
  113  * - Make the progressbar more clean at the beginning of the download.
  114  * - Add support for tarballs that don't have the full path included
 115  * - Fix copyright headers on the newly added files (*HashDownloader.*)
 116  *
 117  * Revision 1.7  2001/08/27 19:06:30  dom
 118  * Lots of compilation fixes
 119  *
 120  * Revision 1.6  2001/08/10 18:32:40  dom
 121  * Spelling and iconv updates. god, i hate iconv
 122  *
 123  * Revision 1.5  2001/08/10 09:57:49  hub
 124  * Patch by sobomax@FreeBSD.org
 125  * #include "iconv.h" directive is missed from src/other/spell/xp/lookup.c and
 126  * src/wp/impexp/xp/ie_imp_RTF.cpp.
 127  * See bug 1823
 128  *
 129  * Revision 1.4  2001/07/18 17:46:01  dom
 130  * Module changes, and fix compiler warnings
 131  *
 132  * Revision 1.3  2001/06/12 21:32:49  dom
 133  * More ispell work...
 134  *
 135  * Revision 1.2  2001/05/12 16:05:42  thomasf
  136  * Big pseudo changes to ispell to make it pass around a structure rather
  137  * than rely on all sorts of globals willy nilly here and there.  Also
  138  * fixed our spelling class to work with accepting suggestions once more.
  139  * This code is dirty, gross and ugly (not to mention still not supporting
  140  * multiple hash sizes just yet) but it works on my machine and will no
  141  * doubt break other machines.
 142  *
 143  * Revision 1.1  2001/04/15 16:01:24  tomas_f
 144  * moving to spell/xp
 145  *
 146  * Revision 1.7  1999/09/29 23:33:32  justin
 147  * Updates to the underlying ispell-based code to support suggested corrections.
 148  *
 149  * Revision 1.6  1999/04/13 17:12:51  jeff
 150  * Applied "Darren O. Benham" <gecko@benham.net> spell check changes.
 151  * Fixed crash on Win32 with the new code.
 152  *
 153  * Revision 1.5  1999/01/07 01:07:48  paul
 154  * Fixed spell leaks.
 155  *
 156  * Revision 1.5  1999/01/07 01:07:48  paul
 157  * Fixed spell leaks.
 158  *
 159  * Revision 1.4  1998/12/29 14:55:33  eric
 160  *
 161  * I've doctored the ispell code pretty extensively here.  It is now
 162  * warning-free on Win32.  It also *works* on Win32 now, since I
 163  * replaced all the I/O calls with ANSI standard ones.
 164  *
 165  * Revision 1.3  1998/12/28 23:11:30  eric
 166  *
 167  * modified spell code and integration to build on Windows.
 168  * This is still a hack.
 169  *
 170  * Actually, it doesn't yet WORK on Windows.  It just builds.
 171  * SpellCheckInit is failing for some reason.
 172  *
 173  * Revision 1.2  1998/12/28 22:16:22  eric
 174  *
 175  * These changes begin to incorporate the spell checker into AbiWord.  Most
 176  * of this is a hack.
 177  *
 178  * 1.  added other/spell to the -I list in config/abi_defs
 179  * 2.  replaced other/spell/Makefile with one which is more like
 180  *      our build system.
 181  * 3.  added other/spell to other/Makefile so that the build will now
 182  *      dive down and build the spell check library.
 183  * 4.  added the AbiSpell library to the Makefiles in wp/main
 184  * 5.  added a call to SpellCheckInit in wp/main/unix/UnixMain.cpp.
 185  *      This call is a HACK and should be replaced with something
 186  *      proper later.
 187  * 6.  added code to fv_View.cpp as follows:
 188  *      whenever you double-click on a word, the spell checker
 189  *      verifies that word and prints its status to stdout.
 190  *
 191  * Caveats:
 192  * 1.  This will break the Windows build.  I'm going to work on fixing it
 193  *      now.
 194  * 2.  This only works if your dictionary is in /usr/lib/ispell/american.hash.
 195  *      The dictionary location is currently hard-coded.  This will be
 196  *      fixed as well.
 197  *
 198  * Anyway, such as it is, it works.
 199  *
 200  * Revision 1.1  1998/12/28 18:04:43  davet
 201  * Spell checker code stripped from ispell.  At this point, there are
 202  * two external routines...  the Init routine, and a check-a-word routine
 203  * which returns a boolean value, and takes a 16 bit char string.
 204  * The code resembles the ispell code as much as possible still.
 205  *
 206  * Revision 1.42  1995/01/08  23:23:42  geoff
 207  * Support MSDOS_BINARY_OPEN when opening the hash file to read it in.
 208  *
 209  * Revision 1.41  1994/01/25  07:11:51  geoff
 210  * Get rid of all old RCS log lines in preparation for the 3.1 release.
 211  *
 212  */
 213
 214 #include <stdlib.h>
 215 #include <string.h>
 216 #include <ctype.h>
 217
 218 #include "enchant-provider.h"
 219 #include "ispell_checker.h"
 220 #include "msgs.h"
 221
 222 #define G_ICONV_INVALID (GIConv)-1
 223
 224 static bool g_iconv_is_valid(GIConv i)
 225 {
 226   return (i != G_ICONV_INVALID);
 227 }
 228
 229 #ifdef INDEXDUMP
 230 static void     dumpindex P ((struct flagptr * indexp, int depth));
 231 #endif /* INDEXDUMP */
 232
 233 int             gnMaskBits = 64;
 234
 235 /*!
 236  * \param hashname name of the hash file (dictionary)
 237  *
 238  * \return
 239  */
 240 int ISpellChecker::linit (char *hashname)
 241 {
 242         FILE*   fpHash;
 243
 244     register int        i;
 245     register struct dent * dp;
 246     struct flagent *    entry;
 247     struct flagptr *    ind;
 248     int                 nextchar, x;
 249     int                 viazero;
 250     register ichar_t *  cp;
 251
 252     if ((fpHash = enchant_fopen (hashname, "rb")) == NULL)
 253         {
 254                 return (-1);
 255         }
 256
 257     m_hashsize = fread (reinterpret_cast<char *>(&m_hashheader), 1, sizeof m_hashheader, fpHash);
 258     if (m_hashsize < static_cast<int>(sizeof(m_hashheader)))
 259         {
 260                 if (m_hashsize < 0)
 261                         fprintf (stderr, LOOKUP_C_CANT_READ, hashname);
 262                 else if (m_hashsize == 0)
 263                         fprintf (stderr, LOOKUP_C_NULL_HASH, hashname);
 264                 else
 265                         fprintf (stderr,
 266                           LOOKUP_C_SHORT_HASH (m_hashname, m_hashsize,
 267                                 static_cast<int>(sizeof m_hashheader)));
 268                 return (-1);
 269         }
 270     else if (m_hashheader.magic != MAGIC)
 271         {
 272                 fprintf (stderr,
 273                   LOOKUP_C_BAD_MAGIC (hashname, static_cast<unsigned int>(MAGIC),
 274                         static_cast<unsigned int>(m_hashheader.magic)));
 275                 return (-1);
 276         }
 277     else if (m_hashheader.magic2 != MAGIC)
 278         {
 279                 fprintf (stderr,
 280                   LOOKUP_C_BAD_MAGIC2 (hashname, static_cast<unsigned int>(MAGIC),
 281                         static_cast<unsigned int>(m_hashheader.magic2)));
 282                 return (-1);
 283         }
 284 /*  else if (hashheader.compileoptions != COMPILEOPTIONS*/
 285     else if ( 1 != 1
 286       ||  m_hashheader.maxstringchars != MAXSTRINGCHARS
 287       ||  m_hashheader.maxstringcharlen != MAXSTRINGCHARLEN)
 288         {
 289                 fprintf (stderr,
 290                   LOOKUP_C_BAD_OPTIONS (static_cast<unsigned int>(m_hashheader.compileoptions),
 291                         m_hashheader.maxstringchars, m_hashheader.maxstringcharlen,
 292                         static_cast<unsigned int>(COMPILEOPTIONS), MAXSTRINGCHARS, MAXSTRINGCHARLEN));
 293                 return (-1);
 294         }
 295
 296         {
 297                 m_hashtbl =
 298                  (struct dent *)
 299                         calloc (static_cast<unsigned>(m_hashheader.tblsize), sizeof (struct dent));
 300                 m_hashsize = m_hashheader.tblsize;
 301                 m_hashstrings = static_cast<char *>(malloc(static_cast<unsigned>(m_hashheader.stringsize)));
 302         }
 303     m_numsflags = m_hashheader.stblsize;
 304     m_numpflags = m_hashheader.ptblsize;
 305     m_sflaglist = (struct flagent *)
 306       malloc ((m_numsflags + m_numpflags) * sizeof (struct flagent));
 307     if (m_hashtbl == NULL  ||  m_hashstrings == NULL  ||  m_sflaglist == NULL)
 308         {
 309                 fprintf (stderr, LOOKUP_C_NO_HASH_SPACE);
 310                 return (-1);
 311         }
 312     m_pflaglist = m_sflaglist + m_numsflags;
 313
 314         {
 315                 if( fread ( m_hashstrings, 1, static_cast<unsigned>(m_hashheader.stringsize), fpHash)
 316                         != static_cast<size_t>(m_hashheader.stringsize) )
 317             {
 318                     fprintf (stderr, LOOKUP_C_BAD_FORMAT);
 319                         fprintf (stderr, "stringsize err\n" );
 320                 return (-1);
 321             }
 322                 if ( m_hashheader.compileoptions & 0x04 )
 323                 {
 324                         if(  fread (reinterpret_cast<char *>(m_hashtbl), 1, static_cast<unsigned>(m_hashheader.tblsize) * sizeof(struct dent), fpHash)
 325                         != (static_cast<size_t>(m_hashheader.tblsize * sizeof (struct dent))))
 326                     {
 327                             fprintf (stderr, LOOKUP_C_BAD_FORMAT);
 328                         return (-1);
 329                     }
 330                 }
 331                 else
 332                 {
 333                         for( x=0; x<m_hashheader.tblsize; x++ )
 334                         {
 335                                 if(  fread ( reinterpret_cast<char*>(m_hashtbl+x), sizeof( struct dent)-sizeof( MASKTYPE ), 1, fpHash)
 336                                 != 1)
 337                             {
 338                                     fprintf (stderr, LOOKUP_C_BAD_FORMAT);
 339                                 return (-1);
 340                             }
 341                         }       /*for*/
 342                 }       /*else*/
 343         }
 344     if (fread (reinterpret_cast<char *>(m_sflaglist), 1,
 345         static_cast<unsigned>(m_numsflags+ m_numpflags) * sizeof (struct flagent), fpHash)
 346       != (m_numsflags + m_numpflags) * sizeof (struct flagent))
 347         {
 348                 fprintf (stderr, LOOKUP_C_BAD_FORMAT);
 349                 return (-1);
 350         }
 351     fclose (fpHash);
 352
 353         {
 354                 for (i = m_hashsize, dp = m_hashtbl;  --i >= 0;  dp++)
 355                 {
 356                         if (dp->word == (char *) -1)
 357                                 dp->word = NULL;
 358                         else
 359                                 dp->word = &m_hashstrings [ reinterpret_cast<size_t>(dp->word) ];
 360                         if (dp->next == (struct dent *) -1)
 361                                 dp->next = NULL;
 362                         else
 363                                 dp->next = &m_hashtbl [ reinterpret_cast<size_t>(dp->next) ];
 364             }
 365         }
 366
 367     for (i = m_numsflags + m_numpflags, entry = m_sflaglist; --i >= 0; entry++)
 368         {
 369                 if (entry->stripl)
 370                         entry->strip = reinterpret_cast<ichar_t *>(&m_hashstrings[reinterpret_cast<size_t>(entry->strip)]);
 371                 else
 372                         entry->strip = NULL;
 373                 if (entry->affl)
 374                         entry->affix = reinterpret_cast<ichar_t *>(&m_hashstrings[reinterpret_cast<size_t>(entry->affix)]);
 375                 else
 376                         entry->affix = NULL;
 377         }
 378     /*
 379     ** Warning - 'entry' and 'i' are reset in the body of the loop
 380     ** below.  Don't try to optimize it by (e.g.) moving the decrement
 381     ** of i into the loop condition.
 382     */
 383     for (i = m_numsflags, entry = m_sflaglist;  i > 0;  i--, entry++)
 384         {
 385                 if (entry->affl == 0)
 386                 {
 387                         cp = NULL;
 388                         ind = &m_sflagindex[0];
 389                         viazero = 1;
 390                 }
 391                 else
 392                 {
 393                         cp = entry->affix + entry->affl - 1;
 394                         ind = &m_sflagindex[*cp];
 395                         viazero = 0;
 396                         while (ind->numents == 0  &&  ind->pu.fp != NULL)
 397                         {
 398                                 if (cp == entry->affix)
 399                                 {
 400                                         ind = &ind->pu.fp[0];
 401                                         viazero = 1;
 402                                 }
 403                                 else
 404                                 {
 405                                         ind = &ind->pu.fp[*--cp];
 406                                         viazero = 0;
 407                                 }
 408                         }
 409                 }
 410                 if (ind->numents == 0)
 411                         ind->pu.ent = entry;
 412                 ind->numents++;
 413                 /*
 414                 ** If this index entry has more than MAXSEARCH flags in
 415                 ** it, we will split it into subentries to reduce the
 416                 ** searching.  However, the split doesn't make sense in
 417                 ** two cases:  (a) if we are already at the end of the
 418                 ** current affix, or (b) if all the entries in the list
 419                 ** have identical affixes.  Since the list is sorted, (b)
 420                 ** is true if the first and last affixes in the list
 421                 ** are identical.
 422                 */
 423                 if (!viazero  &&  ind->numents >= MAXSEARCH
 424                   &&  icharcmp (entry->affix, ind->pu.ent->affix) != 0)
 425                 {
 426                         /* Sneaky trick:  back up and reprocess */
 427                         entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */
 428                         i = m_numsflags - (entry - m_sflaglist);
 429                         ind->pu.fp =
 430                           (struct flagptr *)
 431                         calloc (static_cast<unsigned>(SET_SIZE + m_hashheader.nstrchars),
 432                           sizeof (struct flagptr));
 433                         if (ind->pu.fp == NULL)
 434                         {
 435                                 fprintf (stderr, LOOKUP_C_NO_LANG_SPACE);
 436                                 return (-1);
 437                         }
 438                         ind->numents = 0;
 439                 }
 440         }
 441     /*
 442     ** Warning - 'entry' and 'i' are reset in the body of the loop
 443     ** below.  Don't try to optimize it by (e.g.) moving the decrement
 444     ** of i into the loop condition.
 445     */
 446     for (i = m_numpflags, entry = m_pflaglist;  i > 0;  i--, entry++)
 447         {
 448                 if (entry->affl == 0)
 449             {
 450                         cp = NULL;
 451                         ind = &m_pflagindex[0];
 452                         viazero = 1;
 453             }
 454                 else
 455                 {
 456                         cp = entry->affix;
 457                         ind = &m_pflagindex[*cp++];
 458                         viazero = 0;
 459                         while (ind->numents == 0  &&  ind->pu.fp != NULL)
 460                         {
 461                                 if (*cp == 0)
 462                                 {
 463                                         ind = &ind->pu.fp[0];
 464                                         viazero = 1;
 465                                 }
 466                                 else
 467                                 {
 468                                         ind = &ind->pu.fp[*cp++];
 469                                         viazero = 0;
 470                                 }
 471                         }
 472                 }
 473                 if (ind->numents == 0)
 474                         ind->pu.ent = entry;
 475                 ind->numents++;
 476                 /*
 477                 ** If this index entry has more than MAXSEARCH flags in
 478                 ** it, we will split it into subentries to reduce the
 479                 ** searching.  However, the split doesn't make sense in
 480                 ** two cases:  (a) if we are already at the end of the
 481                 ** current affix, or (b) if all the entries in the list
 482                 ** have identical affixes.  Since the list is sorted, (b)
 483                 ** is true if the first and last affixes in the list
 484                 ** are identical.
 485                 */
 486                 if (!viazero  &&  ind->numents >= MAXSEARCH
 487                   &&  icharcmp (entry->affix, ind->pu.ent->affix) != 0)
 488                 {
 489                         /* Sneaky trick:  back up and reprocess */
 490                         entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */
 491                         i = m_numpflags - (entry - m_pflaglist);
 492                         ind->pu.fp =
 493                           static_cast<struct flagptr *>(calloc(SET_SIZE + m_hashheader.nstrchars,
 494                                 sizeof (struct flagptr)));
 495                         if (ind->pu.fp == NULL)
 496                         {
 497                                 fprintf (stderr, LOOKUP_C_NO_LANG_SPACE);
 498                                 return (-1);
 499                         }
 500                         ind->numents = 0;
 501                 }
 502         }
 503 #ifdef INDEXDUMP
 504     fprintf (stderr, "Prefix index table:\n");
 505     dumpindex (m_pflagindex, 0);
 506     fprintf (stderr, "Suffix index table:\n");
 507     dumpindex (m_sflagindex, 0);
 508 #endif
 509     if (m_hashheader.nstrchartype == 0)
 510                 m_chartypes = NULL;
 511     else
 512         {
 513                 m_chartypes = (struct strchartype *)
 514                   malloc (m_hashheader.nstrchartype * sizeof (struct strchartype));
 515                 if (m_chartypes == NULL)
 516                 {
 517                         fprintf (stderr, LOOKUP_C_NO_LANG_SPACE);
 518                         return (-1);
 519                 }
 520                 for (i = 0, nextchar = m_hashheader.strtypestart;
 521                   i < m_hashheader.nstrchartype;
 522                   i++)
 523                 {
 524                         m_chartypes[i].name = &m_hashstrings[nextchar];
 525                         nextchar += strlen (m_chartypes[i].name) + 1;
 526                         m_chartypes[i].deformatter = &m_hashstrings[nextchar];
 527                         nextchar += strlen (m_chartypes[i].deformatter) + 1;
 528                         m_chartypes[i].suffixes = &m_hashstrings[nextchar];
 529                         while (m_hashstrings[nextchar] != '\0')
 530                                 nextchar += strlen (&m_hashstrings[nextchar]) + 1;
 531                         nextchar++;
 532                 }
 533         }
 534
 535     initckch(NULL);
 536
 537     return (0);
 538 }
 539
 540 #ifndef FREEP
 541 #define FREEP(p)        do { if (p) free(p); } while (0)
 542 #endif
 543
 544 /*!
 545  * \param wchars Characters in -w option, if any
 546  */
 547 void ISpellChecker::initckch (char *wchars)
 548 {
 549         register ichar_t    c;
 550         char                num[4];
 551
 552         for (c = 0; c < static_cast<ichar_t>(SET_SIZE+ m_hashheader.nstrchars); ++c)
 553     {
 554                 if (iswordch (c))
 555                 {
 556                         if (!mylower (c))
 557                         {
 558                                 m_Try[m_Trynum] = c;
 559                                 ++m_Trynum;
 560                         }
 561                 }
 562                 else if (isboundarych (c))
 563                 {
 564                         m_Try[m_Trynum] = c;
 565                         ++m_Trynum;
 566                 }
 567         }
 568         if (wchars != NULL)
 569     {
 570                 while (m_Trynum < SET_SIZE  &&  *wchars != '\0')
 571                 {
 572                         if (*wchars != 'n'  &&  *wchars != '\\')
 573                         {
 574                                 c = *wchars;
 575                                 ++wchars;
 576                         }
 577                         else
 578                         {
 579                             ++wchars;
 580                             num[0] = '\0';
 581                             num[1] = '\0';
 582                             num[2] = '\0';
 583                             num[3] = '\0';
 584                             if (isdigit (wchars[0]))
 585                                 {
 586                                     num[0] = wchars[0];
 587                                     if (isdigit (wchars[1]))
 588                                     {
 589                                                 num[1] = wchars[1];
 590                                                 if (isdigit (wchars[2]))
 591                                                         num[2] = wchars[2];
 592                                         }
 593                                 }
 594                                 if (wchars[-1] == 'n')
 595                                 {
 596                                     wchars += strlen (num);
 597                                     c = atoi (num);
 598                                 }
 599                                 else
 600                                 {
 601                                     wchars += strlen (num);
 602                                     c = 0;
 603                                     if (num[0])
 604                                                 c = num[0] - '0';
 605                                     if (num[1])
 606                                     {
 607                                                 c <<= 3;
 608                                                 c += num[1] - '0';
 609                                         }
 610                                         if (num[2])
 611                                         {
 612                                                 c <<= 3;
 613                                                 c += num[2] - '0';
 614                                         }
 615                                 }
 616                         }
 617 /*              c &= NOPARITY;*/
 618                         if (!m_hashheader.wordchars[c])
 619                         {
 620                                 m_hashheader.wordchars[c] = 1;
 621                                 m_hashheader.sortorder[c] = m_hashheader.sortval++;
 622                                 m_Try[m_Trynum] = c;
 623                                 ++m_Trynum;
 624                         }
 625                 }
 626     }
 627 }
 628
 629 /*
 630  * \param indexp
 631  */
 632 void ISpellChecker::clearindex (struct flagptr *indexp)
 633 {
 634     register int                i;
 635     for (i = 0;  i < SET_SIZE + m_hashheader.nstrchars;  i++, indexp++)
 636         {
 637                 if (indexp->numents == 0 && indexp->pu.fp != NULL)
 638                 {
 639                     clearindex(indexp->pu.fp);
 640                         free(indexp->pu.fp);
 641                 }
 642         }
 643 }
 644
 645 #ifdef INDEXDUMP
 646 static void dumpindex (indexp, depth)
 647     register struct flagptr *   indexp;
 648     register int                depth;
 649 {
 650     register int                i;
 651     int                         j;
 652     int                         k;
 653     char                        stripbuf[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
 654
 655     for (i = 0;  i < SET_SIZE + hashheader.nstrchars;  i++, indexp++)
 656         {
 657                 if (indexp->numents == 0  &&  indexp->pu.fp != NULL)
 658             {
 659                         for (j = depth;  --j >= 0;  )
 660                                 putc (' ', stderr);
 661                         if (i >= ' '  &&  i <= '~')
 662                                 putc (i, stderr);
 663                         else
 664                                 fprintf (stderr, "0x%x", i);
 665                         putc ('\n', stderr);
 666                         dumpindex (indexp->pu.fp, depth + 1);
 667             }
 668                 else if (indexp->numents)
 669                 {
 670                         for (j = depth;  --j >= 0;  )
 671                                 putc (' ', stderr);
 672                         if (i >= ' '  &&  i <= '~')
 673                                 putc (i, stderr);
 674                         else
 675                                 fprintf (stderr, "0x%x", i);
 676                         fprintf (stderr, " -> %d entries\n", indexp->numents);
 677                         for (k = 0;  k < indexp->numents;  k++)
 678                         {
 679                                 for (j = depth;  --j >= 0;  )
 680                                         putc (' ', stderr);
 681                                 if (indexp->pu.ent[k].stripl)
 682                                 {
 683                                         ichartostr (stripbuf, indexp->pu.ent[k].strip,
 684                                           sizeof stripbuf, 1);
 685                                         fprintf (stderr, "     entry %d (-%s,%s)\n",
 686                                           &indexp->pu.ent[k] - sflaglist,
 687                                           stripbuf,
 688                                           indexp->pu.ent[k].affl
 689                                                 ? ichartosstr (indexp->pu.ent[k].affix, 1) : "-");
 690                                 }
 691                                 else
 692                                         fprintf (stderr, "     entry %d (%s)\n",
 693                                           &indexp->pu.ent[k] - sflaglist,
 694                                           ichartosstr (indexp->pu.ent[k].affix, 1));
 695                         }
 696                 }
 697         }
 698 }
 699 #endif
 700
 701 /* n is length of s */
 702
 703 /*
 704  * \param s
 705  * \param dotree
 706  *
 707  * \return
 708  */
 709 struct dent * ISpellChecker::ispell_lookup (ichar_t *s, int dotree)
 710 {
 711     register struct dent *      dp;
 712     register char *             s1;
 713     char                        schar[INPUTWORDLEN + MAXAFFIXLEN];
 714
 715     dp = &m_hashtbl[hash (s, m_hashsize)];
 716     if (ichartostr (schar, s, sizeof schar, 1))
 717                 fprintf (stderr, WORD_TOO_LONG (schar));
 718     for (  ;  dp != NULL;  dp = dp->next)
 719         {
 720                 /* quick strcmp, but only for equality */
 721                 s1 = dp->word;
 722                 if (s1  &&  s1[0] == schar[0]  &&  strcmp (s1 + 1, schar + 1) == 0)
 723                         return dp;
 724 #ifndef NO_CAPITALIZATION_SUPPORT
 725                 while (dp->flagfield & MOREVARIANTS)    /* Skip variations */
 726                         dp = dp->next;
 727 #endif
 728         }
 729         return NULL;
 730 }
 731
 732 void ISpellChecker::alloc_ispell_struct()
 733 {
 734         m_translate_in =
 735         m_translate_out = G_ICONV_INVALID;
 736 }
 737
 738 void ISpellChecker::free_ispell_struct()
 739 {
 740         if (g_iconv_is_valid(m_translate_in))
 741                 g_iconv_close (m_translate_in);
 742         if (g_iconv_is_valid(m_translate_out))
 743                 g_iconv_close (m_translate_out);
 744 }