1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
3 * Copyright (C) 2003 Dom Lachowicz
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02110-1301, USA.
21 * In addition, as a special exception, Dom Lachowicz
22 * gives permission to link the code of this program with
23 * the non-LGPL Spelling Provider libraries (eg: a MSFT Office
24 * spell checker backend) and distribute linked combinations including
25 * the two. You must obey the GNU Lesser General Public License in all
26 * respects for all of the code used other than said providers. If you modify
27 * this file, you may extend this exception to your version of the
28 * file, but you are not obligated to do so. If you do not wish to
29 * do so, delete this exception statement from your version.
33 * This is a rough approximation of an "ispell compatibility mode"
36 * Modified in 2007 to work when called from emacs which
37 * calls a spelling program (e.g. enchant) like this
39 * enchant -a -m -d dictionary
49 #include "enchant-provider.h"
51 /* word has to be bigger than this to be checked */
/* do_mode_a() compares this against the byte length of the word; a word of
   this length or shorter gets the accepted reply without any dictionary
   lookup. */
52 #define MIN_WORD_LENGTH 1
/* Windows SDK switch that trims the set of declarations pulled in by the
   Windows headers. */
55 #define WIN32_LEAN_AND_MEAN
/* Name of the code page that consume_line() hands to g_convert() as the
   source encoding. CP437 is only the initial value: main() rewrites the
   buffer as CP<number> using GetACP() or GetConsoleCP(). The array size
   matches the 15 passed to sprintf_s() there. */
58 static char charset[15] = "CP437";
/* Write the version banner to the stream `to`.
   The banner identifies the program as International Ispell 3.1.20 and
   appends the Enchant version string.
   NOTE(review): VERSION is presumably supplied by the build configuration
   -- confirm. */
70 print_version (FILE * to)
72 fprintf (to, "@(#) International Ispell Version 3.1.20 (but really Enchant %s)\n", VERSION);
/* Write the usage summary to the stream `to`.
   to    stream receiving the text (main() passes stdout)
   prog  program name shown in the Usage line (main() passes argv[0])
   One line is printed per supported switch. */
77 print_help (FILE * to, const char * prog)
79 fprintf (to, "Usage: %s [options] -a|-d dict|-l|-L|-m|-v[v]|<file>\n", prog);
80 fprintf (to, "\t-a lists alternatives.\n");
81 fprintf (to, "\t-d dict uses dictionary <dict>.\n");
82 fprintf (to, "\t-l lists misspellings.\n");
83 fprintf (to, "\t-m is ignored.\n");
84 fprintf (to, "\t-L displays line numbers.\n");
85 fprintf (to, "\t-v displays program version.\n");
/* Read input from `in` into `str` and convert it to UTF-8.
   in   stream to read from, one character at a time
   str  buffer that is emptied first and then filled with the text read
   parse_file() stores the return value in was_last_line and stops reading
   once it is true. */
89 consume_line (FILE * in, GString * str)
92 gsize bytes_read, bytes_written;
/* Start from an empty buffer; nothing from a previous call is kept. */
96 g_string_truncate (str, 0);
/* Keep reading until end of file or until ret has been cleared. */
98 while (ret && (ch = fgetc (in)) != EOF) {
102 g_string_append_c (str, ch);
/* Convert the raw bytes to UTF-8: either from the code page named in
   charset, or from the encoding of the current locale.
   NOTE(review): presumably a platform conditional picks one of the two
   calls -- confirm. */
110 utf = g_convert(str->str, str->len, "UTF-8", charset, &bytes_read, &bytes_written, NULL);
112 utf = g_locale_to_utf8 (str->str, str->len, &bytes_read, &bytes_written, NULL);
/* Replace the buffer contents with the converted text. */
116 g_string_assign (str, utf);
119 /* Else str->str stays the same. we'll assume that it's
120 already utf8 and glib is just being stupid. */
/* Write the UTF-8 string `str` to `out` in the encoding of the current
   locale.
   out  destination stream
   str  NUL-terminated string (the length argument below is -1)
   When the converted text is not used, the original bytes are written
   unchanged instead. */
127 print_utf (FILE * out, const char * str)
129 gsize bytes_read, bytes_written;
132 native = g_locale_from_utf8 (str, -1, &bytes_read, &bytes_written, NULL);
/* bytes_written is the length of the converted text as reported by
   g_locale_from_utf8(). */
134 fwrite (native, 1, bytes_written, out);
137 /* We'll assume that it's already utf8 and glib is just being stupid. */
138 fwrite (str, 1, strlen (str), out);
/* Write the reply for one word in the alternatives-listing mode.
   out        stream receiving the reply
   dict       dictionary used for the check and for the suggestions
   word       word to check; its str and len members are passed to Enchant
   start_pos  position of the word within its line, echoed in the reply
   lineCount  line number, written by the fprintf calls that take it
   Three kinds of reply are produced: an asterisk line for an accepted
   word, a hash line for a rejected word without suggestions, and an
   ampersand line listing the suggestions otherwise. size_t values are
   narrowed to unsigned int for the %u conversions. */
143 do_mode_a (FILE * out, EnchantDict * dict, GString * word, size_t start_pos, size_t lineCount)
/* A word of at most MIN_WORD_LENGTH bytes short-circuits the dictionary
   lookup; enchant_dict_check() returning 0 also counts as accepted. */
148 if (word->len <= MIN_WORD_LENGTH || enchant_dict_check (dict, word->str, word->len) == 0) {
/* Accepted reply in its two forms: with the line number, or bare. */
150 fprintf (out, "* %u\n", (unsigned int)lineCount);
152 fwrite ("*\n", 1, 2, out);
/* The word was rejected: ask the dictionary for suggestions. */
155 suggs = enchant_dict_suggest (dict, word->str,
156 word->len, &n_suggs);
/* Nothing to suggest: hash marker, the word, then its position. */
157 if (!n_suggs || !suggs) {
158 fwrite ("# ", 1, 2, out);
160 fprintf (out, "%u ", (unsigned int)lineCount);
161 print_utf (out, word->str);
162 fprintf (out, " %u\n", (unsigned int)start_pos);
/* Suggestions exist: ampersand marker, the word, the number of
   suggestions, the position and a colon, then the list itself. */
167 fwrite ("& ", 1, 2, out);
169 fprintf (out, "%u ", (unsigned int)lineCount);
170 print_utf (out, word->str);
171 fprintf (out, " %u %u:", (unsigned int)n_suggs, (unsigned int)start_pos);
173 for (i = 0; i < n_suggs; i++) {
175 print_utf (out, suggs[i]);
/* A comma follows every suggestion except the last one. */
177 if (i != (n_suggs - 1))
178 fwrite (",", 1, 1, out);
180 fwrite ("\n", 1, 1, out);
/* Hand the list obtained from enchant_dict_suggest() back to Enchant. */
183 enchant_dict_free_string_list (dict, suggs);
/* Write the reply for one word in the misspellings-listing mode.
   out        stream receiving the reply
   dict       dictionary used for the check
   word       word to check
   lineCount  line number, written by the fprintf call below
   Only a word rejected by the dictionary (non-zero result from
   enchant_dict_check) produces output: the word followed by a newline.
   Unlike do_mode_a(), no MIN_WORD_LENGTH test is applied here. */
189 do_mode_l (FILE * out, EnchantDict * dict, GString * word, size_t lineCount)
191 if (enchant_dict_check (dict, word->str, word->len) != 0) {
193 fprintf (out, "%u ", (unsigned int)lineCount);
194 print_utf (out, word->str);
195 fwrite ("\n", 1, 1, out);
/* Decide whether the code point `uc` counts as part of a word.
   uc  Unicode code point to classify
   n   zero while the caller searches for the start of a word, non-zero
       once it is inside one (tokenize_line() passes 0 and 1)
   Returns 1 for letters, marks, numbers and connector punctuation. An
   apostrophe or a dash-punctuation character is accepted as well, but only
   when n is greater than zero. */
201 is_word_char (gunichar uc, size_t n)
/* Classification is driven by the Unicode general category of uc. */
205 type = g_unichar_type(uc);
/* Categories that always belong to a word. */
208 case G_UNICODE_MODIFIER_LETTER:
209 case G_UNICODE_LOWERCASE_LETTER:
210 case G_UNICODE_TITLECASE_LETTER:
211 case G_UNICODE_UPPERCASE_LETTER:
212 case G_UNICODE_OTHER_LETTER:
213 case G_UNICODE_COMBINING_MARK:
214 case G_UNICODE_ENCLOSING_MARK:
215 case G_UNICODE_NON_SPACING_MARK:
216 case G_UNICODE_DECIMAL_NUMBER:
217 case G_UNICODE_LETTER_NUMBER:
218 case G_UNICODE_OTHER_NUMBER:
219 case G_UNICODE_CONNECT_PUNCTUATION:
220 return 1; /* Enchant 1.3.0 defines word chars like this. */
/* Categories that are not word characters on their own; they share the
   apostrophe and dash checks that follow the list. */
222 case G_UNICODE_CONTROL:
223 case G_UNICODE_FORMAT:
224 case G_UNICODE_UNASSIGNED:
225 case G_UNICODE_PRIVATE_USE:
226 case G_UNICODE_SURROGATE:
227 case G_UNICODE_DASH_PUNCTUATION:
228 case G_UNICODE_CLOSE_PUNCTUATION:
229 case G_UNICODE_FINAL_PUNCTUATION:
230 case G_UNICODE_INITIAL_PUNCTUATION:
231 case G_UNICODE_OTHER_PUNCTUATION:
232 case G_UNICODE_OPEN_PUNCTUATION:
233 case G_UNICODE_CURRENCY_SYMBOL:
234 case G_UNICODE_MODIFIER_SYMBOL:
235 case G_UNICODE_MATH_SYMBOL:
236 case G_UNICODE_OTHER_SYMBOL:
237 case G_UNICODE_LINE_SEPARATOR:
238 case G_UNICODE_PARAGRAPH_SEPARATOR:
239 case G_UNICODE_SPACE_SEPARATOR:
241 if ((n > 0) && (uc == g_utf8_get_char("'"))) {
242 return 1; /** Char ' is accepted only within a word. */
244 else if ((n > 0) && (type == G_UNICODE_DASH_PUNCTUATION)) {
245 return 1; /* hyphens only accepted within a word. */
/* One row of the ispell-to-Enchant name table. convert_language_code()
   compares the ispell member of a row with the requested name and returns
   a copy of the enchant member of the matching row. */
253 typedef struct lang_map {
259 /* Maps ispell language codes to enchant language codes. */
260 /* The list is partially taken from src/ispell/ispell_checker.cpp. */
/* convert_language_code() scans the rows front to back with an exact
   strcmp() match; the NULL ispell member of the final row ends the scan. */
261 static const LangMap lingua[] = {
262 {"american", "en_US"},
263 {"brazilian", "pt_BR"},
264 {"british", "en_GB"},
283 {"interlingua", "ia"},
288 {"lithuanian", "lt"},
290 {"nederlands", "nl"},
296 {"portuguese", "pt"},
303 {"suomi", "fi"}, /* For Emacs/Voikko/tmispell compatibility. */
308 {"yiddish-yivo", "yi"},
309 {NULL, NULL} /* Last item must be {NULL, NULL}. */
313 /* Converts ispell language code to enchant language code.
   code  ispell dictionary name; it is passed to strcmp(), so it must not
         be NULL
   Returns a g_strdup() copy of the Enchant code from the matching row of
   lingua[], or a copy of code itself when no row matches. The comparison
   is exact and case-sensitive. The caller releases the result with
   g_free(). */
315 convert_language_code (gchar *code)
/* The scan stops at the row whose ispell member is NULL. */
318 for (i = 0; lingua[i].ispell; i++) {
319 if (!strcmp(code,lingua[i].ispell)) {
320 /* We must call g_strdup() because the calling program g_free()s the result. */
321 return g_strdup (lingua[i].enchant);
324 /* Let's call g_strdup() here too! */
325 return g_strdup (code);
329 /* Splits a line into a set of (word,word_position) tuples.
   line  UTF-8 text of one input line
   The result is a flat GSList in which every word contributes two
   consecutive nodes: a newly allocated GString holding the word, then the
   position of the word packed with GINT_TO_POINTER. parse_file() walks the
   nodes in pairs, frees each GString and finally frees the list. */
331 tokenize_line (GString * line)
333 GSList * tokens = NULL;
334 char *utf = (char *) line->str;
340 size_t start_pos = 0;
/* Scratch buffer that collects the characters of the current word. */
341 word = g_string_new (NULL);
/* NOTE(review): cur_pos is a character offset (g_utf8_pointer_to_offset)
   while line->len is a byte count, so for multi-byte text it is the *utf
   test that actually detects the end of the string -- confirm this is
   intended. */
343 while (cur_pos < line->len && *utf) {
345 /* Skip non-word characters. */
346 cur_pos = g_utf8_pointer_to_offset ((const char*)line->str, utf);
347 uc = g_utf8_get_char (utf);
/* n is 0 here: an apostrophe or a dash cannot start a word. */
348 while (cur_pos < line->len && *utf && !is_word_char(uc,0)) {
349 utf = g_utf8_next_char (utf);
350 uc = g_utf8_get_char (utf);
351 cur_pos = g_utf8_pointer_to_offset ((const char*)line->str, utf);
355 /* Skip over word. */
/* n is 1 here: apostrophes and dashes are accepted inside the word. */
356 while (cur_pos < line->len && *utf && is_word_char(uc,1)) {
357 g_string_append_unichar (word, uc);
358 utf = g_utf8_next_char (utf);
359 uc = g_utf8_get_char (utf);
360 cur_pos = g_utf8_pointer_to_offset ((const char*)line->str, utf);
363 /* Do not accept one or more ' at the end of the word. */
/* NOTE(review): the i >= 0 test can only end the loop if i has a signed
   type -- confirm the declaration. */
365 while ((i >= 0) && (word->str[i] == '\'')) {
366 g_string_truncate (word, i);
370 /* Save (word, position) tuple. */
/* The list receives its own copy of the word, so the scratch buffer can be
   reused for the next one. */
372 tokens = g_slist_append (tokens, g_string_new_len (word->str, word->len));
373 tokens = g_slist_append (tokens, GINT_TO_POINTER(start_pos));
374 g_string_truncate (word, 0);
/* Release the scratch buffer together with its character data. */
377 g_string_free (word, TRUE);
/* Spell-check the text read from `in` and write the replies to `out`.
   in          input stream, read with consume_line()
   out         stream receiving the output of do_mode_a() or do_mode_l()
   mode        MODE_A or MODE_L selects the per-word handler
   countLines  value passed in by main(); NOTE(review): presumably turns on
               line numbering (the -L switch) -- confirm
   dictionary  ispell-style dictionary name, mapped to an Enchant language
               with convert_language_code()
   main() stores the return value in rval. */
383 parse_file (FILE * in, FILE * out, IspellMode_t mode, int countLines, gchar *dictionary)
385 EnchantBroker * broker;
388 GString * str, * word = NULL;
389 GSList * tokens, *token_ptr;
391 size_t pos, lineCount = 0;
393 gboolean was_last_line = FALSE, corrected_something = FALSE;
/* The language comes either from the requested dictionary name or from
   the user language reported by Enchant. */
399 lang = convert_language_code (dictionary);
402 lang = enchant_get_user_language();
407 /* Enchant will get rid of useless trailing garbage like de_DE@euro or de_DE.ISO-8859-15 */
409 broker = enchant_broker_init ();
410 dict = enchant_broker_request_dict (broker, lang);
/* Failure path: report the language that could not be served and release
   the broker. */
413 fprintf (stderr, "Couldn't create a dictionary for %s\n", lang);
415 enchant_broker_free (broker);
/* Line buffer reused for every call to consume_line(). */
421 str = g_string_new (NULL);
/* The loop ends after the call for which consume_line() returns true. */
423 while (!was_last_line) {
424 was_last_line = consume_line (in, str);
430 corrected_something = FALSE;
/* token_ptr keeps the head of the list for g_slist_free(); tokens itself
   is advanced while the list is walked. */
431 token_ptr = tokens = tokenize_line (str);
432 while (tokens != NULL) {
/* Set for every token, whether or not the word is misspelled. */
433 corrected_something = TRUE;
/* Nodes come in pairs: the word first, then its position. */
435 word = (GString *)tokens->data;
436 tokens = tokens->next;
437 pos = GPOINTER_TO_INT(tokens->data);
438 tokens = tokens->next;
441 do_mode_a (out, dict, word, pos, lineCount);
442 else if (mode == MODE_L)
443 do_mode_l (out, dict, word, lineCount);
/* The GString was allocated by tokenize_line() for this node. */
445 g_string_free(word, TRUE);
448 g_slist_free (token_ptr);
/* In MODE_A a line that yielded at least one token is followed by an
   empty line. */
451 if (mode == MODE_A && corrected_something) {
452 fwrite ("\n", 1, 1, out);
454 g_string_truncate (str, 0);
/* Release the dictionary before the broker it was requested from, then
   the line buffer. */
458 enchant_broker_free_dict (broker, dict);
459 enchant_broker_free (broker);
461 g_string_free (str, TRUE);
/* Program entry point: parse the ispell-compatible command line, then
   print the version banner, print the usage text, or spell-check the input
   with parse_file(). */
466 int main (int argc, char ** argv)
468 IspellMode_t mode = MODE_NONE;
476 gchar *dictionary = 0; /* -d dictionary */
478 /* Initialize system locale */
479 setlocale(LC_ALL, "");
482 /* Workaround about glib's "locale" not being the set C locale */
/* Choose the code page stored in charset: the ANSI code page when standard
   input is not a character device, the console code page in the other
   call. */
483 if (GetFileType(GetStdHandle(STD_INPUT_HANDLE)) != FILE_TYPE_CHAR) {
484 sprintf_s(charset,15,"CP%u",GetACP());
486 sprintf_s(charset,15,"CP%u",GetConsoleCP());
/* argv[0] is the program name, so the scan starts at index 1. */
490 for (i = 1; i < argc; i++) {
491 char * arg = argv[i];
/* Two-character arguments are the single-letter switches. */
493 if (strlen (arg) == 2) {
494 /* It seems that the first one of these that is specified gets precedence. */
495 if (arg[1] == 'a' && MODE_NONE == mode)
497 else if (arg[1] == 'l' && MODE_NONE == mode)
499 else if (arg[1] == 'v' && MODE_NONE == mode)
/* NOTE(review): this branch also requires mode to be MODE_NONE, so an L
   switch that follows a switch which presumably changed mode is not
   handled here -- confirm that this is intended. */
501 else if (arg[1] == 'L' && MODE_NONE == mode)
503 else if (arg[1] == 'm')
504 ; /* Ignore. Emacs calls ispell with '-m'. */
505 else if (arg[1] == 'd') {
507 dictionary = argv[i]; /* Emacs calls ispell with '-d dictionary'. */
510 else if ((strlen (arg) == 3) && (arg[1] == 'v') && (arg[2] == 'v')) {
511 mode = MODE_VERSION; /* Emacs (or ispell.el) calls [ai]spell with '-vv'. */
/* Attached form: the dictionary name starts right after the two switch
   characters, inside the same argument. */
513 else if (arg[1] == 'd') {
514 dictionary = arg + 2; /* Accept "-ddictionary", i.e. no space between -d and dictionary. */
/* Any other argument longer than two characters is reported as an error. */
516 else if (strlen (arg) > 2) {
517 fprintf (stderr, "-%c does not take any parameters.\n", arg[1]);
/* Dispatch on what the command line selected. */
527 if (mode == MODE_VERSION) {
528 print_version (stdout);
530 else if (mode == MODE_NONE && !file) {
531 print_help (stdout, argv[0]);
/* The input file is opened in binary mode through the Enchant helper. */
535 fp = enchant_fopen (file, "rb");
537 fprintf (stderr, "Error: Could not open the file \"%s\" for reading.\n", file);
/* Replies always go to standard output. */
542 rval = parse_file (fp, stdout, mode, countLines, dictionary);