1 /* Recode Serbian text from Cyrillic to Latin script.
2 Copyright (C) 2006-2007, 2010, 2012, 2015 Free Software Foundation,
4 Written by Bruno Haible <bruno@clisp.org>, 2006.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
37 #include "relocatable.h"
40 #include "localcharset.h"
41 #include "c-strcase.h"
42 #include "xstriconv.h"
44 #include "propername.h"
47 #define _(str) gettext (str)
/* Long options handed to getopt_long() in main(); each entry pairs a long
   option name with the short option letter h or V. */
51 static const struct option long_options[] =
53 { "help", no_argument, NULL, 'h' },
54 { "version", no_argument, NULL, 'V' },
58 /* Forward declarations of local functions.
   usage() carries the noreturn attribute when compiled with GCC 2.5 or
   newer. */
59 static void usage (int status)
60 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
61 __attribute__ ((noreturn))
64 static void process (FILE *stream);
/* Program entry point: set up the program name, the locale and the message
   domain, parse the -h/--help and -V/--version options, and reject any
   remaining non-option arguments. */
67 main (int argc, char *argv[])
69 /* Default values for command line options. */
71 bool do_version = false;
75 /* Set program name for message texts. */
76 set_program_name (argv[0]);
79 /* Set locale via LC_ALL. */
80 setlocale (LC_ALL, "");
83 /* Set the text message domain. */
84 bindtextdomain (PACKAGE, relocate (LOCALEDIR));
87 /* Register close_stdout to run at exit, so that write errors on stdout are detected. */
88 atexit (close_stdout);
90 /* Parse command line options; the only short options accepted are -h and -V. */
91 while ((opt = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
94 case '\0': /* Long option. */
103 usage (EXIT_FAILURE);
106 /* Version information is requested. */
109 printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
110 /* xgettext: no-wrap */
111 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
112 License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\
113 This is free software: you are free to change and redistribute it.\n\
114 There is NO WARRANTY, to the extent permitted by law.\n\
117 printf (_("Written by %s and %s.\n"),
118 /* TRANSLATORS: This is a proper name. The last name is
119 (with Unicode escapes) "\u0160egan" or (with HTML entities)
121 proper_name_utf8 ("Danilo Segan", "Danilo \305\240egan"),
122 proper_name ("Bruno Haible"));
126 /* Help is requested. */
128 usage (EXIT_SUCCESS);
130 if (argc - optind > 0)
131 error (EXIT_FAILURE, 0, _("too many arguments"));
139 /* Display usage information and exit.
   When STATUS is not EXIT_SUCCESS, a hint to try --help is written to
   stderr. The help text itself is assembled from several separately
   translatable messages and ends with the bug-reporting address. */
143 if (status != EXIT_SUCCESS)
144 fprintf (stderr, _("Try '%s --help' for more information.\n"),
148 /* xgettext: no-wrap */
150 Usage: %s [OPTION]\n\
153 /* xgettext: no-wrap */
155 Recode Serbian text from Cyrillic to Latin script.\n"));
156 /* xgettext: no-wrap */
158 The input text is read from standard input. The converted text is output to\n\
159 standard output.\n"));
161 /* xgettext: no-wrap */
163 Informative output:\n"));
164 /* xgettext: no-wrap */
166 -h, --help display this help and exit\n"));
167 /* xgettext: no-wrap */
169 -V, --version output version information and exit\n"));
171 /* TRANSLATORS: The placeholder indicates the bug-reporting address
172 for this package. Please add _another line_ saying
173 "Report translation bugs to <...>\n" with the address for translation
174 bugs (typically your translation team's web or email address). */
175 fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"), stdout);
182 /* Routines for reading a line.
183 Don't use routines that drop NUL bytes. Don't use getline(), because it
184 doesn't provide a good error message in case of memory allocation failure.
185 The gnulib module 'linebuffer' is nearly the right thing, except that we
186 don't want an extra newline at the end of file. */
188 /* A 'struct linebuffer' holds a line of text in a growable byte buffer. */
192 size_t size; /* Number of bytes allocated for the buffer. */
193 size_t length; /* Number of bytes in use, i.e. the length of the line read. */
197 /* Initialize linebuffer LB for use. */
199 init_linebuffer (struct linebuffer *lb)
206 /* Read an arbitrarily long line of text from STREAM into linebuffer LB.
207 Keep the newline. Do not NUL terminate. Store the byte count in LB->length.
208 Return LB, except at end of file return NULL. */
209 static struct linebuffer *
210 read_linebuffer (struct linebuffer *lb, FILE *stream)
216 char *p = lb->buffer; /* Next write position. */
217 char *end = lb->buffer + lb->size; /* End of the allocated space. */
221 int c = getc (stream);
224 if (p == lb->buffer || ferror (stream)) /* Nothing stored yet, or a stream error. */
230 size_t oldsize = lb->size; /* = p - lb->buffer */
231 size_t newsize = 2 * oldsize + 40; /* Roughly double the allocation. */
232 lb->buffer = (char *) xrealloc (lb->buffer, newsize);
234 p = lb->buffer + oldsize; /* Re-derive P and END from the new buffer pointer. */
235 end = lb->buffer + newsize;
242 lb->length = p - lb->buffer; /* Number of bytes stored for this line. */
247 /* Release the storage held by linebuffer LB. Its buffer is obtained via
   xrealloc in read_linebuffer. Note that process() passes the address of an
   automatic struct, so only the buffer, not LB itself, is heap storage. */
249 destroy_linebuffer (struct linebuffer *lb)
251 if (lb->buffer != NULL)
256 /* Process the input and produce the output.
   Lines are read from STREAM one at a time. When the locale encoding is not
   UTF-8, each line is first converted to UTF-8 with iconv; then it is passed
   through serbian_to_latin; the result is converted back to the locale
   encoding if needed and written to stdout. */
258 process (FILE *stream)
260 struct linebuffer lb;
261 const char *locale_code = locale_charset ();
262 bool need_code_conversion = (c_strcasecmp (locale_code, "UTF-8") != 0);
264 iconv_t conv_to_utf8 = (iconv_t)(-1);
265 iconv_t conv_from_utf8 = (iconv_t)(-1);
266 char *last_utf8_line;
267 size_t last_utf8_line_len;
268 char *last_backconv_line;
269 size_t last_backconv_line_len;
272 init_linebuffer (&lb);
274 /* Initialize the conversion descriptors. They are only opened when the locale encoding is not UTF-8. */
275 if (need_code_conversion)
278 /* Avoid glibc-2.1 bug with EUC-KR. */
279 # if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
280 && !defined _LIBICONV_VERSION
281 if (strcmp (locale_code, "EUC-KR") != 0)
284 conv_to_utf8 = iconv_open ("UTF-8", locale_code);
285 /* TODO: Maybe append //TRANSLIT here? */
286 conv_from_utf8 = iconv_open (locale_code, "UTF-8");
288 if (conv_to_utf8 == (iconv_t)(-1))
289 error (EXIT_FAILURE, 0, _("\
290 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
291 and iconv() does not support this conversion."),
292 locale_code, "UTF-8", basename (program_name));
293 if (conv_from_utf8 == (iconv_t)(-1))
294 error (EXIT_FAILURE, 0, _("\
295 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
296 and iconv() does not support this conversion."),
297 "UTF-8", locale_code, basename (program_name));
/* The result buffers of both conversions are cached across lines; start
   with none. */
298 last_utf8_line = NULL;
299 last_utf8_line_len = 0;
300 last_backconv_line = NULL;
301 last_backconv_line_len = 0;
303 error (EXIT_FAILURE, 0, _("\
304 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
305 This version was built without iconv()."),
306 locale_code, "UTF-8", basename (program_name));
310 /* Read the input line by line.
311 Processing it character by character is not possible, because some
312 filters need to look at adjacent characters. Processing the entire file
313 in a whole chunk would take an excessive amount of memory. */
319 size_t filtered_line_len;
322 if (read_linebuffer (&lb, stream) == NULL)
325 line_len = lb.length;
326 /* A successful read_linebuffer never yields an empty line. */
331 /* Convert it to UTF-8. */
332 if (need_code_conversion)
334 char *utf8_line = last_utf8_line;
335 size_t utf8_line_len = last_utf8_line_len;
337 if (xmem_cd_iconv (line, line_len, conv_to_utf8,
338 &utf8_line, &utf8_line_len) != 0)
339 error (EXIT_FAILURE, errno,
340 _("input is not valid in \"%s\" encoding"),
342 if (utf8_line != last_utf8_line) /* A different buffer came back: drop the old one and cache the new one. */
344 if (last_utf8_line != NULL)
345 free (last_utf8_line);
346 last_utf8_line = utf8_line;
347 last_utf8_line_len = utf8_line_len;
351 line_len = utf8_line_len;
355 /* Apply the filter. */
356 serbian_to_latin (line, line_len, &filtered_line, &filtered_line_len);
359 /* Convert it back to the original encoding. */
360 if (need_code_conversion)
362 char *backconv_line = last_backconv_line;
363 size_t backconv_line_len = last_backconv_line_len;
365 if (xmem_cd_iconv (filtered_line, filtered_line_len, conv_from_utf8,
366 &backconv_line, &backconv_line_len) != 0)
367 error (EXIT_FAILURE, errno,
368 _("error while converting from \"%s\" encoding to \"%s\" encoding"),
369 "UTF-8", locale_code);
370 if (backconv_line != last_backconv_line) /* Same caching scheme as for the UTF-8 buffer. */
372 if (last_backconv_line != NULL)
373 free (last_backconv_line);
374 last_backconv_line = backconv_line;
375 last_backconv_line_len = backconv_line_len;
378 fwrite (backconv_line, 1, backconv_line_len, stdout);
382 fwrite (filtered_line, 1, filtered_line_len, stdout);
384 free (filtered_line);
/* NOTE(review): last_utf8_line and last_backconv_line are not released in
   the cleanup shown here; confirm whether that is intentional. */
388 if (need_code_conversion)
390 iconv_close (conv_from_utf8);
391 iconv_close (conv_to_utf8);
395 destroy_linebuffer (&lb);