1 /* Recode Serbian text from Cyrillic to Latin script.
2 Copyright (C) 2006-2007, 2010, 2012 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2006.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
36 #include "relocatable.h"
39 #include "localcharset.h"
40 #include "c-strcase.h"
41 #include "xstriconv.h"
43 #include "propername.h"
46 #define _(str) gettext (str)
50 static const struct option long_options[] =
52 { "help", no_argument, NULL, 'h' },
53 { "version", no_argument, NULL, 'V' },
57 /* Forward declaration of local functions. */
58 static void usage (int status)
59 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
60 __attribute__ ((noreturn))
63 static void process (FILE *stream);
66 main (int argc, char *argv[])
68 /* Default values for command line options. */
70 bool do_version = false;
74 /* Set program name for message texts. */
75 set_program_name (argv[0]);
78 /* Set locale via LC_ALL. */
79 setlocale (LC_ALL, "");
82 /* Set the text message domain. */
83 bindtextdomain (PACKAGE, relocate (LOCALEDIR));
86 /* Ensure that write errors on stdout are detected. */
87 atexit (close_stdout);
89 /* Parse command line options. */
90 while ((opt = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
93 case '\0': /* Long option. */
102 usage (EXIT_FAILURE);
105 /* Version information is requested. */
108 printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
109 /* xgettext: no-wrap */
110 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
111 License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\
112 This is free software: you are free to change and redistribute it.\n\
113 There is NO WARRANTY, to the extent permitted by law.\n\
116 printf (_("Written by %s and %s.\n"),
117 /* TRANSLATORS: This is a proper name. The last name is
118 (with Unicode escapes) "\u0160egan" or (with HTML entities)
120 proper_name_utf8 ("Danilo Segan", "Danilo \305\240egan"),
121 proper_name ("Bruno Haible"));
125 /* Help is requested. */
127 usage (EXIT_SUCCESS);
129 if (argc - optind > 0)
130 error (EXIT_FAILURE, 0, _("too many arguments"));
138 /* Display usage information and exit. */
142 if (status != EXIT_SUCCESS)
143 fprintf (stderr, _("Try '%s --help' for more information.\n"),
147 /* xgettext: no-wrap */
149 Usage: %s [OPTION]\n\
152 /* xgettext: no-wrap */
154 Recode Serbian text from Cyrillic to Latin script.\n"));
155 /* xgettext: no-wrap */
157 The input text is read from standard input. The converted text is output to\n\
158 standard output.\n"));
160 /* xgettext: no-wrap */
162 Informative output:\n"));
163 /* xgettext: no-wrap */
165 -h, --help display this help and exit\n"));
166 /* xgettext: no-wrap */
168 -V, --version output version information and exit\n"));
170 /* TRANSLATORS: The placeholder indicates the bug-reporting address
171 for this package. Please add _another line_ saying
172 "Report translation bugs to <...>\n" with the address for translation
173 bugs (typically your translation team's web or email address). */
174 fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"), stdout);
181 /* Routines for reading a line.
182 Don't use routines that drop NUL bytes. Don't use getline(), because it
183 doesn't provide a good error message in case of memory allocation failure.
184 The gnulib module 'linebuffer' is nearly the right thing, except that we
185 don't want an extra newline at the end of file. */
187 /* A 'struct linebuffer' holds a line of text. */
191 size_t size; /* Allocated. */
192 size_t length; /* Used. */
196 /* Initialize linebuffer LB for use. */
198 init_linebuffer (struct linebuffer *lb)
205 /* Read an arbitrarily long line of text from STREAM into linebuffer LB.
206 Keep the newline. Do not NUL terminate.
207 Return LB, except at end of file return NULL. */
208 static struct linebuffer *
209 read_linebuffer (struct linebuffer *lb, FILE *stream)
215 char *p = lb->buffer;
216 char *end = lb->buffer + lb->size;
220 int c = getc (stream);
223 if (p == lb->buffer || ferror (stream))
229 size_t oldsize = lb->size; /* = p - lb->buffer */
230 size_t newsize = 2 * oldsize + 40;
231 lb->buffer = (char *) xrealloc (lb->buffer, newsize);
233 p = lb->buffer + oldsize;
234 end = lb->buffer + newsize;
241 lb->length = p - lb->buffer;
246 /* Free the data buffer of linebuffer LB, which was allocated with malloc. */
248 destroy_linebuffer (struct linebuffer *lb)
250 if (lb->buffer != NULL)
255 /* Process the input and produce the output. */
257 process (FILE *stream)
259 struct linebuffer lb;
260 const char *locale_code = locale_charset ();
261 bool need_code_conversion = (c_strcasecmp (locale_code, "UTF-8") != 0);
263 iconv_t conv_to_utf8 = (iconv_t)(-1);
264 iconv_t conv_from_utf8 = (iconv_t)(-1);
265 char *last_utf8_line;
266 size_t last_utf8_line_len;
267 char *last_backconv_line;
268 size_t last_backconv_line_len;
271 init_linebuffer (&lb);
273 /* Initialize the conversion descriptors. */
274 if (need_code_conversion)
277 /* Avoid glibc-2.1 bug with EUC-KR. */
278 # if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
279 && !defined _LIBICONV_VERSION
280 if (strcmp (locale_code, "EUC-KR") != 0)
283 conv_to_utf8 = iconv_open ("UTF-8", locale_code);
284 /* TODO: Maybe append //TRANSLIT here? */
285 conv_from_utf8 = iconv_open (locale_code, "UTF-8");
287 if (conv_to_utf8 == (iconv_t)(-1))
288 error (EXIT_FAILURE, 0, _("\
289 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
290 and iconv() does not support this conversion."),
291 locale_code, "UTF-8", basename (program_name));
292 if (conv_from_utf8 == (iconv_t)(-1))
293 error (EXIT_FAILURE, 0, _("\
294 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
295 and iconv() does not support this conversion."),
296 "UTF-8", locale_code, basename (program_name));
297 last_utf8_line = NULL;
298 last_utf8_line_len = 0;
299 last_backconv_line = NULL;
300 last_backconv_line_len = 0;
302 error (EXIT_FAILURE, 0, _("\
303 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
304 This version was built without iconv()."),
305 locale_code, "UTF-8", basename (program_name));
309 /* Read the input line by line.
310 Processing it character by character is not possible, because some
311 filters need to look at adjacent characters. Processing the entire file
312 as a single chunk would take an excessive amount of memory. */
318 size_t filtered_line_len;
321 if (read_linebuffer (&lb, stream) == NULL)
324 line_len = lb.length;
325 /* On success, read_linebuffer always yields a non-empty line. */
330 /* Convert it to UTF-8. */
331 if (need_code_conversion)
333 char *utf8_line = last_utf8_line;
334 size_t utf8_line_len = last_utf8_line_len;
336 if (xmem_cd_iconv (line, line_len, conv_to_utf8,
337 &utf8_line, &utf8_line_len) != 0)
338 error (EXIT_FAILURE, errno,
339 _("input is not valid in \"%s\" encoding"),
341 if (utf8_line != last_utf8_line)
343 if (last_utf8_line != NULL)
344 free (last_utf8_line);
345 last_utf8_line = utf8_line;
346 last_utf8_line_len = utf8_line_len;
350 line_len = utf8_line_len;
354 /* Apply the filter. */
355 serbian_to_latin (line, line_len, &filtered_line, &filtered_line_len);
358 /* Convert it back to the original encoding. */
359 if (need_code_conversion)
361 char *backconv_line = last_backconv_line;
362 size_t backconv_line_len = last_backconv_line_len;
364 if (xmem_cd_iconv (filtered_line, filtered_line_len, conv_from_utf8,
365 &backconv_line, &backconv_line_len) != 0)
366 error (EXIT_FAILURE, errno,
367 _("error while converting from \"%s\" encoding to \"%s\" encoding"),
368 "UTF-8", locale_code);
369 if (backconv_line != last_backconv_line)
371 if (last_backconv_line != NULL)
372 free (last_backconv_line);
373 last_backconv_line = backconv_line;
374 last_backconv_line_len = backconv_line_len;
377 fwrite (backconv_line, 1, backconv_line_len, stdout);
381 fwrite (filtered_line, 1, filtered_line_len, stdout);
383 free (filtered_line);
387 if (need_code_conversion)
389 iconv_close (conv_from_utf8);
390 iconv_close (conv_to_utf8);
394 destroy_linebuffer (&lb);