gettext-tools/src/recode-sr-latin.c

   1 /* Recode Serbian text from Cyrillic to Latin script.
   2    Copyright (C) 2006-2007, 2010, 2012, 2015 Free Software Foundation,
   3    Inc.
   4    Written by Bruno Haible <bruno@clisp.org>, 2006.
   5
   6    This program is free software: you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  18
  19 #ifdef HAVE_CONFIG_H
  20 # include "config.h"
  21 #endif
  22
  23 #include <errno.h>
  24 #include <getopt.h>
  25 #include <stdbool.h>
  26 #include <stdio.h>
  27 #include <stdlib.h>
  28 #include <locale.h>
  29
  30 #if HAVE_ICONV
  31 #include <iconv.h>
  32 #endif
  33
  34 #include "closeout.h"
  35 #include "error.h"
  36 #include "progname.h"
  37 #include "relocatable.h"
  38 #include "basename.h"
  39 #include "xalloc.h"
  40 #include "localcharset.h"
  41 #include "c-strcase.h"
  42 #include "xstriconv.h"
  43 #include "filters.h"
  44 #include "propername.h"
  45 #include "gettext.h"
  46
  47 #define _(str) gettext (str)
  48
  49
  50 /* Long options.  */
  51 static const struct option long_options[] =
  52 {
  53   { "help", no_argument, NULL, 'h' },
  54   { "version", no_argument, NULL, 'V' },
  55   { NULL, 0, NULL, 0 }
  56 };
  57
  58 /* Forward declaration of local functions.  */
  59 static void usage (int status)
  60 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
  61      __attribute__ ((noreturn))
  62 #endif
  63 ;
  64 static void process (FILE *stream);
  65
  66 int
  67 main (int argc, char *argv[])
  68 {
  69   /* Default values for command line options.  */
  70   bool do_help = false;
  71   bool do_version = false;
  72
  73   int opt;
  74
  75   /* Set program name for message texts.  */
  76   set_program_name (argv[0]);
  77
  78 #ifdef HAVE_SETLOCALE
  79   /* Set locale via LC_ALL.  */
  80   setlocale (LC_ALL, "");
  81 #endif
  82
  83   /* Set the text message domain.  */
  84   bindtextdomain (PACKAGE, relocate (LOCALEDIR));
  85   textdomain (PACKAGE);
  86
  87   /* Ensure that write errors on stdout are detected.  */
  88   atexit (close_stdout);
  89
  90   /* Parse command line options.  */
  91   while ((opt = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
  92     switch (opt)
  93     {
  94     case '\0':          /* Long option.  */
  95       break;
  96     case 'h':
  97       do_help = true;
  98       break;
  99     case 'V':
 100       do_version = true;
 101       break;
 102     default:
 103       usage (EXIT_FAILURE);
 104     }
 105
 106   /* Version information is requested.  */
 107   if (do_version)
 108     {
 109       printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
 110       /* xgettext: no-wrap */
 111       printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
 112 License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\
 113 This is free software: you are free to change and redistribute it.\n\
 114 There is NO WARRANTY, to the extent permitted by law.\n\
 115 "),
 116               "2006-2007");
 117       printf (_("Written by %s and %s.\n"),
 118               /* TRANSLATORS: This is a proper name. The last name is
 119                  (with Unicode escapes) "\u0160egan" or (with HTML entities)
 120                  "&Scaron;egan".  */
 121               proper_name_utf8 ("Danilo Segan", "Danilo \305\240egan"),
 122               proper_name ("Bruno Haible"));
 123       exit (EXIT_SUCCESS);
 124     }
 125
 126   /* Help is requested.  */
 127   if (do_help)
 128     usage (EXIT_SUCCESS);
 129
 130   if (argc - optind > 0)
 131     error (EXIT_FAILURE, 0, _("too many arguments"));
 132
 133   process (stdin);
 134
 135   exit (EXIT_SUCCESS);
 136 }
 137
 138
 139 /* Display usage information and exit.  */
 140 static void
 141 usage (int status)
 142 {
 143   if (status != EXIT_SUCCESS)
 144     fprintf (stderr, _("Try '%s --help' for more information.\n"),
 145              program_name);
 146   else
 147     {
 148       /* xgettext: no-wrap */
 149       printf (_("\
 150 Usage: %s [OPTION]\n\
 151 "), program_name);
 152       printf ("\n");
 153       /* xgettext: no-wrap */
 154       printf (_("\
 155 Recode Serbian text from Cyrillic to Latin script.\n"));
 156       /* xgettext: no-wrap */
 157       printf (_("\
 158 The input text is read from standard input.  The converted text is output to\n\
 159 standard output.\n"));
 160       printf ("\n");
 161       /* xgettext: no-wrap */
 162       printf (_("\
 163 Informative output:\n"));
 164       /* xgettext: no-wrap */
 165       printf (_("\
 166   -h, --help                  display this help and exit\n"));
 167       /* xgettext: no-wrap */
 168       printf (_("\
 169   -V, --version               output version information and exit\n"));
 170       printf ("\n");
 171       /* TRANSLATORS: The placeholder indicates the bug-reporting address
 172          for this package.  Please add _another line_ saying
 173          "Report translation bugs to <...>\n" with the address for translation
 174          bugs (typically your translation team's web or email address).  */
 175       fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"), stdout);
 176     }
 177
 178   exit (status);
 179 }
 180
 181
 182 /* Routines for reading a line.
 183    Don't use routines that drop NUL bytes.  Don't use getline(), because it
 184    doesn't provide a good error message in case of memory allocation failure.
 185    The gnulib module 'linebuffer' is nearly the right thing, except that we
 186    don't want an extra newline at the end of file.  */
 187
 188 /* A 'struct linebuffer' holds a line of text. */
 189
 190 struct linebuffer
 191 {
 192   size_t size;                  /* Allocated. */
 193   size_t length;                /* Used. */
 194   char *buffer;
 195 };
 196
 197 /* Initialize linebuffer LINEBUFFER for use. */
 198 static inline void
 199 init_linebuffer (struct linebuffer *lb)
 200 {
 201   lb->size = 0;
 202   lb->length = 0;
 203   lb->buffer = NULL;
 204 }
 205
 206 /* Read an arbitrarily long line of text from STREAM into linebuffer LB.
 207    Keep the newline.  Do not NUL terminate.
 208    Return LINEBUFFER, except at end of file return NULL.  */
 209 static struct linebuffer *
 210 read_linebuffer (struct linebuffer *lb, FILE *stream)
 211 {
 212   if (feof (stream))
 213     return NULL;
 214   else
 215     {
 216       char *p = lb->buffer;
 217       char *end = lb->buffer + lb->size;
 218
 219       for (;;)
 220         {
 221           int c = getc (stream);
 222           if (c == EOF)
 223             {
 224               if (p == lb->buffer || ferror (stream))
 225                 return NULL;
 226               break;
 227             }
 228           if (p == end)
 229             {
 230               size_t oldsize = lb->size; /* = p - lb->buffer */
 231               size_t newsize = 2 * oldsize + 40;
 232               lb->buffer = (char *) xrealloc (lb->buffer, newsize);
 233               lb->size = newsize;
 234               p = lb->buffer + oldsize;
 235               end = lb->buffer + newsize;
 236             }
 237           *p++ = c;
 238           if (c == '\n')
 239             break;
 240         }
 241
 242       lb->length = p - lb->buffer;
 243       return lb;
 244     }
 245 }
 246
 247 /* Free linebuffer LB and its data, all allocated with malloc. */
 248 static inline void
 249 destroy_linebuffer (struct linebuffer *lb)
 250 {
 251   if (lb->buffer != NULL)
 252     free (lb->buffer);
 253 }
 254
 255
 256 /* Process the input and produce the output.  */
 257 static void
 258 process (FILE *stream)
 259 {
 260   struct linebuffer lb;
 261   const char *locale_code = locale_charset ();
 262   bool need_code_conversion = (c_strcasecmp (locale_code, "UTF-8") != 0);
 263 #if HAVE_ICONV
 264   iconv_t conv_to_utf8 = (iconv_t)(-1);
 265   iconv_t conv_from_utf8 = (iconv_t)(-1);
 266   char *last_utf8_line;
 267   size_t last_utf8_line_len;
 268   char *last_backconv_line;
 269   size_t last_backconv_line_len;
 270 #endif
 271
 272   init_linebuffer (&lb);
 273
 274   /* Initialize the conversion descriptors.  */
 275   if (need_code_conversion)
 276     {
 277 #if HAVE_ICONV
 278       /* Avoid glibc-2.1 bug with EUC-KR.  */
 279 # if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
 280      && !defined _LIBICONV_VERSION
 281       if (strcmp (locale_code, "EUC-KR") != 0)
 282 # endif
 283         {
 284           conv_to_utf8 = iconv_open ("UTF-8", locale_code);
 285           /* TODO:  Maybe append //TRANSLIT here?  */
 286           conv_from_utf8 = iconv_open (locale_code, "UTF-8");
 287         }
 288       if (conv_to_utf8 == (iconv_t)(-1))
 289         error (EXIT_FAILURE, 0, _("\
 290 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
 291 and iconv() does not support this conversion."),
 292                locale_code, "UTF-8", basename (program_name));
 293       if (conv_from_utf8 == (iconv_t)(-1))
 294         error (EXIT_FAILURE, 0, _("\
 295 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
 296 and iconv() does not support this conversion."),
 297                "UTF-8", locale_code, basename (program_name));
 298       last_utf8_line = NULL;
 299       last_utf8_line_len = 0;
 300       last_backconv_line = NULL;
 301       last_backconv_line_len = 0;
 302 #else
 303       error (EXIT_FAILURE, 0, _("\
 304 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
 305 This version was built without iconv()."),
 306              locale_code, "UTF-8", basename (program_name));
 307 #endif
 308     }
 309
 310   /* Read the input line by line.
 311      Processing it character by character is not possible, because some
 312      filters need to look at adjacent characters.  Processing the entire file
 313      in a whole chunk would take an excessive amount of memory.  */
 314   for (;;)
 315     {
 316       char *line;
 317       size_t line_len;
 318       char *filtered_line;
 319       size_t filtered_line_len;
 320
 321       /* Read a line.  */
 322       if (read_linebuffer (&lb, stream) == NULL)
 323         break;
 324       line = lb.buffer;
 325       line_len = lb.length;
 326       /* read_linebuffer always returns a non-void result.  */
 327       if (line_len == 0)
 328         abort ();
 329
 330 #if HAVE_ICONV
 331       /* Convert it to UTF-8.  */
 332       if (need_code_conversion)
 333         {
 334           char *utf8_line = last_utf8_line;
 335           size_t utf8_line_len = last_utf8_line_len;
 336
 337           if (xmem_cd_iconv (line, line_len, conv_to_utf8,
 338                              &utf8_line, &utf8_line_len) != 0)
 339             error (EXIT_FAILURE, errno,
 340                    _("input is not valid in \"%s\" encoding"),
 341                    locale_code);
 342           if (utf8_line != last_utf8_line)
 343             {
 344               if (last_utf8_line != NULL)
 345                 free (last_utf8_line);
 346               last_utf8_line = utf8_line;
 347               last_utf8_line_len = utf8_line_len;
 348             }
 349
 350           line = utf8_line;
 351           line_len = utf8_line_len;
 352         }
 353 #endif
 354
 355       /* Apply the filter.  */
 356       serbian_to_latin (line, line_len, &filtered_line, &filtered_line_len);
 357
 358 #if HAVE_ICONV
 359       /* Convert it back to the original encoding.  */
 360       if (need_code_conversion)
 361         {
 362           char *backconv_line = last_backconv_line;
 363           size_t backconv_line_len = last_backconv_line_len;
 364
 365           if (xmem_cd_iconv (filtered_line, filtered_line_len, conv_from_utf8,
 366                              &backconv_line, &backconv_line_len) != 0)
 367             error (EXIT_FAILURE, errno,
 368                    _("error while converting from \"%s\" encoding to \"%s\" encoding"),
 369                    "UTF-8", locale_code);
 370           if (backconv_line != last_backconv_line)
 371             {
 372               if (last_backconv_line != NULL)
 373                 free (last_backconv_line);
 374               last_backconv_line = backconv_line;
 375               last_backconv_line_len = backconv_line_len;
 376             }
 377
 378           fwrite (backconv_line, 1, backconv_line_len, stdout);
 379         }
 380       else
 381 #endif
 382         fwrite (filtered_line, 1, filtered_line_len, stdout);
 383
 384       free (filtered_line);
 385     }
 386
 387 #if HAVE_ICONV
 388   if (need_code_conversion)
 389     {
 390       iconv_close (conv_from_utf8);
 391       iconv_close (conv_to_utf8);
 392     }
 393 #endif
 394
 395   destroy_linebuffer (&lb);
 396 }