gettext-tools/src/read-stringtable.c

   1 /* Reading NeXTstep/GNUstep .strings files.
   2    Copyright (C) 2003, 2005-2007, 2009, 2015 Free Software Foundation,
   3    Inc.
   4    Written by Bruno Haible <bruno@clisp.org>, 2003.
   5
   6    This program is free software: you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  18
  19 #ifdef HAVE_CONFIG_H
  20 # include <config.h>
  21 #endif
  22
  23 /* Specification.  */
  24 #include "read-stringtable.h"
  25
  26 #include <assert.h>
  27 #include <errno.h>
  28 #include <stdbool.h>
  29 #include <stdio.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32
  33 #include "error.h"
  34 #include "error-progname.h"
  35 #include "read-catalog-abstract.h"
  36 #include "xalloc.h"
  37 #include "xvasprintf.h"
  38 #include "po-xerror.h"
  39 #include "unistr.h"
  40 #include "gettext.h"
  41
  42 #define _(str) gettext (str)
  43
  44 /* The format of NeXTstep/GNUstep .strings files is documented in
  45      gnustep-base-1.8.0/Tools/make_strings/Using.txt
  46    and in the comments of method propertyListFromStringsFileFormat in
  47      gnustep-base-1.8.0/Source/NSString.m
   In summary, it's an Objective-C like file with pseudo-assignments of the form
  49           "key" = "value";
  50    where the key is the msgid and the value is the msgstr.
  51
  52    The implementation of the parser of .strings files is in
  53      gnustep-base-1.8.0/Source/NSString.m
  54      function GSPropertyListFromStringsFormat
  55      (indirectly called from NSBundle's method localizedStringForKey).
  56
  57    A test case is in
  58      gnustep-base-1.8.0/Testing/English.lproj/NXStringTable.example
  59  */
  60
  61 /* Handling of comments: We copy all comments from the .strings file to
  62    the PO file. This is not really needed; it's a service for translators
  63    who don't like PO files and prefer to maintain the .strings file.  */
  64
  65
  66 /* Real filename, used in error messages about the input file.  */
  67 static const char *real_file_name;
  68
  69 /* File name and line number.  */
  70 extern lex_pos_ty gram_pos;
  71
  72 /* The input file stream.  */
  73 static FILE *fp;
  74
  75
  76 /* Phase 1: Read a byte.
  77    Max. 4 pushback characters.  */
  78
  79 static unsigned char phase1_pushback[4];
  80 static int phase1_pushback_length;
  81
  82 static int
  83 phase1_getc ()
  84 {
  85   int c;
  86
  87   if (phase1_pushback_length)
  88     return phase1_pushback[--phase1_pushback_length];
  89
  90   c = getc (fp);
  91
  92   if (c == EOF)
  93     {
  94       if (ferror (fp))
  95         {
  96           const char *errno_description = strerror (errno);
  97           po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
  98                      xasprintf ("%s: %s",
  99                                 xasprintf (_("error while reading \"%s\""),
 100                                            real_file_name),
 101                                 errno_description));
 102         }
 103       return EOF;
 104     }
 105
 106   return c;
 107 }
 108
 109 static void
 110 phase1_ungetc (int c)
 111 {
 112   if (c != EOF)
 113     phase1_pushback[phase1_pushback_length++] = c;
 114 }
 115
 116
 117 /* Phase 2: Read an UCS-4 character.
 118    Max. 2 pushback characters.  */
 119
 120 /* End-of-file indicator for functions returning an UCS-4 character.  */
 121 #define UEOF -1
 122
 123 static int phase2_pushback[4];
 124 static int phase2_pushback_length;
 125
 126 /* The input file can be in Unicode encoding (UCS-2BE, UCS-2LE, UTF-8, each
 127    with a BOM!), or otherwise the locale-dependent default encoding is used.
 128    Since we don't want to depend on the locale here, we use ISO-8859-1
 129    instead.  */
 130 enum enc
 131 {
 132   enc_undetermined,
 133   enc_ucs2be,
 134   enc_ucs2le,
 135   enc_utf8,
 136   enc_iso8859_1
 137 };
 138 static enum enc encoding;
 139
 140 static int
 141 phase2_getc ()
 142 {
 143   if (phase2_pushback_length)
 144     return phase2_pushback[--phase2_pushback_length];
 145
 146   if (encoding == enc_undetermined)
 147     {
 148       /* Determine the input file's encoding.  */
 149       int c0, c1;
 150
 151       c0 = phase1_getc ();
 152       if (c0 == EOF)
 153         return UEOF;
 154       c1 = phase1_getc ();
 155       if (c1 == EOF)
 156         {
 157           phase1_ungetc (c0);
 158           encoding = enc_iso8859_1;
 159         }
 160       else if (c0 == 0xfe && c1 == 0xff)
 161         encoding = enc_ucs2be;
 162       else if (c0 == 0xff && c1 == 0xfe)
 163         encoding = enc_ucs2le;
 164       else
 165         {
 166           int c2;
 167
 168           c2 = phase1_getc ();
 169           if (c2 == EOF)
 170             {
 171               phase1_ungetc (c1);
 172               phase1_ungetc (c0);
 173               encoding = enc_iso8859_1;
 174             }
 175           else if (c0 == 0xef && c1 == 0xbb && c2 == 0xbf)
 176             encoding = enc_utf8;
 177           else
 178             {
 179               phase1_ungetc (c2);
 180               phase1_ungetc (c1);
 181               phase1_ungetc (c0);
 182               encoding = enc_iso8859_1;
 183             }
 184         }
 185     }
 186
 187   switch (encoding)
 188     {
 189     case enc_ucs2be:
 190       /* Read an UCS-2BE encoded character.  */
 191       {
 192         int c0, c1;
 193
 194         c0 = phase1_getc ();
 195         if (c0 == EOF)
 196           return UEOF;
 197         c1 = phase1_getc ();
 198         if (c1 == EOF)
 199           return UEOF;
 200         return (c0 << 8) + c1;
 201       }
 202
 203     case enc_ucs2le:
 204       /* Read an UCS-2LE encoded character.  */
 205       {
 206         int c0, c1;
 207
 208         c0 = phase1_getc ();
 209         if (c0 == EOF)
 210           return UEOF;
 211         c1 = phase1_getc ();
 212         if (c1 == EOF)
 213           return UEOF;
 214         return c0 + (c1 << 8);
 215       }
 216
 217     case enc_utf8:
 218       /* Read an UTF-8 encoded character.  */
 219       {
 220         unsigned char buf[6];
 221         unsigned int count;
 222         int c;
 223         ucs4_t uc;
 224
 225         c = phase1_getc ();
 226         if (c == EOF)
 227           return UEOF;
 228         buf[0] = c;
 229         count = 1;
 230
 231         if (buf[0] >= 0xc0)
 232           {
 233             c = phase1_getc ();
 234             if (c == EOF)
 235               return UEOF;
 236             buf[1] = c;
 237             count = 2;
 238
 239             if (buf[0] >= 0xe0
 240                 && ((buf[1] ^ 0x80) < 0x40))
 241               {
 242                 c = phase1_getc ();
 243                 if (c == EOF)
 244                   return UEOF;
 245                 buf[2] = c;
 246                 count = 3;
 247
 248                 if (buf[0] >= 0xf0
 249                     && ((buf[2] ^ 0x80) < 0x40))
 250                   {
 251                     c = phase1_getc ();
 252                     if (c == EOF)
 253                       return UEOF;
 254                     buf[3] = c;
 255                     count = 4;
 256
 257                     if (buf[0] >= 0xf8
 258                         && ((buf[3] ^ 0x80) < 0x40))
 259                       {
 260                         c = phase1_getc ();
 261                         if (c == EOF)
 262                           return UEOF;
 263                         buf[4] = c;
 264                         count = 5;
 265
 266                         if (buf[0] >= 0xfc
 267                             && ((buf[4] ^ 0x80) < 0x40))
 268                           {
 269                             c = phase1_getc ();
 270                             if (c == EOF)
 271                               return UEOF;
 272                             buf[5] = c;
 273                             count = 6;
 274                           }
 275                       }
 276                   }
 277               }
 278           }
 279
 280         u8_mbtouc (&uc, buf, count);
 281         return uc;
 282       }
 283
 284     case enc_iso8859_1:
 285       /* Read an ISO-8859-1 encoded character.  */
 286       {
 287         int c = phase1_getc ();
 288
 289         if (c == EOF)
 290           return UEOF;
 291         return c;
 292       }
 293
 294     default:
 295       abort ();
 296     }
 297 }
 298
 299 static void
 300 phase2_ungetc (int c)
 301 {
 302   if (c != UEOF)
 303     phase2_pushback[phase2_pushback_length++] = c;
 304 }
 305
 306
 307 /* Phase 3: Read an UCS-4 character, with line number handling.  */
 308
 309 static int
 310 phase3_getc ()
 311 {
 312   int c = phase2_getc ();
 313
 314   if (c == '\n')
 315     gram_pos.line_number++;
 316
 317   return c;
 318 }
 319
 320 static void
 321 phase3_ungetc (int c)
 322 {
 323   if (c == '\n')
 324     --gram_pos.line_number;
 325   phase2_ungetc (c);
 326 }
 327
 328
 329 /* Convert from UCS-4 to UTF-8.  */
 330 static char *
 331 conv_from_ucs4 (const int *buffer, size_t buflen)
 332 {
 333   unsigned char *utf8_string;
 334   size_t pos;
 335   unsigned char *q;
 336
 337   /* Each UCS-4 word needs 6 bytes at worst.  */
 338   utf8_string = XNMALLOC (6 * buflen + 1, unsigned char);
 339
 340   for (pos = 0, q = utf8_string; pos < buflen; )
 341     {
 342       unsigned int uc;
 343       int n;
 344
 345       uc = buffer[pos++];
 346       n = u8_uctomb (q, uc, 6);
 347       assert (n > 0);
 348       q += n;
 349     }
 350   *q = '\0';
 351   assert (q - utf8_string <= 6 * buflen);
 352
 353   return (char *) utf8_string;
 354 }
 355
 356
 357 /* Parse a string enclosed in double-quotes.  Input is UCS-4 encoded.
 358    Return the string in UTF-8 encoding, or NULL if the input doesn't represent
 359    a valid string enclosed in double-quotes.  */
 360 static char *
 361 parse_escaped_string (const int *string, size_t length)
 362 {
 363   static int *buffer;
 364   static size_t bufmax;
 365   static size_t buflen;
 366   const int *string_limit = string + length;
 367   int c;
 368
 369   if (string == string_limit)
 370     return NULL;
 371   c = *string++;
 372   if (c != '"')
 373     return NULL;
 374   buflen = 0;
 375   for (;;)
 376     {
 377       if (string == string_limit)
 378         return NULL;
 379       c = *string++;
 380       if (c == '"')
 381         break;
 382       if (c == '\\')
 383         {
 384           if (string == string_limit)
 385             return NULL;
 386           c = *string++;
 387           if (c >= '0' && c <= '7')
 388             {
 389               unsigned int n = 0;
 390               int j = 0;
 391               for (;;)
 392                 {
 393                   n = n * 8 + (c - '0');
 394                   if (++j == 3)
 395                     break;
 396                   if (string == string_limit)
 397                     break;
 398                   c = *string;
 399                   if (!(c >= '0' && c <= '7'))
 400                     break;
 401                   string++;
 402                 }
 403               c = n;
 404             }
 405           else if (c == 'u' || c == 'U')
 406             {
 407               unsigned int n = 0;
 408               int j;
 409               for (j = 0; j < 4; j++)
 410                 {
 411                   if (string == string_limit)
 412                     break;
 413                   c = *string;
 414                   if (c >= '0' && c <= '9')
 415                     n = n * 16 + (c - '0');
 416                   else if (c >= 'A' && c <= 'F')
 417                     n = n * 16 + (c - 'A' + 10);
 418                   else if (c >= 'a' && c <= 'f')
 419                     n = n * 16 + (c - 'a' + 10);
 420                   else
 421                     break;
 422                   string++;
 423                 }
 424               c = n;
 425             }
 426           else
 427             switch (c)
 428               {
 429               case 'a': c = '\a'; break;
 430               case 'b': c = '\b'; break;
 431               case 't': c = '\t'; break;
 432               case 'r': c = '\r'; break;
 433               case 'n': c = '\n'; break;
 434               case 'v': c = '\v'; break;
 435               case 'f': c = '\f'; break;
 436               }
 437         }
 438       if (buflen >= bufmax)
 439         {
 440           bufmax = 2 * bufmax + 10;
 441           buffer = xrealloc (buffer, bufmax * sizeof (int));
 442         }
 443       buffer[buflen++] = c;
 444     }
 445
 446   return conv_from_ucs4 (buffer, buflen);
 447 }
 448
 449
 450 /* Accumulating flag comments.  */
 451
 452 static char *special_comment;
 453
 454 static inline void
 455 special_comment_reset ()
 456 {
 457   if (special_comment != NULL)
 458     free (special_comment);
 459   special_comment = NULL;
 460 }
 461
 462 static void
 463 special_comment_add (const char *flag)
 464 {
 465   if (special_comment == NULL)
 466     special_comment = xstrdup (flag);
 467   else
 468     {
 469       size_t total_len = strlen (special_comment) + 2 + strlen (flag) + 1;
 470       special_comment = xrealloc (special_comment, total_len);
 471       strcat (special_comment, ", ");
 472       strcat (special_comment, flag);
 473     }
 474 }
 475
 476 static inline void
 477 special_comment_finish ()
 478 {
 479   if (special_comment != NULL)
 480     {
 481       po_callback_comment_special (special_comment);
 482       free (special_comment);
 483       special_comment = NULL;
 484     }
 485 }
 486
 487
 488 /* Accumulating comments.  */
 489
 490 static int *buffer;
 491 static size_t bufmax;
 492 static size_t buflen;
 493 static bool next_is_obsolete;
 494 static bool next_is_fuzzy;
 495 static char *fuzzy_msgstr;
 496 static bool expect_fuzzy_msgstr_as_c_comment;
 497 static bool expect_fuzzy_msgstr_as_cxx_comment;
 498
 499 static inline void
 500 comment_start ()
 501 {
 502   buflen = 0;
 503 }
 504
 505 static inline void
 506 comment_add (int c)
 507 {
 508   if (buflen >= bufmax)
 509     {
 510       bufmax = 2 * bufmax + 10;
 511       buffer = xrealloc (buffer, bufmax * sizeof (int));
 512     }
 513   buffer[buflen++] = c;
 514 }
 515
 516 static inline void
 517 comment_line_end (size_t chars_to_remove, bool test_for_fuzzy_msgstr)
 518 {
 519   char *line;
 520
 521   buflen -= chars_to_remove;
 522   /* Drop trailing white space, but not EOLs.  */
 523   while (buflen >= 1
 524          && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
 525     --buflen;
 526
 527   /* At special positions we interpret a comment of the form
 528        = "escaped string"
 529      with an optional trailing semicolon as being the fuzzy msgstr, not a
 530      regular comment.  */
 531   if (test_for_fuzzy_msgstr
 532       && buflen > 2 && buffer[0] == '=' && buffer[1] == ' '
 533       && (fuzzy_msgstr =
 534           parse_escaped_string (buffer + 2,
 535                                 buflen - (buffer[buflen - 1] == ';') - 2)))
 536     return;
 537
 538   line = conv_from_ucs4 (buffer, buflen);
 539
 540   if (strcmp (line, "Flag: untranslated") == 0)
 541     {
 542       special_comment_add ("fuzzy");
 543       next_is_fuzzy = true;
 544     }
 545   else if (strcmp (line, "Flag: unmatched") == 0)
 546     next_is_obsolete = true;
 547   else if (strlen (line) >= 6 && memcmp (line, "Flag: ", 6) == 0)
 548     special_comment_add (line + 6);
 549   else if (strlen (line) >= 9 && memcmp (line, "Comment: ", 9) == 0)
 550     /* A comment extracted from the source.  */
 551     po_callback_comment_dot (line + 9);
 552   else
 553     {
 554       char *last_colon;
 555       unsigned long number;
 556       char *endp;
 557
 558       if (strlen (line) >= 6 && memcmp (line, "File: ", 6) == 0
 559           && (last_colon = strrchr (line + 6, ':')) != NULL
 560           && *(last_colon + 1) != '\0'
 561           && (number = strtoul (last_colon + 1, &endp, 10), *endp == '\0'))
 562         {
 563           /* A "File: <filename>:<number>" type comment.  */
 564           *last_colon = '\0';
 565           po_callback_comment_filepos (line + 6, number);
 566         }
 567       else
 568         po_callback_comment (line);
 569     }
 570 }
 571
 572
 573 /* Phase 4: Replace each comment that is not inside a string with a space
 574    character.  */
 575
 576 static int
 577 phase4_getc ()
 578 {
 579   int c;
 580
 581   c = phase3_getc ();
 582   if (c != '/')
 583     return c;
 584   c = phase3_getc ();
 585   switch (c)
 586     {
 587     default:
 588       phase3_ungetc (c);
 589       return '/';
 590
 591     case '*':
 592       /* C style comment.  */
 593       {
 594         bool last_was_star;
 595         size_t trailing_stars;
 596         bool seen_newline;
 597
 598         comment_start ();
 599         last_was_star = false;
 600         trailing_stars = 0;
 601         seen_newline = false;
 602         /* Drop additional stars at the beginning of the comment.  */
 603         for (;;)
 604           {
 605             c = phase3_getc ();
 606             if (c != '*')
 607               break;
 608             last_was_star = true;
 609           }
 610         phase3_ungetc (c);
 611         for (;;)
 612           {
 613             c = phase3_getc ();
 614             if (c == UEOF)
 615               break;
 616             /* We skip all leading white space, but not EOLs.  */
 617             if (!(buflen == 0 && (c == ' ' || c == '\t')))
 618               comment_add (c);
 619             switch (c)
 620               {
 621               case '\n':
 622                 seen_newline = true;
 623                 comment_line_end (1, false);
 624                 comment_start ();
 625                 last_was_star = false;
 626                 trailing_stars = 0;
 627                 continue;
 628
 629               case '*':
 630                 last_was_star = true;
 631                 trailing_stars++;
 632                 continue;
 633
 634               case '/':
 635                 if (last_was_star)
 636                   {
 637                     /* Drop additional stars at the end of the comment.  */
 638                     comment_line_end (trailing_stars + 1,
 639                                       expect_fuzzy_msgstr_as_c_comment
 640                                       && !seen_newline);
 641                     break;
 642                   }
 643                 /* FALLTHROUGH */
 644
 645               default:
 646                 last_was_star = false;
 647                 trailing_stars = 0;
 648                 continue;
 649               }
 650             break;
 651           }
 652         return ' ';
 653       }
 654
 655     case '/':
 656       /* C++ style comment.  */
 657       comment_start ();
 658       for (;;)
 659         {
 660           c = phase3_getc ();
 661           if (c == '\n' || c == UEOF)
 662             break;
 663           /* We skip all leading white space, but not EOLs.  */
 664           if (!(buflen == 0 && (c == ' ' || c == '\t')))
 665             comment_add (c);
 666         }
 667       comment_line_end (0, expect_fuzzy_msgstr_as_cxx_comment);
 668       return '\n';
 669     }
 670 }
 671
 672 static inline void
 673 phase4_ungetc (int c)
 674 {
 675   phase3_ungetc (c);
 676 }
 677
 678
 679 /* Return true if a character is considered as whitespace.  */
 680 static bool
 681 is_whitespace (int c)
 682 {
 683   return (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f'
 684           || c == '\b');
 685 }
 686
 687 /* Return true if a character needs quoting, i.e. cannot be used in unquoted
 688    tokens.  */
 689 static bool
 690 is_quotable (int c)
 691 {
 692   if ((c >= '0' && c <= '9')
 693       || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
 694     return false;
 695   switch (c)
 696     {
 697     case '!': case '#': case '$': case '%': case '&': case '*':
 698     case '+': case '-': case '.': case '/': case ':': case '?':
 699     case '@': case '|': case '~': case '_': case '^':
 700       return false;
 701     default:
 702       return true;
 703     }
 704 }
 705
 706
 707 /* Read a key or value string.
 708    Return the string in UTF-8 encoding, or NULL if no string is seen.
 709    Return the start position of the string in *pos.  */
 710 static char *
 711 read_string (lex_pos_ty *pos)
 712 {
 713   static int *buffer;
 714   static size_t bufmax;
 715   static size_t buflen;
 716   int c;
 717
 718   /* Skip whitespace before the string.  */
 719   do
 720     c = phase4_getc ();
 721   while (is_whitespace (c));
 722
 723   if (c == UEOF)
 724     /* No more string.  */
 725     return NULL;
 726
 727   *pos = gram_pos;
 728   buflen = 0;
 729   if (c == '"')
 730     {
 731       /* Read a string enclosed in double-quotes.  */
 732       for (;;)
 733         {
 734           c = phase3_getc ();
 735           if (c == UEOF || c == '"')
 736             break;
 737           if (c == '\\')
 738             {
 739               c = phase3_getc ();
 740               if (c == UEOF)
 741                 break;
 742               if (c >= '0' && c <= '7')
 743                 {
 744                   unsigned int n = 0;
 745                   int j = 0;
 746                   for (;;)
 747                     {
 748                       n = n * 8 + (c - '0');
 749                       if (++j == 3)
 750                         break;
 751                       c = phase3_getc ();
 752                       if (!(c >= '0' && c <= '7'))
 753                         {
 754                           phase3_ungetc (c);
 755                           break;
 756                         }
 757                     }
 758                   c = n;
 759                 }
 760               else if (c == 'u' || c == 'U')
 761                 {
 762                   unsigned int n = 0;
 763                   int j;
 764                   for (j = 0; j < 4; j++)
 765                     {
 766                       c = phase3_getc ();
 767                       if (c >= '0' && c <= '9')
 768                         n = n * 16 + (c - '0');
 769                       else if (c >= 'A' && c <= 'F')
 770                         n = n * 16 + (c - 'A' + 10);
 771                       else if (c >= 'a' && c <= 'f')
 772                         n = n * 16 + (c - 'a' + 10);
 773                       else
 774                         {
 775                           phase3_ungetc (c);
 776                           break;
 777                         }
 778                     }
 779                   c = n;
 780                 }
 781               else
 782                 switch (c)
 783                   {
 784                   case 'a': c = '\a'; break;
 785                   case 'b': c = '\b'; break;
 786                   case 't': c = '\t'; break;
 787                   case 'r': c = '\r'; break;
 788                   case 'n': c = '\n'; break;
 789                   case 'v': c = '\v'; break;
 790                   case 'f': c = '\f'; break;
 791                   }
 792             }
 793           if (buflen >= bufmax)
 794             {
 795               bufmax = 2 * bufmax + 10;
 796               buffer = xrealloc (buffer, bufmax * sizeof (int));
 797             }
 798           buffer[buflen++] = c;
 799         }
 800       if (c == UEOF)
 801         po_xerror (PO_SEVERITY_ERROR, NULL,
 802                    real_file_name, gram_pos.line_number, (size_t)(-1), false,
 803                    _("warning: unterminated string"));
 804     }
 805   else
 806     {
 807       /* Read a token outside quotes.  */
 808       if (is_quotable (c))
 809         po_xerror (PO_SEVERITY_ERROR, NULL,
 810                    real_file_name, gram_pos.line_number, (size_t)(-1), false,
 811                    _("warning: syntax error"));
 812       for (; c != UEOF && !is_quotable (c); c = phase4_getc ())
 813         {
 814           if (buflen >= bufmax)
 815             {
 816               bufmax = 2 * bufmax + 10;
 817               buffer = xrealloc (buffer, bufmax * sizeof (int));
 818             }
 819           buffer[buflen++] = c;
 820         }
 821     }
 822
 823   return conv_from_ucs4 (buffer, buflen);
 824 }
 825
 826
 827 /* Read a .strings file from a stream, and dispatch to the various
 828    abstract_catalog_reader_class_ty methods.  */
 829 static void
 830 stringtable_parse (abstract_catalog_reader_ty *pop, FILE *file,
 831                    const char *real_filename, const char *logical_filename)
 832 {
 833   fp = file;
 834   real_file_name = real_filename;
 835   gram_pos.file_name = xstrdup (real_file_name);
 836   gram_pos.line_number = 1;
 837   encoding = enc_undetermined;
 838   expect_fuzzy_msgstr_as_c_comment = false;
 839   expect_fuzzy_msgstr_as_cxx_comment = false;
 840
 841   for (;;)
 842     {
 843       char *msgid;
 844       lex_pos_ty msgid_pos;
 845       char *msgstr;
 846       lex_pos_ty msgstr_pos;
 847       int c;
 848
 849       /* Prepare for next msgid/msgstr pair.  */
 850       special_comment_reset ();
 851       next_is_obsolete = false;
 852       next_is_fuzzy = false;
 853       fuzzy_msgstr = NULL;
 854
 855       /* Read the key and all the comments preceding it.  */
 856       msgid = read_string (&msgid_pos);
 857       if (msgid == NULL)
 858         break;
 859
 860       special_comment_finish ();
 861
 862       /* Skip whitespace.  */
 863       do
 864         c = phase4_getc ();
 865       while (is_whitespace (c));
 866
 867       /* Expect a '=' or ';'.  */
 868       if (c == UEOF)
 869         {
 870           po_xerror (PO_SEVERITY_ERROR, NULL,
 871                      real_file_name, gram_pos.line_number, (size_t)(-1), false,
 872                      _("warning: unterminated key/value pair"));
 873           break;
 874         }
 875       if (c == ';')
 876         {
 877           /* "key"; is an abbreviation for "key"=""; and does not
 878              necessarily designate an untranslated entry.  */
 879           msgstr = xstrdup ("");
 880           msgstr_pos = msgid_pos;
 881           po_callback_message (NULL, msgid, &msgid_pos, NULL,
 882                                msgstr, strlen (msgstr) + 1, &msgstr_pos,
 883                                NULL, NULL, NULL,
 884                                false, next_is_obsolete);
 885         }
 886       else if (c == '=')
 887         {
 888           /* Read the value.  */
 889           msgstr = read_string (&msgstr_pos);
 890           if (msgstr == NULL)
 891             {
 892               po_xerror (PO_SEVERITY_ERROR, NULL,
 893                          real_file_name, gram_pos.line_number, (size_t)(-1),
 894                          false, _("warning: unterminated key/value pair"));
 895               break;
 896             }
 897
 898           /* Skip whitespace.  But for fuzzy key/value pairs, look for the
 899              tentative msgstr in the form of a C style comment.  */
 900           expect_fuzzy_msgstr_as_c_comment = next_is_fuzzy;
 901           do
 902             {
 903               c = phase4_getc ();
 904               if (fuzzy_msgstr != NULL)
 905                 expect_fuzzy_msgstr_as_c_comment = false;
 906             }
 907           while (is_whitespace (c));
 908           expect_fuzzy_msgstr_as_c_comment = false;
 909
 910           /* Expect a ';'.  */
 911           if (c == ';')
 912             {
 913               /* But for fuzzy key/value pairs, look for the tentative msgstr
 914                  in the form of a C++ style comment. */
 915               if (fuzzy_msgstr == NULL && next_is_fuzzy)
 916                 {
 917                   do
 918                     c = phase3_getc ();
 919                   while (c == ' ');
 920                   phase3_ungetc (c);
 921
 922                   expect_fuzzy_msgstr_as_cxx_comment = true;
 923                   c = phase4_getc ();
 924                   phase4_ungetc (c);
 925                   expect_fuzzy_msgstr_as_cxx_comment = false;
 926                 }
 927               if (fuzzy_msgstr != NULL && strcmp (msgstr, msgid) == 0)
 928                 msgstr = fuzzy_msgstr;
 929
 930               /* A key/value pair.  */
 931               po_callback_message (NULL, msgid, &msgid_pos, NULL,
 932                                    msgstr, strlen (msgstr) + 1, &msgstr_pos,
 933                                    NULL, NULL, NULL,
 934                                    false, next_is_obsolete);
 935             }
 936           else
 937             {
 938               po_xerror (PO_SEVERITY_ERROR, NULL,
 939                          real_file_name, gram_pos.line_number, (size_t)(-1),
 940                          false, _("\
 941 warning: syntax error, expected ';' after string"));
 942               break;
 943             }
 944         }
 945       else
 946         {
 947           po_xerror (PO_SEVERITY_ERROR, NULL,
 948                      real_file_name, gram_pos.line_number, (size_t)(-1), false,
 949                      _("\
 950 warning: syntax error, expected '=' or ';' after string"));
 951           break;
 952         }
 953     }
 954
 955   fp = NULL;
 956   real_file_name = NULL;
 957   gram_pos.line_number = 0;
 958 }
 959
 960 const struct catalog_input_format input_format_stringtable =
 961 {
 962   stringtable_parse,                    /* parse */
 963   true                                  /* produces_utf8 */
 964 };