gettext-tools/src/read-stringtable.c

   1 /* Reading NeXTstep/GNUstep .strings files.
   2    Copyright (C) 2003, 2005-2007, 2009 Free Software Foundation, Inc.
   3    Written by Bruno Haible <bruno@clisp.org>, 2003.
   4
   5    This program is free software: you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published by
   7    the Free Software Foundation; either version 3 of the License, or
   8    (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  17
  18 #ifdef HAVE_CONFIG_H
  19 # include <config.h>
  20 #endif
  21
  22 /* Specification.  */
  23 #include "read-stringtable.h"
  24
  25 #include <assert.h>
  26 #include <errno.h>
  27 #include <stdbool.h>
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <string.h>
  31
  32 #include "error.h"
  33 #include "error-progname.h"
  34 #include "read-catalog-abstract.h"
  35 #include "xalloc.h"
  36 #include "xvasprintf.h"
  37 #include "po-xerror.h"
  38 #include "unistr.h"
  39 #include "gettext.h"
  40
  41 #define _(str) gettext (str)
  42
  43 /* The format of NeXTstep/GNUstep .strings files is documented in
  44      gnustep-base-1.8.0/Tools/make_strings/Using.txt
  45    and in the comments of method propertyListFromStringsFileFormat in
  46      gnustep-base-1.8.0/Source/NSString.m
   In summary, it's an Objective-C like file with pseudo-assignments of the form
  48           "key" = "value";
  49    where the key is the msgid and the value is the msgstr.
  50
  51    The implementation of the parser of .strings files is in
  52      gnustep-base-1.8.0/Source/NSString.m
  53      function GSPropertyListFromStringsFormat
  54      (indirectly called from NSBundle's method localizedStringForKey).
  55
  56    A test case is in
  57      gnustep-base-1.8.0/Testing/English.lproj/NXStringTable.example
  58  */
  59
  60 /* Handling of comments: We copy all comments from the .strings file to
  61    the PO file. This is not really needed; it's a service for translators
  62    who don't like PO files and prefer to maintain the .strings file.  */
  63
  64
/* Real filename, used in error messages about the input file.
   Set by stringtable_parse at the start of a parse and reset to NULL when
   the parse is finished.  */
static const char *real_file_name;

/* File name and line number of the current input position.
   Declared extern, i.e. the object itself lives in another translation
   unit.  stringtable_parse initializes it; phase3_getc and phase3_ungetc
   keep line_number in step with the newlines read and pushed back.  */
extern lex_pos_ty gram_pos;

/* The input file stream.  Set by stringtable_parse at the start of a parse
   and reset to NULL when the parse is finished.  */
static FILE *fp;
  73
  74
  75 /* Phase 1: Read a byte.
  76    Max. 4 pushback characters.  */
  77
/* Stack of bytes handed back through phase1_ungetc.  phase1_getc pops from
   the top of this stack before it reads from the stream.  */
static unsigned char phase1_pushback[4];
/* Number of bytes currently stored in phase1_pushback.  */
static int phase1_pushback_length;
  80
  81 static int
  82 phase1_getc ()
  83 {
  84   int c;
  85
  86   if (phase1_pushback_length)
  87     return phase1_pushback[--phase1_pushback_length];
  88
  89   c = getc (fp);
  90
  91   if (c == EOF)
  92     {
  93       if (ferror (fp))
  94         {
  95           const char *errno_description = strerror (errno);
  96           po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
  97                      xasprintf ("%s: %s",
  98                                 xasprintf (_("error while reading \"%s\""),
  99                                            real_file_name),
 100                                 errno_description));
 101         }
 102       return EOF;
 103     }
 104
 105   return c;
 106 }
 107
 108 static void
 109 phase1_ungetc (int c)
 110 {
 111   if (c != EOF)
 112     phase1_pushback[phase1_pushback_length++] = c;
 113 }
 114
 115
 116 /* Phase 2: Read an UCS-4 character.
 117    Max. 2 pushback characters.  */
 118
/* End-of-file indicator for functions returning a UCS-4 character.
   It is negative, so it cannot collide with a character value.  */
#define UEOF -1

/* Stack of characters handed back through phase2_ungetc.  phase2_getc pops
   from the top of this stack before it decodes further input.  */
static int phase2_pushback[4];
/* Number of characters currently stored in phase2_pushback.  */
static int phase2_pushback_length;
 124
/* The input file can be in Unicode encoding (UCS-2BE, UCS-2LE, UTF-8, each
   with a BOM!), or otherwise the locale-dependent default encoding is used.
   Since we don't want to depend on the locale here, we use ISO-8859-1
   instead.  */
enum enc
{
  enc_undetermined,     /* nothing read yet; phase2_getc will sniff the BOM */
  enc_ucs2be,           /* input started with the bytes FE FF */
  enc_ucs2le,           /* input started with the bytes FF FE */
  enc_utf8,             /* input started with the bytes EF BB BF */
  enc_iso8859_1         /* no BOM recognized; each byte is one character */
};
/* Encoding of the file being read.  stringtable_parse resets it to
   enc_undetermined; phase2_getc then determines it from the first bytes.  */
static enum enc encoding;
 138
 139 static int
 140 phase2_getc ()
 141 {
 142   if (phase2_pushback_length)
 143     return phase2_pushback[--phase2_pushback_length];
 144
 145   if (encoding == enc_undetermined)
 146     {
 147       /* Determine the input file's encoding.  */
 148       int c0, c1;
 149
 150       c0 = phase1_getc ();
 151       if (c0 == EOF)
 152         return UEOF;
 153       c1 = phase1_getc ();
 154       if (c1 == EOF)
 155         {
 156           phase1_ungetc (c0);
 157           encoding = enc_iso8859_1;
 158         }
 159       else if (c0 == 0xfe && c1 == 0xff)
 160         encoding = enc_ucs2be;
 161       else if (c0 == 0xff && c1 == 0xfe)
 162         encoding = enc_ucs2le;
 163       else
 164         {
 165           int c2;
 166
 167           c2 = phase1_getc ();
 168           if (c2 == EOF)
 169             {
 170               phase1_ungetc (c1);
 171               phase1_ungetc (c0);
 172               encoding = enc_iso8859_1;
 173             }
 174           else if (c0 == 0xef && c1 == 0xbb && c2 == 0xbf)
 175             encoding = enc_utf8;
 176           else
 177             {
 178               phase1_ungetc (c2);
 179               phase1_ungetc (c1);
 180               phase1_ungetc (c0);
 181               encoding = enc_iso8859_1;
 182             }
 183         }
 184     }
 185
 186   switch (encoding)
 187     {
 188     case enc_ucs2be:
 189       /* Read an UCS-2BE encoded character.  */
 190       {
 191         int c0, c1;
 192
 193         c0 = phase1_getc ();
 194         if (c0 == EOF)
 195           return UEOF;
 196         c1 = phase1_getc ();
 197         if (c1 == EOF)
 198           return UEOF;
 199         return (c0 << 8) + c1;
 200       }
 201
 202     case enc_ucs2le:
 203       /* Read an UCS-2LE encoded character.  */
 204       {
 205         int c0, c1;
 206
 207         c0 = phase1_getc ();
 208         if (c0 == EOF)
 209           return UEOF;
 210         c1 = phase1_getc ();
 211         if (c1 == EOF)
 212           return UEOF;
 213         return c0 + (c1 << 8);
 214       }
 215
 216     case enc_utf8:
 217       /* Read an UTF-8 encoded character.  */
 218       {
 219         unsigned char buf[6];
 220         unsigned int count;
 221         int c;
 222         ucs4_t uc;
 223
 224         c = phase1_getc ();
 225         if (c == EOF)
 226           return UEOF;
 227         buf[0] = c;
 228         count = 1;
 229
 230         if (buf[0] >= 0xc0)
 231           {
 232             c = phase1_getc ();
 233             if (c == EOF)
 234               return UEOF;
 235             buf[1] = c;
 236             count = 2;
 237
 238             if (buf[0] >= 0xe0
 239                 && ((buf[1] ^ 0x80) < 0x40))
 240               {
 241                 c = phase1_getc ();
 242                 if (c == EOF)
 243                   return UEOF;
 244                 buf[2] = c;
 245                 count = 3;
 246
 247                 if (buf[0] >= 0xf0
 248                     && ((buf[2] ^ 0x80) < 0x40))
 249                   {
 250                     c = phase1_getc ();
 251                     if (c == EOF)
 252                       return UEOF;
 253                     buf[3] = c;
 254                     count = 4;
 255
 256                     if (buf[0] >= 0xf8
 257                         && ((buf[3] ^ 0x80) < 0x40))
 258                       {
 259                         c = phase1_getc ();
 260                         if (c == EOF)
 261                           return UEOF;
 262                         buf[4] = c;
 263                         count = 5;
 264
 265                         if (buf[0] >= 0xfc
 266                             && ((buf[4] ^ 0x80) < 0x40))
 267                           {
 268                             c = phase1_getc ();
 269                             if (c == EOF)
 270                               return UEOF;
 271                             buf[5] = c;
 272                             count = 6;
 273                           }
 274                       }
 275                   }
 276               }
 277           }
 278
 279         u8_mbtouc (&uc, buf, count);
 280         return uc;
 281       }
 282
 283     case enc_iso8859_1:
 284       /* Read an ISO-8859-1 encoded character.  */
 285       {
 286         int c = phase1_getc ();
 287
 288         if (c == EOF)
 289           return UEOF;
 290         return c;
 291       }
 292
 293     default:
 294       abort ();
 295     }
 296 }
 297
 298 static void
 299 phase2_ungetc (int c)
 300 {
 301   if (c != UEOF)
 302     phase2_pushback[phase2_pushback_length++] = c;
 303 }
 304
 305
 306 /* Phase 3: Read an UCS-4 character, with line number handling.  */
 307
 308 static int
 309 phase3_getc ()
 310 {
 311   int c = phase2_getc ();
 312
 313   if (c == '\n')
 314     gram_pos.line_number++;
 315
 316   return c;
 317 }
 318
 319 static void
 320 phase3_ungetc (int c)
 321 {
 322   if (c == '\n')
 323     --gram_pos.line_number;
 324   phase2_ungetc (c);
 325 }
 326
 327
 328 /* Convert from UCS-4 to UTF-8.  */
 329 static char *
 330 conv_from_ucs4 (const int *buffer, size_t buflen)
 331 {
 332   unsigned char *utf8_string;
 333   size_t pos;
 334   unsigned char *q;
 335
 336   /* Each UCS-4 word needs 6 bytes at worst.  */
 337   utf8_string = XNMALLOC (6 * buflen + 1, unsigned char);
 338
 339   for (pos = 0, q = utf8_string; pos < buflen; )
 340     {
 341       unsigned int uc;
 342       int n;
 343
 344       uc = buffer[pos++];
 345       n = u8_uctomb (q, uc, 6);
 346       assert (n > 0);
 347       q += n;
 348     }
 349   *q = '\0';
 350   assert (q - utf8_string <= 6 * buflen);
 351
 352   return (char *) utf8_string;
 353 }
 354
 355
 356 /* Parse a string enclosed in double-quotes.  Input is UCS-4 encoded.
 357    Return the string in UTF-8 encoding, or NULL if the input doesn't represent
 358    a valid string enclosed in double-quotes.  */
 359 static char *
 360 parse_escaped_string (const int *string, size_t length)
 361 {
 362   static int *buffer;
 363   static size_t bufmax;
 364   static size_t buflen;
 365   const int *string_limit = string + length;
 366   int c;
 367
 368   if (string == string_limit)
 369     return NULL;
 370   c = *string++;
 371   if (c != '"')
 372     return NULL;
 373   buflen = 0;
 374   for (;;)
 375     {
 376       if (string == string_limit)
 377         return NULL;
 378       c = *string++;
 379       if (c == '"')
 380         break;
 381       if (c == '\\')
 382         {
 383           if (string == string_limit)
 384             return NULL;
 385           c = *string++;
 386           if (c >= '0' && c <= '7')
 387             {
 388               unsigned int n = 0;
 389               int j = 0;
 390               for (;;)
 391                 {
 392                   n = n * 8 + (c - '0');
 393                   if (++j == 3)
 394                     break;
 395                   if (string == string_limit)
 396                     break;
 397                   c = *string;
 398                   if (!(c >= '0' && c <= '7'))
 399                     break;
 400                   string++;
 401                 }
 402               c = n;
 403             }
 404           else if (c == 'u' || c == 'U')
 405             {
 406               unsigned int n = 0;
 407               int j;
 408               for (j = 0; j < 4; j++)
 409                 {
 410                   if (string == string_limit)
 411                     break;
 412                   c = *string;
 413                   if (c >= '0' && c <= '9')
 414                     n = n * 16 + (c - '0');
 415                   else if (c >= 'A' && c <= 'F')
 416                     n = n * 16 + (c - 'A' + 10);
 417                   else if (c >= 'a' && c <= 'f')
 418                     n = n * 16 + (c - 'a' + 10);
 419                   else
 420                     break;
 421                   string++;
 422                 }
 423               c = n;
 424             }
 425           else
 426             switch (c)
 427               {
 428               case 'a': c = '\a'; break;
 429               case 'b': c = '\b'; break;
 430               case 't': c = '\t'; break;
 431               case 'r': c = '\r'; break;
 432               case 'n': c = '\n'; break;
 433               case 'v': c = '\v'; break;
 434               case 'f': c = '\f'; break;
 435               }
 436         }
 437       if (buflen >= bufmax)
 438         {
 439           bufmax = 2 * bufmax + 10;
 440           buffer = xrealloc (buffer, bufmax * sizeof (int));
 441         }
 442       buffer[buflen++] = c;
 443     }
 444
 445   return conv_from_ucs4 (buffer, buflen);
 446 }
 447
 448
 449 /* Accumulating flag comments.  */
 450
 451 static char *special_comment;
 452
 453 static inline void
 454 special_comment_reset ()
 455 {
 456   if (special_comment != NULL)
 457     free (special_comment);
 458   special_comment = NULL;
 459 }
 460
 461 static void
 462 special_comment_add (const char *flag)
 463 {
 464   if (special_comment == NULL)
 465     special_comment = xstrdup (flag);
 466   else
 467     {
 468       size_t total_len = strlen (special_comment) + 2 + strlen (flag) + 1;
 469       special_comment = xrealloc (special_comment, total_len);
 470       strcat (special_comment, ", ");
 471       strcat (special_comment, flag);
 472     }
 473 }
 474
 475 static inline void
 476 special_comment_finish ()
 477 {
 478   if (special_comment != NULL)
 479     {
 480       po_callback_comment_special (special_comment);
 481       free (special_comment);
 482       special_comment = NULL;
 483     }
 484 }
 485
 486
 487 /* Accumulating comments.  */
 488
/* The characters of the comment line being collected: BUFFER has room for
   BUFMAX characters, of which the first BUFLEN are in use.  */
static int *buffer;
static size_t bufmax;
static size_t buflen;
/* Set by comment_line_end when it sees a "Flag: unmatched" comment.
   stringtable_parse passes it as the last argument of
   po_callback_message.  */
static bool next_is_obsolete;
/* Set by comment_line_end when it sees a "Flag: untranslated" comment.  */
static bool next_is_fuzzy;
/* Set by comment_line_end when a comment of the form  = "escaped string"
   was accepted as a fuzzy msgstr; NULL otherwise.  */
static char *fuzzy_msgstr;
/* While true, phase4_getc lets comment_line_end test a C style, respectively
   C++ style, comment for being a fuzzy msgstr.  */
static bool expect_fuzzy_msgstr_as_c_comment;
static bool expect_fuzzy_msgstr_as_cxx_comment;
 497
/* Begin collecting a new comment line: empty the comment buffer.
   The allocated storage is kept for reuse.  */
static inline void
comment_start ()
{
  buflen = 0;
}
 503
 504 static inline void
 505 comment_add (int c)
 506 {
 507   if (buflen >= bufmax)
 508     {
 509       bufmax = 2 * bufmax + 10;
 510       buffer = xrealloc (buffer, bufmax * sizeof (int));
 511     }
 512   buffer[buflen++] = c;
 513 }
 514
 515 static inline void
 516 comment_line_end (size_t chars_to_remove, bool test_for_fuzzy_msgstr)
 517 {
 518   char *line;
 519
 520   buflen -= chars_to_remove;
 521   /* Drop trailing white space, but not EOLs.  */
 522   while (buflen >= 1
 523          && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
 524     --buflen;
 525
 526   /* At special positions we interpret a comment of the form
 527        = "escaped string"
 528      with an optional trailing semicolon as being the fuzzy msgstr, not a
 529      regular comment.  */
 530   if (test_for_fuzzy_msgstr
 531       && buflen > 2 && buffer[0] == '=' && buffer[1] == ' '
 532       && (fuzzy_msgstr =
 533           parse_escaped_string (buffer + 2,
 534                                 buflen - (buffer[buflen - 1] == ';') - 2)))
 535     return;
 536
 537   line = conv_from_ucs4 (buffer, buflen);
 538
 539   if (strcmp (line, "Flag: untranslated") == 0)
 540     {
 541       special_comment_add ("fuzzy");
 542       next_is_fuzzy = true;
 543     }
 544   else if (strcmp (line, "Flag: unmatched") == 0)
 545     next_is_obsolete = true;
 546   else if (strlen (line) >= 6 && memcmp (line, "Flag: ", 6) == 0)
 547     special_comment_add (line + 6);
 548   else if (strlen (line) >= 9 && memcmp (line, "Comment: ", 9) == 0)
 549     /* A comment extracted from the source.  */
 550     po_callback_comment_dot (line + 9);
 551   else
 552     {
 553       char *last_colon;
 554       unsigned long number;
 555       char *endp;
 556
 557       if (strlen (line) >= 6 && memcmp (line, "File: ", 6) == 0
 558           && (last_colon = strrchr (line + 6, ':')) != NULL
 559           && *(last_colon + 1) != '\0'
 560           && (number = strtoul (last_colon + 1, &endp, 10), *endp == '\0'))
 561         {
 562           /* A "File: <filename>:<number>" type comment.  */
 563           *last_colon = '\0';
 564           po_callback_comment_filepos (line + 6, number);
 565         }
 566       else
 567         po_callback_comment (line);
 568     }
 569 }
 570
 571
 572 /* Phase 4: Replace each comment that is not inside a string with a space
 573    character.  */
 574
/* Return the next character that is not part of a comment, or UEOF.
   A C style comment is returned as a single space, a C++ style comment as a
   newline.  The text of each comment is collected line by line and handed
   to comment_line_end.  A '/' that does not start a comment is returned
   unchanged, with the character after it pushed back.
   This function does not know about strings: callers read the inside of a
   quoted string with phase3_getc instead.  */
static int
phase4_getc ()
{
  int c;

  c = phase3_getc ();
  if (c != '/')
    return c;
  c = phase3_getc ();
  switch (c)
    {
    default:
      /* Just a slash, no comment.  */
      phase3_ungetc (c);
      return '/';

    case '*':
      /* C style comment.  */
      {
        /* True if the previous character was a star, so that a following
           slash ends the comment.  */
        bool last_was_star;
        /* Number of consecutive stars just added to the comment buffer.  */
        size_t trailing_stars;
        /* True once the comment has spanned a line break.  */
        bool seen_newline;

        comment_start ();
        last_was_star = false;
        trailing_stars = 0;
        seen_newline = false;
        /* Drop additional stars at the beginning of the comment.  */
        for (;;)
          {
            c = phase3_getc ();
            if (c != '*')
              break;
            /* These stars are not stored, hence trailing_stars stays 0, but
               a slash right after them still ends the comment.  */
            last_was_star = true;
          }
        phase3_ungetc (c);
        for (;;)
          {
            c = phase3_getc ();
            if (c == UEOF)
              /* Unterminated comment: the line collected so far is not
                 passed to comment_line_end.  */
              break;
            /* We skip all leading white space, but not EOLs.  */
            if (!(buflen == 0 && (c == ' ' || c == '\t')))
              comment_add (c);
            /* In this switch, 'continue' goes on with the next character of
               the comment; 'break' leads to the 'break' after the switch,
               which leaves the loop.  */
            switch (c)
              {
              case '\n':
                seen_newline = true;
                /* Remove the newline that was just added.  */
                comment_line_end (1, false);
                comment_start ();
                last_was_star = false;
                trailing_stars = 0;
                continue;

              case '*':
                last_was_star = true;
                trailing_stars++;
                continue;

              case '/':
                if (last_was_star)
                  {
                    /* Drop additional stars at the end of the comment.  */
                    /* The "+ 1" removes the slash itself.  Only a comment
                       on a single line can be a fuzzy msgstr.  */
                    comment_line_end (trailing_stars + 1,
                                      expect_fuzzy_msgstr_as_c_comment
                                      && !seen_newline);
                    break;
                  }
                /* FALLTHROUGH */

              default:
                last_was_star = false;
                trailing_stars = 0;
                continue;
              }
            break;
          }
        return ' ';
      }

    case '/':
      /* C++ style comment.  */
      comment_start ();
      for (;;)
        {
          c = phase3_getc ();
          /* The terminating newline is consumed but not stored.  */
          if (c == '\n' || c == UEOF)
            break;
          /* We skip all leading white space, but not EOLs.  */
          if (!(buflen == 0 && (c == ' ' || c == '\t')))
            comment_add (c);
        }
      comment_line_end (0, expect_fuzzy_msgstr_as_cxx_comment);
      return '\n';
    }
}
 670
/* Push the character C back.  It is handed to phase3_ungetc unchanged, so
   the next phase4_getc call sees it again.  */
static inline void
phase4_ungetc (int c)
{
  phase3_ungetc (c);
}
 676
 677
 678 /* Return true if a character is considered as whitespace.  */
 679 static bool
 680 is_whitespace (int c)
 681 {
 682   return (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f'
 683           || c == '\b');
 684 }
 685
 686 /* Return true if a character needs quoting, i.e. cannot be used in unquoted
 687    tokens.  */
 688 static bool
 689 is_quotable (int c)
 690 {
 691   if ((c >= '0' && c <= '9')
 692       || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
 693     return false;
 694   switch (c)
 695     {
 696     case '!': case '#': case '$': case '%': case '&': case '*':
 697     case '+': case '-': case '.': case '/': case ':': case '?':
 698     case '@': case '|': case '~': case '_': case '^':
 699       return false;
 700     default:
 701       return true;
 702     }
 703 }
 704
 705
 706 /* Read a key or value string.
 707    Return the string in UTF-8 encoding, or NULL if no string is seen.
 708    Return the start position of the string in *pos.  */
 709 static char *
 710 read_string (lex_pos_ty *pos)
 711 {
 712   static int *buffer;
 713   static size_t bufmax;
 714   static size_t buflen;
 715   int c;
 716
 717   /* Skip whitespace before the string.  */
 718   do
 719     c = phase4_getc ();
 720   while (is_whitespace (c));
 721
 722   if (c == UEOF)
 723     /* No more string.  */
 724     return NULL;
 725
 726   *pos = gram_pos;
 727   buflen = 0;
 728   if (c == '"')
 729     {
 730       /* Read a string enclosed in double-quotes.  */
 731       for (;;)
 732         {
 733           c = phase3_getc ();
 734           if (c == UEOF || c == '"')
 735             break;
 736           if (c == '\\')
 737             {
 738               c = phase3_getc ();
 739               if (c == UEOF)
 740                 break;
 741               if (c >= '0' && c <= '7')
 742                 {
 743                   unsigned int n = 0;
 744                   int j = 0;
 745                   for (;;)
 746                     {
 747                       n = n * 8 + (c - '0');
 748                       if (++j == 3)
 749                         break;
 750                       c = phase3_getc ();
 751                       if (!(c >= '0' && c <= '7'))
 752                         {
 753                           phase3_ungetc (c);
 754                           break;
 755                         }
 756                     }
 757                   c = n;
 758                 }
 759               else if (c == 'u' || c == 'U')
 760                 {
 761                   unsigned int n = 0;
 762                   int j;
 763                   for (j = 0; j < 4; j++)
 764                     {
 765                       c = phase3_getc ();
 766                       if (c >= '0' && c <= '9')
 767                         n = n * 16 + (c - '0');
 768                       else if (c >= 'A' && c <= 'F')
 769                         n = n * 16 + (c - 'A' + 10);
 770                       else if (c >= 'a' && c <= 'f')
 771                         n = n * 16 + (c - 'a' + 10);
 772                       else
 773                         {
 774                           phase3_ungetc (c);
 775                           break;
 776                         }
 777                     }
 778                   c = n;
 779                 }
 780               else
 781                 switch (c)
 782                   {
 783                   case 'a': c = '\a'; break;
 784                   case 'b': c = '\b'; break;
 785                   case 't': c = '\t'; break;
 786                   case 'r': c = '\r'; break;
 787                   case 'n': c = '\n'; break;
 788                   case 'v': c = '\v'; break;
 789                   case 'f': c = '\f'; break;
 790                   }
 791             }
 792           if (buflen >= bufmax)
 793             {
 794               bufmax = 2 * bufmax + 10;
 795               buffer = xrealloc (buffer, bufmax * sizeof (int));
 796             }
 797           buffer[buflen++] = c;
 798         }
 799       if (c == UEOF)
 800         po_xerror (PO_SEVERITY_ERROR, NULL,
 801                    real_file_name, gram_pos.line_number, (size_t)(-1), false,
 802                    _("warning: unterminated string"));
 803     }
 804   else
 805     {
 806       /* Read a token outside quotes.  */
 807       if (is_quotable (c))
 808         po_xerror (PO_SEVERITY_ERROR, NULL,
 809                    real_file_name, gram_pos.line_number, (size_t)(-1), false,
 810                    _("warning: syntax error"));
 811       for (; c != UEOF && !is_quotable (c); c = phase4_getc ())
 812         {
 813           if (buflen >= bufmax)
 814             {
 815               bufmax = 2 * bufmax + 10;
 816               buffer = xrealloc (buffer, bufmax * sizeof (int));
 817             }
 818           buffer[buflen++] = c;
 819         }
 820     }
 821
 822   return conv_from_ucs4 (buffer, buflen);
 823 }
 824
 825
 826 /* Read a .strings file from a stream, and dispatch to the various
 827    abstract_catalog_reader_class_ty methods.  */
 828 static void
 829 stringtable_parse (abstract_catalog_reader_ty *pop, FILE *file,
 830                    const char *real_filename, const char *logical_filename)
 831 {
 832   fp = file;
 833   real_file_name = real_filename;
 834   gram_pos.file_name = xstrdup (real_file_name);
 835   gram_pos.line_number = 1;
 836   encoding = enc_undetermined;
 837   expect_fuzzy_msgstr_as_c_comment = false;
 838   expect_fuzzy_msgstr_as_cxx_comment = false;
 839
 840   for (;;)
 841     {
 842       char *msgid;
 843       lex_pos_ty msgid_pos;
 844       char *msgstr;
 845       lex_pos_ty msgstr_pos;
 846       int c;
 847
 848       /* Prepare for next msgid/msgstr pair.  */
 849       special_comment_reset ();
 850       next_is_obsolete = false;
 851       next_is_fuzzy = false;
 852       fuzzy_msgstr = NULL;
 853
 854       /* Read the key and all the comments preceding it.  */
 855       msgid = read_string (&msgid_pos);
 856       if (msgid == NULL)
 857         break;
 858
 859       special_comment_finish ();
 860
 861       /* Skip whitespace.  */
 862       do
 863         c = phase4_getc ();
 864       while (is_whitespace (c));
 865
 866       /* Expect a '=' or ';'.  */
 867       if (c == UEOF)
 868         {
 869           po_xerror (PO_SEVERITY_ERROR, NULL,
 870                      real_file_name, gram_pos.line_number, (size_t)(-1), false,
 871                      _("warning: unterminated key/value pair"));
 872           break;
 873         }
 874       if (c == ';')
 875         {
 876           /* "key"; is an abbreviation for "key"=""; and does not
 877              necessarily designate an untranslated entry.  */
 878           msgstr = xstrdup ("");
 879           msgstr_pos = msgid_pos;
 880           po_callback_message (NULL, msgid, &msgid_pos, NULL,
 881                                msgstr, strlen (msgstr) + 1, &msgstr_pos,
 882                                NULL, NULL, NULL,
 883                                false, next_is_obsolete);
 884         }
 885       else if (c == '=')
 886         {
 887           /* Read the value.  */
 888           msgstr = read_string (&msgstr_pos);
 889           if (msgstr == NULL)
 890             {
 891               po_xerror (PO_SEVERITY_ERROR, NULL,
 892                          real_file_name, gram_pos.line_number, (size_t)(-1),
 893                          false, _("warning: unterminated key/value pair"));
 894               break;
 895             }
 896
 897           /* Skip whitespace.  But for fuzzy key/value pairs, look for the
 898              tentative msgstr in the form of a C style comment.  */
 899           expect_fuzzy_msgstr_as_c_comment = next_is_fuzzy;
 900           do
 901             {
 902               c = phase4_getc ();
 903               if (fuzzy_msgstr != NULL)
 904                 expect_fuzzy_msgstr_as_c_comment = false;
 905             }
 906           while (is_whitespace (c));
 907           expect_fuzzy_msgstr_as_c_comment = false;
 908
 909           /* Expect a ';'.  */
 910           if (c == ';')
 911             {
 912               /* But for fuzzy key/value pairs, look for the tentative msgstr
 913                  in the form of a C++ style comment. */
 914               if (fuzzy_msgstr == NULL && next_is_fuzzy)
 915                 {
 916                   do
 917                     c = phase3_getc ();
 918                   while (c == ' ');
 919                   phase3_ungetc (c);
 920
 921                   expect_fuzzy_msgstr_as_cxx_comment = true;
 922                   c = phase4_getc ();
 923                   phase4_ungetc (c);
 924                   expect_fuzzy_msgstr_as_cxx_comment = false;
 925                 }
 926               if (fuzzy_msgstr != NULL && strcmp (msgstr, msgid) == 0)
 927                 msgstr = fuzzy_msgstr;
 928
 929               /* A key/value pair.  */
 930               po_callback_message (NULL, msgid, &msgid_pos, NULL,
 931                                    msgstr, strlen (msgstr) + 1, &msgstr_pos,
 932                                    NULL, NULL, NULL,
 933                                    false, next_is_obsolete);
 934             }
 935           else
 936             {
 937               po_xerror (PO_SEVERITY_ERROR, NULL,
 938                          real_file_name, gram_pos.line_number, (size_t)(-1),
 939                          false, _("\
 940 warning: syntax error, expected ';' after string"));
 941               break;
 942             }
 943         }
 944       else
 945         {
 946           po_xerror (PO_SEVERITY_ERROR, NULL,
 947                      real_file_name, gram_pos.line_number, (size_t)(-1), false,
 948                      _("\
 949 warning: syntax error, expected '=' or ';' after string"));
 950           break;
 951         }
 952     }
 953
 954   fp = NULL;
 955   real_file_name = NULL;
 956   gram_pos.line_number = 0;
 957 }
 958
/* Descriptor of the .strings input format: the parse function, and the
   flag saying that the strings it delivers are in UTF-8 (they all come out
   of conv_from_ucs4).  */
const struct catalog_input_format input_format_stringtable =
{
  stringtable_parse,                    /* parse */
  true                                  /* produces_utf8 */
};