gettext-tools/src/x-librep.c

   1 /* xgettext librep backend.
   2    Copyright (C) 2001-2003, 2005-2009 Free Software Foundation, Inc.
   3
   4    This file was written by Bruno Haible <haible@clisp.cons.org>, 2001.
   5
   6    This program is free software: you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  18
  19 #ifdef HAVE_CONFIG_H
  20 # include "config.h"
  21 #endif
  22
  23 /* Specification.  */
  24 #include "x-librep.h"
  25
  26 #include <errno.h>
  27 #include <stdbool.h>
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <string.h>
  31
  32 #include "c-ctype.h"
  33 #include "message.h"
  34 #include "xgettext.h"
  35 #include "error.h"
  36 #include "xalloc.h"
  37 #include "hash.h"
  38 #include "gettext.h"
  39
  40 #define _(s) gettext(s)
  41
  42
  43 /* Summary of librep syntax:
  44    - ';' starts a comment until end of line.
  45    - Block comments start with '#|' and end with '|#'.
  46    - Numbers are constituted of an optional prefix (#b, #B for binary,
  47      #o, #O for octal, #d, #D for decimal, #x, #X for hexadecimal,
  48      #e, #E for exact, #i, #I for inexact), an optional sign (+ or -), and
  49      the digits.
  50    - Characters are written as '?' followed by the character, possibly
  51      with an escape sequence, for example '?a', '?\n', '?\177'.
  52    - Strings are delimited by double quotes. Backslash introduces an escape
  53      sequence. The following are understood: '\n', '\r', '\f', '\t', '\v',
  54      '\a', '\\', '\^C', '\012' (octal), '\x12' (hexadecimal).
  55    - Symbols: can contain meta-characters - whitespace or any from ()[]'";|\' -
  56      if preceded by backslash or enclosed in |...|.
  57    - Keywords: written as #:SYMBOL.
  58    - () delimit lists.
  59    - [] delimit vectors.
  60    The reader is implemented in librep-0.14/src/lisp.c.  */
  61
  62
  63 /* ====================== Keyword set customization.  ====================== */
  64
  65 /* If true, extract all strings.  Set by x_librep_extract_all.  */
  66 static bool extract_all = false;
  67
     /* Table of the keywords registered through x_librep_keyword, together
        with their call shapes.  */
  68 static hash_table keywords;
     /* True until the default keyword "_" has been registered by init_keywords
        or the defaults have been disabled with x_librep_keyword (NULL).  */
  69 static bool default_keywords = true;
  70
  71
  72 void
  73 x_librep_extract_all ()
  74 {
  75   extract_all = true;
  76 }
  77
  78
  79 void
  80 x_librep_keyword (const char *name)
  81 {
  82   if (name == NULL)
  83     default_keywords = false;
  84   else
  85     {
  86       const char *end;
  87       struct callshape shape;
  88       const char *colon;
  89
  90       if (keywords.table == NULL)
  91         hash_init (&keywords, 100);
  92
  93       split_keywordspec (name, &end, &shape);
  94
  95       /* The characters between name and end should form a valid Lisp
  96          symbol.  */
  97       colon = strchr (name, ':');
  98       if (colon == NULL || colon >= end)
  99         insert_keyword_callshape (&keywords, name, end - name, &shape);
 100     }
 101 }
 102
 103 /* Finish initializing the keywords hash table.
 104    Called after argument processing, before each file is processed.  */
 105 static void
 106 init_keywords ()
 107 {
 108   if (default_keywords)
 109     {
 110       /* When adding new keywords here, also update the documentation in
 111          xgettext.texi!  */
 112       x_librep_keyword ("_");
 113       default_keywords = false;
 114     }
 115 }
 116
 /* Record the format string flags of the functions known to this backend.  */
 void
 init_flag_table_librep ()
 {
   static const char *const flag_specs[] =
     {
       "_:1:pass-librep-format",
       "format:2:librep-format"
     };
   size_t i;

   for (i = 0; i < sizeof (flag_specs) / sizeof (flag_specs[0]); i++)
     xgettext_record_flag (flag_specs[i]);
 }
 123
 124
 125 /* ======================== Reading of characters.  ======================== */
 126
 127 /* Real filename, used in error messages about the input file.  */
 128 static const char *real_file_name;
 129
 130 /* Logical filename and line number, used to label the extracted messages.
        The line number is kept up to date by do_getc and do_ungetc.  */
 131 static char *logical_file_name;
 132 static int line_number;
 133
 134 /* The input file stream, read through do_getc and do_ungetc.  */
 135 static FILE *fp;
 136
 137
 138 /* Fetch the next character from the input file.  */
 139 static int
 140 do_getc ()
 141 {
 142   int c = getc (fp);
 143
 144   if (c == EOF)
 145     {
 146       if (ferror (fp))
 147         error (EXIT_FAILURE, errno, _("\
 148 error while reading \"%s\""), real_file_name);
 149     }
 150   else if (c == '\n')
 151    line_number++;
 152
 153   return c;
 154 }
 155
 156 /* Put back the last fetched character, not EOF.  */
 157 static void
 158 do_ungetc (int c)
 159 {
 160   if (c == '\n')
 161     line_number--;
 162   ungetc (c, fp);
 163 }
 164
 165
 166 /* ========================== Reading of tokens.  ========================== */
 167
 168
 169 /* A token consists of a sequence of characters.  */
 170 struct token
 171 {
 172   int allocated;                /* number of allocated chars */
 173   int charcount;                /* number of used chars */
 174   char *chars;                  /* the token's constituents (no NUL) */
 175 };
 176
 177 /* Initialize a 'struct token'.  */
 178 static inline void
 179 init_token (struct token *tp)
 180 {
 181   tp->allocated = 10;
 182   tp->chars = XNMALLOC (tp->allocated, char);
 183   tp->charcount = 0;
 184 }
 185
 186 /* Free the memory pointed to by a 'struct token'.  */
 187 static inline void
 188 free_token (struct token *tp)
 189 {
 190   free (tp->chars);
 191 }
 192
 193 /* Ensure there is enough room in the token for one more character.  */
 194 static inline void
 195 grow_token (struct token *tp)
 196 {
 197   if (tp->charcount == tp->allocated)
 198     {
 199       tp->allocated *= 2;
 200       tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
 201     }
 202 }
 203
 204 /* Read the next token into TP, which gets initialized here.  If 'first' is
 205    given, it points to the first character, which has already been read;
        otherwise the first character is fetched with do_getc.
        Reading stops at EOF or at a delimiter character (whitespace, a
        parenthesis, a bracket, a quote, double-quote or backquote, a
        semicolon or a comma); that delimiter is pushed back with do_ungetc.
        While the characters are collected they are also speculatively
        parsed as a number.  Returns true for a symbol,
 206    false for a number.  */
 207 static bool
 208 read_token (struct token *tp, const int *first)
 209 {
 210   int c;
 211   /* Variables for speculative number parsing:  */
       /* radix: -1 as long as nothing has been decided, 0 once the token is
          known not to be a number, 1 after a leading '0' whose radix is
          still open, otherwise the radix 2, 8, 10 or 16.
          nfirst: token length at which the digits are expected to start
          (behind any sign, '#' prefix or 0x).
          exact: cleared when a '.' or an exponent marker is accepted.
          rational: set when a '/' is accepted.
          exponent: set when 'E' or 'e' is accepted while the radix is
          already known to be 10; it permits a following sign.
          had_sign: a '+' or '-' was seen where the digits would start.
          expecting_prefix: the previous character was a '#' that has to be
          followed by a radix or exactness letter.  */
 212   int radix = -1;
 213   int nfirst = 0;
 214   bool exact = true;
 215   bool rational = false;
 216   bool exponent = false;
 217   bool had_sign = false;
 218   bool expecting_prefix = false;
 219
 220   init_token (tp);
 221
 222   if (first)
 223     c = *first;
 224   else
 225     c = do_getc ();
 226
 227   for (;; c = do_getc ())
 228     {
 229       switch (c)
 230         {
 231         case EOF:
 232           goto done;
 233
               /* Delimiters end the token.  */
 234         case ' ': case '\t': case '\n': case '\f': case '\r':
 235         case '(': case ')': case '[': case ']':
 236         case '\'': case '"': case ';': case ',': case '`':
 237           goto done;
 238
               /* A backslash quotes the next character, which is taken
                  literally.  Such a token is a symbol.  */
 239         case '\\':
 240           radix = 0;
 241           c = do_getc ();
 242           if (c == EOF)
 243             /* Invalid, but be tolerant.  */
 244             break;
 245           grow_token (tp);
 246           tp->chars[tp->charcount++] = c;
 247           break;
 248
               /* Everything up to the closing '|' is taken literally.  Such a
                  token is a symbol.  */
 249         case '|':
 250           radix = 0;
 251           for (;;)
 252             {
 253               c = do_getc ();
 254               if (c == EOF || c == '|')
 255                 break;
 256               grow_token (tp);
 257               tp->chars[tp->charcount++] = c;
 258             }
 259           break;
 260
 261         default:
 262           if (radix != 0)
 263             {
                   /* The token may still be a number: update the state.  */
 264               if (expecting_prefix)
 265                 {
 266                   switch (c)
 267                     {
 268                     case 'B': case 'b':
 269                       radix = 2;
 270                       break;
 271                     case 'O': case 'o':
 272                       radix = 8;
 273                       break;
 274                     case 'D': case 'd':
 275                       radix = 10;
 276                       break;
 277                     case 'X': case 'x':
 278                       radix = 16;
 279                       break;
 280                     case 'E': case 'e':
 281                     case 'I': case 'i':
 282                       break;
 283                     default:
 284                       radix = 0;
 285                       break;
 286                     }
 287                   expecting_prefix = false;
 288                   nfirst = tp->charcount + 1;
 289                 }
 290               else if (tp->charcount == nfirst
 291                        && (c == '+' || c == '-' || c == '#'))
 292                 {
 293                   if (c == '#')
 294                     {
                           /* A '#' prefix is not accepted behind a sign.  */
 295                       if (had_sign)
 296                         radix = 0;
 297                       else
 298                         expecting_prefix = true;
 299                     }
 300                   else
 301                     had_sign = true;
 302                   nfirst = tp->charcount + 1;
 303                 }
 304               else
 305                 {
 306                   switch (radix)
 307                     {
                         /* First character of the digit part.  */
 308                     case -1:
 309                       if (c == '.')
 310                         {
 311                           radix = 10;
 312                           exact = false;
 313                         }
 314                       else if (!(c >= '0' && c <= '9'))
 315                         radix = 0;
 316                       else if (c == '0')
 317                         radix = 1;
 318                       else
 319                         radix = 10;
 320                       break;
 321
                         /* Character following a leading '0'.  */
 322                     case 1:
 323                       switch (c)
 324                         {
 325                         case 'X': case 'x':
 326                           radix = 16;
 327                           nfirst = tp->charcount + 1;
 328                           break;
 329                         case '0': case '1': case '2': case '3': case '4':
 330                         case '5': case '6': case '7':
 331                           radix = 8;
 332                           nfirst = tp->charcount;
 333                           break;
 334                         case '.': case 'E': case 'e':
 335                           radix = 10;
 336                           exact = false;
 337                           break;
 338                         case '/':
 339                           radix = 10;
 340                           rational = true;
 341                           break;
 342                         default:
 343                           radix = 0;
 344                           break;
 345                         }
 346                       break;
 347
                         /* The radix is known: check the character against
                            it.  */
 348                     default:
 349                       switch (c)
 350                         {
 351                         case '.':
 352                           if (exact && radix == 10 && !rational)
 353                             exact = false;
 354                           else
 355                             radix = 0;
 356                           break;
 357                         case '/':
 358                           if (exact && !rational)
 359                             rational = true;
 360                           else
 361                             radix = 0;
 362                           break;
 363                         case 'E': case 'e':
 364                           if (radix == 10)
 365                             {
 366                               if (!rational && !exponent)
 367                                 {
 368                                   exponent = true;
 369                                   exact = false;
 370                                 }
 371                               else
 372                                 radix = 0;
 373                               break;
 374                             }
                               /* In other radixes 'E' and 'e' are checked
                                  like any other digit candidate.  */
 375                           /*FALLTHROUGH*/
 376                         default:
 377                           if (exponent && (c == '+' || c == '-'))
 378                             break;
 379                           if ((radix <= 10
 380                                && !(c >= '0' && c <= '0' + radix - 1))
 381                               || (radix == 16 && !c_isxdigit (c)))
 382                             radix = 0;
 383                           break;
 384                         }
 385                       break;
 386                     }
 387                 }
 388             }
 389           else
 390             {
                   /* Already known to be a symbol: a '#' ends the token.  */
 391               if (c == '#')
 392                 goto done;
 393             }
 394           grow_token (tp);
 395           tp->chars[tp->charcount++] = c;
 396         }
 397     }
 398  done:
       /* Push back the character that ended the token.  */
 399   if (c != EOF)
 400     do_ungetc (c);
       /* A number needs a valid radix and at least one character behind the
          sign or prefix.  */
 401   if (radix > 0 && nfirst < tp->charcount)
 402     return false; /* number */
 403   else
 404     return true; /* symbol */
 405 }
 406
 407
 408 /* ========================= Accumulating comments ========================= */
 409
 410
 411 static char *buffer;    /* text of the comment line being accumulated */
 412 static size_t bufmax;   /* allocated size of 'buffer' */
 413 static size_t buflen;   /* number of characters used in 'buffer' */
 414
 415 static inline void
 416 comment_start ()
 417 {
 418   buflen = 0;
 419 }
 420
 421 static inline void
 422 comment_add (int c)
 423 {
 424   if (buflen >= bufmax)
 425     {
 426       bufmax = 2 * bufmax + 10;
 427       buffer = xrealloc (buffer, bufmax);
 428     }
 429   buffer[buflen++] = c;
 430 }
 431
 432 static inline void
 433 comment_line_end (size_t chars_to_remove)
 434 {
 435   buflen -= chars_to_remove;
 436   while (buflen >= 1
 437          && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
 438     --buflen;
 439   if (chars_to_remove == 0 && buflen >= bufmax)
 440     {
 441       bufmax = 2 * bufmax + 10;
 442       buffer = xrealloc (buffer, bufmax);
 443     }
 444   buffer[buflen] = '\0';
 445   savable_comment_add (buffer);
 446 }
 447
 448
 449 /* These are for tracking whether comments count as immediately before
 450    keyword.  read_object records here the line of the latest comment and
        of the latest other object, and drops the saved comments at a newline
        once last_non_comment_line > last_comment_line.  */
 451 static int last_comment_line;
 452 static int last_non_comment_line;
 453
 454
 455 /* ========================= Accumulating messages ========================= */
 456
 457
 458 static message_list_ty *mlp;   /* list the extracted messages are added to */
 459
 460
 461 /* ============== Reading of objects.  See CLHS 2 "Syntax".  ============== */
 462
 463
 464 /* We are only interested in symbols (e.g. _, the default keyword) and
 465    strings.  Other objects need not be represented precisely.  */
 466 enum object_type
 467 {
 468   t_symbol,     /* symbol */
 469   t_string,     /* string */
 470   t_other,      /* other kind of real object */
 471   t_dot,        /* '.' pseudo object */
 472   t_close,      /* ')' or ']' pseudo object */
 473   t_eof         /* EOF marker */
 474 };
 475
 476 struct object
 477 {
 478   enum object_type type;
 479   struct token *token;          /* for t_symbol and t_string; heap
                                       allocated, released by free_object */
 480   int line_number_at_start;     /* for t_string: line where it starts */
 481 };
 482
 483 /* Free the memory pointed to by a 'struct object'.  */
 484 static inline void
 485 free_object (struct object *op)
 486 {
 487   if (op->type == t_symbol || op->type == t_string)
 488     {
 489       free_token (op->token);
 490       free (op->token);
 491     }
 492 }
 493
 494 /* Convert a t_symbol/t_string token to a char*.  */
 495 static char *
 496 string_of_object (const struct object *op)
 497 {
 498   char *str;
 499   int n;
 500
 501   if (!(op->type == t_symbol || op->type == t_string))
 502     abort ();
 503   n = op->token->charcount;
 504   str = XNMALLOC (n + 1, char);
 505   memcpy (str, op->token->chars, n);
 506   str[n] = '\0';
 507   return str;
 508 }
 509
 510 /* Context lookup table, consulted by read_object with the name of the
        symbol in function position.  */
 511 static flag_context_list_table_ty *flag_context_list_table;
 512
 /* Returns the character represented by an escape sequence.
    C is the character that followed the backslash; further characters of
    the sequence are fetched with do_getc, and a character that does not
    belong to the sequence is pushed back with do_ungetc.
    - 'n', 'r', 'f', 't', 'v', 'a' give the corresponding control character.
    - '^' gives the next character with all but its low five bits cleared,
      or EOF if the input ends right after the '^'.
    - An octal digit starts a sequence of up to three octal digits.
    - 'x' is followed by any number of hexadecimal digits.
    - Every other character stands for itself.
    Octal and hexadecimal values are reduced to an 'unsigned char'.  */
 static int
 do_getc_escaped (int c)
 {
   switch (c)
     {
     case 'n':
       return '\n';
     case 'r':
       return '\r';
     case 'f':
       return '\f';
     case 't':
       return '\t';
     case 'v':
       return '\v';
     case 'a':
       return '\a';
     case '^':
       c = do_getc ();
       if (c == EOF)
         return EOF;
       return c & 0x1f;
     case '0': case '1': case '2': case '3': case '4':
     case '5': case '6': case '7':
       {
         int n = c - '0';
         int remaining;

         /* At most two more octal digits belong to the sequence.  */
         for (remaining = 2; remaining > 0; remaining--)
           {
             c = do_getc ();
             if (c == EOF)
               break;
             if (!(c >= '0' && c <= '7'))
               {
                 do_ungetc (c);
                 break;
               }
             n = (n << 3) + (c - '0');
           }
         return (unsigned char) n;
       }
     case 'x':
       {
         /* The number of digits is unbounded, so accumulate in an unsigned
            type: shifting a signed int past its range would be undefined
            behaviour.  Only the low 8 bits are returned anyway.  */
         unsigned int n = 0;

         for (;;)
           {
             int digit;

             c = do_getc ();
             if (c == EOF)
               break;
             if (c >= '0' && c <= '9')
               digit = c - '0';
             else if (c >= 'A' && c <= 'F')
               digit = c - 'A' + 10;
             else if (c >= 'a' && c <= 'f')
               digit = c - 'a' + 10;
             else
               {
                 do_ungetc (c);
                 break;
               }
             n = (n << 4) + (unsigned int) digit;
           }
         return (unsigned char) n;
       }
     default:
       return c;
     }
 }
 588
 589 /* Read the next object.  */
 590 static void
 591 read_object (struct object *op, flag_context_ty outer_context)
 592 {
 593   for (;;)
 594     {
 595       int c;
 596
 597       c = do_getc ();
 598
 599       switch (c)
 600         {
 601         case EOF:
 602           op->type = t_eof;
 603           return;
 604
 605         case '\n':
 606           /* Comments assumed to be grouped with a message must immediately
 607              precede it, with no non-whitespace token on a line between
 608              both.  */
 609           if (last_non_comment_line > last_comment_line)
 610             savable_comment_reset ();
 611           continue;
 612
 613         case ' ': case '\t': case '\f': case '\r':
 614           continue;
 615
 616         case '(':
 617           {
 618             int arg = 0;                /* Current argument number.  */
 619             flag_context_list_iterator_ty context_iter;
 620             const struct callshapes *shapes = NULL;
 621             struct arglist_parser *argparser = NULL;
 622
 623             for (;; arg++)
 624               {
 625                 struct object inner;
 626                 flag_context_ty inner_context;
 627
 628                 if (arg == 0)
 629                   inner_context = null_context;
 630                 else
 631                   inner_context =
 632                     inherited_context (outer_context,
 633                                        flag_context_list_iterator_advance (
 634                                          &context_iter));
 635
 636                 read_object (&inner, inner_context);
 637
 638                 /* Recognize end of list.  */
 639                 if (inner.type == t_close)
 640                   {
 641                     op->type = t_other;
 642                     /* Don't bother converting "()" to "NIL".  */
 643                     last_non_comment_line = line_number;
 644                     if (argparser != NULL)
 645                       arglist_parser_done (argparser, arg);
 646                     return;
 647                   }
 648
 649                 /* Dots are not allowed in every position.
 650                    But be tolerant.  */
 651
 652                 /* EOF inside list is illegal.  But be tolerant.  */
 653                 if (inner.type == t_eof)
 654                   break;
 655
 656                 if (arg == 0)
 657                   {
 658                     /* This is the function position.  */
 659                     if (inner.type == t_symbol)
 660                       {
 661                         char *symbol_name = string_of_object (&inner);
 662                         void *keyword_value;
 663
 664                         if (hash_find_entry (&keywords,
 665                                              symbol_name, strlen (symbol_name),
 666                                              &keyword_value)
 667                             == 0)
 668                           shapes = (const struct callshapes *) keyword_value;
 669
 670                         argparser = arglist_parser_alloc (mlp, shapes);
 671
 672                         context_iter =
 673                           flag_context_list_iterator (
 674                             flag_context_list_table_lookup (
 675                               flag_context_list_table,
 676                               symbol_name, strlen (symbol_name)));
 677
 678                         free (symbol_name);
 679                       }
 680                     else
 681                       context_iter = null_context_list_iterator;
 682                   }
 683                 else
 684                   {
 685                     /* These are the argument positions.  */
 686                     if (argparser != NULL && inner.type == t_string)
 687                       arglist_parser_remember (argparser, arg,
 688                                                string_of_object (&inner),
 689                                                inner_context,
 690                                                logical_file_name,
 691                                                inner.line_number_at_start,
 692                                                savable_comment);
 693                   }
 694
 695                 free_object (&inner);
 696               }
 697
 698             if (argparser != NULL)
 699               arglist_parser_done (argparser, arg);
 700           }
 701           op->type = t_other;
 702           last_non_comment_line = line_number;
 703           return;
 704
 705         case '[':
 706           {
 707             for (;;)
 708               {
 709                 struct object inner;
 710
 711                 read_object (&inner, null_context);
 712
 713                 /* Recognize end of vector.  */
 714                 if (inner.type == t_close)
 715                   {
 716                     op->type = t_other;
 717                     last_non_comment_line = line_number;
 718                     return;
 719                   }
 720
 721                 /* Dots are not allowed.  But be tolerant.  */
 722
 723                 /* EOF inside vector is illegal.  But be tolerant.  */
 724                 if (inner.type == t_eof)
 725                   break;
 726
 727                 free_object (&inner);
 728               }
 729           }
 730           op->type = t_other;
 731           last_non_comment_line = line_number;
 732           return;
 733
 734         case ')': case ']':
 735           /* Tell the caller about the end of list or vector.
 736              Unmatched closing parenthesis is illegal.  But be tolerant.  */
 737           op->type = t_close;
 738           last_non_comment_line = line_number;
 739           return;
 740
 741         case ',':
 742           {
 743             int c = do_getc ();
 744             /* The ,@ handling inside lists is wrong anyway, because
 745                ,@form expands to an unknown number of elements.  */
 746             if (c != EOF && c != '@')
 747               do_ungetc (c);
 748           }
 749           /*FALLTHROUGH*/
 750         case '\'':
 751         case '`':
 752           {
 753             struct object inner;
 754
 755             read_object (&inner, null_context);
 756
 757             /* Dots and EOF are not allowed here.  But be tolerant.  */
 758
 759             free_object (&inner);
 760
 761             op->type = t_other;
 762             last_non_comment_line = line_number;
 763             return;
 764           }
 765
 766         case ';':
 767           {
 768             bool all_semicolons = true;
 769
 770             last_comment_line = line_number;
 771             comment_start ();
 772             for (;;)
 773               {
 774                 int c = do_getc ();
 775                 if (c == EOF || c == '\n' || c == '\f' || c == '\r')
 776                   break;
 777                 if (c != ';')
 778                   all_semicolons = false;
 779                 if (!all_semicolons)
 780                   {
 781                     /* We skip all leading white space, but not EOLs.  */
 782                     if (!(buflen == 0 && (c == ' ' || c == '\t')))
 783                       comment_add (c);
 784                   }
 785               }
 786             comment_line_end (0);
 787             continue;
 788           }
 789
 790         case '"':
 791           {
 792             op->token = XMALLOC (struct token);
 793             init_token (op->token);
 794             op->line_number_at_start = line_number;
 795             for (;;)
 796               {
 797                 int c = do_getc ();
 798                 if (c == EOF)
 799                   /* Invalid input.  Be tolerant, no error message.  */
 800                   break;
 801                 if (c == '"')
 802                   break;
 803                 if (c == '\\')
 804                   {
 805                     c = do_getc ();
 806                     if (c == EOF)
 807                       /* Invalid input.  Be tolerant, no error message.  */
 808                       break;
 809                     if (c == '\n')
 810                       /* Ignore escaped newline.  */
 811                       ;
 812                     else
 813                       {
 814                         c = do_getc_escaped (c);
 815                         if (c == EOF)
 816                           /* Invalid input.  Be tolerant, no error message.  */
 817                           break;
 818                         grow_token (op->token);
 819                         op->token->chars[op->token->charcount++] = c;
 820                       }
 821                   }
 822                 else
 823                   {
 824                     grow_token (op->token);
 825                     op->token->chars[op->token->charcount++] = c;
 826                   }
 827               }
 828             op->type = t_string;
 829
 830             if (extract_all)
 831               {
 832                 lex_pos_ty pos;
 833
 834                 pos.file_name = logical_file_name;
 835                 pos.line_number = op->line_number_at_start;
 836                 remember_a_message (mlp, NULL, string_of_object (op),
 837                                     null_context, &pos, NULL, savable_comment);
 838               }
 839             last_non_comment_line = line_number;
 840             return;
 841           }
 842
 843         case '?':
 844           c = do_getc ();
 845           if (c == EOF)
 846             /* Invalid input.  Be tolerant, no error message.  */
 847             ;
 848           else if (c == '\\')
 849             {
 850               c = do_getc ();
 851               if (c == EOF)
 852                 /* Invalid input.  Be tolerant, no error message.  */
 853                 ;
 854               else
 855                 {
 856                   c = do_getc_escaped (c);
 857                   if (c == EOF)
 858                     /* Invalid input.  Be tolerant, no error message.  */
 859                     ;
 860                 }
 861             }
 862           op->type = t_other;
 863           last_non_comment_line = line_number;
 864           return;
 865
 866         case '#':
 867           /* Dispatch macro handling.  */
 868           c = do_getc ();
 869           if (c == EOF)
 870             /* Invalid input.  Be tolerant, no error message.  */
 871             {
 872               op->type = t_other;
 873               return;
 874             }
 875
 876           switch (c)
 877             {
 878             case '!':
 879               if (ftell (fp) == 2)
 880                 /* Skip comment until !# */
 881                 {
 882                   c = do_getc ();
 883                   for (;;)
 884                     {
 885                       if (c == EOF)
 886                         break;
 887                       if (c == '!')
 888                         {
 889                           c = do_getc ();
 890                           if (c == EOF || c == '#')
 891                             break;
 892                         }
 893                       else
 894                         c = do_getc ();
 895                     }
 896                   if (c == EOF)
 897                     {
 898                       /* EOF not allowed here.  But be tolerant.  */
 899                       op->type = t_eof;
 900                       return;
 901                     }
 902                   continue;
 903                 }
 904               /*FALLTHROUGH*/
 905             case '\'':
 906             case ':':
 907               {
 908                 struct object inner;
 909                 read_object (&inner, null_context);
 910                 /* Dots and EOF are not allowed here.
 911                    But be tolerant.  */
 912                 free_object (&inner);
 913                 op->type = t_other;
 914                 last_non_comment_line = line_number;
 915                 return;
 916               }
 917
 918             case '[':
 919             case '(':
 920               {
 921                 struct object inner;
 922                 do_ungetc (c);
 923                 read_object (&inner, null_context);
 924                 /* Dots and EOF are not allowed here.
 925                    But be tolerant.  */
 926                 free_object (&inner);
 927                 op->type = t_other;
 928                 last_non_comment_line = line_number;
 929                 return;
 930               }
 931
 932             case '|':
 933               {
 934                 int depth = 0;
 935
 936                 comment_start ();
 937                 c = do_getc ();
 938                 for (;;)
 939                   {
 940                     if (c == EOF)
 941                       break;
 942                     if (c == '|')
 943                       {
 944                         c = do_getc ();
 945                         if (c == EOF)
 946                           break;
 947                         if (c == '#')
 948                           {
 949                             if (depth == 0)
 950                               {
 951                                 comment_line_end (0);
 952                                 break;
 953                               }
 954                             depth--;
 955                             comment_add ('|');
 956                             comment_add ('#');
 957                             c = do_getc ();
 958                           }
 959                         else
 960                           comment_add ('|');
 961                       }
 962                     else if (c == '#')
 963                       {
 964                         c = do_getc ();
 965                         if (c == EOF)
 966                           break;
 967                         comment_add ('#');
 968                         if (c == '|')
 969                           {
 970                             depth++;
 971                             comment_add ('|');
 972                             c = do_getc ();
 973                           }
 974                       }
 975                     else
 976                       {
 977                         /* We skip all leading white space.  */
 978                         if (!(buflen == 0 && (c == ' ' || c == '\t')))
 979                           comment_add (c);
 980                         if (c == '\n')
 981                           {
 982                             comment_line_end (1);
 983                             comment_start ();
 984                           }
 985                         c = do_getc ();
 986                       }
 987                   }
 988                 if (c == EOF)
 989                   {
 990                     /* EOF not allowed here.  But be tolerant.  */
 991                     op->type = t_eof;
 992                     return;
 993                   }
 994                 last_comment_line = line_number;
 995                 continue;
 996               }
 997
 998             case '\\':
 999               {
1000                 struct token token;
1001                 int first = '\\';
1002                 read_token (&token, &first);
1003                 free_token (&token);
1004                 op->type = t_other;
1005                 last_non_comment_line = line_number;
1006                 return;
1007               }
1008
1009             case 'T': case 't':
1010             case 'F': case 'f':
1011               op->type = t_other;
1012               last_non_comment_line = line_number;
1013               return;
1014
1015             case 'B': case 'b':
1016             case 'O': case 'o':
1017             case 'D': case 'd':
1018             case 'X': case 'x':
1019             case 'E': case 'e':
1020             case 'I': case 'i':
1021               {
1022                 struct token token;
1023                 do_ungetc (c);
1024                 c = '#';
1025                 read_token (&token, &c);
1026                 free_token (&token);
1027                 op->type = t_other;
1028                 last_non_comment_line = line_number;
1029                 return;
1030               }
1031
1032             default:
1033               /* Invalid input.  Be tolerant, no error message.  */
1034               op->type = t_other;
1035               last_non_comment_line = line_number;
1036               return;
1037             }
1038
1039           /*NOTREACHED*/
1040           abort ();
1041
1042         default:
1043           /* Read a token.  */
1044           {
1045             bool symbol;
1046
1047             op->token = XMALLOC (struct token);
1048             symbol = read_token (op->token, &c);
1049             if (op->token->charcount == 1 && op->token->chars[0] == '.')
1050               {
1051                 free_token (op->token);
1052                 free (op->token);
1053                 op->type = t_dot;
1054                 last_non_comment_line = line_number;
1055                 return;
1056               }
1057             if (!symbol)
1058               {
1059                 free_token (op->token);
1060                 free (op->token);
1061                 op->type = t_other;
1062                 last_non_comment_line = line_number;
1063                 return;
1064               }
1065             /* Distinguish between "foo" and "foo#bar".  */
1066             c = do_getc ();
1067             if (c == '#')
1068               {
1069                 struct token second_token;
1070
1071                 free_token (op->token);
1072                 free (op->token);
1073                 read_token (&second_token, NULL);
1074                 free_token (&second_token);
1075                 op->type = t_other;
1076                 last_non_comment_line = line_number;
1077                 return;
1078               }
1079             else
1080               {
1081                 if (c != EOF)
1082                   do_ungetc (c);
1083                 op->type = t_symbol;
1084                 last_non_comment_line = line_number;
1085                 return;
1086               }
1087           }
1088         }
1089     }
1090 }
1091
1092
1093 void
1094 extract_librep (FILE *f,
1095                 const char *real_filename, const char *logical_filename,
1096                 flag_context_list_table_ty *flag_table,
1097                 msgdomain_list_ty *mdlp)
1098 {
1099   mlp = mdlp->item[0]->messages;
1100
1101   fp = f;
1102   real_file_name = real_filename;
1103   logical_file_name = xstrdup (logical_filename);
1104   line_number = 1;
1105
1106   last_comment_line = -1;
1107   last_non_comment_line = -1;
1108
1109   flag_context_list_table = flag_table;
1110
1111   init_keywords ();
1112
1113   /* Eat tokens until eof is seen.  When read_object returns
1114      due to an unbalanced closing parenthesis, just restart it.  */
1115   do
1116     {
1117       struct object toplevel_object;
1118
1119       read_object (&toplevel_object, null_context);
1120
1121       if (toplevel_object.type == t_eof)
1122         break;
1123
1124       free_object (&toplevel_object);
1125     }
1126   while (!feof (fp));
1127
1128   /* Close scanner.  */
1129   fp = NULL;
1130   real_file_name = NULL;
1131   logical_file_name = NULL;
1132   line_number = 0;
1133 }