gettext-tools/src/x-librep.c

   1 /* xgettext librep backend.
   2    Copyright (C) 2001-2003, 2005-2009, 2015 Free Software Foundation,
   3    Inc.
   4
   5    This file was written by Bruno Haible <haible@clisp.cons.org>, 2001.
   6
   7    This program is free software: you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include "config.h"
  22 #endif
  23
  24 /* Specification.  */
  25 #include "x-librep.h"
  26
  27 #include <errno.h>
  28 #include <stdbool.h>
  29 #include <stdio.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32
  33 #include "c-ctype.h"
  34 #include "message.h"
  35 #include "xgettext.h"
  36 #include "error.h"
  37 #include "xalloc.h"
  38 #include "hash.h"
  39 #include "gettext.h"
  40
  41 #define _(s) gettext(s)
  42
  43
/* Summary of librep syntax:
   - ';' starts a comment until end of line.
   - Block comments start with '#|' and end with '|#'.
   - Numbers are constituted of an optional prefix (#b, #B for binary,
     #o, #O for octal, #d, #D for decimal, #x, #X for hexadecimal,
     #e, #E for exact, #i, #I for inexact), an optional sign (+ or -), and
     the digits.
   - Characters are written as '?' followed by the character, possibly
     with an escape sequence, for example '?a', '?\n', '?\177'.
   - Strings are delimited by double quotes. Backslash introduces an escape
     sequence. The following are understood: '\n', '\r', '\f', '\t', '\a',
     '\\', '\^C', '\012' (octal), '\x12' (hexadecimal).
   - Symbols: can contain meta-characters - whitespace or any from ()[]'";|\' -
     if preceded by backslash or enclosed in |...|.
   - Keywords: written as #:SYMBOL.
   - () delimit lists.
   - [] delimit vectors.
   The reader is implemented in librep-0.14/src/lisp.c.  */
  62
  63
  64 /* ====================== Keyword set customization.  ====================== */
  65
/* If true extract all strings.  Set by x_librep_extract_all() and tested
   each time a string literal has been read.  */
static bool extract_all = false;

/* Table of keywords, filled by x_librep_keyword().  It is searched with the
   symbol found in the function position of each list.  */
static hash_table keywords;
/* True as long as the built-in keyword "_" still has to be registered;
   cleared by x_librep_keyword (NULL) and by init_keywords().  */
static bool default_keywords = true;
  71
  72
  73 void
  74 x_librep_extract_all ()
  75 {
  76   extract_all = true;
  77 }
  78
  79
  80 void
  81 x_librep_keyword (const char *name)
  82 {
  83   if (name == NULL)
  84     default_keywords = false;
  85   else
  86     {
  87       const char *end;
  88       struct callshape shape;
  89       const char *colon;
  90
  91       if (keywords.table == NULL)
  92         hash_init (&keywords, 100);
  93
  94       split_keywordspec (name, &end, &shape);
  95
  96       /* The characters between name and end should form a valid Lisp
  97          symbol.  */
  98       colon = strchr (name, ':');
  99       if (colon == NULL || colon >= end)
 100         insert_keyword_callshape (&keywords, name, end - name, &shape);
 101     }
 102 }
 103
 104 /* Finish initializing the keywords hash table.
 105    Called after argument processing, before each file is processed.  */
 106 static void
 107 init_keywords ()
 108 {
 109   if (default_keywords)
 110     {
 111       /* When adding new keywords here, also update the documentation in
 112          xgettext.texi!  */
 113       x_librep_keyword ("_");
 114       default_keywords = false;
 115     }
 116 }
 117
 118 void
 119 init_flag_table_librep ()
 120 {
 121   xgettext_record_flag ("_:1:pass-librep-format");
 122   xgettext_record_flag ("format:2:librep-format");
 123 }
 124
 125
 126 /* ======================== Reading of characters.  ======================== */
 127
/* Real filename, used in error messages about the input file.  */
static const char *real_file_name;

/* Logical filename and line number, used to label the extracted messages.
   line_number is incremented by do_getc() for every newline read and
   decremented by do_ungetc() when a newline is pushed back.  */
static char *logical_file_name;
static int line_number;

/* The input file stream.  */
static FILE *fp;
 137
 138
 139 /* Fetch the next character from the input file.  */
 140 static int
 141 do_getc ()
 142 {
 143   int c = getc (fp);
 144
 145   if (c == EOF)
 146     {
 147       if (ferror (fp))
 148         error (EXIT_FAILURE, errno, _("\
 149 error while reading \"%s\""), real_file_name);
 150     }
 151   else if (c == '\n')
 152    line_number++;
 153
 154   return c;
 155 }
 156
 157 /* Put back the last fetched character, not EOF.  */
 158 static void
 159 do_ungetc (int c)
 160 {
 161   if (c == '\n')
 162     line_number--;
 163   ungetc (c, fp);
 164 }
 165
 166
 167 /* ========================== Reading of tokens.  ========================== */
 168
 169
/* A token consists of a sequence of characters.
   The characters are not NUL-terminated; 'charcount' gives their number.  */
struct token
{
  int allocated;                /* number of allocated chars */
  int charcount;                /* number of used chars */
  char *chars;                  /* the token's constituents */
};
 177
 178 /* Initialize a 'struct token'.  */
 179 static inline void
 180 init_token (struct token *tp)
 181 {
 182   tp->allocated = 10;
 183   tp->chars = XNMALLOC (tp->allocated, char);
 184   tp->charcount = 0;
 185 }
 186
 187 /* Free the memory pointed to by a 'struct token'.  */
 188 static inline void
 189 free_token (struct token *tp)
 190 {
 191   free (tp->chars);
 192 }
 193
 194 /* Ensure there is enough room in the token for one more character.  */
 195 static inline void
 196 grow_token (struct token *tp)
 197 {
 198   if (tp->charcount == tp->allocated)
 199     {
 200       tp->allocated *= 2;
 201       tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
 202     }
 203 }
 204
/* Read the next token.  If 'first' is given, it points to the first
   character, which has already been read.  Returns true for a symbol,
   false for a number.
   *TP is initialized here with init_token() and receives the token's
   characters, with backslashes and |...| delimiters removed.  The
   character that terminated the token, unless it is EOF, is pushed back
   onto the input.  */
static bool
read_token (struct token *tp, const int *first)
{
  int c;
  /* Variables for speculative number parsing:  */
  /* radix: -1 = nothing decided yet, 0 = the token cannot be a number,
     1 = a leading '0' was seen and the radix is still open,
     otherwise the radix of the number read so far.  */
  int radix = -1;
  /* Index in the token of the first character after sign and prefix.  */
  int nfirst = 0;
  bool exact = true;            /* no '.' or exponent seen yet */
  bool rational = false;        /* a '/' was seen */
  bool exponent = false;        /* an exponent marker was seen */
  bool had_sign = false;        /* a leading '+' or '-' was seen */
  bool expecting_prefix = false; /* previous character was a leading '#' */

  init_token (tp);

  if (first)
    c = *first;
  else
    c = do_getc ();

  for (;; c = do_getc ())
    {
      switch (c)
        {
        case EOF:
          goto done;

        /* Whitespace and these delimiters terminate the token.  */
        case ' ': case '\t': case '\n': case '\f': case '\r':
        case '(': case ')': case '[': case ']':
        case '\'': case '"': case ';': case ',': case '`':
          goto done;

        case '\\':
          /* A backslash quotes the next character.  The token is then
             treated as a symbol.  */
          radix = 0;
          c = do_getc ();
          if (c == EOF)
            /* Invalid, but be tolerant.  */
            break;
          grow_token (tp);
          tp->chars[tp->charcount++] = c;
          break;

        case '|':
          /* Everything up to the next '|' (or EOF) is taken literally.
             The token is then treated as a symbol.  */
          radix = 0;
          for (;;)
            {
              c = do_getc ();
              if (c == EOF || c == '|')
                break;
              grow_token (tp);
              tp->chars[tp->charcount++] = c;
            }
          break;

        default:
          if (radix != 0)
            {
              /* The token may still turn out to be a number.  */
              if (expecting_prefix)
                {
                  /* Character following a leading '#'.  */
                  switch (c)
                    {
                    case 'B': case 'b':
                      radix = 2;
                      break;
                    case 'O': case 'o':
                      radix = 8;
                      break;
                    case 'D': case 'd':
                      radix = 10;
                      break;
                    case 'X': case 'x':
                      radix = 16;
                      break;
                    case 'E': case 'e':
                    case 'I': case 'i':
                      break;
                    default:
                      radix = 0;
                      break;
                    }
                  expecting_prefix = false;
                  nfirst = tp->charcount + 1;
                }
              else if (tp->charcount == nfirst
                       && (c == '+' || c == '-' || c == '#'))
                {
                  /* Sign or '#' at the position where the digits would
                     otherwise start.  A '#' after a sign rules out a
                     number.  */
                  if (c == '#')
                    {
                      if (had_sign)
                        radix = 0;
                      else
                        expecting_prefix = true;
                    }
                  else
                    had_sign = true;
                  nfirst = tp->charcount + 1;
                }
              else
                {
                  switch (radix)
                    {
                    case -1:
                      /* First character after sign and prefix.  */
                      if (c == '.')
                        {
                          radix = 10;
                          exact = false;
                        }
                      else if (!(c >= '0' && c <= '9'))
                        radix = 0;
                      else if (c == '0')
                        radix = 1;
                      else
                        radix = 10;
                      break;

                    case 1:
                      /* Character following a leading '0'.  */
                      switch (c)
                        {
                        case 'X': case 'x':
                          radix = 16;
                          nfirst = tp->charcount + 1;
                          break;
                        case '0': case '1': case '2': case '3': case '4':
                        case '5': case '6': case '7':
                          radix = 8;
                          nfirst = tp->charcount;
                          break;
                        case '.': case 'E': case 'e':
                          radix = 10;
                          exact = false;
                          break;
                        case '/':
                          radix = 10;
                          rational = true;
                          break;
                        default:
                          radix = 0;
                          break;
                        }
                      break;

                    default:
                      /* The radix is known; check that C can continue
                         the number.  */
                      switch (c)
                        {
                        case '.':
                          if (exact && radix == 10 && !rational)
                            exact = false;
                          else
                            radix = 0;
                          break;
                        case '/':
                          if (exact && !rational)
                            rational = true;
                          else
                            radix = 0;
                          break;
                        case 'E': case 'e':
                          if (radix == 10)
                            {
                              if (!rational && !exponent)
                                {
                                  exponent = true;
                                  exact = false;
                                }
                              else
                                radix = 0;
                              break;
                            }
                          /* In other radixes 'E' is tested as a digit.  */
                          /*FALLTHROUGH*/
                        default:
                          if (exponent && (c == '+' || c == '-'))
                            break;
                          if ((radix <= 10
                               && !(c >= '0' && c <= '0' + radix - 1))
                              || (radix == 16 && !c_isxdigit (c)))
                            radix = 0;
                          break;
                        }
                      break;
                    }
                }
            }
          else
            {
              /* Once the token is known not to be a number, a '#' ends
                 it.  */
              if (c == '#')
                goto done;
            }
          grow_token (tp);
          tp->chars[tp->charcount++] = c;
        }
    }
 done:
  if (c != EOF)
    do_ungetc (c);
  /* A number needs a valid radix and at least one character after sign
     and prefix.  */
  if (radix > 0 && nfirst < tp->charcount)
    return false; /* number */
  else
    return true; /* symbol */
}
 407
 408
 409 /* ========================= Accumulating comments ========================= */
 410
 411
/* Growable buffer that collects the text of the comment line currently
   being read.  */
static char *buffer;
/* Number of bytes allocated for buffer.  */
static size_t bufmax;
/* Number of bytes of buffer in use.  */
static size_t buflen;
 415
 416 static inline void
 417 comment_start ()
 418 {
 419   buflen = 0;
 420 }
 421
 422 static inline void
 423 comment_add (int c)
 424 {
 425   if (buflen >= bufmax)
 426     {
 427       bufmax = 2 * bufmax + 10;
 428       buffer = xrealloc (buffer, bufmax);
 429     }
 430   buffer[buflen++] = c;
 431 }
 432
 433 static inline void
 434 comment_line_end (size_t chars_to_remove)
 435 {
 436   buflen -= chars_to_remove;
 437   while (buflen >= 1
 438          && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
 439     --buflen;
 440   if (chars_to_remove == 0 && buflen >= bufmax)
 441     {
 442       bufmax = 2 * bufmax + 10;
 443       buffer = xrealloc (buffer, bufmax);
 444     }
 445   buffer[buflen] = '\0';
 446   savable_comment_add (buffer);
 447 }
 448
 449
/* These are for tracking whether comments count as immediately before
   keyword.  read_object() stores the current line number into
   last_comment_line when it reads a comment and into last_non_comment_line
   when it has read an object.  At a newline the saved comments are reset
   if last_non_comment_line > last_comment_line.  */
static int last_comment_line;
static int last_non_comment_line;
 454
 455
 456 /* ========================= Accumulating messages ========================= */
 457
 458
/* The message list handed to arglist_parser_alloc() and
   remember_a_message() while objects are read.  */
static message_list_ty *mlp;
 460
 461
 462 /* ============== Reading of objects.  See CLHS 2 "Syntax".  ============== */
 463
 464
/* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings.
   Other objects need not be represented precisely.  */
enum object_type
{
  t_symbol,     /* symbol */
  t_string,     /* string */
  t_other,      /* other kind of real object */
  t_dot,        /* '.' pseudo object */
  t_close,      /* ')' or ']' pseudo object */
  t_eof         /* EOF marker */
};
 476
/* Result of reading one object.  'token' is heap-allocated and owned by
   the object; free_object() releases it for t_symbol and t_string.  */
struct object
{
  enum object_type type;
  struct token *token;          /* for t_symbol and t_string */
  int line_number_at_start;     /* for t_string */
};
 483
 484 /* Free the memory pointed to by a 'struct object'.  */
 485 static inline void
 486 free_object (struct object *op)
 487 {
 488   if (op->type == t_symbol || op->type == t_string)
 489     {
 490       free_token (op->token);
 491       free (op->token);
 492     }
 493 }
 494
 495 /* Convert a t_symbol/t_string token to a char*.  */
 496 static char *
 497 string_of_object (const struct object *op)
 498 {
 499   char *str;
 500   int n;
 501
 502   if (!(op->type == t_symbol || op->type == t_string))
 503     abort ();
 504   n = op->token->charcount;
 505   str = XNMALLOC (n + 1, char);
 506   memcpy (str, op->token->chars, n);
 507   str[n] = '\0';
 508   return str;
 509 }
 510
/* Context lookup table.  read_object() looks up the symbol in function
   position here to obtain the flag contexts of the call's arguments.  */
static flag_context_list_table_ty *flag_context_list_table;
 513
 514 /* Returns the character represented by an escape sequence.  */
 515 static int
 516 do_getc_escaped (int c)
 517 {
 518   switch (c)
 519     {
 520     case 'n':
 521       return '\n';
 522     case 'r':
 523       return '\r';
 524     case 'f':
 525       return '\f';
 526     case 't':
 527       return '\t';
 528     case 'v':
 529       return '\v';
 530     case 'a':
 531       return '\a';
 532     case '^':
 533       c = do_getc ();
 534       if (c == EOF)
 535         return EOF;
 536       return c & 0x1f;
 537     case '0': case '1': case '2': case '3': case '4':
 538     case '5': case '6': case '7':
 539       {
 540         int n = c - '0';
 541
 542         c = do_getc ();
 543         if (c != EOF)
 544           {
 545             if (c >= '0' && c <= '7')
 546               {
 547                 n = (n << 3) + (c - '0');
 548                 c = do_getc ();
 549                 if (c != EOF)
 550                   {
 551                     if (c >= '0' && c <= '7')
 552                       n = (n << 3) + (c - '0');
 553                     else
 554                       do_ungetc (c);
 555                   }
 556               }
 557             else
 558               do_ungetc (c);
 559           }
 560         return (unsigned char) n;
 561       }
 562     case 'x':
 563       {
 564         int n = 0;
 565
 566         for (;;)
 567           {
 568             c = do_getc ();
 569             if (c == EOF)
 570               break;
 571             else if (c >= '0' && c <= '9')
 572               n = (n << 4) + (c - '0');
 573             else if (c >= 'A' && c <= 'F')
 574               n = (n << 4) + (c - 'A' + 10);
 575             else if (c >= 'a' && c <= 'f')
 576               n = (n << 4) + (c - 'a' + 10);
 577             else
 578               {
 579                 do_ungetc (c);
 580                 break;
 581               }
 582           }
 583         return (unsigned char) n;
 584       }
 585     default:
 586       return c;
 587     }
 588 }
 589
 590 /* Read the next object.  */
 591 static void
 592 read_object (struct object *op, flag_context_ty outer_context)
 593 {
 594   for (;;)
 595     {
 596       int c;
 597
 598       c = do_getc ();
 599
 600       switch (c)
 601         {
 602         case EOF:
 603           op->type = t_eof;
 604           return;
 605
 606         case '\n':
 607           /* Comments assumed to be grouped with a message must immediately
 608              precede it, with no non-whitespace token on a line between
 609              both.  */
 610           if (last_non_comment_line > last_comment_line)
 611             savable_comment_reset ();
 612           continue;
 613
 614         case ' ': case '\t': case '\f': case '\r':
 615           continue;
 616
 617         case '(':
 618           {
 619             int arg = 0;                /* Current argument number.  */
 620             flag_context_list_iterator_ty context_iter;
 621             const struct callshapes *shapes = NULL;
 622             struct arglist_parser *argparser = NULL;
 623
 624             for (;; arg++)
 625               {
 626                 struct object inner;
 627                 flag_context_ty inner_context;
 628
 629                 if (arg == 0)
 630                   inner_context = null_context;
 631                 else
 632                   inner_context =
 633                     inherited_context (outer_context,
 634                                        flag_context_list_iterator_advance (
 635                                          &context_iter));
 636
 637                 read_object (&inner, inner_context);
 638
 639                 /* Recognize end of list.  */
 640                 if (inner.type == t_close)
 641                   {
 642                     op->type = t_other;
 643                     /* Don't bother converting "()" to "NIL".  */
 644                     last_non_comment_line = line_number;
 645                     if (argparser != NULL)
 646                       arglist_parser_done (argparser, arg);
 647                     return;
 648                   }
 649
 650                 /* Dots are not allowed in every position.
 651                    But be tolerant.  */
 652
 653                 /* EOF inside list is illegal.  But be tolerant.  */
 654                 if (inner.type == t_eof)
 655                   break;
 656
 657                 if (arg == 0)
 658                   {
 659                     /* This is the function position.  */
 660                     if (inner.type == t_symbol)
 661                       {
 662                         char *symbol_name = string_of_object (&inner);
 663                         void *keyword_value;
 664
 665                         if (hash_find_entry (&keywords,
 666                                              symbol_name, strlen (symbol_name),
 667                                              &keyword_value)
 668                             == 0)
 669                           shapes = (const struct callshapes *) keyword_value;
 670
 671                         argparser = arglist_parser_alloc (mlp, shapes);
 672
 673                         context_iter =
 674                           flag_context_list_iterator (
 675                             flag_context_list_table_lookup (
 676                               flag_context_list_table,
 677                               symbol_name, strlen (symbol_name)));
 678
 679                         free (symbol_name);
 680                       }
 681                     else
 682                       context_iter = null_context_list_iterator;
 683                   }
 684                 else
 685                   {
 686                     /* These are the argument positions.  */
 687                     if (argparser != NULL && inner.type == t_string)
 688                       arglist_parser_remember (argparser, arg,
 689                                                string_of_object (&inner),
 690                                                inner_context,
 691                                                logical_file_name,
 692                                                inner.line_number_at_start,
 693                                                savable_comment);
 694                   }
 695
 696                 free_object (&inner);
 697               }
 698
 699             if (argparser != NULL)
 700               arglist_parser_done (argparser, arg);
 701           }
 702           op->type = t_other;
 703           last_non_comment_line = line_number;
 704           return;
 705
 706         case '[':
 707           {
 708             for (;;)
 709               {
 710                 struct object inner;
 711
 712                 read_object (&inner, null_context);
 713
 714                 /* Recognize end of vector.  */
 715                 if (inner.type == t_close)
 716                   {
 717                     op->type = t_other;
 718                     last_non_comment_line = line_number;
 719                     return;
 720                   }
 721
 722                 /* Dots are not allowed.  But be tolerant.  */
 723
 724                 /* EOF inside vector is illegal.  But be tolerant.  */
 725                 if (inner.type == t_eof)
 726                   break;
 727
 728                 free_object (&inner);
 729               }
 730           }
 731           op->type = t_other;
 732           last_non_comment_line = line_number;
 733           return;
 734
 735         case ')': case ']':
 736           /* Tell the caller about the end of list or vector.
 737              Unmatched closing parenthesis is illegal.  But be tolerant.  */
 738           op->type = t_close;
 739           last_non_comment_line = line_number;
 740           return;
 741
 742         case ',':
 743           {
 744             int c = do_getc ();
 745             /* The ,@ handling inside lists is wrong anyway, because
 746                ,@form expands to an unknown number of elements.  */
 747             if (c != EOF && c != '@')
 748               do_ungetc (c);
 749           }
 750           /*FALLTHROUGH*/
 751         case '\'':
 752         case '`':
 753           {
 754             struct object inner;
 755
 756             read_object (&inner, null_context);
 757
 758             /* Dots and EOF are not allowed here.  But be tolerant.  */
 759
 760             free_object (&inner);
 761
 762             op->type = t_other;
 763             last_non_comment_line = line_number;
 764             return;
 765           }
 766
 767         case ';':
 768           {
 769             bool all_semicolons = true;
 770
 771             last_comment_line = line_number;
 772             comment_start ();
 773             for (;;)
 774               {
 775                 int c = do_getc ();
 776                 if (c == EOF || c == '\n' || c == '\f' || c == '\r')
 777                   break;
 778                 if (c != ';')
 779                   all_semicolons = false;
 780                 if (!all_semicolons)
 781                   {
 782                     /* We skip all leading white space, but not EOLs.  */
 783                     if (!(buflen == 0 && (c == ' ' || c == '\t')))
 784                       comment_add (c);
 785                   }
 786               }
 787             comment_line_end (0);
 788             continue;
 789           }
 790
 791         case '"':
 792           {
 793             op->token = XMALLOC (struct token);
 794             init_token (op->token);
 795             op->line_number_at_start = line_number;
 796             for (;;)
 797               {
 798                 int c = do_getc ();
 799                 if (c == EOF)
 800                   /* Invalid input.  Be tolerant, no error message.  */
 801                   break;
 802                 if (c == '"')
 803                   break;
 804                 if (c == '\\')
 805                   {
 806                     c = do_getc ();
 807                     if (c == EOF)
 808                       /* Invalid input.  Be tolerant, no error message.  */
 809                       break;
 810                     if (c == '\n')
 811                       /* Ignore escaped newline.  */
 812                       ;
 813                     else
 814                       {
 815                         c = do_getc_escaped (c);
 816                         if (c == EOF)
 817                           /* Invalid input.  Be tolerant, no error message.  */
 818                           break;
 819                         grow_token (op->token);
 820                         op->token->chars[op->token->charcount++] = c;
 821                       }
 822                   }
 823                 else
 824                   {
 825                     grow_token (op->token);
 826                     op->token->chars[op->token->charcount++] = c;
 827                   }
 828               }
 829             op->type = t_string;
 830
 831             if (extract_all)
 832               {
 833                 lex_pos_ty pos;
 834
 835                 pos.file_name = logical_file_name;
 836                 pos.line_number = op->line_number_at_start;
 837                 remember_a_message (mlp, NULL, string_of_object (op),
 838                                     null_context, &pos, NULL, savable_comment);
 839               }
 840             last_non_comment_line = line_number;
 841             return;
 842           }
 843
 844         case '?':
 845           c = do_getc ();
 846           if (c == EOF)
 847             /* Invalid input.  Be tolerant, no error message.  */
 848             ;
 849           else if (c == '\\')
 850             {
 851               c = do_getc ();
 852               if (c == EOF)
 853                 /* Invalid input.  Be tolerant, no error message.  */
 854                 ;
 855               else
 856                 {
 857                   c = do_getc_escaped (c);
 858                   if (c == EOF)
 859                     /* Invalid input.  Be tolerant, no error message.  */
 860                     ;
 861                 }
 862             }
 863           op->type = t_other;
 864           last_non_comment_line = line_number;
 865           return;
 866
 867         case '#':
 868           /* Dispatch macro handling.  */
 869           c = do_getc ();
 870           if (c == EOF)
 871             /* Invalid input.  Be tolerant, no error message.  */
 872             {
 873               op->type = t_other;
 874               return;
 875             }
 876
 877           switch (c)
 878             {
 879             case '!':
 880               if (ftell (fp) == 2)
 881                 /* Skip comment until !# */
 882                 {
 883                   c = do_getc ();
 884                   for (;;)
 885                     {
 886                       if (c == EOF)
 887                         break;
 888                       if (c == '!')
 889                         {
 890                           c = do_getc ();
 891                           if (c == EOF || c == '#')
 892                             break;
 893                         }
 894                       else
 895                         c = do_getc ();
 896                     }
 897                   if (c == EOF)
 898                     {
 899                       /* EOF not allowed here.  But be tolerant.  */
 900                       op->type = t_eof;
 901                       return;
 902                     }
 903                   continue;
 904                 }
 905               /*FALLTHROUGH*/
 906             case '\'':
 907             case ':':
 908               {
 909                 struct object inner;
 910                 read_object (&inner, null_context);
 911                 /* Dots and EOF are not allowed here.
 912                    But be tolerant.  */
 913                 free_object (&inner);
 914                 op->type = t_other;
 915                 last_non_comment_line = line_number;
 916                 return;
 917               }
 918
 919             case '[':
 920             case '(':
 921               {
 922                 struct object inner;
 923                 do_ungetc (c);
 924                 read_object (&inner, null_context);
 925                 /* Dots and EOF are not allowed here.
 926                    But be tolerant.  */
 927                 free_object (&inner);
 928                 op->type = t_other;
 929                 last_non_comment_line = line_number;
 930                 return;
 931               }
 932
 933             case '|':
 934               {
 935                 int depth = 0;
 936
 937                 comment_start ();
 938                 c = do_getc ();
 939                 for (;;)
 940                   {
 941                     if (c == EOF)
 942                       break;
 943                     if (c == '|')
 944                       {
 945                         c = do_getc ();
 946                         if (c == EOF)
 947                           break;
 948                         if (c == '#')
 949                           {
 950                             if (depth == 0)
 951                               {
 952                                 comment_line_end (0);
 953                                 break;
 954                               }
 955                             depth--;
 956                             comment_add ('|');
 957                             comment_add ('#');
 958                             c = do_getc ();
 959                           }
 960                         else
 961                           comment_add ('|');
 962                       }
 963                     else if (c == '#')
 964                       {
 965                         c = do_getc ();
 966                         if (c == EOF)
 967                           break;
 968                         comment_add ('#');
 969                         if (c == '|')
 970                           {
 971                             depth++;
 972                             comment_add ('|');
 973                             c = do_getc ();
 974                           }
 975                       }
 976                     else
 977                       {
 978                         /* We skip all leading white space.  */
 979                         if (!(buflen == 0 && (c == ' ' || c == '\t')))
 980                           comment_add (c);
 981                         if (c == '\n')
 982                           {
 983                             comment_line_end (1);
 984                             comment_start ();
 985                           }
 986                         c = do_getc ();
 987                       }
 988                   }
 989                 if (c == EOF)
 990                   {
 991                     /* EOF not allowed here.  But be tolerant.  */
 992                     op->type = t_eof;
 993                     return;
 994                   }
 995                 last_comment_line = line_number;
 996                 continue;
 997               }
 998
 999             case '\\':
1000               {
1001                 struct token token;
1002                 int first = '\\';
1003                 read_token (&token, &first);
1004                 free_token (&token);
1005                 op->type = t_other;
1006                 last_non_comment_line = line_number;
1007                 return;
1008               }
1009
1010             case 'T': case 't':
1011             case 'F': case 'f':
1012               op->type = t_other;
1013               last_non_comment_line = line_number;
1014               return;
1015
1016             case 'B': case 'b':
1017             case 'O': case 'o':
1018             case 'D': case 'd':
1019             case 'X': case 'x':
1020             case 'E': case 'e':
1021             case 'I': case 'i':
1022               {
1023                 struct token token;
1024                 do_ungetc (c);
1025                 c = '#';
1026                 read_token (&token, &c);
1027                 free_token (&token);
1028                 op->type = t_other;
1029                 last_non_comment_line = line_number;
1030                 return;
1031               }
1032
1033             default:
1034               /* Invalid input.  Be tolerant, no error message.  */
1035               op->type = t_other;
1036               last_non_comment_line = line_number;
1037               return;
1038             }
1039
1040           /*NOTREACHED*/
1041           abort ();
1042
1043         default:
1044           /* Read a token.  */
1045           {
1046             bool symbol;
1047
1048             op->token = XMALLOC (struct token);
1049             symbol = read_token (op->token, &c);
1050             if (op->token->charcount == 1 && op->token->chars[0] == '.')
1051               {
1052                 free_token (op->token);
1053                 free (op->token);
1054                 op->type = t_dot;
1055                 last_non_comment_line = line_number;
1056                 return;
1057               }
1058             if (!symbol)
1059               {
1060                 free_token (op->token);
1061                 free (op->token);
1062                 op->type = t_other;
1063                 last_non_comment_line = line_number;
1064                 return;
1065               }
1066             /* Distinguish between "foo" and "foo#bar".  */
1067             c = do_getc ();
1068             if (c == '#')
1069               {
1070                 struct token second_token;
1071
1072                 free_token (op->token);
1073                 free (op->token);
1074                 read_token (&second_token, NULL);
1075                 free_token (&second_token);
1076                 op->type = t_other;
1077                 last_non_comment_line = line_number;
1078                 return;
1079               }
1080             else
1081               {
1082                 if (c != EOF)
1083                   do_ungetc (c);
1084                 op->type = t_symbol;
1085                 last_non_comment_line = line_number;
1086                 return;
1087               }
1088           }
1089         }
1090     }
1091 }
1092
1093
1094 void
1095 extract_librep (FILE *f,
1096                 const char *real_filename, const char *logical_filename,
1097                 flag_context_list_table_ty *flag_table,
1098                 msgdomain_list_ty *mdlp)
1099 {
1100   mlp = mdlp->item[0]->messages;
1101
1102   fp = f;
1103   real_file_name = real_filename;
1104   logical_file_name = xstrdup (logical_filename);
1105   line_number = 1;
1106
1107   last_comment_line = -1;
1108   last_non_comment_line = -1;
1109
1110   flag_context_list_table = flag_table;
1111
1112   init_keywords ();
1113
1114   /* Eat tokens until eof is seen.  When read_object returns
1115      due to an unbalanced closing parenthesis, just restart it.  */
1116   do
1117     {
1118       struct object toplevel_object;
1119
1120       read_object (&toplevel_object, null_context);
1121
1122       if (toplevel_object.type == t_eof)
1123         break;
1124
1125       free_object (&toplevel_object);
1126     }
1127   while (!feof (fp));
1128
1129   /* Close scanner.  */
1130   fp = NULL;
1131   real_file_name = NULL;
1132   logical_file_name = NULL;
1133   line_number = 0;
1134 }