gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
/* This lexer works with a single pass of the file.  Recently I
   re-wrote it to minimize the places where we step backwards in the
   input stream, to make future changes to support multi-byte
   character sets fairly straightforward.

   There is now only one routine where we do step backwards:
   skip_escaped_newlines.  This routine could probably also be changed
   so that it doesn't need to step back.  One possibility is to use a
   trick similar to that used in lex_dot and lex_percent.  Two
   extra characters might be needed, but skip_escaped_newlines itself
   would probably be the only place that needs to be aware of that,
   and changes to the remaining routines would probably only be needed
   if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41 #include "symcat.h"
  42
/* Categories describing how a token's spelling is found; one category
   is recorded per token type in the token_spellings table.

   Tokens with SPELL_STRING store their spelling in the token list,
   and its length in the token->val.name.len.
   NOTE(review): the lexing routines in this file fill in
   token->val.str.len, so "val.name" may be a stale field name -
   confirm against the token definition.  */
enum spell_type
{
  SPELL_OPERATOR = 0,   /* Category given to every OP() table entry.  */
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_STRING,         /* Spelling is stored with the token itself.  */
  SPELL_NONE
};
  53
/* Spelling information kept for one token type.  */
struct token_spelling
{
  /* How tokens of this type are spelt.  */
  enum spell_type category;
  /* The operator text for OP() entries, or the stringified enumerator
     for TK() entries.  */
  const unsigned char *name;
};
  59
/* Spellings of the digraphs.
   NOTE(review): presumably indexed in an order fixed elsewhere (e.g.
   by the token enumeration) - confirm before reordering.  */
const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
                                             U":>", U"<%", U"%>"};

/* Expand TTYPE_TABLE into one token_spelling per token type.  An
   OP (E, S) entry is an operator spelt S; a TK (E, S) entry has
   category S and is named by the stringified enumerator E.  The two
   helper macros are only needed for this expansion.  */
#define OP(e, s) { SPELL_OPERATOR, U s           },
#define TK(e, s) { s,              U STRINGX (e) },
const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
#undef OP
#undef TK

/* Look up the spelling category and the name of TOKEN by its type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  71
/* Character-level helpers: newline accounting, trigraphs and escaped
   newlines.  */
static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
static cppchar_t get_effective_char PARAMS ((cpp_buffer *));

/* Helpers that skip or collect the pieces of a token.  */
static int skip_block_comment PARAMS ((cpp_reader *));
static int skip_line_comment PARAMS ((cpp_reader *));
static void adjust_column PARAMS ((cpp_reader *));
static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
static void unterminated PARAMS ((cpp_reader *, int));
static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
static int name_p PARAMS ((cpp_reader *, const cpp_string *));

/* Helpers operating on cpp_pool and cpp_chunk objects.  */
static cpp_chunk *new_chunk PARAMS ((unsigned int));
static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
  93
  94 /* Utility routine:
  95
  96    Compares, the token TOKEN to the NUL-terminated string STRING.
  97    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  98
  99 int
 100 cpp_ideq (token, string)
 101      const cpp_token *token;
 102      const char *string;
 103 {
 104   if (token->type != CPP_NAME)
 105     return 0;
 106
 107   return !ustrcmp (token->val.node->name, (const U_CHAR *) string);
 108 }
 109
 110 /* Call when meeting a newline.  Returns the character after the newline
 111    (or carriage-return newline combination), or EOF.  */
 112 static cppchar_t
 113 handle_newline (buffer, newline_char)
 114      cpp_buffer *buffer;
 115      cppchar_t newline_char;
 116 {
 117   cppchar_t next = EOF;
 118
 119   buffer->col_adjust = 0;
 120   buffer->lineno++;
 121   buffer->line_base = buffer->cur;
 122
 123   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 124   if (buffer->cur < buffer->rlimit)
 125     {
 126       next = *buffer->cur++;
 127       if (next + newline_char == '\r' + '\n')
 128         {
 129           buffer->line_base = buffer->cur;
 130           if (buffer->cur < buffer->rlimit)
 131             next = *buffer->cur++;
 132           else
 133             next = EOF;
 134         }
 135     }
 136
 137   buffer->read_ahead = next;
 138   return next;
 139 }
 140
 141 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 142    encountered.  It warns if necessary, and returns true if the
 143    trigraph should be honoured.  FROM_CHAR is the third character of a
 144    trigraph, and presumed to be the previous character for position
 145    reporting.  */
 146 static int
 147 trigraph_ok (pfile, from_char)
 148      cpp_reader *pfile;
 149      cppchar_t from_char;
 150 {
 151   int accept = CPP_OPTION (pfile, trigraphs);
 152
 153   /* Don't warn about trigraphs in comments.  */
 154   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 155     {
 156       cpp_buffer *buffer = pfile->buffer;
 157       if (accept)
 158         cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
 159                                "trigraph ??%c converted to %c",
 160                                (int) from_char,
 161                                (int) _cpp_trigraph_map[from_char]);
 162       else
 163         cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
 164                                "trigraph ??%c ignored", (int) from_char);
 165     }
 166
 167   return accept;
 168 }
 169
/* Sets the type of the token being built to T and clears the
   look-ahead character, so that it is not seen again.  Assumes local
   variables buffer and result.  */
#define ACCEPT_CHAR(t) \
  do { result->type = t; buffer->read_ahead = EOF; } while (0)

/* Save and restore the current read position, for code that looks
   ahead and may have to back out.  Both assume local variables buffer
   and saved_cur.

   When we move to multibyte character sets, add to these something
   that saves and restores the state of the multibyte conversion
   library.  This probably involves saving and restoring a "cookie".
   In the case of glibc it is an 8-byte structure, so is not a high
   overhead operation.  In any case, it's out of the fast path.  */
#define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
#define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 181
/* Skips any escaped newlines introduced by NEXT, which is either a
   '?' or a '\\'.  Returns the next character, which will also have
   been placed in buffer->read_ahead.  This routine performs
   preprocessing stages 1 and 2 of the ISO C standard.

   On entry buffer->cur points just past NEXT.  A '?' is acted upon
   only if it starts a trigraph that trigraph_ok accepts, in which
   case the trigraph's replacement becomes NEXT.  A backslash is acted
   upon only if it is followed, possibly after horizontal whitespace,
   by a newline.  Whenever a look-ahead fails, buffer->cur is put back
   to the most recently saved position.  Nothing is done at all for
   buffers marked from_stage3.  */
static cppchar_t
skip_escaped_newlines (buffer, next)
     cpp_buffer *buffer;
     cppchar_t next;
{
  /* Only do this if we apply stages 1 and 2.  */
  if (!buffer->from_stage3)
    {
      cppchar_t next1;
      const unsigned char *saved_cur;
      int space;

      do
        {
          /* Nothing can follow NEXT, so it stands for itself.  */
          if (buffer->cur == buffer->rlimit)
            break;

          SAVE_STATE ();
          if (next == '?')
            {
              /* A trigraph needs a second '?' and then a third
                 character.  */
              next1 = *buffer->cur++;
              if (next1 != '?' || buffer->cur == buffer->rlimit)
                {
                  RESTORE_STATE ();
                  break;
                }

              /* The third character must be in the trigraph map, and
                 trigraphs must be enabled.  */
              next1 = *buffer->cur++;
              if (!_cpp_trigraph_map[next1]
                  || !trigraph_ok (buffer->pfile, next1))
                {
                  RESTORE_STATE ();
                  break;
                }

              /* We have a full trigraph here.  */
              next = _cpp_trigraph_map[next1];
              if (next != '\\' || buffer->cur == buffer->rlimit)
                break;

              /* The trigraph produced a backslash.  From now on, back
                 out only as far as the end of the trigraph.  */
              SAVE_STATE ();
            }

          /* We have a backslash, and room for at least one more character.  */
          space = 0;
          do
            {
              next1 = *buffer->cur++;
              if (!is_nvspace (next1))
                break;
              space = 1;
            }
          while (buffer->cur < buffer->rlimit);

          /* Not followed by a newline: this is an ordinary backslash.  */
          if (!is_vspace (next1))
            {
              RESTORE_STATE ();
              break;
            }

          if (space)
            cpp_warning (buffer->pfile,
                         "backslash and newline separated by space");

          /* Consume the newline.  Go round again if the following
             character could itself start another escaped newline.  */
          next = handle_newline (buffer, next1);
          if (next == EOF)
            cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
        }
      while (next == '\\' || next == '?');
    }

  buffer->read_ahead = next;
  return next;
}
 259
 260 /* Obtain the next character, after trigraph conversion and skipping
 261    an arbitrary string of escaped newlines.  The common case of no
 262    trigraphs or escaped newlines falls through quickly.  */
 263 static cppchar_t
 264 get_effective_char (buffer)
 265      cpp_buffer *buffer;
 266 {
 267   cppchar_t next = EOF;
 268
 269   if (buffer->cur < buffer->rlimit)
 270     {
 271       next = *buffer->cur++;
 272
 273       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 274          can introduce escaped newlines, which we want to skip, or
 275          UCNs, which, depending upon lexer state, we will handle in
 276          the future.  */
 277       if (next == '?' || next == '\\')
 278         next = skip_escaped_newlines (buffer, next);
 279     }
 280
 281   buffer->read_ahead = next;
 282   return next;
 283 }
 284
/* Skip a C-style block comment.  We find the end of the comment by
   seeing if an asterisk is before every '/' we encounter.  Returns
   non-zero if comment terminated by EOF, zero otherwise.

   pfile->state.lexing_comment is set for the duration of the scan,
   which suppresses trigraph warnings (see trigraph_ok).  On return
   buffer->read_ahead is EOF.  */
static int
skip_block_comment (pfile)
     cpp_reader *pfile;
{
  cpp_buffer *buffer = pfile->buffer;
  /* C is the current character and PREVC the one before it.  */
  cppchar_t c = EOF, prevc = EOF;

  pfile->state.lexing_comment = 1;
  while (buffer->cur != buffer->rlimit)
    {
      prevc = c, c = *buffer->cur++;

      /* Jumped to when C has already been read by the code below.  */
    next_char:
      /* FIXME: For speed, create a new character class of characters
         of interest inside block comments.  */
      if (c == '?' || c == '\\')
        c = skip_escaped_newlines (buffer, c);

      /* People like decorating comments with '*', so check for '/'
         instead for efficiency.  */
      if (c == '/')
        {
          /* The sequence star-slash ends the comment.  */
          if (prevc == '*')
            break;

          /* Warn about potential nested comments, but not if the '/'
             comes immediately before the true comment delimiter.
             Don't bother to get it right across escaped newlines.  */
          if (CPP_OPTION (pfile, warn_comments)
              && buffer->cur != buffer->rlimit)
            {
              prevc = c, c = *buffer->cur++;
              if (c == '*' && buffer->cur != buffer->rlimit)
                {
                  prevc = c, c = *buffer->cur++;
                  if (c != '/')
                    cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
                                           CPP_BUF_COL (buffer),
                                           "\"/*\" within comment");
                }
              /* C was read here but has not been examined yet.  */
              goto next_char;
            }
        }
      else if (is_vspace (c))
        {
          /* handle_newline returns the character after the newline,
             which still has to be examined.  */
          prevc = c, c = handle_newline (buffer, c);
          goto next_char;
        }
      else if (c == '\t')
        adjust_column (pfile);
    }

  pfile->state.lexing_comment = 0;
  buffer->read_ahead = EOF;
  /* Unless we stopped on star-slash, we ran out of input.  */
  return c != '/' || prevc != '*';
}
 344
 345 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 346    non-zero if a multiline comment.  The following new line, if any,
 347    is left in buffer->read_ahead.  */
 348 static int
 349 skip_line_comment (pfile)
 350      cpp_reader *pfile;
 351 {
 352   cpp_buffer *buffer = pfile->buffer;
 353   unsigned int orig_lineno = buffer->lineno;
 354   cppchar_t c;
 355
 356   pfile->state.lexing_comment = 1;
 357   do
 358     {
 359       c = EOF;
 360       if (buffer->cur == buffer->rlimit)
 361         break;
 362
 363       c = *buffer->cur++;
 364       if (c == '?' || c == '\\')
 365         c = skip_escaped_newlines (buffer, c);
 366     }
 367   while (!is_vspace (c));
 368
 369   pfile->state.lexing_comment = 0;
 370   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 371   return orig_lineno != buffer->lineno;
 372 }
 373
 374 /* pfile->buffer->cur is one beyond the \t character.  Update
 375    col_adjust so we track the column correctly.  */
 376 static void
 377 adjust_column (pfile)
 378      cpp_reader *pfile;
 379 {
 380   cpp_buffer *buffer = pfile->buffer;
 381   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 382
 383   /* Round it up to multiple of the tabstop, but subtract 1 since the
 384      tab itself occupies a character position.  */
 385   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 386                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 387 }
 388
 389 /* Skips whitespace, saving the next non-whitespace character.
 390    Adjusts pfile->col_adjust to account for tabs.  Without this,
 391    tokens might be assigned an incorrect column.  */
 392 static void
 393 skip_whitespace (pfile, c)
 394      cpp_reader *pfile;
 395      cppchar_t c;
 396 {
 397   cpp_buffer *buffer = pfile->buffer;
 398   unsigned int warned = 0;
 399
 400   do
 401     {
 402       /* Horizontal space always OK.  */
 403       if (c == ' ')
 404         ;
 405       else if (c == '\t')
 406         adjust_column (pfile);
 407       /* Just \f \v or \0 left.  */
 408       else if (c == '\0')
 409         {
 410           if (!warned)
 411             {
 412               cpp_warning (pfile, "null character(s) ignored");
 413               warned = 1;
 414             }
 415         }
 416       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 417         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
 418                                CPP_BUF_COL (buffer),
 419                                "%s in preprocessing directive",
 420                                c == '\f' ? "form feed" : "vertical tab");
 421
 422       c = EOF;
 423       if (buffer->cur == buffer->rlimit)
 424         break;
 425       c = *buffer->cur++;
 426     }
 427   /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
 428   while (is_nvspace (c));
 429
 430   /* Remember the next character.  */
 431   buffer->read_ahead = c;
 432 }
 433
 434 /* See if the characters of a number token are valid in a name (no
 435    '.', '+' or '-').  */
 436 static int
 437 name_p (pfile, string)
 438      cpp_reader *pfile;
 439      const cpp_string *string;
 440 {
 441   unsigned int i;
 442
 443   for (i = 0; i < string->len; i++)
 444     if (!is_idchar (string->text[i]))
 445       return 0;
 446
 447   return 1;
 448 }
 449
/* Parse an identifier, skipping embedded backslash-newlines.
   Calculate the hash value of the token while parsing, for improved
   performance.  The hashing algorithm *must* match cpp_lookup().

   C is the first character of the identifier and has already been
   read.  The spelling is accumulated at the front of
   pfile->ident_pool, and the hash node found by
   _cpp_lookup_with_hash is returned.  The first character after the
   identifier is left in buffer->read_ahead.  */

static cpp_hashnode *
parse_identifier (pfile, c)
     cpp_reader *pfile;
     cppchar_t c;
{
  cpp_hashnode *result;
  cpp_buffer *buffer = pfile->buffer;
  unsigned char *dest, *limit;
  /* R is the running hash of the spelling.  */
  unsigned int r = 0, saw_dollar = 0;

  dest = POOL_FRONT (&pfile->ident_pool);
  limit = POOL_LIMIT (&pfile->ident_pool);

  do
    {
      /* Fast path: copy identifier characters straight from the
         buffer.  */
      do
        {
          /* Need room for terminating null.  */
          if (dest + 1 >= limit)
            limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);

          *dest++ = c;
          r = HASHSTEP (r, c);

          if (c == '$')
            saw_dollar++;

          c = EOF;
          if (buffer->cur == buffer->rlimit)
            break;

          c = *buffer->cur++;
        }
      while (is_idchar (c));

      /* Potential escaped newline?  */
      if (c != '?' && c != '\\')
        break;
      c = skip_escaped_newlines (buffer, c);
    }
  /* Carry on if the identifier resumes after the escaped newlines.  */
  while (is_idchar (c));

  /* Remember the next character.  */
  buffer->read_ahead = c;

  /* $ is not an identifier character in the standard, but is commonly
     accepted as an extension.  Don't warn about it in skipped
     conditional blocks.  */
  if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
    cpp_pedwarn (pfile, "'$' character(s) in identifier");

  /* Identifiers are null-terminated.  */
  *dest = '\0';

  /* This routine commits the memory if necessary.  */
  result = _cpp_lookup_with_hash (pfile,
                                  dest - POOL_FRONT (&pfile->ident_pool), r);

  /* Some identifiers require diagnostics when lexed.  */
  if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
    {
      /* It is allowed to poison the same identifier twice.  */
      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
        cpp_error (pfile, "attempt to use poisoned \"%s\"", result->name);

      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
         replacement list of a variable-arguments macro.  */
      if (result == pfile->spec_nodes.n__VA_ARGS__
          && !pfile->state.va_args_ok)
        cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variable-argument macro");
    }

  return result;
}
 528
/* Parse a number, skipping embedded backslash-newlines.

   C is the first character of the number and has already been read.
   If LEADING_PERIOD is non-zero, a '.' is stored in front of it.  The
   null-terminated spelling is committed to pfile->string_pool and
   described by NUMBER; number->len excludes the terminating null.
   The first character after the number is left in
   buffer->read_ahead.  */
static void
parse_number (pfile, number, c, leading_period)
     cpp_reader *pfile;
     cpp_string *number;
     cppchar_t c;
     int leading_period;
{
  cpp_buffer *buffer = pfile->buffer;
  cpp_pool *pool = pfile->string_pool;
  unsigned char *dest, *limit;

  dest = POOL_FRONT (pool);
  limit = POOL_LIMIT (pool);

  /* Place a leading period.  */
  if (leading_period)
    {
      if (dest >= limit)
        limit = _cpp_next_chunk (pool, 0, &dest);
      *dest++ = '.';
    }

  do
    {
      /* Fast path: copy the characters of the number straight from
         the buffer.  */
      do
        {
          /* Need room for terminating null.  */
          if (dest + 1 >= limit)
            limit = _cpp_next_chunk (pool, 0, &dest);
          *dest++ = c;

          c = EOF;
          if (buffer->cur == buffer->rlimit)
            break;

          c = *buffer->cur++;
        }
      /* VALID_SIGN is given the character stored just before C.  */
      while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));

      /* Potential escaped newline?  */
      if (c != '?' && c != '\\')
        break;
      c = skip_escaped_newlines (buffer, c);
    }
  /* Carry on if the number resumes after the escaped newlines.  */
  while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));

  /* Remember the next character.  */
  buffer->read_ahead = c;

  /* Null-terminate the number.  */
  *dest = '\0';

  number->text = POOL_FRONT (pool);
  number->len = dest - number->text;
  /* The extra byte is the terminating null.  */
  POOL_COMMIT (pool, number->len + 1);
}
 586
 587 /* Subroutine of parse_string.  Emits error for unterminated strings.  */
 588 static void
 589 unterminated (pfile, term)
 590      cpp_reader *pfile;
 591      int term;
 592 {
 593   cpp_error (pfile, "missing terminating %c character", term);
 594
 595   if (term == '\"' && pfile->mlstring_pos.line
 596       && pfile->mlstring_pos.line != pfile->lexer_pos.line)
 597     {
 598       cpp_error_with_line (pfile, pfile->mlstring_pos.line,
 599                            pfile->mlstring_pos.col,
 600                            "possible start of unterminated string literal");
 601       pfile->mlstring_pos.line = 0;
 602     }
 603 }
 604
 605 /* Subroutine of parse_string.  */
 606 static int
 607 unescaped_terminator_p (pfile, dest)
 608      cpp_reader *pfile;
 609      const unsigned char *dest;
 610 {
 611   const unsigned char *start, *temp;
 612
 613   /* In #include-style directives, terminators are not escapeable.  */
 614   if (pfile->state.angled_headers)
 615     return 1;
 616
 617   start = POOL_FRONT (pfile->string_pool);
 618
 619   /* An odd number of consecutive backslashes represents an escaped
 620      terminator.  */
 621   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 622     ;
 623
 624   return ((dest - temp) & 1) == 0;
 625 }
 626
/* Parses a string, character constant, or angle-bracketed header file
   name.  Handles embedded trigraphs and escaped newlines.

   Multi-line strings are allowed, but they are deprecated within
   directives.

   TERMINATOR is the closing delimiter to look for.  The text up to
   but excluding the terminator is committed to pfile->string_pool
   and recorded in token->val.str; it is not null-terminated.  On
   return buffer->read_ahead is EOF if the terminator or the end of
   the buffer was reached, otherwise it holds the newline at which the
   literal was cut short.  */
static void
parse_string (pfile, token, terminator)
     cpp_reader *pfile;
     cpp_token *token;
     cppchar_t terminator;
{
  cpp_buffer *buffer = pfile->buffer;
  cpp_pool *pool = pfile->string_pool;
  unsigned char *dest, *limit;
  cppchar_t c;
  unsigned int nulls = 0;

  dest = POOL_FRONT (pool);
  limit = POOL_LIMIT (pool);

  for (;;)
    {
      if (buffer->cur == buffer->rlimit)
        {
          c = EOF;
          unterminated (pfile, terminator);
          break;
        }
      c = *buffer->cur++;

      /* Jumped to when C was obtained from read_ahead after a
         newline, rather than read from the buffer above.  */
    have_char:
      /* Handle trigraphs, escaped newlines etc.  */
      if (c == '?' || c == '\\')
        c = skip_escaped_newlines (buffer, c);

      if (c == terminator && unescaped_terminator_p (pfile, dest))
        {
          /* The terminator is consumed; nothing is left pending.  */
          c = EOF;
          break;
        }
      else if (is_vspace (c))
        {
          /* In assembly language, silently terminate string and
             character literals at end of line.  This is a kludge
             around not knowing where comments are.  */
          if (CPP_OPTION (pfile, lang_asm) && terminator != '>')
            break;

          /* Character constants and header names may not extend over
             multiple lines.  In Standard C, neither may strings.
             Unfortunately, we accept multiline strings as an
             extension, except in #include family directives.  */
          if (terminator != '"' || pfile->state.angled_headers)
            {
              unterminated (pfile, terminator);
              break;
            }

          /* Note where the first multi-line string started, so that
             unterminated can point at it.  */
          if (pfile->mlstring_pos.line == 0)
            {
              pfile->mlstring_pos = pfile->lexer_pos;
              if (CPP_PEDANTIC (pfile))
                cpp_pedwarn (pfile, "multi-line string constant");
            }

          /* Whatever form the newline took, store it as a single
             '\n'.  */
          handle_newline (buffer, c);  /* Stores to read_ahead.  */
          c = '\n';
        }
      else if (c == '\0')
        {
          /* Warn about the first null only.  */
          if (nulls++ == 0)
            cpp_warning (pfile, "null character(s) preserved in literal");
        }

      /* No terminating null for strings - they could contain nulls.  */
      if (dest >= limit)
        limit = _cpp_next_chunk (pool, 0, &dest);
      *dest++ = c;

      /* If we had a new line, the next character is in read_ahead.  */
      if (c != '\n')
        continue;
      c = buffer->read_ahead;
      if (c != EOF)
        goto have_char;
      /* Otherwise the loop's own end-of-buffer check takes over.  */
    }

  /* Remember the next character.  */
  buffer->read_ahead = c;

  token->val.str.text = POOL_FRONT (pool);
  token->val.str.len = dest - token->val.str.text;
  POOL_COMMIT (pool, token->val.str.len);
}
 721
 722 /* The stored comment includes the comment start and any terminator.  */
 723 static void
 724 save_comment (pfile, token, from)
 725      cpp_reader *pfile;
 726      cpp_token *token;
 727      const unsigned char *from;
 728 {
 729   unsigned char *buffer;
 730   unsigned int len;
 731
 732   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 733   /* C++ comments probably (not definitely) have moved past a new
 734      line, which we don't want to save in the comment.  */
 735   if (pfile->buffer->read_ahead != EOF)
 736     len--;
 737   buffer = _cpp_pool_alloc (pfile->string_pool, len);
 738
 739   token->type = CPP_COMMENT;
 740   token->val.str.len = len;
 741   token->val.str.text = buffer;
 742
 743   buffer[0] = '/';
 744   memcpy (buffer + 1, from, len - 1);
 745 }
 746
 747 /* Subroutine of lex_token to handle '%'.  A little tricky, since we
 748    want to avoid stepping back when lexing %:%X.  */
 749 static void
 750 lex_percent (buffer, result)
 751      cpp_buffer *buffer;
 752      cpp_token *result;
 753 {
 754   cppchar_t c;
 755
 756   result->type = CPP_MOD;
 757   /* Parsing %:%X could leave an extra character.  */
 758   if (buffer->extra_char == EOF)
 759     c = get_effective_char (buffer);
 760   else
 761     {
 762       c = buffer->read_ahead = buffer->extra_char;
 763       buffer->extra_char = EOF;
 764     }
 765
 766   if (c == '=')
 767     ACCEPT_CHAR (CPP_MOD_EQ);
 768   else if (CPP_OPTION (buffer->pfile, digraphs))
 769     {
 770       if (c == ':')
 771         {
 772           result->flags |= DIGRAPH;
 773           ACCEPT_CHAR (CPP_HASH);
 774           if (get_effective_char (buffer) == '%')
 775             {
 776               buffer->extra_char = get_effective_char (buffer);
 777               if (buffer->extra_char == ':')
 778                 {
 779                   buffer->extra_char = EOF;
 780                   ACCEPT_CHAR (CPP_PASTE);
 781                 }
 782               else
 783                 /* We'll catch the extra_char when we're called back.  */
 784                 buffer->read_ahead = '%';
 785             }
 786         }
 787       else if (c == '>')
 788         {
 789           result->flags |= DIGRAPH;
 790           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 791         }
 792     }
 793 }
 794
 795 /* Subroutine of lex_token to handle '.'.  This is tricky, since we
 796    want to avoid stepping back when lexing '...' or '.123'.  In the
 797    latter case we should also set a flag for parse_number.  */
 798 static void
 799 lex_dot (pfile, result)
 800      cpp_reader *pfile;
 801      cpp_token *result;
 802 {
 803   cpp_buffer *buffer = pfile->buffer;
 804   cppchar_t c;
 805
 806   /* Parsing ..X could leave an extra character.  */
 807   if (buffer->extra_char == EOF)
 808     c = get_effective_char (buffer);
 809   else
 810     {
 811       c = buffer->read_ahead = buffer->extra_char;
 812       buffer->extra_char = EOF;
 813     }
 814
 815   /* All known character sets have 0...9 contiguous.  */
 816   if (c >= '0' && c <= '9')
 817     {
 818       result->type = CPP_NUMBER;
 819       parse_number (pfile, &result->val.str, c, 1);
 820     }
 821   else
 822     {
 823       result->type = CPP_DOT;
 824       if (c == '.')
 825         {
 826           buffer->extra_char = get_effective_char (buffer);
 827           if (buffer->extra_char == '.')
 828             {
 829               buffer->extra_char = EOF;
 830               ACCEPT_CHAR (CPP_ELLIPSIS);
 831             }
 832           else
 833             /* We'll catch the extra_char when we're called back.  */
 834             buffer->read_ahead = '.';
 835         }
 836       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 837         ACCEPT_CHAR (CPP_DOT_STAR);
 838     }
 839 }
 840
 841 void
 842 _cpp_lex_token (pfile, result)
 843      cpp_reader *pfile;
 844      cpp_token *result;
 845 {
 846   cppchar_t c;
 847   cpp_buffer *buffer;
 848   const unsigned char *comment_start;
 849   unsigned char bol = pfile->state.next_bol;
 850
 851  done_directive:
 852   buffer = pfile->buffer;
 853   pfile->state.next_bol = 0;
 854   result->flags = 0;
 855  next_char:
 856   pfile->lexer_pos.line = buffer->lineno;
 857  next_char2:
 858   pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
 859
 860   c = buffer->read_ahead;
 861   if (c == EOF && buffer->cur < buffer->rlimit)
 862     {
 863       c = *buffer->cur++;
 864       pfile->lexer_pos.col++;
 865     }
 866
 867  do_switch:
 868   buffer->read_ahead = EOF;
 869   switch (c)
 870     {
 871     case EOF:
 872       /* Non-empty files should end in a newline.  Ignore for command
 873          line and _Pragma buffers.  */
 874       if (pfile->lexer_pos.col != 0 && !buffer->from_stage3)
 875         cpp_pedwarn (pfile, "no newline at end of file");
 876       pfile->state.next_bol = 1;
 877       result->type = CPP_EOF;
 878       break;
 879
 880     case ' ': case '\t': case '\f': case '\v': case '\0':
 881       skip_whitespace (pfile, c);
 882       result->flags |= PREV_WHITE;
 883       goto next_char2;
 884
 885     case '\n': case '\r':
 886       if (!pfile->state.in_directive)
 887         {
 888           handle_newline (buffer, c);
 889           bol = 1;
 890           pfile->lexer_pos.output_line = buffer->lineno;
 891
 892           /* Newlines in arguments are white space (6.10.3.10).
 893              Otherwise, clear any white space flag.  */
 894           if (pfile->state.parsing_args)
 895             result->flags |= PREV_WHITE;
 896           else
 897             result->flags &= ~PREV_WHITE;
 898           goto next_char;
 899         }
 900
 901       /* Don't let directives spill over to the next line.  */
 902       buffer->read_ahead = c;
 903       pfile->state.next_bol = 1;
 904       result->type = CPP_EOF;
 905       break;
 906
 907     case '?':
 908     case '\\':
 909       /* These could start an escaped newline, or '?' a trigraph.  Let
 910          skip_escaped_newlines do all the work.  */
 911       {
 912         unsigned int lineno = buffer->lineno;
 913
 914         c = skip_escaped_newlines (buffer, c);
 915         if (lineno != buffer->lineno)
 916           /* We had at least one escaped newline of some sort, and the
 917              next character is in buffer->read_ahead.  Update the
 918              token's line and column.  */
 919             goto next_char;
 920
 921         /* We are either the original '?' or '\\', or a trigraph.  */
 922         result->type = CPP_QUERY;
 923         buffer->read_ahead = EOF;
 924         if (c == '\\')
 925           goto random_char;
 926         else if (c != '?')
 927           goto do_switch;
 928       }
 929       break;
 930
 931     case '0': case '1': case '2': case '3': case '4':
 932     case '5': case '6': case '7': case '8': case '9':
 933       result->type = CPP_NUMBER;
 934       parse_number (pfile, &result->val.str, c, 0);
 935       break;
 936
 937     case '$':
 938       if (!CPP_OPTION (pfile, dollars_in_ident))
 939         goto random_char;
 940       /* Fall through... */
 941
 942     case '_':
 943     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 944     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 945     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 946     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 947     case 'y': case 'z':
 948     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 949     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 950     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 951     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 952     case 'Y': case 'Z':
 953       result->type = CPP_NAME;
 954       result->val.node = parse_identifier (pfile, c);
 955
 956       /* 'L' may introduce wide characters or strings.  */
 957       if (result->val.node == pfile->spec_nodes.n_L)
 958         {
 959           c = buffer->read_ahead; /* For make_string.  */
 960           if (c == '\'' || c == '"')
 961             {
 962               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
 963               goto make_string;
 964             }
 965         }
 966       /* Convert named operators to their proper types.  */
 967       else if (result->val.node->flags & NODE_OPERATOR)
 968         {
 969           result->flags |= NAMED_OP;
 970           result->type = result->val.node->value.operator;
 971         }
 972       break;
 973
 974     case '\'':
 975     case '"':
 976       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
 977     make_string:
 978       parse_string (pfile, result, c);
 979       break;
 980
 981     case '/':
 982       /* A potential block or line comment.  */
 983       comment_start = buffer->cur;
 984       result->type = CPP_DIV;
 985       c = get_effective_char (buffer);
 986       if (c == '=')
 987         ACCEPT_CHAR (CPP_DIV_EQ);
 988       if (c != '/' && c != '*')
 989         break;
 990
 991       if (c == '*')
 992         {
 993           if (skip_block_comment (pfile))
 994             cpp_error_with_line (pfile, pfile->lexer_pos.line,
 995                                  pfile->lexer_pos.col,
 996                                  "unterminated comment");
 997         }
 998       else
 999         {
1000           if (!CPP_OPTION (pfile, cplusplus_comments)
1001               && !CPP_IN_SYSTEM_HEADER (pfile))
1002             break;
1003
1004           /* We silently allow C++ comments in system headers,
1005              irrespective of conformance mode, because lots of
1006              broken systems do that and trying to clean it up in
1007              fixincludes is a nightmare.  */
1008           if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1009               && ! buffer->warned_cplusplus_comments)
1010             {
1011               cpp_pedwarn (pfile,
1012                            "C++ style comments are not allowed in ISO C89");
1013               cpp_pedwarn (pfile,
1014                            "(this will be reported only once per input file)");
1015               buffer->warned_cplusplus_comments = 1;
1016             }
1017
1018           /* Skip_line_comment updates buffer->read_ahead.  */
1019           if (skip_line_comment (pfile))
1020             cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1021                                    pfile->lexer_pos.col,
1022                                    "multi-line comment");
1023         }
1024
1025       /* Skipping the comment has updated buffer->read_ahead.  */
1026       if (!pfile->state.save_comments)
1027         {
1028           result->flags |= PREV_WHITE;
1029           goto next_char;
1030         }
1031
1032       /* Save the comment as a token in its own right.  */
1033       save_comment (pfile, result, comment_start);
1034       break;
1035
1036     case '<':
1037       if (pfile->state.angled_headers)
1038         {
1039           result->type = CPP_HEADER_NAME;
1040           c = '>';              /* terminator.  */
1041           goto make_string;
1042         }
1043
1044       result->type = CPP_LESS;
1045       c = get_effective_char (buffer);
1046       if (c == '=')
1047         ACCEPT_CHAR (CPP_LESS_EQ);
1048       else if (c == '<')
1049         {
1050           ACCEPT_CHAR (CPP_LSHIFT);
1051           if (get_effective_char (buffer) == '=')
1052             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1053         }
1054       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1055         {
1056           ACCEPT_CHAR (CPP_MIN);
1057           if (get_effective_char (buffer) == '=')
1058             ACCEPT_CHAR (CPP_MIN_EQ);
1059         }
1060       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1061         {
1062           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1063           result->flags |= DIGRAPH;
1064         }
1065       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1066         {
1067           ACCEPT_CHAR (CPP_OPEN_BRACE);
1068           result->flags |= DIGRAPH;
1069         }
1070       break;
1071
1072     case '>':
1073       result->type = CPP_GREATER;
1074       c = get_effective_char (buffer);
1075       if (c == '=')
1076         ACCEPT_CHAR (CPP_GREATER_EQ);
1077       else if (c == '>')
1078         {
1079           ACCEPT_CHAR (CPP_RSHIFT);
1080           if (get_effective_char (buffer) == '=')
1081             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1082         }
1083       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1084         {
1085           ACCEPT_CHAR (CPP_MAX);
1086           if (get_effective_char (buffer) == '=')
1087             ACCEPT_CHAR (CPP_MAX_EQ);
1088         }
1089       break;
1090
1091     case '%':
1092       lex_percent (buffer, result);
1093       if (result->type == CPP_HASH)
1094         goto do_hash;
1095       break;
1096
1097     case '.':
1098       lex_dot (pfile, result);
1099       break;
1100
1101     case '+':
1102       result->type = CPP_PLUS;
1103       c = get_effective_char (buffer);
1104       if (c == '=')
1105         ACCEPT_CHAR (CPP_PLUS_EQ);
1106       else if (c == '+')
1107         ACCEPT_CHAR (CPP_PLUS_PLUS);
1108       break;
1109
1110     case '-':
1111       result->type = CPP_MINUS;
1112       c = get_effective_char (buffer);
1113       if (c == '>')
1114         {
1115           ACCEPT_CHAR (CPP_DEREF);
1116           if (CPP_OPTION (pfile, cplusplus)
1117               && get_effective_char (buffer) == '*')
1118             ACCEPT_CHAR (CPP_DEREF_STAR);
1119         }
1120       else if (c == '=')
1121         ACCEPT_CHAR (CPP_MINUS_EQ);
1122       else if (c == '-')
1123         ACCEPT_CHAR (CPP_MINUS_MINUS);
1124       break;
1125
1126     case '*':
1127       result->type = CPP_MULT;
1128       if (get_effective_char (buffer) == '=')
1129         ACCEPT_CHAR (CPP_MULT_EQ);
1130       break;
1131
1132     case '=':
1133       result->type = CPP_EQ;
1134       if (get_effective_char (buffer) == '=')
1135         ACCEPT_CHAR (CPP_EQ_EQ);
1136       break;
1137
1138     case '!':
1139       result->type = CPP_NOT;
1140       if (get_effective_char (buffer) == '=')
1141         ACCEPT_CHAR (CPP_NOT_EQ);
1142       break;
1143
1144     case '&':
1145       result->type = CPP_AND;
1146       c = get_effective_char (buffer);
1147       if (c == '=')
1148         ACCEPT_CHAR (CPP_AND_EQ);
1149       else if (c == '&')
1150         ACCEPT_CHAR (CPP_AND_AND);
1151       break;
1152
1153     case '#':
1154       c = buffer->extra_char;   /* Can be set by error condition below.  */
1155       if (c != EOF)
1156         {
1157           buffer->read_ahead = c;
1158           buffer->extra_char = EOF;
1159         }
1160       else
1161         c = get_effective_char (buffer);
1162
1163       if (c == '#')
1164         {
1165           ACCEPT_CHAR (CPP_PASTE);
1166           break;
1167         }
1168
1169       result->type = CPP_HASH;
1170     do_hash:
1171       if (bol)
1172         {
1173           if (pfile->state.parsing_args)
1174             {
1175               /* 6.10.3 paragraph 11: If there are sequences of
1176                  preprocessing tokens within the list of arguments that
1177                  would otherwise act as preprocessing directives, the
1178                  behavior is undefined.
1179
1180                  This implementation will report a hard error, terminate
1181                  the macro invocation, and proceed to process the
1182                  directive.  */
1183               cpp_error (pfile,
1184                          "directives may not be used inside a macro argument");
1185
1186               /* Put a '#' in lookahead, return CPP_EOF for parse_arg.  */
1187               buffer->extra_char = buffer->read_ahead;
1188               buffer->read_ahead = '#';
1189               pfile->state.next_bol = 1;
1190               result->type = CPP_EOF;
1191
1192               /* Get whitespace right - newline_in_args sets it.  */
1193               if (pfile->lexer_pos.col == 1)
1194                 result->flags &= ~PREV_WHITE;
1195             }
1196           else
1197             {
1198               /* This is the hash introducing a directive.  */
1199               if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1200                 goto done_directive; /* bol still 1.  */
1201               /* This is in fact an assembler #.  */
1202             }
1203         }
1204       break;
1205
1206     case '|':
1207       result->type = CPP_OR;
1208       c = get_effective_char (buffer);
1209       if (c == '=')
1210         ACCEPT_CHAR (CPP_OR_EQ);
1211       else if (c == '|')
1212         ACCEPT_CHAR (CPP_OR_OR);
1213       break;
1214
1215     case '^':
1216       result->type = CPP_XOR;
1217       if (get_effective_char (buffer) == '=')
1218         ACCEPT_CHAR (CPP_XOR_EQ);
1219       break;
1220
1221     case ':':
1222       result->type = CPP_COLON;
1223       c = get_effective_char (buffer);
1224       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1225         ACCEPT_CHAR (CPP_SCOPE);
1226       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1227         {
1228           result->flags |= DIGRAPH;
1229           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1230         }
1231       break;
1232
1233     case '~': result->type = CPP_COMPL; break;
1234     case ',': result->type = CPP_COMMA; break;
1235     case '(': result->type = CPP_OPEN_PAREN; break;
1236     case ')': result->type = CPP_CLOSE_PAREN; break;
1237     case '[': result->type = CPP_OPEN_SQUARE; break;
1238     case ']': result->type = CPP_CLOSE_SQUARE; break;
1239     case '{': result->type = CPP_OPEN_BRACE; break;
1240     case '}': result->type = CPP_CLOSE_BRACE; break;
1241     case ';': result->type = CPP_SEMICOLON; break;
1242
1243     case '@':
1244       if (CPP_OPTION (pfile, objc))
1245         {
1246           /* In Objective C, '@' may begin keywords or strings, like
1247              @keyword or @"string".  It would be nice to call
1248              get_effective_char here and test the result.  However, we
1249              would then need to pass 2 characters to parse_identifier,
1250              making it ugly and slowing down its main loop.  Instead,
1251              we assume we have an identifier, and recover if not.  */
1252           result->type = CPP_NAME;
1253           result->val.node = parse_identifier (pfile, c);
1254           if (result->val.node->length != 1)
1255             break;
1256
1257           /* OK, so it wasn't an identifier.  Maybe a string?  */
1258           if (buffer->read_ahead == '"')
1259             {
1260               c = '"';
1261               ACCEPT_CHAR (CPP_OSTRING);
1262               goto make_string;
1263             }
1264         }
1265       goto random_char;
1266
1267     random_char:
1268     default:
1269       result->type = CPP_OTHER;
1270       result->val.c = c;
1271       break;
1272     }
1273 }
1274
1275 /* An upper bound on the number of bytes needed to spell a token,
1276    including preceding whitespace.  */
1277 unsigned int
1278 cpp_token_len (token)
1279      const cpp_token *token;
1280 {
1281   unsigned int len;
1282
1283   switch (TOKEN_SPELL (token))
1284     {
1285     default:            len = 0;                        break;
1286     case SPELL_STRING:  len = token->val.str.len;       break;
1287     case SPELL_IDENT:   len = token->val.node->length;  break;
1288     }
1289   /* 1 for whitespace, 4 for comment delimeters.  */
1290   return len + 5;
1291 }
1292
1293 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1294    already contain the enough space to hold the token's spelling.
1295    Returns a pointer to the character after the last character
1296    written.  */
1297 unsigned char *
1298 cpp_spell_token (pfile, token, buffer)
1299      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1300      const cpp_token *token;
1301      unsigned char *buffer;
1302 {
1303   switch (TOKEN_SPELL (token))
1304     {
1305     case SPELL_OPERATOR:
1306       {
1307         const unsigned char *spelling;
1308         unsigned char c;
1309
1310         if (token->flags & DIGRAPH)
1311           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1312         else if (token->flags & NAMED_OP)
1313           goto spell_ident;
1314         else
1315           spelling = TOKEN_NAME (token);
1316
1317         while ((c = *spelling++) != '\0')
1318           *buffer++ = c;
1319       }
1320       break;
1321
1322     case SPELL_IDENT:
1323       spell_ident:
1324       memcpy (buffer, token->val.node->name, token->val.node->length);
1325       buffer += token->val.node->length;
1326       break;
1327
1328     case SPELL_STRING:
1329       {
1330         int left, right, tag;
1331         switch (token->type)
1332           {
1333           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1334           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1335           case CPP_OSTRING:     left = '"';  right = '"';  tag = '@';  break;
1336           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1337           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1338           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1339           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1340           }
1341         if (tag) *buffer++ = tag;
1342         if (left) *buffer++ = left;
1343         memcpy (buffer, token->val.str.text, token->val.str.len);
1344         buffer += token->val.str.len;
1345         if (right) *buffer++ = right;
1346       }
1347       break;
1348
1349     case SPELL_CHAR:
1350       *buffer++ = token->val.c;
1351       break;
1352
1353     case SPELL_NONE:
1354       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1355       break;
1356     }
1357
1358   return buffer;
1359 }
1360
1361 /* Returns a token as a null-terminated string.  The string is
1362    temporary, and automatically freed later.  Useful for diagnostics.  */
1363 unsigned char *
1364 cpp_token_as_text (pfile, token)
1365      cpp_reader *pfile;
1366      const cpp_token *token;
1367 {
1368   unsigned int len = cpp_token_len (token);
1369   unsigned char *start = _cpp_pool_alloc (&pfile->temp_string_pool, len), *end;
1370
1371   end = cpp_spell_token (pfile, token, start);
1372   end[0] = '\0';
1373
1374   return start;
1375 }
1376
1377 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1378 const char *
1379 cpp_type2name (type)
1380      enum cpp_ttype type;
1381 {
1382   return (const char *) token_spellings[type].name;
1383 }
1384
1385 /* Writes the spelling of token to FP.  Separate from cpp_spell_token
1386    for efficiency - to avoid double-buffering.  Also, outputs a space
1387    if PREV_WHITE is flagged.  */
1388 void
1389 cpp_output_token (token, fp)
1390      const cpp_token *token;
1391      FILE *fp;
1392 {
1393   if (token->flags & PREV_WHITE)
1394     putc (' ', fp);
1395
1396   switch (TOKEN_SPELL (token))
1397     {
1398     case SPELL_OPERATOR:
1399       {
1400         const unsigned char *spelling;
1401
1402         if (token->flags & DIGRAPH)
1403           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1404         else if (token->flags & NAMED_OP)
1405           goto spell_ident;
1406         else
1407           spelling = TOKEN_NAME (token);
1408
1409         ufputs (spelling, fp);
1410       }
1411       break;
1412
1413     spell_ident:
1414     case SPELL_IDENT:
1415       ufputs (token->val.node->name, fp);
1416     break;
1417
1418     case SPELL_STRING:
1419       {
1420         int left, right, tag;
1421         switch (token->type)
1422           {
1423           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1424           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1425           case CPP_OSTRING:     left = '"';  right = '"';  tag = '@';  break;
1426           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1427           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1428           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1429           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1430           }
1431         if (tag) putc (tag, fp);
1432         if (left) putc (left, fp);
1433         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1434         if (right) putc (right, fp);
1435       }
1436       break;
1437
1438     case SPELL_CHAR:
1439       putc (token->val.c, fp);
1440       break;
1441
1442     case SPELL_NONE:
1443       /* An error, most probably.  */
1444       break;
1445     }
1446 }
1447
1448 /* Compare two tokens.  */
1449 int
1450 _cpp_equiv_tokens (a, b)
1451      const cpp_token *a, *b;
1452 {
1453   if (a->type == b->type && a->flags == b->flags)
1454     switch (TOKEN_SPELL (a))
1455       {
1456       default:                  /* Keep compiler happy.  */
1457       case SPELL_OPERATOR:
1458         return 1;
1459       case SPELL_CHAR:
1460         return a->val.c == b->val.c; /* Character.  */
1461       case SPELL_NONE:
1462         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1463       case SPELL_IDENT:
1464         return a->val.node == b->val.node;
1465       case SPELL_STRING:
1466         return (a->val.str.len == b->val.str.len
1467                 && !memcmp (a->val.str.text, b->val.str.text,
1468                             a->val.str.len));
1469       }
1470
1471   return 0;
1472 }
1473
#if 0
/* Compare token lists A and B.  Returns nonzero only if both hold the
   same number of tokens and every corresponding pair is equivalent
   according to _cpp_equiv_tokens.  Currently compiled out.  */
int
_cpp_equiv_toklists (a, b)
     const struct toklist *a, *b;
{
  unsigned int count, idx;

  count = a->limit - a->first;
  if (count != (b->limit - b->first))
    return 0;

  idx = 0;
  while (idx < count)
    {
      if (! _cpp_equiv_tokens (&a->first[idx], &b->first[idx]))
        return 0;
      idx++;
    }

  return 1;
}
#endif
1493
1494 /* Determine whether two tokens can be pasted together, and if so,
1495    what the resulting token is.  Returns CPP_EOF if the tokens cannot
1496    be pasted, or the appropriate type for the merged token if they
1497    can.  */
1498 enum cpp_ttype
1499 cpp_can_paste (pfile, token1, token2, digraph)
1500      cpp_reader * pfile;
1501      const cpp_token *token1, *token2;
1502      int* digraph;
1503 {
1504   enum cpp_ttype a = token1->type, b = token2->type;
1505   int cxx = CPP_OPTION (pfile, cplusplus);
1506
1507   /* Treat named operators as if they were ordinary NAMEs.  */
1508   if (token1->flags & NAMED_OP)
1509     a = CPP_NAME;
1510   if (token2->flags & NAMED_OP)
1511     b = CPP_NAME;
1512
1513   if (a <= CPP_LAST_EQ && b == CPP_EQ)
1514     return a + (CPP_EQ_EQ - CPP_EQ);
1515
1516   switch (a)
1517     {
1518     case CPP_GREATER:
1519       if (b == a) return CPP_RSHIFT;
1520       if (b == CPP_QUERY && cxx)        return CPP_MAX;
1521       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
1522       break;
1523     case CPP_LESS:
1524       if (b == a) return CPP_LSHIFT;
1525       if (b == CPP_QUERY && cxx)        return CPP_MIN;
1526       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
1527       if (CPP_OPTION (pfile, digraphs))
1528         {
1529           if (b == CPP_COLON)
1530             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1531           if (b == CPP_MOD)
1532             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
1533         }
1534       break;
1535
1536     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
1537     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
1538     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
1539
1540     case CPP_MINUS:
1541       if (b == a)               return CPP_MINUS_MINUS;
1542       if (b == CPP_GREATER)     return CPP_DEREF;
1543       break;
1544     case CPP_COLON:
1545       if (b == a && cxx)        return CPP_SCOPE;
1546       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1547         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1548       break;
1549
1550     case CPP_MOD:
1551       if (CPP_OPTION (pfile, digraphs))
1552         {
1553           if (b == CPP_GREATER)
1554             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
1555           if (b == CPP_COLON)
1556             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
1557         }
1558       break;
1559     case CPP_DEREF:
1560       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1561       break;
1562     case CPP_DOT:
1563       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1564       if (b == CPP_NUMBER)      return CPP_NUMBER;
1565       break;
1566
1567     case CPP_HASH:
1568       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1569         /* %:%: digraph */
1570         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1571       break;
1572
1573     case CPP_NAME:
1574       if (b == CPP_NAME)        return CPP_NAME;
1575       if (b == CPP_NUMBER
1576           && name_p (pfile, &token2->val.str)) return CPP_NAME;
1577       if (b == CPP_CHAR
1578           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1579       if (b == CPP_STRING
1580           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1581       break;
1582
1583     case CPP_NUMBER:
1584       if (b == CPP_NUMBER)      return CPP_NUMBER;
1585       if (b == CPP_NAME)        return CPP_NUMBER;
1586       if (b == CPP_DOT)         return CPP_NUMBER;
1587       /* Numbers cannot have length zero, so this is safe.  */
1588       if ((b == CPP_PLUS || b == CPP_MINUS)
1589           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1590         return CPP_NUMBER;
1591       break;
1592
1593     case CPP_OTHER:
1594       if (CPP_OPTION (pfile, objc) && token1->val.c == '@')
1595         {
1596           if (b == CPP_NAME)    return CPP_NAME;
1597           if (b == CPP_STRING)  return CPP_OSTRING;
1598         }
1599
1600     default:
1601       break;
1602     }
1603
1604   return CPP_EOF;
1605 }
1606
1607 /* Returns nonzero if a space should be inserted to avoid an
1608    accidental token paste for output.  For simplicity, it is
1609    conservative, and occasionally advises a space where one is not
1610    needed, e.g. "." and ".2".  */
1611
1612 int
1613 cpp_avoid_paste (pfile, token1, token2)
1614      cpp_reader *pfile;
1615      const cpp_token *token1, *token2;
1616 {
1617   enum cpp_ttype a = token1->type, b = token2->type;
1618   cppchar_t c;
1619
1620   if (token1->flags & NAMED_OP)
1621     a = CPP_NAME;
1622   if (token2->flags & NAMED_OP)
1623     b = CPP_NAME;
1624
1625   c = EOF;
1626   if (token2->flags & DIGRAPH)
1627     c = digraph_spellings[b - CPP_FIRST_DIGRAPH][0];
1628   else if (token_spellings[b].category == SPELL_OPERATOR)
1629     c = token_spellings[b].name[0];
1630
1631   /* Quickly get everything that can paste with an '='.  */
1632   if (a <= CPP_LAST_EQ && c == '=')
1633     return 1;
1634
1635   switch (a)
1636     {
1637     case CPP_GREATER:   return c == '>' || c == '?';
1638     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1639     case CPP_PLUS:      return c == '+';
1640     case CPP_MINUS:     return c == '-' || c == '>';
1641     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1642     case CPP_MOD:       return c == ':' || c == '>';
1643     case CPP_AND:       return c == '&';
1644     case CPP_OR:        return c == '|';
1645     case CPP_COLON:     return c == ':' || c == '>';
1646     case CPP_DEREF:     return c == '*';
1647     case CPP_DOT:       return c == '.' || c == '%';
1648     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1649     case CPP_NAME:      return ((b == CPP_NUMBER
1650                                  && name_p (pfile, &token2->val.str))
1651                                 || b == CPP_NAME
1652                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1653     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1654                                 || c == '.' || c == '+' || c == '-');
1655     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1656                                 && token1->val.c == '@'
1657                                 && (b == CPP_NAME || b == CPP_STRING));
1658     default:            break;
1659     }
1660
1661   return 0;
1662 }
1663
1664 /* Output all the remaining tokens on the current line, and a newline
1665    character, to FP.  Leading whitespace is removed.  */
1666 void
1667 cpp_output_line (pfile, fp)
1668      cpp_reader *pfile;
1669      FILE *fp;
1670 {
1671   cpp_token token;
1672
1673   _cpp_get_token (pfile, &token);
1674   token.flags &= ~PREV_WHITE;
1675   while (token.type != CPP_EOF)
1676     {
1677       cpp_output_token (&token, fp);
1678       _cpp_get_token (pfile, &token);
1679     }
1680
1681   putc ('\n', fp);
1682 }
1683
/* Memory pools.  */

/* Layout probe, used only by DEFAULT_ALIGNMENT below: a single char
   followed by a union of a double and a pointer.  */
struct dummy
{
  char c;
  union { double d; int *p; } u;
};

/* Offset of the union within struct dummy.  Chunk sizes are rounded
   to this in new_chunk, and _cpp_init_pool uses it when the caller
   passes an alignment of zero.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
1697
1698 static int
1699 chunk_suitable (pool, chunk, size)
1700      cpp_pool *pool;
1701      cpp_chunk *chunk;
1702      unsigned int size;
1703 {
1704   /* Being at least twice SIZE means we can use memcpy in
1705      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
1706      anyway.  */
1707   return (chunk && pool->locked != chunk
1708           && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
1709 }
1710
1711 /* Returns the end of the new pool.  PTR points to a char in the old
1712    pool, and is updated to point to the same char in the new pool.  */
1713 unsigned char *
1714 _cpp_next_chunk (pool, len, ptr)
1715      cpp_pool *pool;
1716      unsigned int len;
1717      unsigned char **ptr;
1718 {
1719   cpp_chunk *chunk = pool->cur->next;
1720
1721   /* LEN is the minimum size we want in the new pool.  */
1722   len += POOL_ROOM (pool);
1723   if (! chunk_suitable (pool, chunk, len))
1724     {
1725       chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
1726
1727       chunk->next = pool->cur->next;
1728       pool->cur->next = chunk;
1729     }
1730
1731   /* Update the pointer before changing chunk's front.  */
1732   if (ptr)
1733     *ptr += chunk->base - POOL_FRONT (pool);
1734
1735   memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
1736   chunk->front = chunk->base;
1737
1738   pool->cur = chunk;
1739   return POOL_LIMIT (pool);
1740 }
1741
1742 static cpp_chunk *
1743 new_chunk (size)
1744      unsigned int size;
1745 {
1746   unsigned char *base;
1747   cpp_chunk *result;
1748
1749   size = ALIGN (size, DEFAULT_ALIGNMENT);
1750   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
1751   /* Put the chunk descriptor at the end.  Then chunk overruns will
1752      cause obvious chaos.  */
1753   result = (cpp_chunk *) (base + size);
1754   result->base = base;
1755   result->front = base;
1756   result->limit = base + size;
1757   result->next = 0;
1758
1759   return result;
1760 }
1761
1762 void
1763 _cpp_init_pool (pool, size, align, temp)
1764      cpp_pool *pool;
1765      unsigned int size, align, temp;
1766 {
1767   if (align == 0)
1768     align = DEFAULT_ALIGNMENT;
1769   if (align & (align - 1))
1770     abort ();
1771   pool->align = align;
1772   pool->cur = new_chunk (size);
1773   pool->locked = 0;
1774   pool->locks = 0;
1775   if (temp)
1776     pool->cur->next = pool->cur;
1777 }
1778
1779 void
1780 _cpp_lock_pool (pool)
1781      cpp_pool *pool;
1782 {
1783   if (pool->locks++ == 0)
1784     pool->locked = pool->cur;
1785 }
1786
1787 void
1788 _cpp_unlock_pool (pool)
1789      cpp_pool *pool;
1790 {
1791   if (--pool->locks == 0)
1792     pool->locked = 0;
1793 }
1794
1795 void
1796 _cpp_free_pool (pool)
1797      cpp_pool *pool;
1798 {
1799   cpp_chunk *chunk = pool->cur, *next;
1800
1801   do
1802     {
1803       next = chunk->next;
1804       free (chunk->base);
1805       chunk = next;
1806     }
1807   while (chunk && chunk != pool->cur);
1808 }
1809
1810 /* Reserve LEN bytes from a memory pool.  */
1811 unsigned char *
1812 _cpp_pool_reserve (pool, len)
1813      cpp_pool *pool;
1814      unsigned int len;
1815 {
1816   len = ALIGN (len, pool->align);
1817   if (len > (unsigned int) POOL_ROOM (pool))
1818     _cpp_next_chunk (pool, len, 0);
1819
1820   return POOL_FRONT (pool);
1821 }
1822
1823 /* Allocate LEN bytes from a memory pool.  */
1824 unsigned char *
1825 _cpp_pool_alloc (pool, len)
1826      cpp_pool *pool;
1827      unsigned int len;
1828 {
1829   unsigned char *result = _cpp_pool_reserve (pool, len);
1830
1831   POOL_COMMIT (pool, len);
1832   return result;
1833 }