gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
/* This lexer works with a single pass of the file.  Recently I
   re-wrote it to minimize the places where we step backwards in the
   input stream, to make future changes to support multi-byte
   character sets fairly straightforward.

   There is now only one routine where we do step backwards:
   skip_escaped_newlines.  This routine could probably also be changed
   so that it doesn't need to step back.  One possibility is to use a
   trick similar to that used in lex_dot and lex_percent.  Two
   extra characters might be needed, but skip_escaped_newlines itself
   would probably be the only place that needs to be aware of that,
   and changes to the remaining routines would probably only be needed
   if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41 #include "symcat.h"
  42
/* Spelling categories; each entry of the token_spellings table below
   records one of these for its token type.

   Tokens with SPELL_STRING store their spelling in the token list,
   and its length in the token->val.name.len.
   NOTE(review): the lexer routines in this file fill in
   token->val.str.len, so the field name quoted above may be stale -
   confirm against cpplib.h.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_STRING,
  SPELL_NONE
};
  53
/* One entry of the token_spellings table: the spelling category of a
   token type, together with a string naming or spelling it.  */
struct token_spelling
{
  enum spell_type category;	/* How the token is spelled.  */
  const unsigned char *name;	/* Operator spelling, or enumerator name.  */
};
  59
/* Spellings of the digraphs.  */
const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
                                             U":>", U"<%", U"%>"};

/* Build token_spellings by expanding TTYPE_TABLE.  An OP (e, s) entry
   becomes a SPELL_OPERATOR whose name is the spelling S; a TK (e, s)
   entry takes S as its spelling category and the stringified
   enumerator E as its name.  */
#define OP(e, s) { SPELL_OPERATOR, U s           },
#define TK(e, s) { s,              U STRINGX (e) },
const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
#undef OP
#undef TK

/* Look up the spelling category, or the name, of TOKEN from its type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  71
/* Character-level helpers working directly on a cpp_buffer: newline
   accounting, and trigraph / escaped-newline processing.  */
static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
static cppchar_t get_effective_char PARAMS ((cpp_buffer *));

/* Comment and whitespace skipping, and the routines that lex
   identifiers, numbers, strings and the '%' and '.' operators.  */
static int skip_block_comment PARAMS ((cpp_reader *));
static int skip_line_comment PARAMS ((cpp_reader *));
static void adjust_column PARAMS ((cpp_reader *));
static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
static void unterminated PARAMS ((cpp_reader *, int));
static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
static int name_p PARAMS ((cpp_reader *, const cpp_string *));

/* Memory pool chunk helpers.  */
static cpp_chunk *new_chunk PARAMS ((unsigned int));
static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
  93
  94 /* Utility routine:
  95
  96    Compares, the token TOKEN to the NUL-terminated string STRING.
  97    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  98
  99 int
 100 cpp_ideq (token, string)
 101      const cpp_token *token;
 102      const char *string;
 103 {
 104   if (token->type != CPP_NAME)
 105     return 0;
 106
 107   return !ustrcmp (token->val.node->name, (const U_CHAR *) string);
 108 }
 109
 110 /* Call when meeting a newline.  Returns the character after the newline
 111    (or carriage-return newline combination), or EOF.  */
 112 static cppchar_t
 113 handle_newline (buffer, newline_char)
 114      cpp_buffer *buffer;
 115      cppchar_t newline_char;
 116 {
 117   cppchar_t next = EOF;
 118
 119   buffer->col_adjust = 0;
 120   buffer->lineno++;
 121   buffer->line_base = buffer->cur;
 122
 123   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 124   if (buffer->cur < buffer->rlimit)
 125     {
 126       next = *buffer->cur++;
 127       if (next + newline_char == '\r' + '\n')
 128         {
 129           buffer->line_base = buffer->cur;
 130           if (buffer->cur < buffer->rlimit)
 131             next = *buffer->cur++;
 132           else
 133             next = EOF;
 134         }
 135     }
 136
 137   buffer->read_ahead = next;
 138   return next;
 139 }
 140
 141 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 142    encountered.  It warns if necessary, and returns true if the
 143    trigraph should be honoured.  FROM_CHAR is the third character of a
 144    trigraph, and presumed to be the previous character for position
 145    reporting.  */
 146 static int
 147 trigraph_ok (pfile, from_char)
 148      cpp_reader *pfile;
 149      cppchar_t from_char;
 150 {
 151   int accept = CPP_OPTION (pfile, trigraphs);
 152
 153   /* Don't warn about trigraphs in comments.  */
 154   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 155     {
 156       cpp_buffer *buffer = pfile->buffer;
 157       if (accept)
 158         cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
 159                                "trigraph ??%c converted to %c",
 160                                (int) from_char,
 161                                (int) _cpp_trigraph_map[from_char]);
 162       else
 163         cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
 164                                "trigraph ??%c ignored", (int) from_char);
 165     }
 166
 167   return accept;
 168 }
 169
/* Assumes local variables buffer and result.  Gives the token being
   built the type T and clears the buffer's read-ahead character, so
   that character counts as consumed.  */
#define ACCEPT_CHAR(t) \
  do { result->type = t; buffer->read_ahead = EOF; } while (0)

/* Remember / rewind the buffer's read position.  These assume local
   variables buffer and saved_cur.

   When we move to multibyte character sets, add to these something
   that saves and restores the state of the multibyte conversion
   library.  This probably involves saving and restoring a "cookie".
   In the case of glibc it is an 8-byte structure, so is not a high
   overhead operation.  In any case, it's out of the fast path.  */
#define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
#define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 181
/* Skips any escaped newlines introduced by NEXT, which is either a
   '?' or a '\\'.  Returns the next character, which will also have
   been placed in buffer->read_ahead.  This routine performs
   preprocessing stages 1 and 2 of the ISO C standard.

   On entry buffer->cur points just past NEXT.  An accepted trigraph
   is replaced by the character it maps to; if that is a backslash it
   is treated like a literal one.  A backslash followed by optional
   horizontal space and a newline is removed, and the process repeats
   on the character after the newline.  Whenever a lookahead fails,
   buffer->cur is rewound to the most recently saved position and the
   character in hand is returned.  Buffers marked from_stage3 are
   left untouched.  */
static cppchar_t
skip_escaped_newlines (buffer, next)
     cpp_buffer *buffer;
     cppchar_t next;
{
  /* Only do this if we apply stages 1 and 2.  */
  if (!buffer->from_stage3)
    {
      cppchar_t next1;
      const unsigned char *saved_cur;
      int space;

      do
        {
          /* Nothing can follow NEXT, so it stands for itself.  */
          if (buffer->cur == buffer->rlimit)
            break;

          SAVE_STATE ();
          if (next == '?')
            {
              /* A trigraph needs a second '?' and then a third
                 character.  */
              next1 = *buffer->cur++;
              if (next1 != '?' || buffer->cur == buffer->rlimit)
                {
                  RESTORE_STATE ();
                  break;
                }

              /* The third character must be in the trigraph map, and
                 trigraph_ok must say the trigraph is to be honoured.  */
              next1 = *buffer->cur++;
              if (!_cpp_trigraph_map[next1]
                  || !trigraph_ok (buffer->pfile, next1))
                {
                  RESTORE_STATE ();
                  break;
                }

              /* We have a full trigraph here.  */
              next = _cpp_trigraph_map[next1];
              if (next != '\\' || buffer->cur == buffer->rlimit)
                break;
              /* The trigraph stays consumed even if the backslash it
                 produced turns out not to escape a newline.  */
              SAVE_STATE ();
            }

          /* We have a backslash, and room for at least one more character.  */
          space = 0;
          do
            {
              next1 = *buffer->cur++;
              if (!is_nvspace (next1))
                break;
              space = 1;
            }
          while (buffer->cur < buffer->rlimit);

          /* Not an escaped newline after all; un-read what we scanned.  */
          if (!is_vspace (next1))
            {
              RESTORE_STATE ();
              break;
            }

          if (space)
            cpp_warning (buffer->pfile,
                         "backslash and newline separated by space");

          next = handle_newline (buffer, next1);
          if (next == EOF)
            cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
        }
      /* The character after the newline may start another one.  */
      while (next == '\\' || next == '?');
    }

  buffer->read_ahead = next;
  return next;
}
 259
 260 /* Obtain the next character, after trigraph conversion and skipping
 261    an arbitrary string of escaped newlines.  The common case of no
 262    trigraphs or escaped newlines falls through quickly.  */
 263 static cppchar_t
 264 get_effective_char (buffer)
 265      cpp_buffer *buffer;
 266 {
 267   cppchar_t next = EOF;
 268
 269   if (buffer->cur < buffer->rlimit)
 270     {
 271       next = *buffer->cur++;
 272
 273       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 274          can introduce escaped newlines, which we want to skip, or
 275          UCNs, which, depending upon lexer state, we will handle in
 276          the future.  */
 277       if (next == '?' || next == '\\')
 278         next = skip_escaped_newlines (buffer, next);
 279     }
 280
 281   buffer->read_ahead = next;
 282   return next;
 283 }
 284
/* Skip a C-style block comment.  We find the end of the comment by
   seeing if an asterisk is before every '/' we encounter.  Returns
   non-zero if comment terminated by EOF, zero otherwise.

   pfile->state.lexing_comment is set for the duration of the scan
   (trigraph_ok consults it to suppress warnings), and
   buffer->read_ahead is cleared to EOF on exit.  */
static int
skip_block_comment (pfile)
     cpp_reader *pfile;
{
  cpp_buffer *buffer = pfile->buffer;
  /* C is the character being examined, PREVC the one before it.  */
  cppchar_t c = EOF, prevc = EOF;

  pfile->state.lexing_comment = 1;
  while (buffer->cur != buffer->rlimit)
    {
      prevc = c, c = *buffer->cur++;

      /* Entered by goto when C has already been fetched.  */
    next_char:
      /* FIXME: For speed, create a new character class of characters
         of interest inside block comments.  */
      if (c == '?' || c == '\\')
        c = skip_escaped_newlines (buffer, c);

      /* People like decorating comments with '*', so check for '/'
         instead for efficiency.  */
      if (c == '/')
        {
          if (prevc == '*')
            break;

          /* Warn about potential nested comments, but not if the '/'
             comes immediately before the true comment delimiter.
             Don't bother to get it right across escaped newlines.  */
          if (CPP_OPTION (pfile, warn_comments)
              && buffer->cur != buffer->rlimit)
            {
              prevc = c, c = *buffer->cur++;
              if (c == '*' && buffer->cur != buffer->rlimit)
                {
                  prevc = c, c = *buffer->cur++;
                  if (c != '/')
                    cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
                                           CPP_BUF_COL (buffer),
                                           "\"/*\" within comment");
                }
              /* The character read while looking ahead still has to
                 be processed.  */
              goto next_char;
            }
        }
      else if (is_vspace (c))
        {
          /* handle_newline hands back the character after the newline.  */
          prevc = c, c = handle_newline (buffer, c);
          goto next_char;
        }
      else if (c == '\t')
        adjust_column (pfile);
    }

  pfile->state.lexing_comment = 0;
  buffer->read_ahead = EOF;
  /* Anything other than ending on the closing star-slash pair means
     the input ran out first.  */
  return c != '/' || prevc != '*';
}
 344
 345 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 346    non-zero if a multiline comment.  The following new line, if any,
 347    is left in buffer->read_ahead.  */
 348 static int
 349 skip_line_comment (pfile)
 350      cpp_reader *pfile;
 351 {
 352   cpp_buffer *buffer = pfile->buffer;
 353   unsigned int orig_lineno = buffer->lineno;
 354   cppchar_t c;
 355
 356   pfile->state.lexing_comment = 1;
 357   do
 358     {
 359       c = EOF;
 360       if (buffer->cur == buffer->rlimit)
 361         break;
 362
 363       c = *buffer->cur++;
 364       if (c == '?' || c == '\\')
 365         c = skip_escaped_newlines (buffer, c);
 366     }
 367   while (!is_vspace (c));
 368
 369   pfile->state.lexing_comment = 0;
 370   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 371   return orig_lineno != buffer->lineno;
 372 }
 373
 374 /* pfile->buffer->cur is one beyond the \t character.  Update
 375    col_adjust so we track the column correctly.  */
 376 static void
 377 adjust_column (pfile)
 378      cpp_reader *pfile;
 379 {
 380   cpp_buffer *buffer = pfile->buffer;
 381   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 382
 383   /* Round it up to multiple of the tabstop, but subtract 1 since the
 384      tab itself occupies a character position.  */
 385   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 386                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 387 }
 388
 389 /* Skips whitespace, saving the next non-whitespace character.
 390    Adjusts pfile->col_adjust to account for tabs.  Without this,
 391    tokens might be assigned an incorrect column.  */
 392 static void
 393 skip_whitespace (pfile, c)
 394      cpp_reader *pfile;
 395      cppchar_t c;
 396 {
 397   cpp_buffer *buffer = pfile->buffer;
 398   unsigned int warned = 0;
 399
 400   do
 401     {
 402       /* Horizontal space always OK.  */
 403       if (c == ' ')
 404         ;
 405       else if (c == '\t')
 406         adjust_column (pfile);
 407       /* Just \f \v or \0 left.  */
 408       else if (c == '\0')
 409         {
 410           if (!warned)
 411             {
 412               cpp_warning (pfile, "null character(s) ignored");
 413               warned = 1;
 414             }
 415         }
 416       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 417         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
 418                                CPP_BUF_COL (buffer),
 419                                "%s in preprocessing directive",
 420                                c == '\f' ? "form feed" : "vertical tab");
 421
 422       c = EOF;
 423       if (buffer->cur == buffer->rlimit)
 424         break;
 425       c = *buffer->cur++;
 426     }
 427   /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
 428   while (is_nvspace (c));
 429
 430   /* Remember the next character.  */
 431   buffer->read_ahead = c;
 432 }
 433
 434 /* See if the characters of a number token are valid in a name (no
 435    '.', '+' or '-').  */
 436 static int
 437 name_p (pfile, string)
 438      cpp_reader *pfile;
 439      const cpp_string *string;
 440 {
 441   unsigned int i;
 442
 443   for (i = 0; i < string->len; i++)
 444     if (!is_idchar (string->text[i]))
 445       return 0;
 446
 447   return 1;
 448 }
 449
/* Parse an identifier, skipping embedded backslash-newlines.
   Calculate the hash value of the token while parsing, for improved
   performance.  The hashing algorithm *must* match cpp_lookup().

   C is the first character of the identifier; the rest is read from
   pfile->buffer.  The spelling is accumulated at the front of
   pfile->ident_pool.  The first character that is not part of the
   identifier is left in buffer->read_ahead.  Returns the hash node
   obtained from _cpp_lookup_with_hash.  */

static cpp_hashnode *
parse_identifier (pfile, c)
     cpp_reader *pfile;
     cppchar_t c;
{
  cpp_hashnode *result;
  cpp_buffer *buffer = pfile->buffer;
  unsigned char *dest, *limit;
  /* R is the running hash; SAW_DOLLAR counts '$' characters.  */
  unsigned int r = 0, saw_dollar = 0;

  dest = POOL_FRONT (&pfile->ident_pool);
  limit = POOL_LIMIT (&pfile->ident_pool);

  /* The inner loop copies plain identifier characters; the outer loop
     resumes it when a '?' or '\\' turns out, once escaped newlines
     are skipped, to be followed by more identifier characters.  */
  do
    {
      do
        {
          /* Need room for terminating null.  */
          if (dest + 1 >= limit)
            limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);

          *dest++ = c;
          r = HASHSTEP (r, c);

          if (c == '$')
            saw_dollar++;

          c = EOF;
          if (buffer->cur == buffer->rlimit)
            break;

          c = *buffer->cur++;
        }
      while (is_idchar (c));

      /* Potential escaped newline?  */
      if (c != '?' && c != '\\')
        break;
      c = skip_escaped_newlines (buffer, c);
    }
  while (is_idchar (c));

  /* Remember the next character.  */
  buffer->read_ahead = c;

  /* $ is not an identifier character in the standard, but is commonly
     accepted as an extension.  Don't warn about it in skipped
     conditional blocks.  */
  if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
    cpp_pedwarn (pfile, "'$' character(s) in identifier");

  /* Identifiers are null-terminated.  */
  *dest = '\0';

  /* This routine commits the memory if necessary.  */
  result = _cpp_lookup_with_hash (pfile,
                                  dest - POOL_FRONT (&pfile->ident_pool), r);

  /* Some identifiers require diagnostics when lexed.  */
  if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
    {
      /* It is allowed to poison the same identifier twice.  */
      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
        cpp_error (pfile, "attempt to use poisoned \"%s\"", result->name);

      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
         replacement list of a variable-arguments macro.  */
      if (result == pfile->spec_nodes.n__VA_ARGS__
          && !pfile->state.va_args_ok)
        cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variable-argument macro");
    }

  return result;
}
 528
/* Parse a number, skipping embedded backslash-newlines.

   C is the first character of the number to be stored; if
   LEADING_PERIOD is non-zero a '.' is stored ahead of it.  The
   spelling is built at the front of pfile->string_pool,
   null-terminated and committed; NUMBER receives its address and its
   length, which excludes the terminating null.  The first character
   that does not belong to the number is left in
   buffer->read_ahead.  */
static void
parse_number (pfile, number, c, leading_period)
     cpp_reader *pfile;
     cpp_string *number;
     cppchar_t c;
     int leading_period;
{
  cpp_buffer *buffer = pfile->buffer;
  cpp_pool *pool = pfile->string_pool;
  unsigned char *dest, *limit;

  dest = POOL_FRONT (pool);
  limit = POOL_LIMIT (pool);

  /* Place a leading period.  */
  if (leading_period)
    {
      if (dest >= limit)
        limit = _cpp_next_chunk (pool, 0, &dest);
      *dest++ = '.';
    }

  /* The inner loop copies number characters; the outer loop resumes
     it when a '?' or '\\' turns out, once escaped newlines are
     skipped, to be followed by more of the number.  */
  do
    {
      do
        {
          /* Need room for terminating null.  */
          if (dest + 1 >= limit)
            limit = _cpp_next_chunk (pool, 0, &dest);
          *dest++ = c;

          c = EOF;
          if (buffer->cur == buffer->rlimit)
            break;

          c = *buffer->cur++;
        }
      /* VALID_SIGN is given the character stored just before C.  */
      while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));

      /* Potential escaped newline?  */
      if (c != '?' && c != '\\')
        break;
      c = skip_escaped_newlines (buffer, c);
    }
  while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));

  /* Remember the next character.  */
  buffer->read_ahead = c;

  /* Null-terminate the number.  */
  *dest = '\0';

  number->text = POOL_FRONT (pool);
  number->len = dest - number->text;
  /* The committed size includes the terminating null.  */
  POOL_COMMIT (pool, number->len + 1);
}
 586
 587 /* Subroutine of parse_string.  Emits error for unterminated strings.  */
 588 static void
 589 unterminated (pfile, term)
 590      cpp_reader *pfile;
 591      int term;
 592 {
 593   cpp_error (pfile, "missing terminating %c character", term);
 594
 595   if (term == '\"' && pfile->mlstring_pos.line
 596       && pfile->mlstring_pos.line != pfile->lexer_pos.line)
 597     {
 598       cpp_error_with_line (pfile, pfile->mlstring_pos.line,
 599                            pfile->mlstring_pos.col,
 600                            "possible start of unterminated string literal");
 601       pfile->mlstring_pos.line = 0;
 602     }
 603 }
 604
 605 /* Subroutine of parse_string.  */
 606 static int
 607 unescaped_terminator_p (pfile, dest)
 608      cpp_reader *pfile;
 609      const unsigned char *dest;
 610 {
 611   const unsigned char *start, *temp;
 612
 613   /* In #include-style directives, terminators are not escapeable.  */
 614   if (pfile->state.angled_headers)
 615     return 1;
 616
 617   start = POOL_FRONT (pfile->string_pool);
 618
 619   /* An odd number of consecutive backslashes represents an escaped
 620      terminator.  */
 621   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 622     ;
 623
 624   return ((dest - temp) & 1) == 0;
 625 }
 626
/* Parses a string, character constant, or angle-bracketed header file
   name.  Handles embedded trigraphs and escaped newlines.

   Multi-line strings are allowed, but they are deprecated within
   directives.

   TERMINATOR is the closing character to look for.  The characters
   up to, but not including, the terminator are stored at the front
   of pfile->string_pool and committed without a terminating null;
   TOKEN's val.str receives their address and length.  On exit
   buffer->read_ahead holds EOF, unless the literal was cut short at
   a newline, in which case it holds that newline character.  */
static void
parse_string (pfile, token, terminator)
     cpp_reader *pfile;
     cpp_token *token;
     cppchar_t terminator;
{
  cpp_buffer *buffer = pfile->buffer;
  cpp_pool *pool = pfile->string_pool;
  unsigned char *dest, *limit;
  cppchar_t c;
  unsigned int nulls = 0;

  dest = POOL_FRONT (pool);
  limit = POOL_LIMIT (pool);

  for (;;)
    {
      /* Running out of input before the terminator is an error.  */
      if (buffer->cur == buffer->rlimit)
        {
          c = EOF;
          unterminated (pfile, terminator);
          break;
        }
      c = *buffer->cur++;

      /* Entered by goto with C taken from read_ahead after a newline.  */
    have_char:
      /* Handle trigraphs, escaped newlines etc.  */
      if (c == '?' || c == '\\')
        c = skip_escaped_newlines (buffer, c);

      if (c == terminator && unescaped_terminator_p (pfile, dest))
        {
          /* The terminator is consumed and not stored.  */
          c = EOF;
          break;
        }
      else if (is_vspace (c))
        {
          /* In assembly language, silently terminate string and
             character literals at end of line.  This is a kludge
             around not knowing where comments are.  */
          if (CPP_OPTION (pfile, lang_asm) && terminator != '>')
            break;

          /* Character constants and header names may not extend over
             multiple lines.  In Standard C, neither may strings.
             Unfortunately, we accept multiline strings as an
             extension, except in #include family directives.  */
          if (terminator != '"' || pfile->state.angled_headers)
            {
              unterminated (pfile, terminator);
              break;
            }

          /* Record where the first multi-line string began, so that
             unterminated can point at it later.  */
          if (pfile->mlstring_pos.line == 0)
            {
              pfile->mlstring_pos = pfile->lexer_pos;
              if (CPP_PEDANTIC (pfile))
                cpp_pedwarn (pfile, "multi-line string constant");
            }

          handle_newline (buffer, c);  /* Stores to read_ahead.  */
          /* Whatever the line ending was, store a single '\n'.  */
          c = '\n';
        }
      else if (c == '\0')
        {
          /* Warn once only, however many nulls there are.  */
          if (nulls++ == 0)
            cpp_warning (pfile, "null character(s) preserved in literal");
        }

      /* No terminating null for strings - they could contain nulls.  */
      if (dest >= limit)
        limit = _cpp_next_chunk (pool, 0, &dest);
      *dest++ = c;

      /* If we had a new line, the next character is in read_ahead.  */
      if (c != '\n')
        continue;
      c = buffer->read_ahead;
      if (c != EOF)
        goto have_char;
      /* Otherwise loop round and fetch a character from the buffer.  */
    }

  /* Remember the next character.  */
  buffer->read_ahead = c;

  token->val.str.text = POOL_FRONT (pool);
  token->val.str.len = dest - token->val.str.text;
  POOL_COMMIT (pool, token->val.str.len);
}
 721
 722 /* The stored comment includes the comment start and any terminator.  */
 723 static void
 724 save_comment (pfile, token, from)
 725      cpp_reader *pfile;
 726      cpp_token *token;
 727      const unsigned char *from;
 728 {
 729   unsigned char *buffer;
 730   unsigned int len;
 731
 732   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 733   /* C++ comments probably (not definitely) have moved past a new
 734      line, which we don't want to save in the comment.  */
 735   if (pfile->buffer->read_ahead != EOF)
 736     len--;
 737   buffer = _cpp_pool_alloc (pfile->string_pool, len);
 738
 739   token->type = CPP_COMMENT;
 740   token->val.str.len = len;
 741   token->val.str.text = buffer;
 742
 743   buffer[0] = '/';
 744   memcpy (buffer + 1, from, len - 1);
 745 }
 746
 747 /* Subroutine of lex_token to handle '%'.  A little tricky, since we
 748    want to avoid stepping back when lexing %:%X.  */
 749 static void
 750 lex_percent (buffer, result)
 751      cpp_buffer *buffer;
 752      cpp_token *result;
 753 {
 754   cppchar_t c;
 755
 756   result->type = CPP_MOD;
 757   /* Parsing %:%X could leave an extra character.  */
 758   if (buffer->extra_char == EOF)
 759     c = get_effective_char (buffer);
 760   else
 761     {
 762       c = buffer->read_ahead = buffer->extra_char;
 763       buffer->extra_char = EOF;
 764     }
 765
 766   if (c == '=')
 767     ACCEPT_CHAR (CPP_MOD_EQ);
 768   else if (CPP_OPTION (buffer->pfile, digraphs))
 769     {
 770       if (c == ':')
 771         {
 772           result->flags |= DIGRAPH;
 773           ACCEPT_CHAR (CPP_HASH);
 774           if (get_effective_char (buffer) == '%')
 775             {
 776               buffer->extra_char = get_effective_char (buffer);
 777               if (buffer->extra_char == ':')
 778                 {
 779                   buffer->extra_char = EOF;
 780                   ACCEPT_CHAR (CPP_PASTE);
 781                 }
 782               else
 783                 /* We'll catch the extra_char when we're called back.  */
 784                 buffer->read_ahead = '%';
 785             }
 786         }
 787       else if (c == '>')
 788         {
 789           result->flags |= DIGRAPH;
 790           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 791         }
 792     }
 793 }
 794
 795 /* Subroutine of lex_token to handle '.'.  This is tricky, since we
 796    want to avoid stepping back when lexing '...' or '.123'.  In the
 797    latter case we should also set a flag for parse_number.  */
 798 static void
 799 lex_dot (pfile, result)
 800      cpp_reader *pfile;
 801      cpp_token *result;
 802 {
 803   cpp_buffer *buffer = pfile->buffer;
 804   cppchar_t c;
 805
 806   /* Parsing ..X could leave an extra character.  */
 807   if (buffer->extra_char == EOF)
 808     c = get_effective_char (buffer);
 809   else
 810     {
 811       c = buffer->read_ahead = buffer->extra_char;
 812       buffer->extra_char = EOF;
 813     }
 814
 815   /* All known character sets have 0...9 contiguous.  */
 816   if (c >= '0' && c <= '9')
 817     {
 818       result->type = CPP_NUMBER;
 819       parse_number (pfile, &result->val.str, c, 1);
 820     }
 821   else
 822     {
 823       result->type = CPP_DOT;
 824       if (c == '.')
 825         {
 826           buffer->extra_char = get_effective_char (buffer);
 827           if (buffer->extra_char == '.')
 828             {
 829               buffer->extra_char = EOF;
 830               ACCEPT_CHAR (CPP_ELLIPSIS);
 831             }
 832           else
 833             /* We'll catch the extra_char when we're called back.  */
 834             buffer->read_ahead = '.';
 835         }
 836       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 837         ACCEPT_CHAR (CPP_DOT_STAR);
 838     }
 839 }
 840
 841 void
 842 _cpp_lex_token (pfile, result)
 843      cpp_reader *pfile;
 844      cpp_token *result;
 845 {
 846   cppchar_t c;
 847   cpp_buffer *buffer;
 848   const unsigned char *comment_start;
 849   unsigned char bol = pfile->state.next_bol;
 850
 851  done_directive:
 852   buffer = pfile->buffer;
 853   pfile->state.next_bol = 0;
 854   result->flags = 0;
 855  next_char:
 856   pfile->lexer_pos.line = buffer->lineno;
 857  next_char2:
 858   pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
 859
 860   c = buffer->read_ahead;
 861   if (c == EOF && buffer->cur < buffer->rlimit)
 862     {
 863       c = *buffer->cur++;
 864       pfile->lexer_pos.col++;
 865     }
 866
 867  do_switch:
 868   buffer->read_ahead = EOF;
 869   switch (c)
 870     {
 871     case EOF:
 872       /* Non-empty files should end in a newline.  Ignore for command
 873          line and _Pragma buffers.  */
 874       if (pfile->lexer_pos.col != 0 && !buffer->from_stage3)
 875         cpp_pedwarn (pfile, "no newline at end of file");
 876       pfile->state.next_bol = 1;
 877       pfile->skipping = 0;      /* In case missing #endif.  */
 878       result->type = CPP_EOF;
 879       /* Don't do MI optimisation.  */
 880       return;
 881
 882     case ' ': case '\t': case '\f': case '\v': case '\0':
 883       skip_whitespace (pfile, c);
 884       result->flags |= PREV_WHITE;
 885       goto next_char2;
 886
 887     case '\n': case '\r':
 888       if (!pfile->state.in_directive)
 889         {
 890           handle_newline (buffer, c);
 891           bol = 1;
 892           pfile->lexer_pos.output_line = buffer->lineno;
 893
 894           /* Newlines in arguments are white space (6.10.3.10).
 895              Otherwise, clear any white space flag.  */
 896           if (pfile->state.parsing_args)
 897             result->flags |= PREV_WHITE;
 898           else
 899             result->flags &= ~PREV_WHITE;
 900           goto next_char;
 901         }
 902
 903       /* Don't let directives spill over to the next line.  */
 904       buffer->read_ahead = c;
 905       pfile->state.next_bol = 1;
 906       result->type = CPP_EOF;
 907       break;
 908
 909     case '?':
 910     case '\\':
 911       /* These could start an escaped newline, or '?' a trigraph.  Let
 912          skip_escaped_newlines do all the work.  */
 913       {
 914         unsigned int lineno = buffer->lineno;
 915
 916         c = skip_escaped_newlines (buffer, c);
 917         if (lineno != buffer->lineno)
 918           /* We had at least one escaped newline of some sort, and the
 919              next character is in buffer->read_ahead.  Update the
 920              token's line and column.  */
 921             goto next_char;
 922
 923         /* We are either the original '?' or '\\', or a trigraph.  */
 924         result->type = CPP_QUERY;
 925         buffer->read_ahead = EOF;
 926         if (c == '\\')
 927           goto random_char;
 928         else if (c != '?')
 929           goto do_switch;
 930       }
 931       break;
 932
 933     case '0': case '1': case '2': case '3': case '4':
 934     case '5': case '6': case '7': case '8': case '9':
 935       result->type = CPP_NUMBER;
 936       parse_number (pfile, &result->val.str, c, 0);
 937       break;
 938
 939     case '$':
 940       if (!CPP_OPTION (pfile, dollars_in_ident))
 941         goto random_char;
 942       /* Fall through... */
 943
 944     case '_':
 945     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 946     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 947     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 948     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 949     case 'y': case 'z':
 950     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 951     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 952     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 953     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 954     case 'Y': case 'Z':
 955       result->type = CPP_NAME;
 956       result->val.node = parse_identifier (pfile, c);
 957
 958       /* 'L' may introduce wide characters or strings.  */
 959       if (result->val.node == pfile->spec_nodes.n_L)
 960         {
 961           c = buffer->read_ahead; /* For make_string.  */
 962           if (c == '\'' || c == '"')
 963             {
 964               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
 965               goto make_string;
 966             }
 967         }
 968       /* Convert named operators to their proper types.  */
 969       else if (result->val.node->flags & NODE_OPERATOR)
 970         {
 971           result->flags |= NAMED_OP;
 972           result->type = result->val.node->value.operator;
 973         }
 974       break;
 975
 976     case '\'':
 977     case '"':
 978       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
 979     make_string:
 980       parse_string (pfile, result, c);
 981       break;
 982
 983     case '/':
 984       /* A potential block or line comment.  */
 985       comment_start = buffer->cur;
 986       result->type = CPP_DIV;
 987       c = get_effective_char (buffer);
 988       if (c == '=')
 989         ACCEPT_CHAR (CPP_DIV_EQ);
 990       if (c != '/' && c != '*')
 991         break;
 992
 993       if (c == '*')
 994         {
 995           if (skip_block_comment (pfile))
 996             cpp_error_with_line (pfile, pfile->lexer_pos.line,
 997                                  pfile->lexer_pos.col,
 998                                  "unterminated comment");
 999         }
1000       else
1001         {
1002           if (!CPP_OPTION (pfile, cplusplus_comments)
1003               && !CPP_IN_SYSTEM_HEADER (pfile))
1004             break;
1005
1006           /* We silently allow C++ comments in system headers,
1007              irrespective of conformance mode, because lots of
1008              broken systems do that and trying to clean it up in
1009              fixincludes is a nightmare.  */
1010           if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1011               && ! buffer->warned_cplusplus_comments)
1012             {
1013               cpp_pedwarn (pfile,
1014                            "C++ style comments are not allowed in ISO C89");
1015               cpp_pedwarn (pfile,
1016                            "(this will be reported only once per input file)");
1017               buffer->warned_cplusplus_comments = 1;
1018             }
1019
1020           /* Skip_line_comment updates buffer->read_ahead.  */
1021           if (skip_line_comment (pfile))
1022             cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1023                                    pfile->lexer_pos.col,
1024                                    "multi-line comment");
1025         }
1026
1027       /* Skipping the comment has updated buffer->read_ahead.  */
1028       if (!pfile->state.save_comments)
1029         {
1030           result->flags |= PREV_WHITE;
1031           goto next_char;
1032         }
1033
1034       /* Save the comment as a token in its own right.  */
1035       save_comment (pfile, result, comment_start);
1036       /* Don't do MI optimisation.  */
1037       return;
1038
1039     case '<':
1040       if (pfile->state.angled_headers)
1041         {
1042           result->type = CPP_HEADER_NAME;
1043           c = '>';              /* terminator.  */
1044           goto make_string;
1045         }
1046
1047       result->type = CPP_LESS;
1048       c = get_effective_char (buffer);
1049       if (c == '=')
1050         ACCEPT_CHAR (CPP_LESS_EQ);
1051       else if (c == '<')
1052         {
1053           ACCEPT_CHAR (CPP_LSHIFT);
1054           if (get_effective_char (buffer) == '=')
1055             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1056         }
1057       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1058         {
1059           ACCEPT_CHAR (CPP_MIN);
1060           if (get_effective_char (buffer) == '=')
1061             ACCEPT_CHAR (CPP_MIN_EQ);
1062         }
1063       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1064         {
1065           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1066           result->flags |= DIGRAPH;
1067         }
1068       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1069         {
1070           ACCEPT_CHAR (CPP_OPEN_BRACE);
1071           result->flags |= DIGRAPH;
1072         }
1073       break;
1074
1075     case '>':
1076       result->type = CPP_GREATER;
1077       c = get_effective_char (buffer);
1078       if (c == '=')
1079         ACCEPT_CHAR (CPP_GREATER_EQ);
1080       else if (c == '>')
1081         {
1082           ACCEPT_CHAR (CPP_RSHIFT);
1083           if (get_effective_char (buffer) == '=')
1084             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1085         }
1086       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1087         {
1088           ACCEPT_CHAR (CPP_MAX);
1089           if (get_effective_char (buffer) == '=')
1090             ACCEPT_CHAR (CPP_MAX_EQ);
1091         }
1092       break;
1093
1094     case '%':
1095       lex_percent (buffer, result);
1096       if (result->type == CPP_HASH)
1097         goto do_hash;
1098       break;
1099
1100     case '.':
1101       lex_dot (pfile, result);
1102       break;
1103
1104     case '+':
1105       result->type = CPP_PLUS;
1106       c = get_effective_char (buffer);
1107       if (c == '=')
1108         ACCEPT_CHAR (CPP_PLUS_EQ);
1109       else if (c == '+')
1110         ACCEPT_CHAR (CPP_PLUS_PLUS);
1111       break;
1112
1113     case '-':
1114       result->type = CPP_MINUS;
1115       c = get_effective_char (buffer);
1116       if (c == '>')
1117         {
1118           ACCEPT_CHAR (CPP_DEREF);
1119           if (CPP_OPTION (pfile, cplusplus)
1120               && get_effective_char (buffer) == '*')
1121             ACCEPT_CHAR (CPP_DEREF_STAR);
1122         }
1123       else if (c == '=')
1124         ACCEPT_CHAR (CPP_MINUS_EQ);
1125       else if (c == '-')
1126         ACCEPT_CHAR (CPP_MINUS_MINUS);
1127       break;
1128
1129     case '*':
1130       result->type = CPP_MULT;
1131       if (get_effective_char (buffer) == '=')
1132         ACCEPT_CHAR (CPP_MULT_EQ);
1133       break;
1134
1135     case '=':
1136       result->type = CPP_EQ;
1137       if (get_effective_char (buffer) == '=')
1138         ACCEPT_CHAR (CPP_EQ_EQ);
1139       break;
1140
1141     case '!':
1142       result->type = CPP_NOT;
1143       if (get_effective_char (buffer) == '=')
1144         ACCEPT_CHAR (CPP_NOT_EQ);
1145       break;
1146
1147     case '&':
1148       result->type = CPP_AND;
1149       c = get_effective_char (buffer);
1150       if (c == '=')
1151         ACCEPT_CHAR (CPP_AND_EQ);
1152       else if (c == '&')
1153         ACCEPT_CHAR (CPP_AND_AND);
1154       break;
1155
1156     case '#':
1157       c = buffer->extra_char;   /* Can be set by error condition below.  */
1158       if (c != EOF)
1159         {
1160           buffer->read_ahead = c;
1161           buffer->extra_char = EOF;
1162         }
1163       else
1164         c = get_effective_char (buffer);
1165
1166       if (c == '#')
1167         {
1168           ACCEPT_CHAR (CPP_PASTE);
1169           break;
1170         }
1171
1172       result->type = CPP_HASH;
1173     do_hash:
1174       if (bol)
1175         {
1176           if (pfile->state.parsing_args)
1177             {
1178               /* 6.10.3 paragraph 11: If there are sequences of
1179                  preprocessing tokens within the list of arguments that
1180                  would otherwise act as preprocessing directives, the
1181                  behavior is undefined.
1182
1183                  This implementation will report a hard error, terminate
1184                  the macro invocation, and proceed to process the
1185                  directive.  */
1186               cpp_error (pfile,
1187                          "directives may not be used inside a macro argument");
1188
1189               /* Put a '#' in lookahead, return CPP_EOF for parse_arg.  */
1190               buffer->extra_char = buffer->read_ahead;
1191               buffer->read_ahead = '#';
1192               pfile->state.next_bol = 1;
1193               result->type = CPP_EOF;
1194
1195               /* Get whitespace right - newline_in_args sets it.  */
1196               if (pfile->lexer_pos.col == 1)
1197                 result->flags &= ~PREV_WHITE;
1198             }
1199           else
1200             {
1201               /* This is the hash introducing a directive.  */
1202               if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1203                 goto done_directive; /* bol still 1.  */
1204               /* This is in fact an assembler #.  */
1205             }
1206         }
1207       break;
1208
1209     case '|':
1210       result->type = CPP_OR;
1211       c = get_effective_char (buffer);
1212       if (c == '=')
1213         ACCEPT_CHAR (CPP_OR_EQ);
1214       else if (c == '|')
1215         ACCEPT_CHAR (CPP_OR_OR);
1216       break;
1217
1218     case '^':
1219       result->type = CPP_XOR;
1220       if (get_effective_char (buffer) == '=')
1221         ACCEPT_CHAR (CPP_XOR_EQ);
1222       break;
1223
1224     case ':':
1225       result->type = CPP_COLON;
1226       c = get_effective_char (buffer);
1227       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1228         ACCEPT_CHAR (CPP_SCOPE);
1229       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1230         {
1231           result->flags |= DIGRAPH;
1232           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1233         }
1234       break;
1235
1236     case '~': result->type = CPP_COMPL; break;
1237     case ',': result->type = CPP_COMMA; break;
1238     case '(': result->type = CPP_OPEN_PAREN; break;
1239     case ')': result->type = CPP_CLOSE_PAREN; break;
1240     case '[': result->type = CPP_OPEN_SQUARE; break;
1241     case ']': result->type = CPP_CLOSE_SQUARE; break;
1242     case '{': result->type = CPP_OPEN_BRACE; break;
1243     case '}': result->type = CPP_CLOSE_BRACE; break;
1244     case ';': result->type = CPP_SEMICOLON; break;
1245
1246     case '@':
1247       if (CPP_OPTION (pfile, objc))
1248         {
1249           /* In Objective C, '@' may begin keywords or strings, like
1250              @keyword or @"string".  It would be nice to call
1251              get_effective_char here and test the result.  However, we
1252              would then need to pass 2 characters to parse_identifier,
1253              making it ugly and slowing down its main loop.  Instead,
1254              we assume we have an identifier, and recover if not.  */
1255           result->type = CPP_NAME;
1256           result->val.node = parse_identifier (pfile, c);
1257           if (result->val.node->length != 1)
1258             break;
1259
1260           /* OK, so it wasn't an identifier.  Maybe a string?  */
1261           if (buffer->read_ahead == '"')
1262             {
1263               c = '"';
1264               ACCEPT_CHAR (CPP_OSTRING);
1265               goto make_string;
1266             }
1267         }
1268       goto random_char;
1269
1270     random_char:
1271     default:
1272       result->type = CPP_OTHER;
1273       result->val.c = c;
1274       break;
1275     }
1276
1277   /* If not in a directive, this token invalidates controlling macros.  */
1278   if (!pfile->state.in_directive)
1279     pfile->mi_state = MI_FAILED;
1280 }
1281
1282 /* An upper bound on the number of bytes needed to spell a token,
1283    including preceding whitespace.  */
1284 unsigned int
1285 cpp_token_len (token)
1286      const cpp_token *token;
1287 {
1288   unsigned int len;
1289
1290   switch (TOKEN_SPELL (token))
1291     {
1292     default:            len = 0;                        break;
1293     case SPELL_STRING:  len = token->val.str.len;       break;
1294     case SPELL_IDENT:   len = token->val.node->length;  break;
1295     }
1296   /* 1 for whitespace, 4 for comment delimeters.  */
1297   return len + 5;
1298 }
1299
1300 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1301    already contain the enough space to hold the token's spelling.
1302    Returns a pointer to the character after the last character
1303    written.  */
1304 unsigned char *
1305 cpp_spell_token (pfile, token, buffer)
1306      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1307      const cpp_token *token;
1308      unsigned char *buffer;
1309 {
1310   switch (TOKEN_SPELL (token))
1311     {
1312     case SPELL_OPERATOR:
1313       {
1314         const unsigned char *spelling;
1315         unsigned char c;
1316
1317         if (token->flags & DIGRAPH)
1318           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1319         else if (token->flags & NAMED_OP)
1320           goto spell_ident;
1321         else
1322           spelling = TOKEN_NAME (token);
1323
1324         while ((c = *spelling++) != '\0')
1325           *buffer++ = c;
1326       }
1327       break;
1328
1329     case SPELL_IDENT:
1330       spell_ident:
1331       memcpy (buffer, token->val.node->name, token->val.node->length);
1332       buffer += token->val.node->length;
1333       break;
1334
1335     case SPELL_STRING:
1336       {
1337         int left, right, tag;
1338         switch (token->type)
1339           {
1340           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1341           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1342           case CPP_OSTRING:     left = '"';  right = '"';  tag = '@';  break;
1343           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1344           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1345           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1346           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1347           }
1348         if (tag) *buffer++ = tag;
1349         if (left) *buffer++ = left;
1350         memcpy (buffer, token->val.str.text, token->val.str.len);
1351         buffer += token->val.str.len;
1352         if (right) *buffer++ = right;
1353       }
1354       break;
1355
1356     case SPELL_CHAR:
1357       *buffer++ = token->val.c;
1358       break;
1359
1360     case SPELL_NONE:
1361       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1362       break;
1363     }
1364
1365   return buffer;
1366 }
1367
1368 /* Returns a token as a null-terminated string.  The string is
1369    temporary, and automatically freed later.  Useful for diagnostics.  */
1370 unsigned char *
1371 cpp_token_as_text (pfile, token)
1372      cpp_reader *pfile;
1373      const cpp_token *token;
1374 {
1375   unsigned int len = cpp_token_len (token);
1376   unsigned char *start = _cpp_pool_alloc (&pfile->temp_string_pool, len), *end;
1377
1378   end = cpp_spell_token (pfile, token, start);
1379   end[0] = '\0';
1380
1381   return start;
1382 }
1383
1384 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1385 const char *
1386 cpp_type2name (type)
1387      enum cpp_ttype type;
1388 {
1389   return (const char *) token_spellings[type].name;
1390 }
1391
1392 /* Writes the spelling of token to FP.  Separate from cpp_spell_token
1393    for efficiency - to avoid double-buffering.  Also, outputs a space
1394    if PREV_WHITE is flagged.  */
1395 void
1396 cpp_output_token (token, fp)
1397      const cpp_token *token;
1398      FILE *fp;
1399 {
1400   if (token->flags & PREV_WHITE)
1401     putc (' ', fp);
1402
1403   switch (TOKEN_SPELL (token))
1404     {
1405     case SPELL_OPERATOR:
1406       {
1407         const unsigned char *spelling;
1408
1409         if (token->flags & DIGRAPH)
1410           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1411         else if (token->flags & NAMED_OP)
1412           goto spell_ident;
1413         else
1414           spelling = TOKEN_NAME (token);
1415
1416         ufputs (spelling, fp);
1417       }
1418       break;
1419
1420     spell_ident:
1421     case SPELL_IDENT:
1422       ufputs (token->val.node->name, fp);
1423     break;
1424
1425     case SPELL_STRING:
1426       {
1427         int left, right, tag;
1428         switch (token->type)
1429           {
1430           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1431           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1432           case CPP_OSTRING:     left = '"';  right = '"';  tag = '@';  break;
1433           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1434           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1435           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1436           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1437           }
1438         if (tag) putc (tag, fp);
1439         if (left) putc (left, fp);
1440         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1441         if (right) putc (right, fp);
1442       }
1443       break;
1444
1445     case SPELL_CHAR:
1446       putc (token->val.c, fp);
1447       break;
1448
1449     case SPELL_NONE:
1450       /* An error, most probably.  */
1451       break;
1452     }
1453 }
1454
1455 /* Compare two tokens.  */
1456 int
1457 _cpp_equiv_tokens (a, b)
1458      const cpp_token *a, *b;
1459 {
1460   if (a->type == b->type && a->flags == b->flags)
1461     switch (TOKEN_SPELL (a))
1462       {
1463       default:                  /* Keep compiler happy.  */
1464       case SPELL_OPERATOR:
1465         return 1;
1466       case SPELL_CHAR:
1467         return a->val.c == b->val.c; /* Character.  */
1468       case SPELL_NONE:
1469         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1470       case SPELL_IDENT:
1471         return a->val.node == b->val.node;
1472       case SPELL_STRING:
1473         return (a->val.str.len == b->val.str.len
1474                 && !memcmp (a->val.str.text, b->val.str.text,
1475                             a->val.str.len));
1476       }
1477
1478   return 0;
1479 }
1480
#if 0
/* Compare token lists A and B.  Returns 1 if both hold the same
   number of tokens and each corresponding pair satisfies
   _cpp_equiv_tokens, 0 otherwise.  Currently compiled out.  */
int
_cpp_equiv_toklists (a, b)
     const struct toklist *a, *b;
{
  unsigned int n = a->limit - a->first;
  unsigned int k = 0;

  if (n != (b->limit - b->first))
    return 0;

  while (k < n)
    {
      if (! _cpp_equiv_tokens (&a->first[k], &b->first[k]))
        return 0;
      k++;
    }

  return 1;
}
#endif
1500
1501 /* Determine whether two tokens can be pasted together, and if so,
1502    what the resulting token is.  Returns CPP_EOF if the tokens cannot
1503    be pasted, or the appropriate type for the merged token if they
1504    can.  */
1505 enum cpp_ttype
1506 cpp_can_paste (pfile, token1, token2, digraph)
1507      cpp_reader * pfile;
1508      const cpp_token *token1, *token2;
1509      int* digraph;
1510 {
1511   enum cpp_ttype a = token1->type, b = token2->type;
1512   int cxx = CPP_OPTION (pfile, cplusplus);
1513
1514   /* Treat named operators as if they were ordinary NAMEs.  */
1515   if (token1->flags & NAMED_OP)
1516     a = CPP_NAME;
1517   if (token2->flags & NAMED_OP)
1518     b = CPP_NAME;
1519
1520   if (a <= CPP_LAST_EQ && b == CPP_EQ)
1521     return a + (CPP_EQ_EQ - CPP_EQ);
1522
1523   switch (a)
1524     {
1525     case CPP_GREATER:
1526       if (b == a) return CPP_RSHIFT;
1527       if (b == CPP_QUERY && cxx)        return CPP_MAX;
1528       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
1529       break;
1530     case CPP_LESS:
1531       if (b == a) return CPP_LSHIFT;
1532       if (b == CPP_QUERY && cxx)        return CPP_MIN;
1533       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
1534       if (CPP_OPTION (pfile, digraphs))
1535         {
1536           if (b == CPP_COLON)
1537             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1538           if (b == CPP_MOD)
1539             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
1540         }
1541       break;
1542
1543     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
1544     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
1545     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
1546
1547     case CPP_MINUS:
1548       if (b == a)               return CPP_MINUS_MINUS;
1549       if (b == CPP_GREATER)     return CPP_DEREF;
1550       break;
1551     case CPP_COLON:
1552       if (b == a && cxx)        return CPP_SCOPE;
1553       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1554         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1555       break;
1556
1557     case CPP_MOD:
1558       if (CPP_OPTION (pfile, digraphs))
1559         {
1560           if (b == CPP_GREATER)
1561             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
1562           if (b == CPP_COLON)
1563             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
1564         }
1565       break;
1566     case CPP_DEREF:
1567       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1568       break;
1569     case CPP_DOT:
1570       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1571       if (b == CPP_NUMBER)      return CPP_NUMBER;
1572       break;
1573
1574     case CPP_HASH:
1575       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1576         /* %:%: digraph */
1577         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1578       break;
1579
1580     case CPP_NAME:
1581       if (b == CPP_NAME)        return CPP_NAME;
1582       if (b == CPP_NUMBER
1583           && name_p (pfile, &token2->val.str)) return CPP_NAME;
1584       if (b == CPP_CHAR
1585           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1586       if (b == CPP_STRING
1587           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1588       break;
1589
1590     case CPP_NUMBER:
1591       if (b == CPP_NUMBER)      return CPP_NUMBER;
1592       if (b == CPP_NAME)        return CPP_NUMBER;
1593       if (b == CPP_DOT)         return CPP_NUMBER;
1594       /* Numbers cannot have length zero, so this is safe.  */
1595       if ((b == CPP_PLUS || b == CPP_MINUS)
1596           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1597         return CPP_NUMBER;
1598       break;
1599
1600     case CPP_OTHER:
1601       if (CPP_OPTION (pfile, objc) && token1->val.c == '@')
1602         {
1603           if (b == CPP_NAME)    return CPP_NAME;
1604           if (b == CPP_STRING)  return CPP_OSTRING;
1605         }
1606
1607     default:
1608       break;
1609     }
1610
1611   return CPP_EOF;
1612 }
1613
1614 /* Returns nonzero if a space should be inserted to avoid an
1615    accidental token paste for output.  For simplicity, it is
1616    conservative, and occasionally advises a space where one is not
1617    needed, e.g. "." and ".2".  */
1618
1619 int
1620 cpp_avoid_paste (pfile, token1, token2)
1621      cpp_reader *pfile;
1622      const cpp_token *token1, *token2;
1623 {
1624   enum cpp_ttype a = token1->type, b = token2->type;
1625   cppchar_t c;
1626
1627   if (token1->flags & NAMED_OP)
1628     a = CPP_NAME;
1629   if (token2->flags & NAMED_OP)
1630     b = CPP_NAME;
1631
1632   c = EOF;
1633   if (token2->flags & DIGRAPH)
1634     c = digraph_spellings[b - CPP_FIRST_DIGRAPH][0];
1635   else if (token_spellings[b].category == SPELL_OPERATOR)
1636     c = token_spellings[b].name[0];
1637
1638   /* Quickly get everything that can paste with an '='.  */
1639   if (a <= CPP_LAST_EQ && c == '=')
1640     return 1;
1641
1642   switch (a)
1643     {
1644     case CPP_GREATER:   return c == '>' || c == '?';
1645     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1646     case CPP_PLUS:      return c == '+';
1647     case CPP_MINUS:     return c == '-' || c == '>';
1648     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1649     case CPP_MOD:       return c == ':' || c == '>';
1650     case CPP_AND:       return c == '&';
1651     case CPP_OR:        return c == '|';
1652     case CPP_COLON:     return c == ':' || c == '>';
1653     case CPP_DEREF:     return c == '*';
1654     case CPP_DOT:       return c == '.' || c == '%';
1655     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1656     case CPP_NAME:      return ((b == CPP_NUMBER
1657                                  && name_p (pfile, &token2->val.str))
1658                                 || b == CPP_NAME
1659                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1660     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1661                                 || c == '.' || c == '+' || c == '-');
1662     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1663                                 && token1->val.c == '@'
1664                                 && (b == CPP_NAME || b == CPP_STRING));
1665     default:            break;
1666     }
1667
1668   return 0;
1669 }
1670
1671 /* Output all the remaining tokens on the current line, and a newline
1672    character, to FP.  Leading whitespace is removed.  */
1673 void
1674 cpp_output_line (pfile, fp)
1675      cpp_reader *pfile;
1676      FILE *fp;
1677 {
1678   cpp_token token;
1679
1680   cpp_get_token (pfile, &token);
1681   token.flags &= ~PREV_WHITE;
1682   while (token.type != CPP_EOF)
1683     {
1684       cpp_output_token (&token, fp);
1685       cpp_get_token (pfile, &token);
1686     }
1687
1688   putc ('\n', fp);
1689 }
1690
/* Memory pools.  */

/* Alignment probe.  The union follows a single char, so its offset
   within the structure is the padding the compiler inserts to align a
   double or a pointer, whichever is stricter.  The structure is used
   only to compute DEFAULT_ALIGNMENT.  */
struct dummy
{
  char c;
  union
  {
    int *p;
    double d;
  } u;
};

/* Alignment used when a pool is created without an explicit one.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
1704
1705 static int
1706 chunk_suitable (pool, chunk, size)
1707      cpp_pool *pool;
1708      cpp_chunk *chunk;
1709      unsigned int size;
1710 {
1711   /* Being at least twice SIZE means we can use memcpy in
1712      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
1713      anyway.  */
1714   return (chunk && pool->locked != chunk
1715           && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
1716 }
1717
1718 /* Returns the end of the new pool.  PTR points to a char in the old
1719    pool, and is updated to point to the same char in the new pool.  */
1720 unsigned char *
1721 _cpp_next_chunk (pool, len, ptr)
1722      cpp_pool *pool;
1723      unsigned int len;
1724      unsigned char **ptr;
1725 {
1726   cpp_chunk *chunk = pool->cur->next;
1727
1728   /* LEN is the minimum size we want in the new pool.  */
1729   len += POOL_ROOM (pool);
1730   if (! chunk_suitable (pool, chunk, len))
1731     {
1732       chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
1733
1734       chunk->next = pool->cur->next;
1735       pool->cur->next = chunk;
1736     }
1737
1738   /* Update the pointer before changing chunk's front.  */
1739   if (ptr)
1740     *ptr += chunk->base - POOL_FRONT (pool);
1741
1742   memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
1743   chunk->front = chunk->base;
1744
1745   pool->cur = chunk;
1746   return POOL_LIMIT (pool);
1747 }
1748
1749 static cpp_chunk *
1750 new_chunk (size)
1751      unsigned int size;
1752 {
1753   unsigned char *base;
1754   cpp_chunk *result;
1755
1756   size = ALIGN (size, DEFAULT_ALIGNMENT);
1757   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
1758   /* Put the chunk descriptor at the end.  Then chunk overruns will
1759      cause obvious chaos.  */
1760   result = (cpp_chunk *) (base + size);
1761   result->base = base;
1762   result->front = base;
1763   result->limit = base + size;
1764   result->next = 0;
1765
1766   return result;
1767 }
1768
1769 void
1770 _cpp_init_pool (pool, size, align, temp)
1771      cpp_pool *pool;
1772      unsigned int size, align, temp;
1773 {
1774   if (align == 0)
1775     align = DEFAULT_ALIGNMENT;
1776   if (align & (align - 1))
1777     abort ();
1778   pool->align = align;
1779   pool->cur = new_chunk (size);
1780   pool->locked = 0;
1781   pool->locks = 0;
1782   if (temp)
1783     pool->cur->next = pool->cur;
1784 }
1785
1786 void
1787 _cpp_lock_pool (pool)
1788      cpp_pool *pool;
1789 {
1790   if (pool->locks++ == 0)
1791     pool->locked = pool->cur;
1792 }
1793
1794 void
1795 _cpp_unlock_pool (pool)
1796      cpp_pool *pool;
1797 {
1798   if (--pool->locks == 0)
1799     pool->locked = 0;
1800 }
1801
1802 void
1803 _cpp_free_pool (pool)
1804      cpp_pool *pool;
1805 {
1806   cpp_chunk *chunk = pool->cur, *next;
1807
1808   do
1809     {
1810       next = chunk->next;
1811       free (chunk->base);
1812       chunk = next;
1813     }
1814   while (chunk && chunk != pool->cur);
1815 }
1816
1817 /* Reserve LEN bytes from a memory pool.  */
1818 unsigned char *
1819 _cpp_pool_reserve (pool, len)
1820      cpp_pool *pool;
1821      unsigned int len;
1822 {
1823   len = ALIGN (len, pool->align);
1824   if (len > (unsigned int) POOL_ROOM (pool))
1825     _cpp_next_chunk (pool, len, 0);
1826
1827   return POOL_FRONT (pool);
1828 }
1829
1830 /* Allocate LEN bytes from a memory pool.  */
1831 unsigned char *
1832 _cpp_pool_alloc (pool, len)
1833      cpp_pool *pool;
1834      unsigned int len;
1835 {
1836   unsigned char *result = _cpp_pool_reserve (pool, len);
1837
1838   POOL_COMMIT (pool, len);
1839   return result;
1840 }