gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /* This lexer works with a single pass of the file.  Recently I
  24    re-wrote it to minimize the places where we step backwards in the
  25    input stream, to make future changes to support multi-byte
  26    character sets fairly straight-forward.
  27
  28    There is now only one routine where we do step backwards:
  29    skip_escaped_newlines.  This routine could probably also be changed
  30    so that it doesn't need to step back.  One possibility is to use a
  31    trick similar to that used in lex_dot and lex_percent.  Two
  32    extra characters might be needed, but skip_escaped_newlines itself
  33    would probably be the only place that needs to be aware of that,
  34    and changes to the remaining routines would probably only be needed
  35    if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41
  42 /* MULTIBYTE_CHARS support only works for native compilers.
  43    ??? Ideally what we want is to model widechar support after
  44    the current floating point support.  */
  45 #ifdef CROSS_COMPILE
  46 #undef MULTIBYTE_CHARS
  47 #endif
  48
  49 #ifdef MULTIBYTE_CHARS
  50 #include "mbchar.h"
  51 #include <locale.h>
  52 #endif
  53
  54 /* How a token type is spelled; stored per token type in the
  55    token_spellings table.  Tokens with SPELL_STRING store their
        spelling in the token list, and its length in the
        token->val.name.len.
        NOTE(review): parse_string and save_comment in this file record
        the length in token->val.str.len -- confirm val.name is current.  */
  56 enum spell_type
  57 {
  58   SPELL_OPERATOR = 0,      /* Category of every OP () table entry.  */
  59   SPELL_CHAR,
  60   SPELL_IDENT,
  61   SPELL_STRING,
  62   SPELL_NONE
  63 };
  64
  65 struct token_spelling      /* One entry of the token_spellings table.  */
  66 {
  67   enum spell_type category;        /* How this token type is spelled.  */
  68   const unsigned char *name;       /* OP (): operator text; TK (): stringified enumerator.  */
  69 };
  70
  71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
  72                                              U":>", U"<%", U"%>"};     /* Digraph spellings.  */
  73
     /* Expand TTYPE_TABLE into one token_spelling per token type: OP ()
        entries get SPELL_OPERATOR and the literal spelling S; TK ()
        entries get the category S and the stringified enumerator E.  */
  74 #define OP(e, s) { SPELL_OPERATOR, U s           },
  75 #define TK(e, s) { s,              U STRINGX (e) },
  76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
  77 #undef OP
  78 #undef TK
  79
     /* Look up the spelling category / name for TOKEN by its type.  */
  80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  82
  83 static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
  84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
  85 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
  86 /* Prototypes for the file-local comment, whitespace and token helpers.  */
  87 static int skip_block_comment PARAMS ((cpp_reader *));
  88 static int skip_line_comment PARAMS ((cpp_reader *));
  89 static void adjust_column PARAMS ((cpp_reader *));
  90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
  92 static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
  93                                                     const U_CHAR *));
  94 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  95 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  96 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  97 static void unterminated PARAMS ((cpp_reader *, int));
  98 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
  99 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
 100 static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
 101 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
 102 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
 103 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
 104                                    const unsigned char *, unsigned int *));
 105 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
 106 /* Further static helpers; presumably defined later in this file -- confirm.  */
 107 static cpp_chunk *new_chunk PARAMS ((unsigned int));
 108 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
 109 static unsigned int hex_digit_value PARAMS ((unsigned int));
 110 static _cpp_buff *new_buff PARAMS ((unsigned int));
 111
 112 /* Utility routine:
 113
 114    Compares, the token TOKEN to the NUL-terminated string STRING.
 115    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 116
 117 int
 118 cpp_ideq (token, string)
 119      const cpp_token *token;
 120      const char *string;
 121 {
 122   if (token->type != CPP_NAME)
 123     return 0;
 124
 125   return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
 126 }
 127
 128 /* Call when meeting a newline.  Returns the character after the newline
 129    (or carriage-return newline combination), or EOF.  */
 130 static cppchar_t
 131 handle_newline (pfile, newline_char)
 132      cpp_reader *pfile;
 133      cppchar_t newline_char;
 134 {
 135   cpp_buffer *buffer;
 136   cppchar_t next = EOF;
 137
 138   pfile->line++;
 139   buffer = pfile->buffer;
 140   buffer->col_adjust = 0;
 141   buffer->line_base = buffer->cur;
 142
 143   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 144   if (buffer->cur < buffer->rlimit)
 145     {
 146       next = *buffer->cur++;
 147       if (next + newline_char == '\r' + '\n')
 148         {
 149           buffer->line_base = buffer->cur;
 150           if (buffer->cur < buffer->rlimit)
 151             next = *buffer->cur++;
 152           else
 153             next = EOF;
 154         }
 155     }
 156
 157   buffer->read_ahead = next;
 158   return next;
 159 }
 160
 161 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 162    encountered.  It warns if necessary, and returns true if the
 163    trigraph should be honoured.  FROM_CHAR is the third character of a
 164    trigraph, and presumed to be the previous character for position
 165    reporting.  */
 166 static int
 167 trigraph_ok (pfile, from_char)
 168      cpp_reader *pfile;
 169      cppchar_t from_char;
 170 {
 171   int accept = CPP_OPTION (pfile, trigraphs);
 172
 173   /* Don't warn about trigraphs in comments.  */
 174   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 175     {
 176       cpp_buffer *buffer = pfile->buffer;
 177
 178       if (accept)
 179         cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
 180                                "trigraph ??%c converted to %c",
 181                                (int) from_char,
 182                                (int) _cpp_trigraph_map[from_char]);
 183       else if (buffer->cur != buffer->last_Wtrigraphs)
 184         {
 185           buffer->last_Wtrigraphs = buffer->cur;
 186           cpp_warning_with_line (pfile, pfile->line,
 187                                  CPP_BUF_COL (buffer) - 2,
 188                                  "trigraph ??%c ignored", (int) from_char);
 189         }
 190     }
 191
 192   return accept;
 193 }
 194
 195 /* Set result->type to T and clear buffer->read_ahead.  Assumes local variables buffer and result.  */
 196 #define ACCEPT_CHAR(t) \
 197   do { result->type = t; buffer->read_ahead = EOF; } while (0)
 198
 199 /* When we move to multibyte character sets, add to these something
 200    that saves and restores the state of the multibyte conversion
 201    library.  This probably involves saving and restoring a "cookie".
 202    In the case of glibc it is an 8-byte structure, so is not a high
 203    overhead operation.  In any case, it's out of the fast path.
        For now they only save / restore buffer->cur through a local
        variable saved_cur; both assume locals buffer and saved_cur.  */
 204 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
 205 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 206
 207 /* Skips any escaped newlines introduced by NEXT, which is either a
 208    '?' or a '\\'.  Returns the next character, which will also have
 209    been placed in buffer->read_ahead.  This routine performs
 210    preprocessing stages 1 and 2 of the ISO C standard.

        If buffer->from_stage3 is set nothing is examined and NEXT is
        returned as is.  Otherwise a '?' is only treated as a trigraph if
        it is followed by '?' and a character with a non-zero entry in
        _cpp_trigraph_map that trigraph_ok accepts; a backslash is only
        an escaped newline if, after optional non-vertical whitespace, a
        vertical-space character follows.  Whenever a candidate does not
        pan out, buffer->cur is put back to where it was and the
        character in hand is returned.  */
 211 static cppchar_t
 212 skip_escaped_newlines (pfile, next)
 213      cpp_reader *pfile;
 214      cppchar_t next;
 215 {
 216   cpp_buffer *buffer = pfile->buffer;
 217
 218   /* Only do this if we apply stages 1 and 2.  */
 219   if (!buffer->from_stage3)
 220     {
 221       cppchar_t next1;
 222       const unsigned char *saved_cur;
 223       int space;
 224
 225       do
 226         {
 227           if (buffer->cur == buffer->rlimit)
 228             break;     /* Nothing follows NEXT.  */
 229
 230           SAVE_STATE ();       /* Position to return to if the match fails.  */
 231           if (next == '?')
 232             {
 233               next1 = *buffer->cur++;
                   /* Need a second '?' and room for a third character.  */
 234               if (next1 != '?' || buffer->cur == buffer->rlimit)
 235                 {
 236                   RESTORE_STATE ();
 237                   break;
 238                 }
 239
 240               next1 = *buffer->cur++;
                   /* Not a trigraph, or trigraphs are not being honoured.  */
 241               if (!_cpp_trigraph_map[next1]
 242                   || !trigraph_ok (pfile, next1))
 243                 {
 244                   RESTORE_STATE ();
 245                   break;
 246                 }
 247
 248               /* We have a full trigraph here.  */
 249               next = _cpp_trigraph_map[next1];
                   /* Only a trigraph for backslash can start an escaped
                      newline; anything else is returned as converted.  */
 250               if (next != '\\' || buffer->cur == buffer->rlimit)
 251                 break;
 252               SAVE_STATE ();   /* Failure below must not undo the trigraph.  */
 253             }
 254
 255           /* We have a backslash, and room for at least one more character.  */
 256           space = 0;
 257           do
 258             {
 259               next1 = *buffer->cur++;
 260               if (!is_nvspace (next1))
 261                 break;
 262               space = 1;   /* Saw whitespace between backslash and newline.  */
 263             }
 264           while (buffer->cur < buffer->rlimit);
 265
 266           if (!is_vspace (next1))
 267             {
 268               RESTORE_STATE ();        /* Plain backslash, not an escaped newline.  */
 269               break;
 270             }
 271
 272           if (space && !pfile->state.lexing_comment)
 273             cpp_warning (pfile, "backslash and newline separated by space");
 274
 275           next = handle_newline (pfile, next1);
 276           if (next == EOF)
 277             cpp_pedwarn (pfile, "backslash-newline at end of file");
 278         }
 279       while (next == '\\' || next == '?');     /* May start another one.  */
 280     }
 281
 282   buffer->read_ahead = next;
 283   return next;
 284 }
 285
 286 /* Obtain the next character, after trigraph conversion and skipping
 287    an arbitrary string of escaped newlines.  The common case of no
 288    trigraphs or escaped newlines falls through quickly.  */
 289 static cppchar_t
 290 get_effective_char (pfile)
 291      cpp_reader *pfile;
 292 {
 293   cpp_buffer *buffer = pfile->buffer;
 294   cppchar_t next = EOF;
 295
 296   if (buffer->cur < buffer->rlimit)
 297     {
 298       next = *buffer->cur++;
 299
 300       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 301          can introduce escaped newlines, which we want to skip, or
 302          UCNs, which, depending upon lexer state, we will handle in
 303          the future.  */
 304       if (next == '?' || next == '\\')
 305         next = skip_escaped_newlines (pfile, next);
 306     }
 307
 308   buffer->read_ahead = next;
 309   return next;
 310 }
 311
 312 /* Skip a C-style block comment.  We find the end of the comment by
 313    seeing if an asterisk is before every '/' we encounter.  Returns
 314    non-zero if comment terminated by EOF, zero otherwise.
        pfile->state.lexing_comment is set while scanning and cleared
        afterwards; buffer->read_ahead is EOF on return.  */
 315 static int
 316 skip_block_comment (pfile)
 317      cpp_reader *pfile;
 318 {
 319   cpp_buffer *buffer = pfile->buffer;
 320   cppchar_t c = EOF, prevc = EOF;
 321
 322   pfile->state.lexing_comment = 1;
 323   while (buffer->cur != buffer->rlimit)
 324     {
 325       prevc = c, c = *buffer->cur++;
 326
 327     next_char:
           /* Entered by goto when C has already been fetched below.  */
 328       /* FIXME: For speed, create a new character class of characters
 329          of interest inside block comments.  */
 330       if (c == '?' || c == '\\')
 331         c = skip_escaped_newlines (pfile, c);
 332
 333       /* People like decorating comments with '*', so check for '/'
 334          instead for efficiency.  */
 335       if (c == '/')
 336         {
 337           if (prevc == '*')
 338             break;     /* Found the closing delimiter.  */
 339
 340           /* Warn about potential nested comments, but not if the '/'
 341              comes immediately before the true comment delimiter.
 342              Don't bother to get it right across escaped newlines.  */
 343           if (CPP_OPTION (pfile, warn_comments)
 344               && buffer->cur != buffer->rlimit)
 345             {
 346               prevc = c, c = *buffer->cur++;
 347               if (c == '*' && buffer->cur != buffer->rlimit)
 348                 {
 349                   prevc = c, c = *buffer->cur++;
 350                   if (c != '/')
 351                     cpp_warning_with_line (pfile, pfile->line,
 352                                            CPP_BUF_COL (buffer) - 2,
 353                                            "\"/*\" within comment");
 354                 }
 355               goto next_char;      /* Examine the look-ahead character.  */
 356             }
 357         }
 358       else if (is_vspace (c))
 359         {
 360           prevc = c, c = handle_newline (pfile, c);
 361           goto next_char;  /* handle_newline already read the next char.  */
 362         }
 363       else if (c == '\t')
 364         adjust_column (pfile);
 365     }
 366
 367   pfile->state.lexing_comment = 0;
 368   buffer->read_ahead = EOF;
 369   return c != '/' || prevc != '*';
 370 }
 371
 372 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 373    non-zero if a multiline comment.  The following new line, if any,
 374    is left in buffer->read_ahead.  */
 375 static int
 376 skip_line_comment (pfile)
 377      cpp_reader *pfile;
 378 {
 379   cpp_buffer *buffer = pfile->buffer;
 380   unsigned int orig_line = pfile->line;
 381   cppchar_t c;
 382
 383   pfile->state.lexing_comment = 1;
 384   do
 385     {
 386       c = EOF;
 387       if (buffer->cur == buffer->rlimit)
 388         break;
 389
 390       c = *buffer->cur++;
 391       if (c == '?' || c == '\\')
 392         c = skip_escaped_newlines (pfile, c);
 393     }
 394   while (!is_vspace (c));
 395
 396   pfile->state.lexing_comment = 0;
 397   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 398   return orig_line != pfile->line;
 399 }
 400
 401 /* pfile->buffer->cur is one beyond the \t character.  Update
 402    col_adjust so we track the column correctly.  */
 403 static void
 404 adjust_column (pfile)
 405      cpp_reader *pfile;
 406 {
 407   cpp_buffer *buffer = pfile->buffer;
 408   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 409
 410   /* Round it up to multiple of the tabstop, but subtract 1 since the
 411      tab itself occupies a character position.  */
 412   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 413                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 414 }
 415
 416 /* Skips whitespace, saving the next non-whitespace character.
 417    Adjusts pfile->col_adjust to account for tabs.  Without this,
 418    tokens might be assigned an incorrect column.  */
 419 static void
 420 skip_whitespace (pfile, c)
 421      cpp_reader *pfile;
 422      cppchar_t c;
 423 {
 424   cpp_buffer *buffer = pfile->buffer;
 425   unsigned int warned = 0;
 426
 427   do
 428     {
 429       /* Horizontal space always OK.  */
 430       if (c == ' ')
 431         ;
 432       else if (c == '\t')
 433         adjust_column (pfile);
 434       /* Just \f \v or \0 left.  */
 435       else if (c == '\0')
 436         {
 437           if (!warned)
 438             {
 439               cpp_warning (pfile, "null character(s) ignored");
 440               warned = 1;
 441             }
 442         }
 443       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 444         cpp_pedwarn_with_line (pfile, pfile->line,
 445                                CPP_BUF_COL (buffer),
 446                                "%s in preprocessing directive",
 447                                c == '\f' ? "form feed" : "vertical tab");
 448
 449       c = EOF;
 450       if (buffer->cur == buffer->rlimit)
 451         break;
 452       c = *buffer->cur++;
 453     }
 454   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 455   while (is_nvspace (c));
 456
 457   /* Remember the next character.  */
 458   buffer->read_ahead = c;
 459 }
 460
 461 /* See if the characters of a number token are valid in a name (no
 462    '.', '+' or '-').  */
 463 static int
 464 name_p (pfile, string)
 465      cpp_reader *pfile;
 466      const cpp_string *string;
 467 {
 468   unsigned int i;
 469
 470   for (i = 0; i < string->len; i++)
 471     if (!is_idchar (string->text[i]))
 472       return 0;
 473
 474   return 1;
 475 }
 476
 477 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 478    a critical inner loop.  The common case is an identifier which has
 479    not been split by backslash-newline, does not contain a dollar
 480    sign, and has already been scanned (roughly 10:1 ratio of
 481    seen:unseen identifiers in normal code; the distribution is
 482    Poisson-like).  Second most common case is a new identifier, not
 483    split and no dollar sign.  The other possibilities are rare and
 484    have been relegated to parse_identifier_slow.  */
 485
 486 static cpp_hashnode *
 487 parse_identifier (pfile)
 488      cpp_reader *pfile;
 489 {
 490   cpp_hashnode *result;
 491   const U_CHAR *cur, *rlimit;
 492
 493   /* Fast-path loop.  Skim over a normal identifier.
 494      N.B. ISIDNUM does not include $.  */
 495   cur    = pfile->buffer->cur - 1;
 496   rlimit = pfile->buffer->rlimit;
 497   do
 498     cur++;
 499   while (cur < rlimit && ISIDNUM (*cur));
 500
 501   /* Check for slow-path cases.  */
 502   if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
 503     result = parse_identifier_slow (pfile, cur);
 504   else
 505     {
 506       const U_CHAR *base = pfile->buffer->cur - 1;
 507       result = (cpp_hashnode *)
 508         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 509       pfile->buffer->cur = cur;
 510     }
 511
 512   /* Rarely, identifiers require diagnostics when lexed.
 513      XXX Has to be forced out of the fast path.  */
 514   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 515                         && !pfile->state.skipping, 0))
 516     {
 517       /* It is allowed to poison the same identifier twice.  */
 518       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 519         cpp_error (pfile, "attempt to use poisoned \"%s\"",
 520                    NODE_NAME (result));
 521
 522       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 523          replacement list of a variadic macro.  */
 524       if (result == pfile->spec_nodes.n__VA_ARGS__
 525           && !pfile->state.va_args_ok)
 526         cpp_pedwarn (pfile,
 527         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 528     }
 529
 530   return result;
 531 }
 532
 533 /* Slow path.  This handles identifiers which have been split, and
 534    identifiers which contain dollar signs.  The part of the identifier
 535    from PFILE->buffer->cur-1 to CUR has already been scanned.  */
 536 static cpp_hashnode *
 537 parse_identifier_slow (pfile, cur)
 538      cpp_reader *pfile;
 539      const U_CHAR *cur;
 540 {
 541   cpp_buffer *buffer = pfile->buffer;
 542   const U_CHAR *base = buffer->cur - 1;
 543   struct obstack *stack = &pfile->hash_table->stack;
 544   unsigned int c, saw_dollar = 0, len;
 545
 546   /* Copy the part of the token which is known to be okay.  */
 547   obstack_grow (stack, base, cur - base);
 548
 549   /* Now process the part which isn't.  We are looking at one of
 550      '$', '\\', or '?' on entry to this loop.  */
 551   c = *cur++;
 552   buffer->cur = cur;
 553   do
 554     {
 555       while (is_idchar (c))
 556         {
 557           obstack_1grow (stack, c);
 558
 559           if (c == '$')
 560             saw_dollar++;
 561
 562           c = EOF;
 563           if (buffer->cur == buffer->rlimit)
 564             break;
 565
 566           c = *buffer->cur++;
 567         }
 568
 569       /* Potential escaped newline?  */
 570       if (c != '?' && c != '\\')
 571         break;
 572       c = skip_escaped_newlines (pfile, c);
 573     }
 574   while (is_idchar (c));
 575
 576   /* Remember the next character.  */
 577   buffer->read_ahead = c;
 578
 579   /* $ is not a identifier character in the standard, but is commonly
 580      accepted as an extension.  Don't warn about it in skipped
 581      conditional blocks.  */
 582   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 583     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 584
 585   /* Identifiers are null-terminated.  */
 586   len = obstack_object_size (stack);
 587   obstack_1grow (stack, '\0');
 588
 589   return (cpp_hashnode *)
 590     ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
 591 }
 592
 593 /* Parse a number, skipping embedded backslash-newlines.  */
 594 static void
 595 parse_number (pfile, number, c, leading_period)
 596      cpp_reader *pfile;
 597      cpp_string *number;
 598      cppchar_t c;
 599      int leading_period;
 600 {
 601   cpp_buffer *buffer = pfile->buffer;
 602   cpp_pool *pool = &pfile->ident_pool;
 603   unsigned char *dest, *limit;
 604
 605   dest = POOL_FRONT (pool);
 606   limit = POOL_LIMIT (pool);
 607
 608   /* Place a leading period.  */
 609   if (leading_period)
 610     {
 611       if (dest >= limit)
 612         limit = _cpp_next_chunk (pool, 0, &dest);
 613       *dest++ = '.';
 614     }
 615
 616   do
 617     {
 618       do
 619         {
 620           /* Need room for terminating null.  */
 621           if (dest + 1 >= limit)
 622             limit = _cpp_next_chunk (pool, 0, &dest);
 623           *dest++ = c;
 624
 625           c = EOF;
 626           if (buffer->cur == buffer->rlimit)
 627             break;
 628
 629           c = *buffer->cur++;
 630         }
 631       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 632
 633       /* Potential escaped newline?  */
 634       if (c != '?' && c != '\\')
 635         break;
 636       c = skip_escaped_newlines (pfile, c);
 637     }
 638   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 639
 640   /* Remember the next character.  */
 641   buffer->read_ahead = c;
 642
 643   /* Null-terminate the number.  */
 644   *dest = '\0';
 645
 646   number->text = POOL_FRONT (pool);
 647   number->len = dest - number->text;
 648   POOL_COMMIT (pool, number->len + 1);
 649 }
 650
 651 /* Subroutine of parse_string.  Emits error for unterminated strings.  */
 652 static void
 653 unterminated (pfile, term)
 654      cpp_reader *pfile;
 655      int term;
 656 {
 657   cpp_error (pfile, "missing terminating %c character", term);
 658
 659   if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
 660     {
 661       cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
 662                            "possible start of unterminated string literal");
 663       pfile->mls_line = 0;
 664     }
 665 }
 666
 667 /* Subroutine of parse_string.  */
 668 static int
 669 unescaped_terminator_p (pfile, dest)
 670      cpp_reader *pfile;
 671      const unsigned char *dest;
 672 {
 673   const unsigned char *start, *temp;
 674
 675   /* In #include-style directives, terminators are not escapeable.  */
 676   if (pfile->state.angled_headers)
 677     return 1;
 678
 679   start = POOL_FRONT (&pfile->ident_pool);
 680
 681   /* An odd number of consecutive backslashes represents an escaped
 682      terminator.  */
 683   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 684     ;
 685
 686   return ((dest - temp) & 1) == 0;
 687 }
 688
 689 /* Parses a string, character constant, or angle-bracketed header file
 690    name.  Handles embedded trigraphs and escaped newlines.  The stored
 691    string is guaranteed NUL-terminated, but it is not guaranteed that
 692    this is the first NUL since embedded NULs are preserved.
 693
 694    Multi-line strings are allowed, but they are deprecated.  */
 695 static void
 696 parse_string (pfile, token, terminator)
 697      cpp_reader *pfile;
 698      cpp_token *token;
 699      cppchar_t terminator;
 700 {
 701   cpp_buffer *buffer = pfile->buffer;
 702   cpp_pool *pool = &pfile->ident_pool;
 703   unsigned char *dest, *limit;
 704   cppchar_t c;
 705   bool warned_nulls = false, warned_multi = false;
 706
 707   dest = POOL_FRONT (pool);
 708   limit = POOL_LIMIT (pool);
 709
 710   for (;;)
 711     {
 712       if (buffer->cur == buffer->rlimit)
 713         c = EOF;
 714       else
 715         c = *buffer->cur++;
 716
 717     have_char:
 718       /* We need space for the terminating NUL.  */
 719       if (dest >= limit)
 720         limit = _cpp_next_chunk (pool, 0, &dest);
 721
 722       if (c == EOF)
 723         {
 724           unterminated (pfile, terminator);
 725           break;
 726         }
 727
 728       /* Handle trigraphs, escaped newlines etc.  */
 729       if (c == '?' || c == '\\')
 730         c = skip_escaped_newlines (pfile, c);
 731
 732       if (c == terminator && unescaped_terminator_p (pfile, dest))
 733         {
 734           c = EOF;
 735           break;
 736         }
 737       else if (is_vspace (c))
 738         {
 739           /* In assembly language, silently terminate string and
 740              character literals at end of line.  This is a kludge
 741              around not knowing where comments are.  */
 742           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 743             break;
 744
 745           /* Character constants and header names may not extend over
 746              multiple lines.  In Standard C, neither may strings.
 747              Unfortunately, we accept multiline strings as an
 748              extension, except in #include family directives.  */
 749           if (terminator != '"' || pfile->state.angled_headers)
 750             {
 751               unterminated (pfile, terminator);
 752               break;
 753             }
 754
 755           if (!warned_multi)
 756             {
 757               warned_multi = true;
 758               cpp_pedwarn (pfile, "multi-line string literals are deprecated");
 759             }
 760
 761           if (pfile->mls_line == 0)
 762             {
 763               pfile->mls_line = token->line;
 764               pfile->mls_col = token->col;
 765             }
 766
 767           c = handle_newline (pfile, c);
 768           *dest++ = '\n';
 769           goto have_char;
 770         }
 771       else if (c == '\0' && !warned_nulls)
 772         {
 773           warned_nulls = true;
 774           cpp_warning (pfile, "null character(s) preserved in literal");
 775         }
 776
 777       *dest++ = c;
 778     }
 779
 780   /* Remember the next character.  */
 781   buffer->read_ahead = c;
 782   *dest = '\0';
 783
 784   token->val.str.text = POOL_FRONT (pool);
 785   token->val.str.len = dest - token->val.str.text;
 786   POOL_COMMIT (pool, token->val.str.len + 1);
 787 }
 788
 789 /* The stored comment includes the comment start and any terminator.  */
 790 static void
 791 save_comment (pfile, token, from)
 792      cpp_reader *pfile;
 793      cpp_token *token;
 794      const unsigned char *from;
 795 {
 796   unsigned char *buffer;
 797   unsigned int len;
 798
 799   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 800   /* C++ comments probably (not definitely) have moved past a new
 801      line, which we don't want to save in the comment.  */
 802   if (pfile->buffer->read_ahead != EOF)
 803     len--;
 804   buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
 805
 806   token->type = CPP_COMMENT;
 807   token->val.str.len = len;
 808   token->val.str.text = buffer;
 809
 810   buffer[0] = '/';
 811   memcpy (buffer + 1, from, len - 1);
 812 }
 813
 814 /* Subroutine of _cpp_lex_direct to handle '%'.  A little tricky, since we
 815    want to avoid stepping back when lexing %:%X.  */
 816 static void
 817 lex_percent (pfile, result)
 818      cpp_reader *pfile;
 819      cpp_token *result;
 820 {
 821   cpp_buffer *buffer= pfile->buffer;
 822   cppchar_t c;
 823
 824   result->type = CPP_MOD;
 825   /* Parsing %:%X could leave an extra character.  */
 826   if (buffer->extra_char == EOF)
 827     c = get_effective_char (pfile);
 828   else
 829     {
 830       c = buffer->read_ahead = buffer->extra_char;
 831       buffer->extra_char = EOF;
 832     }
 833
 834   if (c == '=')
 835     ACCEPT_CHAR (CPP_MOD_EQ);
 836   else if (CPP_OPTION (pfile, digraphs))
 837     {
 838       if (c == ':')
 839         {
 840           result->flags |= DIGRAPH;
 841           ACCEPT_CHAR (CPP_HASH);
 842           if (get_effective_char (pfile) == '%')
 843             {
 844               buffer->extra_char = get_effective_char (pfile);
 845               if (buffer->extra_char == ':')
 846                 {
 847                   buffer->extra_char = EOF;
 848                   ACCEPT_CHAR (CPP_PASTE);
 849                 }
 850               else
 851                 /* We'll catch the extra_char when we're called back.  */
 852                 buffer->read_ahead = '%';
 853             }
 854         }
 855       else if (c == '>')
 856         {
 857           result->flags |= DIGRAPH;
 858           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 859         }
 860     }
 861 }
 862
 863 /* Subroutine of _cpp_lex_direct to handle '.'.  This is tricky, since we
 864    want to avoid stepping back when lexing '...' or '.123'.  In the
 865    latter case we should also set a flag for parse_number.

        PFILE is the reader whose current buffer is lexed; RESULT is the
        token being filled in.  RESULT->type is set to CPP_NUMBER when
        the '.' is followed by a digit, and to CPP_DOT otherwise; the
        '...' and (cplusplus option only) '.*' cases then go through
        ACCEPT_CHAR.  ACCEPT_CHAR is a macro defined outside this
        excerpt; presumably it sets RESULT->type and consumes the
        look-ahead character -- confirm against its definition.  */
 866 static void
 867 lex_dot (pfile, result)
 868      cpp_reader *pfile;
 869      cpp_token *result;
 870 {
 871   cpp_buffer *buffer = pfile->buffer;
 872   cppchar_t c;
 873
 874   /* Parsing ..X could leave an extra character.  If a previous call
          stashed one in extra_char, use it now (mirroring it into
          read_ahead) instead of fetching a new character.  */
 875   if (buffer->extra_char == EOF)
 876     c = get_effective_char (pfile);
 877   else
 878     {
 879       c = buffer->read_ahead = buffer->extra_char;
 880       buffer->extra_char = EOF;
 881     }
 882
 883   /* All known character sets have 0...9 contiguous.  */
 884   if (c >= '0' && c <= '9')
 885     {
 886       result->type = CPP_NUMBER;
           /* The final argument (1) is presumably the "number began with
              a '.'" flag mentioned above; the plain digit case in
              _cpp_lex_direct passes 0.  */
 887       parse_number (pfile, &result->val.str, c, 1);
 888     }
 889   else
 890     {
 891       result->type = CPP_DOT;
 892       if (c == '.')
 893         {
               /* Two dots seen; a third character decides between '...'
                  and a lone '.' followed by something else.  */
 894           buffer->extra_char = get_effective_char (pfile);
 895           if (buffer->extra_char == '.')
 896             {
 897               buffer->extra_char = EOF;
 898               ACCEPT_CHAR (CPP_ELLIPSIS);
 899             }
 900           else
 901             /* We'll catch the extra_char when we're called back.  For
                    now put the second '.' back in read_ahead, so that this
                    call yields a lone CPP_DOT.  */
 902             buffer->read_ahead = '.';
 903         }
 904       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 905         ACCEPT_CHAR (CPP_DOT_STAR);
 906     }
 907 }
 908
 909 /* Allocate COUNT tokens for RUN.  RUN->base is set to the new vector,
        RUN->limit to one past its last element, and RUN->next is
        cleared.  RUN->prev is not touched here.  */
 910 void
 911 _cpp_init_tokenrun (run, count)
 912      tokenrun *run;
 913      unsigned int count;
 914 {
 915   run->base = xnewvec (cpp_token, count);
 916   run->limit = run->base + count;
 917   run->next = NULL;
 918 }
 919
 920 /* Returns the next tokenrun, or creates one if there is none.  A
        newly created run is linked after RUN (its prev points back to
        RUN) and is given room for 250 tokens.  */
 921 static tokenrun *
 922 next_tokenrun (run)
 923      tokenrun *run;
 924 {
 925   if (run->next == NULL)
 926     {
 927       run->next = xnew (tokenrun);
 928       run->next->prev = run;
           /* 250 is the fixed size used for every run created here.  */
 929       _cpp_init_tokenrun (run->next, 250);
 930     }
 931
 932   return run->next;
 933 }
 934
 935 /* Allocate a single token that is invalidated at the same time as the
 936    rest of the tokens on the line.  Has its line and col set to the
 937    same as the last lexed token, so that diagnostics appear in the
 938    right place.  Returns the new token; pfile->cur_token is advanced
        past it.  */
 939 cpp_token *
 940 _cpp_temp_token (pfile)
 941      cpp_reader *pfile;
 942 {
 943   cpp_token *old, *result;
 944
       /* Remember the previous token before possibly switching runs.
          NOTE(review): this assumes a token precedes cur_token in the
          current run -- confirm with callers.  */
 945   old = pfile->cur_token - 1;
 946   if (pfile->cur_token == pfile->cur_run->limit)
 947     {
           /* The current run is full; continue in the next one.  */
 948       pfile->cur_run = next_tokenrun (pfile->cur_run);
 949       pfile->cur_token = pfile->cur_run->base;
 950     }
 951
 952   result = pfile->cur_token++;
 953   result->line = old->line;
 954   result->col = old->col;
 955   return result;
 956 }
 957
 958 /* Lex a token into RESULT (external interface).  Takes care of issues
 959    like directive handling, token lookahead, multiple include
 960    optimisation and skipping.

        Returns a pointer to the token.  While pfile->state.skipping is
        set, tokens outside directives are discarded until a CPP_EOF is
        seen; tokens inside a directive are always returned.  */
 961 const cpp_token *
 962 _cpp_lex_token (pfile)
 963      cpp_reader *pfile;
 964 {
 965   cpp_token *result;
 966
 967   for (;;)
 968     {
           /* The current token run is full: move on to the next run,
              creating it if necessary.  */
 969       if (pfile->cur_token == pfile->cur_run->limit)
 970         {
 971           pfile->cur_run = next_tokenrun (pfile->cur_run);
 972           pfile->cur_token = pfile->cur_run->base;
 973         }
 974
           /* With lookaheads pending, hand out the token already at
              cur_token (presumably lexed earlier) instead of lexing a
              new one.  */
 975       if (pfile->lookaheads)
 976         {
 977           pfile->lookaheads--;
 978           result = pfile->cur_token++;
 979         }
 980       else
 981         result = _cpp_lex_direct (pfile);
 982
 983       if (result->flags & BOL)
 984         {
 985           /* Is this a directive?  If _cpp_handle_directive returns
 986              false, it is an assembler #.  When it returns true the
                  '#' is not returned; loop round for the next token.  */
 987           if (result->type == CPP_HASH
 988               && !pfile->state.parsing_args
 989               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 990             continue;
               /* First token of a line: notify the line_change callback,
                  if one is installed and we are not skipping.  */
 991           if (pfile->cb.line_change && !pfile->state.skipping)
 992             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
 993         }
 994
 995       /* We don't skip tokens in directives.  */
 996       if (pfile->state.in_directive)
 997         break;
 998
 999       /* Outside a directive, invalidate controlling macros.  At file
1000          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1001          get here and MI optimisation works.  */
1002       pfile->mi_valid = false;
1003
           /* Return the token unless we are skipping; CPP_EOF is returned
              even while skipping.  */
1004       if (!pfile->state.skipping || result->type == CPP_EOF)
1005         break;
1006     }
1007
1008   return result;
1009 }
1010
1011 /* Lex a token into pfile->cur_token, which is also incremented, to
1012    get diagnostics pointing to the correct location.
1013
1014    Does not handle issues such as token lookahead, multiple-include
1015    optimisation, directives, skipping etc.  This function is only
1016    suitable for use by _cpp_lex_token, and in special cases like
1017    lex_expansion_token which doesn't care for any of these issues.
1018
1019    When meeting a newline, returns CPP_EOF if parsing a directive,
1020    otherwise returns to the start of the token buffer if permissible.
1021    Returns the location of the lexed token.

        Internal labels: fresh_line reloads the buffer pointer and picks
        up the flags saved at the previous newline or end of buffer;
        update_tokens_line refreshes RESULT->line; skipped_white fetches
        the next character; trigraph re-dispatches on a character
        returned by skip_escaped_newlines; make_string and random_char
        are shared tails for string-like tokens and CPP_OTHER.

        ACCEPT_CHAR and get_effective_char are defined outside this
        excerpt; presumably get_effective_char leaves the character it
        returns in buffer->read_ahead and ACCEPT_CHAR sets RESULT->type
        while consuming that character -- confirm against their
        definitions.  */
1022 cpp_token *
1023 _cpp_lex_direct (pfile)
1024      cpp_reader *pfile;
1025 {
1026   cppchar_t c;
1027   cpp_buffer *buffer;
1028   const unsigned char *comment_start;
1029   cpp_token *result = pfile->cur_token++;
1030
       /* Start of a line or of a newly exposed buffer: the flags saved
          when the previous line or buffer ended (e.g. BOL) become the
          initial flags of this token.  */
1031  fresh_line:
1032   buffer = pfile->buffer;
1033   result->flags = buffer->saved_flags;
1034   buffer->saved_flags = 0;
1035  update_tokens_line:
1036   result->line = pfile->line;
1037
       /* Use the pending look-ahead character if there is one, else the
          next byte of the buffer; C stays EOF at the end of the buffer.  */
1038  skipped_white:
1039   c = buffer->read_ahead;
1040   if (c == EOF && buffer->cur < buffer->rlimit)
1041     c = *buffer->cur++;
1042   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1043   buffer->read_ahead = EOF;
1044
1045  trigraph:
1046   switch (c)
1047     {
1048     case EOF:
1049       buffer->saved_flags = BOL;
1050       if (!pfile->state.parsing_args && !pfile->state.in_directive)
1051         {
1052           if (buffer->cur != buffer->line_base)
1053             {
1054               /* Non-empty files should end in a newline.  Don't warn
1055                  for command line and _Pragma buffers.  */
1056               if (!buffer->from_stage3)
1057                 cpp_pedwarn (pfile, "no newline at end of file");
1058               handle_newline (pfile, '\n');
1059             }
1060
1061           /* Don't pop the last buffer.  */
1062           if (buffer->prev)
1063             {
                   /* Read return_at_eof before the buffer is popped.  If
                      it is set, CPP_EOF is returned to the caller;
                      otherwise lexing continues in the buffer that is now
                      current.  */
1064               unsigned char stop = buffer->return_at_eof;
1065
1066               _cpp_pop_buffer (pfile);
1067               if (!stop)
1068                 goto fresh_line;
1069             }
1070         }
1071       result->type = CPP_EOF;
1072       break;
1073
1074     case ' ': case '\t': case '\f': case '\v': case '\0':
1075       skip_whitespace (pfile, c);
1076       result->flags |= PREV_WHITE;
1077       goto skipped_white;
1078
1079     case '\n': case '\r':
1080       handle_newline (pfile, c);
1081       buffer->saved_flags = BOL;
           /* Outside a directive a newline is not a token: carry on with
              the next line.  Inside a directive it ends the directive and
              is reported as CPP_EOF.  */
1082       if (! pfile->state.in_directive)
1083         {
1084           if (pfile->state.parsing_args == 2)
1085             buffer->saved_flags |= PREV_WHITE;
               /* Unless tokens must be kept, rewind to the first slot of
                  the base run so the token buffer is reused for the new
                  line.  */
1086           if (!pfile->keep_tokens)
1087             {
1088               pfile->cur_run = &pfile->base_run;
1089               result = pfile->base_run.base;
1090               pfile->cur_token = result + 1;
1091             }
1092           goto fresh_line;
1093         }
1094       result->type = CPP_EOF;
1095       break;
1096
1097     case '?':
1098     case '\\':
1099       /* These could start an escaped newline, or '?' a trigraph.  Let
1100          skip_escaped_newlines do all the work.  */
1101       {
1102         unsigned int line = pfile->line;
1103
1104         c = skip_escaped_newlines (pfile, c);
1105         if (line != pfile->line)
1106           /* We had at least one escaped newline of some sort, and the
1107              next character is in buffer->read_ahead.  Update the
1108              token's line and column.  */
1109             goto update_tokens_line;
1110
1111         /* We are either the original '?' or '\\', or a trigraph.  */
1112         result->type = CPP_QUERY;
1113         buffer->read_ahead = EOF;
             /* A backslash becomes CPP_OTHER; any character other than
                '?' came from a trigraph and is dispatched again; a plain
                '?' keeps the CPP_QUERY type set above.  */
1114         if (c == '\\')
1115           goto random_char;
1116         else if (c != '?')
1117           goto trigraph;
1118       }
1119       break;
1120
1121     case '0': case '1': case '2': case '3': case '4':
1122     case '5': case '6': case '7': case '8': case '9':
1123       result->type = CPP_NUMBER;
1124       parse_number (pfile, &result->val.str, c, 0);
1125       break;
1126
1127     case '$':
           /* '$' starts an identifier only when dollars_in_ident is on.  */
1128       if (!CPP_OPTION (pfile, dollars_in_ident))
1129         goto random_char;
1130       /* Fall through...  */
1131
1132     case '_':
1133     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1134     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1135     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1136     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1137     case 'y': case 'z':
1138     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1139     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1140     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1141     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1142     case 'Y': case 'Z':
1143       result->type = CPP_NAME;
1144       result->val.node = parse_identifier (pfile);
1145
1146       /* 'L' may introduce wide characters or strings.  */
1147       if (result->val.node == pfile->spec_nodes.n_L)
1148         {
               /* Peek at the next character without consuming it; it is
                  consumed below only if it opens a literal.  */
1149           c = buffer->read_ahead;
1150           if (c == EOF && buffer->cur < buffer->rlimit)
1151             c = *buffer->cur;
1152           if (c == '\'' || c == '"')
1153             {
1154               buffer->cur++;
1155               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1156               goto make_string;
1157             }
1158         }
1159       /* Convert named operators to their proper types.  */
1160       else if (result->val.node->flags & NODE_OPERATOR)
1161         {
1162           result->flags |= NAMED_OP;
1163           result->type = result->val.node->value.operator;
1164         }
1165       break;
1166
1167     case '\'':
1168     case '"':
1169       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
           /* Shared with the wide-literal and header-name cases; C is the
              terminating character passed to parse_string.  */
1170     make_string:
1171       parse_string (pfile, result, c);
1172       break;
1173
1174     case '/':
1175       /* A potential block or line comment.  */
1176       comment_start = buffer->cur;
1177       result->type = CPP_DIV;
1178       c = get_effective_char (pfile);
1179       if (c == '=')
1180         ACCEPT_CHAR (CPP_DIV_EQ);
           /* Not a comment: the token is '/' or '/='.  */
1181       if (c != '/' && c != '*')
1182         break;
1183
1184       if (c == '*')
1185         {
1186           if (skip_block_comment (pfile))
1187             cpp_error (pfile, "unterminated comment");
1188         }
1189       else
1190         {
               /* Line comments are not recognised in this mode: return a
                  lone CPP_DIV.  */
1191           if (!CPP_OPTION (pfile, cplusplus_comments)
1192               && !CPP_IN_SYSTEM_HEADER (pfile))
1193             break;
1194
1195           /* Warn about comments only if pedantically GNUC89, and not
1196              in system headers.  */
1197           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1198               && ! buffer->warned_cplusplus_comments)
1199             {
1200               cpp_pedwarn (pfile,
1201                            "C++ style comments are not allowed in ISO C89");
1202               cpp_pedwarn (pfile,
1203                            "(this will be reported only once per input file)");
1204               buffer->warned_cplusplus_comments = 1;
1205             }
1206
1207           /* Skip_line_comment updates buffer->read_ahead.  */
1208           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1209             cpp_warning (pfile, "multi-line comment");
1210         }
1211
1212       /* Skipping the comment has updated buffer->read_ahead.  */
1213       if (!pfile->state.save_comments)
1214         {
               /* A discarded comment counts as whitespace before the next
                  token.  */
1215           result->flags |= PREV_WHITE;
1216           goto update_tokens_line;
1217         }
1218
1219       /* Save the comment as a token in its own right.  */
1220       save_comment (pfile, result, comment_start);
1221       /* Don't do MI optimisation.  */
1222       break;
1223
1224     case '<':
           /* When angled_headers is set, '<' opens a header name that is
              lexed like a string terminated by '>'.  */
1225       if (pfile->state.angled_headers)
1226         {
1227           result->type = CPP_HEADER_NAME;
1228           c = '>';              /* terminator.  */
1229           goto make_string;
1230         }
1231
1232       result->type = CPP_LESS;
1233       c = get_effective_char (pfile);
1234       if (c == '=')
1235         ACCEPT_CHAR (CPP_LESS_EQ);
1236       else if (c == '<')
1237         {
1238           ACCEPT_CHAR (CPP_LSHIFT);
1239           if (get_effective_char (pfile) == '=')
1240             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1241         }
1242       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1243         {
1244           ACCEPT_CHAR (CPP_MIN);
1245           if (get_effective_char (pfile) == '=')
1246             ACCEPT_CHAR (CPP_MIN_EQ);
1247         }
1248       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1249         {
1250           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1251           result->flags |= DIGRAPH;
1252         }
1253       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1254         {
1255           ACCEPT_CHAR (CPP_OPEN_BRACE);
1256           result->flags |= DIGRAPH;
1257         }
1258       break;
1259
1260     case '>':
1261       result->type = CPP_GREATER;
1262       c = get_effective_char (pfile);
1263       if (c == '=')
1264         ACCEPT_CHAR (CPP_GREATER_EQ);
1265       else if (c == '>')
1266         {
1267           ACCEPT_CHAR (CPP_RSHIFT);
1268           if (get_effective_char (pfile) == '=')
1269             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1270         }
1271       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1272         {
1273           ACCEPT_CHAR (CPP_MAX);
1274           if (get_effective_char (pfile) == '=')
1275             ACCEPT_CHAR (CPP_MAX_EQ);
1276         }
1277       break;
1278
1279     case '%':
1280       lex_percent (pfile, result);
1281       break;
1282
1283     case '.':
1284       lex_dot (pfile, result);
1285       break;
1286
1287     case '+':
1288       result->type = CPP_PLUS;
1289       c = get_effective_char (pfile);
1290       if (c == '=')
1291         ACCEPT_CHAR (CPP_PLUS_EQ);
1292       else if (c == '+')
1293         ACCEPT_CHAR (CPP_PLUS_PLUS);
1294       break;
1295
1296     case '-':
1297       result->type = CPP_MINUS;
1298       c = get_effective_char (pfile);
1299       if (c == '>')
1300         {
1301           ACCEPT_CHAR (CPP_DEREF);
1302           if (CPP_OPTION (pfile, cplusplus)
1303               && get_effective_char (pfile) == '*')
1304             ACCEPT_CHAR (CPP_DEREF_STAR);
1305         }
1306       else if (c == '=')
1307         ACCEPT_CHAR (CPP_MINUS_EQ);
1308       else if (c == '-')
1309         ACCEPT_CHAR (CPP_MINUS_MINUS);
1310       break;
1311
1312     case '*':
1313       result->type = CPP_MULT;
1314       if (get_effective_char (pfile) == '=')
1315         ACCEPT_CHAR (CPP_MULT_EQ);
1316       break;
1317
1318     case '=':
1319       result->type = CPP_EQ;
1320       if (get_effective_char (pfile) == '=')
1321         ACCEPT_CHAR (CPP_EQ_EQ);
1322       break;
1323
1324     case '!':
1325       result->type = CPP_NOT;
1326       if (get_effective_char (pfile) == '=')
1327         ACCEPT_CHAR (CPP_NOT_EQ);
1328       break;
1329
1330     case '&':
1331       result->type = CPP_AND;
1332       c = get_effective_char (pfile);
1333       if (c == '=')
1334         ACCEPT_CHAR (CPP_AND_EQ);
1335       else if (c == '&')
1336         ACCEPT_CHAR (CPP_AND_AND);
1337       break;
1338
1339     case '#':
1340       result->type = CPP_HASH;
1341       if (get_effective_char (pfile) == '#')
1342           ACCEPT_CHAR (CPP_PASTE);
1343       break;
1344
1345     case '|':
1346       result->type = CPP_OR;
1347       c = get_effective_char (pfile);
1348       if (c == '=')
1349         ACCEPT_CHAR (CPP_OR_EQ);
1350       else if (c == '|')
1351         ACCEPT_CHAR (CPP_OR_OR);
1352       break;
1353
1354     case '^':
1355       result->type = CPP_XOR;
1356       if (get_effective_char (pfile) == '=')
1357         ACCEPT_CHAR (CPP_XOR_EQ);
1358       break;
1359
1360     case ':':
1361       result->type = CPP_COLON;
1362       c = get_effective_char (pfile);
1363       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1364         ACCEPT_CHAR (CPP_SCOPE);
1365       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1366         {
1367           result->flags |= DIGRAPH;
1368           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1369         }
1370       break;
1371
1372     case '~': result->type = CPP_COMPL; break;
1373     case ',': result->type = CPP_COMMA; break;
1374     case '(': result->type = CPP_OPEN_PAREN; break;
1375     case ')': result->type = CPP_CLOSE_PAREN; break;
1376     case '[': result->type = CPP_OPEN_SQUARE; break;
1377     case ']': result->type = CPP_CLOSE_SQUARE; break;
1378     case '{': result->type = CPP_OPEN_BRACE; break;
1379     case '}': result->type = CPP_CLOSE_BRACE; break;
1380     case ';': result->type = CPP_SEMICOLON; break;
1381
1382       /* @ is a punctuator in Objective C.  */
1383     case '@': result->type = CPP_ATSIGN; break;
1384
           /* Anything not handled above becomes a CPP_OTHER token that
              records the character itself.  */
1385     random_char:
1386     default:
1387       result->type = CPP_OTHER;
1388       result->val.c = c;
1389       break;
1390     }
1391
1392   return result;
1393 }
1394
1395 /* An upper bound on the number of bytes needed to spell a token,
1396    including preceding whitespace.  Tokens spelled as strings or
        identifiers contribute their stored length; all other tokens
        contribute only the fixed allowance below.  */
1397 unsigned int
1398 cpp_token_len (token)
1399      const cpp_token *token;
1400 {
1401   unsigned int len;
1402
1403   switch (TOKEN_SPELL (token))
1404     {
1405     default:            len = 0;                                break;
1406     case SPELL_STRING:  len = token->val.str.len;               break;
1407     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1408     }
1409   /* 1 for whitespace, 4 for comment delimiters.  */
1410   return len + 5;
1411 }
1412
1413 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1414    already contain enough space to hold the token's spelling.
1415    Returns a pointer to the character after the last character
1416    written.  No terminating NUL is written.  PFILE is used only to
        report an internal error for tokens that have no spelling.  */
1417 unsigned char *
1418 cpp_spell_token (pfile, token, buffer)
1419      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1420      const cpp_token *token;
1421      unsigned char *buffer;
1422 {
1423   switch (TOKEN_SPELL (token))
1424     {
1425     case SPELL_OPERATOR:
1426       {
1427         const unsigned char *spelling;
1428         unsigned char c;
1429
             /* Digraphs have their own spelling table, indexed relative
                to CPP_FIRST_DIGRAPH; named operators are spelled from
                their identifier node instead.  */
1430         if (token->flags & DIGRAPH)
1431           spelling
1432             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1433         else if (token->flags & NAMED_OP)
1434           goto spell_ident;
1435         else
1436           spelling = TOKEN_NAME (token);
1437
             /* Copy the NUL-terminated spelling, excluding the NUL.  */
1438         while ((c = *spelling++) != '\0')
1439           *buffer++ = c;
1440       }
1441       break;
1442
1443     case SPELL_IDENT:
1444       spell_ident:
1445       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1446       buffer += NODE_LEN (token->val.node);
1447       break;
1448
1449     case SPELL_STRING:
1450       {
             /* LEFT and RIGHT are the delimiters to emit around the text,
                TAG an optional 'L' prefix; '\0' means emit nothing.  */
1451         int left, right, tag;
1452         switch (token->type)
1453           {
1454           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1455           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1456           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1457           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1458           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1459           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1460           }
1461         if (tag) *buffer++ = tag;
1462         if (left) *buffer++ = left;
1463         memcpy (buffer, token->val.str.text, token->val.str.len);
1464         buffer += token->val.str.len;
1465         if (right) *buffer++ = right;
1466       }
1467       break;
1468
1469     case SPELL_CHAR:
1470       *buffer++ = token->val.c;
1471       break;
1472
1473     case SPELL_NONE:
           /* Nothing is written for a token with no spelling.  */
1474       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1475       break;
1476     }
1477
1478   return buffer;
1479 }
1480
1481 /* Returns a token as a null-terminated string.  The string is
1482    temporary, and automatically freed later.  Useful for diagnostics.
        The storage comes from PFILE's ident_pool and is sized with
        cpp_token_len.  */
1483 unsigned char *
1484 cpp_token_as_text (pfile, token)
1485      cpp_reader *pfile;
1486      const cpp_token *token;
1487 {
1488   unsigned int len = cpp_token_len (token);
1489   unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1490
       /* cpp_spell_token does not terminate the string; do it here.  */
1491   end = cpp_spell_token (pfile, token, start);
1492   end[0] = '\0';
1493
1494   return start;
1495 }
1496
1497 /* Used by C front ends.  Should really move to using cpp_token_as_text.
        Returns the name stored in token_spellings for TYPE.  TYPE is
        used as an index without any range check.  */
1498 const char *
1499 cpp_type2name (type)
1500      enum cpp_ttype type;
1501 {
1502   return (const char *) token_spellings[type].name;
1503 }
1504
1505 /* Writes the spelling of token to FP, without any preceding space.
1506    Separated from cpp_spell_token for efficiency - to avoid stdio
1507    double-buffering.  The cases mirror those of cpp_spell_token,
        except that a token with no spelling is silently ignored.  */
1508 void
1509 cpp_output_token (token, fp)
1510      const cpp_token *token;
1511      FILE *fp;
1512 {
1513   switch (TOKEN_SPELL (token))
1514     {
1515     case SPELL_OPERATOR:
1516       {
1517         const unsigned char *spelling;
1518
             /* Digraphs use their own spelling table; named operators
                are written from their identifier node.  */
1519         if (token->flags & DIGRAPH)
1520           spelling
1521             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1522         else if (token->flags & NAMED_OP)
1523           goto spell_ident;
1524         else
1525           spelling = TOKEN_NAME (token);
1526
1527         ufputs (spelling, fp);
1528       }
1529       break;
1530
1531     spell_ident:
1532     case SPELL_IDENT:
           /* NOTE(review): ufputs is given no length, so the node name is
              presumably NUL-terminated -- confirm.  */
1533       ufputs (NODE_NAME (token->val.node), fp);
1534     break;
1535
1536     case SPELL_STRING:
1537       {
             /* LEFT and RIGHT are the delimiters to emit around the text,
                TAG an optional 'L' prefix; '\0' means emit nothing.  */
1538         int left, right, tag;
1539         switch (token->type)
1540           {
1541           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1542           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1543           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1544           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1545           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1546           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1547           }
1548         if (tag) putc (tag, fp);
1549         if (left) putc (left, fp);
1550         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1551         if (right) putc (right, fp);
1552       }
1553       break;
1554
1555     case SPELL_CHAR:
1556       putc (token->val.c, fp);
1557       break;
1558
1559     case SPELL_NONE:
1560       /* An error, most probably.  */
1561       break;
1562     }
1563 }
1564
1565 /* Compare two tokens.  Returns 1 if A and B have the same type, the
        same flags and the same value (compared according to how the
        token type is spelled), and 0 otherwise.  */
1566 int
1567 _cpp_equiv_tokens (a, b)
1568      const cpp_token *a, *b;
1569 {
1570   if (a->type == b->type && a->flags == b->flags)
1571     switch (TOKEN_SPELL (a))
1572       {
1573       default:                  /* Keep compiler happy.  */
1574       case SPELL_OPERATOR:
1575         return 1;
1576       case SPELL_CHAR:
1577         return a->val.c == b->val.c; /* Character.  */
1578       case SPELL_NONE:
             /* Only macro arguments carry a value to compare here.  */
1579         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1580       case SPELL_IDENT:
             /* Identifiers are compared by node pointer.  */
1581         return a->val.node == b->val.node;
1582       case SPELL_STRING:
1583         return (a->val.str.len == b->val.str.len
1584                 && !memcmp (a->val.str.text, b->val.str.text,
1585                             a->val.str.len));
1586       }
1587
1588   return 0;
1589 }
1590
1591 /* Determine whether two tokens can be pasted together, and if so,
1592    what the resulting token is.  Returns CPP_EOF if the tokens cannot
1593    be pasted, or the appropriate type for the merged token if they
1594    can.

        *DIGRAPH is written only when the result comes from one of the
        digraph rules or from pasting two '#' tokens; in every other case
        it is left untouched.  */
1595 enum cpp_ttype
1596 cpp_can_paste (pfile, token1, token2, digraph)
1597      cpp_reader * pfile;
1598      const cpp_token *token1, *token2;
1599      int* digraph;
1600 {
1601   enum cpp_ttype a = token1->type, b = token2->type;
1602   int cxx = CPP_OPTION (pfile, cplusplus);
1603
1604   /* Treat named operators as if they were ordinary NAMEs.  */
1605   if (token1->flags & NAMED_OP)
1606     a = CPP_NAME;
1607   if (token2->flags & NAMED_OP)
1608     b = CPP_NAME;
1609
       /* Every type up to CPP_LAST_EQ pastes with '='; the result is
          found by adding the distance between CPP_EQ and CPP_EQ_EQ.
          This presumably relies on the token enumeration being laid out
          so that each "X=" type sits at that offset from "X" -- confirm
          in cpplib.h.  */
1610   if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1611     return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1612
1613   switch (a)
1614     {
1615     case CPP_GREATER:
1616       if (b == a) return CPP_RSHIFT;
1617       if (b == CPP_QUERY && cxx)        return CPP_MAX;
1618       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
1619       break;
1620     case CPP_LESS:
1621       if (b == a) return CPP_LSHIFT;
1622       if (b == CPP_QUERY && cxx)        return CPP_MIN;
1623       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
1624       if (CPP_OPTION (pfile, digraphs))
1625         {
1626           if (b == CPP_COLON)
1627             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1628           if (b == CPP_MOD)
1629             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
1630         }
1631       break;
1632
1633     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
1634     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
1635     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
1636
1637     case CPP_MINUS:
1638       if (b == a)               return CPP_MINUS_MINUS;
1639       if (b == CPP_GREATER)     return CPP_DEREF;
1640       break;
1641     case CPP_COLON:
1642       if (b == a && cxx)        return CPP_SCOPE;
1643       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1644         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1645       break;
1646
1647     case CPP_MOD:
1648       if (CPP_OPTION (pfile, digraphs))
1649         {
1650           if (b == CPP_GREATER)
1651             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
1652           if (b == CPP_COLON)
1653             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
1654         }
1655       break;
1656     case CPP_DEREF:
1657       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1658       break;
1659     case CPP_DOT:
1660       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1661       if (b == CPP_NUMBER)      return CPP_NUMBER;
1662       break;
1663
1664     case CPP_HASH:
           /* Two '#' tokens paste only if both are written the same way
              (both digraphs or both not); the result inherits that
              spelling.  */
1665       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1666         /* %:%: digraph */
1667         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1668       break;
1669
1670     case CPP_NAME:
1671       if (b == CPP_NAME)        return CPP_NAME;
           /* A number can extend a name only if name_p accepts its
              spelling.  */
1672       if (b == CPP_NUMBER
1673           && name_p (pfile, &token2->val.str)) return CPP_NAME;
           /* The identifier L followed by a character or string literal
              forms a wide literal.  */
1674       if (b == CPP_CHAR
1675           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1676       if (b == CPP_STRING
1677           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1678       break;
1679
1680     case CPP_NUMBER:
1681       if (b == CPP_NUMBER)      return CPP_NUMBER;
1682       if (b == CPP_NAME)        return CPP_NUMBER;
1683       if (b == CPP_DOT)         return CPP_NUMBER;
1684       /* Numbers cannot have length zero, so this is safe.  */
1685       if ((b == CPP_PLUS || b == CPP_MINUS)
1686           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1687         return CPP_NUMBER;
1688       break;
1689
1690     default:
1691       break;
1692     }
1693
1694   return CPP_EOF;
1695 }
1696
1697 /* Returns nonzero if a space should be inserted to avoid an
1698    accidental token paste for output.  For simplicity, it is
1699    conservative, and occasionally advises a space where one is not
1700    needed, e.g. "." and ".2".  TOKEN1 is the token already written
        and TOKEN2 the one that would follow it.  */
1701
1702 int
1703 cpp_avoid_paste (pfile, token1, token2)
1704      cpp_reader *pfile;
1705      const cpp_token *token1, *token2;
1706 {
1707   enum cpp_ttype a = token1->type, b = token2->type;
1708   cppchar_t c;
1709
       /* Named operators are spelled as identifiers, so treat them as
          names.  */
1710   if (token1->flags & NAMED_OP)
1711     a = CPP_NAME;
1712   if (token2->flags & NAMED_OP)
1713     b = CPP_NAME;
1714
       /* C is the first character of TOKEN2's spelling when TOKEN2 is a
          digraph or an operator, and EOF otherwise.  */
1715   c = EOF;
1716   if (token2->flags & DIGRAPH)
1717     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1718   else if (token_spellings[b].category == SPELL_OPERATOR)
1719     c = token_spellings[b].name[0];
1720
1721   /* Quickly get everything that can paste with an '='.  */
1722   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1723     return 1;
1724
1725   switch (a)
1726     {
1727     case CPP_GREATER:   return c == '>' || c == '?';
1728     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1729     case CPP_PLUS:      return c == '+';
1730     case CPP_MINUS:     return c == '-' || c == '>';
1731     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1732     case CPP_MOD:       return c == ':' || c == '>';
1733     case CPP_AND:       return c == '&';
1734     case CPP_OR:        return c == '|';
1735     case CPP_COLON:     return c == ':' || c == '>';
1736     case CPP_DEREF:     return c == '*';
1737     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1738     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1739     case CPP_NAME:      return ((b == CPP_NUMBER
1740                                  && name_p (pfile, &token2->val.str))
1741                                 || b == CPP_NAME
1742                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1743     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1744                                 || c == '.' || c == '+' || c == '-');
         /* A CPP_OTHER '@' needs separating from a following name or
            string only when the objc option is on.  */
1745     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1746                                 && token1->val.c == '@'
1747                                 && (b == CPP_NAME || b == CPP_STRING));
1748     default:            break;
1749     }
1750
1751   return 0;
1752 }
1753
1754 /* Output all the remaining tokens on the current line, and a newline
1755    character, to FP.  Leading whitespace is removed.  If there are
1756    macros, special token padding is not performed.  Tokens are read
        with cpp_get_token until CPP_EOF is returned.  */
1757 void
1758 cpp_output_line (pfile, fp)
1759      cpp_reader *pfile;
1760      FILE *fp;
1761 {
1762   const cpp_token *token;
1763
1764   token = cpp_get_token (pfile);
1765   while (token->type != CPP_EOF)
1766     {
1767       cpp_output_token (token, fp);
           /* The separating space is decided by the NEXT token's
              PREV_WHITE flag, which is why whitespace before the first
              token is never written.  The flag is tested before the loop
              condition, so a CPP_EOF carrying PREV_WHITE yields a
              trailing space.  */
1768       token = cpp_get_token (pfile);
1769       if (token->flags & PREV_WHITE)
1770         putc (' ', fp);
1771     }
1772
1773   putc ('\n', fp);
1774 }
1775
1776 /* Returns the value of a hexadecimal digit.  C must be one of
        '0'-'9', 'a'-'f' or 'A'-'F'; any other value aborts.  The
        arithmetic assumes each of those three ranges is contiguous in
        the character set.  */
1777 static unsigned int
1778 hex_digit_value (c)
1779      unsigned int c;
1780 {
1781   if (c >= 'a' && c <= 'f')
1782     return c - 'a' + 10;
1783   if (c >= 'A' && c <= 'F')
1784     return c - 'A' + 10;
1785   if (c >= '0' && c <= '9')
1786     return c - '0';
       /* Not a hexadecimal digit: callers are expected to have checked.  */
1787   abort ();
1788 }
1789
1790 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1791    failure if cpplib is not parsing C++ or C99.  Such failure is
1792    silent, and no variables are updated.  Otherwise returns 0, and
1793    warns if -Wtraditional.
1794
1795    [lex.charset]: The character designated by the universal character
1796    name \UNNNNNNNN is that character whose character short name in
1797    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1798    universal character name \uNNNN is that character whose character
1799    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1800    for a universal character name is less than 0x20 or in the range
1801    0x7F-0x9F (inclusive), or if the universal character name
1802    designates a character in the basic source character set, then the
1803    program is ill-formed.
1804
1805    We assume that wchar_t is Unicode, so we don't need to do any
1806    mapping.  Is this ever wrong?
1807
1808    PC points to the 'u' or 'U', PSTR points to the byte after PC,
1809    LIMIT is the end of the string or charconst.  PSTR is updated to
1810    point after the UCS on return, and the UCS is written into PC.

        When 0 is returned, *PSTR and *PC are updated even if an error
        was diagnosed; *PC then holds whatever value had been
        accumulated before the error.  */
1811
1812 static int
1813 maybe_read_ucs (pfile, pstr, limit, pc)
1814      cpp_reader *pfile;
1815      const unsigned char **pstr;
1816      const unsigned char *limit;
1817      unsigned int *pc;
1818 {
1819   const unsigned char *p = *pstr;
1820   unsigned int code = 0;
1821   unsigned int c = *pc, length;
1822
1823   /* Only attempt to interpret a UCS for C++ and C99.  */
1824   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1825     return 1;
1826
1827   if (CPP_WTRADITIONAL (pfile))
1828     cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1829
       /* '\u' takes four hex digits; anything else (i.e. '\U') eight.  */
1830   length = (c == 'u' ? 4: 8);
1831
1832   if ((size_t) (limit - p) < length)
1833     {
1834       cpp_error (pfile, "incomplete universal-character-name");
1835       /* Skip to the end to avoid more diagnostics.  */
1836       p = limit;
1837     }
1838   else
1839     {
           /* LENGTH counts down to zero only if every digit is valid; a
              nonzero LENGTH afterwards means an error was already
              reported.  */
1840       for (; length; length--, p++)
1841         {
1842           c = *p;
1843           if (ISXDIGIT (c))
1844             code = (code << 4) + hex_digit_value (c);
1845           else
1846             {
1847               cpp_error (pfile,
1848                          "non-hex digit '%c' in universal-character-name", c);
1849               /* We shouldn't skip in case there are multibyte chars.  */
1850               break;
1851             }
1852         }
1853     }
1854
1855 #ifdef TARGET_EBCDIC
       /* On EBCDIC targets every UCN is rejected and replaced.  */
1856   cpp_error (pfile, "universal-character-name on EBCDIC target");
1857   code = 0x3f;  /* EBCDIC invalid character */
1858 #else
1859  /* True extended characters are OK.  */
1860   if (code >= 0xa0
1861       && !(code & 0x80000000)
1862       && !(code >= 0xD800 && code <= 0xDFFF))
1863     ;
1864   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1865      hex escapes so that this also works with EBCDIC hosts.  */
1866   else if (code == 0x24 || code == 0x40 || code == 0x60)
1867     ;
1868   /* Don't give another error if one occurred above.  */
1869   else if (length == 0)
1870     cpp_error (pfile, "universal-character-name out of range");
1871 #endif
1872
1873   *pstr = p;
1874   *pc = code;
1875   return 0;
1876 }
1877
1878 /* Interpret an escape sequence, and return its value.  PSTR points to
1879    the input pointer, which is just after the backslash.  LIMIT is how
1880    much text we have.  MASK is a bitmask for the precision for the
1881    destination type (char or wchar_t).  TRADITIONAL, if true, does not
1882    interpret escapes that did not exist in traditional C.
1883
1884    Handles all relevant diagnostics.  */
1885
1886 unsigned int
1887 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1888      cpp_reader *pfile;
1889      const unsigned char **pstr;
1890      const unsigned char *limit;
1891      unsigned HOST_WIDE_INT mask;
1892      int traditional;
1893 {
1894   int unknown = 0;
1895   const unsigned char *str = *pstr;
1896   unsigned int c = *str++;
1897
1898   switch (c)
1899     {
1900     case '\\': case '\'': case '"': case '?': break;
1901     case 'b': c = TARGET_BS;      break;
1902     case 'f': c = TARGET_FF;      break;
1903     case 'n': c = TARGET_NEWLINE; break;
1904     case 'r': c = TARGET_CR;      break;
1905     case 't': c = TARGET_TAB;     break;
1906     case 'v': c = TARGET_VT;      break;
1907
1908     case '(': case '{': case '[': case '%':
1909       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1910          '\%' is used to prevent SCCS from getting confused.  */
1911       unknown = CPP_PEDANTIC (pfile);
1912       break;
1913
1914     case 'a':
1915       if (CPP_WTRADITIONAL (pfile))
1916         cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1917       if (!traditional)
1918         c = TARGET_BELL;
1919       break;
1920
1921     case 'e': case 'E':
1922       if (CPP_PEDANTIC (pfile))
1923         cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1924       c = TARGET_ESC;
1925       break;
1926
1927     case 'u': case 'U':
1928       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1929       break;
1930
1931     case 'x':
1932       if (CPP_WTRADITIONAL (pfile))
1933         cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1934
1935       if (!traditional)
1936         {
1937           unsigned int i = 0, overflow = 0;
1938           int digits_found = 0;
1939
1940           while (str < limit)
1941             {
1942               c = *str;
1943               if (! ISXDIGIT (c))
1944                 break;
1945               str++;
1946               overflow |= i ^ (i << 4 >> 4);
1947               i = (i << 4) + hex_digit_value (c);
1948               digits_found = 1;
1949             }
1950
1951           if (!digits_found)
1952             cpp_error (pfile, "\\x used with no following hex digits");
1953
1954           if (overflow | (i != (i & mask)))
1955             {
1956               cpp_pedwarn (pfile, "hex escape sequence out of range");
1957               i &= mask;
1958             }
1959           c = i;
1960         }
1961       break;
1962
1963     case '0':  case '1':  case '2':  case '3':
1964     case '4':  case '5':  case '6':  case '7':
1965       {
1966         unsigned int i = c - '0';
1967         int count = 0;
1968
1969         while (str < limit && ++count < 3)
1970           {
1971             c = *str;
1972             if (c < '0' || c > '7')
1973               break;
1974             str++;
1975             i = (i << 3) + c - '0';
1976           }
1977
1978         if (i != (i & mask))
1979           {
1980             cpp_pedwarn (pfile, "octal escape sequence out of range");
1981             i &= mask;
1982           }
1983         c = i;
1984       }
1985       break;
1986
1987     default:
1988       unknown = 1;
1989       break;
1990     }
1991
1992   if (unknown)
1993     {
1994       if (ISGRAPH (c))
1995         cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1996       else
1997         cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1998     }
1999
2000   if (c > mask)
2001     cpp_pedwarn (pfile, "escape sequence out of range for character");
2002
2003   *pstr = str;
2004   return c;
2005 }
2006
/* Unless already defined, the maximum character widths default to
   CHAR_TYPE_SIZE and WCHAR_TYPE_SIZE.  */
#if !defined (MAX_CHAR_TYPE_SIZE)
# define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#if !defined (MAX_WCHAR_TYPE_SIZE)
# define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
2014
2015 /* Interpret a (possibly wide) character constant in TOKEN.
2016    WARN_MULTI warns about multi-character charconsts, if not
2017    TRADITIONAL.  TRADITIONAL also indicates not to interpret escapes
2018    that did not exist in traditional C.  PCHARS_SEEN points to a
2019    variable that is filled in with the number of characters seen.  */
2020 HOST_WIDE_INT
2021 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
2022      cpp_reader *pfile;
2023      const cpp_token *token;
2024      int warn_multi;
2025      int traditional;
2026      unsigned int *pchars_seen;
2027 {
2028   const unsigned char *str = token->val.str.text;
2029   const unsigned char *limit = str + token->val.str.len;
2030   unsigned int chars_seen = 0;
2031   unsigned int width, max_chars, c;
2032   unsigned HOST_WIDE_INT mask;
2033   HOST_WIDE_INT result = 0;
2034
2035 #ifdef MULTIBYTE_CHARS
2036   (void) local_mbtowc (NULL, NULL, 0);
2037 #endif
2038
2039   /* Width in bits.  */
2040   if (token->type == CPP_CHAR)
2041     width = MAX_CHAR_TYPE_SIZE;
2042   else
2043     width = MAX_WCHAR_TYPE_SIZE;
2044
2045   if (width < HOST_BITS_PER_WIDE_INT)
2046     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
2047   else
2048     mask = ~0;
2049   max_chars = HOST_BITS_PER_WIDE_INT / width;
2050
2051   while (str < limit)
2052     {
2053 #ifdef MULTIBYTE_CHARS
2054       wchar_t wc;
2055       int char_len;
2056
2057       char_len = local_mbtowc (&wc, str, limit - str);
2058       if (char_len == -1)
2059         {
2060           cpp_warning (pfile, "ignoring invalid multibyte character");
2061           c = *str++;
2062         }
2063       else
2064         {
2065           str += char_len;
2066           c = wc;
2067         }
2068 #else
2069       c = *str++;
2070 #endif
2071
2072       if (c == '\\')
2073         c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
2074
2075 #ifdef MAP_CHARACTER
2076       if (ISPRINT (c))
2077         c = MAP_CHARACTER (c);
2078 #endif
2079
2080       /* Merge character into result; ignore excess chars.  */
2081       if (++chars_seen <= max_chars)
2082         {
2083           if (width < HOST_BITS_PER_WIDE_INT)
2084             result = (result << width) | (c & mask);
2085           else
2086             result = c;
2087         }
2088     }
2089
2090   if (chars_seen == 0)
2091     cpp_error (pfile, "empty character constant");
2092   else if (chars_seen > max_chars)
2093     {
2094       chars_seen = max_chars;
2095       cpp_warning (pfile, "character constant too long");
2096     }
2097   else if (chars_seen > 1 && !traditional && warn_multi)
2098     cpp_warning (pfile, "multi-character character constant");
2099
2100   /* If char type is signed, sign-extend the constant.  The
2101      __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
2102   if (token->type == CPP_CHAR && chars_seen)
2103     {
2104       unsigned int nbits = chars_seen * width;
2105       unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
2106
2107       if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2108           || ((result >> (nbits - 1)) & 1) == 0)
2109         result &= mask;
2110       else
2111         result |= ~mask;
2112     }
2113
2114   *pchars_seen = chars_seen;
2115   return result;
2116 }
2117
/* Memory buffers.  */

/* Layout probe used only to compute DEFAULT_ALIGNMENT: a single char
   followed by a union of a double and a pointer.  The offset at which
   the compiler places U is taken as the default alignment.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

/* Offset of the union within struct dummy.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))

/* Round NBYTES up to the next multiple of BOUNDARY.  The bit trick
   is only correct when BOUNDARY is a power of two.  */
#define CPP_ALIGN(nbytes, boundary) \
  (((nbytes) + ((boundary) - 1)) & ~((boundary) - 1))
2132
2133 /* Create a new allocation buffer.  */
2134 static _cpp_buff *
2135 new_buff (len)
2136      unsigned int len;
2137 {
2138   _cpp_buff *result;
2139   char *base;
2140
2141   if (len < 4000)
2142     len = 4000;
2143   len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
2144
2145   base = xmalloc (len + sizeof (_cpp_buff));
2146   result = (_cpp_buff *) (base + len);
2147   result->base = base;
2148   result->cur = base;
2149   result->limit = base + len;
2150   result->next = NULL;
2151   return result;
2152 }
2153
2154 /* Place a chain of unwanted allocation buffers on the free list.  */
2155 void
2156 _cpp_release_buff (pfile, buff)
2157      cpp_reader *pfile;
2158      _cpp_buff *buff;
2159 {
2160   _cpp_buff *end = buff;
2161
2162   while (end->next)
2163     end = end->next;
2164   end->next = pfile->free_buffs;
2165   pfile->free_buffs = buff;
2166 }
2167
2168 /* Return a free buffer of size at least MIN_SIZE.  */
2169 _cpp_buff *
2170 _cpp_get_buff (pfile, min_size)
2171      cpp_reader *pfile;
2172      unsigned int min_size;
2173 {
2174   _cpp_buff *result, **p;
2175
2176   for (p = &pfile->free_buffs;; p = &(*p)->next)
2177     {
2178       if (*p == NULL || (*p)->next == NULL)
2179         return new_buff (min_size);
2180       result = (*p)->next;
2181       if ((unsigned int) (result->limit - result->base) > min_size)
2182         break;
2183     }
2184
2185   *p = result->next;
2186   result->next = NULL;
2187   result->cur = result->base;
2188   return result;
2189 }
2190
2191 /* Return a buffer chained on the end of BUFF.  Copy to it the
2192    uncommitted remaining bytes of BUFF, with at least MIN_EXTRA more
2193    bytes.  */
2194 _cpp_buff *
2195 _cpp_extend_buff (pfile, buff, min_extra)
2196      cpp_reader *pfile;
2197      _cpp_buff *buff;
2198      unsigned int min_extra;
2199 {
2200   unsigned int size = min_extra + (buff->limit - buff->cur) * 2;
2201
2202   buff->next = _cpp_get_buff (pfile, size);
2203   memcpy (buff->next->base, buff->cur, buff->limit - buff->cur);
2204   return buff->next;
2205 }
2206
2207 /* Free a chain of buffers starting at BUFF.  */
2208 void
2209 _cpp_free_buff (buff)
2210      _cpp_buff *buff;
2211 {
2212   _cpp_buff *next;
2213
2214   for (; buff; buff = next)
2215     {
2216       next = buff->next;
2217       free (buff->base);
2218     }
2219 }
2220
2221 static int
2222 chunk_suitable (pool, chunk, size)
2223      cpp_pool *pool;
2224      cpp_chunk *chunk;
2225      unsigned int size;
2226 {
2227   /* Being at least twice SIZE means we can use memcpy in
2228      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
2229      anyway.  */
2230   return (chunk && pool->locked != chunk
2231           && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2232 }
2233
2234 /* Returns the end of the new pool.  PTR points to a char in the old
2235    pool, and is updated to point to the same char in the new pool.  */
2236 unsigned char *
2237 _cpp_next_chunk (pool, len, ptr)
2238      cpp_pool *pool;
2239      unsigned int len;
2240      unsigned char **ptr;
2241 {
2242   cpp_chunk *chunk = pool->cur->next;
2243
2244   /* LEN is the minimum size we want in the new pool.  */
2245   len += POOL_ROOM (pool);
2246   if (! chunk_suitable (pool, chunk, len))
2247     {
2248       chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2249
2250       chunk->next = pool->cur->next;
2251       pool->cur->next = chunk;
2252     }
2253
2254   /* Update the pointer before changing chunk's front.  */
2255   if (ptr)
2256     *ptr += chunk->base - POOL_FRONT (pool);
2257
2258   memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2259   chunk->front = chunk->base;
2260
2261   pool->cur = chunk;
2262   return POOL_LIMIT (pool);
2263 }
2264
2265 static cpp_chunk *
2266 new_chunk (size)
2267      unsigned int size;
2268 {
2269   unsigned char *base;
2270   cpp_chunk *result;
2271
2272   size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2273   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2274   /* Put the chunk descriptor at the end.  Then chunk overruns will
2275      cause obvious chaos.  */
2276   result = (cpp_chunk *) (base + size);
2277   result->base = base;
2278   result->front = base;
2279   result->limit = base + size;
2280   result->next = 0;
2281
2282   return result;
2283 }
2284
2285 void
2286 _cpp_init_pool (pool, size, align, temp)
2287      cpp_pool *pool;
2288      unsigned int size, align, temp;
2289 {
2290   if (align == 0)
2291     align = DEFAULT_ALIGNMENT;
2292   if (align & (align - 1))
2293     abort ();
2294   pool->align = align;
2295   pool->first = new_chunk (size);
2296   pool->cur = pool->first;
2297   pool->locked = 0;
2298   pool->locks = 0;
2299   if (temp)
2300     pool->cur->next = pool->cur;
2301 }
2302
2303 void
2304 _cpp_lock_pool (pool)
2305      cpp_pool *pool;
2306 {
2307   if (pool->locks++ == 0)
2308     pool->locked = pool->cur;
2309 }
2310
2311 void
2312 _cpp_unlock_pool (pool)
2313      cpp_pool *pool;
2314 {
2315   if (--pool->locks == 0)
2316     pool->locked = 0;
2317 }
2318
2319 void
2320 _cpp_free_pool (pool)
2321      cpp_pool *pool;
2322 {
2323   cpp_chunk *chunk = pool->first, *next;
2324
2325   do
2326     {
2327       next = chunk->next;
2328       free (chunk->base);
2329       chunk = next;
2330     }
2331   while (chunk && chunk != pool->first);
2332 }
2333
2334 /* Reserve LEN bytes from a memory pool.  */
2335 unsigned char *
2336 _cpp_pool_reserve (pool, len)
2337      cpp_pool *pool;
2338      unsigned int len;
2339 {
2340   len = POOL_ALIGN (len, pool->align);
2341   if (len > (unsigned int) POOL_ROOM (pool))
2342     _cpp_next_chunk (pool, len, 0);
2343
2344   return POOL_FRONT (pool);
2345 }
2346
2347 /* Allocate LEN bytes from a memory pool.  */
2348 unsigned char *
2349 _cpp_pool_alloc (pool, len)
2350      cpp_pool *pool;
2351      unsigned int len;
2352 {
2353   unsigned char *result = _cpp_pool_reserve (pool, len);
2354
2355   POOL_COMMIT (pool, len);
2356   return result;
2357 }