gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /* This lexer works with a single pass of the file.  Recently I
  24    re-wrote it to minimize the places where we step backwards in the
  25    input stream, to make future changes to support multi-byte
  26    character sets fairly straight-forward.
  27
  28    There is now only one routine where we do step backwards:
  29    skip_escaped_newlines.  This routine could probably also be changed
  30    so that it doesn't need to step back.  One possibility is to use a
  31    trick similar to that used in lex_period and lex_percent.  Two
  32    extra characters might be needed, but skip_escaped_newlines itself
  33    would probably be the only place that needs to be aware of that,
  34    and changes to the remaining routines would probably only be needed
  35    if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41
  42 /* MULTIBYTE_CHARS support only works for native compilers.
  43    ??? Ideally what we want is to model widechar support after
  44    the current floating point support.  */
  45 #ifdef CROSS_COMPILE
  46 #undef MULTIBYTE_CHARS
  47 #endif
  48
  49 #ifdef MULTIBYTE_CHARS
  50 #include "mbchar.h"
  51 #include <locale.h>
  52 #endif
  53
  54 /* Tokens with SPELL_STRING store their spelling in the token list,
  55    and it's length in the token->val.name.len.  */
  56 enum spell_type
  57 {
  58   SPELL_OPERATOR = 0,
  59   SPELL_CHAR,
  60   SPELL_IDENT,
  61   SPELL_STRING,
  62   SPELL_NONE
  63 };
  64
  65 struct token_spelling
  66 {
  67   enum spell_type category;
  68   const unsigned char *name;
  69 };
  70
  71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
  72                                              U":>", U"<%", U"%>"};
  73
  74 #define OP(e, s) { SPELL_OPERATOR, U s           },
  75 #define TK(e, s) { s,              U STRINGX (e) },
  76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
  77 #undef OP
  78 #undef TK
  79
  80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  82
  83 static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
  84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
  85 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
  86
  87 static int skip_block_comment PARAMS ((cpp_reader *));
  88 static int skip_line_comment PARAMS ((cpp_reader *));
  89 static void adjust_column PARAMS ((cpp_reader *));
  90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
  92 static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
  93                                                     const U_CHAR *));
  94 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  95 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  96 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  97 static void unterminated PARAMS ((cpp_reader *, int));
  98 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
  99 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
 100 static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
 101 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
 102 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
 103 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
 104                                    const unsigned char *, unsigned int *));
 105 static cpp_token *lex_token PARAMS ((cpp_reader *, cpp_token *));
 106 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
 107
 108 static cpp_chunk *new_chunk PARAMS ((unsigned int));
 109 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
 110 static unsigned int hex_digit_value PARAMS ((unsigned int));
 111
 112 /* Utility routine:
 113
 114    Compares, the token TOKEN to the NUL-terminated string STRING.
 115    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 116
 117 int
 118 cpp_ideq (token, string)
 119      const cpp_token *token;
 120      const char *string;
 121 {
 122   if (token->type != CPP_NAME)
 123     return 0;
 124
 125   return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
 126 }
 127
 128 /* Call when meeting a newline.  Returns the character after the newline
 129    (or carriage-return newline combination), or EOF.  */
 130 static cppchar_t
 131 handle_newline (pfile, newline_char)
 132      cpp_reader *pfile;
 133      cppchar_t newline_char;
 134 {
 135   cpp_buffer *buffer;
 136   cppchar_t next = EOF;
 137
 138   pfile->line++;
 139   buffer = pfile->buffer;
 140   buffer->col_adjust = 0;
 141   buffer->line_base = buffer->cur;
 142
 143   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 144   if (buffer->cur < buffer->rlimit)
 145     {
 146       next = *buffer->cur++;
 147       if (next + newline_char == '\r' + '\n')
 148         {
 149           buffer->line_base = buffer->cur;
 150           if (buffer->cur < buffer->rlimit)
 151             next = *buffer->cur++;
 152           else
 153             next = EOF;
 154         }
 155     }
 156
 157   buffer->read_ahead = next;
 158   return next;
 159 }
 160
 161 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 162    encountered.  It warns if necessary, and returns true if the
 163    trigraph should be honoured.  FROM_CHAR is the third character of a
 164    trigraph, and presumed to be the previous character for position
 165    reporting.  */
 166 static int
 167 trigraph_ok (pfile, from_char)
 168      cpp_reader *pfile;
 169      cppchar_t from_char;
 170 {
 171   int accept = CPP_OPTION (pfile, trigraphs);
 172
 173   /* Don't warn about trigraphs in comments.  */
 174   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 175     {
 176       cpp_buffer *buffer = pfile->buffer;
 177
 178       if (accept)
 179         cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
 180                                "trigraph ??%c converted to %c",
 181                                (int) from_char,
 182                                (int) _cpp_trigraph_map[from_char]);
 183       else if (buffer->cur != buffer->last_Wtrigraphs)
 184         {
 185           buffer->last_Wtrigraphs = buffer->cur;
 186           cpp_warning_with_line (pfile, pfile->line,
 187                                  CPP_BUF_COL (buffer) - 2,
 188                                  "trigraph ??%c ignored", (int) from_char);
 189         }
 190     }
 191
 192   return accept;
 193 }
 194
 195 /* Assumes local variables buffer and result.  */
 196 #define ACCEPT_CHAR(t) \
 197   do { result->type = t; buffer->read_ahead = EOF; } while (0)
 198
 199 /* When we move to multibyte character sets, add to these something
 200    that saves and restores the state of the multibyte conversion
 201    library.  This probably involves saving and restoring a "cookie".
 202    In the case of glibc it is an 8-byte structure, so is not a high
 203    overhead operation.  In any case, it's out of the fast path.  */
 204 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
 205 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 206
 207 /* Skips any escaped newlines introduced by NEXT, which is either a
 208    '?' or a '\\'.  Returns the next character, which will also have
 209    been placed in buffer->read_ahead.  This routine performs
 210    preprocessing stages 1 and 2 of the ISO C standard.  */
 211 static cppchar_t
 212 skip_escaped_newlines (pfile, next)
 213      cpp_reader *pfile;
 214      cppchar_t next;
 215 {
 216   cpp_buffer *buffer = pfile->buffer;
 217
 218   /* Only do this if we apply stages 1 and 2.  */
 219   if (!buffer->from_stage3)
 220     {
 221       cppchar_t next1;
 222       const unsigned char *saved_cur;
 223       int space;
 224
 225       do
 226         {
 227           if (buffer->cur == buffer->rlimit)
 228             break;
 229
 230           SAVE_STATE ();
 231           if (next == '?')
 232             {
 233               next1 = *buffer->cur++;
 234               if (next1 != '?' || buffer->cur == buffer->rlimit)
 235                 {
 236                   RESTORE_STATE ();
 237                   break;
 238                 }
 239
 240               next1 = *buffer->cur++;
 241               if (!_cpp_trigraph_map[next1]
 242                   || !trigraph_ok (pfile, next1))
 243                 {
 244                   RESTORE_STATE ();
 245                   break;
 246                 }
 247
 248               /* We have a full trigraph here.  */
 249               next = _cpp_trigraph_map[next1];
 250               if (next != '\\' || buffer->cur == buffer->rlimit)
 251                 break;
 252               SAVE_STATE ();
 253             }
 254
 255           /* We have a backslash, and room for at least one more character.  */
 256           space = 0;
 257           do
 258             {
 259               next1 = *buffer->cur++;
 260               if (!is_nvspace (next1))
 261                 break;
 262               space = 1;
 263             }
 264           while (buffer->cur < buffer->rlimit);
 265
 266           if (!is_vspace (next1))
 267             {
 268               RESTORE_STATE ();
 269               break;
 270             }
 271
 272           if (space && !pfile->state.lexing_comment)
 273             cpp_warning (pfile, "backslash and newline separated by space");
 274
 275           next = handle_newline (pfile, next1);
 276           if (next == EOF)
 277             cpp_pedwarn (pfile, "backslash-newline at end of file");
 278         }
 279       while (next == '\\' || next == '?');
 280     }
 281
 282   buffer->read_ahead = next;
 283   return next;
 284 }
 285
 286 /* Obtain the next character, after trigraph conversion and skipping
 287    an arbitrary string of escaped newlines.  The common case of no
 288    trigraphs or escaped newlines falls through quickly.  */
 289 static cppchar_t
 290 get_effective_char (pfile)
 291      cpp_reader *pfile;
 292 {
 293   cpp_buffer *buffer = pfile->buffer;
 294   cppchar_t next = EOF;
 295
 296   if (buffer->cur < buffer->rlimit)
 297     {
 298       next = *buffer->cur++;
 299
 300       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 301          can introduce escaped newlines, which we want to skip, or
 302          UCNs, which, depending upon lexer state, we will handle in
 303          the future.  */
 304       if (next == '?' || next == '\\')
 305         next = skip_escaped_newlines (pfile, next);
 306     }
 307
 308   buffer->read_ahead = next;
 309   return next;
 310 }
 311
 312 /* Skip a C-style block comment.  We find the end of the comment by
 313    seeing if an asterisk is before every '/' we encounter.  Returns
 314    non-zero if comment terminated by EOF, zero otherwise.  */
 315 static int
 316 skip_block_comment (pfile)
 317      cpp_reader *pfile;
 318 {
 319   cpp_buffer *buffer = pfile->buffer;
 320   cppchar_t c = EOF, prevc = EOF;
 321
 322   pfile->state.lexing_comment = 1;
 323   while (buffer->cur != buffer->rlimit)
 324     {
 325       prevc = c, c = *buffer->cur++;
 326
 327     next_char:
 328       /* FIXME: For speed, create a new character class of characters
 329          of interest inside block comments.  */
 330       if (c == '?' || c == '\\')
 331         c = skip_escaped_newlines (pfile, c);
 332
 333       /* People like decorating comments with '*', so check for '/'
 334          instead for efficiency.  */
 335       if (c == '/')
 336         {
 337           if (prevc == '*')
 338             break;
 339
 340           /* Warn about potential nested comments, but not if the '/'
 341              comes immediately before the true comment delimeter.
 342              Don't bother to get it right across escaped newlines.  */
 343           if (CPP_OPTION (pfile, warn_comments)
 344               && buffer->cur != buffer->rlimit)
 345             {
 346               prevc = c, c = *buffer->cur++;
 347               if (c == '*' && buffer->cur != buffer->rlimit)
 348                 {
 349                   prevc = c, c = *buffer->cur++;
 350                   if (c != '/')
 351                     cpp_warning_with_line (pfile, pfile->line,
 352                                            CPP_BUF_COL (buffer) - 2,
 353                                            "\"/*\" within comment");
 354                 }
 355               goto next_char;
 356             }
 357         }
 358       else if (is_vspace (c))
 359         {
 360           prevc = c, c = handle_newline (pfile, c);
 361           goto next_char;
 362         }
 363       else if (c == '\t')
 364         adjust_column (pfile);
 365     }
 366
 367   pfile->state.lexing_comment = 0;
 368   buffer->read_ahead = EOF;
 369   return c != '/' || prevc != '*';
 370 }
 371
 372 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 373    non-zero if a multiline comment.  The following new line, if any,
 374    is left in buffer->read_ahead.  */
 375 static int
 376 skip_line_comment (pfile)
 377      cpp_reader *pfile;
 378 {
 379   cpp_buffer *buffer = pfile->buffer;
 380   unsigned int orig_line = pfile->line;
 381   cppchar_t c;
 382
 383   pfile->state.lexing_comment = 1;
 384   do
 385     {
 386       c = EOF;
 387       if (buffer->cur == buffer->rlimit)
 388         break;
 389
 390       c = *buffer->cur++;
 391       if (c == '?' || c == '\\')
 392         c = skip_escaped_newlines (pfile, c);
 393     }
 394   while (!is_vspace (c));
 395
 396   pfile->state.lexing_comment = 0;
 397   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 398   return orig_line != pfile->line;
 399 }
 400
 401 /* pfile->buffer->cur is one beyond the \t character.  Update
 402    col_adjust so we track the column correctly.  */
 403 static void
 404 adjust_column (pfile)
 405      cpp_reader *pfile;
 406 {
 407   cpp_buffer *buffer = pfile->buffer;
 408   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 409
 410   /* Round it up to multiple of the tabstop, but subtract 1 since the
 411      tab itself occupies a character position.  */
 412   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 413                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 414 }
 415
 416 /* Skips whitespace, saving the next non-whitespace character.
 417    Adjusts pfile->col_adjust to account for tabs.  Without this,
 418    tokens might be assigned an incorrect column.  */
 419 static void
 420 skip_whitespace (pfile, c)
 421      cpp_reader *pfile;
 422      cppchar_t c;
 423 {
 424   cpp_buffer *buffer = pfile->buffer;
 425   unsigned int warned = 0;
 426
 427   do
 428     {
 429       /* Horizontal space always OK.  */
 430       if (c == ' ')
 431         ;
 432       else if (c == '\t')
 433         adjust_column (pfile);
 434       /* Just \f \v or \0 left.  */
 435       else if (c == '\0')
 436         {
 437           if (!warned)
 438             {
 439               cpp_warning (pfile, "null character(s) ignored");
 440               warned = 1;
 441             }
 442         }
 443       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 444         cpp_pedwarn_with_line (pfile, pfile->line,
 445                                CPP_BUF_COL (buffer),
 446                                "%s in preprocessing directive",
 447                                c == '\f' ? "form feed" : "vertical tab");
 448
 449       c = EOF;
 450       if (buffer->cur == buffer->rlimit)
 451         break;
 452       c = *buffer->cur++;
 453     }
 454   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 455   while (is_nvspace (c));
 456
 457   /* Remember the next character.  */
 458   buffer->read_ahead = c;
 459 }
 460
 461 /* See if the characters of a number token are valid in a name (no
 462    '.', '+' or '-').  */
 463 static int
 464 name_p (pfile, string)
 465      cpp_reader *pfile;
 466      const cpp_string *string;
 467 {
 468   unsigned int i;
 469
 470   for (i = 0; i < string->len; i++)
 471     if (!is_idchar (string->text[i]))
 472       return 0;
 473
 474   return 1;
 475 }
 476
 477 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 478    a critical inner loop.  The common case is an identifier which has
 479    not been split by backslash-newline, does not contain a dollar
 480    sign, and has already been scanned (roughly 10:1 ratio of
 481    seen:unseen identifiers in normal code; the distribution is
 482    Poisson-like).  Second most common case is a new identifier, not
 483    split and no dollar sign.  The other possibilities are rare and
 484    have been relegated to parse_identifier_slow.  */
 485
 486 static cpp_hashnode *
 487 parse_identifier (pfile)
 488      cpp_reader *pfile;
 489 {
 490   cpp_hashnode *result;
 491   const U_CHAR *cur, *rlimit;
 492
 493   /* Fast-path loop.  Skim over a normal identifier.
 494      N.B. ISIDNUM does not include $.  */
 495   cur    = pfile->buffer->cur - 1;
 496   rlimit = pfile->buffer->rlimit;
 497   do
 498     cur++;
 499   while (cur < rlimit && ISIDNUM (*cur));
 500
 501   /* Check for slow-path cases.  */
 502   if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
 503     result = parse_identifier_slow (pfile, cur);
 504   else
 505     {
 506       const U_CHAR *base = pfile->buffer->cur - 1;
 507       result = (cpp_hashnode *)
 508         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 509       pfile->buffer->cur = cur;
 510     }
 511
 512   /* Rarely, identifiers require diagnostics when lexed.
 513      XXX Has to be forced out of the fast path.  */
 514   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 515                         && !pfile->state.skipping, 0))
 516     {
 517       /* It is allowed to poison the same identifier twice.  */
 518       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 519         cpp_error (pfile, "attempt to use poisoned \"%s\"",
 520                    NODE_NAME (result));
 521
 522       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 523          replacement list of a variadic macro.  */
 524       if (result == pfile->spec_nodes.n__VA_ARGS__
 525           && !pfile->state.va_args_ok)
 526         cpp_pedwarn (pfile,
 527         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 528     }
 529
 530   return result;
 531 }
 532
 533 /* Slow path.  This handles identifiers which have been split, and
 534    identifiers which contain dollar signs.  The part of the identifier
 535    from PFILE->buffer->cur-1 to CUR has already been scanned.  */
 536 static cpp_hashnode *
 537 parse_identifier_slow (pfile, cur)
 538      cpp_reader *pfile;
 539      const U_CHAR *cur;
 540 {
 541   cpp_buffer *buffer = pfile->buffer;
 542   const U_CHAR *base = buffer->cur - 1;
 543   struct obstack *stack = &pfile->hash_table->stack;
 544   unsigned int c, saw_dollar = 0, len;
 545
 546   /* Copy the part of the token which is known to be okay.  */
 547   obstack_grow (stack, base, cur - base);
 548
 549   /* Now process the part which isn't.  We are looking at one of
 550      '$', '\\', or '?' on entry to this loop.  */
 551   c = *cur++;
 552   buffer->cur = cur;
 553   do
 554     {
 555       while (is_idchar (c))
 556         {
 557           obstack_1grow (stack, c);
 558
 559           if (c == '$')
 560             saw_dollar++;
 561
 562           c = EOF;
 563           if (buffer->cur == buffer->rlimit)
 564             break;
 565
 566           c = *buffer->cur++;
 567         }
 568
 569       /* Potential escaped newline?  */
 570       if (c != '?' && c != '\\')
 571         break;
 572       c = skip_escaped_newlines (pfile, c);
 573     }
 574   while (is_idchar (c));
 575
 576   /* Remember the next character.  */
 577   buffer->read_ahead = c;
 578
 579   /* $ is not a identifier character in the standard, but is commonly
 580      accepted as an extension.  Don't warn about it in skipped
 581      conditional blocks.  */
 582   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 583     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 584
 585   /* Identifiers are null-terminated.  */
 586   len = obstack_object_size (stack);
 587   obstack_1grow (stack, '\0');
 588
 589   return (cpp_hashnode *)
 590     ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
 591 }
 592
 593 /* Parse a number, skipping embedded backslash-newlines.  */
 594 static void
 595 parse_number (pfile, number, c, leading_period)
 596      cpp_reader *pfile;
 597      cpp_string *number;
 598      cppchar_t c;
 599      int leading_period;
 600 {
 601   cpp_buffer *buffer = pfile->buffer;
 602   cpp_pool *pool = &pfile->ident_pool;
 603   unsigned char *dest, *limit;
 604
 605   dest = POOL_FRONT (pool);
 606   limit = POOL_LIMIT (pool);
 607
 608   /* Place a leading period.  */
 609   if (leading_period)
 610     {
 611       if (dest >= limit)
 612         limit = _cpp_next_chunk (pool, 0, &dest);
 613       *dest++ = '.';
 614     }
 615
 616   do
 617     {
 618       do
 619         {
 620           /* Need room for terminating null.  */
 621           if (dest + 1 >= limit)
 622             limit = _cpp_next_chunk (pool, 0, &dest);
 623           *dest++ = c;
 624
 625           c = EOF;
 626           if (buffer->cur == buffer->rlimit)
 627             break;
 628
 629           c = *buffer->cur++;
 630         }
 631       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 632
 633       /* Potential escaped newline?  */
 634       if (c != '?' && c != '\\')
 635         break;
 636       c = skip_escaped_newlines (pfile, c);
 637     }
 638   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 639
 640   /* Remember the next character.  */
 641   buffer->read_ahead = c;
 642
 643   /* Null-terminate the number.  */
 644   *dest = '\0';
 645
 646   number->text = POOL_FRONT (pool);
 647   number->len = dest - number->text;
 648   POOL_COMMIT (pool, number->len + 1);
 649 }
 650
 651 /* Subroutine of parse_string.  Emits error for unterminated strings.  */
 652 static void
 653 unterminated (pfile, term)
 654      cpp_reader *pfile;
 655      int term;
 656 {
 657   cpp_error (pfile, "missing terminating %c character", term);
 658
 659   if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
 660     {
 661       cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
 662                            "possible start of unterminated string literal");
 663       pfile->mls_line = 0;
 664     }
 665 }
 666
 667 /* Subroutine of parse_string.  */
 668 static int
 669 unescaped_terminator_p (pfile, dest)
 670      cpp_reader *pfile;
 671      const unsigned char *dest;
 672 {
 673   const unsigned char *start, *temp;
 674
 675   /* In #include-style directives, terminators are not escapeable.  */
 676   if (pfile->state.angled_headers)
 677     return 1;
 678
 679   start = POOL_FRONT (&pfile->ident_pool);
 680
 681   /* An odd number of consecutive backslashes represents an escaped
 682      terminator.  */
 683   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 684     ;
 685
 686   return ((dest - temp) & 1) == 0;
 687 }
 688
 689 /* Parses a string, character constant, or angle-bracketed header file
 690    name.  Handles embedded trigraphs and escaped newlines.  The stored
 691    string is guaranteed NUL-terminated, but it is not guaranteed that
 692    this is the first NUL since embedded NULs are preserved.
 693
 694    Multi-line strings are allowed, but they are deprecated.  */
 695 static void
 696 parse_string (pfile, token, terminator)
 697      cpp_reader *pfile;
 698      cpp_token *token;
 699      cppchar_t terminator;
 700 {
 701   cpp_buffer *buffer = pfile->buffer;
 702   cpp_pool *pool = &pfile->ident_pool;
 703   unsigned char *dest, *limit;
 704   cppchar_t c;
 705   bool warned_nulls = false, warned_multi = false;
 706
 707   dest = POOL_FRONT (pool);
 708   limit = POOL_LIMIT (pool);
 709
 710   for (;;)
 711     {
 712       if (buffer->cur == buffer->rlimit)
 713         c = EOF;
 714       else
 715         c = *buffer->cur++;
 716
 717     have_char:
 718       /* We need space for the terminating NUL.  */
 719       if (dest >= limit)
 720         limit = _cpp_next_chunk (pool, 0, &dest);
 721
 722       if (c == EOF)
 723         {
 724           unterminated (pfile, terminator);
 725           break;
 726         }
 727
 728       /* Handle trigraphs, escaped newlines etc.  */
 729       if (c == '?' || c == '\\')
 730         c = skip_escaped_newlines (pfile, c);
 731
 732       if (c == terminator && unescaped_terminator_p (pfile, dest))
 733         {
 734           c = EOF;
 735           break;
 736         }
 737       else if (is_vspace (c))
 738         {
 739           /* In assembly language, silently terminate string and
 740              character literals at end of line.  This is a kludge
 741              around not knowing where comments are.  */
 742           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 743             break;
 744
 745           /* Character constants and header names may not extend over
 746              multiple lines.  In Standard C, neither may strings.
 747              Unfortunately, we accept multiline strings as an
 748              extension, except in #include family directives.  */
 749           if (terminator != '"' || pfile->state.angled_headers)
 750             {
 751               unterminated (pfile, terminator);
 752               break;
 753             }
 754
 755           if (!warned_multi)
 756             {
 757               warned_multi = true;
 758               cpp_pedwarn (pfile, "multi-line string literals are deprecated");
 759             }
 760
 761           if (pfile->mls_line == 0)
 762             {
 763               pfile->mls_line = token->line;
 764               pfile->mls_col = token->col;
 765             }
 766
 767           c = handle_newline (pfile, c);
 768           *dest++ = '\n';
 769           goto have_char;
 770         }
 771       else if (c == '\0' && !warned_nulls)
 772         {
 773           warned_nulls = true;
 774           cpp_warning (pfile, "null character(s) preserved in literal");
 775         }
 776
 777       *dest++ = c;
 778     }
 779
 780   /* Remember the next character.  */
 781   buffer->read_ahead = c;
 782   *dest = '\0';
 783
 784   token->val.str.text = POOL_FRONT (pool);
 785   token->val.str.len = dest - token->val.str.text;
 786   POOL_COMMIT (pool, token->val.str.len + 1);
 787 }
 788
 789 /* The stored comment includes the comment start and any terminator.  */
 790 static void
 791 save_comment (pfile, token, from)
 792      cpp_reader *pfile;
 793      cpp_token *token;
 794      const unsigned char *from;
 795 {
 796   unsigned char *buffer;
 797   unsigned int len;
 798
 799   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 800   /* C++ comments probably (not definitely) have moved past a new
 801      line, which we don't want to save in the comment.  */
 802   if (pfile->buffer->read_ahead != EOF)
 803     len--;
 804   buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
 805
 806   token->type = CPP_COMMENT;
 807   token->val.str.len = len;
 808   token->val.str.text = buffer;
 809
 810   buffer[0] = '/';
 811   memcpy (buffer + 1, from, len - 1);
 812 }
 813
 814 /* Subroutine of lex_token to handle '%'.  A little tricky, since we
 815    want to avoid stepping back when lexing %:%X.  */
 816 static void
 817 lex_percent (pfile, result)
 818      cpp_reader *pfile;
 819      cpp_token *result;
 820 {
 821   cpp_buffer *buffer= pfile->buffer;
 822   cppchar_t c;
 823
 824   result->type = CPP_MOD;
 825   /* Parsing %:%X could leave an extra character.  */
 826   if (buffer->extra_char == EOF)
 827     c = get_effective_char (pfile);
 828   else
 829     {
 830       c = buffer->read_ahead = buffer->extra_char;
 831       buffer->extra_char = EOF;
 832     }
 833
 834   if (c == '=')
 835     ACCEPT_CHAR (CPP_MOD_EQ);
 836   else if (CPP_OPTION (pfile, digraphs))
 837     {
 838       if (c == ':')
 839         {
 840           result->flags |= DIGRAPH;
 841           ACCEPT_CHAR (CPP_HASH);
 842           if (get_effective_char (pfile) == '%')
 843             {
 844               buffer->extra_char = get_effective_char (pfile);
 845               if (buffer->extra_char == ':')
 846                 {
 847                   buffer->extra_char = EOF;
 848                   ACCEPT_CHAR (CPP_PASTE);
 849                 }
 850               else
 851                 /* We'll catch the extra_char when we're called back.  */
 852                 buffer->read_ahead = '%';
 853             }
 854         }
 855       else if (c == '>')
 856         {
 857           result->flags |= DIGRAPH;
 858           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 859         }
 860     }
 861 }
 862
 863 /* Subroutine of lex_token to handle '.'.  This is tricky, since we
 864    want to avoid stepping back when lexing '...' or '.123'.  In the
 865    latter case we should also set a flag for parse_number.  */
 866 static void
 867 lex_dot (pfile, result)
 868      cpp_reader *pfile;
 869      cpp_token *result;
 870 {
 871   cpp_buffer *buffer = pfile->buffer;
 872   cppchar_t c;
 873
 874   /* Parsing ..X could leave an extra character.  */
 875   if (buffer->extra_char == EOF)
 876     c = get_effective_char (pfile);
 877   else
 878     {
 879       c = buffer->read_ahead = buffer->extra_char;
 880       buffer->extra_char = EOF;
 881     }
 882
 883   /* All known character sets have 0...9 contiguous.  */
 884   if (c >= '0' && c <= '9')
 885     {
 886       result->type = CPP_NUMBER;
 887       parse_number (pfile, &result->val.str, c, 1);
 888     }
 889   else
 890     {
 891       result->type = CPP_DOT;
 892       if (c == '.')
 893         {
 894           buffer->extra_char = get_effective_char (pfile);
 895           if (buffer->extra_char == '.')
 896             {
 897               buffer->extra_char = EOF;
 898               ACCEPT_CHAR (CPP_ELLIPSIS);
 899             }
 900           else
 901             /* We'll catch the extra_char when we're called back.  */
 902             buffer->read_ahead = '.';
 903         }
 904       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 905         ACCEPT_CHAR (CPP_DOT_STAR);
 906     }
 907 }
 908
 909 /* Allocate COUNT tokens for RUN.  */
 910 void
 911 _cpp_init_tokenrun (run, count)
 912      tokenrun *run;
 913      unsigned int count;
 914 {
 915   run->base = xnewvec (cpp_token, count);
 916   run->limit = run->base + count;
 917   run->next = NULL;
 918 }
 919
 920 /* Returns the next tokenrun, or creates one if there is none.  */
 921 static tokenrun *
 922 next_tokenrun (run)
 923      tokenrun *run;
 924 {
 925   if (run->next == NULL)
 926     {
 927       run->next = xnew (tokenrun);
 928       run->next->prev = run;
 929       _cpp_init_tokenrun (run->next, 250);
 930     }
 931
 932   return run->next;
 933 }
 934
 935 /* Lex a token into RESULT (external interface).  */
 936 const cpp_token *
 937 _cpp_lex_token (pfile)
 938      cpp_reader *pfile;
 939 {
 940   cpp_token *result;
 941
 942   for (;;)
 943     {
 944       if (pfile->cur_token == pfile->cur_run->limit)
 945         {
 946           pfile->cur_run = next_tokenrun (pfile->cur_run);
 947           pfile->cur_token = pfile->cur_run->base;
 948         }
 949       result = pfile->cur_token++;
 950
 951       if (pfile->lookaheads)
 952         pfile->lookaheads--;
 953       else
 954         result = lex_token (pfile, result);
 955
 956       if (result->flags & BOL)
 957         {
 958           /* Is this a directive.  If _cpp_handle_directive returns
 959              false, it is an assembler #.  */
 960           if (result->type == CPP_HASH
 961               && !pfile->state.parsing_args
 962               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 963             continue;
 964           if (pfile->cb.line_change && !pfile->state.skipping)
 965             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
 966         }
 967
 968       /* We don't skip tokens in directives.  */
 969       if (pfile->state.in_directive)
 970         break;
 971
 972       /* Outside a directive, invalidate controlling macros.  At file
 973          EOF, lex_token takes care of popping the buffer, so we never
 974          get here and MI optimisation works.  */
 975       pfile->mi_valid = false;
 976
 977       if (!pfile->state.skipping || result->type == CPP_EOF)
 978         break;
 979     }
 980
 981   return result;
 982 }
 983
 984 /* Lex a token into RESULT.  When meeting a newline, returns CPP_EOF
 985    if parsing a directive, otherwise returns to the start of the token
 986    buffer if permissible.  Returns the location of the lexed token.
        That location differs from RESULT when a newline is met outside
        a directive while pfile->keep_tokens is clear: lexing then
        restarts in the first slot of pfile->base_run.  Multi-character
        punctuators are recognised by reading one character ahead with
        get_effective_char and confirming it with ACCEPT_CHAR.  */
 987 static cpp_token *
 988 lex_token (pfile, result)
 989      cpp_reader *pfile;
 990      cpp_token *result;
 991 {
 992   cppchar_t c;
 993   cpp_buffer *buffer;
 994   const unsigned char *comment_start;
 995
 996  fresh_line:
       /* pfile->buffer is re-read here because this label is reached
          again after _cpp_pop_buffer.  Flags saved for this token
          (BOL is stored at newline and at EOF) move into RESULT.  */
 997   buffer = pfile->buffer;
 998   result->flags = buffer->saved_flags;
 999   buffer->saved_flags = 0;
1000  update_tokens_line:
1001   result->line = pfile->line;
1002
1003  skipped_white:
       /* Use the pending read-ahead character if there is one, else
          the next byte of the buffer; C stays EOF when the buffer is
          exhausted.  */
1004   c = buffer->read_ahead;
1005   if (c == EOF && buffer->cur < buffer->rlimit)
1006     c = *buffer->cur++;
1007   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1008   buffer->read_ahead = EOF;
1009
1010  trigraph:
1011   switch (c)
1012     {
1013     case EOF:
1014       buffer->saved_flags = BOL;
1015       if (!pfile->state.parsing_args && !pfile->state.in_directive)
1016         {
1017           if (buffer->cur != buffer->line_base)
1018             {
1019               /* Non-empty files should end in a newline.  Don't warn
1020                  for command line and _Pragma buffers.  */
1021               if (!buffer->from_stage3)
1022                 cpp_pedwarn (pfile, "no newline at end of file");
1023               handle_newline (pfile, '\n');
1024             }
1025
1026           /* Don't pop the last buffer.  */
1027           if (buffer->prev)
1028             {
                   /* Read return_at_eof before the buffer is popped.  */
1029               unsigned char stop = buffer->return_at_eof;
1030
1031               _cpp_pop_buffer (pfile);
1032               if (!stop)
1033                 goto fresh_line;
1034             }
1035         }
1036       result->type = CPP_EOF;
1037       break;
1038
1039     case ' ': case '\t': case '\f': case '\v': case '\0':
1040       skip_whitespace (pfile, c);
1041       result->flags |= PREV_WHITE;
1042       goto skipped_white;
1043
1044     case '\n': case '\r':
1045       handle_newline (pfile, c);
1046       buffer->saved_flags = BOL;
1047       if (! pfile->state.in_directive)
1048         {
               /* Unless tokens must be kept, restart in the first slot
                  of the base run.  */
1049           if (!pfile->keep_tokens)
1050             {
1051               pfile->cur_run = &pfile->base_run;
1052               result = pfile->base_run.base;
1053               pfile->cur_token = result + 1;
1054             }
1055           goto fresh_line;
1056         }
1057       result->type = CPP_EOF;
1058       break;
1059
1060     case '?':
1061     case '\\':
1062       /* These could start an escaped newline, or '?' a trigraph.  Let
1063          skip_escaped_newlines do all the work.  */
1064       {
1065         unsigned int line = pfile->line;
1066
1067         c = skip_escaped_newlines (pfile, c);
1068         if (line != pfile->line)
1069           /* We had at least one escaped newline of some sort, and the
1070              next character is in buffer->read_ahead.  Update the
1071              token's line and column.  */
1072             goto update_tokens_line;
1073
1074         /* We are either the original '?' or '\\', or a trigraph.  */
1075         result->type = CPP_QUERY;
1076         buffer->read_ahead = EOF;
1077         if (c == '\\')
1078           goto random_char;
1079         else if (c != '?')
1080           goto trigraph;
1081       }
1082       break;
1083
1084     case '0': case '1': case '2': case '3': case '4':
1085     case '5': case '6': case '7': case '8': case '9':
1086       result->type = CPP_NUMBER;
1087       parse_number (pfile, &result->val.str, c, 0);
1088       break;
1089
1090     case '$':
1091       if (!CPP_OPTION (pfile, dollars_in_ident))
1092         goto random_char;
1093       /* Fall through...  */
1094
1095     case '_':
1096     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1097     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1098     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1099     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1100     case 'y': case 'z':
1101     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1102     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1103     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1104     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1105     case 'Y': case 'Z':
1106       result->type = CPP_NAME;
1107       result->val.node = parse_identifier (pfile);
1108
1109       /* 'L' may introduce wide characters or strings.  */
1110       if (result->val.node == pfile->spec_nodes.n_L)
1111         {
               /* Peek at the following character without consuming it.
                  NOTE(review): buffer->cur is advanced below even if C
                  came from buffer->read_ahead rather than *buffer->cur;
                  confirm read_ahead is always EOF at this point.  */
1112           c = buffer->read_ahead;
1113           if (c == EOF && buffer->cur < buffer->rlimit)
1114             c = *buffer->cur;
1115           if (c == '\'' || c == '"')
1116             {
1117               buffer->cur++;
1118               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1119               goto make_string;
1120             }
1121         }
1122       /* Convert named operators to their proper types.  */
1123       else if (result->val.node->flags & NODE_OPERATOR)
1124         {
1125           result->flags |= NAMED_OP;
1126           result->type = result->val.node->value.operator;
1127         }
1128       break;
1129
1130     case '\'':
1131     case '"':
1132       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
           /* Also reached from the 'L' and '<' cases, with C holding
              the terminating character.  */
1133     make_string:
1134       parse_string (pfile, result, c);
1135       break;
1136
1137     case '/':
1138       /* A potential block or line comment.  */
1139       comment_start = buffer->cur;
1140       result->type = CPP_DIV;
1141       c = get_effective_char (pfile);
1142       if (c == '=')
1143         ACCEPT_CHAR (CPP_DIV_EQ);
           /* Not the start of a comment: the token is CPP_DIV or
              CPP_DIV_EQ.  */
1144       if (c != '/' && c != '*')
1145         break;
1146
1147       if (c == '*')
1148         {
1149           if (skip_block_comment (pfile))
1150             cpp_error (pfile, "unterminated comment");
1151         }
1152       else
1153         {
               /* Without C++ comment support, and outside system
                  headers, the first '/' is returned as CPP_DIV.  */
1154           if (!CPP_OPTION (pfile, cplusplus_comments)
1155               && !CPP_IN_SYSTEM_HEADER (pfile))
1156             break;
1157
1158           /* Warn about comments only if pedantically GNUC89, and not
1159              in system headers.  */
1160           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1161               && ! buffer->warned_cplusplus_comments)
1162             {
1163               cpp_pedwarn (pfile,
1164                            "C++ style comments are not allowed in ISO C89");
1165               cpp_pedwarn (pfile,
1166                            "(this will be reported only once per input file)");
1167               buffer->warned_cplusplus_comments = 1;
1168             }
1169
1170           /* Skip_line_comment updates buffer->read_ahead.  */
1171           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1172             cpp_warning (pfile, "multi-line comment");
1173         }
1174
1175       /* Skipping the comment has updated buffer->read_ahead.  */
1176       if (!pfile->state.save_comments)
1177         {
               /* A discarded comment counts as whitespace before the
                  next token.  */
1178           result->flags |= PREV_WHITE;
1179           goto update_tokens_line;
1180         }
1181
1182       /* Save the comment as a token in its own right.  */
1183       save_comment (pfile, result, comment_start);
1184       /* Don't do MI optimisation.  */
1185       break;
1186
1187     case '<':
1188       if (pfile->state.angled_headers)
1189         {
1190           result->type = CPP_HEADER_NAME;
1191           c = '>';              /* terminator.  */
1192           goto make_string;
1193         }
1194
1195       result->type = CPP_LESS;
1196       c = get_effective_char (pfile);
1197       if (c == '=')
1198         ACCEPT_CHAR (CPP_LESS_EQ);
1199       else if (c == '<')
1200         {
1201           ACCEPT_CHAR (CPP_LSHIFT);
1202           if (get_effective_char (pfile) == '=')
1203             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1204         }
1205       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1206         {
1207           ACCEPT_CHAR (CPP_MIN);
1208           if (get_effective_char (pfile) == '=')
1209             ACCEPT_CHAR (CPP_MIN_EQ);
1210         }
1211       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1212         {
1213           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1214           result->flags |= DIGRAPH;
1215         }
1216       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1217         {
1218           ACCEPT_CHAR (CPP_OPEN_BRACE);
1219           result->flags |= DIGRAPH;
1220         }
1221       break;
1222
1223     case '>':
1224       result->type = CPP_GREATER;
1225       c = get_effective_char (pfile);
1226       if (c == '=')
1227         ACCEPT_CHAR (CPP_GREATER_EQ);
1228       else if (c == '>')
1229         {
1230           ACCEPT_CHAR (CPP_RSHIFT);
1231           if (get_effective_char (pfile) == '=')
1232             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1233         }
1234       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1235         {
1236           ACCEPT_CHAR (CPP_MAX);
1237           if (get_effective_char (pfile) == '=')
1238             ACCEPT_CHAR (CPP_MAX_EQ);
1239         }
1240       break;
1241
1242     case '%':
1243       lex_percent (pfile, result);
1244       break;
1245
1246     case '.':
1247       lex_dot (pfile, result);
1248       break;
1249
1250     case '+':
1251       result->type = CPP_PLUS;
1252       c = get_effective_char (pfile);
1253       if (c == '=')
1254         ACCEPT_CHAR (CPP_PLUS_EQ);
1255       else if (c == '+')
1256         ACCEPT_CHAR (CPP_PLUS_PLUS);
1257       break;
1258
1259     case '-':
1260       result->type = CPP_MINUS;
1261       c = get_effective_char (pfile);
1262       if (c == '>')
1263         {
1264           ACCEPT_CHAR (CPP_DEREF);
1265           if (CPP_OPTION (pfile, cplusplus)
1266               && get_effective_char (pfile) == '*')
1267             ACCEPT_CHAR (CPP_DEREF_STAR);
1268         }
1269       else if (c == '=')
1270         ACCEPT_CHAR (CPP_MINUS_EQ);
1271       else if (c == '-')
1272         ACCEPT_CHAR (CPP_MINUS_MINUS);
1273       break;
1274
1275     case '*':
1276       result->type = CPP_MULT;
1277       if (get_effective_char (pfile) == '=')
1278         ACCEPT_CHAR (CPP_MULT_EQ);
1279       break;
1280
1281     case '=':
1282       result->type = CPP_EQ;
1283       if (get_effective_char (pfile) == '=')
1284         ACCEPT_CHAR (CPP_EQ_EQ);
1285       break;
1286
1287     case '!':
1288       result->type = CPP_NOT;
1289       if (get_effective_char (pfile) == '=')
1290         ACCEPT_CHAR (CPP_NOT_EQ);
1291       break;
1292
1293     case '&':
1294       result->type = CPP_AND;
1295       c = get_effective_char (pfile);
1296       if (c == '=')
1297         ACCEPT_CHAR (CPP_AND_EQ);
1298       else if (c == '&')
1299         ACCEPT_CHAR (CPP_AND_AND);
1300       break;
1301
1302     case '#':
1303       result->type = CPP_HASH;
1304       if (get_effective_char (pfile) == '#')
1305           ACCEPT_CHAR (CPP_PASTE);
1306       break;
1307
1308     case '|':
1309       result->type = CPP_OR;
1310       c = get_effective_char (pfile);
1311       if (c == '=')
1312         ACCEPT_CHAR (CPP_OR_EQ);
1313       else if (c == '|')
1314         ACCEPT_CHAR (CPP_OR_OR);
1315       break;
1316
1317     case '^':
1318       result->type = CPP_XOR;
1319       if (get_effective_char (pfile) == '=')
1320         ACCEPT_CHAR (CPP_XOR_EQ);
1321       break;
1322
1323     case ':':
1324       result->type = CPP_COLON;
1325       c = get_effective_char (pfile);
1326       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1327         ACCEPT_CHAR (CPP_SCOPE);
1328       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1329         {
1330           result->flags |= DIGRAPH;
1331           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1332         }
1333       break;
1334
1335     case '~': result->type = CPP_COMPL; break;
1336     case ',': result->type = CPP_COMMA; break;
1337     case '(': result->type = CPP_OPEN_PAREN; break;
1338     case ')': result->type = CPP_CLOSE_PAREN; break;
1339     case '[': result->type = CPP_OPEN_SQUARE; break;
1340     case ']': result->type = CPP_CLOSE_SQUARE; break;
1341     case '{': result->type = CPP_OPEN_BRACE; break;
1342     case '}': result->type = CPP_CLOSE_BRACE; break;
1343     case ';': result->type = CPP_SEMICOLON; break;
1344
1345       /* @ is a punctuator in Objective C.  */
1346     case '@': result->type = CPP_ATSIGN; break;
1347
           /* Also the target for a lone '\\' and for '$' when dollars
              are not allowed in identifiers.  */
1348     random_char:
1349     default:
1350       result->type = CPP_OTHER;
1351       result->val.c = c;
1352       break;
1353     }
1354
1355   return result;
1356 }
1357
1358 /* An upper bound on the number of bytes needed to spell a token,
1359    including preceding whitespace.  */
1360 unsigned int
1361 cpp_token_len (token)
1362      const cpp_token *token;
1363 {
1364   unsigned int len;
1365
1366   switch (TOKEN_SPELL (token))
1367     {
1368     default:            len = 0;                                break;
1369     case SPELL_STRING:  len = token->val.str.len;               break;
1370     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1371     }
1372   /* 1 for whitespace, 4 for comment delimeters.  */
1373   return len + 5;
1374 }
1375
1376 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1377    already contain the enough space to hold the token's spelling.
1378    Returns a pointer to the character after the last character
1379    written.  */
1380 unsigned char *
1381 cpp_spell_token (pfile, token, buffer)
1382      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1383      const cpp_token *token;
1384      unsigned char *buffer;
1385 {
1386   switch (TOKEN_SPELL (token))
1387     {
1388     case SPELL_OPERATOR:
1389       {
1390         const unsigned char *spelling;
1391         unsigned char c;
1392
1393         if (token->flags & DIGRAPH)
1394           spelling
1395             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1396         else if (token->flags & NAMED_OP)
1397           goto spell_ident;
1398         else
1399           spelling = TOKEN_NAME (token);
1400
1401         while ((c = *spelling++) != '\0')
1402           *buffer++ = c;
1403       }
1404       break;
1405
1406     case SPELL_IDENT:
1407       spell_ident:
1408       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1409       buffer += NODE_LEN (token->val.node);
1410       break;
1411
1412     case SPELL_STRING:
1413       {
1414         int left, right, tag;
1415         switch (token->type)
1416           {
1417           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1418           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1419           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1420           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1421           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1422           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1423           }
1424         if (tag) *buffer++ = tag;
1425         if (left) *buffer++ = left;
1426         memcpy (buffer, token->val.str.text, token->val.str.len);
1427         buffer += token->val.str.len;
1428         if (right) *buffer++ = right;
1429       }
1430       break;
1431
1432     case SPELL_CHAR:
1433       *buffer++ = token->val.c;
1434       break;
1435
1436     case SPELL_NONE:
1437       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1438       break;
1439     }
1440
1441   return buffer;
1442 }
1443
1444 /* Returns a token as a null-terminated string.  The string is
1445    temporary, and automatically freed later.  Useful for diagnostics.  */
1446 unsigned char *
1447 cpp_token_as_text (pfile, token)
1448      cpp_reader *pfile;
1449      const cpp_token *token;
1450 {
1451   unsigned int len = cpp_token_len (token);
1452   unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1453
1454   end = cpp_spell_token (pfile, token, start);
1455   end[0] = '\0';
1456
1457   return start;
1458 }
1459
1460 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1461 const char *
1462 cpp_type2name (type)
1463      enum cpp_ttype type;
1464 {
1465   return (const char *) token_spellings[type].name;
1466 }
1467
1468 /* Writes the spelling of token to FP.  Separate from cpp_spell_token
1469    for efficiency - to avoid double-buffering.  Also, outputs a space
1470    if PREV_WHITE is flagged.  */
1471 void
1472 cpp_output_token (token, fp)
1473      const cpp_token *token;
1474      FILE *fp;
1475 {
1476   if (token->flags & PREV_WHITE)
1477     putc (' ', fp);
1478
1479   switch (TOKEN_SPELL (token))
1480     {
1481     case SPELL_OPERATOR:
1482       {
1483         const unsigned char *spelling;
1484
1485         if (token->flags & DIGRAPH)
1486           spelling
1487             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1488         else if (token->flags & NAMED_OP)
1489           goto spell_ident;
1490         else
1491           spelling = TOKEN_NAME (token);
1492
1493         ufputs (spelling, fp);
1494       }
1495       break;
1496
1497     spell_ident:
1498     case SPELL_IDENT:
1499       ufputs (NODE_NAME (token->val.node), fp);
1500     break;
1501
1502     case SPELL_STRING:
1503       {
1504         int left, right, tag;
1505         switch (token->type)
1506           {
1507           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1508           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1509           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1510           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1511           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1512           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1513           }
1514         if (tag) putc (tag, fp);
1515         if (left) putc (left, fp);
1516         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1517         if (right) putc (right, fp);
1518       }
1519       break;
1520
1521     case SPELL_CHAR:
1522       putc (token->val.c, fp);
1523       break;
1524
1525     case SPELL_NONE:
1526       /* An error, most probably.  */
1527       break;
1528     }
1529 }
1530
1531 /* Compare two tokens.  */
1532 int
1533 _cpp_equiv_tokens (a, b)
1534      const cpp_token *a, *b;
1535 {
1536   if (a->type == b->type && a->flags == b->flags)
1537     switch (TOKEN_SPELL (a))
1538       {
1539       default:                  /* Keep compiler happy.  */
1540       case SPELL_OPERATOR:
1541         return 1;
1542       case SPELL_CHAR:
1543         return a->val.c == b->val.c; /* Character.  */
1544       case SPELL_NONE:
1545         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1546       case SPELL_IDENT:
1547         return a->val.node == b->val.node;
1548       case SPELL_STRING:
1549         return (a->val.str.len == b->val.str.len
1550                 && !memcmp (a->val.str.text, b->val.str.text,
1551                             a->val.str.len));
1552       }
1553
1554   return 0;
1555 }
1556
1557 /* Determine whether two tokens can be pasted together, and if so,
1558    what the resulting token is.  Returns CPP_EOF if the tokens cannot
1559    be pasted, or the appropriate type for the merged token if they
1560    can.  */
1561 enum cpp_ttype
1562 cpp_can_paste (pfile, token1, token2, digraph)
1563      cpp_reader * pfile;
1564      const cpp_token *token1, *token2;
1565      int* digraph;
1566 {
1567   enum cpp_ttype a = token1->type, b = token2->type;
1568   int cxx = CPP_OPTION (pfile, cplusplus);
1569
1570   /* Treat named operators as if they were ordinary NAMEs.  */
1571   if (token1->flags & NAMED_OP)
1572     a = CPP_NAME;
1573   if (token2->flags & NAMED_OP)
1574     b = CPP_NAME;
1575
1576   if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1577     return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1578
1579   switch (a)
1580     {
1581     case CPP_GREATER:
1582       if (b == a) return CPP_RSHIFT;
1583       if (b == CPP_QUERY && cxx)        return CPP_MAX;
1584       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
1585       break;
1586     case CPP_LESS:
1587       if (b == a) return CPP_LSHIFT;
1588       if (b == CPP_QUERY && cxx)        return CPP_MIN;
1589       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
1590       if (CPP_OPTION (pfile, digraphs))
1591         {
1592           if (b == CPP_COLON)
1593             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1594           if (b == CPP_MOD)
1595             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
1596         }
1597       break;
1598
1599     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
1600     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
1601     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
1602
1603     case CPP_MINUS:
1604       if (b == a)               return CPP_MINUS_MINUS;
1605       if (b == CPP_GREATER)     return CPP_DEREF;
1606       break;
1607     case CPP_COLON:
1608       if (b == a && cxx)        return CPP_SCOPE;
1609       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1610         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1611       break;
1612
1613     case CPP_MOD:
1614       if (CPP_OPTION (pfile, digraphs))
1615         {
1616           if (b == CPP_GREATER)
1617             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
1618           if (b == CPP_COLON)
1619             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
1620         }
1621       break;
1622     case CPP_DEREF:
1623       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1624       break;
1625     case CPP_DOT:
1626       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1627       if (b == CPP_NUMBER)      return CPP_NUMBER;
1628       break;
1629
1630     case CPP_HASH:
1631       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1632         /* %:%: digraph */
1633         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1634       break;
1635
1636     case CPP_NAME:
1637       if (b == CPP_NAME)        return CPP_NAME;
1638       if (b == CPP_NUMBER
1639           && name_p (pfile, &token2->val.str)) return CPP_NAME;
1640       if (b == CPP_CHAR
1641           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1642       if (b == CPP_STRING
1643           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1644       break;
1645
1646     case CPP_NUMBER:
1647       if (b == CPP_NUMBER)      return CPP_NUMBER;
1648       if (b == CPP_NAME)        return CPP_NUMBER;
1649       if (b == CPP_DOT)         return CPP_NUMBER;
1650       /* Numbers cannot have length zero, so this is safe.  */
1651       if ((b == CPP_PLUS || b == CPP_MINUS)
1652           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1653         return CPP_NUMBER;
1654       break;
1655
1656     default:
1657       break;
1658     }
1659
1660   return CPP_EOF;
1661 }
1662
1663 /* Returns nonzero if a space should be inserted to avoid an
1664    accidental token paste for output.  For simplicity, it is
1665    conservative, and occasionally advises a space where one is not
1666    needed, e.g. "." and ".2".  */
1667
1668 int
1669 cpp_avoid_paste (pfile, token1, token2)
1670      cpp_reader *pfile;
1671      const cpp_token *token1, *token2;
1672 {
1673   enum cpp_ttype a = token1->type, b = token2->type;
1674   cppchar_t c;
1675
1676   if (token1->flags & NAMED_OP)
1677     a = CPP_NAME;
1678   if (token2->flags & NAMED_OP)
1679     b = CPP_NAME;
1680
1681   c = EOF;
1682   if (token2->flags & DIGRAPH)
1683     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1684   else if (token_spellings[b].category == SPELL_OPERATOR)
1685     c = token_spellings[b].name[0];
1686
1687   /* Quickly get everything that can paste with an '='.  */
1688   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1689     return 1;
1690
1691   switch (a)
1692     {
1693     case CPP_GREATER:   return c == '>' || c == '?';
1694     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1695     case CPP_PLUS:      return c == '+';
1696     case CPP_MINUS:     return c == '-' || c == '>';
1697     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1698     case CPP_MOD:       return c == ':' || c == '>';
1699     case CPP_AND:       return c == '&';
1700     case CPP_OR:        return c == '|';
1701     case CPP_COLON:     return c == ':' || c == '>';
1702     case CPP_DEREF:     return c == '*';
1703     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1704     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1705     case CPP_NAME:      return ((b == CPP_NUMBER
1706                                  && name_p (pfile, &token2->val.str))
1707                                 || b == CPP_NAME
1708                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1709     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1710                                 || c == '.' || c == '+' || c == '-');
1711     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1712                                 && token1->val.c == '@'
1713                                 && (b == CPP_NAME || b == CPP_STRING));
1714     default:            break;
1715     }
1716
1717   return 0;
1718 }
1719
1720 /* Output all the remaining tokens on the current line, and a newline
1721    character, to FP.  Leading whitespace is removed.  */
1722 void
1723 cpp_output_line (pfile, fp)
1724      cpp_reader *pfile;
1725      FILE *fp;
1726 {
1727   cpp_token token;
1728
1729   cpp_get_token (pfile, &token);
1730   token.flags &= ~PREV_WHITE;
1731   while (token.type != CPP_EOF)
1732     {
1733       cpp_output_token (&token, fp);
1734       cpp_get_token (pfile, &token);
1735     }
1736
1737   putc ('\n', fp);
1738 }
1739
/* Return the numeric value (0-15) of the hexadecimal digit C, which
   may be in either case.  Aborts if C is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  switch (c)
    {
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      return c - '0';

    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
      return c - 'a' + 10;

    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
      return c - 'A' + 10;

    default:
      abort ();
    }
}
1753
1754 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1755    failure if cpplib is not parsing C++ or C99.  Such failure is
1756    silent, and no variables are updated.  Otherwise returns 0, and
1757    warns if -Wtraditional.
1758
1759    [lex.charset]: The character designated by the universal character
1760    name \UNNNNNNNN is that character whose character short name in
1761    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1762    universal character name \uNNNN is that character whose character
1763    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1764    for a universal character name is less than 0x20 or in the range
1765    0x7F-0x9F (inclusive), or if the universal character name
1766    designates a character in the basic source character set, then the
1767    program is ill-formed.
1768
1769    We assume that wchar_t is Unicode, so we don't need to do any
1770    mapping.  Is this ever wrong?
1771
1772    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1773    LIMIT is the end of the string or charconst.  PSTR is updated to
1774    point after the UCS on return, and the UCS is written into PC.  */
1775
1776 static int
1777 maybe_read_ucs (pfile, pstr, limit, pc)
1778      cpp_reader *pfile;
1779      const unsigned char **pstr;
1780      const unsigned char *limit;
1781      unsigned int *pc;
1782 {
1783   const unsigned char *p = *pstr;
1784   unsigned int code = 0;
1785   unsigned int c = *pc, length;
1786
1787   /* Only attempt to interpret a UCS for C++ and C99.  */
1788   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1789     return 1;
1790
1791   if (CPP_WTRADITIONAL (pfile))
1792     cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1793
1794   length = (c == 'u' ? 4: 8);
1795
1796   if ((size_t) (limit - p) < length)
1797     {
1798       cpp_error (pfile, "incomplete universal-character-name");
1799       /* Skip to the end to avoid more diagnostics.  */
1800       p = limit;
1801     }
1802   else
1803     {
1804       for (; length; length--, p++)
1805         {
1806           c = *p;
1807           if (ISXDIGIT (c))
1808             code = (code << 4) + hex_digit_value (c);
1809           else
1810             {
1811               cpp_error (pfile,
1812                          "non-hex digit '%c' in universal-character-name", c);
1813               /* We shouldn't skip in case there are multibyte chars.  */
1814               break;
1815             }
1816         }
1817     }
1818
1819 #ifdef TARGET_EBCDIC
1820   cpp_error (pfile, "universal-character-name on EBCDIC target");
1821   code = 0x3f;  /* EBCDIC invalid character */
1822 #else
1823  /* True extended characters are OK.  */
1824   if (code >= 0xa0
1825       && !(code & 0x80000000)
1826       && !(code >= 0xD800 && code <= 0xDFFF))
1827     ;
1828   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1829      hex escapes so that this also works with EBCDIC hosts.  */
1830   else if (code == 0x24 || code == 0x40 || code == 0x60)
1831     ;
1832   /* Don't give another error if one occurred above.  */
1833   else if (length == 0)
1834     cpp_error (pfile, "universal-character-name out of range");
1835 #endif
1836
1837   *pstr = p;
1838   *pc = code;
1839   return 0;
1840 }
1841
1842 /* Interpret an escape sequence, and return its value.  PSTR points to
1843    the input pointer, which is just after the backslash.  LIMIT is how
1844    much text we have.  MASK is a bitmask for the precision for the
1845    destination type (char or wchar_t).  TRADITIONAL, if true, does not
1846    interpret escapes that did not exist in traditional C.
1847
1848    Handles all relevant diagnostics.  */
1849
1850 unsigned int
1851 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1852      cpp_reader *pfile;
1853      const unsigned char **pstr;
1854      const unsigned char *limit;
1855      unsigned HOST_WIDE_INT mask;
1856      int traditional;
1857 {
1858   int unknown = 0;
1859   const unsigned char *str = *pstr;
1860   unsigned int c = *str++;
1861
1862   switch (c)
1863     {
1864     case '\\': case '\'': case '"': case '?': break;
1865     case 'b': c = TARGET_BS;      break;
1866     case 'f': c = TARGET_FF;      break;
1867     case 'n': c = TARGET_NEWLINE; break;
1868     case 'r': c = TARGET_CR;      break;
1869     case 't': c = TARGET_TAB;     break;
1870     case 'v': c = TARGET_VT;      break;
1871
1872     case '(': case '{': case '[': case '%':
1873       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1874          '\%' is used to prevent SCCS from getting confused.  */
1875       unknown = CPP_PEDANTIC (pfile);
1876       break;
1877
1878     case 'a':
1879       if (CPP_WTRADITIONAL (pfile))
1880         cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1881       if (!traditional)
1882         c = TARGET_BELL;
1883       break;
1884
1885     case 'e': case 'E':
1886       if (CPP_PEDANTIC (pfile))
1887         cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1888       c = TARGET_ESC;
1889       break;
1890
1891     case 'u': case 'U':
1892       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1893       break;
1894
1895     case 'x':
1896       if (CPP_WTRADITIONAL (pfile))
1897         cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1898
1899       if (!traditional)
1900         {
1901           unsigned int i = 0, overflow = 0;
1902           int digits_found = 0;
1903
1904           while (str < limit)
1905             {
1906               c = *str;
1907               if (! ISXDIGIT (c))
1908                 break;
1909               str++;
1910               overflow |= i ^ (i << 4 >> 4);
1911               i = (i << 4) + hex_digit_value (c);
1912               digits_found = 1;
1913             }
1914
1915           if (!digits_found)
1916             cpp_error (pfile, "\\x used with no following hex digits");
1917
1918           if (overflow | (i != (i & mask)))
1919             {
1920               cpp_pedwarn (pfile, "hex escape sequence out of range");
1921               i &= mask;
1922             }
1923           c = i;
1924         }
1925       break;
1926
1927     case '0':  case '1':  case '2':  case '3':
1928     case '4':  case '5':  case '6':  case '7':
1929       {
1930         unsigned int i = c - '0';
1931         int count = 0;
1932
1933         while (str < limit && ++count < 3)
1934           {
1935             c = *str;
1936             if (c < '0' || c > '7')
1937               break;
1938             str++;
1939             i = (i << 3) + c - '0';
1940           }
1941
1942         if (i != (i & mask))
1943           {
1944             cpp_pedwarn (pfile, "octal escape sequence out of range");
1945             i &= mask;
1946           }
1947         c = i;
1948       }
1949       break;
1950
1951     default:
1952       unknown = 1;
1953       break;
1954     }
1955
1956   if (unknown)
1957     {
1958       if (ISGRAPH (c))
1959         cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1960       else
1961         cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1962     }
1963
1964   if (c > mask)
1965     cpp_pedwarn (pfile, "escape sequence out of range for character");
1966
1967   *pstr = str;
1968   return c;
1969 }
1970
/* When nothing has defined them already, MAX_CHAR_TYPE_SIZE and
   MAX_WCHAR_TYPE_SIZE default to CHAR_TYPE_SIZE and WCHAR_TYPE_SIZE
   respectively.  */
#if !defined (MAX_CHAR_TYPE_SIZE)
# define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#if !defined (MAX_WCHAR_TYPE_SIZE)
# define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1978
1979 /* Interpret a (possibly wide) character constant in TOKEN.
1980    WARN_MULTI warns about multi-character charconsts, if not
1981    TRADITIONAL.  TRADITIONAL also indicates not to interpret escapes
1982    that did not exist in traditional C.  PCHARS_SEEN points to a
1983    variable that is filled in with the number of characters seen.  */
1984 HOST_WIDE_INT
1985 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1986      cpp_reader *pfile;
1987      const cpp_token *token;
1988      int warn_multi;
1989      int traditional;
1990      unsigned int *pchars_seen;
1991 {
1992   const unsigned char *str = token->val.str.text;
1993   const unsigned char *limit = str + token->val.str.len;
1994   unsigned int chars_seen = 0;
1995   unsigned int width, max_chars, c;
1996   unsigned HOST_WIDE_INT mask;
1997   HOST_WIDE_INT result = 0;
1998
1999 #ifdef MULTIBYTE_CHARS
2000   (void) local_mbtowc (NULL, NULL, 0);
2001 #endif
2002
2003   /* Width in bits.  */
2004   if (token->type == CPP_CHAR)
2005     width = MAX_CHAR_TYPE_SIZE;
2006   else
2007     width = MAX_WCHAR_TYPE_SIZE;
2008
2009   if (width < HOST_BITS_PER_WIDE_INT)
2010     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
2011   else
2012     mask = ~0;
2013   max_chars = HOST_BITS_PER_WIDE_INT / width;
2014
2015   while (str < limit)
2016     {
2017 #ifdef MULTIBYTE_CHARS
2018       wchar_t wc;
2019       int char_len;
2020
2021       char_len = local_mbtowc (&wc, str, limit - str);
2022       if (char_len == -1)
2023         {
2024           cpp_warning (pfile, "ignoring invalid multibyte character");
2025           c = *str++;
2026         }
2027       else
2028         {
2029           str += char_len;
2030           c = wc;
2031         }
2032 #else
2033       c = *str++;
2034 #endif
2035
2036       if (c == '\\')
2037         c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
2038
2039 #ifdef MAP_CHARACTER
2040       if (ISPRINT (c))
2041         c = MAP_CHARACTER (c);
2042 #endif
2043
2044       /* Merge character into result; ignore excess chars.  */
2045       if (++chars_seen <= max_chars)
2046         {
2047           if (width < HOST_BITS_PER_WIDE_INT)
2048             result = (result << width) | (c & mask);
2049           else
2050             result = c;
2051         }
2052     }
2053
2054   if (chars_seen == 0)
2055     cpp_error (pfile, "empty character constant");
2056   else if (chars_seen > max_chars)
2057     {
2058       chars_seen = max_chars;
2059       cpp_warning (pfile, "character constant too long");
2060     }
2061   else if (chars_seen > 1 && !traditional && warn_multi)
2062     cpp_warning (pfile, "multi-character character constant");
2063
2064   /* If char type is signed, sign-extend the constant.  The
2065      __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
2066   if (token->type == CPP_CHAR && chars_seen)
2067     {
2068       unsigned int nbits = chars_seen * width;
2069       unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
2070
2071       if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2072           || ((result >> (nbits - 1)) & 1) == 0)
2073         result &= mask;
2074       else
2075         result |= ~mask;
2076     }
2077
2078   *pchars_seen = chars_seen;
2079   return result;
2080 }
2081
2082 /* Memory pools.  */
2083
/* Layout probe: U follows a single char, so its offset within the
   structure is the padding the compiler inserts to align a union
   holding a double and a pointer.  That offset is taken as the
   default alignment.  */
struct dummy
{
  char c;
  union
  {
    int *p;
    double d;
  } u;
};

#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
2095
2096 static int
2097 chunk_suitable (pool, chunk, size)
2098      cpp_pool *pool;
2099      cpp_chunk *chunk;
2100      unsigned int size;
2101 {
2102   /* Being at least twice SIZE means we can use memcpy in
2103      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
2104      anyway.  */
2105   return (chunk && pool->locked != chunk
2106           && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2107 }
2108
2109 /* Returns the end of the new pool.  PTR points to a char in the old
2110    pool, and is updated to point to the same char in the new pool.  */
2111 unsigned char *
2112 _cpp_next_chunk (pool, len, ptr)
2113      cpp_pool *pool;
2114      unsigned int len;
2115      unsigned char **ptr;
2116 {
2117   cpp_chunk *chunk = pool->cur->next;
2118
2119   /* LEN is the minimum size we want in the new pool.  */
2120   len += POOL_ROOM (pool);
2121   if (! chunk_suitable (pool, chunk, len))
2122     {
2123       chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2124
2125       chunk->next = pool->cur->next;
2126       pool->cur->next = chunk;
2127     }
2128
2129   /* Update the pointer before changing chunk's front.  */
2130   if (ptr)
2131     *ptr += chunk->base - POOL_FRONT (pool);
2132
2133   memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2134   chunk->front = chunk->base;
2135
2136   pool->cur = chunk;
2137   return POOL_LIMIT (pool);
2138 }
2139
2140 static cpp_chunk *
2141 new_chunk (size)
2142      unsigned int size;
2143 {
2144   unsigned char *base;
2145   cpp_chunk *result;
2146
2147   size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2148   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2149   /* Put the chunk descriptor at the end.  Then chunk overruns will
2150      cause obvious chaos.  */
2151   result = (cpp_chunk *) (base + size);
2152   result->base = base;
2153   result->front = base;
2154   result->limit = base + size;
2155   result->next = 0;
2156
2157   return result;
2158 }
2159
2160 void
2161 _cpp_init_pool (pool, size, align, temp)
2162      cpp_pool *pool;
2163      unsigned int size, align, temp;
2164 {
2165   if (align == 0)
2166     align = DEFAULT_ALIGNMENT;
2167   if (align & (align - 1))
2168     abort ();
2169   pool->align = align;
2170   pool->first = new_chunk (size);
2171   pool->cur = pool->first;
2172   pool->locked = 0;
2173   pool->locks = 0;
2174   if (temp)
2175     pool->cur->next = pool->cur;
2176 }
2177
2178 void
2179 _cpp_lock_pool (pool)
2180      cpp_pool *pool;
2181 {
2182   if (pool->locks++ == 0)
2183     pool->locked = pool->cur;
2184 }
2185
2186 void
2187 _cpp_unlock_pool (pool)
2188      cpp_pool *pool;
2189 {
2190   if (--pool->locks == 0)
2191     pool->locked = 0;
2192 }
2193
2194 void
2195 _cpp_free_pool (pool)
2196      cpp_pool *pool;
2197 {
2198   cpp_chunk *chunk = pool->first, *next;
2199
2200   do
2201     {
2202       next = chunk->next;
2203       free (chunk->base);
2204       chunk = next;
2205     }
2206   while (chunk && chunk != pool->first);
2207 }
2208
2209 /* Reserve LEN bytes from a memory pool.  */
2210 unsigned char *
2211 _cpp_pool_reserve (pool, len)
2212      cpp_pool *pool;
2213      unsigned int len;
2214 {
2215   len = POOL_ALIGN (len, pool->align);
2216   if (len > (unsigned int) POOL_ROOM (pool))
2217     _cpp_next_chunk (pool, len, 0);
2218
2219   return POOL_FRONT (pool);
2220 }
2221
2222 /* Allocate LEN bytes from a memory pool.  */
2223 unsigned char *
2224 _cpp_pool_alloc (pool, len)
2225      cpp_pool *pool;
2226      unsigned int len;
2227 {
2228   unsigned char *result = _cpp_pool_reserve (pool, len);
2229
2230   POOL_COMMIT (pool, len);
2231   return result;
2232 }