gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /* This lexer works with a single pass of the file.  Recently I
  24    re-wrote it to minimize the places where we step backwards in the
  25    input stream, to make future changes to support multi-byte
  26    character sets fairly straight-forward.
  27
  28    There is now only one routine where we do step backwards:
  29    skip_escaped_newlines.  This routine could probably also be changed
  30    so that it doesn't need to step back.  One possibility is to use a
  31    trick similar to that used in lex_period and lex_percent.  Two
  32    extra characters might be needed, but skip_escaped_newlines itself
  33    would probably be the only place that needs to be aware of that,
  34    and changes to the remaining routines would probably only be needed
  35    if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41
  42 /* MULTIBYTE_CHARS support only works for native compilers.
  43    ??? Ideally what we want is to model widechar support after
  44    the current floating point support.  */
  45 #ifdef CROSS_COMPILE
  46 #undef MULTIBYTE_CHARS
  47 #endif
  48
  49 #ifdef MULTIBYTE_CHARS
  50 #include "mbchar.h"
  51 #include <locale.h>
  52 #endif
  53
  54 /* Tokens with SPELL_STRING store their spelling in the token list,
  55    and its length in the token->val.name.len.  */
  56 enum spell_type
  57 {
  58   SPELL_OPERATOR = 0,
  59   SPELL_CHAR,
  60   SPELL_IDENT,
  61   SPELL_STRING,
  62   SPELL_NONE
  63 };
  64
     /* One entry of the token_spellings table: the spelling category of
        a token type together with a name string for it.  */
  65 struct token_spelling
  66 {
  67   enum spell_type category;
  68   const unsigned char *name;
  69 };
  70
     /* Spellings of the digraph forms of tokens.  */
  71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
  72                                              U":>", U"<%", U"%>"};
  73
     /* Build token_spellings from TTYPE_TABLE.  An OP (e, s) entry gets
        category SPELL_OPERATOR and the spelling S; a TK (e, s) entry
        gets category S and the stringified enumerator E as its name.
        Both helper macros are undefined again once the table is built.  */
  74 #define OP(e, s) { SPELL_OPERATOR, U s           },
  75 #define TK(e, s) { s,              U STRINGX (e) },
  76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
  77 #undef OP
  78 #undef TK
  79
     /* Look up the spelling category / name of TOKEN, indexing the
        table by TOKEN->type.  */
  80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  82
     /* Forward declarations of the static functions of this file.  */

     /* Character-level input: newline handling, trigraphs and escaped
        newlines.  */
  83 static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
  84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
  85 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
  86
     /* Comment and whitespace skipping, and the lexers for individual
        token kinds.  */
  87 static int skip_block_comment PARAMS ((cpp_reader *));
  88 static int skip_line_comment PARAMS ((cpp_reader *));
  89 static void adjust_column PARAMS ((cpp_reader *));
  90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
  92 static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
  93                                                     const U_CHAR *));
  94 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  95 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  96 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  97 static void unterminated PARAMS ((cpp_reader *, int));
  98 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
  99 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
 100 static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
 101 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
 102 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
 103 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
 104                                    const unsigned char *, unsigned int *));
 105 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
 106
 107 static unsigned int hex_digit_value PARAMS ((unsigned int));
 108 static _cpp_buff *new_buff PARAMS ((size_t));
 109
 110 /* Utility routine:
 111
 112    Compares, the token TOKEN to the NUL-terminated string STRING.
 113    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 114
 115 int
 116 cpp_ideq (token, string)
 117      const cpp_token *token;
 118      const char *string;
 119 {
 120   if (token->type != CPP_NAME)
 121     return 0;
 122
 123   return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
 124 }
 125
 126 /* Call when meeting a newline.  Returns the character after the newline
 127    (or carriage-return newline combination), or EOF.  */
 128 static cppchar_t
 129 handle_newline (pfile, newline_char)
 130      cpp_reader *pfile;
 131      cppchar_t newline_char;
 132 {
 133   cpp_buffer *buffer;
 134   cppchar_t next = EOF;
 135
 136   pfile->line++;
 137   buffer = pfile->buffer;
 138   buffer->col_adjust = 0;
 139   buffer->line_base = buffer->cur;
 140
 141   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 142   if (buffer->cur < buffer->rlimit)
 143     {
 144       next = *buffer->cur++;
 145       if (next + newline_char == '\r' + '\n')
 146         {
 147           buffer->line_base = buffer->cur;
 148           if (buffer->cur < buffer->rlimit)
 149             next = *buffer->cur++;
 150           else
 151             next = EOF;
 152         }
 153     }
 154
 155   buffer->read_ahead = next;
 156   return next;
 157 }
 158
 159 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 160    encountered.  It warns if necessary, and returns true if the
 161    trigraph should be honoured.  FROM_CHAR is the third character of a
 162    trigraph, and presumed to be the previous character for position
 163    reporting.  */
 164 static int
 165 trigraph_ok (pfile, from_char)
 166      cpp_reader *pfile;
 167      cppchar_t from_char;
 168 {
 169   int accept = CPP_OPTION (pfile, trigraphs);
 170
 171   /* Don't warn about trigraphs in comments.  */
 172   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 173     {
 174       cpp_buffer *buffer = pfile->buffer;
 175
 176       if (accept)
 177         cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
 178                                "trigraph ??%c converted to %c",
 179                                (int) from_char,
 180                                (int) _cpp_trigraph_map[from_char]);
 181       else if (buffer->cur != buffer->last_Wtrigraphs)
 182         {
 183           buffer->last_Wtrigraphs = buffer->cur;
 184           cpp_warning_with_line (pfile, pfile->line,
 185                                  CPP_BUF_COL (buffer) - 2,
 186                                  "trigraph ??%c ignored", (int) from_char);
 187         }
 188     }
 189
 190   return accept;
 191 }
 192
 193 /* Sets the type of the token being lexed to T and marks the
        read-ahead character as consumed by resetting
        buffer->read_ahead to EOF.  Assumes local variables buffer and
        result.  */
 194 #define ACCEPT_CHAR(t) \
 195   do { result->type = t; buffer->read_ahead = EOF; } while (0)
 196
 197 /* When we move to multibyte character sets, add to these something
 198    that saves and restores the state of the multibyte conversion
 199    library.  This probably involves saving and restoring a "cookie".
 200    In the case of glibc it is an 8-byte structure, so is not a high
 201    overhead operation.  In any case, it's out of the fast path.

        SAVE_STATE records the current read position of the buffer and
        RESTORE_STATE rewinds to it.  Both assume local variables
        buffer and saved_cur.  */
 202 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
 203 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 204
 205 /* Skips any escaped newlines introduced by NEXT, which is either a
 206    '?' or a '\\'.  Returns the next character, which will also have
 207    been placed in buffer->read_ahead.  This routine performs
 208    preprocessing stages 1 and 2 of the ISO C standard.

        If the buffer is marked from_stage3, nothing is skipped and
        NEXT is returned unchanged.  Otherwise a '?' is examined as the
        possible start of a trigraph: an honoured trigraph that does
        not map to a backslash is consumed and its replacement
        returned.  A backslash (literal or from a trigraph) followed by
        optional non-vertical space and a newline is consumed, and the
        process repeats on the character after the newline.  Whenever
        the text does not match, buffer->cur is rewound to the last
        saved position and the character in hand is returned.  */
 209 static cppchar_t
 210 skip_escaped_newlines (pfile, next)
 211      cpp_reader *pfile;
 212      cppchar_t next;
 213 {
 214   cpp_buffer *buffer = pfile->buffer;
 215
 216   /* Only do this if we apply stages 1 and 2.  */
 217   if (!buffer->from_stage3)
 218     {
 219       cppchar_t next1;
 220       const unsigned char *saved_cur;
 221       int space;
 222
 223       do
 224         {
               /* Nothing can follow NEXT, so it stands for itself.  */
 225           if (buffer->cur == buffer->rlimit)
 226             break;
 227
 228           SAVE_STATE ();
 229           if (next == '?')
 230             {
                   /* A trigraph needs a second '?' and then at least
                      one further character.  */
 231               next1 = *buffer->cur++;
 232               if (next1 != '?' || buffer->cur == buffer->rlimit)
 233                 {
 234                   RESTORE_STATE ();
 235                   break;
 236                 }
 237
                   /* The third character must be one the trigraph map
                      knows, and trigraph_ok must agree to convert.  */
 238               next1 = *buffer->cur++;
 239               if (!_cpp_trigraph_map[next1]
 240                   || !trigraph_ok (pfile, next1))
 241                 {
 242                   RESTORE_STATE ();
 243                   break;
 244                 }
 245
 246               /* We have a full trigraph here.  */
 247               next = _cpp_trigraph_map[next1];
                   /* Only a backslash can start an escaped newline; any
                      other replacement is returned as is, with the
                      trigraph consumed.  */
 248               if (next != '\\' || buffer->cur == buffer->rlimit)
 249                 break;
                   /* From here on, rewind only to just past the
                      trigraph, not to before it.  */
 250               SAVE_STATE ();
 251             }
 252
 253           /* We have a backslash, and room for at least one more character.  */
 254           space = 0;
               /* Read past non-vertical space after the backslash,
                  noting whether there was any.  */
 255           do
 256             {
 257               next1 = *buffer->cur++;
 258               if (!is_nvspace (next1))
 259                 break;
 260               space = 1;
 261             }
 262           while (buffer->cur < buffer->rlimit);
 263
               /* Not a newline: the backslash was not an escape, so
                  rewind and return it.  */
 264           if (!is_vspace (next1))
 265             {
 266               RESTORE_STATE ();
 267               break;
 268             }
 269
 270           if (space && !pfile->state.lexing_comment)
 271             cpp_warning (pfile, "backslash and newline separated by space");
 272
               /* Consume the newline; NEXT becomes the character that
                  follows it, or EOF.  */
 273           next = handle_newline (pfile, next1);
 274           if (next == EOF)
 275             cpp_pedwarn (pfile, "backslash-newline at end of file");
 276         }
           /* The character after the newline may itself start another
              escaped newline.  */
 277       while (next == '\\' || next == '?');
 278     }
 279
 280   buffer->read_ahead = next;
 281   return next;
 282 }
 283
 284 /* Obtain the next character, after trigraph conversion and skipping
 285    an arbitrary string of escaped newlines.  The common case of no
 286    trigraphs or escaped newlines falls through quickly.  */
 287 static cppchar_t
 288 get_effective_char (pfile)
 289      cpp_reader *pfile;
 290 {
 291   cpp_buffer *buffer = pfile->buffer;
 292   cppchar_t next = EOF;
 293
 294   if (buffer->cur < buffer->rlimit)
 295     {
 296       next = *buffer->cur++;
 297
 298       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 299          can introduce escaped newlines, which we want to skip, or
 300          UCNs, which, depending upon lexer state, we will handle in
 301          the future.  */
 302       if (next == '?' || next == '\\')
 303         next = skip_escaped_newlines (pfile, next);
 304     }
 305
 306   buffer->read_ahead = next;
 307   return next;
 308 }
 309
 310 /* Skip a C-style block comment.  We find the end of the comment by
 311    seeing if an asterisk is before every '/' we encounter.  Returns
 312    non-zero if comment terminated by EOF, zero otherwise.

        While the comment is being skipped pfile->state.lexing_comment
        is set.  On return buffer->read_ahead is EOF.  */
 313 static int
 314 skip_block_comment (pfile)
 315      cpp_reader *pfile;
 316 {
 317   cpp_buffer *buffer = pfile->buffer;
       /* C is the character being examined and PREVC the one examined
          before it; both start as EOF so that neither can match.  */
 318   cppchar_t c = EOF, prevc = EOF;
 319
 320   pfile->state.lexing_comment = 1;
 321   while (buffer->cur != buffer->rlimit)
 322     {
 323       prevc = c, c = *buffer->cur++;
 324
         /* Entered by goto when C already holds a character that has
            been read but not yet examined.  */
 325     next_char:
 326       /* FIXME: For speed, create a new character class of characters
 327          of interest inside block comments.  */
 328       if (c == '?' || c == '\\')
 329         c = skip_escaped_newlines (pfile, c);
 330
 331       /* People like decorating comments with '*', so check for '/'
 332          instead for efficiency.  */
 333       if (c == '/')
 334         {
               /* A '/' directly after a '*' closes the comment.  */
 335           if (prevc == '*')
 336             break;
 337
 338           /* Warn about potential nested comments, but not if the '/'
 339              comes immediately before the true comment delimiter.
 340              Don't bother to get it right across escaped newlines.  */
 341           if (CPP_OPTION (pfile, warn_comments)
 342               && buffer->cur != buffer->rlimit)
 343             {
 344               prevc = c, c = *buffer->cur++;
 345               if (c == '*' && buffer->cur != buffer->rlimit)
 346                 {
 347                   prevc = c, c = *buffer->cur++;
 348                   if (c != '/')
 349                     cpp_warning_with_line (pfile, pfile->line,
 350                                            CPP_BUF_COL (buffer) - 2,
 351                                            "\"/*\" within comment");
 352                 }
                   /* The character read ahead still has to be
                      examined by the main loop body.  */
 353               goto next_char;
 354             }
 355         }
 356       else if (is_vspace (c))
 357         {
               /* handle_newline returns the character after the
                  newline, which must be examined in turn.  */
 358           prevc = c, c = handle_newline (pfile, c);
 359           goto next_char;
 360         }
 361       else if (c == '\t')
 362         adjust_column (pfile);
 363     }
 364
 365   pfile->state.lexing_comment = 0;
 366   buffer->read_ahead = EOF;
       /* Unless the last two characters seen were the closing
          delimiter, the buffer ran out inside the comment.  */
 367   return c != '/' || prevc != '*';
 368 }
 369
 370 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 371    non-zero if a multiline comment.  The following new line, if any,
 372    is left in buffer->read_ahead.  */
 373 static int
 374 skip_line_comment (pfile)
 375      cpp_reader *pfile;
 376 {
 377   cpp_buffer *buffer = pfile->buffer;
 378   unsigned int orig_line = pfile->line;
 379   cppchar_t c;
 380
 381   pfile->state.lexing_comment = 1;
 382   do
 383     {
 384       c = EOF;
 385       if (buffer->cur == buffer->rlimit)
 386         break;
 387
 388       c = *buffer->cur++;
 389       if (c == '?' || c == '\\')
 390         c = skip_escaped_newlines (pfile, c);
 391     }
 392   while (!is_vspace (c));
 393
 394   pfile->state.lexing_comment = 0;
 395   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 396   return orig_line != pfile->line;
 397 }
 398
 399 /* pfile->buffer->cur is one beyond the \t character.  Update
 400    col_adjust so we track the column correctly.  */
 401 static void
 402 adjust_column (pfile)
 403      cpp_reader *pfile;
 404 {
 405   cpp_buffer *buffer = pfile->buffer;
 406   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 407
 408   /* Round it up to multiple of the tabstop, but subtract 1 since the
 409      tab itself occupies a character position.  */
 410   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 411                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 412 }
 413
 414 /* Skips whitespace, saving the next non-whitespace character.
 415    Adjusts pfile->col_adjust to account for tabs.  Without this,
 416    tokens might be assigned an incorrect column.  */
 417 static void
 418 skip_whitespace (pfile, c)
 419      cpp_reader *pfile;
 420      cppchar_t c;
 421 {
 422   cpp_buffer *buffer = pfile->buffer;
 423   unsigned int warned = 0;
 424
 425   do
 426     {
 427       /* Horizontal space always OK.  */
 428       if (c == ' ')
 429         ;
 430       else if (c == '\t')
 431         adjust_column (pfile);
 432       /* Just \f \v or \0 left.  */
 433       else if (c == '\0')
 434         {
 435           if (!warned)
 436             {
 437               cpp_warning (pfile, "null character(s) ignored");
 438               warned = 1;
 439             }
 440         }
 441       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 442         cpp_pedwarn_with_line (pfile, pfile->line,
 443                                CPP_BUF_COL (buffer),
 444                                "%s in preprocessing directive",
 445                                c == '\f' ? "form feed" : "vertical tab");
 446
 447       c = EOF;
 448       if (buffer->cur == buffer->rlimit)
 449         break;
 450       c = *buffer->cur++;
 451     }
 452   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 453   while (is_nvspace (c));
 454
 455   /* Remember the next character.  */
 456   buffer->read_ahead = c;
 457 }
 458
 459 /* See if the characters of a number token are valid in a name (no
 460    '.', '+' or '-').  */
 461 static int
 462 name_p (pfile, string)
 463      cpp_reader *pfile;
 464      const cpp_string *string;
 465 {
 466   unsigned int i;
 467
 468   for (i = 0; i < string->len; i++)
 469     if (!is_idchar (string->text[i]))
 470       return 0;
 471
 472   return 1;
 473 }
 474
 475 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 476    a critical inner loop.  The common case is an identifier which has
 477    not been split by backslash-newline, does not contain a dollar
 478    sign, and has already been scanned (roughly 10:1 ratio of
 479    seen:unseen identifiers in normal code; the distribution is
 480    Poisson-like).  Second most common case is a new identifier, not
 481    split and no dollar sign.  The other possibilities are rare and
 482    have been relegated to parse_identifier_slow.  */
 483
 484 static cpp_hashnode *
 485 parse_identifier (pfile)
 486      cpp_reader *pfile;
 487 {
 488   cpp_hashnode *result;
 489   const U_CHAR *cur, *rlimit;
 490
 491   /* Fast-path loop.  Skim over a normal identifier.
 492      N.B. ISIDNUM does not include $.  */
 493   cur    = pfile->buffer->cur - 1;
 494   rlimit = pfile->buffer->rlimit;
 495   do
 496     cur++;
 497   while (cur < rlimit && ISIDNUM (*cur));
 498
 499   /* Check for slow-path cases.  */
 500   if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
 501     result = parse_identifier_slow (pfile, cur);
 502   else
 503     {
 504       const U_CHAR *base = pfile->buffer->cur - 1;
 505       result = (cpp_hashnode *)
 506         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 507       pfile->buffer->cur = cur;
 508     }
 509
 510   /* Rarely, identifiers require diagnostics when lexed.
 511      XXX Has to be forced out of the fast path.  */
 512   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 513                         && !pfile->state.skipping, 0))
 514     {
 515       /* It is allowed to poison the same identifier twice.  */
 516       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 517         cpp_error (pfile, "attempt to use poisoned \"%s\"",
 518                    NODE_NAME (result));
 519
 520       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 521          replacement list of a variadic macro.  */
 522       if (result == pfile->spec_nodes.n__VA_ARGS__
 523           && !pfile->state.va_args_ok)
 524         cpp_pedwarn (pfile,
 525         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 526     }
 527
 528   return result;
 529 }
 530
 531 /* Slow path.  This handles identifiers which have been split, and
 532    identifiers which contain dollar signs.  The part of the identifier
 533    from PFILE->buffer->cur-1 to CUR has already been scanned.  */
 534 static cpp_hashnode *
 535 parse_identifier_slow (pfile, cur)
 536      cpp_reader *pfile;
 537      const U_CHAR *cur;
 538 {
 539   cpp_buffer *buffer = pfile->buffer;
 540   const U_CHAR *base = buffer->cur - 1;
 541   struct obstack *stack = &pfile->hash_table->stack;
 542   unsigned int c, saw_dollar = 0, len;
 543
 544   /* Copy the part of the token which is known to be okay.  */
 545   obstack_grow (stack, base, cur - base);
 546
 547   /* Now process the part which isn't.  We are looking at one of
 548      '$', '\\', or '?' on entry to this loop.  */
 549   c = *cur++;
 550   buffer->cur = cur;
 551   do
 552     {
 553       while (is_idchar (c))
 554         {
 555           obstack_1grow (stack, c);
 556
 557           if (c == '$')
 558             saw_dollar++;
 559
 560           c = EOF;
 561           if (buffer->cur == buffer->rlimit)
 562             break;
 563
 564           c = *buffer->cur++;
 565         }
 566
 567       /* Potential escaped newline?  */
 568       if (c != '?' && c != '\\')
 569         break;
 570       c = skip_escaped_newlines (pfile, c);
 571     }
 572   while (is_idchar (c));
 573
 574   /* Remember the next character.  */
 575   buffer->read_ahead = c;
 576
 577   /* $ is not a identifier character in the standard, but is commonly
 578      accepted as an extension.  Don't warn about it in skipped
 579      conditional blocks.  */
 580   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 581     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 582
 583   /* Identifiers are null-terminated.  */
 584   len = obstack_object_size (stack);
 585   obstack_1grow (stack, '\0');
 586
 587   return (cpp_hashnode *)
 588     ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
 589 }
 590
 591 /* Parse a number, skipping embedded backslash-newlines.  */
 592 static void
 593 parse_number (pfile, number, c, leading_period)
 594      cpp_reader *pfile;
 595      cpp_string *number;
 596      cppchar_t c;
 597      int leading_period;
 598 {
 599   cpp_buffer *buffer = pfile->buffer;
 600   unsigned char *dest, *limit;
 601
 602   dest = BUFF_FRONT (pfile->u_buff);
 603   limit = BUFF_LIMIT (pfile->u_buff);
 604
 605   /* Place a leading period.  */
 606   if (leading_period)
 607     {
 608       if (dest == limit)
 609         {
 610           _cpp_extend_buff (pfile, &pfile->u_buff, 1);
 611           dest = BUFF_FRONT (pfile->u_buff);
 612           limit = BUFF_LIMIT (pfile->u_buff);
 613         }
 614       *dest++ = '.';
 615     }
 616
 617   do
 618     {
 619       do
 620         {
 621           /* Need room for terminating null.  */
 622           if ((size_t) (limit - dest) < 2)
 623             {
 624               size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 625               _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 626               dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 627               limit = BUFF_LIMIT (pfile->u_buff);
 628             }
 629           *dest++ = c;
 630
 631           c = EOF;
 632           if (buffer->cur == buffer->rlimit)
 633             break;
 634
 635           c = *buffer->cur++;
 636         }
 637       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 638
 639       /* Potential escaped newline?  */
 640       if (c != '?' && c != '\\')
 641         break;
 642       c = skip_escaped_newlines (pfile, c);
 643     }
 644   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 645
 646   /* Remember the next character.  */
 647   buffer->read_ahead = c;
 648
 649   /* Null-terminate the number.  */
 650   *dest = '\0';
 651
 652   number->text = BUFF_FRONT (pfile->u_buff);
 653   number->len = dest - number->text;
 654   BUFF_FRONT (pfile->u_buff) = dest + 1;
 655 }
 656
 657 /* Subroutine of parse_string.  Emits error for unterminated strings.  */
 658 static void
 659 unterminated (pfile, term)
 660      cpp_reader *pfile;
 661      int term;
 662 {
 663   cpp_error (pfile, "missing terminating %c character", term);
 664
 665   if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
 666     {
 667       cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
 668                            "possible start of unterminated string literal");
 669       pfile->mls_line = 0;
 670     }
 671 }
 672
 673 /* Subroutine of parse_string.  */
 674 static int
 675 unescaped_terminator_p (pfile, dest)
 676      cpp_reader *pfile;
 677      const unsigned char *dest;
 678 {
 679   const unsigned char *start, *temp;
 680
 681   /* In #include-style directives, terminators are not escapeable.  */
 682   if (pfile->state.angled_headers)
 683     return 1;
 684
 685   start = BUFF_FRONT (pfile->u_buff);
 686
 687   /* An odd number of consecutive backslashes represents an escaped
 688      terminator.  */
 689   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 690     ;
 691
 692   return ((dest - temp) & 1) == 0;
 693 }
 694
 695 /* Parses a string, character constant, or angle-bracketed header file
 696    name.  Handles embedded trigraphs and escaped newlines.  The stored
 697    string is guaranteed NUL-terminated, but it is not guaranteed that
 698    this is the first NUL since embedded NULs are preserved.
 699
 700    Multi-line strings are allowed, but they are deprecated.

        TERMINATOR is the character that closes the literal.  The
        contents, without the terminator, are written to pfile->u_buff
        and recorded in TOKEN->val.str.  On return buffer->read_ahead
        is EOF if the terminator was found or the buffer ran out, and
        otherwise the newline at which lexing stopped.  */
 701 static void
 702 parse_string (pfile, token, terminator)
 703      cpp_reader *pfile;
 704      cpp_token *token;
 705      cppchar_t terminator;
 706 {
 707   cpp_buffer *buffer = pfile->buffer;
 708   unsigned char *dest, *limit;
 709   cppchar_t c;
 710   bool warned_nulls = false, warned_multi = false;
 711
 712   dest = BUFF_FRONT (pfile->u_buff);
 713   limit = BUFF_LIMIT (pfile->u_buff);
 714
 715   for (;;)
 716     {
 717       if (buffer->cur == buffer->rlimit)
 718         c = EOF;
 719       else
 720         c = *buffer->cur++;
 721
         /* Entered by goto after a newline inside a string, when C
            already holds the character that follows it.  */
 722     have_char:
 723       /* We need space for the terminating NUL.  */
           /* Checked before every store, so that on every exit from
              the loop DEST still points inside the buffer.  */
 724       if ((size_t) (limit - dest) < 1)
 725         {
 726           size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 727           _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 728           dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 729           limit = BUFF_LIMIT (pfile->u_buff);
 730         }
 731
 732       if (c == EOF)
 733         {
 734           unterminated (pfile, terminator);
 735           break;
 736         }
 737
 738       /* Handle trigraphs, escaped newlines etc.  */
 739       if (c == '?' || c == '\\')
 740         c = skip_escaped_newlines (pfile, c);
 741
 742       if (c == terminator && unescaped_terminator_p (pfile, dest))
 743         {
               /* The terminator is consumed and not stored.  */
 744           c = EOF;
 745           break;
 746         }
 747       else if (is_vspace (c))
 748         {
 749           /* In assembly language, silently terminate string and
 750              character literals at end of line.  This is a kludge
 751              around not knowing where comments are.  */
 752           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 753             break;
 754
 755           /* Character constants and header names may not extend over
 756              multiple lines.  In Standard C, neither may strings.
 757              Unfortunately, we accept multiline strings as an
 758              extension, except in #include family directives.  */
 759           if (terminator != '"' || pfile->state.angled_headers)
 760             {
 761               unterminated (pfile, terminator);
 762               break;
 763             }
 764
 765           if (!warned_multi)
 766             {
 767               warned_multi = true;
 768               cpp_pedwarn (pfile, "multi-line string literals are deprecated");
 769             }
 770
               /* Record where the first multi-line string began, for
                  the diagnostic issued by unterminated.  */
 771           if (pfile->mls_line == 0)
 772             {
 773               pfile->mls_line = token->line;
 774               pfile->mls_col = token->col;
 775             }
 776
               /* Whatever the newline's spelling, a single '\n' is
                  stored; C becomes the character after the newline.  */
 777           c = handle_newline (pfile, c);
 778           *dest++ = '\n';
 779           goto have_char;
 780         }
 781       else if (c == '\0' && !warned_nulls)
 782         {
 783           warned_nulls = true;
 784           cpp_warning (pfile, "null character(s) preserved in literal");
 785         }
 786
 787       *dest++ = c;
 788     }
 789
 790   /* Remember the next character.  */
 791   buffer->read_ahead = c;
 792   *dest = '\0';
 793
       /* The length excludes the terminating NUL; the buffer front
          moves past it.  */
 794   token->val.str.text = BUFF_FRONT (pfile->u_buff);
 795   token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
 796   BUFF_FRONT (pfile->u_buff) = dest + 1;
 797 }
 798
 799 /* The stored comment includes the comment start and any terminator.  */
 800 static void
 801 save_comment (pfile, token, from)
 802      cpp_reader *pfile;
 803      cpp_token *token;
 804      const unsigned char *from;
 805 {
 806   unsigned char *buffer;
 807   unsigned int len;
 808
 809   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 810   /* C++ comments probably (not definitely) have moved past a new
 811      line, which we don't want to save in the comment.  */
 812   if (pfile->buffer->read_ahead != EOF)
 813     len--;
 814   buffer = _cpp_unaligned_alloc (pfile, len);
 815
 816   token->type = CPP_COMMENT;
 817   token->val.str.len = len;
 818   token->val.str.text = buffer;
 819
 820   buffer[0] = '/';
 821   memcpy (buffer + 1, from, len - 1);
 822 }
 823
 824 /* Subroutine of _cpp_lex_direct to handle '%'.  A little tricky, since we
 825    want to avoid stepping back when lexing %:%X.  */
 826 static void
 827 lex_percent (pfile, result)
 828      cpp_reader *pfile;
 829      cpp_token *result;
 830 {
 831   cpp_buffer *buffer= pfile->buffer;
 832   cppchar_t c;
 833
 834   result->type = CPP_MOD;
 835   /* Parsing %:%X could leave an extra character.  */
 836   if (buffer->extra_char == EOF)
 837     c = get_effective_char (pfile);
 838   else
 839     {
 840       c = buffer->read_ahead = buffer->extra_char;
 841       buffer->extra_char = EOF;
 842     }
 843
 844   if (c == '=')
 845     ACCEPT_CHAR (CPP_MOD_EQ);
 846   else if (CPP_OPTION (pfile, digraphs))
 847     {
 848       if (c == ':')
 849         {
 850           result->flags |= DIGRAPH;
 851           ACCEPT_CHAR (CPP_HASH);
 852           if (get_effective_char (pfile) == '%')
 853             {
 854               buffer->extra_char = get_effective_char (pfile);
 855               if (buffer->extra_char == ':')
 856                 {
 857                   buffer->extra_char = EOF;
 858                   ACCEPT_CHAR (CPP_PASTE);
 859                 }
 860               else
 861                 /* We'll catch the extra_char when we're called back.  */
 862                 buffer->read_ahead = '%';
 863             }
 864         }
 865       else if (c == '>')
 866         {
 867           result->flags |= DIGRAPH;
 868           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 869         }
 870     }
 871 }
 872
 873 /* Subroutine of _cpp_lex_direct to handle '.'.  This is tricky, since we
 874    want to avoid stepping back when lexing '...' or '.123'.  In the
 875    latter case we should also set a flag for parse_number.  */
 876 static void
 877 lex_dot (pfile, result)
 878      cpp_reader *pfile;
 879      cpp_token *result;
 880 {
 881   cpp_buffer *buffer = pfile->buffer;
 882   cppchar_t c;
 883
 884   /* Parsing ..X could leave an extra character.  */
 885   if (buffer->extra_char == EOF)
 886     c = get_effective_char (pfile);
 887   else
 888     {
 889       c = buffer->read_ahead = buffer->extra_char;
 890       buffer->extra_char = EOF;
 891     }
 892
 893   /* All known character sets have 0...9 contiguous.  */
 894   if (c >= '0' && c <= '9')
 895     {
 896       result->type = CPP_NUMBER;
 897       parse_number (pfile, &result->val.str, c, 1);
 898     }
 899   else
 900     {
 901       result->type = CPP_DOT;
 902       if (c == '.')
 903         {
 904           buffer->extra_char = get_effective_char (pfile);
 905           if (buffer->extra_char == '.')
 906             {
 907               buffer->extra_char = EOF;
 908               ACCEPT_CHAR (CPP_ELLIPSIS);
 909             }
 910           else
 911             /* We'll catch the extra_char when we're called back.  */
 912             buffer->read_ahead = '.';
 913         }
 914       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 915         ACCEPT_CHAR (CPP_DOT_STAR);
 916     }
 917 }
 918
 919 /* Allocate COUNT tokens for RUN.  */
 920 void
 921 _cpp_init_tokenrun (run, count)
 922      tokenrun *run;
 923      unsigned int count;
 924 {
 925   run->base = xnewvec (cpp_token, count);
 926   run->limit = run->base + count;
 927   run->next = NULL;
 928 }
 929
 930 /* Returns the next tokenrun, or creates one if there is none.  */
 931 static tokenrun *
 932 next_tokenrun (run)
 933      tokenrun *run;
 934 {
 935   if (run->next == NULL)
 936     {
 937       run->next = xnew (tokenrun);
 938       run->next->prev = run;
 939       _cpp_init_tokenrun (run->next, 250);
 940     }
 941
 942   return run->next;
 943 }
 944
 945 /* Allocate a single token that is invalidated at the same time as the
 946    rest of the tokens on the line.  Has its line and col set to the
 947    same as the last lexed token, so that diagnostics appear in the
 948    right place.  */
 949 cpp_token *
 950 _cpp_temp_token (pfile)
 951      cpp_reader *pfile;
 952 {
 953   cpp_token *old, *result;
 954
 955   old = pfile->cur_token - 1;
 956   if (pfile->cur_token == pfile->cur_run->limit)
 957     {
 958       pfile->cur_run = next_tokenrun (pfile->cur_run);
 959       pfile->cur_token = pfile->cur_run->base;
 960     }
 961
 962   result = pfile->cur_token++;
 963   result->line = old->line;
 964   result->col = old->col;
 965   return result;
 966 }
 967
 968 /* Lex a token into RESULT (external interface).  Takes care of issues
 969    like directive handling, token lookahead, multiple include
 970    opimisation and skipping.  */
 971 const cpp_token *
 972 _cpp_lex_token (pfile)
 973      cpp_reader *pfile;
 974 {
 975   cpp_token *result;
 976
 977   for (;;)
 978     {
 979       if (pfile->cur_token == pfile->cur_run->limit)
 980         {
 981           pfile->cur_run = next_tokenrun (pfile->cur_run);
 982           pfile->cur_token = pfile->cur_run->base;
 983         }
 984
 985       if (pfile->lookaheads)
 986         {
 987           pfile->lookaheads--;
 988           result = pfile->cur_token++;
 989         }
 990       else
 991         result = _cpp_lex_direct (pfile);
 992
 993       if (result->flags & BOL)
 994         {
 995           /* Is this a directive.  If _cpp_handle_directive returns
 996              false, it is an assembler #.  */
 997           if (result->type == CPP_HASH
 998               && !pfile->state.parsing_args
 999               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1000             continue;
1001           if (pfile->cb.line_change && !pfile->state.skipping)
1002             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
1003         }
1004
1005       /* We don't skip tokens in directives.  */
1006       if (pfile->state.in_directive)
1007         break;
1008
1009       /* Outside a directive, invalidate controlling macros.  At file
1010          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1011          get here and MI optimisation works.  */
1012       pfile->mi_valid = false;
1013
1014       if (!pfile->state.skipping || result->type == CPP_EOF)
1015         break;
1016     }
1017
1018   return result;
1019 }
1020
1021 /* Lex a token into pfile->cur_token, which is also incremented, to
1022    get diagnostics pointing to the correct location.
1023
1024    Does not handle issues such as token lookahead, multiple-include
1025    optimisation, directives, skipping etc.  This function is only
1026    suitable for use by _cpp_lex_token, and in special cases like
1027    lex_expansion_token which doesn't care for any of these issues.
1028
1029    When meeting a newline, returns CPP_EOF if parsing a directive,
1030    otherwise returns to the start of the token buffer if permissible.
1031    Returns the location of the lexed token.  */
1032 cpp_token *
1033 _cpp_lex_direct (pfile)
1034      cpp_reader *pfile;
1035 {
1036   cppchar_t c;
1037   cpp_buffer *buffer;
1038   const unsigned char *comment_start;
1039   cpp_token *result = pfile->cur_token++;
1040
1041  fresh_line:
1042   buffer = pfile->buffer;
1043   result->flags = buffer->saved_flags;
1044   buffer->saved_flags = 0;
1045  update_tokens_line:
1046   result->line = pfile->line;
1047
1048  skipped_white:
1049   c = buffer->read_ahead;
1050   if (c == EOF && buffer->cur < buffer->rlimit)
1051     c = *buffer->cur++;
1052   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1053   buffer->read_ahead = EOF;
1054
1055  trigraph:
1056   switch (c)
1057     {
1058     case EOF:
1059       buffer->saved_flags = BOL;
1060       if (!pfile->state.parsing_args && !pfile->state.in_directive)
1061         {
1062           if (buffer->cur != buffer->line_base)
1063             {
1064               /* Non-empty files should end in a newline.  Don't warn
1065                  for command line and _Pragma buffers.  */
1066               if (!buffer->from_stage3)
1067                 cpp_pedwarn (pfile, "no newline at end of file");
1068               handle_newline (pfile, '\n');
1069             }
1070
1071           /* Don't pop the last buffer.  */
1072           if (buffer->prev)
1073             {
1074               unsigned char stop = buffer->return_at_eof;
1075
1076               _cpp_pop_buffer (pfile);
1077               if (!stop)
1078                 goto fresh_line;
1079             }
1080         }
1081       result->type = CPP_EOF;
1082       break;
1083
1084     case ' ': case '\t': case '\f': case '\v': case '\0':
1085       skip_whitespace (pfile, c);
1086       result->flags |= PREV_WHITE;
1087       goto skipped_white;
1088
1089     case '\n': case '\r':
1090       handle_newline (pfile, c);
1091       buffer->saved_flags = BOL;
1092       if (! pfile->state.in_directive)
1093         {
1094           if (pfile->state.parsing_args == 2)
1095             buffer->saved_flags |= PREV_WHITE;
1096           if (!pfile->keep_tokens)
1097             {
1098               pfile->cur_run = &pfile->base_run;
1099               result = pfile->base_run.base;
1100               pfile->cur_token = result + 1;
1101             }
1102           goto fresh_line;
1103         }
1104       result->type = CPP_EOF;
1105       break;
1106
1107     case '?':
1108     case '\\':
1109       /* These could start an escaped newline, or '?' a trigraph.  Let
1110          skip_escaped_newlines do all the work.  */
1111       {
1112         unsigned int line = pfile->line;
1113
1114         c = skip_escaped_newlines (pfile, c);
1115         if (line != pfile->line)
1116           /* We had at least one escaped newline of some sort, and the
1117              next character is in buffer->read_ahead.  Update the
1118              token's line and column.  */
1119             goto update_tokens_line;
1120
1121         /* We are either the original '?' or '\\', or a trigraph.  */
1122         result->type = CPP_QUERY;
1123         buffer->read_ahead = EOF;
1124         if (c == '\\')
1125           goto random_char;
1126         else if (c != '?')
1127           goto trigraph;
1128       }
1129       break;
1130
1131     case '0': case '1': case '2': case '3': case '4':
1132     case '5': case '6': case '7': case '8': case '9':
1133       result->type = CPP_NUMBER;
1134       parse_number (pfile, &result->val.str, c, 0);
1135       break;
1136
1137     case '$':
1138       if (!CPP_OPTION (pfile, dollars_in_ident))
1139         goto random_char;
1140       /* Fall through...  */
1141
1142     case '_':
1143     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1144     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1145     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1146     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1147     case 'y': case 'z':
1148     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1149     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1150     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1151     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1152     case 'Y': case 'Z':
1153       result->type = CPP_NAME;
1154       result->val.node = parse_identifier (pfile);
1155
1156       /* 'L' may introduce wide characters or strings.  */
1157       if (result->val.node == pfile->spec_nodes.n_L)
1158         {
1159           c = buffer->read_ahead;
1160           if (c == EOF && buffer->cur < buffer->rlimit)
1161             c = *buffer->cur;
1162           if (c == '\'' || c == '"')
1163             {
1164               buffer->cur++;
1165               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1166               goto make_string;
1167             }
1168         }
1169       /* Convert named operators to their proper types.  */
1170       else if (result->val.node->flags & NODE_OPERATOR)
1171         {
1172           result->flags |= NAMED_OP;
1173           result->type = result->val.node->value.operator;
1174         }
1175       break;
1176
1177     case '\'':
1178     case '"':
1179       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1180     make_string:
1181       parse_string (pfile, result, c);
1182       break;
1183
1184     case '/':
1185       /* A potential block or line comment.  */
1186       comment_start = buffer->cur;
1187       result->type = CPP_DIV;
1188       c = get_effective_char (pfile);
1189       if (c == '=')
1190         ACCEPT_CHAR (CPP_DIV_EQ);
1191       if (c != '/' && c != '*')
1192         break;
1193
1194       if (c == '*')
1195         {
1196           if (skip_block_comment (pfile))
1197             cpp_error (pfile, "unterminated comment");
1198         }
1199       else
1200         {
1201           if (!CPP_OPTION (pfile, cplusplus_comments)
1202               && !CPP_IN_SYSTEM_HEADER (pfile))
1203             break;
1204
1205           /* Warn about comments only if pedantically GNUC89, and not
1206              in system headers.  */
1207           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1208               && ! buffer->warned_cplusplus_comments)
1209             {
1210               cpp_pedwarn (pfile,
1211                            "C++ style comments are not allowed in ISO C89");
1212               cpp_pedwarn (pfile,
1213                            "(this will be reported only once per input file)");
1214               buffer->warned_cplusplus_comments = 1;
1215             }
1216
1217           /* Skip_line_comment updates buffer->read_ahead.  */
1218           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1219             cpp_warning (pfile, "multi-line comment");
1220         }
1221
1222       /* Skipping the comment has updated buffer->read_ahead.  */
1223       if (!pfile->state.save_comments)
1224         {
1225           result->flags |= PREV_WHITE;
1226           goto update_tokens_line;
1227         }
1228
1229       /* Save the comment as a token in its own right.  */
1230       save_comment (pfile, result, comment_start);
1231       break;
1232
1233     case '<':
1234       if (pfile->state.angled_headers)
1235         {
1236           result->type = CPP_HEADER_NAME;
1237           c = '>';              /* terminator.  */
1238           goto make_string;
1239         }
1240
1241       result->type = CPP_LESS;
1242       c = get_effective_char (pfile);
1243       if (c == '=')
1244         ACCEPT_CHAR (CPP_LESS_EQ);
1245       else if (c == '<')
1246         {
1247           ACCEPT_CHAR (CPP_LSHIFT);
1248           if (get_effective_char (pfile) == '=')
1249             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1250         }
1251       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1252         {
1253           ACCEPT_CHAR (CPP_MIN);
1254           if (get_effective_char (pfile) == '=')
1255             ACCEPT_CHAR (CPP_MIN_EQ);
1256         }
1257       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1258         {
1259           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1260           result->flags |= DIGRAPH;
1261         }
1262       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1263         {
1264           ACCEPT_CHAR (CPP_OPEN_BRACE);
1265           result->flags |= DIGRAPH;
1266         }
1267       break;
1268
1269     case '>':
1270       result->type = CPP_GREATER;
1271       c = get_effective_char (pfile);
1272       if (c == '=')
1273         ACCEPT_CHAR (CPP_GREATER_EQ);
1274       else if (c == '>')
1275         {
1276           ACCEPT_CHAR (CPP_RSHIFT);
1277           if (get_effective_char (pfile) == '=')
1278             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1279         }
1280       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1281         {
1282           ACCEPT_CHAR (CPP_MAX);
1283           if (get_effective_char (pfile) == '=')
1284             ACCEPT_CHAR (CPP_MAX_EQ);
1285         }
1286       break;
1287
1288     case '%':
1289       lex_percent (pfile, result);
1290       break;
1291
1292     case '.':
1293       lex_dot (pfile, result);
1294       break;
1295
1296     case '+':
1297       result->type = CPP_PLUS;
1298       c = get_effective_char (pfile);
1299       if (c == '=')
1300         ACCEPT_CHAR (CPP_PLUS_EQ);
1301       else if (c == '+')
1302         ACCEPT_CHAR (CPP_PLUS_PLUS);
1303       break;
1304
1305     case '-':
1306       result->type = CPP_MINUS;
1307       c = get_effective_char (pfile);
1308       if (c == '>')
1309         {
1310           ACCEPT_CHAR (CPP_DEREF);
1311           if (CPP_OPTION (pfile, cplusplus)
1312               && get_effective_char (pfile) == '*')
1313             ACCEPT_CHAR (CPP_DEREF_STAR);
1314         }
1315       else if (c == '=')
1316         ACCEPT_CHAR (CPP_MINUS_EQ);
1317       else if (c == '-')
1318         ACCEPT_CHAR (CPP_MINUS_MINUS);
1319       break;
1320
1321     case '*':
1322       result->type = CPP_MULT;
1323       if (get_effective_char (pfile) == '=')
1324         ACCEPT_CHAR (CPP_MULT_EQ);
1325       break;
1326
1327     case '=':
1328       result->type = CPP_EQ;
1329       if (get_effective_char (pfile) == '=')
1330         ACCEPT_CHAR (CPP_EQ_EQ);
1331       break;
1332
1333     case '!':
1334       result->type = CPP_NOT;
1335       if (get_effective_char (pfile) == '=')
1336         ACCEPT_CHAR (CPP_NOT_EQ);
1337       break;
1338
1339     case '&':
1340       result->type = CPP_AND;
1341       c = get_effective_char (pfile);
1342       if (c == '=')
1343         ACCEPT_CHAR (CPP_AND_EQ);
1344       else if (c == '&')
1345         ACCEPT_CHAR (CPP_AND_AND);
1346       break;
1347
1348     case '#':
1349       result->type = CPP_HASH;
1350       if (get_effective_char (pfile) == '#')
1351           ACCEPT_CHAR (CPP_PASTE);
1352       break;
1353
1354     case '|':
1355       result->type = CPP_OR;
1356       c = get_effective_char (pfile);
1357       if (c == '=')
1358         ACCEPT_CHAR (CPP_OR_EQ);
1359       else if (c == '|')
1360         ACCEPT_CHAR (CPP_OR_OR);
1361       break;
1362
1363     case '^':
1364       result->type = CPP_XOR;
1365       if (get_effective_char (pfile) == '=')
1366         ACCEPT_CHAR (CPP_XOR_EQ);
1367       break;
1368
1369     case ':':
1370       result->type = CPP_COLON;
1371       c = get_effective_char (pfile);
1372       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1373         ACCEPT_CHAR (CPP_SCOPE);
1374       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1375         {
1376           result->flags |= DIGRAPH;
1377           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1378         }
1379       break;
1380
1381     case '~': result->type = CPP_COMPL; break;
1382     case ',': result->type = CPP_COMMA; break;
1383     case '(': result->type = CPP_OPEN_PAREN; break;
1384     case ')': result->type = CPP_CLOSE_PAREN; break;
1385     case '[': result->type = CPP_OPEN_SQUARE; break;
1386     case ']': result->type = CPP_CLOSE_SQUARE; break;
1387     case '{': result->type = CPP_OPEN_BRACE; break;
1388     case '}': result->type = CPP_CLOSE_BRACE; break;
1389     case ';': result->type = CPP_SEMICOLON; break;
1390
1391       /* @ is a punctuator in Objective C.  */
1392     case '@': result->type = CPP_ATSIGN; break;
1393
1394     random_char:
1395     default:
1396       result->type = CPP_OTHER;
1397       result->val.c = c;
1398       break;
1399     }
1400
1401   return result;
1402 }
1403
1404 /* An upper bound on the number of bytes needed to spell a token,
1405    including preceding whitespace.  */
1406 unsigned int
1407 cpp_token_len (token)
1408      const cpp_token *token;
1409 {
1410   unsigned int len;
1411
1412   switch (TOKEN_SPELL (token))
1413     {
1414     default:            len = 0;                                break;
1415     case SPELL_STRING:  len = token->val.str.len;               break;
1416     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1417     }
1418   /* 1 for whitespace, 4 for comment delimeters.  */
1419   return len + 5;
1420 }
1421
1422 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1423    already contain the enough space to hold the token's spelling.
1424    Returns a pointer to the character after the last character
1425    written.  */
1426 unsigned char *
1427 cpp_spell_token (pfile, token, buffer)
1428      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1429      const cpp_token *token;
1430      unsigned char *buffer;
1431 {
1432   switch (TOKEN_SPELL (token))
1433     {
1434     case SPELL_OPERATOR:
1435       {
1436         const unsigned char *spelling;
1437         unsigned char c;
1438
1439         if (token->flags & DIGRAPH)
1440           spelling
1441             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1442         else if (token->flags & NAMED_OP)
1443           goto spell_ident;
1444         else
1445           spelling = TOKEN_NAME (token);
1446
1447         while ((c = *spelling++) != '\0')
1448           *buffer++ = c;
1449       }
1450       break;
1451
1452     case SPELL_IDENT:
1453       spell_ident:
1454       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1455       buffer += NODE_LEN (token->val.node);
1456       break;
1457
1458     case SPELL_STRING:
1459       {
1460         int left, right, tag;
1461         switch (token->type)
1462           {
1463           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1464           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1465           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1466           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1467           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1468           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1469           }
1470         if (tag) *buffer++ = tag;
1471         if (left) *buffer++ = left;
1472         memcpy (buffer, token->val.str.text, token->val.str.len);
1473         buffer += token->val.str.len;
1474         if (right) *buffer++ = right;
1475       }
1476       break;
1477
1478     case SPELL_CHAR:
1479       *buffer++ = token->val.c;
1480       break;
1481
1482     case SPELL_NONE:
1483       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1484       break;
1485     }
1486
1487   return buffer;
1488 }
1489
1490 /* Returns a token as a null-terminated string.  The string is
1491    temporary, and automatically freed later.  Useful for diagnostics.  */
1492 unsigned char *
1493 cpp_token_as_text (pfile, token)
1494      cpp_reader *pfile;
1495      const cpp_token *token;
1496 {
1497   unsigned int len = cpp_token_len (token);
1498   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1499
1500   end = cpp_spell_token (pfile, token, start);
1501   end[0] = '\0';
1502
1503   return start;
1504 }
1505
1506 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1507 const char *
1508 cpp_type2name (type)
1509      enum cpp_ttype type;
1510 {
1511   return (const char *) token_spellings[type].name;
1512 }
1513
1514 /* Writes the spelling of token to FP, without any preceding space.
1515    Separated from cpp_spell_token for efficiency - to avoid stdio
1516    double-buffering.  */
1517 void
1518 cpp_output_token (token, fp)
1519      const cpp_token *token;
1520      FILE *fp;
1521 {
1522   switch (TOKEN_SPELL (token))
1523     {
1524     case SPELL_OPERATOR:
1525       {
1526         const unsigned char *spelling;
1527         int c;
1528
1529         if (token->flags & DIGRAPH)
1530           spelling
1531             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1532         else if (token->flags & NAMED_OP)
1533           goto spell_ident;
1534         else
1535           spelling = TOKEN_NAME (token);
1536
1537         c = *spelling;
1538         do
1539           putc (c, fp);
1540         while ((c = *++spelling) != '\0');
1541       }
1542       break;
1543
1544     spell_ident:
1545     case SPELL_IDENT:
1546       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1547     break;
1548
1549     case SPELL_STRING:
1550       {
1551         int left, right, tag;
1552         switch (token->type)
1553           {
1554           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1555           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1556           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1557           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1558           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1559           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1560           }
1561         if (tag) putc (tag, fp);
1562         if (left) putc (left, fp);
1563         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1564         if (right) putc (right, fp);
1565       }
1566       break;
1567
1568     case SPELL_CHAR:
1569       putc (token->val.c, fp);
1570       break;
1571
1572     case SPELL_NONE:
1573       /* An error, most probably.  */
1574       break;
1575     }
1576 }
1577
1578 /* Compare two tokens.  */
1579 int
1580 _cpp_equiv_tokens (a, b)
1581      const cpp_token *a, *b;
1582 {
1583   if (a->type == b->type && a->flags == b->flags)
1584     switch (TOKEN_SPELL (a))
1585       {
1586       default:                  /* Keep compiler happy.  */
1587       case SPELL_OPERATOR:
1588         return 1;
1589       case SPELL_CHAR:
1590         return a->val.c == b->val.c; /* Character.  */
1591       case SPELL_NONE:
1592         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1593       case SPELL_IDENT:
1594         return a->val.node == b->val.node;
1595       case SPELL_STRING:
1596         return (a->val.str.len == b->val.str.len
1597                 && !memcmp (a->val.str.text, b->val.str.text,
1598                             a->val.str.len));
1599       }
1600
1601   return 0;
1602 }
1603
1604 /* Returns nonzero if a space should be inserted to avoid an
1605    accidental token paste for output.  For simplicity, it is
1606    conservative, and occasionally advises a space where one is not
1607    needed, e.g. "." and ".2".  */
1608
1609 int
1610 cpp_avoid_paste (pfile, token1, token2)
1611      cpp_reader *pfile;
1612      const cpp_token *token1, *token2;
1613 {
1614   enum cpp_ttype a = token1->type, b = token2->type;
1615   cppchar_t c;
1616
1617   if (token1->flags & NAMED_OP)
1618     a = CPP_NAME;
1619   if (token2->flags & NAMED_OP)
1620     b = CPP_NAME;
1621
1622   c = EOF;
1623   if (token2->flags & DIGRAPH)
1624     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1625   else if (token_spellings[b].category == SPELL_OPERATOR)
1626     c = token_spellings[b].name[0];
1627
1628   /* Quickly get everything that can paste with an '='.  */
1629   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1630     return 1;
1631
1632   switch (a)
1633     {
1634     case CPP_GREATER:   return c == '>' || c == '?';
1635     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1636     case CPP_PLUS:      return c == '+';
1637     case CPP_MINUS:     return c == '-' || c == '>';
1638     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1639     case CPP_MOD:       return c == ':' || c == '>';
1640     case CPP_AND:       return c == '&';
1641     case CPP_OR:        return c == '|';
1642     case CPP_COLON:     return c == ':' || c == '>';
1643     case CPP_DEREF:     return c == '*';
1644     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1645     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1646     case CPP_NAME:      return ((b == CPP_NUMBER
1647                                  && name_p (pfile, &token2->val.str))
1648                                 || b == CPP_NAME
1649                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1650     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1651                                 || c == '.' || c == '+' || c == '-');
1652     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1653                                 && token1->val.c == '@'
1654                                 && (b == CPP_NAME || b == CPP_STRING));
1655     default:            break;
1656     }
1657
1658   return 0;
1659 }
1660
1661 /* Output all the remaining tokens on the current line, and a newline
1662    character, to FP.  Leading whitespace is removed.  If there are
1663    macros, special token padding is not performed.  */
1664 void
1665 cpp_output_line (pfile, fp)
1666      cpp_reader *pfile;
1667      FILE *fp;
1668 {
1669   const cpp_token *token;
1670
1671   token = cpp_get_token (pfile);
1672   while (token->type != CPP_EOF)
1673     {
1674       cpp_output_token (token, fp);
1675       token = cpp_get_token (pfile);
1676       if (token->flags & PREV_WHITE)
1677         putc (' ', fp);
1678     }
1679
1680   putc ('\n', fp);
1681 }
1682
/* Map the hexadecimal digit C ('0'-'9', 'a'-'f' or 'A'-'F') to its
   numeric value, 0 to 15.  Aborts if C is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  unsigned int value;

  if (c >= '0' && c <= '9')
    value = c - '0';
  else if (c >= 'A' && c <= 'F')
    value = c - 'A' + 10;
  else if (c >= 'a' && c <= 'f')
    value = c - 'a' + 10;
  else
    abort ();

  return value;
}
1696
1697 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1698    failure if cpplib is not parsing C++ or C99.  Such failure is
1699    silent, and no variables are updated.  Otherwise returns 0, and
1700    warns if -Wtraditional.
1701
1702    [lex.charset]: The character designated by the universal character
1703    name \UNNNNNNNN is that character whose character short name in
1704    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1705    universal character name \uNNNN is that character whose character
1706    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1707    for a universal character name is less than 0x20 or in the range
1708    0x7F-0x9F (inclusive), or if the universal character name
1709    designates a character in the basic source character set, then the
1710    program is ill-formed.
1711
1712    We assume that wchar_t is Unicode, so we don't need to do any
1713    mapping.  Is this ever wrong?
1714
1715    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1716    LIMIT is the end of the string or charconst.  PSTR is updated to
1717    point after the UCS on return, and the UCS is written into PC.  */
1718
1719 static int
1720 maybe_read_ucs (pfile, pstr, limit, pc)
1721      cpp_reader *pfile;
1722      const unsigned char **pstr;
1723      const unsigned char *limit;
1724      unsigned int *pc;
1725 {
1726   const unsigned char *p = *pstr;
1727   unsigned int code = 0;
1728   unsigned int c = *pc, length;
1729
1730   /* Only attempt to interpret a UCS for C++ and C99.  */
1731   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1732     return 1;
1733
1734   if (CPP_WTRADITIONAL (pfile))
1735     cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1736
1737   length = (c == 'u' ? 4: 8);
1738
1739   if ((size_t) (limit - p) < length)
1740     {
1741       cpp_error (pfile, "incomplete universal-character-name");
1742       /* Skip to the end to avoid more diagnostics.  */
1743       p = limit;
1744     }
1745   else
1746     {
1747       for (; length; length--, p++)
1748         {
1749           c = *p;
1750           if (ISXDIGIT (c))
1751             code = (code << 4) + hex_digit_value (c);
1752           else
1753             {
1754               cpp_error (pfile,
1755                          "non-hex digit '%c' in universal-character-name", c);
1756               /* We shouldn't skip in case there are multibyte chars.  */
1757               break;
1758             }
1759         }
1760     }
1761
1762 #ifdef TARGET_EBCDIC
1763   cpp_error (pfile, "universal-character-name on EBCDIC target");
1764   code = 0x3f;  /* EBCDIC invalid character */
1765 #else
1766  /* True extended characters are OK.  */
1767   if (code >= 0xa0
1768       && !(code & 0x80000000)
1769       && !(code >= 0xD800 && code <= 0xDFFF))
1770     ;
1771   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1772      hex escapes so that this also works with EBCDIC hosts.  */
1773   else if (code == 0x24 || code == 0x40 || code == 0x60)
1774     ;
1775   /* Don't give another error if one occurred above.  */
1776   else if (length == 0)
1777     cpp_error (pfile, "universal-character-name out of range");
1778 #endif
1779
1780   *pstr = p;
1781   *pc = code;
1782   return 0;
1783 }
1784
1785 /* Interpret an escape sequence, and return its value.  PSTR points to
1786    the input pointer, which is just after the backslash.  LIMIT is how
1787    much text we have.  MASK is a bitmask for the precision for the
1788    destination type (char or wchar_t).  TRADITIONAL, if true, does not
1789    interpret escapes that did not exist in traditional C.
1790
1791    Handles all relevant diagnostics.  */
1792
1793 unsigned int
1794 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1795      cpp_reader *pfile;
1796      const unsigned char **pstr;
1797      const unsigned char *limit;
1798      unsigned HOST_WIDE_INT mask;
1799      int traditional;
1800 {
1801   int unknown = 0;
1802   const unsigned char *str = *pstr;
1803   unsigned int c = *str++;
1804
1805   switch (c)
1806     {
1807     case '\\': case '\'': case '"': case '?': break;
1808     case 'b': c = TARGET_BS;      break;
1809     case 'f': c = TARGET_FF;      break;
1810     case 'n': c = TARGET_NEWLINE; break;
1811     case 'r': c = TARGET_CR;      break;
1812     case 't': c = TARGET_TAB;     break;
1813     case 'v': c = TARGET_VT;      break;
1814
1815     case '(': case '{': case '[': case '%':
1816       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1817          '\%' is used to prevent SCCS from getting confused.  */
1818       unknown = CPP_PEDANTIC (pfile);
1819       break;
1820
1821     case 'a':
1822       if (CPP_WTRADITIONAL (pfile))
1823         cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1824       if (!traditional)
1825         c = TARGET_BELL;
1826       break;
1827
1828     case 'e': case 'E':
1829       if (CPP_PEDANTIC (pfile))
1830         cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1831       c = TARGET_ESC;
1832       break;
1833
1834     case 'u': case 'U':
1835       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1836       break;
1837
1838     case 'x':
1839       if (CPP_WTRADITIONAL (pfile))
1840         cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1841
1842       if (!traditional)
1843         {
1844           unsigned int i = 0, overflow = 0;
1845           int digits_found = 0;
1846
1847           while (str < limit)
1848             {
1849               c = *str;
1850               if (! ISXDIGIT (c))
1851                 break;
1852               str++;
1853               overflow |= i ^ (i << 4 >> 4);
1854               i = (i << 4) + hex_digit_value (c);
1855               digits_found = 1;
1856             }
1857
1858           if (!digits_found)
1859             cpp_error (pfile, "\\x used with no following hex digits");
1860
1861           if (overflow | (i != (i & mask)))
1862             {
1863               cpp_pedwarn (pfile, "hex escape sequence out of range");
1864               i &= mask;
1865             }
1866           c = i;
1867         }
1868       break;
1869
1870     case '0':  case '1':  case '2':  case '3':
1871     case '4':  case '5':  case '6':  case '7':
1872       {
1873         unsigned int i = c - '0';
1874         int count = 0;
1875
1876         while (str < limit && ++count < 3)
1877           {
1878             c = *str;
1879             if (c < '0' || c > '7')
1880               break;
1881             str++;
1882             i = (i << 3) + c - '0';
1883           }
1884
1885         if (i != (i & mask))
1886           {
1887             cpp_pedwarn (pfile, "octal escape sequence out of range");
1888             i &= mask;
1889           }
1890         c = i;
1891       }
1892       break;
1893
1894     default:
1895       unknown = 1;
1896       break;
1897     }
1898
1899   if (unknown)
1900     {
1901       if (ISGRAPH (c))
1902         cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1903       else
1904         cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1905     }
1906
1907   if (c > mask)
1908     cpp_pedwarn (pfile, "escape sequence out of range for character");
1909
1910   *pstr = str;
1911   return c;
1912 }
1913
/* Unless a maximum width has already been defined, fall back to the
   ordinary width of the narrow and wide character types.  */
#if !defined (MAX_CHAR_TYPE_SIZE)
# define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#if !defined (MAX_WCHAR_TYPE_SIZE)
# define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1921
1922 /* Interpret a (possibly wide) character constant in TOKEN.
1923    WARN_MULTI warns about multi-character charconsts, if not
1924    TRADITIONAL.  TRADITIONAL also indicates not to interpret escapes
1925    that did not exist in traditional C.  PCHARS_SEEN points to a
1926    variable that is filled in with the number of characters seen.  */
1927 HOST_WIDE_INT
1928 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1929      cpp_reader *pfile;
1930      const cpp_token *token;
1931      int warn_multi;
1932      int traditional;
1933      unsigned int *pchars_seen;
1934 {
1935   const unsigned char *str = token->val.str.text;
1936   const unsigned char *limit = str + token->val.str.len;
1937   unsigned int chars_seen = 0;
1938   unsigned int width, max_chars, c;
1939   unsigned HOST_WIDE_INT mask;
1940   HOST_WIDE_INT result = 0;
1941
1942 #ifdef MULTIBYTE_CHARS
1943   (void) local_mbtowc (NULL, NULL, 0);
1944 #endif
1945
1946   /* Width in bits.  */
1947   if (token->type == CPP_CHAR)
1948     width = MAX_CHAR_TYPE_SIZE;
1949   else
1950     width = MAX_WCHAR_TYPE_SIZE;
1951
1952   if (width < HOST_BITS_PER_WIDE_INT)
1953     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1954   else
1955     mask = ~0;
1956   max_chars = HOST_BITS_PER_WIDE_INT / width;
1957
1958   while (str < limit)
1959     {
1960 #ifdef MULTIBYTE_CHARS
1961       wchar_t wc;
1962       int char_len;
1963
1964       char_len = local_mbtowc (&wc, str, limit - str);
1965       if (char_len == -1)
1966         {
1967           cpp_warning (pfile, "ignoring invalid multibyte character");
1968           c = *str++;
1969         }
1970       else
1971         {
1972           str += char_len;
1973           c = wc;
1974         }
1975 #else
1976       c = *str++;
1977 #endif
1978
1979       if (c == '\\')
1980         c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1981
1982 #ifdef MAP_CHARACTER
1983       if (ISPRINT (c))
1984         c = MAP_CHARACTER (c);
1985 #endif
1986
1987       /* Merge character into result; ignore excess chars.  */
1988       if (++chars_seen <= max_chars)
1989         {
1990           if (width < HOST_BITS_PER_WIDE_INT)
1991             result = (result << width) | (c & mask);
1992           else
1993             result = c;
1994         }
1995     }
1996
1997   if (chars_seen == 0)
1998     cpp_error (pfile, "empty character constant");
1999   else if (chars_seen > max_chars)
2000     {
2001       chars_seen = max_chars;
2002       cpp_warning (pfile, "character constant too long");
2003     }
2004   else if (chars_seen > 1 && !traditional && warn_multi)
2005     cpp_warning (pfile, "multi-character character constant");
2006
2007   /* If char type is signed, sign-extend the constant.  The
2008      __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
2009   if (token->type == CPP_CHAR && chars_seen)
2010     {
2011       unsigned int nbits = chars_seen * width;
2012       unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
2013
2014       if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2015           || ((result >> (nbits - 1)) & 1) == 0)
2016         result &= mask;
2017       else
2018         result |= ~mask;
2019     }
2020
2021   *pchars_seen = chars_seen;
2022   return result;
2023 }
2024
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest payload a newly created buffer gets.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the size at or above which a
   free buffer is considered too big to hand out for a request of
   MIN_SIZE bytes.  EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size
   requested when extending BUFF: MIN_EXTRA plus twice the room left
   between BUFF's cur and limit pointers.  Every macro argument is
   parenthesized so that any expression may be passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (8000 + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)
2035
/* Probe structure: the union follows a single char, so the offset of
   U within the structure is the padding the compiler inserts before a
   double or a pointer.  */
struct dummy
{
  char c;
  union { double d; int *p; } u;
};

/* Alignment used for buffer sizes, taken from the probe above.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))

/* Round SIZE up to a multiple of ALIGN.  The bit-masking form relies
   on ALIGN being a power of two.  */
#define CPP_ALIGN(size, align) \
        (((size) + ((align) - 1)) & ~((align) - 1))
2048
2049 /* Create a new allocation buffer.  Place the control block at the end
2050    of the buffer, so that buffer overflows will cause immediate chaos.  */
2051 static _cpp_buff *
2052 new_buff (len)
2053      size_t len;
2054 {
2055   _cpp_buff *result;
2056   unsigned char *base;
2057
2058   if (len < MIN_BUFF_SIZE)
2059     len = MIN_BUFF_SIZE;
2060   len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
2061
2062   base = xmalloc (len + sizeof (_cpp_buff));
2063   result = (_cpp_buff *) (base + len);
2064   result->base = base;
2065   result->cur = base;
2066   result->limit = base + len;
2067   result->next = NULL;
2068   return result;
2069 }
2070
2071 /* Place a chain of unwanted allocation buffers on the free list.  */
2072 void
2073 _cpp_release_buff (pfile, buff)
2074      cpp_reader *pfile;
2075      _cpp_buff *buff;
2076 {
2077   _cpp_buff *end = buff;
2078
2079   while (end->next)
2080     end = end->next;
2081   end->next = pfile->free_buffs;
2082   pfile->free_buffs = buff;
2083 }
2084
2085 /* Return a free buffer of size at least MIN_SIZE.  */
2086 _cpp_buff *
2087 _cpp_get_buff (pfile, min_size)
2088      cpp_reader *pfile;
2089      size_t min_size;
2090 {
2091   _cpp_buff *result, **p;
2092
2093   for (p = &pfile->free_buffs;; p = &(*p)->next)
2094     {
2095       size_t size;
2096
2097       if (*p == NULL)
2098         return new_buff (min_size);
2099       result = *p;
2100       size = result->limit - result->base;
2101       /* Return a buffer that's big enough, but don't waste one that's
2102          way too big.  */
2103       if (size >= min_size && size < BUFF_SIZE_UPPER_BOUND (min_size))
2104         break;
2105     }
2106
2107   *p = result->next;
2108   result->next = NULL;
2109   result->cur = result->base;
2110   return result;
2111 }
2112
2113 /* Creates a new buffer with enough space to hold the the uncommitted
2114    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2115    the excess bytes to the new buffer.  Chains the new buffer after
2116    BUFF, and returns the new buffer.  */
2117 _cpp_buff *
2118 _cpp_append_extend_buff (pfile, buff, min_extra)
2119      cpp_reader *pfile;
2120      _cpp_buff *buff;
2121      size_t min_extra;
2122 {
2123   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2124   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2125
2126   buff->next = new_buff;
2127   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2128   return new_buff;
2129 }
2130
2131 /* Creates a new buffer with enough space to hold the the uncommitted
2132    remaining bytes of the buffer pointed to by BUFF, and at least
2133    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2134    Chains the new buffer before the buffer pointed to by BUFF, and
2135    updates the pointer to point to the new buffer.  */
2136 void
2137 _cpp_extend_buff (pfile, pbuff, min_extra)
2138      cpp_reader *pfile;
2139      _cpp_buff **pbuff;
2140      size_t min_extra;
2141 {
2142   _cpp_buff *new_buff, *old_buff = *pbuff;
2143   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2144
2145   new_buff = _cpp_get_buff (pfile, size);
2146   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2147   new_buff->next = old_buff;
2148   *pbuff = new_buff;
2149 }
2150
2151 /* Free a chain of buffers starting at BUFF.  */
2152 void
2153 _cpp_free_buff (buff)
2154      _cpp_buff *buff;
2155 {
2156   _cpp_buff *next;
2157
2158   for (; buff; buff = next)
2159     {
2160       next = buff->next;
2161       free (buff->base);
2162     }
2163 }
2164
2165 /* Allocate permanent, unaligned storage of length LEN.  */
2166 unsigned char *
2167 _cpp_unaligned_alloc (pfile, len)
2168      cpp_reader *pfile;
2169      size_t len;
2170 {
2171   _cpp_buff *buff = pfile->u_buff;
2172   unsigned char *result = buff->cur;
2173
2174   if (len > (size_t) (buff->limit - result))
2175     {
2176       buff = _cpp_get_buff (pfile, len);
2177       buff->next = pfile->u_buff;
2178       pfile->u_buff = buff;
2179       result = buff->cur;
2180     }
2181
2182   buff->cur = result + len;
2183   return result;
2184 }
2185
2186 /* Allocate permanent, unaligned storage of length LEN.  */
2187 unsigned char *
2188 _cpp_aligned_alloc (pfile, len)
2189      cpp_reader *pfile;
2190      size_t len;
2191 {
2192   _cpp_buff *buff = pfile->a_buff;
2193   unsigned char *result = buff->cur;
2194
2195   if (len > (size_t) (buff->limit - result))
2196     {
2197       buff = _cpp_get_buff (pfile, len);
2198       buff->next = pfile->a_buff;
2199       pfile->a_buff = buff;
2200       result = buff->cur;
2201     }
2202
2203   buff->cur = result + len;
2204   return result;
2205 }