gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /*
  24
  25 Cleanups to do:-
  26
  27 o -dM and with _cpp_dump_list: too many \n output.
  28 o Put a printer object in cpp_reader?
  29 o Check line numbers assigned to all errors.
  30 o Replace strncmp with memcmp almost everywhere.
  31 o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
  32 o Convert do_ functions to return void.  Kaveh thinks it's OK; and said he'll
  33   give it a run when we've got some code.
  34 o Distinguish integers, floats, and 'other' pp-numbers.
  35 o Store ints and char constants as binary values.
  36 o New command-line assertion syntax.
  37 o Work towards functions in cpperror.c taking a message level parameter.
  38   If we do this, merge the common code of do_warning and do_error.
  39 o Comment all functions, and describe macro expansion algorithm.
  40 o Move as much out of header files as possible.
  41 o Remove single quote pairs `', and some '', from diagnostics.
  42 o Correct pastability test for CPP_NAME and CPP_NUMBER.
  43
  44 */
  45
  46 #include "config.h"
  47 #include "system.h"
  48 #include "intl.h"
  49 #include "cpplib.h"
  50 #include "cpphash.h"
  51 #include "symcat.h"
  52
     /* Two constant tokens.  Every initializer is zero except the third,
        which is CPP_PLACEMARKER and CPP_EOF respectively (presumably the
        `type' member -- confirm against the cpp_token declaration).
        UNION_INIT_ZERO is supplied by a header.  */
  53 static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
  54 static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
  55
  56 /* Flags for cpp_context.  Each is a distinct bit, kept in the `flags'
        member of struct cpp_context and tested with `&' (as IS_ARG_CONTEXT
        does for CONTEXT_ARG).  */
  57 #define CONTEXT_PASTEL  (1 << 0) /* An argument context on LHS of ##.  */
  58 #define CONTEXT_PASTER  (1 << 1) /* An argument context on RHS of ##.  */
  59 #define CONTEXT_RAW     (1 << 2) /* If argument tokens already expanded.  */
  60 #define CONTEXT_ARG     (1 << 3) /* If an argument context.  */
  61
     /* One entry of the reader's context array (CURRENT_CONTEXT indexes
        pfile->contexts by pfile->cur_context).  A context supplies tokens
        either from a macro's token list (u.list) or from an array of
        token pointers belonging to an argument (u.arg); POSN and COUNT
        give the read position and the number of tokens available.  */
  62 typedef struct cpp_context cpp_context;
  63 struct cpp_context
  64 {
  65   union
  66   {
  67     const cpp_toklist *list;    /* Used for macro contexts only.  */
  68     const cpp_token **arg;      /* Used for arg contexts only.  */
  69   } u;
  70
  71   /* Pushed token to be returned by next call to get_raw_token.  */
  72   const cpp_token *pushed_token;
  73
  74   struct macro_args *args;      /* The arguments for a function-like
  75                                    macro.  NULL otherwise.  */
  76   unsigned short posn;          /* Current posn, index into u.  */
  77   unsigned short count;         /* No. of tokens in u.  */
  78   unsigned short level;         /* NOTE(review): meaning not stated in the
                                        visible code; struct macro_args has a
                                        member of the same name -- confirm.  */
  79   unsigned char flags;          /* Mask of CONTEXT_* bits.  */
  80 };
  81
     /* Storage for the arguments of a macro invocation; cpp_context.args
        points at one of these for function-like macros.
        NOTE(review): the member descriptions below are inferred from the
        member names and from the parse_arg / save_token prototypes only;
        confirm them against the code that fills the structure in.  */
  82 typedef struct macro_args macro_args;
  83 struct macro_args
  84 {
  85   unsigned int *ends;           /* Presumably one end index per argument.  */
  86   const cpp_token **tokens;     /* Presumably the saved argument tokens.  */
  87   unsigned int capacity;        /* Presumably allocated slots in TOKENS.  */
  88   unsigned int used;            /* Presumably slots of TOKENS in use.  */
  89   unsigned short level;         /* Same name as cpp_context.level; TODO confirm.  */
  90 };
  91
     /* Forward declarations of the file-local (static) functions, plus
        the `speller' function-pointer typedef and one helper macro.
        Parameter lists are wrapped in PARAMS, which comes from a header
        (presumably so that compilers without prototype support can still
        build the file -- confirm; the definitions in this file are
        written in the old K&R style).  */
  92 static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
  93 static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
  94                                            macro_args *, unsigned int *));
  95 static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
  96 static void save_token PARAMS ((macro_args *, const cpp_token *));
  97 static int pop_context PARAMS ((cpp_reader *));
  98 static int push_macro_context PARAMS ((cpp_reader *, const cpp_token *));
  99 static void push_arg_context PARAMS ((cpp_reader *, const cpp_token *));
 100 static void free_macro_args PARAMS ((macro_args *));
 101
     /* Grow LIST's name buffer by half of its current capacity plus one
        byte.  LIST is evaluated twice.  */
 102 #define auto_expand_name_space(list) \
 103     _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
 104 static void safe_fwrite         PARAMS ((cpp_reader *, const U_CHAR *,
 105                                          size_t, FILE *));
 106 static void dump_param_spelling PARAMS ((cpp_reader *, const cpp_toklist *,
 107                                          unsigned int));
 108 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
 109                                          unsigned int));
 110
 111 static void process_directive   PARAMS ((cpp_reader *, const cpp_token *));
 112 static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
 113                                                 unsigned char *));
 114 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
 115                                                      const unsigned char *));
 116 static int skip_block_comment PARAMS ((cpp_reader *));
 117 static int skip_line_comment PARAMS ((cpp_reader *));
 118 static void adjust_column PARAMS ((cpp_reader *, const U_CHAR *));
 119 static void skip_whitespace PARAMS ((cpp_reader *, int));
 120 static const U_CHAR *parse_name PARAMS ((cpp_reader *, cpp_token *,
 121                                    const U_CHAR *, const U_CHAR *));
 122 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_string *));
 123 static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
 124                                   unsigned int));
 125 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
 126 static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
 127                                   const unsigned char *,
 128                                   unsigned int, unsigned int));
 129 static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
 130 static int lex_next PARAMS ((cpp_reader *, int));
 131 static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
 132                                       const cpp_token *));
 133
 134 static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
 135 static void expand_context_stack PARAMS ((cpp_reader *));
 136 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
 137                                             unsigned char *));
 138 static void output_token PARAMS ((cpp_reader *, const cpp_token *,
 139                                   const cpp_token *));
 140 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
 141                                           cpp_token *));
 142 static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
 143                                             unsigned int));
 144 static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
 145 static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
 146                                                 const cpp_token *));
 147 static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
 148 static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
 149                                                        const cpp_token *));
 150 static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
 151                                          const cpp_token *, int *));
 152 static unsigned int prevent_macro_expansion     PARAMS ((cpp_reader *));
 153 static void restore_macro_expansion     PARAMS ((cpp_reader *, unsigned int));
 154 static cpp_token *get_temp_token        PARAMS ((cpp_reader *));
 155 static void release_temp_tokens         PARAMS ((cpp_reader *));
 156 static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
     /* NOTE: process_directive is declared a second time here; the
        declaration is identical to the earlier one in this list, so the
        repetition is harmless but redundant.  */
 157 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
 158
     /* Start a new, empty string value for TOKEN: its length is set to
        zero and its text pointer to the first unused byte of LIST's name
        buffer (namebuf + name_used).  Both arguments are evaluated more
        than once.  */
 159 #define INIT_TOKEN_STR(list, token) \
 160   do {(token)->val.str.len = 0; \
 161       (token)->val.str.text = (list)->namebuf + (list)->name_used; \
 162   } while (0)
 163
     /* Nonzero if C is '+' or '-' and PREVC is a letter after which a
        sign is accepted: 'e' or 'E' always, 'p' or 'P' only when the c89
        option is off.  The expansion refers to a variable named `pfile',
        which must be in scope at the point of use.  C and PREVC are
        evaluated more than once.  */
 164 #define VALID_SIGN(c, prevc) \
 165   (((c) == '+' || (c) == '-') && \
 166    ((prevc) == 'e' || (prevc) == 'E' \
 167     || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
 168
 169 /* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
 170    character, if any, is in buffer.
        C is the newline character already seen ('\r' or '\n').  The
        expression '\r' + '\n' - c yields the other one of the pair; if
        CUR is below LIMIT and points at that character, CUR is advanced
        past it so that a two-character newline counts once.  Afterwards
        the buffer's line number is incremented, its line_base is set to
        CUR and pfile->col_adjust is reset to 0.
        The expansion uses a variable named `pfile' from the point of use,
        modifies CUR, and evaluates CUR several times.  C is not
        parenthesized in the expansion, so pass a simple expression.  */
 171
 172 #define handle_newline(cur, limit, c) \
 173  do { \
 174   if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
 175     (cur)++; \
 176   pfile->buffer->lineno++; \
 177   pfile->buffer->line_base = (cur); \
 178   pfile->col_adjust = 0; \
 179  } while (0)
 180
     /* Helpers operating on a pointer named `cur_token', which must be in
        scope wherever they are used.
          IMMED_TOKEN     nonzero if *cur_token lacks the PREV_WHITE flag.
          PREV_TOKEN_TYPE type of the token just before cur_token.
          PUSH_TOKEN      set the type of *cur_token, then advance cur_token.
          REVISE_TOKEN    overwrite the type of the token before cur_token.
          BACKUP_TOKEN    step cur_token back one token and set that type.
          BACKUP_DIGRAPH  as BACKUP_TOKEN, and also set the DIGRAPH flag on
                          the token cur_token then points at.  */
 181 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
 182 #define PREV_TOKEN_TYPE (cur_token[-1].type)
 183
 184 #define PUSH_TOKEN(ttype) cur_token++->type = (ttype)
 185 #define REVISE_TOKEN(ttype) cur_token[-1].type = (ttype)
 186 #define BACKUP_TOKEN(ttype) (--cur_token)->type = (ttype)
 187 #define BACKUP_DIGRAPH(ttype) do { \
 188   BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
 189
 190 /* An upper bound on the number of bytes needed to spell a token,
 191    including preceding whitespace.
        The bound is 5 plus the string length for SPELL_STRING tokens, 5
        plus the identifier node's length for SPELL_IDENT tokens, and 5
        for every other spelling class.  TOKEN is evaluated more than
        once.  */
 192 #define TOKEN_LEN(token) (5 + (TOKEN_SPELL(token) == SPELL_STRING       \
 193                                ? (token)->val.str.len                   \
 194                                : (TOKEN_SPELL(token) == SPELL_IDENT     \
 195                                   ? (token)->val.node->length           \
 196                                   : 0)))
 197
     /* IS_ARG_CONTEXT: nonzero if context C has the CONTEXT_ARG flag set.
        CURRENT_CONTEXT: pointer to the element of PFILE's contexts array
        selected by PFILE->cur_context (PFILE is evaluated twice).  */
 198 #define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
 199 #define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
 200
     /* Replace D's flags with the PREV_WHITE, BOL and PASTE_LEFT bits of
        S (all other bits of D's flags become zero).  If the result has
        BOL set, also copy S's column and line into D.  */
 201 #define ASSIGN_FLAGS_AND_POS(d, s) \
 202   do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
 203       if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
 204   } while (0)
 205
 206 /* f is flags, just consisting of PREV_WHITE | BOL.
        Clears PREV_WHITE and BOL in D's flags, ORs in F, and leaves D's
        other flag bits alone.  If F contains BOL, S's column and line
        are copied into D.  F is evaluated twice.  */
 207 #define MODIFY_FLAGS_AND_POS(d, s, f) \
 208   do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
 209       if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
 210   } while (0)
 211
     /* Build the token_spellings table by expanding TTYPE_TABLE (defined
        in a header) with T, I, S, C and N temporarily defined so that each
        table entry becomes a { spelling class, text } initializer.  The
        table has N_TTYPES + 1 elements; a final {0, 0} element follows
        the TTYPE_TABLE entries.  The E argument of each helper macro is
        unused here.  */
 212 #define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
 213 #define I(e, s) {SPELL_IDENT, s},
 214 #define S(e, s) {SPELL_STRING, s},
 215 #define C(e, s) {SPELL_CHAR, s},
 216 #define N(e, s) {SPELL_NONE, s},
 217
 218 const struct token_spelling
 219 token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
 220
 221 #undef T
 222 #undef I
 223 #undef S
 224 #undef C
 225 #undef N
 226
 227 /* For debugging: the internal names of the tokens.
        TTYPE_TABLE is expanded a second time, now with every helper macro
        producing the stringified enumerator name (STRINGX (e)) prefixed
        by U; the S argument is ignored.  U and STRINGX come from headers.  */
 228 #define T(e, s) U STRINGX(e),
 229 #define I(e, s) U STRINGX(e),
 230 #define S(e, s) U STRINGX(e),
 231 #define C(e, s) U STRINGX(e),
 232 #define N(e, s) U STRINGX(e),
 233
 234 const U_CHAR *const token_names[N_TTYPES] = { TTYPE_TABLE };
 235
 236 #undef T
 237 #undef I
 238 #undef S
 239 #undef C
 240 #undef N
 241
 242 /* The following table is used by trigraph_ok/trigraph_replace.  If we
 243    have designated initializers, it can be constant data; otherwise,
 244    it is set up at runtime by _cpp_init_input_buffer.
        The table is indexed by the third character of a trigraph and
        holds the character the trigraph stands for; the nine s(p, v)
        entries below list the pairs.
        With GCC_VERSION >= 2007 the entries become designated
        initializers of a const array and init_trigraph_map () expands to
        nothing.  Otherwise the array is writable, starts out all zero,
        and the entries become assignments inside a generated static
        function init_trigraph_map.  TRIGRAPH_MAP, END and s are
        undefined again once the table has been emitted.  */
 245
 246 #if (GCC_VERSION >= 2007)
 247 #define init_trigraph_map()  /* nothing */
 248 #define TRIGRAPH_MAP \
 249 __extension__ static const U_CHAR trigraph_map[UCHAR_MAX + 1] = {
 250 #define END };
 251 #define s(p, v) [p] = v,
 252 #else
 253 #define TRIGRAPH_MAP static U_CHAR trigraph_map[UCHAR_MAX + 1] = { 0 }; \
 254  static void init_trigraph_map PARAMS ((void)) { \
 255  unsigned char *x = trigraph_map;
 256 #define END }
 257 #define s(p, v) x[p] = v;
 258 #endif
 259
 260 TRIGRAPH_MAP
 261   s('=', '#')   s(')', ']')     s('!', '|')
 262   s('(', '[')   s('\'', '^')    s('>', '}')
 263   s('/', '\\')  s('<', '{')     s('-', '~')
 264 END
 265
 266 #undef TRIGRAPH_MAP
 267 #undef END
 268 #undef s
 269
 270 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars.
        The new capacity is N plus twice the previous capacity.  The amount
        written so far is read with CPP_WRITTEN before the xrealloc and put
        back with CPP_SET_WRITTEN afterwards (presumably because the write
        position is kept relative to the buffer start, which xrealloc may
        move -- confirm against the macro definitions).  */
 271
 272 void
 273 _cpp_grow_token_buffer (pfile, n)
 274      cpp_reader *pfile;
 275      long n;
 276 {
 277   long old_written = CPP_WRITTEN (pfile);
 278   pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
 279   pfile->token_buffer = (U_CHAR *)
 280     xrealloc(pfile->token_buffer, pfile->token_buffer_size);
 281   CPP_SET_WRITTEN (pfile, old_written);
 282 }
 283
 284 /* Deal with the annoying semantics of fwrite.
        fwrite may write fewer than the requested number of bytes, so it
        is called repeatedly on the unwritten remainder of BUF until all
        LEN bytes have gone to FP.  If a call writes nothing at all, the
        failure is reported through cpp_notice_from_errno, naming the
        out_fname option, and the function returns; the bytes not yet
        written are dropped and the caller gets no failure indication.  */
 285 static void
 286 safe_fwrite (pfile, buf, len, fp)
 287      cpp_reader *pfile;
 288      const U_CHAR *buf;
 289      size_t len;
 290      FILE *fp;
 291 {
 292   size_t count;
 293
 294   while (len)
 295     {
 296       count = fwrite (buf, 1, len, fp);
           /* No progress at all: treat as a write error.  */
 297       if (count == 0)
 298         goto error;
           /* Partial write: continue with what is left.  */
 299       len -= count;
 300       buf += count;
 301     }
 302   return;
 303
 304  error:
 305   cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
 306 }
 307
 308 /* Notify the compiler proper that the current line number has jumped,
 309    or the current file name has changed.
        LINE is the line number to announce; if it is 0 nothing happens.
        PRINT supplies the output stream and remembers the file name,
        include depth and line number of the previous call.
        A pending line of text is always terminated first, even when line
        commands are disabled.  The marker written has the form
          # LINE "FILE" followed by optional flags:
        " 1" when the include depth has grown since the last call, " 2"
        when it has shrunk, " 3" when ip->inc->sysp is nonzero and, for
        C++ when NO_IMPLICIT_EXTERN_C is not defined, " 4" when sysp
        equals 2.  */
 310
 311 static void
 312 output_line_command (pfile, print, line)
 313      cpp_reader *pfile;
 314      cpp_printer *print;
 315      unsigned int line;
 316 {
 317   cpp_buffer *ip = CPP_BUFFER (pfile);
 318   enum { same = 0, enter, leave, rname } change;
       /* Flag text for each value of CHANGE, indexed by the enum above;
          `same' and `rname' add no flag.  */
 319   static const char * const codes[] = { "", " 1", " 2", "" };
 320
 321   if (line == 0)
 322     return;
 323
 324   /* End the previous line of text.  */
 325   if (pfile->need_newline)
 326     putc ('\n', print->outf);
 327   pfile->need_newline = 0;
 328
 329   if (CPP_OPTION (pfile, no_line_commands))
 330     return;
 331
 332   /* If ip is null, we've been called from cpp_finish, and they just
 333      needed the final flush and trailing newline.  */
 334   if (!ip)
 335     return;
 336
 337   if (pfile->include_depth == print->last_id)
 338     {
 339       /* Determine whether the current filename has changed, and if so,
 340          how.  'nominal_fname' values are unique, so they can be compared
 341          by comparing pointers.  */
 342       if (ip->nominal_fname == print->last_fname)
 343         change = same;
 344       else
 345         change = rname;
 346     }
 347   else
 348     {
           /* The include depth differs from the one recorded last time:
              deeper means a file was entered, shallower means one was left.
              Record the new depth.  */
 349       if (pfile->include_depth > print->last_id)
 350         change = enter;
 351       else
 352         change = leave;
 353       print->last_id = pfile->include_depth;
 354     }
 355   print->last_fname = ip->nominal_fname;
 356
 357   /* If the current file has not changed, we can output a few newlines
 358      instead if we want to increase the line number by a small amount.
 359      We cannot do this if print->lineno is zero, because that means we
 360      haven't output any line commands yet.  (The very first line
 361      command output is a `same_file' command.)
          The bound below allows a gap of at most seven lines.  */
 362   if (change == same && print->lineno > 0
 363       && line >= print->lineno && line < print->lineno + 8)
 364     {
 365       while (line > print->lineno)
 366         {
 367           putc ('\n', print->outf);
 368           print->lineno++;
 369         }
 370       return;
 371     }
 372
 373 #ifndef NO_IMPLICIT_EXTERN_C
 374   if (CPP_OPTION (pfile, cplusplus))
 375     fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
 376              codes[change],
 377              ip->inc->sysp ? " 3" : "",
 378              (ip->inc->sysp == 2) ? " 4" : "");
 379   else
 380 #endif
 381     fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
 382              codes[change],
 383              ip->inc->sysp ? " 3" : "");
 384   print->lineno = line;
 385 }
 386
 387 /* Write the contents of the token_buffer to the output stream, and
 388    clear the token_buffer.  Also handles generating line commands and
 389    keeping track of file transitions.
        Only the part of the buffer beyond PRINT->written is written out,
        and the buffer is rewound to that mark rather than to its start.
        When something was written, a trailing newline is marked as
        pending and PRINT->lineno is incremented unless it is still zero.
        output_line_command is then called with LINE in every case.  */
 390
 391 void
 392 cpp_output_tokens (pfile, print, line)
 393      cpp_reader *pfile;
 394      cpp_printer *print;
 395      unsigned int line;
 396 {
 397   if (CPP_WRITTEN (pfile) - print->written)
 398     {
 399       safe_fwrite (pfile, pfile->token_buffer,
 400                    CPP_WRITTEN (pfile) - print->written, print->outf);
 401       pfile->need_newline = 1;
 402       if (print->lineno)
 403         print->lineno++;
 404
 405       CPP_SET_WRITTEN (pfile, print->written);
 406     }
 407   output_line_command (pfile, print, line);
 408 }
 409
 410 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output.
        STOP is the buffer underneath the current one at entry; the loop
        ends when popping a buffer at end of file makes STOP the current
        buffer.  A line whose first token is `#' with BOL set, and which
        the token list marks as a directive, is handed to
        process_directive.  Every other line is dropped with
        _cpp_skip_rest_of_line.  */
 411
 412 void
 413 cpp_scan_buffer_nooutput (pfile)
 414      cpp_reader *pfile;
 415 {
 416   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 417   const cpp_token *token;
 418
 419   /* In no-output mode, we can ignore everything but directives.  */
 420   for (;;)
 421     {
 422       token = _cpp_get_token (pfile);
 423
 424       if (token->type == CPP_EOF)
 425         {
 426           cpp_pop_buffer (pfile);
 427           if (CPP_BUFFER (pfile) == stop)
 428             break;
               /* Otherwise control falls through: the EOF token fails the
                  directive test below and _cpp_skip_rest_of_line is
                  called.  */
 429         }
 430
 431       if (token->type == CPP_HASH && token->flags & BOL
 432           && pfile->token_list.directive)
 433         {
 434           process_directive (pfile, token);
 435           continue;
 436         }
 437
 438       _cpp_skip_rest_of_line (pfile);
 439     }
 440 }
 441
 442 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT.
        STOP is the buffer underneath the current one at entry; the
        function returns when popping a buffer at end of file makes STOP
        the current buffer.  PREV is the previous token passed to
        output_token on the same line, or 0 at the start of a line or
        after a buffer change.  */
 443 void
 444 cpp_scan_buffer (pfile, print)
 445      cpp_reader *pfile;
 446      cpp_printer *print;
 447 {
 448   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 449   const cpp_token *token, *prev = 0;
 450
 451   for (;;)
 452     {
 453       token = _cpp_get_token (pfile);
 454       if (token->type == CPP_EOF)
 455         {
 456           cpp_pop_buffer (pfile);
 457           if (CPP_BUFFER (pfile) == stop)
 458             return;
 459
               /* Back in an enclosing buffer: flush what has been
                  collected and announce that buffer's current line.  */
 460           cpp_output_tokens (pfile, print, CPP_BUF_LINE (CPP_BUFFER (pfile)));
 461           prev = 0;
 462           continue;
 463         }
 464
 465       if (token->flags & BOL)
 466         {
               /* A `#' at the start of a line that the lexer flagged as a
                  directive is processed, not output.  */
 467           if (token->type == CPP_HASH && pfile->token_list.directive)
 468             {
 469               process_directive (pfile, token);
 470               continue;
 471             }
 472
               /* First token of an ordinary line: flush the previous
                  line and pass on the new line's number.  */
 473           cpp_output_tokens (pfile, print, pfile->token_list.line);
 474           prev = 0;
 475         }
 476
           /* Placemarkers are not output, but they still become PREV.  */
 477       if (token->type != CPP_PLACEMARKER)
 478         output_token (pfile, token, prev);
 479
 480       prev = token;
 481     }
 482 }
 483
 484 /* Scan a single line of the input into the token_buffer.
        Returns 0 if PFILE has no buffer, 1 otherwise.  Tokens come from
        cpp_get_token and are passed to output_token.  On CPP_EOF the
        buffer is popped and scanning stops.  The loop continues while a
        context above the base one is active or the base context still
        has unread tokens.  */
 485 int
 486 cpp_scan_line (pfile)
 487      cpp_reader *pfile;
 488 {
 489   const cpp_token *token, *prev = 0;
 490
 491   if (pfile->buffer == NULL)
 492     return 0;
 493
 494   do
 495     {
 496       token = cpp_get_token (pfile);
 497       if (token->type == CPP_EOF)
 498         {
 499           cpp_pop_buffer (pfile);
 500           break;
 501         }
 502
 503       /* If the last token on a line results from a macro expansion,
 504          the check below will fail to stop us from proceeding to the
 505          next line - so make sure we stick in a newline, at least.  */
 506       if (token->flags & BOL)
 507         CPP_PUTC (pfile, '\n');
 508
 509       output_token (pfile, token, prev);
 510       prev = token;
 511     }
 512   while (pfile->cur_context > 0
 513          || pfile->contexts[0].posn < pfile->contexts[0].count);
 514   return 1;
 515 }
 516
 517 /* Helper routine used by parse_include, which can't see spell_token.
 518    Reinterpret the current line as an h-char-sequence (< ... >); we are
 519    looking at the first token after the <.
        Tokens are read until a CPP_GREATER or CPP_EOF token and spelled
        into the token buffer, each preceded by one space if it has
        PREV_WHITE set.  The spelled text is then copied into a block
        obtained from xmalloc and the token buffer is rewound to where it
        was on entry.  The result is a temporary token of type
        CPP_HEADER_NAME whose string value is that copy; the copy is not
        NUL-terminated, its length is in val.str.len (possibly 0).
        Reaching CPP_EOF produces an error message, but a token is still
        returned.
        NOTE(review): the xmalloc'ed text is not freed here; confirm who
        releases it once the temporary token is no longer needed.  */
 520 const cpp_token *
 521 _cpp_glue_header_name (pfile)
 522      cpp_reader *pfile;
 523 {
 524   unsigned int written = CPP_WRITTEN (pfile);
 525   const cpp_token *t;
 526   cpp_token *hdr;
 527   U_CHAR *buf;
 528   size_t len;
 529
 530   for (;;)
 531     {
 532       t = _cpp_get_token (pfile);
 533       if (t->type == CPP_GREATER || t->type == CPP_EOF)
 534         break;
 535
           /* TOKEN_LEN is an upper bound that includes room for the
              leading space, so the unchecked CPP_PUTC_Q is covered by
              this reservation.  */
 536       CPP_RESERVE (pfile, TOKEN_LEN (t));
 537       if (t->flags & PREV_WHITE)
 538         CPP_PUTC_Q (pfile, ' ');
 539       pfile->limit = spell_token (pfile, t, pfile->limit);
 540     }
 541
 542   if (t->type == CPP_EOF)
 543     cpp_error (pfile, "missing terminating > character");
 544
 545   len = CPP_WRITTEN (pfile) - written;
 546   buf = xmalloc (len);
 547   memcpy (buf, pfile->token_buffer + written, len);
 548   CPP_SET_WRITTEN (pfile, written);
 549
 550   hdr = get_temp_token (pfile);
 551   hdr->type = CPP_HEADER_NAME;
 552   hdr->flags = 0;
 553   hdr->val.str.text = buf;
 554   hdr->val.str.len = len;
 555   return hdr;
 556 }
 557
 558 /* Token-buffer helper functions.  */
 559
 560 /* Expand a token list's string space. It is *vital* that
 561    list->tokens_used is correct, to get pointer fix-up right.
        LEN is the number of bytes to add to the capacity of LIST's name
        buffer.  If xrealloc returns a different address, the text pointer
        of each of the first tokens_used tokens whose spelling class is
        SPELL_STRING is shifted by the distance the buffer moved.  Tokens
        past tokens_used are not adjusted, hence the warning above.  */
 562 void
 563 _cpp_expand_name_space (list, len)
 564      cpp_toklist *list;
 565      unsigned int len;
 566 {
 567   const U_CHAR *old_namebuf;
 568
 569   old_namebuf = list->namebuf;
 570   list->name_cap += len;
 571   list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
 572
 573   /* Fix up token text pointers.  */
 574   if (list->namebuf != old_namebuf)
 575     {
 576       unsigned int i;
 577
 578       for (i = 0; i < list->tokens_used; i++)
 579         if (token_spellings[list->tokens[i].type].type == SPELL_STRING)
 580           list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
 581     }
 582 }
 583
 584 /* If there is not enough room for LEN more characters, expand the
 585    list by just enough to have room for LEN characters.
        The room available is name_cap minus name_used; when it is
        smaller than LEN, the name buffer is grown by exactly the
        shortfall via _cpp_expand_name_space.  Otherwise nothing is
        done.  */
 586 void
 587 _cpp_reserve_name_space (list, len)
 588      cpp_toklist *list;
 589      unsigned int len;
 590 {
 591   unsigned int room = list->name_cap - list->name_used;
 592
 593   if (room < len)
 594     _cpp_expand_name_space (list, len - room);
 595 }
 596
 597 /* Expand the number of tokens in a list.
        COUNT is added to LIST's token capacity and the token array is
        reallocated to the new capacity.  If LIST has the LIST_OFFSET
        flag, list->tokens points one element past the start of the
        allocation (a dummy token comes first), so the pointer is stepped
        back and one extra element is requested before the xrealloc, and
        the pointer is stepped forward again afterwards.  */
 598 void
 599 _cpp_expand_token_space (list, count)
 600      cpp_toklist *list;
 601      unsigned int count;
 602 {
 603   unsigned int n;
 604
 605   list->tokens_cap += count;
 606   n = list->tokens_cap;
 607   if (list->flags & LIST_OFFSET)
 608     list->tokens--, n++;
 609   list->tokens = (cpp_token *)
 610     xrealloc (list->tokens, n * sizeof (cpp_token));
 611   if (list->flags & LIST_OFFSET)
 612     list->tokens++;             /* Skip the dummy.  */
 613 }
 614
 615 /* Initialize a token list.  If flags is DUMMY_TOKEN, we allocate
 616    an extra token in front of the token list, as this allows the lexer
 617    to always peek at the previous token without worrying about
 618    underflowing the list, and some initial space.  Otherwise, no
 619    token- or name-space is allocated, and there is no dummy token.
        Note that the code tests FLAGS against NO_DUMMY_TOKEN: that value
        selects the empty, unallocated list, and every other value selects
        the dummy-token layout.  In the latter case the list starts with
        room for 256 tokens plus the dummy, and 1024 bytes of name space,
        and LIST_OFFSET is set in the list's flags.  Either way the list
        is finally reset with _cpp_clear_toklist.  */
 620 void
 621 _cpp_init_toklist (list, flags)
 622      cpp_toklist *list;
 623      int flags;
 624 {
 625   if (flags == NO_DUMMY_TOKEN)
 626     {
 627       list->tokens_cap = 0;
 628       list->tokens = 0;
 629       list->name_cap = 0;
 630       list->namebuf = 0;
 631       list->flags = 0;
 632     }
 633   else
 634     {
 635       /* Initialize token space.  Put a dummy token before the start
 636          that will fail matches.  */
 637       list->tokens_cap = 256;   /* 4K's worth.  */
 638       list->tokens = (cpp_token *)
 639         xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
 640       list->tokens[0].type = CPP_EOF;
           /* Step past the dummy so that tokens[-1] refers to it.  */
 641       list->tokens++;
 642
 643       /* Initialize name space.  */
 644       list->name_cap = 1024;
 645       list->namebuf = (unsigned char *) xmalloc (list->name_cap);
 646       list->flags = LIST_OFFSET;
 647     }
 648
 649   _cpp_clear_toklist (list);
 650 }
 651
 652 /* Clear a token list.
        Resets the used-token and used-name counts, the directive pointer,
        the parameter count and params_len to zero, and clears every flag
        except LIST_OFFSET.  Capacities and the allocated buffers are left
        as they are, so the storage can be reused.  */
 653 void
 654 _cpp_clear_toklist (list)
 655      cpp_toklist *list;
 656 {
 657   list->tokens_used = 0;
 658   list->name_used = 0;
 659   list->directive = 0;
 660   list->paramc = 0;
 661   list->params_len = 0;
 662   list->flags &= LIST_OFFSET;  /* clear all but that one */
 663 }
 664
 665 /* Free a token list.  Does not free the list itself, which may be
 666    embedded in a larger structure.
        Frees the token array and the name buffer.  For a list with
        LIST_OFFSET set, list->tokens points one element past the start of
        the allocation, so the pointer handed to free is stepped back by
        one.  The members of LIST are not modified, so they are stale
        after this call.  */
 667 void
 668 _cpp_free_toklist (list)
 669      const cpp_toklist *list;
 670 {
 671   if (list->flags & LIST_OFFSET)
 672     free (list->tokens - 1);    /* Backup over dummy token.  */
 673   else
 674     free (list->tokens);
 675   free (list->namebuf);
 676 }
 677
 678 /* Compare two tokens.
        Returns 1 if A and B are equivalent and 0 otherwise.  They must
        have the same type and identical flags.  Beyond that, the spelling
        class of the type decides what is compared: nothing more for
        operators (and for any class not listed), val.aux for SPELL_CHAR
        and SPELL_NONE, the hash node pointer for SPELL_IDENT, and length
        plus bytes for SPELL_STRING.  */
 679 int
 680 _cpp_equiv_tokens (a, b)
 681      const cpp_token *a, *b;
 682 {
 683   if (a->type == b->type && a->flags == b->flags)
 684     switch (token_spellings[a->type].type)
 685       {
 686       default:                  /* Keep compiler happy.  */
 687       case SPELL_OPERATOR:
 688         return 1;
 689       case SPELL_CHAR:
 690       case SPELL_NONE:
 691         return a->val.aux == b->val.aux; /* arg_no or character.  */
 692       case SPELL_IDENT:
 693         return a->val.node == b->val.node;
 694       case SPELL_STRING:
 695         return (a->val.str.len == b->val.str.len
 696                 && !memcmp (a->val.str.text, b->val.str.text,
 697                             a->val.str.len));
 698       }
 699
       /* Type or flags differ.  */
 700   return 0;
 701 }
 702
 703 /* Compare two token lists.
        Returns 1 if A and B use the same number of tokens, have identical
        flags and parameter counts, and every pair of corresponding tokens
        satisfies _cpp_equiv_tokens; returns 0 otherwise.  No other member
        of the lists is compared.  */
 704 int
 705 _cpp_equiv_toklists (a, b)
 706      const cpp_toklist *a, *b;
 707 {
 708   unsigned int i;
 709
 710   if (a->tokens_used != b->tokens_used
 711       || a->flags != b->flags
 712       || a->paramc != b->paramc)
 713     return 0;
 714
 715   for (i = 0; i < a->tokens_used; i++)
 716     if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
 717       return 0;
 718   return 1;
 719 }
 720
 721 /* Utility routine:
 722
 723    Compares the token TOKEN to the NUL-terminated string STRING.
 724    Returns 1 if TOKEN is a CPP_NAME whose node name compares equal to
        STRING under ustrcmp, and 0 otherwise.  A TOKEN of any other type
        is accepted and simply yields 0, because the type is checked
        before the name is looked at.  */
 725
 726 int
 727 cpp_ideq (token, string)
 728      const cpp_token *token;
 729      const char *string;
 730 {
 731   if (token->type != CPP_NAME)
 732     return 0;
 733
 734   return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
 735 }
 736
 737 /* Lexing algorithm.
 738
 739  The original lexer in cpplib was made up of two passes: a first pass
 740  that replaced trigraphs and deleted escaped newlines, and a second
 741  pass that tokenized the result of the first pass.  Tokenisation was
 742  performed by peeking at the next character in the input stream.  For
 743  example, if the input stream contained "!=", the handler for the !
 744  character would peek at the next character, and if it were a '='
 745  would skip over it, and return a "!=" token, otherwise it would
 746  return just the "!" token.
 747
 748  To implement a single-pass lexer, this peeking ahead is unworkable.
 749  An arbitrary number of escaped newlines, and trigraphs (in particular
 750  ??/ which translates to the escape \), could separate the '!' and '='
 751  in the input stream, yet the next token is still a "!=".
 752
 753  Suppose instead that we lex by one logical line at a time, producing
 754  a token list or stack for each logical line, and when seeing the '!'
 755  push a CPP_NOT token on the list.  Then if the '!' is part of a
 756  longer token ("!=") we know we must see the remainder of the token by
 757  the time we reach the end of the logical line.  Thus we can have the
 758  '=' handler look at the previous token (at the end of the list / top
 759  of the stack) and see if it is a "!" token, and if so, instead of
 760  pushing a "=" token revise the existing token to be a "!=" token.
 761
 762  This works in the presence of escaped newlines, because the '\' would
 763  have been pushed on the top of the stack as a CPP_BACKSLASH.  The
 764  newline ('\n' or '\r') handler looks at the token at the top of the
 765  stack to see if it is a CPP_BACKSLASH, and if so discards both.
 766  Hence the '=' handler would never see any intervening tokens.
 767
 768  To make trigraphs work in this context, as in precedence trigraphs
 769  are highest and converted before anything else, the '?' handler does
 770  lookahead to see if it is a trigraph, and if so skips the trigraph
 771  and pushes the token it represents onto the top of the stack.  This
 772  also works in the particular case of a CPP_BACKSLASH trigraph.
 773
 774  To the preprocessor, whitespace is only significant to the point of
 775  knowing whether whitespace precedes a particular token.  For example,
 776  the '=' handler needs to know whether there was whitespace between it
 777  and a "!" token on the top of the stack, to make the token conversion
 778  decision correctly.  So each token has a PREV_WHITE flag to
 779  indicate this - the standard permits consecutive whitespace to be
 780  regarded as a single space.  The compiler front ends are not
 781  interested in whitespace at all; they just require a token stream.
 782  Another place where whitespace is significant to the preprocessor is
 783  a #define statement - if there is whitespace between the macro name
 784  and an initial "(" token the macro is "object-like", otherwise it is
 785  a function-like macro that takes arguments.
 786
 787  However, all is not rosy.  Parsing of identifiers, numbers, comments
 788  and strings becomes trickier because of the possibility of raw
 789  trigraphs and escaped newlines in the input stream.
 790
 791  The trigraphs are three consecutive characters beginning with two
 792  question marks.  A question mark is not valid as part of a number or
 793  identifier, so parsing of a number or identifier terminates normally
 794  upon reaching it, returning to the mainloop which handles the
 795  trigraph just like it would in any other position.  Similarly for the
 796  backslash of a backslash-newline combination.  So we just need the
 797  escaped-newline dropper in the mainloop to check if the token on the
 798  top of the stack after dropping the escaped newline is a number or
 799  identifier, and if so to continue processing it as if nothing had
 800  happened.
 801
 802  For strings, we replace trigraphs whenever we reach a quote or
 803  newline, because there might be a backslash trigraph escaping them.
 804  We need to be careful that we start trigraph replacing from where we
 805  left off previously, because it is possible for a first scan to leave
 806  "fake" trigraphs that a second scan would pick up as real (e.g. the
 807  sequence "????/\n=" would find a fake ??= trigraph after removing the
 808  escaped newline.)
 809
 810  For line comments, on reaching a newline we scan the previous
 811  character(s) to see if it escaped, and continue if it is.  Block
 812  comments ignore everything and just focus on finding the comment
 813  termination mark.  The only difficult thing, and it is surprisingly
 814  tricky, is checking if an asterisk precedes the final slash since
 815  they could be separated by escaped newlines.  If the preprocessor is
 816  invoked with the output comments option, we don't bother removing
 817  escaped newlines and replacing trigraphs for output.
 818
 819  Finally, numbers can begin with a period, which is pushed initially
 820  as a CPP_DOT token in its own right.  The digit handler checks if the
 821  previous token was a CPP_DOT not separated by whitespace, and if so
 822  pops it off the stack and pushes a period into the number's buffer
 823  before calling the number parser.
 824
 825 */
 826
     /* Source spellings of the six digraph forms.  NOTE(review): the
        order presumably matches the order in which the corresponding
        token types are declared, so that the table can be indexed by
        token type -- confirm against the code that reads it.  */
 827 static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
 828                                                     U":>", U"<%", U"%>"};
 829
 830 /* Call when a trigraph is encountered.  It warns if necessary, and
 831    returns true if the trigraph should be honoured.  END is the third
 832    character of a trigraph in the input stream.  */
 833 static int
 834 trigraph_ok (pfile, end)
 835      cpp_reader *pfile;
 836      const unsigned char *end;
 837 {
 838   int accept = CPP_OPTION (pfile, trigraphs);
 839
 840   if (CPP_OPTION (pfile, warn_trigraphs))
 841     {
 842       unsigned int col = end - 1 - pfile->buffer->line_base;
 843       if (accept)
 844         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
 845                                "trigraph ??%c converted to %c",
 846                                (int) *end, (int) trigraph_map[*end]);
 847       else
 848         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
 849                                "trigraph ??%c ignored", (int) *end);
 850     }
 851   return accept;
 852 }
 853
 854 /* Scan a string for trigraphs, warning or replacing them inline as
 855    appropriate.  When parsing a string, we must call this routine
 856    before processing a newline character (if trigraphs are enabled),
 857    since the newline might be escaped by a preceding backslash
 858    trigraph sequence.  Returns a pointer to the end of the name after
 859    replacement.  */
 860
 861 static unsigned char *
 862 trigraph_replace (pfile, src, limit)
 863      cpp_reader *pfile;
 864      unsigned char *src;
 865      unsigned char *limit;
 866 {
 867   unsigned char *dest;
 868
 869   /* Starting with src[1], find two consecutive '?'.  The case of no
 870      trigraphs is streamlined.  */
 871
 872   for (src++; src + 1 < limit; src += 2)
 873     {
 874       if (src[0] != '?')
 875         continue;
 876
 877       /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s.  */
 878       if (src[-1] == '?')
 879         src--;
 880       else if (src + 2 == limit || src[1] != '?')
 881         continue;
 882
 883       /* Check if it really is a trigraph.  */
 884       if (trigraph_map[src[2]] == 0)
 885         continue;
 886
 887       dest = src;
 888       goto trigraph_found;
 889     }
 890   return limit;
 891
 892   /* Now we have a trigraph, we need to scan the remaining buffer, and
 893      copy-shifting its contents left if replacement is enabled.  */
 894   for (; src + 2 < limit; dest++, src++)
 895     if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
 896       {
 897       trigraph_found:
 898         src += 2;
 899         if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
 900           *dest = trigraph_map[*src];
 901       }
 902
 903   /* Copy remaining (at most 2) characters.  */
 904   while (src < limit)
 905     *dest++ = *src++;
 906   return dest;
 907 }
 908
 909 /* If CUR is a backslash or the end of a trigraphed backslash, return
 910    a pointer to its beginning, otherwise NULL.  We don't read beyond
 911    the buffer start, because there is the start of the comment in the
 912    buffer.  */
 913 static const unsigned char *
 914 backslash_start (pfile, cur)
 915      cpp_reader *pfile;
 916      const unsigned char *cur;
 917 {
 918   if (cur[0] == '\\')
 919     return cur;
 920   if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
 921       && trigraph_ok (pfile, cur))
 922     return cur - 2;
 923   return 0;
 924 }
 925
 926 /* Skip a C-style block comment.  This is probably the trickiest
 927    handler.  We find the end of the comment by seeing if an asterisk
 928    is before every '/' we encounter.  The nasty complication is that a
 929    previous asterisk may be separated by one or more escaped newlines.
 930    Returns non-zero if comment terminated by EOF, zero otherwise.  */
 931 static int
 932 skip_block_comment (pfile)
 933      cpp_reader *pfile;
 934 {
 935   cpp_buffer *buffer = pfile->buffer;
 936   const unsigned char *char_after_star = 0;
 937   const unsigned char *cur = buffer->cur;
 938
 939   for (; cur < buffer->rlimit; )
 940     {
 941       unsigned char c = *cur++;
 942
 943       /* People like decorating comments with '*', so check for
 944          '/' instead for efficiency.  */
 945       if (c == '/')
 946         {
 947           /* Don't view / then * then / as finishing the comment.  */
 948           if ((cur[-2] == '*' && cur - 1 > buffer->cur)
 949               || cur - 1 == char_after_star)
 950             {
 951               buffer->cur = cur;
 952               return 0;
 953             }
 954
 955           /* Warn about potential nested comments, but not when
 956              the final character inside the comment is a '/'.
 957              Don't bother to get it right across escaped newlines.  */
 958           if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
 959               && cur[0] == '*' && cur[1] != '/')
 960             {
 961               buffer->cur = cur;
 962               cpp_warning (pfile, "'/*' within comment");
 963             }
 964         }
 965       else if (is_vspace (c))
 966         {
 967           const unsigned char* bslash = backslash_start (pfile, cur - 2);
 968
 969           handle_newline (cur, buffer->rlimit, c);
 970           /* Work correctly if there is an asterisk before an
 971              arbirtrarily long sequence of escaped newlines.  */
 972           if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
 973             char_after_star = cur;
 974           else
 975             char_after_star = 0;
 976         }
 977       else if (c == '\t')
 978         adjust_column (pfile, cur - 1);
 979     }
 980
 981   buffer->cur = cur;
 982   return 1;
 983 }
 984
 985 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 986    non-zero if a multiline comment.  */
 987 static int
 988 skip_line_comment (pfile)
 989      cpp_reader *pfile;
 990 {
 991   cpp_buffer *buffer = pfile->buffer;
 992   register const unsigned char *cur = buffer->cur;
 993   int multiline = 0;
 994
 995   for (; cur < buffer->rlimit; )
 996     {
 997       unsigned char c = *cur++;
 998
 999       if (is_vspace (c))
1000         {
1001           /* Check for a (trigaph?) backslash escaping the newline.  */
1002           if (!backslash_start (pfile, cur - 2))
1003             goto out;
1004           multiline = 1;
1005           handle_newline (cur, buffer->rlimit, c);
1006         }
1007     }
1008   cur++;
1009
1010  out:
1011   buffer->cur = cur - 1;        /* Leave newline for caller.  */
1012   return multiline;
1013 }
1014
1015 /* TAB points to a \t character.  Update col_adjust so we track the
1016    column correctly.  */
1017 static void
1018 adjust_column (pfile, tab)
1019      cpp_reader *pfile;
1020      const U_CHAR *tab;
1021 {
1022   /* Zero-based column.  */
1023   unsigned int col = CPP_BUF_COLUMN (pfile->buffer, tab);
1024
1025   /* Round it up to multiple of the tabstop, but subtract 1 since the
1026      tab itself occupies a character position.  */
1027   pfile->col_adjust += (CPP_OPTION (pfile, tabstop)
1028                         - col % CPP_OPTION (pfile, tabstop)) - 1;
1029 }
1030
1031 /* Skips whitespace, stopping at next non-whitespace character.
1032    Adjusts pfile->col_adjust to account for tabs.  This enables tokens
1033    to be assigned the correct column.  */
1034 static void
1035 skip_whitespace (pfile, in_directive)
1036      cpp_reader *pfile;
1037      int in_directive;
1038 {
1039   cpp_buffer *buffer = pfile->buffer;
1040   unsigned short warned = 0;
1041
1042   /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
1043   while (buffer->cur < buffer->rlimit)
1044     {
1045       unsigned char c = *buffer->cur;
1046
1047       if (!is_nvspace (c))
1048         break;
1049
1050       buffer->cur++;
1051       /* Horizontal space always OK.  */
1052       if (c == ' ')
1053         continue;
1054       else if (c == '\t')
1055         adjust_column (pfile, buffer->cur - 1);
1056       /* Must be \f \v or \0.  */
1057       else if (c == '\0')
1058         {
1059           if (!warned)
1060             cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
1061                                    CPP_BUF_COL (buffer),
1062                                    "embedded null character ignored");
1063           warned = 1;
1064         }
1065       else if (in_directive && CPP_PEDANTIC (pfile))
1066         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1067                                CPP_BUF_COL (buffer),
1068                                "%s in preprocessing directive",
1069                                c == '\f' ? "form feed" : "vertical tab");
1070     }
1071 }
1072
1073 /* Parse (append) an identifier.  Calculates the hash value of the
1074    token while parsing, for performance.  The algorithm *must* match
1075    cpp_lookup().  */
1076 static const U_CHAR *
1077 parse_name (pfile, tok, cur, rlimit)
1078      cpp_reader *pfile;
1079      cpp_token *tok;
1080      const U_CHAR *cur, *rlimit;
1081 {
1082   const U_CHAR *name;
1083   unsigned int len;
1084   unsigned int r;
1085
1086   name = cur;
1087   r = 0;
1088   while (cur < rlimit)
1089     {
1090       if (! is_idchar (*cur))
1091         break;
1092       /* $ is not a legal identifier character in the standard, but is
1093          commonly accepted as an extension.  Don't warn about it in
1094          skipped conditional blocks. */
1095       if (*cur == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
1096         {
1097           CPP_BUFFER (pfile)->cur = cur;
1098           cpp_pedwarn (pfile, "'$' character in identifier");
1099         }
1100
1101       r = HASHSTEP (r, cur);
1102       cur++;
1103     }
1104   len = cur - name;
1105
1106   if (tok->val.node == 0)
1107     tok->val.node = _cpp_lookup_with_hash (pfile, name, len, r);
1108   else
1109     {
1110       unsigned int oldlen = tok->val.node->length;
1111       U_CHAR *newname = alloca (oldlen + len);
1112       memcpy (newname, tok->val.node->name, oldlen);
1113       memcpy (newname + oldlen, name, len);
1114       tok->val.node = cpp_lookup (pfile, newname, len + oldlen);
1115     }
1116
1117   return cur;
1118 }
1119
1120 /* Parse (append) a number.  */
1121 static void
1122 parse_number (pfile, list, name)
1123      cpp_reader *pfile;
1124      cpp_toklist *list;
1125      cpp_string *name;
1126 {
1127   const unsigned char *name_limit;
1128   unsigned char *namebuf;
1129   cpp_buffer *buffer = pfile->buffer;
1130   register const unsigned char *cur = buffer->cur;
1131
1132  expanded:
1133   name_limit = list->namebuf + list->name_cap;
1134   namebuf = list->namebuf + list->name_used;
1135
1136   for (; cur < buffer->rlimit && namebuf < name_limit; )
1137     {
1138       unsigned char c = *namebuf = *cur; /* Copy a single char.  */
1139
1140       /* Perhaps we should accept '$' here if we accept it for
1141          identifiers.  We know namebuf[-1] is safe, because for c to
1142          be a sign we must have pushed at least one character.  */
1143       if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1144         goto out;
1145
1146       namebuf++;
1147       cur++;
1148     }
1149
1150   /* Run out of name space?  */
1151   if (cur < buffer->rlimit)
1152     {
1153       list->name_used = namebuf - list->namebuf;
1154       auto_expand_name_space (list);
1155       goto expanded;
1156     }
1157
1158  out:
1159   buffer->cur = cur;
1160   name->len = namebuf - name->text;
1161   list->name_used = namebuf - list->namebuf;
1162 }
1163
1164 /* Places a string terminated by an unescaped TERMINATOR into a
1165    cpp_string, which should be expandable and thus at the top of the
1166    list's stack.  Handles embedded trigraphs, if necessary, and
1167    escaped newlines.
1168
1169    Can be used for character constants (terminator = '\''), string
1170    constants ('"') and angled headers ('>').  Multi-line strings are
1171    allowed, except for within directives.  */
1172
1173 static void
1174 parse_string (pfile, list, token, terminator)
1175      cpp_reader *pfile;
1176      cpp_toklist *list;
1177      cpp_token *token;
1178      unsigned int terminator;
1179 {
1180   cpp_buffer *buffer = pfile->buffer;
1181   cpp_string *name = &token->val.str;
1182   register const unsigned char *cur = buffer->cur;
1183   const unsigned char *name_limit;
1184   unsigned char *namebuf;
1185   unsigned int null_count = 0;
1186   unsigned int trigraphed = list->name_used;
1187
1188  expanded:
1189   name_limit = list->namebuf + list->name_cap;
1190   namebuf = list->namebuf + list->name_used;
1191
1192   for (; cur < buffer->rlimit && namebuf < name_limit; )
1193     {
1194       unsigned int c = *namebuf++ = *cur++; /* Copy a single char.  */
1195
1196       if (c == '\0')
1197         null_count++;
1198       else if (c == terminator || is_vspace (c))
1199         {
1200           /* Needed for trigraph_replace and multiline string warning.  */
1201           buffer->cur = cur;
1202
1203           /* Scan for trigraphs before checking if backslash-escaped.  */
1204           if ((CPP_OPTION (pfile, trigraphs)
1205                || CPP_OPTION (pfile, warn_trigraphs))
1206               && namebuf - (list->namebuf + trigraphed) >= 3)
1207             {
1208               namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1209                                           namebuf);
1210               /* The test above guarantees trigraphed will be positive.  */
1211               trigraphed = namebuf - list->namebuf - 2;
1212             }
1213
1214           namebuf--;     /* Drop the newline / terminator from the name.  */
1215           if (is_vspace (c))
1216             {
1217               /* Drop a backslash newline, and continue. */
1218               if (namebuf[-1] == '\\')
1219                 {
1220                   handle_newline (cur, buffer->rlimit, c);
1221                   namebuf--;
1222                   continue;
1223                 }
1224
1225               cur--;
1226
1227               /* In assembly language, silently terminate strings of
1228                  either variety at end of line.  This is a kludge
1229                  around not knowing where comments are.  */
1230               if (CPP_OPTION (pfile, lang_asm))
1231                 goto out;
1232
1233               /* Character constants and header names may not extend
1234                  over multiple lines.  In Standard C, neither may
1235                  strings.  We accept multiline strings as an
1236                  extension.  (Even in directives - otherwise, glibc's
1237                  longlong.h breaks.)  */
1238               if (terminator != '"')
1239                 goto unterminated;
1240
1241               cur++;  /* Move forwards again.  */
1242
1243               if (pfile->multiline_string_line == 0)
1244                 {
1245                   pfile->multiline_string_line = token->line;
1246                   pfile->multiline_string_column = token->col;
1247                   if (CPP_PEDANTIC (pfile))
1248                     cpp_pedwarn (pfile, "multi-line string constant");
1249                 }
1250
1251               *namebuf++ = '\n';
1252               handle_newline (cur, buffer->rlimit, c);
1253             }
1254           else
1255             {
1256               unsigned char *temp;
1257
1258               /* An odd number of consecutive backslashes represents
1259                  an escaped terminator.  */
1260               temp = namebuf - 1;
1261               while (temp >= name->text && *temp == '\\')
1262                 temp--;
1263
1264               if ((namebuf - temp) & 1)
1265                 goto out;
1266               namebuf++;
1267             }
1268         }
1269     }
1270
1271   /* Run out of name space?  */
1272   if (cur < buffer->rlimit)
1273     {
1274       list->name_used = namebuf - list->namebuf;
1275       auto_expand_name_space (list);
1276       goto expanded;
1277     }
1278
1279   /* We may not have trigraph-replaced the input for this code path,
1280      but as the input is in error by being unterminated we don't
1281      bother.  Prevent warnings about no newlines at EOF.  */
1282   if (is_vspace (cur[-1]))
1283     cur--;
1284
1285  unterminated:
1286   cpp_error (pfile, "missing terminating %c character", (int) terminator);
1287
1288   if (terminator == '\"' && pfile->multiline_string_line != list->line
1289       && pfile->multiline_string_line != 0)
1290     {
1291       cpp_error_with_line (pfile, pfile->multiline_string_line,
1292                            pfile->multiline_string_column,
1293                            "possible start of unterminated string literal");
1294       pfile->multiline_string_line = 0;
1295     }
1296
1297  out:
1298   buffer->cur = cur;
1299   name->len = namebuf - name->text;
1300   list->name_used = namebuf - list->namebuf;
1301
1302   if (null_count > 0)
1303     cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1304                          : "null character preserved"));
1305 }
1306
1307 /* The character TYPE helps us distinguish comment types: '*' = C
1308    style, '/' = C++ style.  For code simplicity, the stored comment
1309    includes the comment start and any terminator.  */
1310
1311 #define COMMENT_START_LEN 2
1312 static void
1313 save_comment (list, token, from, len, type)
1314      cpp_toklist *list;
1315      cpp_token *token;
1316      const unsigned char *from;
1317      unsigned int len;
1318      unsigned int type;
1319 {
1320   unsigned char *buffer;
1321
1322   len += COMMENT_START_LEN;
1323
1324   if (list->name_used + len > list->name_cap)
1325     _cpp_expand_name_space (list, len);
1326
1327   INIT_TOKEN_STR (list, token);
1328   token->type = CPP_COMMENT;
1329   token->val.str.len = len;
1330
1331   buffer = list->namebuf + list->name_used;
1332   list->name_used += len;
1333
1334   /* Copy the comment.  */
1335   if (type == '*')
1336     {
1337       *buffer++ = '/';
1338       *buffer++ = '*';
1339     }
1340   else
1341     {
1342       *buffer++ = type;
1343       *buffer++ = type;
1344     }
1345   memcpy (buffer, from, len - COMMENT_START_LEN);
1346 }
1347
1348 /*
1349  *  The tokenizer's main loop.  Returns a token list, representing a
1350  *  logical line in the input file.  On EOF after some tokens have
1351  *  been processed, we return immediately.  Then in next call, or if
1352  *  EOF occurred at the beginning of a logical line, a single CPP_EOF
1353  *  token is placed in the list.
1354  *
1355  *  Implementation relies almost entirely on lookback, rather than
1356  *  looking forwards.  This means that tokenization requires just
1357  *  a single pass of the file, even in the presence of trigraphs and
1358  *  escaped newlines, providing significant performance benefits.
1359  *  Trigraph overhead is negligible if they are disabled, and low
1360  *  even when enabled.
1361  */
1362
     /* Both macros below refer to variables named list, cur_token and
        first_token, which must be in scope wherever they are used.  */

     /* Non-zero when list->directive has been set.  */
1363 #define KNOWN_DIRECTIVE() (list->directive != 0)
     /* Non-zero when cur_token is the slot immediately after
        list->tokens[first_token] and the token before it is a
        CPP_HASH.  */
1364 #define MIGHT_BE_DIRECTIVE() \
1365 (cur_token == &list->tokens[first_token + 1] && cur_token[-1].type == CPP_HASH)
1366
1367 static void
1368 lex_line (pfile, list)
1369      cpp_reader *pfile;
1370      cpp_toklist *list;
1371 {
1372   cpp_token *cur_token, *token_limit, *first;
1373   cpp_buffer *buffer = pfile->buffer;
1374   const unsigned char *cur = buffer->cur;
1375   unsigned char flags = 0;
1376   unsigned int first_token = list->tokens_used;
1377
1378   if (!(list->flags & LIST_OFFSET))
1379     (abort) ();
1380
1381   list->file = buffer->nominal_fname;
1382   list->line = CPP_BUF_LINE (buffer);
1383   pfile->col_adjust = 0;
1384   pfile->in_lex_line = 1;
1385   if (cur == buffer->buf)
1386     list->flags |= BEG_OF_FILE;
1387
1388  expanded:
1389   token_limit = list->tokens + list->tokens_cap;
1390   cur_token = list->tokens + list->tokens_used;
1391
1392   for (; cur < buffer->rlimit && cur_token < token_limit;)
1393     {
1394       unsigned char c;
1395
1396       /* Optimize non-vertical whitespace skipping; most tokens are
1397          probably separated by whitespace. (' ' '\t' '\v' '\f' '\0').  */
1398       c = *cur;
1399       if (is_nvspace (c))
1400         {
1401           buffer->cur = cur;
1402           skip_whitespace (pfile, (list->tokens[first_token].type == CPP_HASH
1403                                    && cur_token > &list->tokens[first_token]));
1404           cur = buffer->cur;
1405
1406           flags = PREV_WHITE;
1407           if (cur == buffer->rlimit)
1408             break;
1409           c = *cur;
1410         }
1411       cur++;
1412
1413       /* Initialize current token.  CPP_EOF will not be fixed up by
1414          expand_name_space.  */
1415       list->tokens_used = cur_token - list->tokens + 1;
1416       cur_token->type = CPP_EOF;
1417       cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1418       cur_token->line = CPP_BUF_LINE (buffer);
1419       cur_token->flags = flags;
1420       flags = 0;
1421
1422       switch (c)
1423         {
1424         case '0': case '1': case '2': case '3': case '4':
1425         case '5': case '6': case '7': case '8': case '9':
1426           {
1427             int prev_dot;
1428
1429             cur--;              /* Backup character.  */
1430             prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1431             if (prev_dot)
1432               cur_token--;
1433             INIT_TOKEN_STR (list, cur_token);
1434             /* Prepend an immediately previous CPP_DOT token.  */
1435             if (prev_dot)
1436               {
1437                 if (list->name_cap == list->name_used)
1438                   auto_expand_name_space (list);
1439
1440                 cur_token->val.str.len = 1;
1441                 list->namebuf[list->name_used++] = '.';
1442               }
1443
1444           continue_number:
1445             cur_token->type = CPP_NUMBER; /* Before parse_number.  */
1446             buffer->cur = cur;
1447             parse_number (pfile, list, &cur_token->val.str);
1448             cur = buffer->cur;
1449           }
1450           /* Check for # 123 form of #line.  */
1451           if (MIGHT_BE_DIRECTIVE ())
1452             list->directive = _cpp_check_linemarker (pfile, cur_token,
1453                                                      !(cur_token[-1].flags
1454                                                        & PREV_WHITE));
1455           cur_token++;
1456           break;
1457
1458         letter:
1459         case '_':
1460         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1461         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1462         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1463         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1464         case 'y': case 'z':
1465         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1466         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1467         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1468         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1469         case 'Y': case 'Z':
1470           cur--;                     /* Backup character.  */
1471           cur_token->val.node = 0;
1472           cur_token->type = CPP_NAME; /* Identifier, macro etc.  */
1473
1474         continue_name:
1475           cur = parse_name (pfile, cur_token, cur, buffer->rlimit);
1476
1477           if (MIGHT_BE_DIRECTIVE ())
1478             list->directive = _cpp_check_directive (pfile, cur_token,
1479                                                     !(list->tokens[0].flags
1480                                                       & PREV_WHITE));
1481           cur_token++;
1482           break;
1483
1484         case '\'':
1485         case '\"':
1486           cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
1487           /* Do we have a wide string?  */
1488           if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1489               && cur_token[-1].val.node == pfile->spec_nodes->n_L)
1490             BACKUP_TOKEN (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
1491
1492         do_parse_string:
1493           /* Here c is one of ' " or >.  */
1494           INIT_TOKEN_STR (list, cur_token);
1495           buffer->cur = cur;
1496           parse_string (pfile, list, cur_token, c);
1497           cur = buffer->cur;
1498           cur_token++;
1499           break;
1500
1501         case '/':
1502           cur_token->type = CPP_DIV;
1503           if (IMMED_TOKEN ())
1504             {
1505               if (PREV_TOKEN_TYPE == CPP_DIV)
1506                 {
1507                   /* We silently allow C++ comments in system headers,
1508                      irrespective of conformance mode, because lots of
1509                      broken systems do that and trying to clean it up
1510                      in fixincludes is a nightmare.  */
1511                   if (CPP_IN_SYSTEM_HEADER (pfile))
1512                     goto do_line_comment;
1513                   else if (CPP_OPTION (pfile, cplusplus_comments))
1514                     {
1515                       if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1516                           && ! buffer->warned_cplusplus_comments)
1517                         {
1518                           buffer->cur = cur;
1519                           cpp_pedwarn (pfile,
1520                              "C++ style comments are not allowed in ISO C89");
1521                           cpp_pedwarn (pfile,
1522                           "(this will be reported only once per input file)");
1523                           buffer->warned_cplusplus_comments = 1;
1524                         }
1525                     do_line_comment:
1526                       buffer->cur = cur;
1527 #if 0 /* Leave until new lexer in place.  */
1528                       if (cur[-2] != c)
1529                         cpp_warning (pfile,
1530                                      "comment start split across lines");
1531 #endif
1532                       if (skip_line_comment (pfile))
1533                         cpp_warning (pfile, "multi-line comment");
1534
1535                       /* Back-up to first '-' or '/'.  */
1536                       cur_token--;
1537                       if (!CPP_OPTION (pfile, discard_comments)
1538                           && (!KNOWN_DIRECTIVE()
1539                               || (list->directive->flags & COMMENTS)))
1540                         save_comment (list, cur_token++, cur,
1541                                       buffer->cur - cur, c);
1542                       else
1543                         flags = PREV_WHITE;
1544
1545                       cur = buffer->cur;
1546                       break;
1547                     }
1548                 }
1549             }
1550           cur_token++;
1551           break;
1552
1553         case '*':
1554           cur_token->type = CPP_MULT;
1555           if (IMMED_TOKEN ())
1556             {
1557               if (PREV_TOKEN_TYPE == CPP_DIV)
1558                 {
1559                   buffer->cur = cur;
1560 #if 0 /* Leave until new lexer in place.  */
1561                   if (cur[-2] != '/')
1562                     cpp_warning (pfile,
1563                                  "comment start '/*' split across lines");
1564 #endif
1565                   if (skip_block_comment (pfile))
1566                     cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1567                                          "unterminated comment");
1568 #if 0 /* Leave until new lexer in place.  */
1569                   else if (buffer->cur[-2] != '*')
1570                     cpp_warning (pfile,
1571                                  "comment end '*/' split across lines");
1572 #endif
1573                   /* Back up to opening '/'.  */
1574                   cur_token--;
1575                   if (!CPP_OPTION (pfile, discard_comments)
1576                       && (!KNOWN_DIRECTIVE()
1577                           || (list->directive->flags & COMMENTS)))
1578                     save_comment (list, cur_token++, cur,
1579                                   buffer->cur - cur, c);
1580                   else
1581                     flags = PREV_WHITE;
1582
1583                   cur = buffer->cur;
1584                   break;
1585                 }
1586               else if (CPP_OPTION (pfile, cplusplus))
1587                 {
1588                   /* In C++, there are .* and ->* operators.  */
1589                   if (PREV_TOKEN_TYPE == CPP_DEREF)
1590                     BACKUP_TOKEN (CPP_DEREF_STAR);
1591                   else if (PREV_TOKEN_TYPE == CPP_DOT)
1592                     BACKUP_TOKEN (CPP_DOT_STAR);
1593                 }
1594             }
1595           cur_token++;
1596           break;
1597
1598         case '\n':
1599         case '\r':
1600           handle_newline (cur, buffer->rlimit, c);
1601           if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
1602             {
1603               if (IMMED_TOKEN ())
1604                 {
1605                   /* Remove the escaped newline.  Then continue to process
1606                      any interrupted name or number.  */
1607                   cur_token--;
1608                   /* Backslash-newline may not be immediately followed by
1609                      EOF (C99 5.1.1.2).  */
1610                   if (cur >= buffer->rlimit)
1611                     {
1612                       cpp_pedwarn (pfile, "backslash-newline at end of file");
1613                       break;
1614                     }
1615                   if (IMMED_TOKEN ())
1616                     {
1617                       cur_token--;
1618                       if (cur_token->type == CPP_NAME)
1619                         goto continue_name;
1620                       else if (cur_token->type == CPP_NUMBER)
1621                         goto continue_number;
1622                       cur_token++;
1623                     }
1624                   /* Remember whitespace setting.  */
1625                   flags = cur_token->flags;
1626                   break;
1627                 }
1628               else
1629                 {
1630                   buffer->cur = cur;
1631                   cpp_warning (pfile,
1632                                "backslash and newline separated by space");
1633                 }
1634             }
1635           else if (MIGHT_BE_DIRECTIVE ())
1636             {
1637               /* "Null directive." C99 6.10.7: A preprocessing
1638                  directive of the form # <new-line> has no effect.
1639
1640                  But it is still a directive, and therefore disappears
1641                  from the output. */
1642               cur_token--;
1643               if (cur_token->flags & PREV_WHITE
1644                   && CPP_WTRADITIONAL (pfile))
1645                 cpp_warning (pfile, "K+R C ignores #\\n with the # indented");
1646             }
1647
1648           /* Skip vertical space until we have at least one token to
1649              return.  */
1650           if (cur_token != &list->tokens[first_token])
1651             goto out;
1652           list->line = CPP_BUF_LINE (buffer);
1653           break;
1654
1655         case '-':
1656           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
1657             REVISE_TOKEN (CPP_MINUS_MINUS);
1658           else
1659             PUSH_TOKEN (CPP_MINUS);
1660           break;
1661
1662         make_hash:
1663         case '#':
1664           /* The digraph flag checking ensures that ## and %:%:
1665              are interpreted as CPP_PASTE, but #%: and %:# are not.  */
1666           if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1667               && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1668             REVISE_TOKEN (CPP_PASTE);
1669           else
1670             PUSH_TOKEN (CPP_HASH);
1671           break;
1672
1673         case ':':
1674           cur_token->type = CPP_COLON;
1675           if (IMMED_TOKEN ())
1676             {
1677               if (PREV_TOKEN_TYPE == CPP_COLON
1678                   && CPP_OPTION (pfile, cplusplus))
1679                 BACKUP_TOKEN (CPP_SCOPE);
1680               else if (CPP_OPTION (pfile, digraphs))
1681                 {
1682                   /* Digraph: "<:" is a '['  */
1683                   if (PREV_TOKEN_TYPE == CPP_LESS)
1684                     BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1685                   /* Digraph: "%:" is a '#'  */
1686                   else if (PREV_TOKEN_TYPE == CPP_MOD)
1687                     {
1688                       (--cur_token)->flags |= DIGRAPH;
1689                       goto make_hash;
1690                     }
1691                 }
1692             }
1693           cur_token++;
1694           break;
1695
1696         case '&':
1697           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1698             REVISE_TOKEN (CPP_AND_AND);
1699           else
1700             PUSH_TOKEN (CPP_AND);
1701           break;
1702
1703         make_or:
1704         case '|':
1705           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1706             REVISE_TOKEN (CPP_OR_OR);
1707           else
1708             PUSH_TOKEN (CPP_OR);
1709           break;
1710
1711         case '+':
1712           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1713             REVISE_TOKEN (CPP_PLUS_PLUS);
1714           else
1715             PUSH_TOKEN (CPP_PLUS);
1716           break;
1717
1718         case '=':
1719             /* This relies on equidistance of "?=" and "?" tokens.  */
1720           if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1721             REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1722           else
1723             PUSH_TOKEN (CPP_EQ);
1724           break;
1725
1726         case '>':
1727           cur_token->type = CPP_GREATER;
1728           if (IMMED_TOKEN ())
1729             {
1730               if (PREV_TOKEN_TYPE == CPP_GREATER)
1731                 BACKUP_TOKEN (CPP_RSHIFT);
1732               else if (PREV_TOKEN_TYPE == CPP_MINUS)
1733                 BACKUP_TOKEN (CPP_DEREF);
1734               else if (CPP_OPTION (pfile, digraphs))
1735                 {
1736                   /* Digraph: ":>" is a ']'  */
1737                   if (PREV_TOKEN_TYPE == CPP_COLON)
1738                     BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1739                   /* Digraph: "%>" is a '}'  */
1740                   else if (PREV_TOKEN_TYPE == CPP_MOD)
1741                     BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1742                 }
1743             }
1744           cur_token++;
1745           break;
1746
1747         case '<':
1748           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1749             {
1750               REVISE_TOKEN (CPP_LSHIFT);
1751               break;
1752             }
1753           /* Is this the beginning of a header name?  */
1754           if (KNOWN_DIRECTIVE () && (list->directive->flags & INCL))
1755             {
1756               c = '>';  /* Terminator.  */
1757               cur_token->type = CPP_HEADER_NAME;
1758               goto do_parse_string;
1759             }
1760           PUSH_TOKEN (CPP_LESS);
1761           break;
1762
1763         case '%':
1764           /* Digraph: "<%" is a '{'  */
1765           cur_token->type = CPP_MOD;
1766           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS
1767               && CPP_OPTION (pfile, digraphs))
1768             BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1769           cur_token++;
1770           break;
1771
1772         case '?':
1773           if (cur + 1 < buffer->rlimit && *cur == '?'
1774               && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1775             {
1776               /* Handle trigraph.  */
1777               cur++;
1778               switch (*cur++)
1779                 {
1780                 case '(': goto make_open_square;
1781                 case ')': goto make_close_square;
1782                 case '<': goto make_open_brace;
1783                 case '>': goto make_close_brace;
1784                 case '=': goto make_hash;
1785                 case '!': goto make_or;
1786                 case '-': goto make_complement;
1787                 case '/': goto make_backslash;
1788                 case '\'': goto make_xor;
1789                 }
1790             }
1791           if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1792             {
1793               /* GNU C++ defines <? and >? operators.  */
1794               if (PREV_TOKEN_TYPE == CPP_LESS)
1795                 {
1796                   REVISE_TOKEN (CPP_MIN);
1797                   break;
1798                 }
1799               else if (PREV_TOKEN_TYPE == CPP_GREATER)
1800                 {
1801                   REVISE_TOKEN (CPP_MAX);
1802                   break;
1803                 }
1804             }
1805           PUSH_TOKEN (CPP_QUERY);
1806           break;
1807
1808         case '.':
1809           if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1810               && IMMED_TOKEN ()
1811               && !(cur_token[-1].flags & PREV_WHITE))
1812             {
1813               cur_token -= 2;
1814               PUSH_TOKEN (CPP_ELLIPSIS);
1815             }
1816           else
1817             PUSH_TOKEN (CPP_DOT);
1818           break;
1819
1820         make_complement:
1821         case '~': PUSH_TOKEN (CPP_COMPL); break;
1822         make_xor:
1823         case '^': PUSH_TOKEN (CPP_XOR); break;
1824         make_open_brace:
1825         case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1826         make_close_brace:
1827         case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1828         make_open_square:
1829         case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1830         make_close_square:
1831         case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1832         make_backslash:
1833         case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1834         case '!': PUSH_TOKEN (CPP_NOT); break;
1835         case ',': PUSH_TOKEN (CPP_COMMA); break;
1836         case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1837         case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1838         case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
1839
1840         case '$':
1841           if (CPP_OPTION (pfile, dollars_in_ident))
1842             goto letter;
1843           /* Fall through */
1844         default:
1845           cur_token->val.aux = c;
1846           PUSH_TOKEN (CPP_OTHER);
1847           break;
1848         }
1849     }
1850
1851   /* Run out of token space?  */
1852   if (cur_token == token_limit)
1853     {
1854       list->tokens_used = cur_token - list->tokens;
1855       _cpp_expand_token_space (list, 256);
1856       goto expanded;
1857     }
1858
1859   cur_token->flags = flags;
1860   if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1861     {
1862       if (cur > buffer->buf && !is_vspace (cur[-1]))
1863         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1864                                CPP_BUF_COLUMN (buffer, cur),
1865                                "no newline at end of file");
1866       cur_token++->type = CPP_EOF;
1867     }
1868
1869  out:
1870   /* All tokens are allocated, so the memory location is fixed.  */
1871   first = &list->tokens[first_token];
1872
1873   /* Don't complain about the null directive, nor directives in
1874      assembly source: we don't know where the comments are, and # may
1875      introduce assembler pseudo-ops.  Don't complain about invalid
1876      directives in skipped conditional groups (6.10 p4).  */
1877   if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1878       && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1879     {
1880       if (first[1].type == CPP_NAME)
1881         cpp_error (pfile, "invalid preprocessing directive #%.*s",
1882                    (int) first[1].val.node->length, first[1].val.node->name);
1883       else
1884         cpp_error (pfile, "invalid preprocessing directive");
1885     }
1886
1887   /* Put EOF at end of known directives.  This covers "directives do
1888      not extend beyond the end of the line (description 6.10 part 2)".  */
1889   if (KNOWN_DIRECTIVE () || !pfile->done_initializing)
1890     {
1891       pfile->first_directive_token = first;
1892       cur_token++->type = CPP_EOF;
1893     }
1894
1895   /* Directives, known or not, always start a new line.  */
1896   if (first_token == 0 || list->tokens[first_token].type == CPP_HASH)
1897     first->flags |= BOL;
1898   else
1899     /* 6.10.3.10: Within the sequence of preprocessing tokens making
1900        up the invocation of a function-like macro, new line is
1901        considered a normal white-space character.  */
1902     first->flags |= PREV_WHITE;
1903
1904   buffer->cur = cur;
1905   list->tokens_used = cur_token - list->tokens;
1906   pfile->in_lex_line = 0;
1907 }
1908
1909 /* Write the spelling of a token TOKEN, with any appropriate
1910    whitespace before it, to the token_buffer.  PREV is the previous
1911    token, which is used to determine if we need to shove in an extra
1912    space in order to avoid accidental token paste.  */
1913 static void
1914 output_token (pfile, token, prev)
1915      cpp_reader *pfile;
1916      const cpp_token *token, *prev;
1917 {
1918   int dummy;
1919
1920   if (token->col && (token->flags & BOL))
1921     {
1922       /* Supply enough whitespace to put this token in its original
1923          column.  Don't bother trying to reconstruct tabs; we can't
1924          get it right in general, and nothing ought to care.  (Yes,
1925          some things do care; the fault lies with them.)  */
1926       unsigned char *buffer;
1927       unsigned int spaces = token->col - 1;
1928
1929       CPP_RESERVE (pfile, token->col);
1930       buffer = pfile->limit;
1931
1932       while (spaces--)
1933         *buffer++ = ' ';
1934       pfile->limit = buffer;
1935     }
1936   else if (token->flags & PREV_WHITE)
1937     CPP_PUTC (pfile, ' ');
1938   else if (prev)
1939     {
1940       /* Check for and prevent accidental token pasting.  */
1941       if (can_paste (pfile, prev, token, &dummy) != CPP_EOF)
1942         CPP_PUTC (pfile, ' ');
1943       /* can_paste doesn't catch all the accidental pastes.
1944          Consider a + ++b - if there is not a space between the + and ++, it
1945          will be misparsed as a++ + b.  */
1946       else if ((prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1947                || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS))
1948         CPP_PUTC (pfile, ' ');
1949     }
1950
1951   CPP_RESERVE (pfile, TOKEN_LEN (token));
1952   pfile->limit = spell_token (pfile, token, pfile->limit);
1953 }
1954
1955 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1956    already contain the enough space to hold the token's spelling.
1957    Returns a pointer to the character after the last character
1958    written.  */
1959
1960 static unsigned char *
1961 spell_token (pfile, token, buffer)
1962      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1963      const cpp_token *token;
1964      unsigned char *buffer;
1965 {
1966   switch (token_spellings[token->type].type)
1967     {
1968     case SPELL_OPERATOR:
1969       {
1970         const unsigned char *spelling;
1971         unsigned char c;
1972
1973         if (token->flags & DIGRAPH)
1974           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1975         else
1976           spelling = token_spellings[token->type].spelling;
1977
1978         while ((c = *spelling++) != '\0')
1979           *buffer++ = c;
1980       }
1981       break;
1982
1983     case SPELL_IDENT:
1984       memcpy (buffer, token->val.node->name, token->val.node->length);
1985       buffer += token->val.node->length;
1986       break;
1987
1988     case SPELL_STRING:
1989       {
1990         if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
1991           *buffer++ = 'L';
1992
1993         if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1994           *buffer++ = '"';
1995         if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1996           *buffer++ = '\'';
1997
1998         memcpy (buffer, token->val.str.text, token->val.str.len);
1999         buffer += token->val.str.len;
2000
2001         if (token->type == CPP_STRING || token->type == CPP_WSTRING)
2002           *buffer++ = '"';
2003         if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
2004           *buffer++ = '\'';
2005       }
2006       break;
2007
2008     case SPELL_CHAR:
2009       *buffer++ = token->val.aux;
2010       break;
2011
2012     case SPELL_NONE:
2013       cpp_ice (pfile, "Unspellable token %s", token_names[token->type]);
2014       break;
2015     }
2016
2017   return buffer;
2018 }
2019
2020 /* Return the spelling of a token known to be an operator.
2021    Does not distinguish digraphs from their counterparts.  */
2022 const unsigned char *
2023 _cpp_spell_operator (type)
2024      enum cpp_ttype type;
2025 {
2026   if (token_spellings[type].type == SPELL_OPERATOR)
2027     return token_spellings[type].spelling;
2028   else
2029     return token_names[type];
2030 }
2031
2032
2033 /* Macro expansion algorithm.
2034
2035 Macro expansion is implemented by a single-pass algorithm; there are
2036 no rescan passes involved.  cpp_get_token expands just enough to be
2037 able to return a token to the caller, a consequence is that when it
2038 returns the preprocessor can be in a state of mid-expansion.  The
2039 algorithm does not work by fully expanding a macro invocation into
2040 some kind of token list, and then returning them one by one.
2041
2042 Our expansion state is recorded in a context stack.  We start out with
2043 a single context on the stack, let's call it base context.  This
2044 consists of the token list returned by lex_line that forms the next
2045 logical line in the source file.
2046
2047 The current level in the context stack is stored in the cur_context
2048 member of the cpp_reader structure.  The context it references keeps,
2049 amongst other things, a count of how many tokens form that context and
2050 our position within those tokens.
2051
2052 Fundamentally, calling cpp_get_token will return the next token from
2053 the current context.  If we're at the end of the current context, that
2054 context is popped from the stack first, unless it is the base context,
2055 in which case the next logical line is lexed from the source file.
2056
2057 However, before returning the token, if it is a CPP_NAME token
2058 _cpp_get_token checks to see if it is a macro and if it is enabled.
2059 Each time it encounters a macro name, it calls push_macro_context.
2060 This function checks that the macro should be expanded (with
2061 is_macro_enabled), and if so pushes a new macro context on the stack
2062 which becomes the current context.  It then loops back to read the
2063 first token of the macro context.
2064
2065 A macro context basically consists of the token list representing the
2066 macro's replacement list, which was saved in the hash table by
2067 save_macro_expansion when its #define statement was parsed.  If the
2068 macro is function-like, it also contains the tokens that form the
2069 arguments to the macro.  I say more about macro arguments below, but
2070 for now just saying that each argument is a set of pointers to tokens
2071 is enough.
2072
2073 When taking tokens from a macro context, we may get a CPP_MACRO_ARG
2074 token.  This represents an argument passed to the macro, with the
2075 argument number stored in the token's AUX field.  The argument should
2076 be substituted; this is achieved by pushing an "argument context".  An
2077 argument context just refers to the tokens forming the argument,
2078 which are obtained directly from the macro context.  The STRINGIFY
2079 flag on a CPP_MACRO_ARG token indicates that the argument should be
2080 stringified.
2081
2082 Here's a few simple rules the context stack obeys:-
2083
2084   1) The lex_line token list is always context zero.
2085
2086   2) Context 1, if it exists, must be a macro context.
2087
2088   3) An argument context can only appear above a macro context.
2089
2090   4) A macro context can appear above the base context, another macro
2091   context, or an argument context.
2092
2093   5) These imply that the minimal level of an argument context is 2.
2094
2095 The only tricky thing left is ensuring that macros are enabled and
2096 disabled correctly.  The algorithm controls macro expansion by the
2097 level of the context a token is taken from in the context stack.  If a
2098 token is taken from a level equal to no_expand_level (a member of
2099 struct cpp_reader), no expansion is performed.
2100
2101 When popping a context off the stack, if no_expand_level equals the
2102 level of the popped context, it is reduced by one to match the new
2103 context level, so that expansion is still disabled.  It does not
2104 increase if a context is pushed, though.  It starts out life as
2105 UINT_MAX, which has the effect that initially macro expansion is
2106 enabled.  I explain how this mechanism works below.
2107
2108 The standard requires:-
2109
2110   1) Arguments to be fully expanded before substitution.
2111
2112   2) Stringified arguments to not be expanded, nor the tokens
2113   immediately surrounding a ## operator.
2114
2115   3) Continual rescanning until there are no more macros left to
2116   replace.
2117
2118   4) Once a macro has been expanded in stage 1) or 3), it cannot be
2119   expanded again during later rescans.  This prevents infinite
2120   recursion.
2121
2122 The first thing to observe is that stage 3) is mostly redundant.
2123 Since a macro is disabled once it has been expanded, how can a rescan
2124 find an unexpanded macro name?  There are only two cases where this is
2125 possible:-
2126
2127   a) If the macro name results from a token paste operation.
2128
2129   b) If the macro in question is a function-like macro that hasn't
2130   already been expanded because previously there was not the required
2131   '(' token immediately following it.  This is only possible when an
2132   argument is substituted, and after substitution the last token of
2133   the argument can bind with a parenthesis appearing in the tokens
2134   following the substitution.  Note that if the '(' appears within the
2135   argument, the ')' must too, as expanding macro arguments cannot
2136   "suck in" tokens outside the argument.
2137
2138 So we tackle this as follows.  When parsing the macro invocation for
2139 arguments, we record the tokens forming each argument as a list of
2140 pointers to those tokens.  We do not expand any tokens that are "raw",
2141 i.e. directly from the macro invocation, but other tokens that come
2142 from (nested) argument substitution are fully expanded.
2143
2144 This is achieved by setting the no_expand_level to that of the macro
2145 invocation.  A CPP_MACRO_ARG token never appears in the list of tokens
2146 forming an argument, because parse_args (indirectly) calls
2147 get_raw_token which automatically pushes argument contexts and traces
2148 into them.  Since these contexts are at a higher level than the
2149 no_expand_level, they get fully macro expanded.
2150
2151 "Raw" and non-raw tokens are separated in arguments by null pointers,
2152 with the policy that the initial state of an argument is raw.  If the
2153 first token is not raw, it should be preceded by a null pointer.  When
2154 tracing through the tokens of an argument context, each time
2155 get_raw_token encounters a null pointer, it toggles the flag
2156 CONTEXT_RAW.
2157
2158 This flag, when set, indicates to is_macro_disabled that we are
2159 reading raw tokens which should be macro-expanded.  Similarly, if
2160 clear, is_macro_disabled suppresses re-expansion.
2161
2162 It's probably time for an example.
2163
2164 #define hash #
2165 #define str(x) #x
2166 #define xstr(y) str(y hash)
2167 str(hash)                       // "hash"
2168 xstr(hash)                      // "# hash"
2169
2170 In the invocation of str, parse_args turns off macro expansion and so
2171 parses the argument as <hash>.  This is the only token (pointer)
2172 passed as the argument to str.  Since <hash> is raw there is no need
2173 for an initial null pointer.  stringify_arg is called from
2174 get_raw_token when tracing through the expansion of str, since the
2175 argument has the STRINGIFY flag set.  stringify_arg turns off
2176 macro_expansion by setting the no_expand_level to that of the argument
2177 context.  Thus it gets the token <hash> and stringifies it to "hash"
2178 correctly.
2179
2180 Similarly, xstr is passed <hash>.  However, when parse_args is parsing
2181 the invocation of str() in xstr's expansion, get_raw_token encounters
2182 a CPP_MACRO_ARG token for y.  Transparently to parse_args, it pushes
2183 an argument context, and enters the tokens of the argument,
2184 i.e. <hash>.  This is at a higher context level than parse_args
2185 disabled, and so is_macro_disabled permits expansion of it and a macro
2186 context is pushed on top of the argument context.  This contains the
2187 <#> token, and the end result is that <hash> is macro expanded.
2188 However, after popping off the argument context, the <hash> of xstr's
2189 expansion does not get macro expanded because we're back at the
2190 no_expand_level.  The end result is that the argument passed to str is
2191 <NULL> <#> <NULL> <hash>.  Note the nulls - policy is we start off
2192 raw, <#> is not raw, but then <hash> is.
2193
2194 */
2195
2196
2197 /* Free the storage allocated for macro arguments.  */
2198 static void
2199 free_macro_args (args)
2200      macro_args *args;
2201 {
2202   if (args->tokens)
2203     free ((PTR) args->tokens);
2204   free (args->ends);
2205   free (args);
2206 }
2207
2208 /* Determines if a macro has been already used (and is therefore
2209    disabled).  */
2210 static int
2211 is_macro_disabled (pfile, expansion, token)
2212      cpp_reader *pfile;
2213      const cpp_toklist *expansion;
2214      const cpp_token *token;
2215 {
2216   cpp_context *context = CURRENT_CONTEXT (pfile);
2217
2218   /* Don't expand anything if this file has already been preprocessed.  */
2219   if (CPP_OPTION (pfile, preprocessed))
2220     return 1;
2221
2222   /* Arguments on either side of ## are inserted in place without
2223      macro expansion (6.10.3.3.2).  Conceptually, any macro expansion
2224      occurs during a later rescan pass.  The effect is that we expand
2225      iff we would as part of the macro's expansion list, so we should
2226      drop to the macro's context.  */
2227   if (IS_ARG_CONTEXT (context))
2228     {
2229       if (token->flags & PASTED)
2230         context--;
2231       else if (!(context->flags & CONTEXT_RAW))
2232         return 1;
2233       else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2234         context--;
2235     }
2236
2237   /* Have we already used this macro?  */
2238   while (context->level > 0)
2239     {
2240       if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2241         return 1;
2242       /* Raw argument tokens are judged based on the token list they
2243          came from.  */
2244       if (context->flags & CONTEXT_RAW)
2245         context = pfile->contexts + context->level;
2246       else
2247         context--;
2248     }
2249
2250   /* Function-like macros may be disabled if the '(' is not in the
2251      current context.  We check this without disrupting the context
2252      stack.  */
2253   if (expansion->paramc >= 0)
2254     {
2255       const cpp_token *next;
2256       unsigned int prev_nme;
2257
2258       context = CURRENT_CONTEXT (pfile);
2259       /* Drop down any contexts we're at the end of: the '(' may
2260          appear in lower macro expansions, or in the rest of the file.  */
2261       while (context->posn == context->count && context > pfile->contexts)
2262         {
2263           context--;
2264           /* If we matched, we are disabled, as we appear in the
2265              expansion of each macro we meet.  */
2266           if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2267             return 1;
2268         }
2269
2270       prev_nme = pfile->no_expand_level;
2271       pfile->no_expand_level = context - pfile->contexts;
2272       next = _cpp_get_token (pfile);
2273       restore_macro_expansion (pfile, prev_nme);
2274       if (next->type != CPP_OPEN_PAREN)
2275         {
2276           _cpp_push_token (pfile, next);
2277           if (CPP_WTRADITIONAL (pfile))
2278             cpp_warning (pfile,
2279          "function macro %.*s must be used with arguments in traditional C",
2280                          (int) token->val.node->length, token->val.node->name);
2281           return 1;
2282         }
2283     }
2284
2285   return 0;
2286 }
2287
2288 /* Add a token to the set of tokens forming the arguments to the macro
2289    being parsed in parse_args.  */
2290 static void
2291 save_token (args, token)
2292      macro_args *args;
2293      const cpp_token *token;
2294 {
2295   if (args->used == args->capacity)
2296     {
2297       args->capacity += args->capacity + 100;
2298       args->tokens = (const cpp_token **)
2299         xrealloc ((PTR) args->tokens,
2300                   args->capacity * sizeof (const cpp_token *));
2301     }
2302   args->tokens[args->used++] = token;
2303 }
2304
2305 /* Take and save raw tokens until we finish one argument.  Empty
2306    arguments are saved as a single CPP_PLACEMARKER token.  */
2307 static const cpp_token *
2308 parse_arg (pfile, var_args, paren_context, args, pcount)
2309      cpp_reader *pfile;
2310      int var_args;
2311      unsigned int paren_context;
2312      macro_args *args;
2313      unsigned int *pcount;
2314 {
2315   const cpp_token *token;
2316   unsigned int paren = 0, count = 0;
2317   int raw, was_raw = 1;
2318
2319   for (count = 0;; count++)
2320     {
2321       token = _cpp_get_token (pfile);
2322
2323       switch (token->type)
2324         {
2325         default:
2326           break;
2327
2328         case CPP_OPEN_PAREN:
2329           paren++;
2330           break;
2331
2332         case CPP_CLOSE_PAREN:
2333           if (paren-- != 0)
2334             break;
2335           goto out;
2336
2337         case CPP_COMMA:
2338           /* Commas are not terminators within parantheses or var_args.  */
2339           if (paren || var_args)
2340             break;
2341           goto out;
2342
2343         case CPP_EOF:           /* Error reported by caller.  */
2344           goto out;
2345         }
2346
2347       raw = pfile->cur_context <= paren_context;
2348       if (raw != was_raw)
2349         {
2350           was_raw = raw;
2351           save_token (args, 0);
2352           count++;
2353         }
2354       save_token (args, token);
2355     }
2356
2357  out:
2358   if (count == 0)
2359     {
2360       /* Duplicate the placemarker.  Then we can set its flags and
2361          position and safely be using more than one.  */
2362       save_token (args, duplicate_token (pfile, &placemarker_token));
2363       count++;
2364     }
2365
2366   *pcount = count;
2367   return token;
2368 }
2369
/* Non-zero if the argument beginning at index O of argument list A is
   empty, i.e. it consists of a lone CPP_PLACEMARKER token, possibly
   preceded by a null pointer.  A and O are each evaluated more than
   once.  */

#define empty_argument(A, O) \
  (((A)->tokens[O] ? (A)->tokens[O] : (A)->tokens[(O) + 1])->type \
   == CPP_PLACEMARKER)
2377
2378 /* Parse the arguments making up a macro invocation.  Nested arguments
2379    are automatically macro expanded, but immediate macros are not
2380    expanded; this enables e.g. operator # to work correctly.  Returns
2381    non-zero on error.  */
2382 static int
2383 parse_args (pfile, hp, args)
2384      cpp_reader *pfile;
2385      cpp_hashnode *hp;
2386      macro_args *args;
2387 {
2388   const cpp_token *token;
2389   const cpp_toklist *macro;
2390   unsigned int total = 0;
2391   unsigned int paren_context = pfile->cur_context;
2392   int argc = 0;
2393
2394   macro = hp->value.expansion;
2395   do
2396     {
2397       unsigned int count;
2398
2399       token = parse_arg (pfile, (argc + 1 == macro->paramc
2400                                  && (macro->flags & VAR_ARGS)),
2401                          paren_context, args, &count);
2402       if (argc < macro->paramc)
2403         {
2404           total += count;
2405           args->ends[argc] = total;
2406         }
2407       argc++;
2408     }
2409   while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
2410
2411   if (token->type == CPP_EOF)
2412     {
2413       cpp_error (pfile, "unterminated invocation of macro \"%.*s\"",
2414                  hp->length, hp->name);
2415       return 1;
2416     }
2417   else if (argc < macro->paramc)
2418     {
2419       /* A rest argument is allowed to not appear in the invocation at all.
2420          e.g. #define debug(format, args...) ...
2421          debug("string");
2422          This is exactly the same as if the rest argument had received no
2423          tokens - debug("string",);  This extension is deprecated.  */
2424
2425       if (argc + 1 == macro->paramc && (macro->flags & GNU_REST_ARGS))
2426         {
2427           /* Duplicate the placemarker.  Then we can set its flags and
2428              position and safely be using more than one.  */
2429           save_token (args, duplicate_token (pfile, &placemarker_token));
2430           args->ends[argc] = total + 1;
2431           return 0;
2432         }
2433       else
2434         {
2435           cpp_error (pfile,
2436                      "insufficient arguments in invocation of macro \"%.*s\"",
2437                      hp->length, hp->name);
2438           return 1;
2439         }
2440     }
2441   /* An empty argument to an empty function-like macro is fine.  */
2442   else if (argc > macro->paramc
2443            && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2444     {
2445       cpp_error (pfile,
2446                  "too many arguments in invocation of macro \"%.*s\"",
2447                  hp->length, hp->name);
2448       return 1;
2449     }
2450
2451   return 0;
2452 }
2453
2454 /* Adds backslashes before all backslashes and double quotes appearing
2455    in strings.  Non-printable characters are converted to octal.  */
2456 static U_CHAR *
2457 quote_string (dest, src, len)
2458      U_CHAR *dest;
2459      const U_CHAR *src;
2460      unsigned int len;
2461 {
2462   while (len--)
2463     {
2464       U_CHAR c = *src++;
2465
2466       if (c == '\\' || c == '"')
2467         {
2468           *dest++ = '\\';
2469           *dest++ = c;
2470         }
2471       else
2472         {
2473           if (ISPRINT (c))
2474             *dest++ = c;
2475           else
2476             {
2477               sprintf ((char *) dest, "\\%03o", c);
2478               dest += 4;
2479             }
2480         }
2481     }
2482
2483   return dest;
2484 }
2485
2486 /* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2487    CPP_STRING token containing TEXT in quoted form.  */
2488 static cpp_token *
2489 make_string_token (token, text, len)
2490      cpp_token *token;
2491      const U_CHAR *text;
2492      unsigned int len;
2493 {
2494   U_CHAR *buf;
2495
2496   buf = (U_CHAR *) xmalloc (len * 4);
2497   token->type = CPP_STRING;
2498   token->flags = 0;
2499   token->val.str.text = buf;
2500   token->val.str.len = quote_string (buf, text, len) - buf;
2501   return token;
2502 }
2503
2504 /* Allocates and converts a temporary token to a CPP_NUMBER token,
2505    evaluating to NUMBER.  */
2506 static cpp_token *
2507 alloc_number_token (pfile, number)
2508      cpp_reader *pfile;
2509      int number;
2510 {
2511   cpp_token *result;
2512   char *buf;
2513
2514   result = get_temp_token (pfile);
2515   buf = xmalloc (20);
2516   sprintf (buf, "%d", number);
2517
2518   result->type = CPP_NUMBER;
2519   result->flags = 0;
2520   result->val.str.text = (U_CHAR *) buf;
2521   result->val.str.len = strlen (buf);
2522   return result;
2523 }
2524
2525 /* Returns a temporary token from the temporary token store of PFILE.  */
2526 static cpp_token *
2527 get_temp_token (pfile)
2528      cpp_reader *pfile;
2529 {
2530   if (pfile->temp_used == pfile->temp_alloced)
2531     {
2532       if (pfile->temp_used == pfile->temp_cap)
2533         {
2534           pfile->temp_cap += pfile->temp_cap + 20;
2535           pfile->temp_tokens = (cpp_token **) xrealloc
2536             (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2537         }
2538       pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2539         (sizeof (cpp_token));
2540     }
2541
2542   return pfile->temp_tokens[pfile->temp_used++];
2543 }
2544
2545 /* Release (not free) for re-use the temporary tokens of PFILE.  */
2546 static void
2547 release_temp_tokens (pfile)
2548      cpp_reader *pfile;
2549 {
2550   while (pfile->temp_used)
2551     {
2552       cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
2553
2554       if (token_spellings[token->type].type == SPELL_STRING)
2555         {
2556           free ((char *) token->val.str.text);
2557           token->val.str.text = 0;
2558         }
2559     }
2560 }
2561
2562 /* Free all of PFILE's dynamically-allocated temporary tokens.  */
2563 void
2564 _cpp_free_temp_tokens (pfile)
2565      cpp_reader *pfile;
2566 {
2567   if (pfile->temp_tokens)
2568     {
2569       /* It is possible, though unlikely (looking for '(' of a funlike
2570          macro into EOF), that we haven't released the tokens yet.  */
2571       release_temp_tokens (pfile);
2572       while (pfile->temp_alloced)
2573         free (pfile->temp_tokens[--pfile->temp_alloced]);
2574       free (pfile->temp_tokens);
2575     }
2576
2577   if (pfile->date)
2578     {
2579       free ((char *) pfile->date->val.str.text);
2580       free (pfile->date);
2581       free ((char *) pfile->time->val.str.text);
2582       free (pfile->time);
2583     }
2584 }
2585
2586 /* Copy TOKEN into a temporary token from PFILE's store.  */
2587 static cpp_token *
2588 duplicate_token (pfile, token)
2589      cpp_reader *pfile;
2590      const cpp_token *token;
2591 {
2592   cpp_token *result = get_temp_token (pfile);
2593
2594   *result = *token;
2595   if (token_spellings[token->type].type == SPELL_STRING)
2596     {
2597       U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2598       memcpy (buff, token->val.str.text, token->val.str.len);
2599       result->val.str.text = buff;
2600     }
2601   return result;
2602 }
2603
2604 /* Determine whether two tokens can be pasted together, and if so,
2605    what the resulting token is.  Returns CPP_EOF if the tokens cannot
2606    be pasted, or the appropriate type for the merged token if they
2607    can.  */
2608 static enum cpp_ttype
2609 can_paste (pfile, token1, token2, digraph)
2610      cpp_reader * pfile;
2611      const cpp_token *token1, *token2;
2612      int* digraph;
2613 {
2614   enum cpp_ttype a = token1->type, b = token2->type;
2615   int cxx = CPP_OPTION (pfile, cplusplus);
2616
2617   if (a <= CPP_LAST_EQ && b == CPP_EQ)
2618     return a + (CPP_EQ_EQ - CPP_EQ);
2619
2620   switch (a)
2621     {
2622     case CPP_GREATER:
2623       if (b == a) return CPP_RSHIFT;
2624       if (b == CPP_QUERY && cxx)        return CPP_MAX;
2625       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
2626       break;
2627     case CPP_LESS:
2628       if (b == a) return CPP_LSHIFT;
2629       if (b == CPP_QUERY && cxx)        return CPP_MIN;
2630       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
2631       if (CPP_OPTION (pfile, digraphs))
2632         {
2633           if (b == CPP_COLON)
2634             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2635           if (b == CPP_MOD)
2636             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
2637         }
2638       break;
2639
2640     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
2641     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
2642     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
2643
2644     case CPP_MINUS:
2645       if (b == a)               return CPP_MINUS_MINUS;
2646       if (b == CPP_GREATER)     return CPP_DEREF;
2647       break;
2648     case CPP_COLON:
2649       if (b == a && cxx)        return CPP_SCOPE;
2650       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
2651         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2652       break;
2653
2654     case CPP_MOD:
2655       if (CPP_OPTION (pfile, digraphs))
2656         {
2657           if (b == CPP_GREATER)
2658             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
2659           if (b == CPP_COLON)
2660             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
2661         }
2662       break;
2663     case CPP_DEREF:
2664       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2665       break;
2666     case CPP_DOT:
2667       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2668       if (b == CPP_NUMBER)      return CPP_NUMBER;
2669       break;
2670
2671     case CPP_HASH:
2672       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2673         /* %:%: digraph */
2674         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2675       break;
2676
2677     case CPP_NAME:
2678       if (b == CPP_NAME)        return CPP_NAME;
2679       if (b == CPP_NUMBER
2680           && is_numstart(token2->val.str.text[0]))       return CPP_NAME;
2681       if (b == CPP_CHAR
2682           && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
2683       if (b == CPP_STRING
2684           && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
2685       break;
2686
2687     case CPP_NUMBER:
2688       if (b == CPP_NUMBER)      return CPP_NUMBER;
2689       if (b == CPP_NAME)        return CPP_NUMBER;
2690       if (b == CPP_DOT)         return CPP_NUMBER;
2691       /* Numbers cannot have length zero, so this is safe.  */
2692       if ((b == CPP_PLUS || b == CPP_MINUS)
2693           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
2694         return CPP_NUMBER;
2695       break;
2696
2697     default:
2698       break;
2699     }
2700
2701   return CPP_EOF;
2702 }
2703
/* Check if TOKEN is to be ##-pasted with the token after it, and if
   so perform the paste, repeating for as long as the result is itself
   on the left of a ##.

   Returns TOKEN unchanged when no paste applies.  Otherwise returns a
   temporary token from PFILE's store holding the pasted result, or,
   when the two tokens cannot form a valid token, the left-hand token
   after pushing the right-hand one back.  */
static const cpp_token *
maybe_paste_with_next (pfile, token)
     cpp_reader *pfile;
     const cpp_token *token;
{
  cpp_token *pasted;
  const cpp_token *second;
  cpp_context *context = CURRENT_CONTEXT (pfile);

  /* Is this token on the LHS of ## ?  Either the token itself carries
     PASTE_LEFT, or the current context is flagged CONTEXT_PASTEL and
     has been read to its end.  */

  while ((token->flags & PASTE_LEFT)
         || ((context->flags & CONTEXT_PASTEL)
             && context->posn == context->count))
    {
      /* Suppress macro expansion for next token, but don't conflict
         with the other method of suppression.  If it is an argument,
         macro expansion within the argument will still occur.  */
      pfile->paste_level = pfile->cur_context;
      second = _cpp_get_token (pfile);
      pfile->paste_level = 0;

      /* Ignore placemarker argument tokens (cannot be from an empty
         macro since macros are not expanded).  A placemarker on the
         left yields a copy of the right-hand token.  */
      if (token->type == CPP_PLACEMARKER)
        pasted = duplicate_token (pfile, second);
      else if (second->type == CPP_PLACEMARKER)
        {
          /* The context just below the current one.  */
          cpp_context *mac_context = CURRENT_CONTEXT (pfile) - 1;
          /* GCC has special extended semantics for a ## b where b is
             a varargs parameter: a disappears if b consists of no
             tokens.  This extension is deprecated.  */
          if ((mac_context->u.list->flags & GNU_REST_ARGS)
              && (mac_context->u.list->tokens[mac_context->posn-1].val.aux + 1
                  == (unsigned) mac_context->u.list->paramc))
            {
              cpp_warning (pfile, "deprecated GNU ## extension used");
              pasted = duplicate_token (pfile, second);
            }
          else
            /* Otherwise a placemarker on the right yields a copy of
               the left-hand token.  */
            pasted = duplicate_token (pfile, token);
        }
      else
        {
          int digraph = 0;
          enum cpp_ttype type = can_paste (pfile, token, second, &digraph);

          /* The tokens cannot be pasted: optionally warn, push the
             right-hand token back so it is read again, and return the
             left-hand token as it stands.  */
          if (type == CPP_EOF)
            {
              if (CPP_OPTION (pfile, warn_paste))
                cpp_warning (pfile,
                        "pasting would not give a valid preprocessing token");
              _cpp_push_token (pfile, second);
              return token;
            }

          if (type == CPP_NAME || type == CPP_NUMBER)
            {
              /* Join spellings.  */
              U_CHAR *buf, *end;

              pasted = get_temp_token (pfile);
              /* NOTE(review): a NUL is stored after the two spellings
                 below, so this size presumably relies on TOKEN_LEN
                 including slack beyond the spelling length - confirm
                 against the macro's definition.  */
              buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
              end = spell_token (pfile, token, buf);
              end = spell_token (pfile, second, end);
              *end = '\0';

              /* Names are looked up to get a hash node; numbers keep
                 a heap copy of the joined spelling.  */
              if (type == CPP_NAME)
                pasted->val.node = cpp_lookup (pfile, buf, end - buf);
              else
                {
                  pasted->val.str.text = uxstrdup (buf);
                  pasted->val.str.len = end - buf;
                }
            }
          else if (type == CPP_WCHAR || type == CPP_WSTRING)
            /* The result is a copy of the right-hand token; only its
               type changes, below.  */
            pasted = duplicate_token (pfile, second);
          else
            {
              /* The remaining results carry no value beyond their
                 type.  */
              pasted = get_temp_token (pfile);
              pasted->val.integer = 0;
            }

          pasted->type = type;
          pasted->flags = digraph ? DIGRAPH : 0;
        }

      /* The pasted token gets the whitespace flags and position of the
         first token, the PASTE_LEFT flag of the second token, plus the
         PASTED flag to indicate it is the result of a paste.  However, we
         want to preserve the DIGRAPH flag.  */
      pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
      pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
                        | (second->flags & PASTE_LEFT) | PASTED);
      pasted->col = token->col;
      pasted->line = token->line;

      /* See if there is another token to be pasted onto the one we just
         constructed.  The current context is looked up again because
         it may differ after reading SECOND.  */
      token = pasted;
      context = CURRENT_CONTEXT (pfile);
      /* and loop */
    }
  return token;
}
2810
2811 /* Convert a token sequence to a single string token according to the
2812    rules of the ISO C #-operator.  */
2813 #define INIT_SIZE 200
2814 static cpp_token *
2815 stringify_arg (pfile, token)
2816      cpp_reader *pfile;
2817      const cpp_token *token;
2818 {
2819   cpp_token *result;
2820   unsigned char *main_buf;
2821   unsigned int prev_value, backslash_count = 0;
2822   unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
2823
2824   push_arg_context (pfile, token);
2825   prev_value  = prevent_macro_expansion (pfile);
2826   main_buf = (unsigned char *) xmalloc (buf_cap);
2827
2828   result = get_temp_token (pfile);
2829   ASSIGN_FLAGS_AND_POS (result, token);
2830
2831   for (; (token = _cpp_get_token (pfile))->type != CPP_EOF; )
2832     {
2833       int escape;
2834       unsigned char *buf;
2835       unsigned int len = TOKEN_LEN (token);
2836
2837       escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2838                 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2839       if (escape)
2840         len *= 4 + 1;
2841
2842       if (buf_used + len > buf_cap)
2843         {
2844           buf_cap = buf_used + len + INIT_SIZE;
2845           main_buf = xrealloc (main_buf, buf_cap);
2846         }
2847
2848       if (whitespace && (token->flags & PREV_WHITE))
2849         main_buf[buf_used++] = ' ';
2850
2851       if (escape)
2852         buf = (unsigned char *) xmalloc (len);
2853       else
2854         buf = main_buf + buf_used;
2855
2856       len = spell_token (pfile, token, buf) - buf;
2857       if (escape)
2858         {
2859           buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2860           free (buf);
2861         }
2862       else
2863         buf_used += len;
2864
2865       whitespace = 1;
2866       if (token->type == CPP_BACKSLASH)
2867         backslash_count++;
2868       else
2869         backslash_count = 0;
2870     }
2871
2872   /* Ignore the final \ of invalid string literals.  */
2873   if (backslash_count & 1)
2874     {
2875       cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2876       buf_used--;
2877     }
2878
2879   result->type = CPP_STRING;
2880   result->val.str.text = main_buf;
2881   result->val.str.len = buf_used;
2882   restore_macro_expansion (pfile, prev_value);
2883   return result;
2884 }
2885
2886 /* Allocate more room on the context stack of PFILE.  */
2887 static void
2888 expand_context_stack (pfile)
2889      cpp_reader *pfile;
2890 {
2891   pfile->context_cap += pfile->context_cap + 20;
2892   pfile->contexts = (cpp_context *)
2893     xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2894 }
2895
2896 /* Push the context of macro NODE onto the context stack.  TOKEN is
2897    the CPP_NAME token invoking the macro.  */
2898 static int
2899 push_macro_context (pfile, token)
2900      cpp_reader *pfile;
2901      const cpp_token *token;
2902 {
2903   unsigned char orig_flags;
2904   macro_args *args;
2905   cpp_context *context;
2906   cpp_hashnode *node = token->val.node;
2907
2908   /* Token's flags may change when parsing args containing a nested
2909      invocation of this macro.  */
2910   orig_flags = token->flags & (PREV_WHITE | BOL);
2911   args = 0;
2912   if (node->value.expansion->paramc >= 0)
2913     {
2914       unsigned int error, prev_nme;
2915
2916       /* Allocate room for the argument contexts, and parse them.  */
2917       args  = (macro_args *) xmalloc (sizeof (macro_args));
2918       args->ends = (unsigned int *)
2919         xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2920       args->tokens = 0;
2921       args->capacity = 0;
2922       args->used = 0;
2923       args->level = pfile->cur_context;
2924
2925       prev_nme = prevent_macro_expansion (pfile);
2926       pfile->args = args;
2927       error = parse_args (pfile, node, args);
2928       pfile->args = 0;
2929       restore_macro_expansion (pfile, prev_nme);
2930       if (error)
2931         {
2932           free_macro_args (args);
2933           return 1;
2934         }
2935     }
2936
2937   /* Now push its context.  */
2938   pfile->cur_context++;
2939   if (pfile->cur_context == pfile->context_cap)
2940     expand_context_stack (pfile);
2941
2942   context = CURRENT_CONTEXT (pfile);
2943   context->u.list = node->value.expansion;
2944   context->args = args;
2945   context->posn = 0;
2946   context->count = context->u.list->tokens_used;
2947   context->level = pfile->cur_context;
2948   context->flags = 0;
2949   context->pushed_token = 0;
2950
2951   /* Set the flags of the first token.  We know there must
2952      be one, empty macros are a single placemarker token.  */
2953   MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2954
2955   return 0;
2956 }
2957
2958 /* Push an argument to the current macro onto the context stack.
2959    TOKEN is the MACRO_ARG token representing the argument expansion.  */
2960 static void
2961 push_arg_context (pfile, token)
2962      cpp_reader *pfile;
2963      const cpp_token *token;
2964 {
2965   cpp_context *context;
2966   macro_args *args;
2967
2968   pfile->cur_context++;
2969   if (pfile->cur_context == pfile->context_cap)
2970       expand_context_stack (pfile);
2971
2972   context = CURRENT_CONTEXT (pfile);
2973   args = context[-1].args;
2974
2975   context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2976   context->u.arg = args->tokens + context->count;
2977   context->count = args->ends[token->val.aux] - context->count;
2978   context->args = 0;
2979   context->posn = 0;
2980   context->level = args->level;
2981   context->flags = CONTEXT_ARG | CONTEXT_RAW;
2982   context->pushed_token = 0;
2983
2984   /* Set the flags of the first token.  There is one.  */
2985   {
2986     const cpp_token *first = context->u.arg[0];
2987     if (!first)
2988       first = context->u.arg[1];
2989
2990     MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2991                           token->flags & (PREV_WHITE | BOL));
2992   }
2993
2994   if (token->flags & PASTE_LEFT)
2995     context->flags |= CONTEXT_PASTEL;
2996   if (pfile->paste_level)
2997     context->flags |= CONTEXT_PASTER;
2998 }
2999
3000 /* "Unget" a token.  It is effectively inserted in the token queue and
3001    will be returned by the next call to get_raw_token.  */
3002 void
3003 _cpp_push_token (pfile, token)
3004      cpp_reader *pfile;
3005      const cpp_token *token;
3006 {
3007   cpp_context *context = CURRENT_CONTEXT (pfile);
3008   if (context->pushed_token)
3009     cpp_ice (pfile, "two tokens pushed in a row");
3010   if (token->type != CPP_EOF)
3011     context->pushed_token = token;
3012   /* Don't push back a directive's CPP_EOF, step back instead.  */
3013   else if (pfile->cur_context == 0)
3014     pfile->contexts[0].posn--;
3015 }
3016
3017 /* Handle a preprocessing directive.  TOKEN is the CPP_HASH token
3018    introducing the directive.  */
3019 static void
3020 process_directive (pfile, token)
3021      cpp_reader *pfile;
3022      const cpp_token *token;
3023 {
3024   const struct directive *d = pfile->token_list.directive;
3025   int prev_nme = 0;
3026
3027   /* Skip over the directive name.  */
3028   if (token[1].type == CPP_NAME)
3029     _cpp_get_raw_token (pfile);
3030   else if (token[1].type != CPP_NUMBER)
3031     cpp_ice (pfile, "directive begins with %s?!",
3032              token_names[token[1].type]);
3033
3034   /* Flush pending tokens at this point, in case the directive produces
3035      output.  XXX Directive output won't be visible to a direct caller of
3036      cpp_get_token.  */
3037   if (pfile->printer && CPP_WRITTEN (pfile) - pfile->printer->written)
3038     cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
3039
3040   if (! (d->flags & EXPAND))
3041     prev_nme = prevent_macro_expansion (pfile);
3042   (void) (*d->handler) (pfile);
3043   if (! (d->flags & EXPAND))
3044     restore_macro_expansion (pfile, prev_nme);
3045   _cpp_skip_rest_of_line (pfile);
3046 }
3047
3048 /* The external interface to return the next token.  All macro
3049    expansion and directive processing is handled internally, the
3050    caller only ever sees the output after preprocessing.  */
3051 const cpp_token *
3052 cpp_get_token (pfile)
3053      cpp_reader *pfile;
3054 {
3055   const cpp_token *token;
3056   /* Loop till we hit a non-directive, non-placemarker token.  */
3057   for (;;)
3058     {
3059       token = _cpp_get_token (pfile);
3060
3061       if (token->type == CPP_PLACEMARKER)
3062         continue;
3063
3064       if (token->type == CPP_HASH && token->flags & BOL
3065           && pfile->token_list.directive)
3066         {
3067           process_directive (pfile, token);
3068           continue;
3069         }
3070
3071       return token;
3072     }
3073 }
3074
3075 /* The internal interface to return the next token.  There are two
3076    differences between the internal and external interfaces: the
3077    internal interface may return a PLACEMARKER token, and it does not
3078    process directives.  */
3079 const cpp_token *
3080 _cpp_get_token (pfile)
3081      cpp_reader *pfile;
3082 {
3083   const cpp_token *token;
3084   cpp_hashnode *node;
3085
3086   /* Loop until we hit a non-macro token.  */
3087   for (;;)
3088     {
3089       token = get_raw_token (pfile);
3090
3091       /* Short circuit EOF. */
3092       if (token->type == CPP_EOF)
3093         return token;
3094
3095       /* If we are skipping... */
3096       if (pfile->skipping)
3097         {
3098           /* we still have to process directives,  */
3099           if (pfile->token_list.directive)
3100             return token;
3101
3102           /* but everything else is ignored.  */
3103           _cpp_skip_rest_of_line (pfile);
3104           continue;
3105         }
3106
3107       /* If there's a potential control macro and we get here, then that
3108          #ifndef didn't cover the entire file and its argument shouldn't
3109          be taken as a control macro.  */
3110       pfile->potential_control_macro = 0;
3111
3112       /* See if there's a token to paste with this one.  */
3113       if (!pfile->paste_level)
3114         token = maybe_paste_with_next (pfile, token);
3115
3116       /* If it isn't a macro, return it now.  */
3117       if (token->type != CPP_NAME
3118           || token->val.node->type == T_VOID)
3119         return token;
3120
3121       /* Is macro expansion disabled in general?  */
3122       if (pfile->no_expand_level == pfile->cur_context || pfile->paste_level)
3123         return token;
3124
3125       node = token->val.node;
3126       if (node->type != T_MACRO)
3127         return special_symbol (pfile, node, token);
3128
3129       if (is_macro_disabled (pfile, node->value.expansion, token))
3130         return token;
3131
3132       if (pfile->cur_context > CPP_STACK_MAX)
3133         {
3134           cpp_error (pfile, "macros nested too deep invoking '%s'", node->name);
3135           return token;
3136         }
3137
3138       if (push_macro_context (pfile, token))
3139         return token;
3140       /* else loop */
3141     }
3142 }
3143
/* Returns the next raw token, i.e. without performing macro
   expansion.  Argument contexts are automatically entered: a
   CPP_MACRO_ARG token is never returned, it is either stringified or
   replaced by the tokens of the argument it stands for.  Returns the
   address of eof_token when pop_context reports that reading must
   stop.  */
static const cpp_token *
get_raw_token (pfile)
     cpp_reader *pfile;
{
  const cpp_token *result;
  cpp_context *context;

  for (;;)
    {
      context = CURRENT_CONTEXT (pfile);
      /* A pushed-back token takes priority over the context's own
         tokens, and is consumed by being returned.  */
      if (context->pushed_token)
        {
          result = context->pushed_token;
          context->pushed_token = 0;
        }
      /* The context is exhausted: pop it and look again in whatever
         context is current afterwards, unless pop_context says to
         stop.  */
      else if (context->posn == context->count)
        {
          if (pop_context (pfile))
            return &eof_token;
          continue;
        }
      else
        {
          if (IS_ARG_CONTEXT (context))
            {
              /* Argument contexts hold pointers to tokens.  A null
                 pointer entry toggles the context's CONTEXT_RAW flag
                 and is stepped over.  */
              result = context->u.arg[context->posn++];
              if (result == 0)
                {
                  context->flags ^= CONTEXT_RAW;
                  result = context->u.arg[context->posn++];
                }
              return result;    /* Cannot be a CPP_MACRO_ARG */
            }
          /* Macro and top-level contexts hold the tokens
             themselves.  */
          result = &context->u.list->tokens[context->posn++];
        }

      if (result->type != CPP_MACRO_ARG)
        return result;

      /* An argument preceded by the # operator becomes a single
         string token.  */
      if (result->flags & STRINGIFY_ARG)
        return stringify_arg (pfile, result);

      /* Otherwise enter the argument and read from it on the next
         iteration.  */
      push_arg_context (pfile, result);
    }
}
3191
3192 /* Internal interface to get the token without macro expanding.  */
3193 const cpp_token *
3194 _cpp_get_raw_token (pfile)
3195      cpp_reader *pfile;
3196 {
3197   int prev_nme = prevent_macro_expansion (pfile);
3198   const cpp_token *result = _cpp_get_token (pfile);
3199   restore_macro_expansion (pfile, prev_nme);
3200   return result;
3201 }
3202
/* A thin wrapper to lex_line.  CLEAR is non-zero if the current token
   list should be overwritten, or zero if we need to append
   (typically, if we are within the arguments to a macro, or looking
   for the '(' to start a function-like macro invocation).

   Returns 1 if, while appending with macro arguments being
   collected, the new line turned out to hold a directive (an error
   is reported); returns 0 otherwise.  */
static int
lex_next (pfile, clear)
     cpp_reader *pfile;
     int clear;
{
  cpp_toklist *list = &pfile->token_list;
  /* Remember where the token array was and how much of it was used,
     so that pointers into it can be adjusted if it moves.  */
  const cpp_token *old_list = list->tokens;
  unsigned int old_used = list->tokens_used;

  if (clear)
    {
      /* Start the list afresh, restart reading of the base context
         from its first token, and release all temporary tokens.  */
      _cpp_clear_toklist (list);
      pfile->contexts[0].posn = 0;
      if (pfile->temp_used)
        release_temp_tokens (pfile);
    }

  lex_line (pfile, list);
  /* The base context now covers everything in the list.  */
  pfile->contexts[0].count = list->tokens_used;

  if (!clear && pfile->args)
    {
      /* Fix up argument token pointers.  If the token array moved,
         every saved argument pointer that pointed into the old array
         is shifted by the distance the array moved.  */
      if (old_list != list->tokens)
        {
          unsigned int i;

          for (i = 0; i < pfile->args->used; i++)
            {
              const cpp_token *token = pfile->args->tokens[i];
              if (token >= old_list && token < old_list + old_used)
                pfile->args->tokens[i] = (const cpp_token *)
                ((char *) token + ((char *) list->tokens - (char *) old_list));
            }
        }

      /* 6.10.3 paragraph 11: If there are sequences of preprocessing
         tokens within the list of arguments that would otherwise act as
         preprocessing directives, the behavior is undefined.

         This implementation will report a hard error and treat the
         'sequence of preprocessing tokens' as part of the macro argument,
         not a directive.

         Note if pfile->args == 0, we're OK since we're only inside a
         macro argument after a '('.  */
      if (list->directive)
        {
          /* The error is located at the first token added by this
             call.  */
          cpp_error_with_line (pfile, list->tokens[old_used].line,
                               list->tokens[old_used].col,
                               "#%s may not be used inside a macro argument",
                               list->directive->name);
          return 1;
        }
    }

  return 0;
}
3266
3267 /* Pops a context off the context stack.  If we're at the bottom, lexes
3268    the next logical line.  Returns EOF if we're at the end of the
3269    argument list to the # operator, or if it is illegal to "overflow"
3270    into the rest of the file (e.g. 6.10.3.1.1).  */
3271 static int
3272 pop_context (pfile)
3273      cpp_reader *pfile;
3274 {
3275   cpp_context *context;
3276
3277   if (pfile->cur_context == 0)
3278     {
3279       /* If we are currently processing a directive, do not advance.  6.10
3280          paragraph 2: A new-line character ends the directive even if it
3281          occurs within what would otherwise be an invocation of a
3282          function-like macro.  */
3283       if (pfile->token_list.directive)
3284         return 1;
3285
3286       return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3287     }
3288
3289   /* Argument contexts, when parsing args or handling # operator
3290      return CPP_EOF at the end.  */
3291   context = CURRENT_CONTEXT (pfile);
3292   if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3293     return 1;
3294
3295   /* Free resources when leaving macro contexts.  */
3296   if (context->args)
3297     free_macro_args (context->args);
3298
3299   if (pfile->cur_context == pfile->no_expand_level)
3300     pfile->no_expand_level--;
3301   pfile->cur_context--;
3302
3303   return 0;
3304 }
3305
3306 /* Turn off macro expansion at the current context level.  */
3307 static unsigned int
3308 prevent_macro_expansion (pfile)
3309      cpp_reader *pfile;
3310 {
3311   unsigned int prev_value = pfile->no_expand_level;
3312   pfile->no_expand_level = pfile->cur_context;
3313   return prev_value;
3314 }
3315
/* Restore macro expansion to its previous state.  PREV_VALUE is the
   value returned by the matching call to prevent_macro_expansion; it
   is stored back into PFILE's no_expand_level.  */
static void
restore_macro_expansion (pfile, prev_value)
     cpp_reader *pfile;
     unsigned int prev_value;
{
  pfile->no_expand_level = prev_value;
}
3324
3325 /* Used by cpperror.c to obtain the correct line and column to report
3326    in a diagnostic.  */
3327 unsigned int
3328 _cpp_get_line (pfile, pcol)
3329      cpp_reader *pfile;
3330      unsigned int *pcol;
3331 {
3332   unsigned int index;
3333   const cpp_token *cur_token;
3334
3335   if (pfile->in_lex_line)
3336     index = pfile->token_list.tokens_used;
3337   else
3338     index = pfile->contexts[0].posn;
3339
3340   cur_token = &pfile->token_list.tokens[index - 1];
3341   if (pcol)
3342     *pcol = cur_token->col;
3343   return cur_token->line;
3344 }
3345
/* Expands to two arguments: the string literal STR as a pointer to
   const U_CHAR, and its length without the terminating NUL.  */
#define DSC(str) (const U_CHAR *)str, sizeof str - 1

/* Three-letter English month abbreviations, indexed from 0 for
   January.  */
static const char * const monthnames[] =
{
  "Jan", "Feb", "Mar", "Apr",
  "May", "Jun", "Jul", "Aug",
  "Sep", "Oct", "Nov", "Dec",
};
3352
3353 /* Handle builtin macros like __FILE__.  */
3354 static const cpp_token *
3355 special_symbol (pfile, node, token)
3356      cpp_reader *pfile;
3357      cpp_hashnode *node;
3358      const cpp_token *token;
3359 {
3360   cpp_token *result;
3361   cpp_buffer *ip;
3362
3363   switch (node->type)
3364     {
3365     case T_FILE:
3366     case T_BASE_FILE:
3367       {
3368         const char *file;
3369
3370         ip = CPP_BUFFER (pfile);
3371         if (ip == 0)
3372           file = "";
3373         else
3374           {
3375             if (node->type == T_BASE_FILE)
3376               while (CPP_PREV_BUFFER (ip) != NULL)
3377                 ip = CPP_PREV_BUFFER (ip);
3378
3379             file = ip->nominal_fname;
3380           }
3381         result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3382                                     strlen (file));
3383       }
3384       break;
3385
3386     case T_INCLUDE_LEVEL:
3387       /* pfile->include_depth counts the primary source as level 1,
3388          but historically __INCLUDE_DEPTH__ has called the primary
3389          source level 0.  */
3390       result = alloc_number_token (pfile, pfile->include_depth - 1);
3391       break;
3392
3393     case T_SPECLINE:
3394       /* If __LINE__ is embedded in a macro, it must expand to the
3395          line of the macro's invocation, not its definition.
3396          Otherwise things like assert() will not work properly.  */
3397       result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3398       break;
3399
3400     case T_STDC:
3401       {
3402         int stdc = 1;
3403
3404 #ifdef STDC_0_IN_SYSTEM_HEADERS
3405         if (CPP_IN_SYSTEM_HEADER (pfile)
3406             && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
3407           stdc = 0;
3408 #endif
3409         result = alloc_number_token (pfile, stdc);
3410       }
3411       break;
3412
3413     case T_DATE:
3414     case T_TIME:
3415       if (pfile->date == 0)
3416         {
3417           /* Allocate __DATE__ and __TIME__ from permanent storage,
3418              and save them in pfile so we don't have to do this again.
3419              We don't generate these strings at init time because
3420              time() and localtime() are very slow on some systems.  */
3421           time_t tt = time (NULL);
3422           struct tm *tb = localtime (&tt);
3423
3424           pfile->date = make_string_token
3425             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3426           pfile->time = make_string_token
3427             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3428
3429           sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
3430                    monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3431           sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
3432                    tb->tm_hour, tb->tm_min, tb->tm_sec);
3433         }
3434       result = node->type == T_DATE ? pfile->date: pfile->time;
3435       break;
3436
3437     case T_POISON:
3438       cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
3439       return token;
3440
3441     default:
3442       cpp_ice (pfile, "invalid special hash type");
3443       return token;
3444     }
3445
3446   ASSIGN_FLAGS_AND_POS (result, token);
3447   return result;
3448 }
3449 #undef DSC
3450
3451 /* Dump the original user's spelling of argument index ARG_NO to the
3452    macro whose expansion is LIST.  */
3453 static void
3454 dump_param_spelling (pfile, list, arg_no)
3455      cpp_reader *pfile;
3456      const cpp_toklist *list;
3457      unsigned int arg_no;
3458 {
3459   const U_CHAR *param = list->namebuf;
3460
3461   while (arg_no--)
3462     param += ustrlen (param) + 1;
3463   CPP_PUTS (pfile, param, ustrlen (param));
3464 }
3465
3466 /* Dump a token list to the output.  */
3467 void
3468 _cpp_dump_list (pfile, list, token, flush)
3469      cpp_reader *pfile;
3470      const cpp_toklist *list;
3471      const cpp_token *token;
3472      int flush;
3473 {
3474   const cpp_token *limit = list->tokens + list->tokens_used;
3475   const cpp_token *prev = 0;
3476
3477   /* Avoid the CPP_EOF.  */
3478   if (list->directive)
3479     limit--;
3480
3481   while (token < limit)
3482     {
3483       if (token->type == CPP_MACRO_ARG)
3484         {
3485           if (token->flags & PREV_WHITE)
3486             CPP_PUTC (pfile, ' ');
3487           if (token->flags & STRINGIFY_ARG)
3488             CPP_PUTC (pfile, '#');
3489           dump_param_spelling (pfile, list, token->val.aux);
3490         }
3491       else
3492         output_token (pfile, token, prev);
3493       if (token->flags & PASTE_LEFT)
3494         CPP_PUTS (pfile, " ##", 3);
3495       prev = token;
3496       token++;
3497     }
3498
3499   if (flush && pfile->printer)
3500     cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
3501 }
3502
3503 /* Allocate pfile->input_buffer, and initialize trigraph_map[]
3504    if it hasn't happened already.  */
3505
3506 void
3507 _cpp_init_input_buffer (pfile)
3508      cpp_reader *pfile;
3509 {
3510   cpp_context *base;
3511
3512   init_trigraph_map ();
3513   _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3514   pfile->no_expand_level = UINT_MAX;
3515   pfile->context_cap = 20;
3516   pfile->cur_context = 0;
3517
3518   pfile->contexts = (cpp_context *)
3519     xmalloc (pfile->context_cap * sizeof (cpp_context));
3520
3521   /* Clear the base context.  */
3522   base = &pfile->contexts[0];
3523   base->u.list = &pfile->token_list;
3524   base->posn = 0;
3525   base->count = 0;
3526   base->args = 0;
3527   base->level = 0;
3528   base->flags = 0;
3529   base->pushed_token = 0;
3530 }
3531
3532 /* Moves to the end of the directive line, popping contexts as
3533    necessary.  */
3534 void
3535 _cpp_skip_rest_of_line (pfile)
3536      cpp_reader *pfile;
3537 {
3538   /* Discard all stacked contexts.  */
3539   int i;
3540   for (i = pfile->cur_context; i > 0; i--)
3541     if (pfile->contexts[i].args)
3542       free_macro_args (pfile->contexts[i].args);
3543
3544   if (pfile->no_expand_level <= pfile->cur_context)
3545     pfile->no_expand_level = 0;
3546   pfile->cur_context = 0;
3547
3548   /* Clear the base context, and clear the directive pointer so that
3549      get_raw_token will advance to the next line.  */
3550   pfile->contexts[0].count = 0;
3551   pfile->contexts[0].posn = 0;
3552   pfile->token_list.directive = 0;
3553 }
3554
3555 /* Directive handler wrapper used by the command line option
3556    processor.  */
3557 void
3558 _cpp_run_directive (pfile, dir, buf, count)
3559      cpp_reader *pfile;
3560      const struct directive *dir;
3561      const char *buf;
3562      size_t count;
3563 {
3564   if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3565     {
3566       unsigned int prev_lvl = 0;
3567
3568       /* Scan the line now, else prevent_macro_expansion won't work.  */
3569       lex_next (pfile, 1);
3570       if (! (dir->flags & EXPAND))
3571         prev_lvl = prevent_macro_expansion (pfile);
3572
3573       (void) (*dir->handler) (pfile);
3574
3575       if (! (dir->flags & EXPAND))
3576         restore_macro_expansion (pfile, prev_lvl);
3577
3578       _cpp_skip_rest_of_line (pfile);
3579       cpp_pop_buffer (pfile);
3580     }
3581 }