1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
27 o -dM and with _cpp_dump_list: too many \n output.
28 o Put a printer object in cpp_reader?
29 o Check line numbers assigned to all errors.
30 o Replace strncmp with memcmp almost everywhere.
31 o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
32 o Convert do_ functions to return void. Kaveh thinks it's OK; and said he'll
33 give it a run when we've got some code.
34 o Distinguish integers, floats, and 'other' pp-numbers.
35 o Store ints and char constants as binary values.
36 o New command-line assertion syntax.
37 o Work towards functions in cpperror.c taking a message level parameter.
38 If we do this, merge the common code of do_warning and do_error.
39 o Comment all functions, and describe macro expansion algorithm.
40 o Move as much out of header files as possible.
41 o Remove single quote pairs `', and some '', from diagnostics.
42 o Correct pastability test for CPP_NAME and CPP_NUMBER.
/* Grow LIST's name buffer by half of its current capacity plus one
   byte, by handing that amount to _cpp_expand_name_space.  */
53 #define auto_expand_name_space(list) \
54 _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
55 static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
57 static void dump_param_spelling PARAMS ((cpp_reader *, const cpp_toklist *,
59 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
62 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
63 static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
65 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
66 const unsigned char *));
67 static int skip_block_comment PARAMS ((cpp_reader *));
68 static int skip_line_comment PARAMS ((cpp_reader *));
69 static void skip_whitespace PARAMS ((cpp_reader *, int));
70 static const U_CHAR *parse_name PARAMS ((cpp_reader *, cpp_token *,
71 const U_CHAR *, const U_CHAR *));
72 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_string *));
73 static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
75 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
76 static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
77 const unsigned char *,
78 unsigned int, unsigned int));
79 static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
80 static int lex_next PARAMS ((cpp_reader *, int));
81 static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
84 static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
85 static void expand_context_stack PARAMS ((cpp_reader *));
86 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
88 static void output_token PARAMS ((cpp_reader *, const cpp_token *,
90 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
92 static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
94 static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
95 static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
97 static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
98 static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
100 static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
101 const cpp_token *, int *));
102 static unsigned int prevent_macro_expansion PARAMS ((cpp_reader *));
103 static void restore_macro_expansion PARAMS ((cpp_reader *, unsigned int));
104 static cpp_token *get_temp_token PARAMS ((cpp_reader *));
105 static void release_temp_tokens PARAMS ((cpp_reader *));
106 static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
107 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
109 #define INIT_TOKEN_STR(list, token) \
110 do {(token)->val.str.len = 0; \
111 (token)->val.str.text = (list)->namebuf + (list)->name_used; \
/* Non-zero if C is a '+' or '-' that directly follows an exponent
   marker PREVC: 'e' or 'E' always, 'p' or 'P' only when the c89
   option is off.  Expands to a use of a variable named pfile, which
   must be in scope at the point of use.  */
114 #define VALID_SIGN(c, prevc) \
115 (((c) == '+' || (c) == '-') && \
116 ((prevc) == 'e' || (prevc) == 'E' \
117 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
119 /* Handle LF, CR, CR-LF and LF-CR style newlines. Assumes next
120 character, if any, is in buffer. */
122 #define handle_newline(cur, limit, c) \
124 if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
126 pfile->buffer->lineno++; \
127 pfile->buffer->line_base = (cur); \
128 pfile->col_adjust = 0; \
/* Helpers for manipulating the token stack.  All of them operate on
   a variable named cur_token that must be in scope where they are
   used.

   IMMED_TOKEN     non-zero if the token at cur_token does not have
                   PREV_WHITE set.
   PREV_TOKEN_TYPE the type of the token just before cur_token.
   PUSH_TOKEN      set the type of the token at cur_token, then
                   advance cur_token past it.
   REVISE_TOKEN    overwrite the type of the token before cur_token.
   BACKUP_TOKEN    step cur_token back one token and set its type.
   BACKUP_DIGRAPH  as BACKUP_TOKEN, and also set the DIGRAPH flag on
                   that token.

   PUSH_TOKEN, REVISE_TOKEN and BACKUP_TOKEN expand to bare
   assignment expressions with an unparenthesised TTYPE.  */
131 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
132 #define PREV_TOKEN_TYPE (cur_token[-1].type)
134 #define PUSH_TOKEN(ttype) cur_token++->type = ttype
135 #define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
136 #define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
137 #define BACKUP_DIGRAPH(ttype) do { \
138 BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
140 /* An upper bound on the number of bytes needed to spell a token,
141 including preceding whitespace. */
142 #define TOKEN_SPELL(token) token_spellings[(token)->type].type
143 #define TOKEN_LEN(token) (5 + (TOKEN_SPELL(token) == SPELL_STRING \
144 ? (token)->val.str.len \
145 : (TOKEN_SPELL(token) == SPELL_IDENT \
146 ? (token)->val.node->length \
/* Build the token_spellings table from TTYPE_TABLE (defined
   elsewhere).  Each T/I/S/C/N entry of that table expands to a
   { spelling kind, text } initializer; the enumerator argument E is
   not used here.  Only T casts its text to const U_CHAR *.  The
   table holds one entry more than N_TTYPES: a final { 0, 0 }.  */
149 #define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
150 #define I(e, s) {SPELL_IDENT, s},
151 #define S(e, s) {SPELL_STRING, s},
152 #define C(e, s) {SPELL_CHAR, s},
153 #define N(e, s) {SPELL_NONE, s},
155 const struct token_spelling
156 token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
164 /* For debugging: the internal names of the tokens. */
/* Every kind of TTYPE_TABLE entry expands the same way here: the
   spelling argument S is ignored and the entry becomes
   U STRINGX(e) + 4.  Presumably STRINGX stringizes the enumerator
   name and the + 4 skips its leading "CPP_" prefix - confirm against
   the definitions of U and STRINGX, which are not in this file
   section.  */
165 #define T(e, s) U STRINGX(e) + 4,
166 #define I(e, s) U STRINGX(e) + 4,
167 #define S(e, s) U STRINGX(e) + 4,
168 #define C(e, s) U STRINGX(e) + 4,
169 #define N(e, s) U STRINGX(e) + 4,
171 const U_CHAR *const token_names[N_TTYPES] = { TTYPE_TABLE };
179 /* The following table is used by trigraph_ok/trigraph_replace. If we
180 have designated initializers, it can be constant data; otherwise,
181 it is set up at runtime by _cpp_init_input_buffer. */
183 #if (GCC_VERSION >= 2007)
184 #define init_trigraph_map() /* nothing */
185 #define TRIGRAPH_MAP \
186 __extension__ static const U_CHAR trigraph_map[UCHAR_MAX + 1] = {
188 #define s(p, v) [p] = v,
190 #define TRIGRAPH_MAP static U_CHAR trigraph_map[UCHAR_MAX + 1] = { 0 }; \
191 static void init_trigraph_map PARAMS ((void)) { \
192 unsigned char *x = trigraph_map;
194 #define s(p, v) x[p] = v;
198 s('=', '#') s(')', ']') s('!', '|')
199 s('(', '[') s('\'', '^') s('>', '}')
200 s('/', '\\') s('<', '{') s('-', '~')
207 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
210 _cpp_grow_token_buffer (pfile, n)
214 long old_written = CPP_WRITTEN (pfile);
215 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
216 pfile->token_buffer = (U_CHAR *)
217 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
218 CPP_SET_WRITTEN (pfile, old_written);
221 /* Deal with the annoying semantics of fwrite. */
223 safe_fwrite (pfile, buf, len, fp)
233 count = fwrite (buf, 1, len, fp);
242 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
245 /* Notify the compiler proper that the current line number has jumped,
246 or the current file name has changed. */
249 output_line_command (pfile, print, line)
254 cpp_buffer *ip = CPP_BUFFER (pfile);
255 enum { same = 0, enter, leave, rname } change;
256 static const char * const codes[] = { "", " 1", " 2", "" };
261 /* End the previous line of text. */
262 if (pfile->need_newline)
263 putc ('\n', print->outf);
264 pfile->need_newline = 0;
266 if (CPP_OPTION (pfile, no_line_commands))
269 /* If ip is null, we've been called from cpp_finish, and they just
270 needed the final flush and trailing newline. */
274 if (pfile->include_depth == print->last_id)
276 /* Determine whether the current filename has changed, and if so,
277 how. 'nominal_fname' values are unique, so they can be compared
278 by comparing pointers. */
279 if (ip->nominal_fname == print->last_fname)
286 if (pfile->include_depth > print->last_id)
290 print->last_id = pfile->include_depth;
292 print->last_fname = ip->nominal_fname;
294 /* If the current file has not changed, we can output a few newlines
295 instead if we want to increase the line number by a small amount.
296 We cannot do this if print->lineno is zero, because that means we
297 haven't output any line commands yet. (The very first line
298 command output is a `same_file' command.) */
299 if (change == same && print->lineno > 0
300 && line >= print->lineno && line < print->lineno + 8)
302 while (line > print->lineno)
304 putc ('\n', print->outf);
310 #ifndef NO_IMPLICIT_EXTERN_C
311 if (CPP_OPTION (pfile, cplusplus))
312 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
314 ip->inc->sysp ? " 3" : "",
315 (ip->inc->sysp == 2) ? " 4" : "");
318 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
320 ip->inc->sysp ? " 3" : "");
321 print->lineno = line;
324 /* Write the contents of the token_buffer to the output stream, and
325 clear the token_buffer. Also handles generating line commands and
326 keeping track of file transitions. */
329 cpp_output_tokens (pfile, print, line)
334 if (CPP_WRITTEN (pfile) - print->written)
336 safe_fwrite (pfile, pfile->token_buffer,
337 CPP_WRITTEN (pfile) - print->written, print->outf);
338 pfile->need_newline = 1;
342 CPP_SET_WRITTEN (pfile, print->written);
344 output_line_command (pfile, print, line);
347 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
350 cpp_scan_buffer_nooutput (pfile)
353 unsigned int old_written = CPP_WRITTEN (pfile);
354 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
358 /* In no-output mode, we can ignore everything but directives. */
359 const cpp_token *token = cpp_get_token (pfile);
360 if (token->type == CPP_EOF)
362 cpp_pop_buffer (pfile);
363 if (CPP_BUFFER (pfile) == stop)
366 _cpp_skip_rest_of_line (pfile);
368 CPP_SET_WRITTEN (pfile, old_written);
371 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
374 cpp_scan_buffer (pfile, print)
378 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
379 const cpp_token *token, *prev = 0;
383 token = cpp_get_token (pfile);
384 if (token->type == CPP_EOF)
386 cpp_pop_buffer (pfile);
387 if (CPP_BUFFER (pfile) == stop)
389 cpp_output_tokens (pfile, print, CPP_BUF_LINE (CPP_BUFFER (pfile)));
394 if (token->flags & BOL)
396 cpp_output_tokens (pfile, print, pfile->token_list.line);
400 output_token (pfile, token, prev);
405 /* Helper routine used by parse_include, which can't see spell_token.
406 Reinterpret the current line as an h-char-sequence (< ... >); we are
407 looking at the first token after the <. */
409 _cpp_glue_header_name (pfile)
412 unsigned int written = CPP_WRITTEN (pfile);
420 t = cpp_get_token (pfile);
421 if (t->type == CPP_GREATER || t->type == CPP_EOF)
424 CPP_RESERVE (pfile, TOKEN_LEN (t));
425 if (t->flags & PREV_WHITE)
426 CPP_PUTC_Q (pfile, ' ');
427 pfile->limit = spell_token (pfile, t, pfile->limit);
430 if (t->type == CPP_EOF)
431 cpp_error (pfile, "missing terminating > character");
433 len = CPP_WRITTEN (pfile) - written;
435 memcpy (buf, pfile->token_buffer + written, len);
436 CPP_SET_WRITTEN (pfile, written);
438 hdr = get_temp_token (pfile);
439 hdr->type = CPP_HEADER_NAME;
441 hdr->val.str.text = buf;
442 hdr->val.str.len = len;
446 /* Token-buffer helper functions. */
448 /* Expand a token list's string space. It is *vital* that
449 list->tokens_used is correct, to get pointer fix-up right. */
451 _cpp_expand_name_space (list, len)
455 const U_CHAR *old_namebuf;
457 old_namebuf = list->namebuf;
458 list->name_cap += len;
459 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
461 /* Fix up token text pointers. */
462 if (list->namebuf != old_namebuf)
466 for (i = 0; i < list->tokens_used; i++)
467 if (token_spellings[list->tokens[i].type].type == SPELL_STRING)
468 list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
472 /* If there is not enough room for LEN more characters, expand the
473 list by just enough to have room for LEN characters. */
475 _cpp_reserve_name_space (list, len)
479 unsigned int room = list->name_cap - list->name_used;
482 _cpp_expand_name_space (list, len - room);
485 /* Expand the number of tokens in a list. */
487 _cpp_expand_token_space (list, count)
493 list->tokens_cap += count;
494 n = list->tokens_cap;
495 if (list->flags & LIST_OFFSET)
497 list->tokens = (cpp_token *)
498 xrealloc (list->tokens, n * sizeof (cpp_token));
499 if (list->flags & LIST_OFFSET)
500 list->tokens++; /* Skip the dummy. */
503 /* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
504 an extra token in front of the token list, as this allows the lexer
505 to always peek at the previous token without worrying about
506 underflowing the list, and some initial space. Otherwise, no
507 token- or name-space is allocated, and there is no dummy token. */
509 _cpp_init_toklist (list, flags)
513 if (flags == NO_DUMMY_TOKEN)
515 list->tokens_cap = 0;
523 /* Initialize token space. Put a dummy token before the start
524 that will fail matches. */
525 list->tokens_cap = 256; /* 4K's worth. */
526 list->tokens = (cpp_token *)
527 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
528 list->tokens[0].type = CPP_EOF;
531 /* Initialize name space. */
532 list->name_cap = 1024;
533 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
534 list->flags = LIST_OFFSET;
537 _cpp_clear_toklist (list);
540 /* Clear a token list. */
542 _cpp_clear_toklist (list)
545 list->tokens_used = 0;
549 list->params_len = 0;
550 list->flags &= LIST_OFFSET; /* clear all but that one */
553 /* Free a token list. Does not free the list itself, which may be
554 embedded in a larger structure. */
556 _cpp_free_toklist (list)
557 const cpp_toklist *list;
559 if (list->flags & LIST_OFFSET)
560 free (list->tokens - 1); /* Backup over dummy token. */
563 free (list->namebuf);
566 /* Compare two tokens. */
568 _cpp_equiv_tokens (a, b)
569 const cpp_token *a, *b;
571 if (a->type == b->type && a->flags == b->flags)
572 switch (token_spellings[a->type].type)
574 default: /* Keep compiler happy. */
579 return a->val.aux == b->val.aux; /* arg_no or character. */
581 return a->val.node == b->val.node;
583 return (a->val.str.len == b->val.str.len
584 && !memcmp (a->val.str.text, b->val.str.text,
591 /* Compare two token lists. */
593 _cpp_equiv_toklists (a, b)
594 const cpp_toklist *a, *b;
598 if (a->tokens_used != b->tokens_used
599 || a->flags != b->flags
600 || a->paramc != b->paramc)
603 for (i = 0; i < a->tokens_used; i++)
604 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
611 Compares the token TOKEN to the NUL-terminated string STRING.
612 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
615 cpp_ideq (token, string)
616 const cpp_token *token;
619 if (token->type != CPP_NAME)
622 return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
627 The original lexer in cpplib was made up of two passes: a first pass
628 that replaced trigraphs and deleted escaped newlines, and a second
629 pass that tokenized the result of the first pass. Tokenisation was
630 performed by peeking at the next character in the input stream. For
631 example, if the input stream contained "!=", the handler for the !
632 character would peek at the next character, and if it were a '='
633 would skip over it, and return a "!=" token, otherwise it would
634 return just the "!" token.
636 To implement a single-pass lexer, this peeking ahead is unworkable.
637 An arbitrary number of escaped newlines, and trigraphs (in particular
638 ??/ which translates to the escape \), could separate the '!' and '='
639 in the input stream, yet the next token is still a "!=".
641 Suppose instead that we lex by one logical line at a time, producing
642 a token list or stack for each logical line, and when seeing the '!'
643 push a CPP_NOT token on the list. Then if the '!' is part of a
644 longer token ("!=") we know we must see the remainder of the token by
645 the time we reach the end of the logical line. Thus we can have the
646 '=' handler look at the previous token (at the end of the list / top
647 of the stack) and see if it is a "!" token, and if so, instead of
648 pushing a "=" token revise the existing token to be a "!=" token.
650 This works in the presence of escaped newlines, because the '\' would
651 have been pushed on the top of the stack as a CPP_BACKSLASH. The
652 newline ('\n' or '\r') handler looks at the token at the top of the
653 stack to see if it is a CPP_BACKSLASH, and if so discards both.
654 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
655 the '=' handler would never see any intervening escaped newlines.
657 To make trigraphs work in this context, since trigraphs have the highest
658 precedence and are converted before anything else, the '?' handler does
659 lookahead to see if it is a trigraph, and if so skips the trigraph
660 and pushes the token it represents onto the top of the stack. This
661 also works in the particular case of a CPP_BACKSLASH trigraph.
663 To the preprocessor, whitespace is only significant to the point of
664 knowing whether whitespace precedes a particular token. For example,
665 the '=' handler needs to know whether there was whitespace between it
666 and a "!" token on the top of the stack, to make the token conversion
667 decision correctly. So each token has a PREV_WHITE flag to
668 indicate this - the standard permits consecutive whitespace to be
669 regarded as a single space. The compiler front ends are not
670 interested in whitespace at all; they just require a token stream.
671 Another place where whitespace is significant to the preprocessor is
672 a #define statement - if there is whitespace between the macro name
673 and an initial "(" token the macro is "object-like", otherwise it is
674 a function-like macro that takes arguments.
676 However, all is not rosy. Parsing of identifiers, numbers, comments
677 and strings becomes trickier because of the possibility of raw
678 trigraphs and escaped newlines in the input stream.
680 The trigraphs are three consecutive characters beginning with two
681 question marks. A question mark is not valid as part of a number or
682 identifier, so parsing of a number or identifier terminates normally
683 upon reaching it, returning to the mainloop which handles the
684 trigraph just like it would in any other position. Similarly for the
685 backslash of a backslash-newline combination. So we just need the
686 escaped-newline dropper in the mainloop to check if the token on the
687 top of the stack after dropping the escaped newline is a number or
688 identifier, and if so to continue processing it as if nothing had
691 For strings, we replace trigraphs whenever we reach a quote or
692 newline, because there might be a backslash trigraph escaping them.
693 We need to be careful that we start trigraph replacing from where we
694 left off previously, because it is possible for a first scan to leave
695 "fake" trigraphs that a second scan would pick up as real (e.g. the
696 sequence "????/\n=" would find a fake ??= trigraph after removing the
699 For line comments, on reaching a newline we scan the previous
700 character(s) to see if it is escaped, and continue if it is. Block
701 comments ignore everything and just focus on finding the comment
702 termination mark. The only difficult thing, and it is surprisingly
703 tricky, is checking if an asterisk precedes the final slash since
704 they could be separated by escaped newlines. If the preprocessor is
705 invoked with the output comments option, we don't bother removing
706 escaped newlines and replacing trigraphs for output.
708 Finally, numbers can begin with a period, which is pushed initially
709 as a CPP_DOT token in its own right. The digit handler checks if the
710 previous token was a CPP_DOT not separated by whitespace, and if so
711 pops it off the stack and pushes a period into the number's buffer
712 before calling the number parser.
716 static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
717 U":>", U"<%", U"%>"};
719 /* Call when a trigraph is encountered. It warns if necessary, and
720 returns true if the trigraph should be honoured. END is the third
721 character of a trigraph in the input stream. */
723 trigraph_ok (pfile, end)
725 const unsigned char *end;
727 int accept = CPP_OPTION (pfile, trigraphs);
729 if (CPP_OPTION (pfile, warn_trigraphs))
731 unsigned int col = end - 1 - pfile->buffer->line_base;
733 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
734 "trigraph ??%c converted to %c",
735 (int) *end, (int) trigraph_map[*end]);
737 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
738 "trigraph ??%c ignored", (int) *end);
743 /* Scan a string for trigraphs, warning or replacing them inline as
744 appropriate. When parsing a string, we must call this routine
745 before processing a newline character (if trigraphs are enabled),
746 since the newline might be escaped by a preceding backslash
747 trigraph sequence. Returns a pointer to the end of the name after
750 static unsigned char *
751 trigraph_replace (pfile, src, limit)
754 unsigned char *limit;
758 /* Starting with src[1], find two consecutive '?'. The case of no
759 trigraphs is streamlined. */
761 for (src++; src + 1 < limit; src += 2)
766 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
769 else if (src + 2 == limit || src[1] != '?')
772 /* Check if it really is a trigraph. */
773 if (trigraph_map[src[2]] == 0)
781 /* Now we have a trigraph, we need to scan the remaining buffer, and
782 copy-shifting its contents left if replacement is enabled. */
783 for (; src + 2 < limit; dest++, src++)
784 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
788 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
789 *dest = trigraph_map[*src];
792 /* Copy remaining (at most 2) characters. */
798 /* If CUR is a backslash or the end of a trigraphed backslash, return
799 a pointer to its beginning, otherwise NULL. We don't read beyond
800 the buffer start, because there is the start of the comment in the
802 static const unsigned char *
803 backslash_start (pfile, cur)
805 const unsigned char *cur;
809 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
810 && trigraph_ok (pfile, cur))
815 /* Skip a C-style block comment. This is probably the trickiest
816 handler. We find the end of the comment by seeing if an asterisk
817 is before every '/' we encounter. The nasty complication is that a
818 previous asterisk may be separated by one or more escaped newlines.
819 Returns non-zero if comment terminated by EOF, zero otherwise. */
821 skip_block_comment (pfile)
824 cpp_buffer *buffer = pfile->buffer;
825 const unsigned char *char_after_star = 0;
826 register const unsigned char *cur = buffer->cur;
829 /* Inner loop would think the comment has ended if the first comment
830 character is a '/'. Avoid this and keep the inner loop clean by
831 skipping such a character. */
832 if (cur < buffer->rlimit && cur[0] == '/')
835 for (; cur < buffer->rlimit; )
837 unsigned char c = *cur++;
839 /* People like decorating comments with '*', so check for
840 '/' instead for efficiency. */
843 if (cur[-2] == '*' || cur - 1 == char_after_star)
846 /* Warn about potential nested comments, but not when
847 the final character inside the comment is a '/'.
848 Don't bother to get it right across escaped newlines. */
849 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
850 && cur[0] == '*' && cur[1] != '/')
853 cpp_warning (pfile, "'/*' within comment");
856 else if (is_vspace (c))
858 const unsigned char* bslash = backslash_start (pfile, cur - 2);
860 handle_newline (cur, buffer->rlimit, c);
861 /* Work correctly if there is an asterisk before an
862 arbitrarily long sequence of escaped newlines. */
863 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
864 char_after_star = cur;
876 /* Skip a C++ or Chill line comment. Handles escaped newlines.
877 Returns non-zero if a multiline comment. */
879 skip_line_comment (pfile)
882 cpp_buffer *buffer = pfile->buffer;
883 register const unsigned char *cur = buffer->cur;
886 for (; cur < buffer->rlimit; )
888 unsigned char c = *cur++;
892 /* Check for a (trigraph?) backslash escaping the newline. */
893 if (!backslash_start (pfile, cur - 2))
896 handle_newline (cur, buffer->rlimit, c);
902 buffer->cur = cur - 1; /* Leave newline for caller. */
906 /* Skips whitespace, stopping at next non-whitespace character.
907 Adjusts pfile->col_adjust to account for tabs. This enables tokens
908 to be assigned the correct column. */
910 skip_whitespace (pfile, in_directive)
914 cpp_buffer *buffer = pfile->buffer;
915 unsigned short warned = 0;
917 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
918 while (buffer->cur < buffer->rlimit)
920 unsigned char c = *buffer->cur;
926 /* Horizontal space always OK. */
930 pfile->col_adjust += CPP_OPTION (pfile, tabstop) - 1
931 - (CPP_BUF_COL (buffer) - 1) % CPP_OPTION(pfile, tabstop);
932 /* Must be \f \v or \0. */
936 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
937 CPP_BUF_COL (buffer),
938 "embedded null character ignored");
941 else if (in_directive && CPP_PEDANTIC (pfile))
942 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
943 CPP_BUF_COL (buffer),
944 "%s in preprocessing directive",
945 c == '\f' ? "form feed" : "vertical tab");
949 /* Parse (append) an identifier. */
950 static inline const U_CHAR *
951 parse_name (pfile, tok, cur, rlimit)
954 const U_CHAR *cur, *rlimit;
956 const U_CHAR *name = cur;
961 if (! is_idchar (*cur))
963 /* $ is not a legal identifier character in the standard, but is
964 commonly accepted as an extension. Don't warn about it in
965 skipped conditional blocks. */
966 if (*cur == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
968 CPP_BUFFER (pfile)->cur = cur;
969 cpp_pedwarn (pfile, "'$' character in identifier");
977 unsigned int oldlen = tok->val.node->length;
978 U_CHAR *newname = alloca (oldlen + len);
979 memcpy (newname, tok->val.node->name, oldlen);
980 memcpy (newname + oldlen, name, len);
985 tok->val.node = cpp_lookup (pfile, name, len);
989 /* Parse (append) a number. */
991 parse_number (pfile, list, name)
996 const unsigned char *name_limit;
997 unsigned char *namebuf;
998 cpp_buffer *buffer = pfile->buffer;
999 register const unsigned char *cur = buffer->cur;
1002 name_limit = list->namebuf + list->name_cap;
1003 namebuf = list->namebuf + list->name_used;
1005 for (; cur < buffer->rlimit && namebuf < name_limit; )
1007 unsigned char c = *namebuf = *cur; /* Copy a single char. */
1009 /* Perhaps we should accept '$' here if we accept it for
1010 identifiers. We know namebuf[-1] is safe, because for c to
1011 be a sign we must have pushed at least one character. */
1012 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1019 /* Run out of name space? */
1020 if (cur < buffer->rlimit)
1022 list->name_used = namebuf - list->namebuf;
1023 auto_expand_name_space (list);
1029 name->len = namebuf - name->text;
1030 list->name_used = namebuf - list->namebuf;
1033 /* Places a string terminated by an unescaped TERMINATOR into a
1034 cpp_string, which should be expandable and thus at the top of the
1035 list's stack. Handles embedded trigraphs, if necessary, and
1038 Can be used for character constants (terminator = '\''), string
1039 constants ('"') and angled headers ('>'). Multi-line strings are
1040 allowed, except for within directives. */
1043 parse_string (pfile, list, token, terminator)
1047 unsigned int terminator;
1049 cpp_buffer *buffer = pfile->buffer;
1050 cpp_string *name = &token->val.str;
1051 register const unsigned char *cur = buffer->cur;
1052 const unsigned char *name_limit;
1053 unsigned char *namebuf;
1054 unsigned int null_count = 0;
1055 unsigned int trigraphed = list->name_used;
1058 name_limit = list->namebuf + list->name_cap;
1059 namebuf = list->namebuf + list->name_used;
1061 for (; cur < buffer->rlimit && namebuf < name_limit; )
1063 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
1067 else if (c == terminator || is_vspace (c))
1069 /* Needed for trigraph_replace and multiline string warning. */
1072 /* Scan for trigraphs before checking if backslash-escaped. */
1073 if ((CPP_OPTION (pfile, trigraphs)
1074 || CPP_OPTION (pfile, warn_trigraphs))
1075 && namebuf - (list->namebuf + trigraphed) >= 3)
1077 namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1079 /* The test above guarantees trigraphed will be positive. */
1080 trigraphed = namebuf - list->namebuf - 2;
1083 namebuf--; /* Drop the newline / terminator from the name. */
1086 /* Drop a backslash newline, and continue. */
1087 if (namebuf[-1] == '\\')
1089 handle_newline (cur, buffer->rlimit, c);
1096 /* In Fortran and assembly language, silently terminate
1097 strings of either variety at end of line. This is a
1098 kludge around not knowing where comments are in these
1100 if (CPP_OPTION (pfile, lang_fortran)
1101 || CPP_OPTION (pfile, lang_asm))
1104 /* Character constants, headers and asserts may not
1105 extend over multiple lines. In Standard C, neither
1106 may strings. We accept multiline strings as an
1107 extension. (Even in directives - otherwise, glibc's
1108 longlong.h breaks.) */
1109 if (terminator != '"')
1112 cur++; /* Move forwards again. */
1114 if (pfile->multiline_string_line == 0)
1116 pfile->multiline_string_line = token->line;
1117 pfile->multiline_string_column = token->col;
1118 if (CPP_PEDANTIC (pfile))
1119 cpp_pedwarn (pfile, "multi-line string constant");
1123 handle_newline (cur, buffer->rlimit, c);
1127 unsigned char *temp;
1129 /* An odd number of consecutive backslashes represents
1130 an escaped terminator. */
1132 while (temp >= name->text && *temp == '\\')
1135 if ((namebuf - temp) & 1)
1142 /* Run out of name space? */
1143 if (cur < buffer->rlimit)
1145 list->name_used = namebuf - list->namebuf;
1146 auto_expand_name_space (list);
1150 /* We may not have trigraph-replaced the input for this code path,
1151 but as the input is in error by being unterminated we don't
1152 bother. Prevent warnings about no newlines at EOF. */
1153 if (is_vspace (cur[-1]))
1157 cpp_error (pfile, "missing terminating %c character", (int) terminator);
1159 if (terminator == '\"' && pfile->multiline_string_line != list->line
1160 && pfile->multiline_string_line != 0)
1162 cpp_error_with_line (pfile, pfile->multiline_string_line,
1163 pfile->multiline_string_column,
1164 "possible start of unterminated string literal");
1165 pfile->multiline_string_line = 0;
1170 name->len = namebuf - name->text;
1171 list->name_used = namebuf - list->namebuf;
1174 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1175 : "null character preserved"));
/* save_comment: store a comment in LIST's name buffer and describe it
   with TOKEN, which becomes a CPP_COMMENT token.  FROM and LEN give the
   comment text to copy; LEN is increased by COMMENT_START_LEN so that
   the reserved space and the recorded length also cover the comment
   start.  The name buffer is grown with _cpp_expand_name_space when
   name_used + len would exceed name_cap.
   NOTE(review): the embedded line numbers skip in this excerpt (for
   example 1184 -> 1187 and 1205 -> 1216), so some declarations and
   statements are not visible -- presumably including the code that
   writes the comment start selected by TYPE.  Confirm against the full
   file.  */
1178 /* The character TYPE helps us distinguish comment types: '*' = C
1179 style, '-' = Chill-style and '/' = C++ style. For code simplicity,
1180 the stored comment includes the comment start and any terminator. */
1182 #define COMMENT_START_LEN 2
1184 save_comment (list, token, from, len, type)
1187 const unsigned char *from;
1191 unsigned char *buffer;
/* Account for the comment start in the stored length.  */
1193 len += COMMENT_START_LEN;
1195 if (list->name_used + len > list->name_cap)
1196 _cpp_expand_name_space (list, len);
1198 INIT_TOKEN_STR (list, token);
1199 token->type = CPP_COMMENT;
1200 token->val.str.len = len;
/* Claim LEN bytes at the current end of the name buffer.  */
1202 buffer = list->namebuf + list->name_used;
1203 list->name_used += len;
1205 /* Copy the comment. */
1216 memcpy (buffer, from, len - COMMENT_START_LEN);
/* lex_line (pfile, list) and its two helper macros.
   KNOWN_DIRECTIVE is true once list->directive has been set.
   MIGHT_BE_DIRECTIVE is true when the token being lexed is the second
   token produced by this call and the first one is CPP_HASH.
   Tokens are appended to LIST starting at index list->tokens_used
   (saved as first_token); token_limit is the end of the allocated
   token array and the main loop runs while input remains and
   cur_token is below token_limit.  Multi-character operators are
   recognised by look-back: when the current character can extend the
   previous token, the previous token is retyped (REVISE_TOKEN,
   BACKUP_TOKEN and BACKUP_DIGRAPH are defined outside this excerpt)
   instead of pushing a new one.
   NOTE(review): the embedded line numbers skip throughout, so the
   switch statement, its case labels for most punctuation, braces and
   several short statements are not visible here.  The added comments
   describe only what the visible lines show.  */
1220 * The tokenizer's main loop. Returns a token list, representing a
1221 * logical line in the input file. On EOF after some tokens have
1222 * been processed, we return immediately. Then in next call, or if
1223 * EOF occurred at the beginning of a logical line, a single CPP_EOF
1224 * token is placed in the list.
1226 * Implementation relies almost entirely on lookback, rather than
1227 * looking forwards. This means that tokenization requires just
1228 * a single pass of the file, even in the presence of trigraphs and
1229 * escaped newlines, providing significant performance benefits.
1230 * Trigraph overhead is negligible if they are disabled, and low
1231 * even when enabled.
1234 #define KNOWN_DIRECTIVE() (list->directive != 0)
1235 #define MIGHT_BE_DIRECTIVE() \
1236 (cur_token == &list->tokens[first_token + 1] && cur_token[-1].type == CPP_HASH)
1239 lex_line (pfile, list)
1243 cpp_token *cur_token, *token_limit, *first;
1244 cpp_buffer *buffer = pfile->buffer;
1245 const unsigned char *cur = buffer->cur;
1246 unsigned char flags = 0;
1247 unsigned int first_token = list->tokens_used;
1249 if (!(list->flags & LIST_OFFSET))
1252 list->file = buffer->nominal_fname;
1253 list->line = CPP_BUF_LINE (buffer);
1254 pfile->col_adjust = 0;
1255 pfile->in_lex_line = 1;
1256 if (cur == buffer->buf)
1257 list->flags |= BEG_OF_FILE;
1260 token_limit = list->tokens + list->tokens_cap;
1261 cur_token = list->tokens + list->tokens_used;
1263 for (; cur < buffer->rlimit && cur_token < token_limit;)
1267 /* Optimize non-vertical whitespace skipping; most tokens are
1268 probably separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
1273 skip_whitespace (pfile, (list->tokens[first_token].type == CPP_HASH
1274 && cur_token > &list->tokens[first_token]));
1278 if (cur == buffer->rlimit)
1284 /* Initialize current token. CPP_EOF will not be fixed up by
1285 expand_name_space. */
1286 list->tokens_used = cur_token - list->tokens + 1;
1287 cur_token->type = CPP_EOF;
1288 cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1289 cur_token->line = CPP_BUF_LINE (buffer);
1290 cur_token->flags = flags;
1295 case '0': case '1': case '2': case '3': case '4':
1296 case '5': case '6': case '7': case '8': case '9':
1300 cur--; /* Backup character. */
1301 prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1304 INIT_TOKEN_STR (list, cur_token);
1305 /* Prepend an immediately previous CPP_DOT token. */
1308 if (list->name_cap == list->name_used)
1309 auto_expand_name_space (list);
1311 cur_token->val.str.len = 1;
1312 list->namebuf[list->name_used++] = '.';
1316 cur_token->type = CPP_NUMBER; /* Before parse_number. */
1318 parse_number (pfile, list, &cur_token->val.str);
1321 /* Check for # 123 form of #line. */
1322 if (MIGHT_BE_DIRECTIVE ())
1323 list->directive = _cpp_check_linemarker (pfile, cur_token,
1324 !(cur_token[-1].flags
1331 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1332 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1333 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1334 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1336 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1337 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1338 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1339 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1341 cur--; /* Backup character. */
1342 cur_token->val.node = 0;
1343 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
1346 cur = parse_name (pfile, cur_token, cur, buffer->rlimit);
1348 if (MIGHT_BE_DIRECTIVE ())
1349 list->directive = _cpp_check_directive (pfile, cur_token,
1350 !(list->tokens[0].flags
1356 /* Character constants are not recognized when processing Fortran,
1357 or if -traditional. */
1358 if (CPP_OPTION (pfile, lang_fortran) || CPP_TRADITIONAL (pfile))
1363 /* Traditionally, escaped strings are not strings. */
1364 if (CPP_TRADITIONAL (pfile) && IMMED_TOKEN ()
1365 && PREV_TOKEN_TYPE == CPP_BACKSLASH)
1368 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
1369 /* Do we have a wide string? */
1370 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1371 && cur_token[-1].val.node == pfile->spec_nodes->n_L
1372 && !CPP_TRADITIONAL (pfile))
/* Step back onto the preceding L name token and retype it as the wide
   variant; the parse_string call below then fills that slot.  */
1374 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
1378 /* Here c is one of ' " or >. */
1379 INIT_TOKEN_STR (list, cur_token);
1381 parse_string (pfile, list, cur_token, c);
1387 cur_token->type = CPP_DIV;
1390 if (PREV_TOKEN_TYPE == CPP_DIV)
1392 /* We silently allow C++ comments in system headers,
1393 irrespective of conformance mode, because lots of
1394 broken systems do that and trying to clean it up
1395 in fixincludes is a nightmare. */
1396 if (CPP_IN_SYSTEM_HEADER (pfile))
1397 goto do_line_comment;
1398 else if (CPP_OPTION (pfile, cplusplus_comments))
1400 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1401 && ! buffer->warned_cplusplus_comments)
1405 "C++ style comments are not allowed in ISO C89");
1407 "(this will be reported only once per input file)");
1408 buffer->warned_cplusplus_comments = 1;
1412 #if 0 /* Leave until new lexer in place. */
1415 "comment start split across lines");
1417 if (skip_line_comment (pfile))
1418 cpp_warning (pfile, "multi-line comment");
1420 /* Back-up to first '-' or '/'. */
/* Keep the comment as a token unless comments are being discarded;
   inside a known directive, only when that directive has the COMMENTS
   flag.  */
1422 if (!CPP_OPTION (pfile, discard_comments)
1423 && (!KNOWN_DIRECTIVE()
1424 || (list->directive->flags & COMMENTS)))
1425 save_comment (list, cur_token++, cur,
1426 buffer->cur - cur, c);
1427 else if (!CPP_OPTION (pfile, traditional))
1439 cur_token->type = CPP_MULT;
1442 if (PREV_TOKEN_TYPE == CPP_DIV)
1445 #if 0 /* Leave until new lexer in place. */
1448 "comment start '/*' split across lines");
1450 if (skip_block_comment (pfile))
1451 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1452 "unterminated comment");
1453 #if 0 /* Leave until new lexer in place. */
1454 else if (buffer->cur[-2] != '*')
1456 "comment end '*/' split across lines");
1458 /* Back up to opening '/'. */
1460 if (!CPP_OPTION (pfile, discard_comments)
1461 && (!KNOWN_DIRECTIVE()
1462 || (list->directive->flags & COMMENTS)))
1463 save_comment (list, cur_token++, cur,
1464 buffer->cur - cur, c);
1465 else if (!CPP_OPTION (pfile, traditional))
1471 else if (CPP_OPTION (pfile, cplusplus))
1473 /* In C++, there are .* and ->* operators. */
1474 if (PREV_TOKEN_TYPE == CPP_DEREF)
1475 BACKUP_TOKEN (CPP_DEREF_STAR);
1476 else if (PREV_TOKEN_TYPE == CPP_DOT)
1477 BACKUP_TOKEN (CPP_DOT_STAR);
1485 handle_newline (cur, buffer->rlimit, c);
1486 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
1490 /* Remove the escaped newline. Then continue to process
1491 any interrupted name or number. */
1493 /* Backslash-newline may not be immediately followed by
1494 EOF (C99 5.1.1.2). */
1495 if (cur >= buffer->rlimit)
1497 cpp_pedwarn (pfile, "backslash-newline at end of file");
1503 if (cur_token->type == CPP_NAME)
1505 else if (cur_token->type == CPP_NUMBER)
1506 goto continue_number;
1509 /* Remember whitespace setting. */
1510 flags = cur_token->flags;
1517 "backslash and newline separated by space");
1520 else if (MIGHT_BE_DIRECTIVE ())
1522 /* "Null directive." C99 6.10.7: A preprocessing
1523 directive of the form # <new-line> has no effect.
1525 But it is still a directive, and therefore disappears
1528 if (cur_token->flags & PREV_WHITE)
1530 if (CPP_WTRADITIONAL (pfile))
1532 "K+R C ignores #\\n with the # indented");
1533 if (CPP_TRADITIONAL (pfile))
1538 /* Skip vertical space until we have at least one token to
1540 if (cur_token != &list->tokens[first_token])
1542 list->line = CPP_BUF_LINE (buffer);
1546 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
1548 if (CPP_OPTION (pfile, chill))
1549 goto do_line_comment;
1550 REVISE_TOKEN (CPP_MINUS_MINUS);
1553 PUSH_TOKEN (CPP_MINUS);
1558 /* The digraph flag checking ensures that ## and %:%:
1559 are interpreted as CPP_PASTE, but #%: and %:# are not. */
1560 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1561 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1562 REVISE_TOKEN (CPP_PASTE);
1564 PUSH_TOKEN (CPP_HASH);
1568 cur_token->type = CPP_COLON;
1571 if (PREV_TOKEN_TYPE == CPP_COLON
1572 && CPP_OPTION (pfile, cplusplus))
1573 BACKUP_TOKEN (CPP_SCOPE);
1574 else if (CPP_OPTION (pfile, digraphs))
1576 /* Digraph: "<:" is a '[' */
1577 if (PREV_TOKEN_TYPE == CPP_LESS)
1578 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1579 /* Digraph: "%:" is a '#' */
1580 else if (PREV_TOKEN_TYPE == CPP_MOD)
1582 (--cur_token)->flags |= DIGRAPH;
1591 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1592 REVISE_TOKEN (CPP_AND_AND);
1594 PUSH_TOKEN (CPP_AND);
1599 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1600 REVISE_TOKEN (CPP_OR_OR);
1602 PUSH_TOKEN (CPP_OR);
1606 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1607 REVISE_TOKEN (CPP_PLUS_PLUS);
1609 PUSH_TOKEN (CPP_PLUS);
1613 /* This relies on equidistance of "?=" and "?" tokens. */
1614 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1615 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1617 PUSH_TOKEN (CPP_EQ);
1621 cur_token->type = CPP_GREATER;
1624 if (PREV_TOKEN_TYPE == CPP_GREATER)
1625 BACKUP_TOKEN (CPP_RSHIFT);
1626 else if (PREV_TOKEN_TYPE == CPP_MINUS)
1627 BACKUP_TOKEN (CPP_DEREF);
1628 else if (CPP_OPTION (pfile, digraphs))
1630 /* Digraph: ":>" is a ']' */
1631 if (PREV_TOKEN_TYPE == CPP_COLON)
1632 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1633 /* Digraph: "%>" is a '}' */
1634 else if (PREV_TOKEN_TYPE == CPP_MOD)
1635 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1642 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1644 REVISE_TOKEN (CPP_LSHIFT);
1647 /* Is this the beginning of a header name? */
1648 if (KNOWN_DIRECTIVE () && (list->directive->flags & INCL))
1650 c = '>'; /* Terminator. */
1651 cur_token->type = CPP_HEADER_NAME;
1652 goto do_parse_string;
1654 PUSH_TOKEN (CPP_LESS);
1658 /* Digraph: "<%" is a '{' */
1659 cur_token->type = CPP_MOD;
1660 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS
1661 && CPP_OPTION (pfile, digraphs))
1662 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1667 if (cur + 1 < buffer->rlimit && *cur == '?'
1668 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1670 /* Handle trigraph. */
1674 case '(': goto make_open_square;
1675 case ')': goto make_close_square;
1676 case '<': goto make_open_brace;
1677 case '>': goto make_close_brace;
1678 case '=': goto make_hash;
1679 case '!': goto make_or;
1680 case '-': goto make_complement;
1681 case '/': goto make_backslash;
1682 case '\'': goto make_xor;
1685 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1687 /* GNU C++ defines <? and >? operators. */
1688 if (PREV_TOKEN_TYPE == CPP_LESS)
1690 REVISE_TOKEN (CPP_MIN);
1693 else if (PREV_TOKEN_TYPE == CPP_GREATER)
1695 REVISE_TOKEN (CPP_MAX);
1699 PUSH_TOKEN (CPP_QUERY);
1703 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1705 && !(cur_token[-1].flags & PREV_WHITE))
1708 PUSH_TOKEN (CPP_ELLIPSIS);
1711 PUSH_TOKEN (CPP_DOT);
1715 case '~': PUSH_TOKEN (CPP_COMPL); break;
1717 case '^': PUSH_TOKEN (CPP_XOR); break;
1719 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1721 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1723 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1725 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1727 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1728 case '!': PUSH_TOKEN (CPP_NOT); break;
1729 case ',': PUSH_TOKEN (CPP_COMMA); break;
1730 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1731 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1732 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
1735 if (CPP_OPTION (pfile, dollars_in_ident))
1740 cur_token->val.aux = c;
1741 PUSH_TOKEN (CPP_OTHER);
1746 /* Run out of token space? */
1747 if (cur_token == token_limit)
1749 list->tokens_used = cur_token - list->tokens;
1750 _cpp_expand_token_space (list, 256);
1754 cur_token->flags = flags;
/* Visible below: when no token was produced by this call and
   initialization is done, warn if the input does not end in vertical
   space and emit a CPP_EOF token.  */
1755 if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1757 if (cur > buffer->buf && !is_vspace (cur[-1]))
1758 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1759 CPP_BUF_COLUMN (buffer, cur),
1760 "no newline at end of file");
1761 cur_token++->type = CPP_EOF;
1765 /* All tokens are allocated, so the memory location is fixed. */
1766 first = &list->tokens[first_token];
1768 /* Don't complain about the null directive, nor directives in
1769 assembly source: we don't know where the comments are, and # may
1770 introduce assembler pseudo-ops. Don't complain about invalid
1771 directives in skipped conditional groups (6.10 p4). */
1772 if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1773 && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1775 if (first[1].type == CPP_NAME)
1776 cpp_error (pfile, "invalid preprocessing directive #%.*s",
1777 (int) first[1].val.node->length, first[1].val.node->name);
1779 cpp_error (pfile, "invalid preprocessing directive");
1782 /* Put EOF at end of known directives. This covers "directives do
1783 not extend beyond the end of the line (description 6.10 part 2)". */
1784 if (KNOWN_DIRECTIVE () || !pfile->done_initializing)
1786 pfile->first_directive_token = first;
1787 cur_token++->type = CPP_EOF;
1790 /* Directives, known or not, always start a new line. */
1791 if (first_token == 0 || list->tokens[first_token].type == CPP_HASH)
1792 first->flags |= BOL;
1794 /* 6.10.3.10: Within the sequence of preprocessing tokens making
1795 up the invocation of a function-like macro, new line is
1796 considered a normal white-space character. */
1797 first->flags |= PREV_WHITE;
/* Publish the final token count and clear the in-lex flag that was
   set on entry.  */
1800 list->tokens_used = cur_token - list->tokens;
1801 pfile->in_lex_line = 0;
/* output_token (pfile, token, prev): three whitespace cases are visible
   below.  A token flagged BOL with a non-zero column has space reserved
   for its column (the padding loop itself is not visible in this
   excerpt).  A token flagged PREV_WHITE is preceded by one space.
   Otherwise, outside traditional mode and when PREV is non-null, one
   space is inserted if can_paste reports that PREV and TOKEN would
   paste, or for the pairs PLUS / PLUS_PLUS and MINUS / MINUS_MINUS.
   Finally the spelling is appended at pfile->limit via spell_token.  */
1804 /* Write the spelling of a token TOKEN, with any appropriate
1805 whitespace before it, to the token_buffer. PREV is the previous
1806 token, which is used to determine if we need to shove in an extra
1807 space in order to avoid accidental token paste. */
1809 output_token (pfile, token, prev)
1811 const cpp_token *token, *prev;
1815 if (token->col && (token->flags & BOL))
1817 /* Supply enough whitespace to put this token in its original
1818 column. Don't bother trying to reconstruct tabs; we can't
1819 get it right in general, and nothing ought to care. (Yes,
1820 some things do care; the fault lies with them.) */
1821 unsigned char *buffer;
1822 unsigned int spaces = token->col - 1;
1824 CPP_RESERVE (pfile, token->col);
1825 buffer = pfile->limit;
1829 pfile->limit = buffer;
1831 else if (token->flags & PREV_WHITE)
1832 CPP_PUTC (pfile, ' ');
1833 /* Check for and prevent accidental token pasting, in ANSI mode. */
1835 else if (!CPP_TRADITIONAL (pfile) && prev)
1837 if (can_paste (pfile, prev, token, &dummy) != CPP_EOF)
1838 CPP_PUTC (pfile, ' ');
1839 /* can_paste catches most of the accidental paste cases, but not all.
1840 Consider a + ++b - if there is not a space between the + and ++, it
1841 will be misparsed as a++ + b. */
1842 else if ((prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1843 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS))
1844 CPP_PUTC (pfile, ' ');
/* Reserve room for the spelling, then append it and advance the
   output limit to the end of what spell_token wrote.  */
1847 CPP_RESERVE (pfile, TOKEN_LEN (token));
1848 pfile->limit = spell_token (pfile, token, pfile->limit);
/* spell_token dispatches on token_spellings[token->type].type.
   Visible cases: operators are copied character by character from
   digraph_spellings (when the DIGRAPH flag is set) or from
   token_spellings; tokens spelled from val.node copy node->name;
   string-like tokens copy val.str.text, with type checks for
   CPP_WSTRING / CPP_WCHAR and CPP_STRING / CPP_CHAR before and after
   the copy (presumably emitting the wide prefix and the quote
   characters -- those statements are not visible here); val.aux is
   emitted as a single character; any other kind reports an internal
   error through cpp_ice.  */
1851 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1852 already contain enough space to hold the token's spelling.
1853 Returns a pointer to the character after the last character
1856 static unsigned char *
1857 spell_token (pfile, token, buffer)
1858 cpp_reader *pfile; /* Would be nice to be rid of this... */
1859 const cpp_token *token;
1860 unsigned char *buffer;
1862 switch (token_spellings[token->type].type)
1864 case SPELL_OPERATOR:
1866 const unsigned char *spelling;
1869 if (token->flags & DIGRAPH)
1870 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1872 spelling = token_spellings[token->type].spelling;
1874 while ((c = *spelling++) != '\0')
1880 memcpy (buffer, token->val.node->name, token->val.node->length);
1881 buffer += token->val.node->length;
1886 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
1889 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1891 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1894 memcpy (buffer, token->val.str.text, token->val.str.len);
1895 buffer += token->val.str.len;
1897 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1899 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1905 *buffer++ = token->val.aux;
1909 cpp_ice (pfile, "Unspellable token %s", token_names[token->type]);
/* _cpp_spell_operator (type): for SPELL_OPERATOR types the spelling
   comes from token_spellings; the other visible return yields
   token_names[type].  The line joining the two returns is not visible
   in this excerpt (presumably an else).  */
1916 /* Return the spelling of a token known to be an operator.
1917 Does not distinguish digraphs from their counterparts. */
1918 const unsigned char *
1919 _cpp_spell_operator (type)
1920 enum cpp_ttype type;
1922 if (token_spellings[type].type == SPELL_OPERATOR)
1923 return token_spellings[type].spelling;
1925 return token_names[type];
/* Shared constants and helper macros for macro expansion.
   placemarker_token and eof_token are constant tokens of type
   CPP_PLACEMARKER and CPP_EOF with zeros in the other initializer
   slots.  IS_ARG_CONTEXT tests the CONTEXT_ARG flag; CURRENT_CONTEXT
   indexes pfile->contexts by pfile->cur_context.
   ASSIGN_FLAGS_AND_POS (d, s) sets d->flags to the PREV_WHITE, BOL and
   PASTE_LEFT bits of s, and copies col and line when BOL is set.
   MODIFY_FLAGS_AND_POS (d, s, f) clears PREV_WHITE and BOL in d, ORs in
   F, and copies col and line from s when F contains BOL.
   NOTE(review): the closing line of each do-block macro is not visible
   in this excerpt.  */
1929 /* Macro expansion algorithm. TODO. */
1931 static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
1932 static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
1934 #define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
1935 #define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
1937 /* Flags for cpp_context. */
1938 #define CONTEXT_PASTEL (1 << 0) /* An argument context on LHS of ##. */
1939 #define CONTEXT_PASTER (1 << 1) /* An argument context on RHS of ##. */
1940 #define CONTEXT_RAW (1 << 2) /* If argument tokens already expanded. */
1941 #define CONTEXT_ARG (1 << 3) /* If an argument context. */
1943 #define ASSIGN_FLAGS_AND_POS(d, s) \
1944 do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
1945 if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
1948 /* f is flags, just consisting of PREV_WHITE | BOL. */
1949 #define MODIFY_FLAGS_AND_POS(d, s, f) \
1950 do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
1951 if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
/* cpp_context: one entry of the array reached through pfile->contexts.
   NOTE(review): the struct and union keywords and the braces are not
   visible in this excerpt.  Other code in this file accesses the list
   member as context->u.list, so list and arg are presumably members of
   a union named u -- confirm against the full file.  */
1954 typedef struct cpp_context cpp_context;
1959 const cpp_toklist *list; /* Used for macro contexts only. */
1960 const cpp_token **arg; /* Used for arg contexts only. */
1963 /* Pushed token to be returned by next call to cpp_get_token. */
1964 const cpp_token *pushed_token;
1966 struct macro_args *args; /* 0 for arguments and object-like macros. */
1967 unsigned short posn; /* Current posn, index into u. */
1968 unsigned short count; /* No. of tokens in u. */
/* is_macro_disabled uses level as an index into pfile->contexts.  */
1969 unsigned short level;
/* Holds the CONTEXT_* bits (tested by IS_ARG_CONTEXT, for example).  */
1970 unsigned char flags;
/* macro_args: growable array of token pointers collected while parsing
   a macro invocation; save_token appends to tokens and grows capacity.
   NOTE(review): save_token and parse_args also use members named used
   and ends, whose declarations are not visible in this excerpt.  */
1973 typedef struct macro_args macro_args;
1977 const cpp_token **tokens;
1978 unsigned int capacity;
1980 unsigned short level;
/* Forward declarations for the static macro-expansion helpers.  PARAMS
   wraps each parameter list; it is defined outside this excerpt
   (presumably for compatibility with pre-ANSI compilers -- confirm).
   NOTE(review): part of the push_macro_context parameter list is not
   visible here.  */
1983 static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
1984 static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
1985 macro_args *, unsigned int *));
1986 static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
1987 static void save_token PARAMS ((macro_args *, const cpp_token *));
1988 static const cpp_token *push_arg_context PARAMS ((cpp_reader *,
1989 const cpp_token *));
1990 static int do_pop_context PARAMS ((cpp_reader *));
1991 static const cpp_token *pop_context PARAMS ((cpp_reader *));
1992 static const cpp_token *push_macro_context PARAMS ((cpp_reader *,
1994 const cpp_token *));
1995 static void free_macro_args PARAMS ((macro_args *));
/* NOTE(review): only the release of args->tokens is visible in this
   excerpt; any further frees (for example of ARGS itself or of its ends
   array) would be on lines that are not shown -- confirm.  */
1997 /* Free the storage allocated for macro arguments. */
1999 free_macro_args (args)
2003 free (args->tokens);
/* is_macro_disabled (pfile, expansion, token): decides whether the
   macro whose replacement list is EXPANSION must not be expanded for
   TOKEN.  Checks visible below, in order:
     - the preprocessed option;
     - when the current context is an argument context: the PASTED flag
       on TOKEN, whether the context is CONTEXT_RAW, and whether it has
       CONTEXT_PASTEL or CONTEXT_PASTER set;
     - a walk over contexts with level > 0, looking for a non-argument
       context whose list is EXPANSION;
     - for macros with paramc >= 0, a look-ahead for an opening
       parenthesis with pfile->no_expand_level temporarily changed; a
       token that is not CPP_OPEN_PAREN is pushed back with
       _cpp_push_token, with a warning under warn_traditional.
   NOTE(review): the return statements are not visible in this excerpt,
   so the outcome of each check is not documented here.  */
2008 /* Determines if a macro has been already used (and is therefore
2011 is_macro_disabled (pfile, expansion, token)
2013 const cpp_toklist *expansion;
2014 const cpp_token *token;
2016 cpp_context *context = CURRENT_CONTEXT (pfile);
2018 /* Don't expand anything if this file has already been preprocessed. */
2019 if (CPP_OPTION (pfile, preprocessed))
2022 /* Arguments on either side of ## are inserted in place without
2023 macro expansion (6.10.3.3.2). Conceptually, any macro expansion
2024 occurs during a later rescan pass. The effect is that we expand
2025 iff we would as part of the macro's expansion list, so we should
2026 drop to the macro's context. */
2027 if (IS_ARG_CONTEXT (context))
2029 if (token->flags & PASTED)
2031 else if (!(context->flags & CONTEXT_RAW))
2033 else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2037 /* Have we already used this macro? */
2038 while (context->level > 0)
2040 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2042 /* Raw argument tokens are judged based on the token list they
2044 if (context->flags & CONTEXT_RAW)
2045 context = pfile->contexts + context->level;
2050 /* Function-like macros may be disabled if the '(' is not in the
2051 current context. We check this without disrupting the context
2053 if (expansion->paramc >= 0)
2055 const cpp_token *next;
2056 unsigned int prev_nme;
2058 context = CURRENT_CONTEXT (pfile);
2059 /* Drop down any contexts we're at the end of: the '(' may
2060 appear in lower macro expansions, or in the rest of the file. */
2061 while (context->posn == context->count && context > pfile->contexts)
2064 /* If we matched, we are disabled, as we appear in the
2065 expansion of each macro we meet. */
2066 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
/* Fetch the next token with no_expand_level set to the index of the
   context found above, then restore the previous level.  */
2070 prev_nme = pfile->no_expand_level;
2071 pfile->no_expand_level = context - pfile->contexts;
2072 next = cpp_get_token (pfile);
2073 restore_macro_expansion (pfile, prev_nme);
2074 if (next->type != CPP_OPEN_PAREN)
2076 _cpp_push_token (pfile, next);
2077 if (CPP_OPTION (pfile, warn_traditional))
2079 "function macro %.*s must be used with arguments in traditional C",
2080 (int) token->val.node->length, token->val.node->name);
2088 /* Add a token to the set of tokens forming the arguments to the macro
2089 being parsed in parse_args. */
2091 save_token (args, token)
2093 const cpp_token *token;
/* When the array is full, grow it to 2 * capacity + 100 entries before
   appending.  TOKEN may be a null pointer: parse_arg calls
   save_token (args, 0).  */
2095 if (args->used == args->capacity)
2097 args->capacity += args->capacity + 100;
2098 args->tokens = (const cpp_token **)
2099 xrealloc (args->tokens, args->capacity * sizeof (const cpp_token *));
2101 args->tokens[args->used++] = token;
/* parse_arg (pfile, var_args, paren_context, args, pcount): reads
   tokens with cpp_get_token and appends them to ARGS via save_token.
   VAR_ARGS, like being inside nested parentheses, stops commas from
   terminating the argument.  PAREN_CONTEXT is compared with
   pfile->cur_context to compute the raw flag for each token; a null
   pointer is also saved via save_token (args, 0) (the condition is not
   visible in this excerpt).  PCOUNT is an output parameter, presumably
   receiving the token count -- the store is not visible.  CPP_EOF ends
   the scan and is reported by the caller.  */
2104 /* Take and save raw tokens until we finish one argument. Empty
2105 arguments are saved as a single CPP_PLACEMARKER token. */
2106 static const cpp_token *
2107 parse_arg (pfile, var_args, paren_context, args, pcount)
2110 unsigned int paren_context;
2112 unsigned int *pcount;
2114 const cpp_token *token;
2115 unsigned int paren = 0, count = 0;
2116 int raw, was_raw = 1;
2118 for (count = 0;; count++)
2120 token = cpp_get_token (pfile);
2122 switch (token->type)
2127 case CPP_OPEN_PAREN:
2131 case CPP_CLOSE_PAREN:
2137 /* Commas are not terminators within parentheses or var_args. */
2138 if (paren || var_args)
2142 case CPP_EOF: /* Error reported by caller. */
2146 raw = pfile->cur_context <= paren_context;
2150 save_token (args, 0);
2153 save_token (args, token);
2159 /* Duplicate the placemarker. Then we can set its flags and
2160 position and safely be using more than one. */
2161 save_token (args, duplicate_token (pfile, &placemarker_token));
/* Both A and O are evaluated more than once in the expansion, so pass
   expressions without side effects.  The null-pointer branch reads
   tokens[O + 1], so that entry must exist whenever tokens[O] is null.  */
2169 /* This macro returns true if the argument starting at offset O of arglist
2170 A is empty - that is, it's either a single PLACEMARKER token, or a null
2171 pointer followed by a PLACEMARKER. */
2173 #define empty_argument(A, O) \
2174 ((A)->tokens[O] ? (A)->tokens[O]->type == CPP_PLACEMARKER \
2175 : (A)->tokens[(O)+1]->type == CPP_PLACEMARKER)
/* parse_args (pfile, hp, args): the macro definition is taken from
   hp->value.expansion.  parse_arg is called repeatedly until it returns
   a CPP_CLOSE_PAREN or CPP_EOF token; the last parameter of a VAR_ARGS
   macro is parsed in var_args mode.  For each argument within paramc,
   args->ends[argc] is set from the running total.  Diagnostics visible
   below: unterminated invocation (EOF); insufficient arguments, unless
   the only missing one is the rest argument of a GNU_REST_ARGS macro,
   which then receives a duplicated placemarker; and too many
   arguments, unless a zero-parameter macro was given a single empty
   argument.
   NOTE(review): the loop header, the update of total and argc, and the
   return statements are not visible in this excerpt.  */
2177 /* Parse the arguments making up a macro invocation. Nested arguments
2178 are automatically macro expanded, but immediate macros are not
2179 expanded; this enables e.g. operator # to work correctly. Returns
2180 non-zero on error. */
2182 parse_args (pfile, hp, args)
2187 const cpp_token *token;
2188 const cpp_toklist *macro;
2189 unsigned int total = 0;
2190 unsigned int paren_context = pfile->cur_context;
2193 macro = hp->value.expansion;
2198 token = parse_arg (pfile, (argc + 1 == macro->paramc
2199 && (macro->flags & VAR_ARGS)),
2200 paren_context, args, &count);
2201 if (argc < macro->paramc)
2204 args->ends[argc] = total;
2208 while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
2210 if (token->type == CPP_EOF)
2212 cpp_error (pfile, "unterminated invocation of macro \"%.*s\"",
2213 hp->length, hp->name);
2216 else if (argc < macro->paramc)
2218 /* A rest argument is allowed to not appear in the invocation at all.
2219 e.g. #define debug(format, args...) ...
2221 This is exactly the same as if the rest argument had received no
2222 tokens - debug("string",); This extension is deprecated. */
2224 if (argc + 1 == macro->paramc && (macro->flags & GNU_REST_ARGS))
2226 /* Duplicate the placemarker. Then we can set its flags and
2227 position and safely be using more than one. */
2228 save_token (args, duplicate_token (pfile, &placemarker_token));
2229 args->ends[argc] = total + 1;
2235 "insufficient arguments in invocation of macro \"%.*s\"",
2236 hp->length, hp->name);
2240 /* An empty argument to an empty function-like macro is fine. */
2241 else if (argc > macro->paramc
2242 && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2245 "too many arguments in invocation of macro \"%.*s\"",
2246 hp->length, hp->name);
/* quote_string (dest, src, len): make_string_token subtracts DEST from
   the return value to obtain a length, so the result is presumably a
   pointer just past the last byte written -- the return statement is
   not visible in this excerpt.  Each octal escape is produced with
   sprintf, which writes four characters followed by a terminating NUL,
   so DEST needs one spare byte beyond the escaped text.  */
2253 /* Adds backslashes before all backslashes and double quotes appearing
2254 in strings. Non-printable characters are converted to octal. */
2256 quote_string (dest, src, len)
2265 if (c == '\\' || c == '"')
2276 sprintf ((char *) dest, "\\%03o", c);
/* NOTE(review): the buffer is exactly len * 4 bytes, the worst-case
   escaped length when every byte of TEXT becomes a four-character
   octal escape.  quote_string emits those escapes with sprintf, which
   also writes a terminating NUL; if the last byte of TEXT is escaped
   that NUL would land one byte past the allocation.  Confirm against
   the full body of quote_string and consider allocating len * 4 + 1.
   The stored length is the distance from BUF to the pointer returned by
   quote_string.  */
2285 /* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2286 CPP_STRING token containing TEXT in quoted form. */
2288 make_string_token (token, text, len)
2295 buf = (U_CHAR *) xmalloc (len * 4);
2296 token->type = CPP_STRING;
2298 token->val.str.text = buf;
2299 token->val.str.len = quote_string (buf, text, len) - buf;
/* alloc_number_token (pfile, number): the digits are produced with
   sprintf and the %d conversion into BUF, which then becomes the text
   of the token; the length is taken with strlen.
   NOTE(review): the allocation of BUF is not visible in this excerpt.
   release_temp_tokens frees val.str.text of SPELL_STRING temporaries,
   so BUF is presumably heap-allocated and large enough for any int --
   confirm.  */
2303 /* Allocates and converts a temporary token to a CPP_NUMBER token,
2304 evaluating to NUMBER. */
2306 alloc_number_token (pfile, number)
2313 result = get_temp_token (pfile);
2315 sprintf (buf, "%d", number);
2317 result->type = CPP_NUMBER;
2319 result->val.str.text = (U_CHAR *) buf;
2320 result->val.str.len = strlen (buf);
/* get_temp_token (pfile): tokens are handed out from
   pfile->temp_tokens in index order.  When every allocated token is in
   use a new cpp_token is xmalloc'ed and added; if the pointer array is
   also full it is first grown to 2 * temp_cap + 20 entries.  The
   contents of the returned token are not initialised by the visible
   code.  */
2324 /* Returns a temporary token from the temporary token store of PFILE. */
2326 get_temp_token (pfile)
2329 if (pfile->temp_used == pfile->temp_alloced)
2331 if (pfile->temp_used == pfile->temp_cap)
2333 pfile->temp_cap += pfile->temp_cap + 20;
2334 pfile->temp_tokens = (cpp_token **) xrealloc
2335 (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2337 pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2338 (sizeof (cpp_token));
2341 return pfile->temp_tokens[pfile->temp_used++];
/* release_temp_tokens (pfile): counts temp_used down to zero.  For each
   token whose spelling kind is SPELL_STRING the string text is freed
   and the pointer cleared; the token structures themselves stay
   allocated for reuse by get_temp_token.  */
2344 /* Release (not free) for re-use the temporary tokens of PFILE. */
2346 release_temp_tokens (pfile)
2349 while (pfile->temp_used)
2351 cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
2353 if (token_spellings[token->type].type == SPELL_STRING)
2355 free ((char *) token->val.str.text);
2356 token->val.str.text = 0;
/* _cpp_free_temp_tokens (pfile): when a temporary token array exists,
   the tokens are first released (freeing any string text), then each
   allocated token and finally the pointer array are freed.  The text
   of pfile->date and pfile->time is freed as well.
   NOTE(review): the conditions guarding the date and time frees, and
   any free of those two tokens themselves, are not visible in this
   excerpt.  */
2361 /* Free all of PFILE's dynamically-allocated temporary tokens. */
2363 _cpp_free_temp_tokens (pfile)
2366 if (pfile->temp_tokens)
2368 /* It is possible, though unlikely (looking for '(' of a funlike
2369 macro into EOF), that we haven't released the tokens yet. */
2370 release_temp_tokens (pfile);
2371 while (pfile->temp_alloced)
2372 free (pfile->temp_tokens[--pfile->temp_alloced]);
2373 free (pfile->temp_tokens);
2378 free ((char *) pfile->date->val.str.text);
2380 free ((char *) pfile->time->val.str.text);
/* duplicate_token (pfile, token): for SPELL_STRING tokens the text is
   copied into a fresh xmalloc'ed buffer of val.str.len bytes, so the
   duplicate owns its own string storage (which release_temp_tokens
   later frees).
   NOTE(review): the copy of the token structure itself and the return
   statement are not visible in this excerpt.  */
2385 /* Copy TOKEN into a temporary token from PFILE's store. */
2387 duplicate_token (pfile, token)
2389 const cpp_token *token;
2391 cpp_token *result = get_temp_token (pfile);
2394 if (token_spellings[token->type].type == SPELL_STRING)
2396 U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2397 memcpy (buff, token->val.str.text, token->val.str.len);
2398 result->val.str.text = buff;
/* can_paste (pfile, token1, token2, digraph): A and B below are the
   types of TOKEN1 and TOKEN2.  The first check maps any type up to
   CPP_LAST_EQ followed by CPP_EQ onto the corresponding compound type
   by adding the fixed offset CPP_EQ_EQ - CPP_EQ.  The remaining
   results depend on A: shifts, increments, logical operators, CPP_DEREF,
   CPP_PASTE, names, numbers and wide character or string prefixes.
   Results that exist only in C++ (CPP_MAX, CPP_MIN, CPP_SCOPE,
   CPP_DEREF_STAR, CPP_DOT_STAR) are gated on the cplusplus option, and
   digraph results on the digraphs option.  *DIGRAPH is written when the
   result is a digraph spelling, and for CPP_PASTE it takes the DIGRAPH
   flag of TOKEN1.
   NOTE(review): the switch statement and most case labels are not
   visible in this excerpt, so the grouping of the returns below under
   particular values of A is inferred from their conditions.  */
2403 /* Determine whether two tokens can be pasted together, and if so,
2404 what the resulting token is. Returns CPP_EOF if the tokens cannot
2405 be pasted, or the appropriate type for the merged token if they
2407 static enum cpp_ttype
2408 can_paste (pfile, token1, token2, digraph)
2410 const cpp_token *token1, *token2;
2413 enum cpp_ttype a = token1->type, b = token2->type;
2414 int cxx = CPP_OPTION (pfile, cplusplus);
2416 if (a <= CPP_LAST_EQ && b == CPP_EQ)
2417 return a + (CPP_EQ_EQ - CPP_EQ);
2422 if (b == a) return CPP_RSHIFT;
2423 if (b == CPP_QUERY && cxx) return CPP_MAX;
2424 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
2427 if (b == a) return CPP_LSHIFT;
2428 if (b == CPP_QUERY && cxx) return CPP_MIN;
2429 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
2430 if (CPP_OPTION (pfile, digraphs))
2433 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2435 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
2439 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
2440 case CPP_AND: if (b == a) return CPP_AND_AND; break;
2441 case CPP_OR: if (b == a) return CPP_OR_OR; break;
2444 if (b == a) return CPP_MINUS_MINUS;
2445 if (b == CPP_GREATER) return CPP_DEREF;
2448 if (b == a && cxx) return CPP_SCOPE;
2449 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
2450 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2454 if (CPP_OPTION (pfile, digraphs))
2456 if (b == CPP_GREATER)
2457 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
2459 {*digraph = 1; return CPP_HASH;} /* %: digraph */
2463 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2466 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2467 if (b == CPP_NUMBER) return CPP_NUMBER;
2471 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2473 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2477 if (b == CPP_NAME) return CPP_NAME;
2479 && is_numstart(token2->val.str.text[0])) return CPP_NAME;
2481 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
2483 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
2487 if (b == CPP_NUMBER) return CPP_NUMBER;
2488 if (b == CPP_NAME) return CPP_NUMBER;
2489 if (b == CPP_DOT) return CPP_NUMBER;
2490 /* Numbers cannot have length zero, so this is safe. */
2491 if ((b == CPP_PLUS || b == CPP_MINUS)
2492 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
/* maybe_paste_with_next (pfile, token): outline of the visible logic.
   1. TOKEN is a paste candidate when it has PASTE_LEFT set, or when the
      current context has CONTEXT_PASTEL and is at its last token.
   2. pfile->paste_level guards against re-entry; it is set to the
      current context index while the second operand is fetched with
      cpp_get_token and cleared afterwards.
   3. Placemarker operands: the other operand is duplicated, except for
      the deprecated GNU rest-argument case, which warns and duplicates
      the placemarker instead.
   4. Otherwise can_paste chooses the result type.  CPP_EOF means no
      valid paste: an optional warning is given and the second token is
      pushed back.  Names and numbers are built by spelling both tokens
      into a stack buffer; wide character and string results duplicate
      the second token; other results use a fresh temporary token.
   5. The result takes the whitespace flags and position of TOKEN, the
      PASTE_LEFT flag of the second token, and the PASTED flag.
   NOTE(review): the early returns and several braces are not visible in
   this excerpt.  */
2503 /* Check if TOKEN is to be ##-pasted with the token after it. */
2504 static const cpp_token *
2505 maybe_paste_with_next (pfile, token)
2507 const cpp_token *token;
2510 const cpp_token *second;
2511 cpp_context *context = CURRENT_CONTEXT (pfile);
2513 /* Is this token on the LHS of ## ? */
2514 if (!((context->flags & CONTEXT_PASTEL) && context->posn == context->count)
2515 && !(token->flags & PASTE_LEFT))
2518 /* Prevent recursion, and possibly pushing back more than one token. */
2519 if (pfile->paste_level)
2522 /* Suppress macro expansion for next token, but don't conflict with
2523 the other method of suppression. If it is an argument, macro
2524 expansion within the argument will still occur. */
2525 pfile->paste_level = pfile->cur_context;
2526 second = cpp_get_token (pfile);
2527 pfile->paste_level = 0;
2529 /* Ignore placemarker argument tokens (cannot be from an empty macro
2530 since macros are not expanded). */
2531 if (token->type == CPP_PLACEMARKER)
2532 pasted = duplicate_token (pfile, second);
2533 else if (second->type == CPP_PLACEMARKER)
2535 cpp_context *mac_context = CURRENT_CONTEXT (pfile) - 1;
2536 /* GCC has special extended semantics for a ## b where b is a
2537 varargs parameter: a disappears if b consists of no tokens.
2538 This extension is deprecated. */
2539 if ((mac_context->u.list->flags & GNU_REST_ARGS)
2540 && (mac_context->u.list->tokens[mac_context->posn - 1].val.aux + 1
2541 == (unsigned) mac_context->u.list->paramc))
2543 cpp_warning (pfile, "deprecated GNU ## extension used");
2544 pasted = duplicate_token (pfile, second);
2547 pasted = duplicate_token (pfile, token);
2552 enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
2554 if (type == CPP_EOF)
2556 if (CPP_OPTION (pfile, warn_paste))
2558 "pasting would not give a valid preprocessing token");
2559 _cpp_push_token (pfile, second);
2563 if (type == CPP_NAME || type == CPP_NUMBER)
2565 /* Join spellings. */
2568 pasted = get_temp_token (pfile);
2569 buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2570 end = spell_token (pfile, token, buf);
2571 end = spell_token (pfile, second, end);
/* NOTE(review): uxstrdup below presumably needs BUF to be
   NUL-terminated; the terminating store is not visible in this excerpt
   -- confirm.  */
2574 if (type == CPP_NAME)
2575 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2578 pasted->val.str.text = uxstrdup (buf);
2579 pasted->val.str.len = end - buf;
2582 else if (type == CPP_WCHAR || type == CPP_WSTRING)
2583 pasted = duplicate_token (pfile, second);
2586 pasted = get_temp_token (pfile);
2587 pasted->val.integer = 0;
2590 pasted->type = type;
2591 pasted->flags = digraph ? DIGRAPH : 0;
2594 /* The pasted token gets the whitespace flags and position of the
2595 first token, the PASTE_LEFT flag of the second token, plus the
2596 PASTED flag to indicate it is the result of a paste. However, we
2597 want to preserve the DIGRAPH flag. */
2598 pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2599 pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2600 | (second->flags & PASTE_LEFT) | PASTED);
2601 pasted->col = token->col;
2602 pasted->line = token->line;
/* Recurse so that a result that inherited PASTE_LEFT is pasted with the
   token after it, handling chains such as a ## b ## c.  */
2604 return maybe_paste_with_next (pfile, pasted);
2607 /* Convert a token sequence to a single string token according to the
2608 rules of the ISO C #-operator.

     TOKEN supplies the flags and position of the result.  The tokens to
     stringify are read with cpp_get_token, with macro expansion
     suppressed, until CPP_EOF.  Their spellings are collected in a heap
     buffer that starts at INIT_SIZE bytes and grows on demand.  The
     result is a temporary CPP_STRING token whose text is that buffer.  */
2609 #define INIT_SIZE 200
2611 stringify_arg (pfile, token)
2613 const cpp_token *token;
2616 unsigned char *main_buf;
2617 unsigned int prev_value, backslash_count = 0;
2618 unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
     /* The operand of # is not macro expanded.  Remember the previous
        setting so that it can be restored before returning.  */
2620 prev_value = prevent_macro_expansion (pfile);
2621 main_buf = (unsigned char *) xmalloc (buf_cap);
2623 result = get_temp_token (pfile);
2624 ASSIGN_FLAGS_AND_POS (result, token);
2626 for (; (token = cpp_get_token (pfile))->type != CPP_EOF; )
2630 unsigned int len = TOKEN_LEN (token);
     /* String and character literals, narrow or wide, are flagged for
        escaping.  */
2632 escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2633 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
     /* Grow the buffer so that LEN more bytes fit, leaving INIT_SIZE
        bytes of slack.  */
2637 if (buf_used + len > buf_cap)
2639 buf_cap = buf_used + len + INIT_SIZE;
2640 main_buf = xrealloc (main_buf, buf_cap);
     /* One space stands for any whitespace before the token.  WHITESPACE
        is presumably set once a token has been output, so that no space
        is emitted at the very start -- TODO confirm.  */
2643 if (whitespace && (token->flags & PREV_WHITE))
2644 main_buf[buf_used++] = ' ';
     /* BUF is either a separate scratch allocation or the unused tail of
        MAIN_BUF; presumably the scratch copy is used when ESCAPE is set
        -- TODO confirm.  */
2647 buf = (unsigned char *) xmalloc (len);
2649 buf = main_buf + buf_used;
2651 len = spell_token (pfile, token, buf) - buf;
     /* quote_string writes the quoted form of BUF into MAIN_BUF and
        returns the new end, from which the used length is recomputed.  */
2654 buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2661 if (token->type == CPP_BACKSLASH)
2664 backslash_count = 0;
2667 /* Ignore the final \ of invalid string literals. */
2668 if (backslash_count & 1)
2670 cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2674 result->type = CPP_STRING;
2675 result->val.str.text = main_buf;
2676 result->val.str.len = buf_used;
2677 restore_macro_expansion (pfile, prev_value);
2681 /* Allocate more room on the context stack of PFILE.

     The capacity becomes twice the old capacity plus 20, and the array
     is resized with xrealloc.  NOTE(review): if xrealloc behaves like
     realloc the array may move, so cpp_context pointers obtained before
     this call should be re-read afterwards -- confirm at call sites.  */
2683 expand_context_stack (pfile)
2686 pfile->context_cap += pfile->context_cap + 20;
2687 pfile->contexts = (cpp_context *)
2688 xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2691 /* Push the context of macro NODE onto the context stack. TOKEN is
2692 the CPP_NAME token invoking the macro.

     When the expansion has paramc >= 0 (presumably a function-like
     macro -- TODO confirm) the arguments are collected by parse_args,
     with macro expansion suppressed, into a freshly allocated
     macro_args.  The new context reads from the expansion list of the
     macro, and the first token of that list is updated from TOKEN and
     its saved whitespace and BOL flags.  Returns the next token from
     cpp_get_token once the context is in place.  */
2693 static const cpp_token *
2694 push_macro_context (pfile, node, token)
2697 const cpp_token *token;
2699 unsigned char orig_flags;
2701 cpp_context *context;
     /* A context depth beyond CPP_STACK_MAX is reported as runaway
        recursion.  */
2703 if (pfile->cur_context > CPP_STACK_MAX)
2705 cpp_error (pfile, "infinite macro recursion invoking '%s'", node->name);
2709 /* Token's flags may change when parsing args containing a nested
2710 invocation of this macro. */
2711 orig_flags = token->flags & (PREV_WHITE | BOL);
2713 if (node->value.expansion->paramc >= 0)
2715 unsigned int error, prev_nme;
2717 /* Allocate room for the argument contexts, and parse them. */
2718 args = (macro_args *) xmalloc (sizeof (macro_args));
     /* One end offset is stored per parameter.  */
2719 args->ends = (unsigned int *)
2720 xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2724 args->level = pfile->cur_context;
     /* Macro expansion is suppressed while the arguments are parsed.  */
2726 prev_nme = prevent_macro_expansion (pfile);
2728 error = parse_args (pfile, node, args);
2730 restore_macro_expansion (pfile, prev_nme);
     /* Presumably reached only when parse_args reported an error -- TODO
        confirm.  */
2733 free_macro_args (args);
2738 /* Now push its context. */
2739 pfile->cur_context++;
2740 if (pfile->cur_context == pfile->context_cap)
2741 expand_context_stack (pfile);
2743 context = CURRENT_CONTEXT (pfile);
2744 context->u.list = node->value.expansion;
2745 context->args = args;
2747 context->count = context->u.list->tokens_used;
2748 context->level = pfile->cur_context;
2750 context->pushed_token = 0;
2752 /* Set the flags of the first token. We know there must
2753 be one, empty macros are a single placemarker token. */
2754 MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2756 return cpp_get_token (pfile);
2759 /* Push an argument to the current macro onto the context stack.
2760 TOKEN is the MACRO_ARG token representing the argument expansion.

     TOKEN->val.aux is the index of the argument.  The new context covers
     the slice of the argument token array that belongs to it.  If TOKEN
     has STRINGIFY_ARG the argument is converted by stringify_arg and
     that result is returned; otherwise the first raw token of the new
     context is returned.  */
2761 static const cpp_token *
2762 push_arg_context (pfile, token)
2764 const cpp_token *token;
2766 cpp_context *context;
2769 pfile->cur_context++;
2770 if (pfile->cur_context == pfile->context_cap)
2771 expand_context_stack (pfile);
2773 context = CURRENT_CONTEXT (pfile);
     /* The argument data is taken from the context directly below.  */
2774 args = context[-1].args;
     /* args->ends[i] is used as the end offset of argument i within
        args->tokens.  Argument val.aux therefore starts at
        ends[val.aux - 1] (0 for the first argument) and stops at
        ends[val.aux].  context->count holds the start offset first and
        the number of tokens afterwards.  */
2776 context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2777 context->u.arg = args->tokens + context->count;
2778 context->count = args->ends[token->val.aux] - context->count;
2781 context->level = args->level;
2782 context->flags = CONTEXT_ARG | CONTEXT_RAW;
2783 context->pushed_token = 0;
2785 /* Set the flags of the first token. There is one. */
     /* NOTE(review): u.arg[1] replaces u.arg[0] under a condition that
        is not shown in this copy of the code -- confirm.  */
2787 const cpp_token *first = context->u.arg[0];
2789 first = context->u.arg[1];
2791 MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2792 token->flags & (PREV_WHITE | BOL));
     /* The operand of # becomes a single string token right away.  */
2795 if (token->flags & STRINGIFY_ARG)
2796 return stringify_arg (pfile, token);
     /* Record whether the argument stands to the left of ##, and whether
        it is being read while a paste is in progress.  */
2798 if (token->flags & PASTE_LEFT)
2799 context->flags |= CONTEXT_PASTEL;
2800 if (pfile->paste_level)
2801 context->flags |= CONTEXT_PASTER;
2803 return get_raw_token (pfile);
2806 /* "Unget" a token. It is effectively inserted in the token queue and
2807 will be returned by the next call to get_raw_token.

     Each context has a single pushed_token slot; pushing while it is
     occupied is reported as an internal error.  A CPP_EOF is never
     stored: in the base context the read position is moved back by one
     instead, and in any other context nothing is done.  */
2809 _cpp_push_token (pfile, token)
2811 const cpp_token *token;
2813 cpp_context *context = CURRENT_CONTEXT (pfile);
2814 if (context->pushed_token)
2815 cpp_ice (pfile, "two tokens pushed in a row");
2816 if (token->type != CPP_EOF)
2817 context->pushed_token = token;
2818 /* Don't push back a directive's CPP_EOF, step back instead. */
2819 else if (pfile->cur_context == 0)
2820 pfile->contexts[0].posn--;
2823 /* Handle a preprocessing directive. TOKEN is the CPP_HASH token
2824 introducing the directive.

     The directive descriptor is taken from pfile->token_list.directive.
     After the directive name has been skipped, pending output is flushed
     to pfile->printer if there is one, and the handler of the directive
     is called.  Macro expansion is suppressed around the handler unless
     the directive has the EXPAND flag.  Finally the rest of the line is
     discarded with _cpp_skip_rest_of_line.  */
2826 process_directive (pfile, token)
2828 const cpp_token *token;
2830 const struct directive *d = pfile->token_list.directive;
     /* TOKEN[1] is the token after the #.  A name is consumed here; a
        number is left in place; anything else is an internal error.  */
2833 /* Skip over the directive name. */
2834 if (token[1].type == CPP_NAME)
2835 _cpp_get_raw_token (pfile);
2836 else if (token[1].type != CPP_NUMBER)
2837 cpp_ice (pfile, "directive begins with %s?!",
2838 token_names[token[1].type]);
     /* NOTE(review): the comment that follows has no closing delimiter
        in this copy of the file; its last line appears to be missing.  */
2840 /* Flush pending tokens at this point, in case the directive produces
2841 output. XXX Directive output won't be visible to a direct caller of
2843 if (pfile->printer && CPP_WRITTEN (pfile) - pfile->printer->written)
2844 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
2846 if (! (d->flags & EXPAND))
2847 prev_nme = prevent_macro_expansion (pfile);
2848 (void) (*d->handler) (pfile);
2849 if (! (d->flags & EXPAND))
2850 restore_macro_expansion (pfile, prev_nme);
2851 _cpp_skip_rest_of_line (pfile);
2854 /* The external interface to return the next token. All macro
2855 expansion and directive processing is handled internally, the
2856 caller only ever sees the output after preprocessing.

     Each raw token is first checked for being the start of a directive,
     then for CPP_EOF and for skipped input.  A surviving token is passed
     through maybe_paste_with_next.  If the result is a CPP_NAME and
     expansion is allowed, a macro is expanded by push_macro_context and
     any other special node by special_symbol.  */
2858 cpp_get_token (pfile)
2861 const cpp_token *token;
2864 /* Loop till we hit a non-directive, non-skipped, non-placemarker token. */
2867 token = get_raw_token (pfile);
     /* A # at the beginning of a line, on a line for which a directive
        has been recorded, starts a directive.  */
2868 if (token->flags & BOL && token->type == CPP_HASH
2869 && pfile->token_list.directive)
2871 process_directive (pfile, token);
2875 /* Short circuit EOF. */
2876 if (token->type == CPP_EOF)
     /* While pfile->skipping is set, lines that are not directives are
        dropped.  */
2879 if (pfile->skipping && ! pfile->token_list.directive)
2881 _cpp_skip_rest_of_line (pfile);
2887 /* If there's a potential control macro and we get here, then that
2888 #ifndef didn't cover the entire file and its argument shouldn't
2889 be taken as a control macro. */
2890 pfile->potential_control_macro = 0;
2892 token = maybe_paste_with_next (pfile, token);
     /* Only names can be macros.  */
2894 if (token->type != CPP_NAME)
2897 /* Is macro expansion disabled in general? */
2898 if (pfile->no_expand_level == pfile->cur_context || pfile->paste_level)
2901 node = token->val.node;
     /* T_VOID is presumably an identifier without a macro definition --
        TODO confirm.  */
2902 if (node->type == T_VOID)
2905 if (node->type == T_MACRO)
2907 if (is_macro_disabled (pfile, node->value.expansion, token))
2910 return push_macro_context (pfile, node, token);
     /* Every remaining node type is handled as a builtin.  */
2913 return special_symbol (pfile, node, token);
2916 /* Returns the next raw token, i.e. without performing macro
2917 expansion. Argument contexts are automatically entered.

     The source of the token is, in order of preference: the pushed-back
     token of the current context, the result of pop_context when the
     context is exhausted, or the next token of the context itself.  A
     CPP_MACRO_ARG token is replaced by the result of push_arg_context.  */
2918 static const cpp_token *
2919 get_raw_token (pfile)
2922 const cpp_token *result;
2923 cpp_context *context = CURRENT_CONTEXT (pfile);
     /* A token handed to _cpp_push_token is returned first, and the slot
        is emptied.  */
2925 if (context->pushed_token)
2927 result = context->pushed_token;
2928 context->pushed_token = 0;
2930 else if (context->posn == context->count)
2931 result = pop_context (pfile);
     /* Argument contexts hold an array of token pointers (u.arg).  The
        CONTEXT_RAW flag is toggled on one of the paths below; the test
        that selects that path is not shown in this copy -- TODO confirm.  */
2934 if (IS_ARG_CONTEXT (context))
2936 result = context->u.arg[context->posn++];
2939 context->flags ^= CONTEXT_RAW;
2940 result = context->u.arg[context->posn++];
2942 return result; /* Cannot be a CPP_MACRO_ARG */
     /* Other contexts hold a token list (u.list).  */
2944 result = &context->u.list->tokens[context->posn++];
2947 if (result->type == CPP_MACRO_ARG)
2948 result = push_arg_context (pfile, result);
2952 /* Internal interface to get the token without macro expanding.

     Macro expansion is suppressed at the current context level for the
     duration of one cpp_get_token call, and the previous setting is
     restored afterwards.  Directive handling and ## pasting inside
     cpp_get_token still apply.  */
2954 _cpp_get_raw_token (pfile)
2957 int prev_nme = prevent_macro_expansion (pfile);
2958 const cpp_token *result = cpp_get_token (pfile);
2959 restore_macro_expansion (pfile, prev_nme);
2963 /* A thin wrapper to lex_line. CLEAR is non-zero if the current token
2964 list should be overwritten, or zero if we need to append
2965 (typically, if we are within the arguments to a macro, or looking
2966 for the '(' to start a function-like macro invocation).

     After lexing, the token count of the base context is refreshed.  In
     append mode, saved macro argument pointers are corrected if the
     token array has moved, and a directive found on the new line is
     reported as an error.  */
2968 lex_next (pfile, clear)
2972 cpp_toklist *list = &pfile->token_list;
     /* Remember the old array and length so that a move of the array
        can be detected after lex_line.  */
2973 const cpp_token *old_list = list->tokens;
2974 unsigned int old_used = list->tokens_used;
2976 /* If we are currently processing a directive, do not advance. 6.10
2977 paragraph 2: A new-line character ends the directive even if it
2978 occurs within what would otherwise be an invocation of a
2979 function-like macro.
2981 It is possible that clear == 1 too; e.g. "#if funlike_macro ("
2982 since parse_args swallowed the directive's EOF. */
2983 if (list->directive)
2988 /* Release all temporary tokens. */
2989 _cpp_clear_toklist (list);
2990 pfile->contexts[0].posn = 0;
2991 if (pfile->temp_used)
2992 release_temp_tokens (pfile);
2995 lex_line (pfile, list);
2996 pfile->contexts[0].count = list->tokens_used;
2998 if (!clear && pfile->args)
3000 /* Fix up argument token pointers. */
     /* Every saved pointer that points into the old array is shifted by
        the byte distance between the old and the new array start.  */
3001 if (old_list != list->tokens)
3005 for (i = 0; i < pfile->args->used; i++)
3007 const cpp_token *token = pfile->args->tokens[i];
3008 if (token >= old_list && token < old_list + old_used)
3009 pfile->args->tokens[i] = (const cpp_token *)
3010 ((char *) token + ((char *) list->tokens - (char *) old_list));
3014 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3015 tokens within the list of arguments that would otherwise act as
3016 preprocessing directives, the behavior is undefined.
3018 This implementation will report a hard error and treat the
3019 'sequence of preprocessing tokens' as part of the macro argument,
     not as a directive.
3022 Note if pfile->args == 0, we're OK since we're only inside a
3023 macro argument after a '('. */
3024 if (list->directive)
     /* tokens[old_used] is the first token of the line just appended, so
        the error is reported at the start of that line.  */
3026 cpp_error_with_line (pfile, list->tokens[old_used].line,
3027 list->tokens[old_used].col,
3028 "#%s may not be used inside a macro argument",
3029 list->directive->name);
3037 /* Pops a context off the context stack. If we're at the bottom, lexes
3038 the next logical line. Returns 1 if we're at the end of the
3039 argument list to the # operator, or if it is illegal to "overflow"
3040 into the rest of the file (e.g. 6.10.3.1.1). */
3042 do_pop_context (pfile)
3045 cpp_context *context;
     /* The base context is never popped; another line is lexed instead.
        The token list is overwritten only when no_expand_level is
        UINT_MAX, the value it is given at initialization; otherwise the
        new line is appended.  */
3047 if (pfile->cur_context == 0)
3048 return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3050 /* Argument contexts, when parsing args or handling # operator
3051 return CPP_EOF at the end. */
3052 context = CURRENT_CONTEXT (pfile);
3053 if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3056 /* Free resources when leaving macro contexts. */
3058 free_macro_args (context->args);
     /* If expansion was suppressed at the level being left, the
        suppression level moves down together with the stack.  */
3060 if (pfile->cur_context == pfile->no_expand_level)
3061 pfile->no_expand_level--;
3062 pfile->cur_context--;
3067 /* Move down the context stack, and return the next raw token.

     do_pop_context does the actual pop, or lexes a new line at the base
     context.  A non-zero result from it means reading must stop here;
     the value returned in that case is not shown in this copy of the
     code (presumably an EOF token -- TODO confirm).  Otherwise the next
     raw token of the new current context is returned.  */
3068 static const cpp_token *
3072 if (do_pop_context (pfile))
3074 return get_raw_token (pfile);
3077 /* Turn off macro expansion at the current context level.

     The previous value of pfile->no_expand_level is saved in PREV_VALUE
     so that the caller can hand it to restore_macro_expansion later
     (callers in this file use the return value that way).  */
3079 prevent_macro_expansion (pfile)
3082 unsigned int prev_value = pfile->no_expand_level;
3083 pfile->no_expand_level = pfile->cur_context;
3087 /* Restore macro expansion to its previous state.

     PREV_VALUE is the value obtained from the matching call to
     prevent_macro_expansion; it is stored back into
     pfile->no_expand_level unchanged.  */
3089 restore_macro_expansion (pfile, prev_value)
3091 unsigned int prev_value;
3093 pfile->no_expand_level = prev_value;
3096 /* Used by cpperror.c to obtain the correct line and column to report in a diagnostic. */
/* Return the line number of the current token of the base token list and
   store its column through PCOL.

   While a line is being lexed (pfile->in_lex_line) the current token is
   the last one added to the list; otherwise it is the token just before
   the read position of the base context.

   NOTE(review): index - 1 is used without a visible check for index == 0,
   so this presumably relies on a valid token before the start of the
   list (the list is initialized with DUMMY_TOKEN) -- confirm.  Also,
   special_symbol passes NULL as PCOL, so the store to *pcol is presumably
   guarded by a test that is not shown in this copy -- confirm.  */
3099 _cpp_get_line (pfile, pcol)
3104 const cpp_token *cur_token;
3106 if (pfile->in_lex_line)
3107 index = pfile->token_list.tokens_used;
3109 index = pfile->contexts[0].posn;
3111 cur_token = &pfile->token_list.tokens[index - 1];
3113 *pcol = cur_token->col;
3114 return cur_token->line;
/* DSC expands a string literal into two macro arguments: the text cast
   to const U_CHAR *, and its length without the terminating NUL.  Note
   that STR is not parenthesized in the expansion, so it is only meant
   for plain string literals.  */
3117 #define DSC(str) (const U_CHAR *)str, sizeof str - 1
/* Month abbreviations, indexed by the tm_mon field of struct tm.  */
3118 static const char * const monthnames[] =
3120 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3121 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3124 /* Handle builtin macros like __FILE__.

     NODE is the identifier node of the builtin and TOKEN the token that
     named it.  The result is a string or number token chosen by
     NODE->type; it is given the flags and position of TOKEN.  Date and
     time tokens are created on first use and kept in PFILE.  */
3125 static const cpp_token *
3126 special_symbol (pfile, node, token)
3129 const cpp_token *token;
3141 ip = CPP_BUFFER (pfile);
     /* For T_BASE_FILE walk back to the outermost buffer; otherwise the
        current buffer supplies the file name.  */
3146 if (node->type == T_BASE_FILE)
3147 while (CPP_PREV_BUFFER (ip) != NULL)
3148 ip = CPP_PREV_BUFFER (ip);
3150 file = ip->nominal_fname;
3152 result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3157 case T_INCLUDE_LEVEL:
3159 int true_indepth = 0;
3161 /* Do not count the primary source file in the include level. */
3162 ip = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
3166 ip = CPP_PREV_BUFFER (ip);
3168 result = alloc_number_token (pfile, true_indepth);
3173 /* If __LINE__ is embedded in a macro, it must expand to the
3174 line of the macro's invocation, not its definition.
3175 Otherwise things like assert() will not work properly. */
3176 result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3183 #ifdef STDC_0_IN_SYSTEM_HEADERS
3184 if (CPP_IN_SYSTEM_HEADER (pfile)
3185 && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
3188 result = alloc_number_token (pfile, stdc);
3194 if (pfile->date == 0)
3196 /* Allocate __DATE__ and __TIME__ from permanent storage,
3197 and save them in pfile so we don't have to do this again.
3198 We don't generate these strings at init time because
3199 time() and localtime() are very slow on some systems. */
     /* NOTE(review): localtime can return a null pointer; TB is used
        below without a check.  */
3200 time_t tt = time (NULL);
3201 struct tm *tb = localtime (&tt);
     /* The literals passed here only fix the length of the text; the
        real values are written over them by sprintf below.  This assumes
        make_string_token stores the text in writable memory and that
        the formatted text is no longer than the placeholder -- TODO
        confirm.  */
3203 pfile->date = make_string_token
3204 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3205 pfile->time = make_string_token
3206 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3208 sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
3209 monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3210 sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
3211 tb->tm_hour, tb->tm_min, tb->tm_sec);
3213 result = node->type == T_DATE ? pfile->date: pfile->time;
3217 cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
3221 cpp_ice (pfile, "invalid special hash type");
3225 ASSIGN_FLAGS_AND_POS (result, token);
3230 /* Dump the original user's spelling of argument index ARG_NO to the
3231 macro whose expansion is LIST.

     LIST->namebuf is read as consecutive NUL-terminated strings: each
     step skips one string and its terminator (presumably ARG_NO times
     -- TODO confirm).  The string reached is written with CPP_PUTS.  */
3233 dump_param_spelling (pfile, list, arg_no)
3235 const cpp_toklist *list;
3236 unsigned int arg_no;
3238 const U_CHAR *param = list->namebuf;
3241 param += ustrlen (param) + 1;
3242 CPP_PUTS (pfile, param, ustrlen (param));
3245 /* Dump a token list to the output.

     Output starts at TOKEN and stops at the end of the used part of
     LIST.  A CPP_MACRO_ARG token is written as the spelling of its
     parameter, preceded by a space if it has PREV_WHITE and by '#' if
     it has STRINGIFY_ARG.  Other tokens go through output_token.  A
     token with PASTE_LEFT is followed by " ##".  If FLUSH is non-zero
     and PFILE has a printer, the output is passed on to
     cpp_output_tokens at the end.  */
3247 _cpp_dump_list (pfile, list, token, flush)
3249 const cpp_toklist *list;
3250 const cpp_token *token;
3253 const cpp_token *limit = list->tokens + list->tokens_used;
3254 const cpp_token *prev = 0;
3256 /* Avoid the CPP_EOF. */
3257 if (list->directive)
3260 while (token < limit)
3262 if (token->type == CPP_MACRO_ARG)
3264 if (token->flags & PREV_WHITE)
3265 CPP_PUTC (pfile, ' ');
3266 if (token->flags & STRINGIFY_ARG)
3267 CPP_PUTC (pfile, '#');
3268 dump_param_spelling (pfile, list, token->val.aux);
3271 output_token (pfile, token, prev);
3272 if (token->flags & PASTE_LEFT)
3273 CPP_PUTS (pfile, " ##", 3);
3278 if (flush && pfile->printer)
3279 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
3282 /* Initialize trigraph_map[] and set up the context stack of PFILE.

     The stack is allocated with room for 20 contexts.  Context 0 is the
     base context; it reads from pfile->token_list and starts out empty.
     Macro expansion starts out enabled (no_expand_level == UINT_MAX).

     NOTE(review): an earlier version of this comment said that
     pfile->input_buffer is allocated here; no such allocation is visible
     in the statements below -- confirm.  */
3286 _cpp_init_input_buffer (pfile)
3289 init_trigraph_map ();
3290 pfile->context_cap = 20;
3291 pfile->contexts = (cpp_context *)
3292 xmalloc (pfile->context_cap * sizeof (cpp_context));
3293 pfile->cur_context = 0;
3294 pfile->contexts[0].u.list = &pfile->token_list;
3296 pfile->contexts[0].posn = 0;
3297 pfile->contexts[0].count = 0;
3298 pfile->no_expand_level = UINT_MAX;
3300 _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3303 /* Moves to the end of the directive line, popping contexts as necessary. */
/* Discard whatever is left of the current line.

   All contexts above the base context are popped, the base context is
   marked as fully read (posn and count both zero), and the directive
   recorded for the line is cleared.  */
3306 _cpp_skip_rest_of_line (pfile)
3309 /* Get to base context. Clear parsing args and each context's flags,
3310 since these can cause pop_context to return without popping. */
3311 pfile->no_expand_level = UINT_MAX;
3312 while (pfile->cur_context != 0)
3314 pfile->contexts[pfile->cur_context].flags = 0;
3315 do_pop_context (pfile);
     /* cur_context is 0 here, so these update the base context.  */
3318 pfile->contexts[pfile->cur_context].count = 0;
3319 pfile->contexts[pfile->cur_context].posn = 0;
3320 pfile->token_list.directive = 0;
3323 /* Directive handler wrapper used by the command line option processor. */
/* Run the handler of directive DIR on the COUNT bytes of text at BUF.

   The text is pushed as a new buffer; nothing is done if the push fails.
   One line is lexed from it, the handler is called with macro expansion
   suppressed unless DIR has the EXPAND flag, the rest of the line is
   discarded, and the buffer is popped again.  */
3326 _cpp_run_directive (pfile, dir, buf, count)
3328 const struct directive *dir;
3332 if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3334 unsigned int prev_lvl = 0;
3335 /* scan the line now, else prevent_macro_expansion won't work */
3336 do_pop_context (pfile);
3337 if (! (dir->flags & EXPAND))
3338 prev_lvl = prevent_macro_expansion (pfile);
3340 (void) (*dir->handler) (pfile);
3342 if (! (dir->flags & EXPAND))
3343 restore_macro_expansion (pfile, prev_lvl);
3345 _cpp_skip_rest_of_line (pfile);
3346 cpp_pop_buffer (pfile);