1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
43 /* Tokens with SPELL_STRING store their spelling in the token list,
44 and its length in token->val.str.len. */
56 enum spell_type category;
57 const unsigned char *name;
60 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
63 #define OP(e, s) { SPELL_OPERATOR, U s },
64 #define TK(e, s) { s, U STRINGX (e) },
65 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
/* Accessors into the token_spellings table, indexed by the type of the
   token that TOKEN points to.  TOKEN_SPELL yields the entry's spelling
   category; TOKEN_NAME yields the entry's name string. */
69 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
70 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
/* Prototypes for this file's static helper routines.  Each parameter
   list is wrapped in the PARAMS macro. */
72 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
73 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
74 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
76 static int skip_block_comment PARAMS ((cpp_reader *));
77 static int skip_line_comment PARAMS ((cpp_reader *));
78 static void adjust_column PARAMS ((cpp_reader *));
79 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
80 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
81 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
82 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
83 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
84 static void unterminated PARAMS ((cpp_reader *, int));
85 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
86 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
87 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
88 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
89 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
91 static cpp_chunk *new_chunk PARAMS ((unsigned int));
92 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
96 Compares the token TOKEN to the NUL-terminated string STRING.
97 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
100 cpp_ideq (token, string)
101 const cpp_token *token;
104 if (token->type != CPP_NAME)
107 return !ustrcmp (token->val.node->name, (const U_CHAR *) string);
110 /* Call when meeting a newline. Returns the character after the newline
111 (or carriage-return newline combination), or EOF. */
113 handle_newline (buffer, newline_char)
115 cppchar_t newline_char;
117 cppchar_t next = EOF;
119 buffer->col_adjust = 0;
121 buffer->line_base = buffer->cur;
123 /* Handle CR-LF and LF-CR combinations, get the next character. */
124 if (buffer->cur < buffer->rlimit)
126 next = *buffer->cur++;
127 if (next + newline_char == '\r' + '\n')
129 buffer->line_base = buffer->cur;
130 if (buffer->cur < buffer->rlimit)
131 next = *buffer->cur++;
137 buffer->read_ahead = next;
141 /* Subroutine of skip_escaped_newlines; called when a trigraph is
142 encountered. It warns if necessary, and returns true if the
143 trigraph should be honoured. FROM_CHAR is the third character of a
144 trigraph, and presumed to be the previous character for position
147 trigraph_ok (pfile, from_char)
151 int accept = CPP_OPTION (pfile, trigraphs);
153 /* Don't warn about trigraphs in comments. */
154 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
156 cpp_buffer *buffer = pfile->buffer;
158 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
159 "trigraph ??%c converted to %c",
161 (int) _cpp_trigraph_map[from_char]);
162 else if (buffer->cur != buffer->last_Wtrigraphs)
164 buffer->last_Wtrigraphs = buffer->cur;
165 cpp_warning_with_line (pfile, buffer->lineno,
166 CPP_BUF_COL (buffer) - 2,
167 "trigraph ??%c ignored", (int) from_char);
/* Give the token being built the type T and reset buffer->read_ahead
   to EOF, so that no look-ahead character is left pending.  Wrapped in
   do { } while (0) so it can be used as a single statement. */
174 /* Assumes local variables buffer and result. */
175 #define ACCEPT_CHAR(t) \
176 do { result->type = t; buffer->read_ahead = EOF; } while (0)
178 /* When we move to multibyte character sets, add to these something
179 that saves and restores the state of the multibyte conversion
180 library. This probably involves saving and restoring a "cookie".
181 In the case of glibc it is an 8-byte structure, so is not a high
182 overhead operation. In any case, it's out of the fast path. */
/* SAVE_STATE copies the buffer's current read position (buffer->cur)
   into saved_cur; RESTORE_STATE copies saved_cur back into buffer->cur.
   Both assume that variables named buffer and saved_cur are in scope at
   the point of use. */
183 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
184 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
186 /* Skips any escaped newlines introduced by NEXT, which is either a
187 '?' or a '\\'. Returns the next character, which will also have
188 been placed in buffer->read_ahead. This routine performs
189 preprocessing stages 1 and 2 of the ISO C standard. */
191 skip_escaped_newlines (buffer, next)
195 /* Only do this if we apply stages 1 and 2. */
196 if (!buffer->from_stage3)
199 const unsigned char *saved_cur;
204 if (buffer->cur == buffer->rlimit)
210 next1 = *buffer->cur++;
211 if (next1 != '?' || buffer->cur == buffer->rlimit)
217 next1 = *buffer->cur++;
218 if (!_cpp_trigraph_map[next1]
219 || !trigraph_ok (buffer->pfile, next1))
225 /* We have a full trigraph here. */
226 next = _cpp_trigraph_map[next1];
227 if (next != '\\' || buffer->cur == buffer->rlimit)
232 /* We have a backslash, and room for at least one more character. */
236 next1 = *buffer->cur++;
237 if (!is_nvspace (next1))
241 while (buffer->cur < buffer->rlimit);
243 if (!is_vspace (next1))
249 if (space && !buffer->pfile->state.lexing_comment)
250 cpp_warning (buffer->pfile,
251 "backslash and newline separated by space");
253 next = handle_newline (buffer, next1);
255 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
257 while (next == '\\' || next == '?');
260 buffer->read_ahead = next;
264 /* Obtain the next character, after trigraph conversion and skipping
265 an arbitrary string of escaped newlines. The common case of no
266 trigraphs or escaped newlines falls through quickly. */
268 get_effective_char (buffer)
271 cppchar_t next = EOF;
273 if (buffer->cur < buffer->rlimit)
275 next = *buffer->cur++;
277 /* '?' can introduce trigraphs (and therefore backslash); '\\'
278 can introduce escaped newlines, which we want to skip, or
279 UCNs, which, depending upon lexer state, we will handle in
281 if (next == '?' || next == '\\')
282 next = skip_escaped_newlines (buffer, next);
285 buffer->read_ahead = next;
289 /* Skip a C-style block comment. We find the end of the comment by
290 seeing if an asterisk is before every '/' we encounter. Returns
291 non-zero if comment terminated by EOF, zero otherwise. */
293 skip_block_comment (pfile)
296 cpp_buffer *buffer = pfile->buffer;
297 cppchar_t c = EOF, prevc = EOF;
299 pfile->state.lexing_comment = 1;
300 while (buffer->cur != buffer->rlimit)
302 prevc = c, c = *buffer->cur++;
305 /* FIXME: For speed, create a new character class of characters
306 of interest inside block comments. */
307 if (c == '?' || c == '\\')
308 c = skip_escaped_newlines (buffer, c);
310 /* People like decorating comments with '*', so check for '/'
311 instead for efficiency. */
317 /* Warn about potential nested comments, but not if the '/'
318 comes immediately before the true comment delimiter.
319 Don't bother to get it right across escaped newlines. */
320 if (CPP_OPTION (pfile, warn_comments)
321 && buffer->cur != buffer->rlimit)
323 prevc = c, c = *buffer->cur++;
324 if (c == '*' && buffer->cur != buffer->rlimit)
326 prevc = c, c = *buffer->cur++;
328 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
329 CPP_BUF_COL (buffer),
330 "\"/*\" within comment");
335 else if (is_vspace (c))
337 prevc = c, c = handle_newline (buffer, c);
341 adjust_column (pfile);
344 pfile->state.lexing_comment = 0;
345 buffer->read_ahead = EOF;
346 return c != '/' || prevc != '*';
349 /* Skip a C++ line comment. Handles escaped newlines. Returns
350 non-zero if a multiline comment. The following new line, if any,
351 is left in buffer->read_ahead. */
353 skip_line_comment (pfile)
356 cpp_buffer *buffer = pfile->buffer;
357 unsigned int orig_lineno = buffer->lineno;
360 pfile->state.lexing_comment = 1;
364 if (buffer->cur == buffer->rlimit)
368 if (c == '?' || c == '\\')
369 c = skip_escaped_newlines (buffer, c);
371 while (!is_vspace (c));
373 pfile->state.lexing_comment = 0;
374 buffer->read_ahead = c; /* Leave any newline for caller. */
375 return orig_lineno != buffer->lineno;
378 /* pfile->buffer->cur is one beyond the \t character. Update
379 col_adjust so we track the column correctly. */
381 adjust_column (pfile)
384 cpp_buffer *buffer = pfile->buffer;
385 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
387 /* Round it up to a multiple of the tabstop, but subtract 1 since the
388 tab itself occupies a character position. */
389 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
390 - col % CPP_OPTION (pfile, tabstop)) - 1;
393 /* Skips whitespace, saving the next non-whitespace character.
394 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
395 tokens might be assigned an incorrect column. */
397 skip_whitespace (pfile, c)
401 cpp_buffer *buffer = pfile->buffer;
402 unsigned int warned = 0;
406 /* Horizontal space always OK. */
410 adjust_column (pfile);
411 /* Just \f \v or \0 left. */
416 cpp_warning (pfile, "null character(s) ignored");
420 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
421 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
422 CPP_BUF_COL (buffer),
423 "%s in preprocessing directive",
424 c == '\f' ? "form feed" : "vertical tab");
427 if (buffer->cur == buffer->rlimit)
431 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
432 while (is_nvspace (c));
434 /* Remember the next character. */
435 buffer->read_ahead = c;
438 /* See if the characters of a number token are valid in a name (no
441 name_p (pfile, string)
443 const cpp_string *string;
447 for (i = 0; i < string->len; i++)
448 if (!is_idchar (string->text[i]))
454 /* Parse an identifier, skipping embedded backslash-newlines.
455 Calculate the hash value of the token while parsing, for improved
456 performance. The hashing algorithm *must* match cpp_lookup(). */
458 static cpp_hashnode *
459 parse_identifier (pfile, c)
463 cpp_hashnode *result;
464 cpp_buffer *buffer = pfile->buffer;
465 unsigned char *dest, *limit;
466 unsigned int r = 0, saw_dollar = 0;
468 dest = POOL_FRONT (&pfile->ident_pool);
469 limit = POOL_LIMIT (&pfile->ident_pool);
475 /* Need room for terminating null. */
476 if (dest + 1 >= limit)
477 limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);
486 if (buffer->cur == buffer->rlimit)
491 while (is_idchar (c));
493 /* Potential escaped newline? */
494 if (c != '?' && c != '\\')
496 c = skip_escaped_newlines (buffer, c);
498 while (is_idchar (c));
500 /* Remember the next character. */
501 buffer->read_ahead = c;
503 /* $ is not an identifier character in the standard, but is commonly
504 accepted as an extension. Don't warn about it in skipped
505 conditional blocks. */
506 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
507 cpp_pedwarn (pfile, "'$' character(s) in identifier");
509 /* Identifiers are null-terminated. */
512 /* This routine commits the memory if necessary. */
513 result = _cpp_lookup_with_hash (pfile,
514 dest - POOL_FRONT (&pfile->ident_pool), r);
516 /* Some identifiers require diagnostics when lexed. */
517 if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
519 /* It is allowed to poison the same identifier twice. */
520 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
521 cpp_error (pfile, "attempt to use poisoned \"%s\"", result->name);
523 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
524 replacement list of a variadic macro. */
525 if (result == pfile->spec_nodes.n__VA_ARGS__
526 && !pfile->state.va_args_ok)
527 cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
533 /* Parse a number, skipping embedded backslash-newlines. */
535 parse_number (pfile, number, c, leading_period)
541 cpp_buffer *buffer = pfile->buffer;
542 cpp_pool *pool = &pfile->ident_pool;
543 unsigned char *dest, *limit;
545 dest = POOL_FRONT (pool);
546 limit = POOL_LIMIT (pool);
548 /* Place a leading period. */
552 limit = _cpp_next_chunk (pool, 0, &dest);
560 /* Need room for terminating null. */
561 if (dest + 1 >= limit)
562 limit = _cpp_next_chunk (pool, 0, &dest);
566 if (buffer->cur == buffer->rlimit)
571 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
573 /* Potential escaped newline? */
574 if (c != '?' && c != '\\')
576 c = skip_escaped_newlines (buffer, c);
578 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
580 /* Remember the next character. */
581 buffer->read_ahead = c;
583 /* Null-terminate the number. */
586 number->text = POOL_FRONT (pool);
587 number->len = dest - number->text;
588 POOL_COMMIT (pool, number->len + 1);
591 /* Subroutine of parse_string. Emits error for unterminated strings. */
593 unterminated (pfile, term)
597 cpp_error (pfile, "missing terminating %c character", term);
599 if (term == '\"' && pfile->mlstring_pos.line
600 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
602 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
603 pfile->mlstring_pos.col,
604 "possible start of unterminated string literal");
605 pfile->mlstring_pos.line = 0;
609 /* Subroutine of parse_string. */
611 unescaped_terminator_p (pfile, dest)
613 const unsigned char *dest;
615 const unsigned char *start, *temp;
617 /* In #include-style directives, terminators are not escapable. */
618 if (pfile->state.angled_headers)
621 start = POOL_FRONT (&pfile->ident_pool);
623 /* An odd number of consecutive backslashes represents an escaped
625 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
628 return ((dest - temp) & 1) == 0;
631 /* Parses a string, character constant, or angle-bracketed header file
632 name. Handles embedded trigraphs and escaped newlines.
634 Multi-line strings are allowed, but they are deprecated within
637 parse_string (pfile, token, terminator)
640 cppchar_t terminator;
642 cpp_buffer *buffer = pfile->buffer;
643 cpp_pool *pool = &pfile->ident_pool;
644 unsigned char *dest, *limit;
646 unsigned int nulls = 0;
648 dest = POOL_FRONT (pool);
649 limit = POOL_LIMIT (pool);
653 if (buffer->cur == buffer->rlimit)
656 unterminated (pfile, terminator);
662 /* Handle trigraphs, escaped newlines etc. */
663 if (c == '?' || c == '\\')
664 c = skip_escaped_newlines (buffer, c);
666 if (c == terminator && unescaped_terminator_p (pfile, dest))
671 else if (is_vspace (c))
673 /* In assembly language, silently terminate string and
674 character literals at end of line. This is a kludge
675 around not knowing where comments are. */
676 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
679 /* Character constants and header names may not extend over
680 multiple lines. In Standard C, neither may strings.
681 Unfortunately, we accept multiline strings as an
682 extension, except in #include family directives. */
683 if (terminator != '"' || pfile->state.angled_headers)
685 unterminated (pfile, terminator);
689 if (pfile->mlstring_pos.line == 0)
691 pfile->mlstring_pos = pfile->lexer_pos;
692 if (CPP_PEDANTIC (pfile))
693 cpp_pedwarn (pfile, "multi-line string constant");
696 handle_newline (buffer, c); /* Stores to read_ahead. */
702 cpp_warning (pfile, "null character(s) preserved in literal");
705 /* No terminating null for strings - they could contain nulls. */
707 limit = _cpp_next_chunk (pool, 0, &dest);
710 /* If we had a new line, the next character is in read_ahead. */
713 c = buffer->read_ahead;
718 /* Remember the next character. */
719 buffer->read_ahead = c;
721 token->val.str.text = POOL_FRONT (pool);
722 token->val.str.len = dest - token->val.str.text;
723 POOL_COMMIT (pool, token->val.str.len);
726 /* The stored comment includes the comment start and any terminator. */
728 save_comment (pfile, token, from)
731 const unsigned char *from;
733 unsigned char *buffer;
736 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
737 /* C++ comments probably (not definitely) have moved past a new
738 line, which we don't want to save in the comment. */
739 if (pfile->buffer->read_ahead != EOF)
741 buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
743 token->type = CPP_COMMENT;
744 token->val.str.len = len;
745 token->val.str.text = buffer;
748 memcpy (buffer + 1, from, len - 1);
751 /* Subroutine of lex_token to handle '%'. A little tricky, since we
752 want to avoid stepping back when lexing %:%X. */
754 lex_percent (buffer, result)
760 result->type = CPP_MOD;
761 /* Parsing %:%X could leave an extra character. */
762 if (buffer->extra_char == EOF)
763 c = get_effective_char (buffer);
766 c = buffer->read_ahead = buffer->extra_char;
767 buffer->extra_char = EOF;
771 ACCEPT_CHAR (CPP_MOD_EQ);
772 else if (CPP_OPTION (buffer->pfile, digraphs))
776 result->flags |= DIGRAPH;
777 ACCEPT_CHAR (CPP_HASH);
778 if (get_effective_char (buffer) == '%')
780 buffer->extra_char = get_effective_char (buffer);
781 if (buffer->extra_char == ':')
783 buffer->extra_char = EOF;
784 ACCEPT_CHAR (CPP_PASTE);
787 /* We'll catch the extra_char when we're called back. */
788 buffer->read_ahead = '%';
793 result->flags |= DIGRAPH;
794 ACCEPT_CHAR (CPP_CLOSE_BRACE);
799 /* Subroutine of lex_token to handle '.'. This is tricky, since we
800 want to avoid stepping back when lexing '...' or '.123'. In the
801 latter case we should also set a flag for parse_number. */
803 lex_dot (pfile, result)
807 cpp_buffer *buffer = pfile->buffer;
810 /* Parsing ..X could leave an extra character. */
811 if (buffer->extra_char == EOF)
812 c = get_effective_char (buffer);
815 c = buffer->read_ahead = buffer->extra_char;
816 buffer->extra_char = EOF;
819 /* All known character sets have 0...9 contiguous. */
820 if (c >= '0' && c <= '9')
822 result->type = CPP_NUMBER;
823 parse_number (pfile, &result->val.str, c, 1);
827 result->type = CPP_DOT;
830 buffer->extra_char = get_effective_char (buffer);
831 if (buffer->extra_char == '.')
833 buffer->extra_char = EOF;
834 ACCEPT_CHAR (CPP_ELLIPSIS);
837 /* We'll catch the extra_char when we're called back. */
838 buffer->read_ahead = '.';
840 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
841 ACCEPT_CHAR (CPP_DOT_STAR);
846 _cpp_lex_token (pfile, result)
852 const unsigned char *comment_start;
856 bol = pfile->state.next_bol;
858 buffer = pfile->buffer;
859 pfile->state.next_bol = 0;
860 result->flags = buffer->saved_flags;
861 buffer->saved_flags = 0;
863 pfile->lexer_pos.line = buffer->lineno;
865 pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
867 c = buffer->read_ahead;
868 if (c == EOF && buffer->cur < buffer->rlimit)
871 pfile->lexer_pos.col++;
875 buffer->read_ahead = EOF;
879 /* Non-empty files should end in a newline. Ignore for command
880 line and _Pragma buffers. */
881 if (pfile->lexer_pos.col != 0 && !buffer->from_stage3)
882 cpp_pedwarn (pfile, "no newline at end of file");
883 pfile->state.next_bol = 1;
884 pfile->skipping = 0; /* In case missing #endif. */
885 result->type = CPP_EOF;
886 /* Don't do MI optimisation. */
889 case ' ': case '\t': case '\f': case '\v': case '\0':
890 skip_whitespace (pfile, c);
891 result->flags |= PREV_WHITE;
894 case '\n': case '\r':
895 if (!pfile->state.in_directive)
897 handle_newline (buffer, c);
899 pfile->lexer_pos.output_line = buffer->lineno;
900 /* This is a new line, so clear any white space flag.
901 Newlines in arguments are white space (6.10.3.10);
902 parse_arg takes care of that. */
903 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
907 /* Don't let directives spill over to the next line. */
908 buffer->read_ahead = c;
909 pfile->state.next_bol = 1;
910 result->type = CPP_EOF;
911 /* Don't break; pfile->skipping might be true. */
916 /* These could start an escaped newline, or '?' a trigraph. Let
917 skip_escaped_newlines do all the work. */
919 unsigned int lineno = buffer->lineno;
921 c = skip_escaped_newlines (buffer, c);
922 if (lineno != buffer->lineno)
923 /* We had at least one escaped newline of some sort, and the
924 next character is in buffer->read_ahead. Update the
925 token's line and column. */
928 /* We are either the original '?' or '\\', or a trigraph. */
929 result->type = CPP_QUERY;
930 buffer->read_ahead = EOF;
938 case '0': case '1': case '2': case '3': case '4':
939 case '5': case '6': case '7': case '8': case '9':
940 result->type = CPP_NUMBER;
941 parse_number (pfile, &result->val.str, c, 0);
945 if (!CPP_OPTION (pfile, dollars_in_ident))
947 /* Fall through... */
950 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
951 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
952 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
953 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
955 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
956 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
957 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
958 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
960 result->type = CPP_NAME;
961 result->val.node = parse_identifier (pfile, c);
963 /* 'L' may introduce wide characters or strings. */
964 if (result->val.node == pfile->spec_nodes.n_L)
966 c = buffer->read_ahead; /* For make_string. */
967 if (c == '\'' || c == '"')
969 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
973 /* Convert named operators to their proper types. */
974 else if (result->val.node->flags & NODE_OPERATOR)
976 result->flags |= NAMED_OP;
977 result->type = result->val.node->value.operator;
983 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
985 parse_string (pfile, result, c);
989 /* A potential block or line comment. */
990 comment_start = buffer->cur;
991 result->type = CPP_DIV;
992 c = get_effective_char (buffer);
994 ACCEPT_CHAR (CPP_DIV_EQ);
995 if (c != '/' && c != '*')
1000 if (skip_block_comment (pfile))
1001 cpp_error_with_line (pfile, pfile->lexer_pos.line,
1002 pfile->lexer_pos.col,
1003 "unterminated comment");
1007 if (!CPP_OPTION (pfile, cplusplus_comments)
1008 && !CPP_IN_SYSTEM_HEADER (pfile))
1011 /* Warn about comments only if pedantically GNUC89, and not
1012 in system headers. */
1013 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1014 && ! buffer->warned_cplusplus_comments)
1017 "C++ style comments are not allowed in ISO C89");
1019 "(this will be reported only once per input file)");
1020 buffer->warned_cplusplus_comments = 1;
1023 /* Skip_line_comment updates buffer->read_ahead. */
1024 if (skip_line_comment (pfile))
1025 cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1026 pfile->lexer_pos.col,
1027 "multi-line comment");
1030 /* Skipping the comment has updated buffer->read_ahead. */
1031 if (!pfile->state.save_comments)
1033 result->flags |= PREV_WHITE;
1037 /* Save the comment as a token in its own right. */
1038 save_comment (pfile, result, comment_start);
1039 /* Don't do MI optimisation. */
1043 if (pfile->state.angled_headers)
1045 result->type = CPP_HEADER_NAME;
1046 c = '>'; /* terminator. */
1050 result->type = CPP_LESS;
1051 c = get_effective_char (buffer);
1053 ACCEPT_CHAR (CPP_LESS_EQ);
1056 ACCEPT_CHAR (CPP_LSHIFT);
1057 if (get_effective_char (buffer) == '=')
1058 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1060 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1062 ACCEPT_CHAR (CPP_MIN);
1063 if (get_effective_char (buffer) == '=')
1064 ACCEPT_CHAR (CPP_MIN_EQ);
1066 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1068 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1069 result->flags |= DIGRAPH;
1071 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1073 ACCEPT_CHAR (CPP_OPEN_BRACE);
1074 result->flags |= DIGRAPH;
1079 result->type = CPP_GREATER;
1080 c = get_effective_char (buffer);
1082 ACCEPT_CHAR (CPP_GREATER_EQ);
1085 ACCEPT_CHAR (CPP_RSHIFT);
1086 if (get_effective_char (buffer) == '=')
1087 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1089 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1091 ACCEPT_CHAR (CPP_MAX);
1092 if (get_effective_char (buffer) == '=')
1093 ACCEPT_CHAR (CPP_MAX_EQ);
1098 lex_percent (buffer, result);
1099 if (result->type == CPP_HASH)
1104 lex_dot (pfile, result);
1108 result->type = CPP_PLUS;
1109 c = get_effective_char (buffer);
1111 ACCEPT_CHAR (CPP_PLUS_EQ);
1113 ACCEPT_CHAR (CPP_PLUS_PLUS);
1117 result->type = CPP_MINUS;
1118 c = get_effective_char (buffer);
1121 ACCEPT_CHAR (CPP_DEREF);
1122 if (CPP_OPTION (pfile, cplusplus)
1123 && get_effective_char (buffer) == '*')
1124 ACCEPT_CHAR (CPP_DEREF_STAR);
1127 ACCEPT_CHAR (CPP_MINUS_EQ);
1129 ACCEPT_CHAR (CPP_MINUS_MINUS);
1133 result->type = CPP_MULT;
1134 if (get_effective_char (buffer) == '=')
1135 ACCEPT_CHAR (CPP_MULT_EQ);
1139 result->type = CPP_EQ;
1140 if (get_effective_char (buffer) == '=')
1141 ACCEPT_CHAR (CPP_EQ_EQ);
1145 result->type = CPP_NOT;
1146 if (get_effective_char (buffer) == '=')
1147 ACCEPT_CHAR (CPP_NOT_EQ);
1151 result->type = CPP_AND;
1152 c = get_effective_char (buffer);
1154 ACCEPT_CHAR (CPP_AND_EQ);
1156 ACCEPT_CHAR (CPP_AND_AND);
1160 c = buffer->extra_char; /* Can be set by error condition below. */
1163 buffer->read_ahead = c;
1164 buffer->extra_char = EOF;
1167 c = get_effective_char (buffer);
1171 ACCEPT_CHAR (CPP_PASTE);
1175 result->type = CPP_HASH;
1179 if (pfile->state.parsing_args)
1181 /* 6.10.3 paragraph 11: If there are sequences of
1182 preprocessing tokens within the list of arguments that
1183 would otherwise act as preprocessing directives, the
1184 behavior is undefined.
1186 This implementation will report a hard error, terminate
1187 the macro invocation, and proceed to process the
1190 "directives may not be used inside a macro argument");
1192 /* Put a '#' in lookahead, return CPP_EOF for parse_arg. */
1193 buffer->extra_char = buffer->read_ahead;
1194 buffer->read_ahead = '#';
1195 pfile->state.next_bol = 1;
1196 result->type = CPP_EOF;
1198 /* Get whitespace right - newline_in_args sets it. */
1199 if (pfile->lexer_pos.col == 1)
1200 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1204 /* This is the hash introducing a directive. */
1205 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1206 goto done_directive; /* bol still 1. */
1207 /* This is in fact an assembler #. */
1213 result->type = CPP_OR;
1214 c = get_effective_char (buffer);
1216 ACCEPT_CHAR (CPP_OR_EQ);
1218 ACCEPT_CHAR (CPP_OR_OR);
1222 result->type = CPP_XOR;
1223 if (get_effective_char (buffer) == '=')
1224 ACCEPT_CHAR (CPP_XOR_EQ);
1228 result->type = CPP_COLON;
1229 c = get_effective_char (buffer);
1230 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1231 ACCEPT_CHAR (CPP_SCOPE);
1232 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1234 result->flags |= DIGRAPH;
1235 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1239 case '~': result->type = CPP_COMPL; break;
1240 case ',': result->type = CPP_COMMA; break;
1241 case '(': result->type = CPP_OPEN_PAREN; break;
1242 case ')': result->type = CPP_CLOSE_PAREN; break;
1243 case '[': result->type = CPP_OPEN_SQUARE; break;
1244 case ']': result->type = CPP_CLOSE_SQUARE; break;
1245 case '{': result->type = CPP_OPEN_BRACE; break;
1246 case '}': result->type = CPP_CLOSE_BRACE; break;
1247 case ';': result->type = CPP_SEMICOLON; break;
1250 if (CPP_OPTION (pfile, objc))
1252 /* In Objective C, '@' may begin keywords or strings, like
1253 @keyword or @"string". It would be nice to call
1254 get_effective_char here and test the result. However, we
1255 would then need to pass 2 characters to parse_identifier,
1256 making it ugly and slowing down its main loop. Instead,
1257 we assume we have an identifier, and recover if not. */
1258 result->type = CPP_NAME;
1259 result->val.node = parse_identifier (pfile, c);
1260 if (result->val.node->length != 1)
1263 /* OK, so it wasn't an identifier. Maybe a string? */
1264 if (buffer->read_ahead == '"')
1267 ACCEPT_CHAR (CPP_OSTRING);
1275 result->type = CPP_OTHER;
1280 if (pfile->skipping)
1283 /* If not in a directive, this token invalidates controlling macros. */
1284 if (!pfile->state.in_directive)
1285 pfile->mi_state = MI_FAILED;
1288 /* An upper bound on the number of bytes needed to spell a token,
1289 including preceding whitespace. */
1291 cpp_token_len (token)
1292 const cpp_token *token;
1296 switch (TOKEN_SPELL (token))
1298 default: len = 0; break;
1299 case SPELL_STRING: len = token->val.str.len; break;
1300 case SPELL_IDENT: len = token->val.node->length; break;
1302 /* 1 for whitespace, 4 for comment delimiters. */
1306 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1307 already contain enough space to hold the token's spelling.
1308 Returns a pointer to the character after the last character
1311 cpp_spell_token (pfile, token, buffer)
1312 cpp_reader *pfile; /* Would be nice to be rid of this... */
1313 const cpp_token *token;
1314 unsigned char *buffer;
1316 switch (TOKEN_SPELL (token))
1318 case SPELL_OPERATOR:
1320 const unsigned char *spelling;
1323 if (token->flags & DIGRAPH)
1324 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1325 else if (token->flags & NAMED_OP)
1328 spelling = TOKEN_NAME (token);
1330 while ((c = *spelling++) != '\0')
1337 memcpy (buffer, token->val.node->name, token->val.node->length);
1338 buffer += token->val.node->length;
1343 int left, right, tag;
1344 switch (token->type)
1346 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1347 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1348 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1349 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1350 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1351 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1352 default: left = '\0'; right = '\0'; tag = '\0'; break;
1354 if (tag) *buffer++ = tag;
1355 if (left) *buffer++ = left;
1356 memcpy (buffer, token->val.str.text, token->val.str.len);
1357 buffer += token->val.str.len;
1358 if (right) *buffer++ = right;
1363 *buffer++ = token->val.c;
1367 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1374 /* Returns a token as a null-terminated string. The string is
1375 temporary, and automatically freed later. Useful for diagnostics. */
1377 cpp_token_as_text (pfile, token)
1379 const cpp_token *token;
/* Size the buffer with cpp_token_len and take it from the reader's
   ident_pool.  */
1381 unsigned int len = cpp_token_len (token);
1382 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
/* Spell the token at START; END receives cpp_spell_token's return
   value, i.e. the position just past the spelled text.
   NOTE(review): the terminating-NUL store and the return statement
   are not visible in this listing.  */
1384 end = cpp_spell_token (pfile, token, start);
1390 /* Used by C front ends. Should really move to using cpp_token_as_text. */
1392 cpp_type2name (type)
1393 enum cpp_ttype type;
/* Returns the name recorded for TYPE in the token_spellings table.
   TYPE is used directly as the index, with no range check, and the
   cast converts the table's unsigned char pointer to plain char.  */
1395 return (const char *) token_spellings[type].name;
1398 /* Writes the spelling of token to FP. Separate from cpp_spell_token
1399 for efficiency - to avoid double-buffering. Also, outputs a space
1400 if PREV_WHITE is flagged. */
1402 cpp_output_token (token, fp)
1403 const cpp_token *token;
/* NOTE(review): the statement controlled by the PREV_WHITE test is
   not visible here; per this function's description it outputs a
   space before the token.  */
1406 if (token->flags & PREV_WHITE)
/* From here the structure mirrors cpp_spell_token, writing to FP
   instead of storing into a buffer.  */
1409 switch (TOKEN_SPELL (token))
1411 case SPELL_OPERATOR:
1413 const unsigned char *spelling;
/* Digraph-flagged operators are spelled from digraph_spellings,
   indexed relative to CPP_FIRST_DIGRAPH; the other assignment
   visible here uses TOKEN_NAME.  NOTE(review): the statement
   controlled by the NAMED_OP test is not visible in this listing.  */
1415 if (token->flags & DIGRAPH)
1416 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1417 else if (token->flags & NAMED_OP)
1420 spelling = TOKEN_NAME (token);
1422 ufputs (spelling, fp);
/* ufputs is given no length, so this presumably relies on the node
   name being NUL-terminated (cpp_spell_token copies
   val.node->length bytes instead) -- TODO confirm.  */
1428 ufputs (token->val.node->name, fp);
/* String-like tokens: pick delimiters and an optional prefix
   character from the token type; zero means "emit nothing".  */
1433 int left, right, tag;
1434 switch (token->type)
1436 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1437 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1438 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1439 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1440 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1441 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1442 default: left = '\0'; right = '\0'; tag = '\0'; break;
/* The text is written with an explicit length (val.str.len bytes).
   The results of putc and fwrite are not checked.  */
1444 if (tag) putc (tag, fp);
1445 if (left) putc (left, fp);
1446 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1447 if (right) putc (right, fp);
/* Single-character payload.  */
1452 putc (token->val.c, fp);
1456 /* An error, most probably. */
1461 /* Compare two tokens. */
1463 _cpp_equiv_tokens (a, b)
1464 const cpp_token *a, *b;
/* Tokens are only compared further when both their type and their
   complete flag word agree; the payload comparison then depends on
   A's spelling category.  NOTE(review): several case labels and the
   result returned when this test fails are not visible in this
   listing.  */
1466 if (a->type == b->type && a->flags == b->flags)
1467 switch (TOKEN_SPELL (a))
1469 default: /* Keep compiler happy. */
1470 case SPELL_OPERATOR:
1473 return a->val.c == b->val.c; /* Character. */
/* Only CPP_MACRO_ARG tokens need their argument numbers compared;
   any other type reaching this return is considered equal.  */
1475 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
/* Compares the node pointers themselves, not the name text.  */
1477 return a->val.node == b->val.node;
/* String payloads: the lengths must match before the bytes are
   compared with memcmp.  */
1479 return (a->val.str.len == b->val.str.len
1480 && !memcmp (a->val.str.text, b->val.str.text,
1488 /* Compare two token lists. */
1490 _cpp_equiv_toklists (a, b)
1491 const struct toklist *a, *b;
1493 unsigned int i, count;
/* The token count of a list is the distance between its FIRST and
   LIMIT pointers.  Lists are first checked for equal counts.
   NOTE(review): the statement taken on a count mismatch is not
   visible in this listing.  */
1495 count = a->limit - a->first;
1496 if (count != (b->limit - b->first))
/* Compare the lists position by position with _cpp_equiv_tokens.
   NOTE(review): the statements run on a mismatch and after the loop
   are not visible here.  */
1499 for (i = 0; i < count; i++)
1500 if (! _cpp_equiv_tokens (&a->first[i], &b->first[i]))
1507 /* Determine whether two tokens can be pasted together, and if so,
1508 what the resulting token is. Returns CPP_EOF if the tokens cannot
1509 be pasted, or the appropriate type for the merged token if they
1512 cpp_can_paste (pfile, token1, token2, digraph)
1514 const cpp_token *token1, *token2;
1517 enum cpp_ttype a = token1->type, b = token2->type;
1518 int cxx = CPP_OPTION (pfile, cplusplus);
1520 /* Treat named operators as if they were ordinary NAMEs. */
1521 if (token1->flags & NAMED_OP)
1523 if (token2->flags & NAMED_OP)
/* A is the type of the left operand and B the type of the right.
   For any A at or below CPP_LAST_EQ followed by CPP_EQ, the result
   is A displaced by the distance from CPP_EQ to CPP_EQ_EQ, so this
   relies on the enum ordering of the plain and '=' forms.
   NOTE(review): many case labels of the switch that follows are
   not visible in this listing; the groups below are presumably
   keyed on A.  */
1526 if (a <= CPP_LAST_EQ && b == CPP_EQ)
1527 return a + (CPP_EQ_EQ - CPP_EQ);
/* Results gated on CXX are only returned when the cplusplus option
   is set.  */
1532 if (b == a) return CPP_RSHIFT;
1533 if (b == CPP_QUERY && cxx) return CPP_MAX;
1534 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
1537 if (b == a) return CPP_LSHIFT;
1538 if (b == CPP_QUERY && cxx) return CPP_MIN;
1539 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
/* Results that are digraph spellings also store 1 through DIGRAPH,
   and are only produced when the digraphs option is enabled.
   NOTE(review): the tests on B selecting each digraph below are not
   visible here.  */
1540 if (CPP_OPTION (pfile, digraphs))
1543 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1545 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
1549 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1550 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1551 case CPP_OR: if (b == a) return CPP_OR_OR; break;
1554 if (b == a) return CPP_MINUS_MINUS;
1555 if (b == CPP_GREATER) return CPP_DEREF;
1558 if (b == a && cxx) return CPP_SCOPE;
1559 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1560 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1564 if (CPP_OPTION (pfile, digraphs))
1566 if (b == CPP_GREATER)
1567 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1569 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1573 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1576 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1577 if (b == CPP_NUMBER) return CPP_NUMBER;
/* Two tokens of the same type give CPP_PASTE only when both or
   neither carry the DIGRAPH flag; *DIGRAPH receives TOKEN1's masked
   flag value rather than 1.  */
1581 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1583 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1587 if (b == CPP_NAME) return CPP_NAME;
/* NOTE(review): the first halves of the next three conditions are
   not visible here.  The visible halves require the right-hand text
   to satisfy name_p, or the left-hand node to be
   pfile->spec_nodes.n_L, before yielding a name or a wide
   char/string.  */
1589 && name_p (pfile, &token2->val.str)) return CPP_NAME;
1591 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1593 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1597 if (b == CPP_NUMBER) return CPP_NUMBER;
1598 if (b == CPP_NAME) return CPP_NUMBER;
1599 if (b == CPP_DOT) return CPP_NUMBER;
1600 /* Numbers cannot have length zero, so this is safe. */
/* The last character of TOKEN1's text is passed to VALID_SIGN to
   decide whether a following '+' or '-' can be absorbed.
   NOTE(review): the statement returned on success is not visible.  */
1601 if ((b == CPP_PLUS || b == CPP_MINUS)
1602 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
/* With the objc option set, an '@' character token can combine with
   a following name or string.  */
1607 if (CPP_OPTION (pfile, objc) && token1->val.c == '@')
1609 if (b == CPP_NAME) return CPP_NAME;
1610 if (b == CPP_STRING) return CPP_OSTRING;
1620 /* Returns nonzero if a space should be inserted to avoid an
1621 accidental token paste for output. For simplicity, it is
1622 conservative, and occasionally advises a space where one is not
1623 needed, e.g. "." and ".2". */
1626 cpp_avoid_paste (pfile, token1, token2)
1628 const cpp_token *token1, *token2;
1630 enum cpp_ttype a = token1->type, b = token2->type;
/* NOTE(review): the statements controlled by the two NAMED_OP tests
   are not visible in this listing.  */
1633 if (token1->flags & NAMED_OP)
1635 if (token2->flags & NAMED_OP)
/* C becomes the first character of TOKEN2's spelling when TOKEN2 is
   digraph-flagged or its type is in the SPELL_OPERATOR category.
   NOTE(review): the declaration and initial value of C are not
   visible here.  */
1639 if (token2->flags & DIGRAPH)
1640 c = digraph_spellings[b - CPP_FIRST_DIGRAPH][0];
1641 else if (token_spellings[b].category == SPELL_OPERATOR)
1642 c = token_spellings[b].name[0];
1644 /* Quickly get everything that can paste with an '='. */
1645 if (a <= CPP_LAST_EQ && c == '=')
/* Each case is keyed on the left token's type A and tests the right
   token either through its first character C or its type B.  */
1650 case CPP_GREATER: return c == '>' || c == '?';
1651 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1652 case CPP_PLUS: return c == '+';
1653 case CPP_MINUS: return c == '-' || c == '>';
1654 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1655 case CPP_MOD: return c == ':' || c == '>';
1656 case CPP_AND: return c == '&';
1657 case CPP_OR: return c == '|';
1658 case CPP_COLON: return c == ':' || c == '>';
1659 case CPP_DEREF: return c == '*';
1660 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1661 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1662 case CPP_NAME: return ((b == CPP_NUMBER
1663 && name_p (pfile, &token2->val.str))
1665 || b == CPP_CHAR || b == CPP_STRING); /* L */
1666 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1667 || c == '.' || c == '+' || c == '-');
1668 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1669 && token1->val.c == '@'
1670 && (b == CPP_NAME || b == CPP_STRING));
1677 /* Output all the remaining tokens on the current line, and a newline
1678 character, to FP. Leading whitespace is removed. */
1680 cpp_output_line (pfile, fp)
/* Fetch the first token and clear its PREV_WHITE flag, so that
   cpp_output_token does not treat it as preceded by whitespace.  */
1686 cpp_get_token (pfile, &token);
1687 token.flags &= ~PREV_WHITE;
/* Write tokens one at a time until cpp_get_token yields CPP_EOF.
   NOTE(review): the statement that writes the final newline is not
   visible in this listing.  */
1688 while (token.type != CPP_EOF)
1690 cpp_output_token (&token, fp);
1691 cpp_get_token (pfile, &token);
/* Default alignment for pool chunks: the offset of member U within
   struct dummy.  NOTE(review): struct dummy is declared on lines not
   visible in this listing.  */
1709 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
1712 chunk_suitable (pool, chunk, size)
/* Nonzero when CHUNK is non-null, is not the chunk recorded in
   pool->locked, and spans at least twice SIZE bytes from base to
   limit.  */
1717 /* Being at least twice SIZE means we can use memcpy in
1718 _cpp_next_chunk rather than memmove. Besides, it's a good idea
1720 return (chunk && pool->locked != chunk
1721 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
1724 /* Returns the end of the new pool. PTR points to a char in the old
1725 pool, and is updated to point to the same char in the new pool. */
1727 _cpp_next_chunk (pool, len, ptr)
1730 unsigned char **ptr;
/* The candidate is the chunk that follows the current one in the
   chain.  */
1732 cpp_chunk *chunk = pool->cur->next;
1734 /* LEN is the minimum size we want in the new pool. */
1735 len += POOL_ROOM (pool);
/* If the candidate is not usable, allocate a new chunk sized from
   twice POOL_SIZE plus LEN and link it in directly after the
   current chunk.  */
1736 if (! chunk_suitable (pool, chunk, len))
1738 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
1740 chunk->next = pool->cur->next;
1741 pool->cur->next = chunk;
1744 /* Update the pointer before changing chunk's front. */
/* *PTR is shifted by the distance from the old front to the new
   chunk's base.  NOTE(review): _cpp_pool_reserve passes 0 for PTR;
   a guard on this dereference is presumably on a line not visible
   in this listing -- confirm.  */
1746 *ptr += chunk->base - POOL_FRONT (pool);
/* Carry the POOL_ROOM bytes starting at the old front over to the
   base of the new chunk, and start the new chunk's front there.  */
1748 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
1749 chunk->front = chunk->base;
/* NOTE(review): the statement that makes CHUNK the pool's current
   chunk is not visible here; the value returned is the pool's
   limit.  */
1752 return POOL_LIMIT (pool);
/* NOTE(review): the header of this function is not visible in this
   listing; from the calls elsewhere in the file it is presumably
   new_chunk (size).  */
1759 unsigned char *base;
/* Round SIZE up to DEFAULT_ALIGNMENT, then obtain the data area and
   the chunk descriptor from a single xmalloc call.  */
1762 size = ALIGN (size, DEFAULT_ALIGNMENT);
1763 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
1764 /* Put the chunk descriptor at the end. Then chunk overruns will
1765 cause obvious chaos. */
1766 result = (cpp_chunk *) (base + size);
/* The data area runs from BASE to BASE + SIZE; FRONT starts at
   BASE.  */
1767 result->base = base;
1768 result->front = base;
1769 result->limit = base + size;
1776 _cpp_init_pool (pool, size, align, temp)
1778 unsigned int size, align, temp;
/* NOTE(review): the condition under which ALIGN falls back to
   DEFAULT_ALIGNMENT is not visible in this listing.  */
1781 align = DEFAULT_ALIGNMENT;
/* ALIGN & (ALIGN - 1) is nonzero when ALIGN is not a power of two.
   NOTE(review): the action taken in that case is not visible.  */
1782 if (align & (align - 1))
1784 pool->align = align;
1785 pool->cur = new_chunk (size);
/* Links the first chunk to itself.  NOTE(review): whether this
   depends on TEMP is not visible in this listing.  */
1789 pool->cur->next = pool->cur;
1793 _cpp_lock_pool (pool)
/* Bump the lock count; only the transition from zero records the
   current chunk as the locked one.  */
1796 if (pool->locks++ == 0)
1797 pool->locked = pool->cur;
1801 _cpp_unlock_pool (pool)
/* Drop one lock.  NOTE(review): the statement run when the count
   returns to zero is not visible in this listing; presumably it
   clears pool->locked.  */
1804 if (--pool->locks == 0)
1809 _cpp_free_pool (pool)
/* The walk starts at the pool's current chunk.  */
1812 cpp_chunk *chunk = pool->cur, *next;
/* The loop ends on a null link or once the walk arrives back at
   pool->cur.  NOTE(review): the loop body is not visible in this
   listing.  */
1820 while (chunk && chunk != pool->cur);
1823 /* Reserve LEN bytes from a memory pool. */
1825 _cpp_pool_reserve (pool, len)
/* Round the request up to the pool's alignment.  */
1829 len = ALIGN (len, pool->align);
/* Not enough room left in the current chunk: move on to another
   one.  There is no caller pointer to relocate, so 0 is passed for
   PTR.  */
1830 if (len > (unsigned int) POOL_ROOM (pool))
1831 _cpp_next_chunk (pool, len, 0);
/* The front is returned without being advanced here; committing
   the space is a separate step (see POOL_COMMIT in
   _cpp_pool_alloc).  */
1833 return POOL_FRONT (pool);
1836 /* Allocate LEN bytes from a memory pool. */
1838 _cpp_pool_alloc (pool, len)
1842 unsigned char *result = _cpp_pool_reserve (pool, len);
1844 POOL_COMMIT (pool, len);