1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
43 /* Tokens with SPELL_STRING store their spelling in the token list,
44 and its length in the token->val.name.len. */
56 enum spell_type category;
57 const unsigned char *name;
60 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
63 #define OP(e, s) { SPELL_OPERATOR, U s },
64 #define TK(e, s) { s, U STRINGX (e) },
65 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
69 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
70 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
72 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
73 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
74 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
76 static int skip_block_comment PARAMS ((cpp_reader *));
77 static int skip_line_comment PARAMS ((cpp_reader *));
78 static void adjust_column PARAMS ((cpp_reader *));
79 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
80 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
81 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
82 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
83 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
84 static void unterminated PARAMS ((cpp_reader *, int));
85 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
86 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
87 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
88 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
89 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
91 static cpp_chunk *new_chunk PARAMS ((unsigned int));
92 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
96 Compares the token TOKEN to the NUL-terminated string STRING.
97 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
100 cpp_ideq (token, string)
101 const cpp_token *token;
104 if (token->type != CPP_NAME)
107 return !ustrcmp (token->val.node->name, (const U_CHAR *) string);
110 /* Call when meeting a newline. Returns the character after the newline
111 (or carriage-return newline combination), or EOF. */
113 handle_newline (buffer, newline_char)
115 cppchar_t newline_char;
117 cppchar_t next = EOF;
119 buffer->col_adjust = 0;
121 buffer->line_base = buffer->cur;
123 /* Handle CR-LF and LF-CR combinations, get the next character. */
124 if (buffer->cur < buffer->rlimit)
126 next = *buffer->cur++;
127 if (next + newline_char == '\r' + '\n')
129 buffer->line_base = buffer->cur;
130 if (buffer->cur < buffer->rlimit)
131 next = *buffer->cur++;
137 buffer->read_ahead = next;
141 /* Subroutine of skip_escaped_newlines; called when a trigraph is
142 encountered. It warns if necessary, and returns true if the
143 trigraph should be honoured. FROM_CHAR is the third character of a
144 trigraph, and presumed to be the previous character for position
147 trigraph_ok (pfile, from_char)
151 int accept = CPP_OPTION (pfile, trigraphs);
153 /* Don't warn about trigraphs in comments. */
154 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
156 cpp_buffer *buffer = pfile->buffer;
158 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
159 "trigraph ??%c converted to %c",
161 (int) _cpp_trigraph_map[from_char]);
163 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
164 "trigraph ??%c ignored", (int) from_char);
170 /* Assumes local variables buffer and result. */
171 #define ACCEPT_CHAR(t) \
172 do { result->type = t; buffer->read_ahead = EOF; } while (0)
174 /* When we move to multibyte character sets, add to these something
175 that saves and restores the state of the multibyte conversion
176 library. This probably involves saving and restoring a "cookie".
177 In the case of glibc it is an 8-byte structure, so is not a high
178 overhead operation. In any case, it's out of the fast path. */
179 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
180 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
182 /* Skips any escaped newlines introduced by NEXT, which is either a
183 '?' or a '\\'. Returns the next character, which will also have
184 been placed in buffer->read_ahead. This routine performs
185 preprocessing stages 1 and 2 of the ISO C standard. */
187 skip_escaped_newlines (buffer, next)
191 /* Only do this if we apply stages 1 and 2. */
192 if (!buffer->from_stage3)
195 const unsigned char *saved_cur;
200 if (buffer->cur == buffer->rlimit)
206 next1 = *buffer->cur++;
207 if (next1 != '?' || buffer->cur == buffer->rlimit)
213 next1 = *buffer->cur++;
214 if (!_cpp_trigraph_map[next1]
215 || !trigraph_ok (buffer->pfile, next1))
221 /* We have a full trigraph here. */
222 next = _cpp_trigraph_map[next1];
223 if (next != '\\' || buffer->cur == buffer->rlimit)
228 /* We have a backslash, and room for at least one more character. */
232 next1 = *buffer->cur++;
233 if (!is_nvspace (next1))
237 while (buffer->cur < buffer->rlimit);
239 if (!is_vspace (next1))
246 cpp_warning (buffer->pfile,
247 "backslash and newline separated by space");
249 next = handle_newline (buffer, next1);
251 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
253 while (next == '\\' || next == '?');
256 buffer->read_ahead = next;
260 /* Obtain the next character, after trigraph conversion and skipping
261 an arbitrary string of escaped newlines. The common case of no
262 trigraphs or escaped newlines falls through quickly. */
264 get_effective_char (buffer)
267 cppchar_t next = EOF;
269 if (buffer->cur < buffer->rlimit)
271 next = *buffer->cur++;
273 /* '?' can introduce trigraphs (and therefore backslash); '\\'
274 can introduce escaped newlines, which we want to skip, or
275 UCNs, which, depending upon lexer state, we will handle in
277 if (next == '?' || next == '\\')
278 next = skip_escaped_newlines (buffer, next);
281 buffer->read_ahead = next;
285 /* Skip a C-style block comment. We find the end of the comment by
286 seeing if an asterisk is before every '/' we encounter. Returns
287 non-zero if comment terminated by EOF, zero otherwise. */
289 skip_block_comment (pfile)
292 cpp_buffer *buffer = pfile->buffer;
293 cppchar_t c = EOF, prevc = EOF;
295 pfile->state.lexing_comment = 1;
296 while (buffer->cur != buffer->rlimit)
298 prevc = c, c = *buffer->cur++;
301 /* FIXME: For speed, create a new character class of characters
302 of interest inside block comments. */
303 if (c == '?' || c == '\\')
304 c = skip_escaped_newlines (buffer, c);
306 /* People like decorating comments with '*', so check for '/'
307 instead for efficiency. */
313 /* Warn about potential nested comments, but not if the '/'
314 comes immediately before the true comment delimiter.
315 Don't bother to get it right across escaped newlines. */
316 if (CPP_OPTION (pfile, warn_comments)
317 && buffer->cur != buffer->rlimit)
319 prevc = c, c = *buffer->cur++;
320 if (c == '*' && buffer->cur != buffer->rlimit)
322 prevc = c, c = *buffer->cur++;
324 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
325 CPP_BUF_COL (buffer),
326 "\"/*\" within comment");
331 else if (is_vspace (c))
333 prevc = c, c = handle_newline (buffer, c);
337 adjust_column (pfile);
340 pfile->state.lexing_comment = 0;
341 buffer->read_ahead = EOF;
342 return c != '/' || prevc != '*';
345 /* Skip a C++ line comment. Handles escaped newlines. Returns
346 non-zero if a multiline comment. The following new line, if any,
347 is left in buffer->read_ahead. */
349 skip_line_comment (pfile)
352 cpp_buffer *buffer = pfile->buffer;
353 unsigned int orig_lineno = buffer->lineno;
356 pfile->state.lexing_comment = 1;
360 if (buffer->cur == buffer->rlimit)
364 if (c == '?' || c == '\\')
365 c = skip_escaped_newlines (buffer, c);
367 while (!is_vspace (c));
369 pfile->state.lexing_comment = 0;
370 buffer->read_ahead = c; /* Leave any newline for caller. */
371 return orig_lineno != buffer->lineno;
374 /* pfile->buffer->cur is one beyond the \t character. Update
375 col_adjust so we track the column correctly. */
377 adjust_column (pfile)
380 cpp_buffer *buffer = pfile->buffer;
381 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
383 /* Round it up to multiple of the tabstop, but subtract 1 since the
384 tab itself occupies a character position. */
385 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
386 - col % CPP_OPTION (pfile, tabstop)) - 1;
389 /* Skips whitespace, saving the next non-whitespace character.
390 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
391 tokens might be assigned an incorrect column. */
393 skip_whitespace (pfile, c)
397 cpp_buffer *buffer = pfile->buffer;
398 unsigned int warned = 0;
402 /* Horizontal space always OK. */
406 adjust_column (pfile);
407 /* Just \f \v or \0 left. */
412 cpp_warning (pfile, "null character(s) ignored");
416 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
417 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
418 CPP_BUF_COL (buffer),
419 "%s in preprocessing directive",
420 c == '\f' ? "form feed" : "vertical tab");
423 if (buffer->cur == buffer->rlimit)
427 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
428 while (is_nvspace (c));
430 /* Remember the next character. */
431 buffer->read_ahead = c;
434 /* See if the characters of a number token are valid in a name (no
437 name_p (pfile, string)
439 const cpp_string *string;
443 for (i = 0; i < string->len; i++)
444 if (!is_idchar (string->text[i]))
450 /* Parse an identifier, skipping embedded backslash-newlines.
451 Calculate the hash value of the token while parsing, for improved
452 performance. The hashing algorithm *must* match cpp_lookup(). */
454 static cpp_hashnode *
455 parse_identifier (pfile, c)
459 cpp_hashnode *result;
460 cpp_buffer *buffer = pfile->buffer;
461 unsigned char *dest, *limit;
462 unsigned int r = 0, saw_dollar = 0;
464 dest = POOL_FRONT (&pfile->ident_pool);
465 limit = POOL_LIMIT (&pfile->ident_pool);
471 /* Need room for terminating null. */
472 if (dest + 1 >= limit)
473 limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);
482 if (buffer->cur == buffer->rlimit)
487 while (is_idchar (c));
489 /* Potential escaped newline? */
490 if (c != '?' && c != '\\')
492 c = skip_escaped_newlines (buffer, c);
494 while (is_idchar (c));
496 /* Remember the next character. */
497 buffer->read_ahead = c;
499 /* $ is not an identifier character in the standard, but is commonly
500 accepted as an extension. Don't warn about it in skipped
501 conditional blocks. */
502 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
503 cpp_pedwarn (pfile, "'$' character(s) in identifier");
505 /* Identifiers are null-terminated. */
508 /* This routine commits the memory if necessary. */
509 result = _cpp_lookup_with_hash (pfile,
510 dest - POOL_FRONT (&pfile->ident_pool), r);
512 /* Some identifiers require diagnostics when lexed. */
513 if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
515 /* It is allowed to poison the same identifier twice. */
516 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
517 cpp_error (pfile, "attempt to use poisoned \"%s\"", result->name);
519 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
520 replacement list of a variable-arguments macro. */
521 if (result == pfile->spec_nodes.n__VA_ARGS__
522 && !pfile->state.va_args_ok)
523 cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variable-argument macro");
529 /* Parse a number, skipping embedded backslash-newlines. */
531 parse_number (pfile, number, c, leading_period)
537 cpp_buffer *buffer = pfile->buffer;
538 cpp_pool *pool = pfile->string_pool;
539 unsigned char *dest, *limit;
541 dest = POOL_FRONT (pool);
542 limit = POOL_LIMIT (pool);
544 /* Place a leading period. */
548 limit = _cpp_next_chunk (pool, 0, &dest);
556 /* Need room for terminating null. */
557 if (dest + 1 >= limit)
558 limit = _cpp_next_chunk (pool, 0, &dest);
562 if (buffer->cur == buffer->rlimit)
567 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
569 /* Potential escaped newline? */
570 if (c != '?' && c != '\\')
572 c = skip_escaped_newlines (buffer, c);
574 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
576 /* Remember the next character. */
577 buffer->read_ahead = c;
579 /* Null-terminate the number. */
582 number->text = POOL_FRONT (pool);
583 number->len = dest - number->text;
584 POOL_COMMIT (pool, number->len + 1);
587 /* Subroutine of parse_string. Emits error for unterminated strings. */
589 unterminated (pfile, term)
593 cpp_error (pfile, "missing terminating %c character", term);
595 if (term == '\"' && pfile->mlstring_pos.line
596 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
598 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
599 pfile->mlstring_pos.col,
600 "possible start of unterminated string literal");
601 pfile->mlstring_pos.line = 0;
605 /* Subroutine of parse_string. */
607 unescaped_terminator_p (pfile, dest)
609 const unsigned char *dest;
611 const unsigned char *start, *temp;
613 /* In #include-style directives, terminators are not escapable. */
614 if (pfile->state.angled_headers)
617 start = POOL_FRONT (pfile->string_pool);
619 /* An odd number of consecutive backslashes represents an escaped
621 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
624 return ((dest - temp) & 1) == 0;
627 /* Parses a string, character constant, or angle-bracketed header file
628 name. Handles embedded trigraphs and escaped newlines.
630 Multi-line strings are allowed, but they are deprecated within
633 parse_string (pfile, token, terminator)
636 cppchar_t terminator;
638 cpp_buffer *buffer = pfile->buffer;
639 cpp_pool *pool = pfile->string_pool;
640 unsigned char *dest, *limit;
642 unsigned int nulls = 0;
644 dest = POOL_FRONT (pool);
645 limit = POOL_LIMIT (pool);
649 if (buffer->cur == buffer->rlimit)
652 unterminated (pfile, terminator);
658 /* Handle trigraphs, escaped newlines etc. */
659 if (c == '?' || c == '\\')
660 c = skip_escaped_newlines (buffer, c);
662 if (c == terminator && unescaped_terminator_p (pfile, dest))
667 else if (is_vspace (c))
669 /* In assembly language, silently terminate string and
670 character literals at end of line. This is a kludge
671 around not knowing where comments are. */
672 if (CPP_OPTION (pfile, lang_asm) && terminator != '>')
675 /* Character constants and header names may not extend over
676 multiple lines. In Standard C, neither may strings.
677 Unfortunately, we accept multiline strings as an
678 extension, except in #include family directives. */
679 if (terminator != '"' || pfile->state.angled_headers)
681 unterminated (pfile, terminator);
685 if (pfile->mlstring_pos.line == 0)
687 pfile->mlstring_pos = pfile->lexer_pos;
688 if (CPP_PEDANTIC (pfile))
689 cpp_pedwarn (pfile, "multi-line string constant");
692 handle_newline (buffer, c); /* Stores to read_ahead. */
698 cpp_warning (pfile, "null character(s) preserved in literal");
701 /* No terminating null for strings - they could contain nulls. */
703 limit = _cpp_next_chunk (pool, 0, &dest);
706 /* If we had a new line, the next character is in read_ahead. */
709 c = buffer->read_ahead;
714 /* Remember the next character. */
715 buffer->read_ahead = c;
717 token->val.str.text = POOL_FRONT (pool);
718 token->val.str.len = dest - token->val.str.text;
719 POOL_COMMIT (pool, token->val.str.len);
722 /* The stored comment includes the comment start and any terminator. */
724 save_comment (pfile, token, from)
727 const unsigned char *from;
729 unsigned char *buffer;
732 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
733 /* C++ comments probably (not definitely) have moved past a new
734 line, which we don't want to save in the comment. */
735 if (pfile->buffer->read_ahead != EOF)
737 buffer = _cpp_pool_alloc (pfile->string_pool, len);
739 token->type = CPP_COMMENT;
740 token->val.str.len = len;
741 token->val.str.text = buffer;
744 memcpy (buffer + 1, from, len - 1);
747 /* Subroutine of lex_token to handle '%'. A little tricky, since we
748 want to avoid stepping back when lexing %:%X. */
750 lex_percent (buffer, result)
756 result->type = CPP_MOD;
757 /* Parsing %:%X could leave an extra character. */
758 if (buffer->extra_char == EOF)
759 c = get_effective_char (buffer);
762 c = buffer->read_ahead = buffer->extra_char;
763 buffer->extra_char = EOF;
767 ACCEPT_CHAR (CPP_MOD_EQ);
768 else if (CPP_OPTION (buffer->pfile, digraphs))
772 result->flags |= DIGRAPH;
773 ACCEPT_CHAR (CPP_HASH);
774 if (get_effective_char (buffer) == '%')
776 buffer->extra_char = get_effective_char (buffer);
777 if (buffer->extra_char == ':')
779 buffer->extra_char = EOF;
780 ACCEPT_CHAR (CPP_PASTE);
783 /* We'll catch the extra_char when we're called back. */
784 buffer->read_ahead = '%';
789 result->flags |= DIGRAPH;
790 ACCEPT_CHAR (CPP_CLOSE_BRACE);
795 /* Subroutine of lex_token to handle '.'. This is tricky, since we
796 want to avoid stepping back when lexing '...' or '.123'. In the
797 latter case we should also set a flag for parse_number. */
799 lex_dot (pfile, result)
803 cpp_buffer *buffer = pfile->buffer;
806 /* Parsing ..X could leave an extra character. */
807 if (buffer->extra_char == EOF)
808 c = get_effective_char (buffer);
811 c = buffer->read_ahead = buffer->extra_char;
812 buffer->extra_char = EOF;
815 /* All known character sets have 0...9 contiguous. */
816 if (c >= '0' && c <= '9')
818 result->type = CPP_NUMBER;
819 parse_number (pfile, &result->val.str, c, 1);
823 result->type = CPP_DOT;
826 buffer->extra_char = get_effective_char (buffer);
827 if (buffer->extra_char == '.')
829 buffer->extra_char = EOF;
830 ACCEPT_CHAR (CPP_ELLIPSIS);
833 /* We'll catch the extra_char when we're called back. */
834 buffer->read_ahead = '.';
836 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
837 ACCEPT_CHAR (CPP_DOT_STAR);
842 _cpp_lex_token (pfile, result)
848 const unsigned char *comment_start;
849 unsigned char bol = pfile->state.next_bol;
852 buffer = pfile->buffer;
853 pfile->state.next_bol = 0;
856 pfile->lexer_pos.line = buffer->lineno;
858 pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
860 c = buffer->read_ahead;
861 if (c == EOF && buffer->cur < buffer->rlimit)
864 pfile->lexer_pos.col++;
868 buffer->read_ahead = EOF;
872 /* Non-empty files should end in a newline. Ignore for command
873 line and _Pragma buffers. */
874 if (pfile->lexer_pos.col != 0 && !buffer->from_stage3)
875 cpp_pedwarn (pfile, "no newline at end of file");
876 pfile->state.next_bol = 1;
877 result->type = CPP_EOF;
880 case ' ': case '\t': case '\f': case '\v': case '\0':
881 skip_whitespace (pfile, c);
882 result->flags |= PREV_WHITE;
885 case '\n': case '\r':
886 if (!pfile->state.in_directive)
888 handle_newline (buffer, c);
890 pfile->lexer_pos.output_line = buffer->lineno;
892 /* Newlines in arguments are white space (6.10.3.10).
893 Otherwise, clear any white space flag. */
894 if (pfile->state.parsing_args)
895 result->flags |= PREV_WHITE;
897 result->flags &= ~PREV_WHITE;
901 /* Don't let directives spill over to the next line. */
902 buffer->read_ahead = c;
903 pfile->state.next_bol = 1;
904 result->type = CPP_EOF;
909 /* These could start an escaped newline, or '?' a trigraph. Let
910 skip_escaped_newlines do all the work. */
912 unsigned int lineno = buffer->lineno;
914 c = skip_escaped_newlines (buffer, c);
915 if (lineno != buffer->lineno)
916 /* We had at least one escaped newline of some sort, and the
917 next character is in buffer->read_ahead. Update the
918 token's line and column. */
921 /* We are either the original '?' or '\\', or a trigraph. */
922 result->type = CPP_QUERY;
923 buffer->read_ahead = EOF;
931 case '0': case '1': case '2': case '3': case '4':
932 case '5': case '6': case '7': case '8': case '9':
933 result->type = CPP_NUMBER;
934 parse_number (pfile, &result->val.str, c, 0);
938 if (!CPP_OPTION (pfile, dollars_in_ident))
940 /* Fall through... */
943 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
944 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
945 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
946 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
948 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
949 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
950 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
951 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
953 result->type = CPP_NAME;
954 result->val.node = parse_identifier (pfile, c);
956 /* 'L' may introduce wide characters or strings. */
957 if (result->val.node == pfile->spec_nodes.n_L)
959 c = buffer->read_ahead; /* For make_string. */
960 if (c == '\'' || c == '"')
962 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
966 /* Convert named operators to their proper types. */
967 else if (result->val.node->flags & NODE_OPERATOR)
969 result->flags |= NAMED_OP;
970 result->type = result->val.node->value.operator;
976 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
978 parse_string (pfile, result, c);
982 /* A potential block or line comment. */
983 comment_start = buffer->cur;
984 result->type = CPP_DIV;
985 c = get_effective_char (buffer);
987 ACCEPT_CHAR (CPP_DIV_EQ);
988 if (c != '/' && c != '*')
993 if (skip_block_comment (pfile))
994 cpp_error_with_line (pfile, pfile->lexer_pos.line,
995 pfile->lexer_pos.col,
996 "unterminated comment");
1000 if (!CPP_OPTION (pfile, cplusplus_comments)
1001 && !CPP_IN_SYSTEM_HEADER (pfile))
1004 /* We silently allow C++ comments in system headers,
1005 irrespective of conformance mode, because lots of
1006 broken systems do that and trying to clean it up in
1007 fixincludes is a nightmare. */
1008 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1009 && ! buffer->warned_cplusplus_comments)
1012 "C++ style comments are not allowed in ISO C89");
1014 "(this will be reported only once per input file)");
1015 buffer->warned_cplusplus_comments = 1;
1018 /* Skip_line_comment updates buffer->read_ahead. */
1019 if (skip_line_comment (pfile))
1020 cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1021 pfile->lexer_pos.col,
1022 "multi-line comment");
1025 /* Skipping the comment has updated buffer->read_ahead. */
1026 if (!pfile->state.save_comments)
1028 result->flags |= PREV_WHITE;
1032 /* Save the comment as a token in its own right. */
1033 save_comment (pfile, result, comment_start);
1037 if (pfile->state.angled_headers)
1039 result->type = CPP_HEADER_NAME;
1040 c = '>'; /* terminator. */
1044 result->type = CPP_LESS;
1045 c = get_effective_char (buffer);
1047 ACCEPT_CHAR (CPP_LESS_EQ);
1050 ACCEPT_CHAR (CPP_LSHIFT);
1051 if (get_effective_char (buffer) == '=')
1052 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1054 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1056 ACCEPT_CHAR (CPP_MIN);
1057 if (get_effective_char (buffer) == '=')
1058 ACCEPT_CHAR (CPP_MIN_EQ);
1060 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1062 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1063 result->flags |= DIGRAPH;
1065 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1067 ACCEPT_CHAR (CPP_OPEN_BRACE);
1068 result->flags |= DIGRAPH;
1073 result->type = CPP_GREATER;
1074 c = get_effective_char (buffer);
1076 ACCEPT_CHAR (CPP_GREATER_EQ);
1079 ACCEPT_CHAR (CPP_RSHIFT);
1080 if (get_effective_char (buffer) == '=')
1081 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1083 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1085 ACCEPT_CHAR (CPP_MAX);
1086 if (get_effective_char (buffer) == '=')
1087 ACCEPT_CHAR (CPP_MAX_EQ);
1092 lex_percent (buffer, result);
1093 if (result->type == CPP_HASH)
1098 lex_dot (pfile, result);
1102 result->type = CPP_PLUS;
1103 c = get_effective_char (buffer);
1105 ACCEPT_CHAR (CPP_PLUS_EQ);
1107 ACCEPT_CHAR (CPP_PLUS_PLUS);
1111 result->type = CPP_MINUS;
1112 c = get_effective_char (buffer);
1115 ACCEPT_CHAR (CPP_DEREF);
1116 if (CPP_OPTION (pfile, cplusplus)
1117 && get_effective_char (buffer) == '*')
1118 ACCEPT_CHAR (CPP_DEREF_STAR);
1121 ACCEPT_CHAR (CPP_MINUS_EQ);
1123 ACCEPT_CHAR (CPP_MINUS_MINUS);
1127 result->type = CPP_MULT;
1128 if (get_effective_char (buffer) == '=')
1129 ACCEPT_CHAR (CPP_MULT_EQ);
1133 result->type = CPP_EQ;
1134 if (get_effective_char (buffer) == '=')
1135 ACCEPT_CHAR (CPP_EQ_EQ);
1139 result->type = CPP_NOT;
1140 if (get_effective_char (buffer) == '=')
1141 ACCEPT_CHAR (CPP_NOT_EQ);
1145 result->type = CPP_AND;
1146 c = get_effective_char (buffer);
1148 ACCEPT_CHAR (CPP_AND_EQ);
1150 ACCEPT_CHAR (CPP_AND_AND);
1154 c = buffer->extra_char; /* Can be set by error condition below. */
1157 buffer->read_ahead = c;
1158 buffer->extra_char = EOF;
1161 c = get_effective_char (buffer);
1165 ACCEPT_CHAR (CPP_PASTE);
1169 result->type = CPP_HASH;
1173 if (pfile->state.parsing_args)
1175 /* 6.10.3 paragraph 11: If there are sequences of
1176 preprocessing tokens within the list of arguments that
1177 would otherwise act as preprocessing directives, the
1178 behavior is undefined.
1180 This implementation will report a hard error, terminate
1181 the macro invocation, and proceed to process the
1184 "directives may not be used inside a macro argument");
1186 /* Put a '#' in lookahead, return CPP_EOF for parse_arg. */
1187 buffer->extra_char = buffer->read_ahead;
1188 buffer->read_ahead = '#';
1189 pfile->state.next_bol = 1;
1190 result->type = CPP_EOF;
1192 /* Get whitespace right - newline_in_args sets it. */
1193 if (pfile->lexer_pos.col == 1)
1194 result->flags &= ~PREV_WHITE;
1198 /* This is the hash introducing a directive. */
1199 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1200 goto done_directive; /* bol still 1. */
1201 /* This is in fact an assembler #. */
1207 result->type = CPP_OR;
1208 c = get_effective_char (buffer);
1210 ACCEPT_CHAR (CPP_OR_EQ);
1212 ACCEPT_CHAR (CPP_OR_OR);
1216 result->type = CPP_XOR;
1217 if (get_effective_char (buffer) == '=')
1218 ACCEPT_CHAR (CPP_XOR_EQ);
1222 result->type = CPP_COLON;
1223 c = get_effective_char (buffer);
1224 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1225 ACCEPT_CHAR (CPP_SCOPE);
1226 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1228 result->flags |= DIGRAPH;
1229 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1233 case '~': result->type = CPP_COMPL; break;
1234 case ',': result->type = CPP_COMMA; break;
1235 case '(': result->type = CPP_OPEN_PAREN; break;
1236 case ')': result->type = CPP_CLOSE_PAREN; break;
1237 case '[': result->type = CPP_OPEN_SQUARE; break;
1238 case ']': result->type = CPP_CLOSE_SQUARE; break;
1239 case '{': result->type = CPP_OPEN_BRACE; break;
1240 case '}': result->type = CPP_CLOSE_BRACE; break;
1241 case ';': result->type = CPP_SEMICOLON; break;
1244 if (CPP_OPTION (pfile, objc))
1246 /* In Objective C, '@' may begin keywords or strings, like
1247 @keyword or @"string". It would be nice to call
1248 get_effective_char here and test the result. However, we
1249 would then need to pass 2 characters to parse_identifier,
1250 making it ugly and slowing down its main loop. Instead,
1251 we assume we have an identifier, and recover if not. */
1252 result->type = CPP_NAME;
1253 result->val.node = parse_identifier (pfile, c);
1254 if (result->val.node->length != 1)
1257 /* OK, so it wasn't an identifier. Maybe a string? */
1258 if (buffer->read_ahead == '"')
1261 ACCEPT_CHAR (CPP_OSTRING);
1269 result->type = CPP_OTHER;
1275 /* An upper bound on the number of bytes needed to spell a token,
1276 including preceding whitespace. */
1278 cpp_token_len (token)
1279 const cpp_token *token;
1283 switch (TOKEN_SPELL (token))
1285 default: len = 0; break;
1286 case SPELL_STRING: len = token->val.str.len; break;
1287 case SPELL_IDENT: len = token->val.node->length; break;
1289 /* 1 for whitespace, 4 for comment delimiters. */
1293 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1294 already contain enough space to hold the token's spelling.
1295 Returns a pointer to the character after the last character
1298 cpp_spell_token (pfile, token, buffer)
1299 cpp_reader *pfile; /* Would be nice to be rid of this... */
1300 const cpp_token *token;
1301 unsigned char *buffer;
1303 switch (TOKEN_SPELL (token))
1305 case SPELL_OPERATOR:
1307 const unsigned char *spelling;
1310 if (token->flags & DIGRAPH)
1311 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1312 else if (token->flags & NAMED_OP)
1315 spelling = TOKEN_NAME (token);
1317 while ((c = *spelling++) != '\0')
1324 memcpy (buffer, token->val.node->name, token->val.node->length);
1325 buffer += token->val.node->length;
1330 int left, right, tag;
1331 switch (token->type)
1333 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1334 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1335 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1336 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1337 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1338 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1339 default: left = '\0'; right = '\0'; tag = '\0'; break;
1341 if (tag) *buffer++ = tag;
1342 if (left) *buffer++ = left;
1343 memcpy (buffer, token->val.str.text, token->val.str.len);
1344 buffer += token->val.str.len;
1345 if (right) *buffer++ = right;
1350 *buffer++ = token->val.c;
1354 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1361 /* Returns a token as a null-terminated string. The string is
1362 temporary, and automatically freed later. Useful for diagnostics. */
1364 cpp_token_as_text (pfile, token)
1366 const cpp_token *token;
1368 unsigned int len = cpp_token_len (token);
1369 unsigned char *start = _cpp_pool_alloc (&pfile->temp_string_pool, len), *end;
1371 end = cpp_spell_token (pfile, token, start);
1377 /* Used by C front ends. Should really move to using cpp_token_as_text. */
1379 cpp_type2name (type)
1380 enum cpp_ttype type;
1382 return (const char *) token_spellings[type].name;
1385 /* Writes the spelling of token to FP. Separate from cpp_spell_token
1386 for efficiency - to avoid double-buffering. Also, outputs a space
1387 if PREV_WHITE is flagged. */
/* TOKEN is the token to print and FP the stdio stream written to.  The
   output path is selected by TOKEN_SPELL (TOKEN).
   NOTE(review): the return type, the declaration of FP, the braces and
   several case labels and statements are not visible in this listing;
   the notes below describe only what is shown.  */
1389 cpp_output_token (token, fp)
1390 const cpp_token *token;
/* Checked before the spelling is written; the statement it guards is
   not visible here (per the header comment it outputs a space).  */
1393 if (token->flags & PREV_WHITE)
1396 switch (TOKEN_SPELL (token))
/* Operators: a token flagged DIGRAPH takes its spelling from
   digraph_spellings, indexed by its offset from CPP_FIRST_DIGRAPH.  The
   NAMED_OP arm has no visible body; the remaining case uses TOKEN_NAME.  */
1398 case SPELL_OPERATOR:
1400 const unsigned char *spelling;
1402 if (token->flags & DIGRAPH)
1403 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1404 else if (token->flags & NAMED_OP)
1407 spelling = TOKEN_NAME (token);
1409 ufputs (spelling, fp);
/* Writes the name held by the node of the token.  NOTE(review): the
   case label for this arm is not visible; presumably the identifier
   spelling class -- confirm.  */
1415 ufputs (token->val.node->name, fp);
/* Literal-like tokens: TAG is an optional prefix character and LEFT and
   RIGHT are the delimiters, all chosen from the token type.  A zero
   value means nothing is written for that position.  */
1420 int left, right, tag;
1421 switch (token->type)
1423 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1424 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1425 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1426 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1427 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1428 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1429 default: left = '\0'; right = '\0'; tag = '\0'; break;
1431 if (tag) putc (tag, fp);
1432 if (left) putc (left, fp);
/* The text is written with its stored length via fwrite rather than as
   a NUL-terminated string.  */
1433 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1434 if (right) putc (right, fp);
/* Single-character token: emit the stored character.  */
1439 putc (token->val.c, fp);
1443 /* An error, most probably. */
1448 /* Compare two tokens. */
/* A and B are examined further only when both their type and their
   flags are equal; the comparison then depends on TOKEN_SPELL (A).
   NOTE(review): the return type, the braces, several case labels, the
   statement following the SPELL_OPERATOR label and the end of the
   final memcmp call are not visible in this listing.  */
1450 _cpp_equiv_tokens (a, b)
1451 const cpp_token *a, *b;
1453 if (a->type == b->type && a->flags == b->flags)
1454 switch (TOKEN_SPELL (a))
1456 default: /* Keep compiler happy. */
1457 case SPELL_OPERATOR:
1460 return a->val.c == b->val.c; /* Character. */
/* Any type other than CPP_MACRO_ARG matches outright here; macro
   arguments must also agree on arg_no.  */
1462 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
/* Compared by node pointer, not by spelling.  */
1464 return a->val.node == b->val.node;
/* Equal lengths first, then a byte comparison of the text.  */
1466 return (a->val.str.len == b->val.str.len
1467 && !memcmp (a->val.str.text, b->val.str.text,
1475 /* Compare two token lists. */
/* A and B are token lists delimited by their first and limit pointers.
   The lengths are compared first, then the tokens pairwise with
   _cpp_equiv_tokens.
   NOTE(review): the return type, the braces and every return statement
   are not visible in this listing.  */
1477 _cpp_equiv_toklists (a, b)
1478 const struct toklist *a, *b;
1480 unsigned int i, count;
/* Number of tokens in A.  */
1482 count = a->limit - a->first;
/* Length mismatch test; the statement it guards is not visible.  */
1483 if (count != (b->limit - b->first))
/* Walk both lists in step, testing each pair for a mismatch.  */
1486 for (i = 0; i < count; i++)
1487 if (! _cpp_equiv_tokens (&a->first[i], &b->first[i]))
1494 /* Determine whether two tokens can be pasted together, and if so,
1495 what the resulting token is. Returns CPP_EOF if the tokens cannot
1496 be pasted, or the appropriate type for the merged token if they can. */
/* PFILE supplies the options consulted below (cplusplus, digraphs,
   objc) and the special node n_L; TOKEN1 and TOKEN2 are the left and
   right operands of the paste.  *DIGRAPH is written when the result is
   a digraph spelling.
   NOTE(review): the return type, the remaining parameter declarations,
   the switch on A, most case labels, the opening halves of several
   conditions and the final return are not visible in this listing; the
   notes below describe only what is shown.  */
1499 cpp_can_paste (pfile, token1, token2, digraph)
1501 const cpp_token *token1, *token2;
1504 enum cpp_ttype a = token1->type, b = token2->type;
1505 int cxx = CPP_OPTION (pfile, cplusplus);
1507 /* Treat named operators as if they were ordinary NAMEs. */
1508 if (token1->flags & NAMED_OP)
1510 if (token2->flags & NAMED_OP)
/* Any type up to CPP_LAST_EQ followed by CPP_EQ yields the type found
   at the constant offset CPP_EQ_EQ - CPP_EQ from A.  */
1513 if (a <= CPP_LAST_EQ && b == CPP_EQ)
1514 return a + (CPP_EQ_EQ - CPP_EQ);
/* Right-shift results; CPP_MAX is produced only when CXX is set.  */
1519 if (b == a) return CPP_RSHIFT;
1520 if (b == CPP_QUERY && cxx) return CPP_MAX;
1521 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
/* Left-shift results; CPP_MIN is produced only when CXX is set, and the
   two digraph results sit under the digraphs option test.  */
1524 if (b == a) return CPP_LSHIFT;
1525 if (b == CPP_QUERY && cxx) return CPP_MIN;
1526 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
1527 if (CPP_OPTION (pfile, digraphs))
1530 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1532 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
/* Doubling an operator yields the doubled token type.  */
1536 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1537 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1538 case CPP_OR: if (b == a) return CPP_OR_OR; break;
1541 if (b == a) return CPP_MINUS_MINUS;
1542 if (b == CPP_GREATER) return CPP_DEREF;
/* CPP_SCOPE requires CXX; the digraph result requires the digraphs
   option.  */
1545 if (b == a && cxx) return CPP_SCOPE;
1546 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1547 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1551 if (CPP_OPTION (pfile, digraphs))
1553 if (b == CPP_GREATER)
1554 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1556 {*digraph = 1; return CPP_HASH;} /* %: digraph */
/* Both member-pointer operators are produced only when CXX is set.  */
1560 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1563 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1564 if (b == CPP_NUMBER) return CPP_NUMBER;
/* Two tokens of the same type paste to CPP_PASTE only when they agree
   on the DIGRAPH flag; that flag is passed back through *DIGRAPH.  */
1568 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1570 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
/* Results that are names or wide literals.  The wide forms require the
   node of TOKEN1 to be the special node n_L.  NOTE(review): the first
   halves of the next three conditions are not visible.  */
1574 if (b == CPP_NAME) return CPP_NAME;
1576 && name_p (pfile, &token2->val.str)) return CPP_NAME;
1578 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1580 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
/* Results that are numbers.  The sign test below looks at the last
   character of the text of TOKEN1.  */
1584 if (b == CPP_NUMBER) return CPP_NUMBER;
1585 if (b == CPP_NAME) return CPP_NUMBER;
1586 if (b == CPP_DOT) return CPP_NUMBER;
1587 /* Numbers cannot have length zero, so this is safe. */
1588 if ((b == CPP_PLUS || b == CPP_MINUS)
1589 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
/* With the objc option set, an @ character token pastes with a name or
   a string.  */
1594 if (CPP_OPTION (pfile, objc) && token1->val.c == '@')
1596 if (b == CPP_NAME) return CPP_NAME;
1597 if (b == CPP_STRING) return CPP_OSTRING;
1607 /* Returns nonzero if a space should be inserted to avoid an
1608 accidental token paste for output. For simplicity, it is
1609 conservative, and occasionally advises a space where one is not
1610 needed, e.g. "." and ".2". */
/* PFILE supplies the objc option and is handed to name_p; TOKEN1 is the
   left-hand token and TOKEN2 the token that would follow it.
   NOTE(review): the return type, the declarations of PFILE and C, the
   switch on A, the statements guarded by the NAMED_OP tests and by the
   CPP_LAST_EQ test, and the final return are not visible in this
   listing.  */
1613 cpp_avoid_paste (pfile, token1, token2)
1615 const cpp_token *token1, *token2;
1617 enum cpp_ttype a = token1->type, b = token2->type;
1620 if (token1->flags & NAMED_OP)
1622 if (token2->flags & NAMED_OP)
/* C becomes the first character of the spelling of TOKEN2 when that
   token is a digraph or belongs to the operator spelling class.  */
1626 if (token2->flags & DIGRAPH)
1627 c = digraph_spellings[b - CPP_FIRST_DIGRAPH][0];
1628 else if (token_spellings[b].category == SPELL_OPERATOR)
1629 c = token_spellings[b].name[0];
1631 /* Quickly get everything that can paste with an '='. */
1632 if (a <= CPP_LAST_EQ && c == '=')
/* Each arm names a type for TOKEN1 and tests C, or the type of TOKEN2,
   against the followers that would merge with it.  */
1637 case CPP_GREATER: return c == '>' || c == '?';
1638 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1639 case CPP_PLUS: return c == '+';
1640 case CPP_MINUS: return c == '-' || c == '>';
1641 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1642 case CPP_MOD: return c == ':' || c == '>';
1643 case CPP_AND: return c == '&';
1644 case CPP_OR: return c == '|';
1645 case CPP_COLON: return c == ':' || c == '>';
1646 case CPP_DEREF: return c == '*';
1647 case CPP_DOT: return c == '.' || c == '%';
1648 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1649 case CPP_NAME: return ((b == CPP_NUMBER
1650 && name_p (pfile, &token2->val.str))
1652 || b == CPP_CHAR || b == CPP_STRING); /* L */
1653 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1654 || c == '.' || c == '+' || c == '-');
/* Only relevant with the objc option: an @ character followed by a name
   or a string.  */
1655 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1656 && token1->val.c == '@'
1657 && (b == CPP_NAME || b == CPP_STRING));
1664 /* Output all the remaining tokens on the current line, and a newline
1665 character, to FP. Leading whitespace is removed. */
/* PFILE is the reader the tokens are fetched from; FP is the stream
   passed to cpp_output_token.
   NOTE(review): the return type, the parameter declarations, the
   declaration of TOKEN, the braces and the statement that writes the
   newline are not visible in this listing.  */
1667 cpp_output_line (pfile, fp)
/* Fetch the first token and clear its PREV_WHITE flag so that
   cpp_output_token does not see leading whitespace on it.  */
1673 _cpp_get_token (pfile, &token);
1674 token.flags &= ~PREV_WHITE;
/* Print and refetch until a CPP_EOF token is returned.  */
1675 while (token.type != CPP_EOF)
1677 cpp_output_token (&token, fp);
1678 _cpp_get_token (pfile, &token);
/* Alignment used by the pool code when sizing chunks and as the
   fallback in _cpp_init_pool: the offset of member u within struct
   dummy.  NOTE(review): the declaration of struct dummy is not visible
   in this listing.  */
1696 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
/* Tests whether CHUNK can serve a request of SIZE bytes from POOL: CHUNK
   must be non-null, must not be the chunk recorded in POOL->locked, and
   must span at least twice SIZE between its base and its limit.
   NOTE(review): the return type, the parameter declarations and the
   braces are not visible in this listing, and the comment opened inside
   the body has no visible terminator -- restore both from the complete
   source.  */
1699 chunk_suitable (pool, chunk, size)
1704 /* Being at least twice SIZE means we can use memcpy in
1705 _cpp_next_chunk rather than memmove. Besides, it's a good idea
1707 return (chunk && pool->locked != chunk
1708 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
1711 /* Returns the end of the new pool. PTR points to a char in the old
1712 pool, and is updated to point to the same char in the new pool. */
/* POOL is the pool to advance and LEN the extra room wanted.
   NOTE(review): the return type, the declarations of POOL and LEN, the
   braces, the statement that makes CHUNK current and any guard around
   the PTR update are not visible in this listing.  */
1714 _cpp_next_chunk (pool, len, ptr)
1717 unsigned char **ptr;
/* Candidate: the chunk that follows the current one.  */
1719 cpp_chunk *chunk = pool->cur->next;
1721 /* LEN is the minimum size we want in the new pool. */
/* The bytes still free in the current chunk are carried over, so they
   count towards the size required.  */
1722 len += POOL_ROOM (pool);
/* An unsuitable candidate is replaced by a fresh chunk, which is linked
   in directly after the current chunk.  */
1723 if (! chunk_suitable (pool, chunk, len))
1725 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
1727 chunk->next = pool->cur->next;
1728 pool->cur->next = chunk;
1731 /* Update the pointer before changing chunk's front. */
/* NOTE(review): _cpp_pool_reserve passes 0 for PTR, so this update
   presumably sits behind a null test that is not visible -- confirm.  */
1733 *ptr += chunk->base - POOL_FRONT (pool);
/* Copy the carried-over bytes to the start of the new chunk.  */
1735 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
1736 chunk->front = chunk->base;
1739 return POOL_LIMIT (pool);
/* Body of a chunk allocator.  NOTE(review): the function header is not
   visible in this listing; the callers in _cpp_next_chunk and
   _cpp_init_pool suggest this is new_chunk (SIZE) -- confirm.  The
   declaration of RESULT, the braces and the return are also missing.  */
1746 unsigned char *base;
/* Round SIZE up, then obtain the data area and the descriptor in one
   xmalloc block.  */
1749 size = ALIGN (size, DEFAULT_ALIGNMENT);
1750 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
1751 /* Put the chunk descriptor at the end. Then chunk overruns will
1752 cause obvious chaos. */
1753 result = (cpp_chunk *) (base + size);
/* A new chunk is empty: front starts at base, and limit marks the end
   of the data area, which is where the descriptor begins.  */
1754 result->base = base;
1755 result->front = base;
1756 result->limit = base + size;
/* Initializes POOL: records the alignment and installs a first chunk of
   SIZE bytes obtained from new_chunk.
   NOTE(review): the return type, the declaration of POOL, the braces,
   the conditions guarding several statements and any use of TEMP are
   not visible in this listing.  */
1763 _cpp_init_pool (pool, size, align, temp)
1765 unsigned int size, align, temp;
/* Fallback alignment; presumably applied only when ALIGN is passed as
   zero (the guard is not visible) -- TODO confirm.  */
1768 align = DEFAULT_ALIGNMENT;
/* Nonzero when ALIGN has more than one bit set, that is, when it is not
   a power of two.  The action taken is not visible here.  */
1769 if (align & (align - 1))
1771 pool->align = align;
1772 pool->cur = new_chunk (size);
/* Links the chunk to itself, forming a one-element ring.  NOTE(review):
   probably conditional on TEMP -- confirm.  */
1776 pool->cur->next = pool->cur;
/* Takes one lock on POOL.  The lock count is incremented, and on the
   transition from zero the current chunk is remembered in POOL->locked.
   NOTE(review): the return type, the declaration of POOL and the braces
   are not visible in this listing.  */
1780 _cpp_lock_pool (pool)
1783 if (pool->locks++ == 0)
1784 pool->locked = pool->cur;
/* Drops one lock on POOL by decrementing the lock count.
   NOTE(review): the statement run when the count reaches zero is not
   visible in this listing (presumably it clears POOL->locked), nor are
   the return type, the declaration of POOL and the braces.  */
1788 _cpp_unlock_pool (pool)
1791 if (--pool->locks == 0)
/* Walks the chunks of POOL starting at POOL->cur.
   NOTE(review): the loop body is not visible in this listing; the
   trailing while with a semicolon reads as the tail of a do-while, and
   the name suggests each chunk is released -- confirm.  The return
   type, the declaration of POOL and the braces are also missing.  */
1796 _cpp_free_pool (pool)
1799 cpp_chunk *chunk = pool->cur, *next;
/* The walk ends on a null chunk or on arriving back at POOL->cur, so it
   terminates for a null-terminated chain and for a ring alike.  */
1807 while (chunk && chunk != pool->cur);
1810 /* Reserve LEN bytes from a memory pool. */
/* POOL is the pool to reserve from.  The visible code returns the pool
   front without advancing it.
   NOTE(review): the return type, the parameter declarations and the
   braces are not visible in this listing.  */
1812 _cpp_pool_reserve (pool, len)
/* Round the request up to the alignment recorded in the pool.  */
1816 len = ALIGN (len, pool->align);
/* Not enough room left in the current chunk: move on to another one.
   No caller pointer needs relocating, hence the 0 for PTR.  */
1817 if (len > (unsigned int) POOL_ROOM (pool))
1818 _cpp_next_chunk (pool, len, 0);
1820 return POOL_FRONT (pool);
1823 /* Allocate LEN bytes from a memory pool. */
1825 _cpp_pool_alloc (pool, len)
1829 unsigned char *result = _cpp_pool_reserve (pool, len);
1831 POOL_COMMIT (pool, len);