1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
42 /* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
46 #undef MULTIBYTE_CHARS
49 #ifdef MULTIBYTE_CHARS
54 /* Tokens with SPELL_STRING store their spelling in the token list,
55 and its length in token->val.str.len. */
67 enum spell_type category; /* Spelling class of a token type; read through TOKEN_SPELL. */
68 const unsigned char *name; /* Spelling text for a token type; read through TOKEN_NAME. */
71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:", /* Digraph spellings; the spelling routines index this with (token type - CPP_FIRST_DIGRAPH). */
74 #define OP(e, s) { SPELL_OPERATOR, U s }, /* Table row for an operator: category SPELL_OPERATOR, spelling S. */
75 #define TK(e, s) { s, U STRINGX (e) }, /* Table row for other tokens: category S; name is E passed through STRINGX (presumably stringification -- confirm). */
76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE }; /* One row per token type, produced by expanding TTYPE_TABLE with the OP and TK macros above. */
80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category) /* Spelling class of TOKEN, looked up by its type. */
81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name) /* Table spelling of TOKEN, looked up by its type. */
/* Forward declarations for the file-local (static) helpers used by the
   lexer.  PARAMS wraps every parameter list (presumably so that the
   prototypes can be hidden from pre-ISO compilers -- confirm against
   its definition).  */
83 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
85 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
87 static int skip_block_comment PARAMS ((cpp_reader *));
88 static int skip_line_comment PARAMS ((cpp_reader *));
89 static void adjust_column PARAMS ((cpp_reader *));
90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
92 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
93 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
94 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
95 static void unterminated PARAMS ((cpp_reader *, int));
96 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
97 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
98 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
99 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
100 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
101 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
102 const unsigned char *, unsigned int *));
104 static cpp_chunk *new_chunk PARAMS ((unsigned int));
105 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
106 static unsigned int hex_digit_value PARAMS ((unsigned int));
110 Compares the token TOKEN to the NUL-terminated string STRING.
111 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
114 cpp_ideq (token, string)
115 const cpp_token *token;
118 if (token->type != CPP_NAME) /* Only CPP_NAME tokens have their hash node compared below. */
121 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string); /* Negated because ustrcmp presumably returns 0 on a match, like strcmp -- confirm. */
124 /* Call when meeting a newline. Returns the character after the newline
125 (or carriage-return newline combination), or EOF.
Side effects visible here: BUFFER's col_adjust is reset, line_base is
moved to the start of the new line, and the returned character is also
left in BUFFER's read_ahead. */
127 handle_newline (buffer, newline_char)
129 cppchar_t newline_char;
131 cppchar_t next = EOF; /* Result when nothing follows the newline. */
133 buffer->col_adjust = 0; /* Tab adjustments accumulated on the old line no longer apply. */
135 buffer->line_base = buffer->cur;
137 /* Handle CR-LF and LF-CR combinations, get the next character. */
138 if (buffer->cur < buffer->rlimit)
140 next = *buffer->cur++;
141 if (next + newline_char == '\r' + '\n') /* If NEWLINE_CHAR is '\r' or '\n', the sums are equal only when NEXT is the other one. */
143 buffer->line_base = buffer->cur; /* The line begins after the second character of the pair. */
144 if (buffer->cur < buffer->rlimit)
145 next = *buffer->cur++;
151 buffer->read_ahead = next;
155 /* Subroutine of skip_escaped_newlines; called when a trigraph is
156 encountered. It warns if necessary, and returns true if the
157 trigraph should be honoured. FROM_CHAR is the third character of a
158 trigraph, and presumed to be the previous character for position
161 trigraph_ok (pfile, from_char)
165 int accept = CPP_OPTION (pfile, trigraphs);
167 /* Don't warn about trigraphs in comments. */
168 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
170 cpp_buffer *buffer = pfile->buffer;
172 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2, /* Column is backed up by two, presumably to point at the start of the three-character sequence -- confirm. */
173 "trigraph ??%c converted to %c",
175 (int) _cpp_trigraph_map[from_char]);
176 else if (buffer->cur != buffer->last_Wtrigraphs) /* Trigraph not honoured: warn only if this buffer position was not the last one reported. */
178 buffer->last_Wtrigraphs = buffer->cur; /* Remember the position so the same spot is not reported again. */
179 cpp_warning_with_line (pfile, buffer->lineno,
180 CPP_BUF_COL (buffer) - 2,
181 "trigraph ??%c ignored", (int) from_char);
188 /* Sets the token type to T and discards the one-character lookahead. Assumes local variables buffer and result. */
189 #define ACCEPT_CHAR(t) \
190 do { result->type = t; buffer->read_ahead = EOF; } while (0)
192 /* When we move to multibyte character sets, add to these something
193 that saves and restores the state of the multibyte conversion
194 library. This probably involves saving and restoring a "cookie".
195 In the case of glibc it is an 8-byte structure, so is not a high
196 overhead operation. In any case, it's out of the fast path. */
197 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0) /* Records the read position; assumes locals buffer and saved_cur. */
198 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0) /* Rewinds to the position recorded by SAVE_STATE. */
200 /* Skips any escaped newlines introduced by NEXT, which is either a
201 '?' or a '\\'. Returns the next character, which will also have
202 been placed in buffer->read_ahead. This routine performs
203 preprocessing stages 1 and 2 of the ISO C standard. */
205 skip_escaped_newlines (buffer, next)
209 /* Only do this if we apply stages 1 and 2. */
210 if (!buffer->from_stage3)
213 const unsigned char *saved_cur; /* Rewind point used by SAVE_STATE and RESTORE_STATE. */
218 if (buffer->cur == buffer->rlimit)
224 next1 = *buffer->cur++; /* Second character of a candidate trigraph. */
225 if (next1 != '?' || buffer->cur == buffer->rlimit)
231 next1 = *buffer->cur++; /* Third character; it selects the replacement in _cpp_trigraph_map. */
232 if (!_cpp_trigraph_map[next1]
233 || !trigraph_ok (buffer->pfile, next1)) /* No mapping for this character, or trigraph_ok declined it. */
239 /* We have a full trigraph here. */
240 next = _cpp_trigraph_map[next1];
241 if (next != '\\' || buffer->cur == buffer->rlimit)
246 /* We have a backslash, and room for at least one more character. */
250 next1 = *buffer->cur++;
251 if (!is_nvspace (next1)) /* Looking for the first character after the backslash that is not horizontal space. */
255 while (buffer->cur < buffer->rlimit);
257 if (!is_vspace (next1)) /* No newline follows: this backslash is not an escaped newline. */
263 if (space && !buffer->pfile->state.lexing_comment)
264 cpp_warning (buffer->pfile,
265 "backslash and newline separated by space");
267 next = handle_newline (buffer, next1);
269 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
271 while (next == '\\' || next == '?'); /* The character after the splice may itself begin another escape or trigraph. */
274 buffer->read_ahead = next;
278 /* Obtain the next character, after trigraph conversion and skipping
279 an arbitrary string of escaped newlines. The common case of no
280 trigraphs or escaped newlines falls through quickly.
The character obtained is also stored in BUFFER's read_ahead; it is
EOF when the buffer has no more input. */
282 get_effective_char (buffer)
285 cppchar_t next = EOF; /* Remains EOF when no input is left. */
287 if (buffer->cur < buffer->rlimit)
289 next = *buffer->cur++;
291 /* '?' can introduce trigraphs (and therefore backslash); '\\'
292 can introduce escaped newlines, which we want to skip, or
293 UCNs, which, depending upon lexer state, we will handle in
295 if (next == '?' || next == '\\')
296 next = skip_escaped_newlines (buffer, next);
299 buffer->read_ahead = next;
303 /* Skip a C-style block comment. We find the end of the comment by
304 seeing if an asterisk is before every '/' we encounter. Returns
305 non-zero if comment terminated by EOF, zero otherwise.
While scanning, pfile->state.lexing_comment is set; on return it is
cleared again and the lookahead character is discarded. */
307 skip_block_comment (pfile)
310 cpp_buffer *buffer = pfile->buffer;
311 cppchar_t c = EOF, prevc = EOF; /* prevc trails c by one character so the closing pair can be recognised. */
313 pfile->state.lexing_comment = 1; /* Consulted by the trigraph and escaped-newline warnings, which stay quiet in comments. */
314 while (buffer->cur != buffer->rlimit)
316 prevc = c, c = *buffer->cur++;
319 /* FIXME: For speed, create a new character class of characters
320 of interest inside block comments. */
321 if (c == '?' || c == '\\')
322 c = skip_escaped_newlines (buffer, c);
324 /* People like decorating comments with '*', so check for '/'
325 instead for efficiency. */
331 /* Warn about potential nested comments, but not if the '/'
332 comes immediately before the true comment delimiter.
333 Don't bother to get it right across escaped newlines. */
334 if (CPP_OPTION (pfile, warn_comments)
335 && buffer->cur != buffer->rlimit)
337 prevc = c, c = *buffer->cur++;
338 if (c == '*' && buffer->cur != buffer->rlimit)
340 prevc = c, c = *buffer->cur++;
342 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
343 CPP_BUF_COL (buffer),
344 "\"/*\" within comment");
349 else if (is_vspace (c))
351 prevc = c, c = handle_newline (buffer, c);
355 adjust_column (pfile);
358 pfile->state.lexing_comment = 0;
359 buffer->read_ahead = EOF;
360 return c != '/' || prevc != '*';
363 /* Skip a C++ line comment. Handles escaped newlines. Returns
364 non-zero if a multiline comment. The following new line, if any,
365 is left in buffer->read_ahead. */
367 skip_line_comment (pfile)
370 cpp_buffer *buffer = pfile->buffer;
371 unsigned int orig_lineno = buffer->lineno; /* Line number on entry; compared on return to detect a comment that spans lines. */
374 pfile->state.lexing_comment = 1; /* Consulted by the trigraph and escaped-newline warnings. */
378 if (buffer->cur == buffer->rlimit)
382 if (c == '?' || c == '\\')
383 c = skip_escaped_newlines (buffer, c);
385 while (!is_vspace (c)); /* Stop at the first vertical-space character. */
387 pfile->state.lexing_comment = 0;
388 buffer->read_ahead = c; /* Leave any newline for caller. */
389 return orig_lineno != buffer->lineno; /* A changed line number presumably means an escaped newline continued the comment -- confirm where lineno is advanced. */
392 /* pfile->buffer->cur is one beyond the \t character. Update
393 col_adjust so we track the column correctly.
Assumes the tabstop option is non-zero, since it is used as a modulus
below -- TODO confirm where that is enforced. */
395 adjust_column (pfile)
398 cpp_buffer *buffer = pfile->buffer;
399 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
401 /* Round it up to multiple of the tabstop, but subtract 1 since the
402 tab itself occupies a character position. */
403 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
404 - col % CPP_OPTION (pfile, tabstop)) - 1; /* E.g. tabstop 8 and col 2 add (8 - 2) - 1 = 5. */
407 /* Skips whitespace, saving the next non-whitespace character.
408 Adjusts buffer->col_adjust (through adjust_column) to account for tabs. Without this,
409 tokens might be assigned an incorrect column. */
411 skip_whitespace (pfile, c)
415 cpp_buffer *buffer = pfile->buffer;
416 unsigned int warned = 0; /* Presumably limits the null-character warning to one per call; the code that sets it is not visible here. */
420 /* Horizontal space always OK. */
424 adjust_column (pfile);
425 /* Just \f \v or \0 left. */
430 cpp_warning (pfile, "null character(s) ignored");
434 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile)) /* Form feed and vertical tab are diagnosed only inside directives, and only when pedantic. */
435 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
436 CPP_BUF_COL (buffer),
437 "%s in preprocessing directive",
438 c == '\f' ? "form feed" : "vertical tab");
441 if (buffer->cur == buffer->rlimit)
445 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
446 while (is_nvspace (c));
448 /* Remember the next character. */
449 buffer->read_ahead = c;
/* name_p checks each of STRING's len bytes with is_idchar.
   NOTE(review): the return statements are not visible here; presumably
   0 on the first byte that fails and 1 otherwise -- confirm.  */
452 /* See if the characters of a number token are valid in a name (no
455 name_p (pfile, string)
457 const cpp_string *string;
461 for (i = 0; i < string->len; i++)
462 if (!is_idchar (string->text[i]))
468 /* Parse an identifier, skipping embedded backslash-newlines.
469 Calculate the hash value of the token while parsing, for improved
470 performance. The hashing algorithm *must* match cpp_lookup().
NOTE(review): the statements visible here collect the spelling on the
hash table's obstack and resolve it with ht_lookup; no hash value is
computed in them, so the sentence above may be out of date -- confirm.
C is the first character of the identifier. The character that ends
the identifier is left in buffer->read_ahead. */
472 static cpp_hashnode *
473 parse_identifier (pfile, c)
477 cpp_hashnode *result;
478 cpp_buffer *buffer = pfile->buffer;
479 unsigned int saw_dollar = 0, len;
480 struct obstack *stack = &pfile->hash_table->stack; /* The spelling is accumulated directly on the hash table's obstack. */
486 obstack_1grow (stack, c);
492 if (buffer->cur == buffer->rlimit)
497 while (is_idchar (c));
499 /* Potential escaped newline? */
500 if (c != '?' && c != '\\')
502 c = skip_escaped_newlines (buffer, c);
504 while (is_idchar (c)); /* Resume collecting if the identifier continues after the splice. */
506 /* Remember the next character. */
507 buffer->read_ahead = c;
509 /* $ is not an identifier character in the standard, but is commonly
510 accepted as an extension. Don't warn about it in skipped
511 conditional blocks. */
512 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
513 cpp_pedwarn (pfile, "'$' character(s) in identifier");
515 /* Identifiers are null-terminated. */
516 len = obstack_object_size (stack); /* Measured before the terminator is appended, so LEN excludes it. */
517 obstack_1grow (stack, '\0');
519 /* This routine commits the memory if necessary. */
520 result = (cpp_hashnode *)
521 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
523 /* Some identifiers require diagnostics when lexed. */
524 if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
526 /* It is allowed to poison the same identifier twice. */
527 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
528 cpp_error (pfile, "attempt to use poisoned \"%s\"",
531 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
532 replacement list of a variadic macro. */
533 if (result == pfile->spec_nodes.n__VA_ARGS__
534 && !pfile->state.va_args_ok)
535 cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
541 /* Parse a number, skipping embedded backslash-newlines. The spelling is built at the front of pfile->ident_pool; on return NUMBER->text points at it, NUMBER->len is its length, and len + 1 bytes are committed (room for the terminator). The character that ends the number is left in buffer->read_ahead. In this file LEADING_PERIOD is passed as 1 by lex_dot and as 0 for numbers that start with a digit. */
543 parse_number (pfile, number, c, leading_period)
549 cpp_buffer *buffer = pfile->buffer;
550 cpp_pool *pool = &pfile->ident_pool;
551 unsigned char *dest, *limit; /* Write cursor and end of the usable pool space. */
553 dest = POOL_FRONT (pool);
554 limit = POOL_LIMIT (pool);
556 /* Place a leading period. */
560 limit = _cpp_next_chunk (pool, 0, &dest);
568 /* Need room for terminating null. */
569 if (dest + 1 >= limit)
570 limit = _cpp_next_chunk (pool, 0, &dest); /* Out of room: obtain more pool space; DEST is passed by address so it can be updated. */
574 if (buffer->cur == buffer->rlimit)
579 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1])); /* dest[-1] is presumably the character stored last, which decides whether a sign may follow -- confirm. */
581 /* Potential escaped newline? */
582 if (c != '?' && c != '\\')
584 c = skip_escaped_newlines (buffer, c);
586 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
588 /* Remember the next character. */
589 buffer->read_ahead = c;
591 /* Null-terminate the number. */
594 number->text = POOL_FRONT (pool);
595 number->len = dest - number->text;
596 POOL_COMMIT (pool, number->len + 1); /* + 1 keeps the terminator inside the committed region. */
599 /* Subroutine of parse_string. Emits error for unterminated strings. TERM is the missing terminator character. For a double-quoted string, when a multi-line string start was recorded in pfile->mlstring_pos on a different line from the current token, a second error points back at that start and the record is then cleared. */
601 unterminated (pfile, term)
605 cpp_error (pfile, "missing terminating %c character", term);
607 if (term == '\"' && pfile->mlstring_pos.line
608 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
610 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
611 pfile->mlstring_pos.col,
612 "possible start of unterminated string literal");
613 pfile->mlstring_pos.line = 0;
617 /* Subroutine of parse_string. DEST is the position in pfile->ident_pool where the terminator would be stored. The final return is non-zero when an even number of consecutive backslashes (possibly none) immediately precede DEST, counting back no further than the front of the pool. */
619 unescaped_terminator_p (pfile, dest)
621 const unsigned char *dest;
623 const unsigned char *start, *temp;
625 /* In #include-style directives, terminators are not escapable. */
626 if (pfile->state.angled_headers)
629 start = POOL_FRONT (&pfile->ident_pool); /* Lower bound for the backwards scan. */
631 /* An odd number of consecutive backslashes represents an escaped
633 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
636 return ((dest - temp) & 1) == 0;
639 /* Parses a string, character constant, or angle-bracketed header file
640 name. Handles embedded trigraphs and escaped newlines. The stored
641 string is guaranteed NUL-terminated, but it is not guaranteed that
642 this is the first NUL since embedded NULs are preserved.
644 Multi-line strings are allowed, but they are deprecated.
The text is built at the front of pfile->ident_pool; on return
TOKEN->val.str.text points at it, TOKEN->val.str.len is its length, and
len + 1 bytes are committed. The first line of a multi-line string is
recorded in pfile->mlstring_pos, which unterminated() reads to report a
possible start of an unterminated literal. */
646 parse_string (pfile, token, terminator)
649 cppchar_t terminator;
651 cpp_buffer *buffer = pfile->buffer;
652 cpp_pool *pool = &pfile->ident_pool;
653 unsigned char *dest, *limit; /* Write cursor and end of the usable pool space. */
655 unsigned int nulls = 0; /* Presumably counts embedded NULs for the warning issued further down -- confirm. */
657 dest = POOL_FRONT (pool);
658 limit = POOL_LIMIT (pool);
662 if (buffer->cur == buffer->rlimit)
668 /* We need space for the terminating NUL. */
670 limit = _cpp_next_chunk (pool, 0, &dest);
674 unterminated (pfile, terminator);
678 /* Handle trigraphs, escaped newlines etc. */
679 if (c == '?' || c == '\\')
680 c = skip_escaped_newlines (buffer, c);
682 if (c == terminator && unescaped_terminator_p (pfile, dest))
687 else if (is_vspace (c))
689 /* In assembly language, silently terminate string and
690 character literals at end of line. This is a kludge
691 around not knowing where comments are. */
692 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
695 /* Character constants and header names may not extend over
696 multiple lines. In Standard C, neither may strings.
697 Unfortunately, we accept multiline strings as an
698 extension, except in #include family directives. */
699 if (terminator != '"' || pfile->state.angled_headers)
701 unterminated (pfile, terminator);
705 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
706 if (pfile->mlstring_pos.line == 0)
707 pfile->mlstring_pos = pfile->lexer_pos;
709 c = handle_newline (buffer, c);
716 cpp_warning (pfile, "null character(s) preserved in literal");
722 /* Remember the next character. */
723 buffer->read_ahead = c;
726 token->val.str.text = POOL_FRONT (pool);
727 token->val.str.len = dest - token->val.str.text;
728 POOL_COMMIT (pool, token->val.str.len + 1);
731 /* The stored comment includes the comment start and any terminator. Turns TOKEN into a CPP_COMMENT whose text is freshly allocated from pfile->ident_pool. FROM points into the input buffer just after the initial '/'. */
733 save_comment (pfile, token, from)
736 const unsigned char *from;
738 unsigned char *buffer; /* Destination storage for the comment text; note this is not a cpp_buffer. */
741 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
742 /* C++ comments probably (not definitely) have moved past a new
743 line, which we don't want to save in the comment. */
744 if (pfile->buffer->read_ahead != EOF)
746 buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
748 token->type = CPP_COMMENT;
749 token->val.str.len = len;
750 token->val.str.text = buffer;
753 memcpy (buffer + 1, from, len - 1); /* Byte 0 is left for the initial '/', presumably stored by a statement not visible here. */
756 /* Subroutine of lex_token to handle '%'. A little tricky, since we
757 want to avoid stepping back when lexing %:%X.
A character read one too far is parked in buffer->extra_char and picked
up on the next call instead of being pushed back. */
759 lex_percent (buffer, result)
765 result->type = CPP_MOD; /* Default result: a plain '%'. */
766 /* Parsing %:%X could leave an extra character. */
767 if (buffer->extra_char == EOF)
768 c = get_effective_char (buffer);
771 c = buffer->read_ahead = buffer->extra_char; /* Re-use the character parked by a previous call. */
772 buffer->extra_char = EOF;
776 ACCEPT_CHAR (CPP_MOD_EQ);
777 else if (CPP_OPTION (buffer->pfile, digraphs))
781 result->flags |= DIGRAPH;
782 ACCEPT_CHAR (CPP_HASH);
783 if (get_effective_char (buffer) == '%')
785 buffer->extra_char = get_effective_char (buffer); /* Look one character beyond the second '%'. */
786 if (buffer->extra_char == ':')
788 buffer->extra_char = EOF;
789 ACCEPT_CHAR (CPP_PASTE);
792 /* We'll catch the extra_char when we're called back. */
793 buffer->read_ahead = '%';
798 result->flags |= DIGRAPH;
799 ACCEPT_CHAR (CPP_CLOSE_BRACE);
804 /* Subroutine of lex_token to handle '.'. This is tricky, since we
805 want to avoid stepping back when lexing '...' or '.123'. In the
806 latter case we should also set a flag for parse_number.
A character read one too far is parked in buffer->extra_char and picked
up on the next call instead of being pushed back. */
808 lex_dot (pfile, result)
812 cpp_buffer *buffer = pfile->buffer;
815 /* Parsing ..X could leave an extra character. */
816 if (buffer->extra_char == EOF)
817 c = get_effective_char (buffer);
820 c = buffer->read_ahead = buffer->extra_char; /* Re-use the character parked by a previous call. */
821 buffer->extra_char = EOF;
824 /* All known character sets have 0...9 contiguous. */
825 if (c >= '0' && c <= '9')
827 result->type = CPP_NUMBER;
828 parse_number (pfile, &result->val.str, c, 1); /* The final 1 is parse_number's leading_period argument. */
832 result->type = CPP_DOT; /* Default result: a single '.'. */
835 buffer->extra_char = get_effective_char (buffer); /* Look one character beyond the second '.'. */
836 if (buffer->extra_char == '.')
838 buffer->extra_char = EOF;
839 ACCEPT_CHAR (CPP_ELLIPSIS);
842 /* We'll catch the extra_char when we're called back. */
843 buffer->read_ahead = '.';
845 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
846 ACCEPT_CHAR (CPP_DOT_STAR);
/* Lex one token from pfile->buffer into RESULT.  The visible statements
   take RESULT's initial flags from buffer->saved_flags, record the
   token position in pfile->lexer_pos, start from the pending lookahead
   character in buffer->read_ahead when there is one, and choose the
   token type from the character(s) read.  For a token lexed outside a
   directive, pfile->mi_state is set to MI_FAILED.  */
851 _cpp_lex_token (pfile, result)
857 const unsigned char *comment_start;
861 bol = pfile->state.next_bol; /* Snapshot of the beginning-of-line state before it is cleared below. */
863 buffer = pfile->buffer;
864 pfile->state.next_bol = 0;
865 result->flags = buffer->saved_flags; /* Flags queued for this token are consumed here. */
866 buffer->saved_flags = 0;
868 pfile->lexer_pos.line = buffer->lineno;
870 pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
872 c = buffer->read_ahead; /* Prefer the pending lookahead character, if any. */
873 if (c == EOF && buffer->cur < buffer->rlimit) /* No lookahead pending, but input remains. */
876 pfile->lexer_pos.col++;
880 buffer->read_ahead = EOF;
884 /* Non-empty files should end in a newline. Checking "bol" too
885 prevents multiple warnings when hitting the EOF more than
886 once, like in a directive. Don't warn for command line and
888 if (pfile->lexer_pos.col != 0 && !bol && !buffer->from_stage3)
889 cpp_pedwarn (pfile, "no newline at end of file");
890 pfile->state.next_bol = 1;
891 pfile->skipping = 0; /* In case missing #endif. */
892 result->type = CPP_EOF;
893 /* Don't do MI optimisation. */
896 case ' ': case '\t': case '\f': case '\v': case '\0':
897 skip_whitespace (pfile, c);
898 result->flags |= PREV_WHITE; /* The token that follows is marked as preceded by white space. */
901 case '\n': case '\r':
902 if (!pfile->state.in_directive)
904 handle_newline (buffer, c);
906 pfile->lexer_pos.output_line = buffer->lineno;
907 /* This is a new line, so clear any white space flag.
908 Newlines in arguments are white space (6.10.3.10);
909 parse_arg takes care of that. */
910 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
914 /* Don't let directives spill over to the next line. */
915 buffer->read_ahead = c;
916 pfile->state.next_bol = 1;
917 result->type = CPP_EOF;
918 /* Don't break; pfile->skipping might be true. */
923 /* These could start an escaped newline, or '?' a trigraph. Let
924 skip_escaped_newlines do all the work. */
926 unsigned int lineno = buffer->lineno;
928 c = skip_escaped_newlines (buffer, c);
929 if (lineno != buffer->lineno)
930 /* We had at least one escaped newline of some sort, and the
931 next character is in buffer->read_ahead. Update the
932 token's line and column. */
935 /* We are either the original '?' or '\\', or a trigraph. */
936 result->type = CPP_QUERY;
937 buffer->read_ahead = EOF;
945 case '0': case '1': case '2': case '3': case '4':
946 case '5': case '6': case '7': case '8': case '9':
947 result->type = CPP_NUMBER;
948 parse_number (pfile, &result->val.str, c, 0);
952 if (!CPP_OPTION (pfile, dollars_in_ident))
954 /* Fall through... */
957 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
958 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
959 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
960 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
962 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
963 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
964 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
965 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
967 result->type = CPP_NAME;
968 result->val.node = parse_identifier (pfile, c);
970 /* 'L' may introduce wide characters or strings. */
971 if (result->val.node == pfile->spec_nodes.n_L)
973 c = buffer->read_ahead; /* For make_string. */
974 if (c == '\'' || c == '"')
976 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
980 /* Convert named operators to their proper types. */
981 else if (result->val.node->flags & NODE_OPERATOR)
983 result->flags |= NAMED_OP;
984 result->type = result->val.node->value.operator;
990 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
992 parse_string (pfile, result, c);
996 /* A potential block or line comment. */
997 comment_start = buffer->cur;
998 result->type = CPP_DIV;
999 c = get_effective_char (buffer);
1001 ACCEPT_CHAR (CPP_DIV_EQ);
1002 if (c != '/' && c != '*')
1004 if (buffer->from_stage3)
1009 if (skip_block_comment (pfile))
1010 cpp_error_with_line (pfile, pfile->lexer_pos.line,
1011 pfile->lexer_pos.col,
1012 "unterminated comment");
1016 if (!CPP_OPTION (pfile, cplusplus_comments)
1017 && !CPP_IN_SYSTEM_HEADER (pfile))
1020 /* Warn about comments only if pedantically GNUC89, and not
1021 in system headers. */
1022 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1023 && ! buffer->warned_cplusplus_comments)
1026 "C++ style comments are not allowed in ISO C89");
1028 "(this will be reported only once per input file)");
1029 buffer->warned_cplusplus_comments = 1;
1032 /* Skip_line_comment updates buffer->read_ahead. */
1033 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1034 cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1035 pfile->lexer_pos.col,
1036 "multi-line comment");
1039 /* Skipping the comment has updated buffer->read_ahead. */
1040 if (!pfile->state.save_comments)
1042 result->flags |= PREV_WHITE;
1046 /* Save the comment as a token in its own right. */
1047 save_comment (pfile, result, comment_start);
1048 /* Don't do MI optimisation. */
1052 if (pfile->state.angled_headers)
1054 result->type = CPP_HEADER_NAME;
1055 c = '>'; /* terminator. */
1059 result->type = CPP_LESS;
1060 c = get_effective_char (buffer);
1062 ACCEPT_CHAR (CPP_LESS_EQ);
1065 ACCEPT_CHAR (CPP_LSHIFT);
1066 if (get_effective_char (buffer) == '=')
1067 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1069 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1071 ACCEPT_CHAR (CPP_MIN);
1072 if (get_effective_char (buffer) == '=')
1073 ACCEPT_CHAR (CPP_MIN_EQ);
1075 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1077 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1078 result->flags |= DIGRAPH;
1080 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1082 ACCEPT_CHAR (CPP_OPEN_BRACE);
1083 result->flags |= DIGRAPH;
1088 result->type = CPP_GREATER;
1089 c = get_effective_char (buffer);
1091 ACCEPT_CHAR (CPP_GREATER_EQ);
1094 ACCEPT_CHAR (CPP_RSHIFT);
1095 if (get_effective_char (buffer) == '=')
1096 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1098 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1100 ACCEPT_CHAR (CPP_MAX);
1101 if (get_effective_char (buffer) == '=')
1102 ACCEPT_CHAR (CPP_MAX_EQ);
1107 lex_percent (buffer, result);
1108 if (result->type == CPP_HASH)
1113 lex_dot (pfile, result);
1117 result->type = CPP_PLUS;
1118 c = get_effective_char (buffer);
1120 ACCEPT_CHAR (CPP_PLUS_EQ);
1122 ACCEPT_CHAR (CPP_PLUS_PLUS);
1126 result->type = CPP_MINUS;
1127 c = get_effective_char (buffer);
1130 ACCEPT_CHAR (CPP_DEREF);
1131 if (CPP_OPTION (pfile, cplusplus)
1132 && get_effective_char (buffer) == '*')
1133 ACCEPT_CHAR (CPP_DEREF_STAR);
1136 ACCEPT_CHAR (CPP_MINUS_EQ);
1138 ACCEPT_CHAR (CPP_MINUS_MINUS);
1142 result->type = CPP_MULT;
1143 if (get_effective_char (buffer) == '=')
1144 ACCEPT_CHAR (CPP_MULT_EQ);
1148 result->type = CPP_EQ;
1149 if (get_effective_char (buffer) == '=')
1150 ACCEPT_CHAR (CPP_EQ_EQ);
1154 result->type = CPP_NOT;
1155 if (get_effective_char (buffer) == '=')
1156 ACCEPT_CHAR (CPP_NOT_EQ);
1160 result->type = CPP_AND;
1161 c = get_effective_char (buffer);
1163 ACCEPT_CHAR (CPP_AND_EQ);
1165 ACCEPT_CHAR (CPP_AND_AND);
1169 c = buffer->extra_char; /* Can be set by error condition below. */
1172 buffer->read_ahead = c;
1173 buffer->extra_char = EOF;
1176 c = get_effective_char (buffer);
1180 ACCEPT_CHAR (CPP_PASTE);
1184 result->type = CPP_HASH;
1188 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1189 tokens within the list of arguments that would otherwise act
1190 as preprocessing directives, the behavior is undefined.
1192 This implementation will report a hard error, terminate the
1193 macro invocation, and proceed to process the directive. */
1194 if (pfile->state.parsing_args)
1196 if (pfile->state.parsing_args == 2)
1198 "directives may not be used inside a macro argument");
1200 /* Put a '#' in lookahead, return CPP_EOF for parse_arg. */
1201 buffer->extra_char = buffer->read_ahead;
1202 buffer->read_ahead = '#';
1203 pfile->state.next_bol = 1;
1204 result->type = CPP_EOF;
1206 /* Get whitespace right - newline_in_args sets it. */
1207 if (pfile->lexer_pos.col == 1)
1208 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1212 /* This is the hash introducing a directive. */
1213 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1214 goto done_directive; /* bol still 1. */
1215 /* This is in fact an assembler #. */
1220 result->type = CPP_OR;
1221 c = get_effective_char (buffer);
1223 ACCEPT_CHAR (CPP_OR_EQ);
1225 ACCEPT_CHAR (CPP_OR_OR);
1229 result->type = CPP_XOR;
1230 if (get_effective_char (buffer) == '=')
1231 ACCEPT_CHAR (CPP_XOR_EQ);
1235 result->type = CPP_COLON;
1236 c = get_effective_char (buffer);
1237 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1238 ACCEPT_CHAR (CPP_SCOPE);
1239 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1241 result->flags |= DIGRAPH;
1242 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1246 case '~': result->type = CPP_COMPL; break;
1247 case ',': result->type = CPP_COMMA; break;
1248 case '(': result->type = CPP_OPEN_PAREN; break;
1249 case ')': result->type = CPP_CLOSE_PAREN; break;
1250 case '[': result->type = CPP_OPEN_SQUARE; break;
1251 case ']': result->type = CPP_CLOSE_SQUARE; break;
1252 case '{': result->type = CPP_OPEN_BRACE; break;
1253 case '}': result->type = CPP_CLOSE_BRACE; break;
1254 case ';': result->type = CPP_SEMICOLON; break;
1256 /* @ is a punctuator in Objective C. */
1257 case '@': result->type = CPP_ATSIGN; break;
1261 result->type = CPP_OTHER;
1266 if (pfile->skipping)
1269 /* If not in a directive, this token invalidates controlling macros. */
1270 if (!pfile->state.in_directive)
1271 pfile->mi_state = MI_FAILED;
1274 /* An upper bound on the number of bytes needed to spell a token,
1275 including preceding whitespace. */
1277 cpp_token_len (token)
1278 const cpp_token *token;
1282 switch (TOKEN_SPELL (token))
1284 default: len = 0; break; /* Other spelling classes contribute no variable-length part. */
1285 case SPELL_STRING: len = token->val.str.len; break; /* Length of the stored text. */
1286 case SPELL_IDENT: len = NODE_LEN (token->val.node); break; /* Length of the identifier's name. */
1288 /* 1 for whitespace, 4 for comment delimiters. */
1292 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1293 already contain enough space to hold the token's spelling.
1294 Returns a pointer to the character after the last character
1297 cpp_spell_token (pfile, token, buffer)
1298 cpp_reader *pfile; /* Would be nice to be rid of this... */
1299 const cpp_token *token;
1300 unsigned char *buffer;
1302 switch (TOKEN_SPELL (token))
1304 case SPELL_OPERATOR:
1306 const unsigned char *spelling;
1309 if (token->flags & DIGRAPH) /* Tokens lexed from a digraph use the digraph spelling table. */
1311 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1312 else if (token->flags & NAMED_OP)
1315 spelling = TOKEN_NAME (token);
1317 while ((c = *spelling++) != '\0') /* Walks the spelling up to, but not including, its terminating NUL. */
1324 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node)); /* Identifier bytes only; no terminator is copied. */
1325 buffer += NODE_LEN (token->val.node);
1330 int left, right, tag;
1331 switch (token->type)
1333 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1334 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1335 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1336 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1337 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1338 default: left = '\0'; right = '\0'; tag = '\0'; break;
1340 if (tag) *buffer++ = tag;
1341 if (left) *buffer++ = left;
1342 memcpy (buffer, token->val.str.text, token->val.str.len);
1343 buffer += token->val.str.len;
1344 if (right) *buffer++ = right;
1349 *buffer++ = token->val.c;
1353 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1360 /* Returns a token as a null-terminated string. The string is
1361 temporary, and automatically freed later. Useful for diagnostics. */
1363 cpp_token_as_text (pfile, token)
1365 const cpp_token *token;
1367 unsigned int len = cpp_token_len (token); /* Upper bound on the spelling length. */
1368 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end; /* Scratch space taken from the identifier pool. */
1370 end = cpp_spell_token (pfile, token, start); /* END is cpp_spell_token's return value, described there as the position after the last character written. */
1376 /* Used by C front ends. Should really move to using cpp_token_as_text. */
1378 cpp_type2name (type)
1379 enum cpp_ttype type;
/* Direct table lookup: TYPE indexes token_spellings and the name field
   of that entry is returned as a plain char pointer.  No range check
   on TYPE is performed here.  */
1381 return (const char *) token_spellings[type].name;
1384 /* Writes the spelling of token to FP. Separate from cpp_spell_token
1385 for efficiency - to avoid double-buffering. Also, outputs a space
1386 if PREV_WHITE is flagged. */
1388 cpp_output_token (token, fp)
1389 const cpp_token *token;
/* Test for the PREV_WHITE flag; per the header comment a space is
   output in that case.  */
1392 if (token->flags & PREV_WHITE)
/* The spelling category of TOKEN selects how its text is written.  */
1395 switch (TOKEN_SPELL (token))
1397 case SPELL_OPERATOR:
1399 const unsigned char *spelling;
/* Spelling sources visible here: digraph_spellings, indexed by the
   offset of the token type from CPP_FIRST_DIGRAPH, for tokens flagged
   DIGRAPH, and TOKEN_NAME.  */
1401 if (token->flags & DIGRAPH)
1403 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1404 else if (token->flags & NAMED_OP)
1407 spelling = TOKEN_NAME (token);
1409 ufputs (spelling, fp);
/* Identifier node: write its name.  */
1415 ufputs (NODE_NAME (token->val.node), fp);
/* String-like tokens: the token type picks the opening and closing
   delimiters and an optional L prefix.  A zero value means that piece
   is not written.  */
1420 int left, right, tag;
1421 switch (token->type)
1423 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1424 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1425 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1426 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1427 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1428 default: left = '\0'; right = '\0'; tag = '\0'; break;
/* The text is written by its stored length rather than as a C string.  */
1430 if (tag) putc (tag, fp);
1431 if (left) putc (left, fp);
1432 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1433 if (right) putc (right, fp);
/* Single-character token: the character itself is stored in val.c.  */
1438 putc (token->val.c, fp);
1442 /* An error, most probably. */
1447 /* Compare two tokens. */
1449 _cpp_equiv_tokens (a, b)
1450 const cpp_token *a, *b;
/* Type and flags must both match before the payload is looked at; how
   the payload is compared depends on the spelling category of A.  */
1452 if (a->type == b->type && a->flags == b->flags)
1453 switch (TOKEN_SPELL (a))
1455 default: /* Keep compiler happy. */
1456 case SPELL_OPERATOR:
1459 return a->val.c == b->val.c; /* Character. */
/* Macro arguments additionally need the same argument number; any
   other token reaching this return compares equal.  */
1461 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
/* Identifier nodes are compared by pointer.  */
1463 return a->val.node == b->val.node;
/* Strings: the lengths must agree and memcmp must report no
   difference in the text.  */
1465 return (a->val.str.len == b->val.str.len
1466 && !memcmp (a->val.str.text, b->val.str.text,
1473 /* Determine whether two tokens can be pasted together, and if so,
1474 what the resulting token is. Returns CPP_EOF if the tokens cannot
1475 be pasted, or the appropriate type for the merged token if they
1478 cpp_can_paste (pfile, token1, token2, digraph)
1480 const cpp_token *token1, *token2;
/* A and B are the types of the left and right token.  CXX gates the
   results that are only returned when the cplusplus option is set.  */
1483 enum cpp_ttype a = token1->type, b = token2->type;
1484 int cxx = CPP_OPTION (pfile, cplusplus);
1486 /* Treat named operators as if they were ordinary NAMEs. */
1487 if (token1->flags & NAMED_OP)
1489 if (token2->flags & NAMED_OP)
/* Any type up to CPP_LAST_EQ followed by CPP_EQ maps to the type at the
   fixed enum distance CPP_EQ_EQ - CPP_EQ, so this relies on the
   ordering of enum cpp_ttype.  */
1492 if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1493 return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
/* A same-type pair gives a right shift here, so this group presumably
   belongs to the CPP_GREATER case (its label is not shown) - confirm.  */
1498 if (b == a) return CPP_RSHIFT;
1499 if (b == CPP_QUERY && cxx) return CPP_MAX;
1500 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
/* Mirror image for left shifts; presumably the CPP_LESS case - confirm.  */
1503 if (b == a) return CPP_LSHIFT;
1504 if (b == CPP_QUERY && cxx) return CPP_MIN;
1505 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
/* Digraph results are only produced when the digraphs option is on;
   each of them sets *DIGRAPH to 1.  */
1506 if (CPP_OPTION (pfile, digraphs))
1509 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1511 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
/* Doubled operators.  */
1515 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1516 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1517 case CPP_OR: if (b == a) return CPP_OR_OR; break;
1520 if (b == a) return CPP_MINUS_MINUS;
1521 if (b == CPP_GREATER) return CPP_DEREF;
1524 if (b == a && cxx) return CPP_SCOPE;
1525 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1526 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1530 if (CPP_OPTION (pfile, digraphs))
1532 if (b == CPP_GREATER)
1533 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1535 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1539 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1542 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1543 if (b == CPP_NUMBER) return CPP_NUMBER;
/* Two tokens of the same type paste to CPP_PASTE only when their
   DIGRAPH flags agree; *DIGRAPH then carries that shared flag.  */
1547 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1549 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
/* Results that build a name or a wide literal.  The wide forms require
   the left token to be the special n_L node recorded in PFILE; the
   left-hand halves of these conditions are not shown.  */
1553 if (b == CPP_NAME) return CPP_NAME;
1555 && name_p (pfile, &token2->val.str)) return CPP_NAME;
1557 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1559 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
/* A number absorbs a following number, name or dot.  A plus or minus
   is tested with VALID_SIGN against the last character of the number.  */
1563 if (b == CPP_NUMBER) return CPP_NUMBER;
1564 if (b == CPP_NAME) return CPP_NUMBER;
1565 if (b == CPP_DOT) return CPP_NUMBER;
1566 /* Numbers cannot have length zero, so this is safe. */
1567 if ((b == CPP_PLUS || b == CPP_MINUS)
1568 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1579 /* Returns nonzero if a space should be inserted to avoid an
1580 accidental token paste for output. For simplicity, it is
1581 conservative, and occasionally advises a space where one is not
1582 needed, e.g. "." and ".2". */
1585 cpp_avoid_paste (pfile, token1, token2)
1587 const cpp_token *token1, *token2;
/* A and B are the types of the left and right token.  */
1589 enum cpp_ttype a = token1->type, b = token2->type;
1592 if (token1->flags & NAMED_OP)
1594 if (token2->flags & NAMED_OP)
/* C becomes the first character of the right token as it would be
   written: taken from the digraph table for tokens flagged DIGRAPH,
   from token_spellings for other operator-category tokens.  */
1598 if (token2->flags & DIGRAPH)
1599 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1600 else if (token_spellings[b].category == SPELL_OPERATOR)
1601 c = token_spellings[b].name[0];
1603 /* Quickly get everything that can paste with an '='. */
1604 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
/* Per left-hand type: does the first character (or the type) of the
   right token risk merging with it when the two are written adjacently?  */
1609 case CPP_GREATER: return c == '>' || c == '?';
1610 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1611 case CPP_PLUS: return c == '+';
1612 case CPP_MINUS: return c == '-' || c == '>';
1613 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1614 case CPP_MOD: return c == ':' || c == '>';
1615 case CPP_AND: return c == '&';
1616 case CPP_OR: return c == '|';
1617 case CPP_COLON: return c == ':' || c == '>';
1618 case CPP_DEREF: return c == '*';
1619 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1620 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1621 case CPP_NAME: return ((b == CPP_NUMBER
1622 && name_p (pfile, &token2->val.str))
1624 || b == CPP_CHAR || b == CPP_STRING); /* L */
1625 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1626 || c == '.' || c == '+' || c == '-');
/* Only relevant when the objc option is set: an at-sign followed by a
   name or a string.  */
1627 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1628 && token1->val.c == '@'
1629 && (b == CPP_NAME || b == CPP_STRING));
1636 /* Output all the remaining tokens on the current line, and a newline
1637 character, to FP. Leading whitespace is removed. */
1639 cpp_output_line (pfile, fp)
/* Fetch the first token and clear its PREV_WHITE flag, so that no
   leading space is written for the line.  */
1645 cpp_get_token (pfile, &token);
1646 token.flags &= ~PREV_WHITE;
/* Write tokens one by one until the reader reports CPP_EOF.  */
1647 while (token.type != CPP_EOF)
1649 cpp_output_token (&token, fp);
1650 cpp_get_token (pfile, &token);
1656 /* Returns the value of a hexadecimal digit. */
/* Letters a-f and A-F both map onto the values 10 through 15.  */
1661 if (c >= 'a' && c <= 'f')
1662 return c - 'a' + 10;
1663 if (c >= 'A' && c <= 'F')
1664 return c - 'A' + 10;
/* Range test for a decimal digit.  */
1665 if (c >= '0' && c <= '9')
1670 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1671 failure if cpplib is not parsing C++ or C99. Such failure is
1672 silent, and no variables are updated. Otherwise returns 0, and
1673 warns if -Wtraditional.
1675 [lex.charset]: The character designated by the universal character
1676 name \UNNNNNNNN is that character whose character short name in
1677 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1678 universal character name \uNNNN is that character whose character
1679 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1680 for a universal character name is less than 0x20 or in the range
1681 0x7F-0x9F (inclusive), or if the universal character name
1682 designates a character in the basic source character set, then the
1683 program is ill-formed.
1685 We assume that wchar_t is Unicode, so we don't need to do any
1686 mapping. Is this ever wrong?
1688 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1689 LIMIT is the end of the string or charconst. PSTR is updated to
1690 point after the UCS on return, and the UCS is written into PC. */
1693 maybe_read_ucs (pfile, pstr, limit, pc)
1695 const unsigned char **pstr;
1696 const unsigned char *limit;
/* P is a local cursor starting at *PSTR, CODE accumulates the value,
   and C starts out as the character read from *PC.  */
1699 const unsigned char *p = *pstr;
1700 unsigned int code = 0;
1701 unsigned int c = *pc, length;
1703 /* Only attempt to interpret a UCS for C++ and C99. */
1704 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
/* Traditional-mode warning that names the escape letter held in C.  */
1707 if (CPP_WTRADITIONAL (pfile))
1708 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
/* A lower-case u is followed by four hex digits, anything else by
   eight; LENGTH counts the digits still expected.  */
1710 for (length = (c == 'u' ? 4: 8); length; --length)
1714 cpp_error (pfile, "incomplete universal-character-name");
/* Shift in one hex digit, i.e. four bits.  */
1721 code = (code << 4) + hex_digit_value (c);
1727 "non-hex digit '%c' in universal-character-name", c);
/* On EBCDIC targets the name is rejected and 0x3f is substituted.  */
1733 #ifdef TARGET_EBCDIC
1734 cpp_error (pfile, "universal-character-name on EBCDIC target");
1735 code = 0x3f; /* EBCDIC invalid character */
/* Accept codes above 0x9f as long as the top bit of a 32-bit value is
   clear.  */
1737 if (code > 0x9f && !(code & 0x80000000))
1738 ; /* True extended character, OK. */
1739 else if (code >= 0x20 && code < 0x7f)
1741 /* ASCII printable character. The C character set consists of all of
1742 these except $, @ and `. We use hex escapes so that this also
1743 works with EBCDIC hosts. */
/* 0x24, 0x40 and 0x60 are the three exempted characters; any other
   code in this range is diagnosed.  */
1744 if (code != 0x24 && code != 0x40 && code != 0x60)
1745 cpp_error (pfile, "universal-character-name used for '%c'", code);
/* Generic rejection; presumably the branch for the codes not covered
   by the two tests above - confirm.  */
1748 cpp_error (pfile, "invalid universal-character-name");
1756 /* Interpret an escape sequence, and return its value. PSTR points to
1757 the input pointer, which is just after the backslash. LIMIT is how
1758 much text we have. MASK is a bitmask for the precision for the
1759 destination type (char or wchar_t). TRADITIONAL, if true, does not
1760 interpret escapes that did not exist in traditional C.
1762 Handles all relevant diagnostics. */
1765 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1767 const unsigned char **pstr;
1768 const unsigned char *limit;
1769 unsigned HOST_WIDE_INT mask;
/* STR is a local cursor over the input.  C is the first character at
   *PSTR and STR is left pointing just past it.  */
1773 const unsigned char *str = *pstr;
1774 unsigned int c = *str++;
/* These four keep their own value.  */
1778 case '\\': case '\'': case '"': case '?': break;
/* Simple escapes are replaced by the corresponding TARGET_ constant.  */
1779 case 'b': c = TARGET_BS; break;
1780 case 'f': c = TARGET_FF; break;
1781 case 'n': c = TARGET_NEWLINE; break;
1782 case 'r': c = TARGET_CR; break;
1783 case 't': c = TARGET_TAB; break;
1784 case 'v': c = TARGET_VT; break;
1786 case '(': case '{': case '[': case '%':
1787 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1788 '\%' is used to prevent SCCS from getting confused. */
/* Treated as unknown only when pedantic.  */
1789 unknown = CPP_PEDANTIC (pfile);
1793 if (CPP_WTRADITIONAL (pfile))
1794 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1800 if (CPP_PEDANTIC (pfile))
1801 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
/* Universal character name: the return value of maybe_read_ucs becomes
   the UNKNOWN flag, and C is passed by address to receive the value.  */
1806 unknown = maybe_read_ucs (pfile, &str, limit, &c);
1810 if (CPP_WTRADITIONAL (pfile))
1811 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
/* Hex escape: I accumulates the value, OVERFLOW collects evidence of
   bits pushed out of I.  */
1815 unsigned int i = 0, overflow = 0;
1816 int digits_found = 0;
/* If shifting I left and back right by four changes it, the top four
   bits are occupied and the shift on the next line will lose them.  */
1824 overflow |= i ^ (i << 4 >> 4);
1825 i = (i << 4) + hex_digit_value (c);
1830 cpp_error (pfile, "\\x used with no following hex digits");
/* Diagnose when bits were lost or the value does not fit in MASK.  */
1832 if (overflow | (i != (i & mask)))
1834 cpp_pedwarn (pfile, "hex escape sequence out of range");
/* Octal escape: the first digit is already in C.  */
1841 case '0': case '1': case '2': case '3':
1842 case '4': case '5': case '6': case '7':
1844 unsigned int i = c - '0';
/* COUNT bounds how many further digits are consumed, and the loop also
   stops at LIMIT.  */
1847 while (str < limit && ++count < 3)
1850 if (c < '0' || c > '7')
1853 i = (i << 3) + c - '0';
/* Same fit test against MASK as for hex escapes.  */
1856 if (i != (i & mask))
1858 cpp_pedwarn (pfile, "octal escape sequence out of range");
/* Two forms of the unknown-escape diagnostic: one prints the character
   itself, the other its three-digit octal code.  */
1873 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1875 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1879 cpp_pedwarn (pfile, "escape sequence out of range for character");
/* Unless a maximum is already defined, use the plain char width.  */
1885 #ifndef MAX_CHAR_TYPE_SIZE
1886 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
/* Likewise for the wide character width.  */
1889 #ifndef MAX_WCHAR_TYPE_SIZE
1890 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1893 /* Interpret a (possibly wide) character constant in TOKEN.
1894 WARN_MULTI warns about multi-character charconsts, if not
1895 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1896 that did not exist in traditional C. PCHARS_SEEN points to a
1897 variable that is filled in with the number of characters seen. */
1899 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1901 const cpp_token *token;
1904 unsigned int *pchars_seen;
/* STR and LIMIT delimit the text stored in TOKEN.  */
1906 const unsigned char *str = token->val.str.text;
1907 const unsigned char *limit = str + token->val.str.len;
1908 unsigned int chars_seen = 0;
1909 unsigned int width, max_chars, c;
1910 unsigned HOST_WIDE_INT mask;
1911 HOST_WIDE_INT result = 0;
1913 #ifdef MULTIBYTE_CHARS
1914 (void) local_mbtowc (NULL, NULL, 0);
1917 /* Width in bits. */
1918 if (token->type == CPP_CHAR)
1919 width = MAX_CHAR_TYPE_SIZE;
1921 width = MAX_WCHAR_TYPE_SIZE;
/* MASK selects the low WIDTH bits.  It is built by shifting only when
   WIDTH is smaller than the host wide int.  */
1923 if (width < HOST_BITS_PER_WIDE_INT)
1924 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
/* Number of characters of WIDTH bits that fit in a HOST_WIDE_INT.  */
1927 max_chars = HOST_BITS_PER_WIDE_INT / width;
1931 #ifdef MULTIBYTE_CHARS
1935 char_len = local_mbtowc (&wc, str, limit - str);
1938 cpp_warning (pfile, "ignoring invalid multibyte character");
/* Escape sequences are decoded by cpp_parse_escape, which is handed the
   cursor STR by address.  */
1951 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1953 #ifdef MAP_CHARACTER
1955 c = MAP_CHARACTER (c);
1958 /* Merge character into result; ignore excess chars. */
1959 if (++chars_seen <= max_chars)
/* Move the characters collected so far up by WIDTH bits and place the
   new one in the low bits.  */
1961 if (width < HOST_BITS_PER_WIDE_INT)
1962 result = (result << width) | (c & mask);
1968 if (chars_seen == 0)
1969 cpp_error (pfile, "empty character constant");
1970 else if (chars_seen > max_chars)
/* Clamp the count to the number of characters that could be stored.  */
1972 chars_seen = max_chars;
1973 cpp_error (pfile, "character constant too long");
1975 else if (chars_seen > 1 && !traditional && warn_multi)
1976 cpp_warning (pfile, "multi-character character constant");
1978 /* If char type is signed, sign-extend the constant. The
1979 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
1980 if (token->type == CPP_CHAR && chars_seen)
/* NBITS is the number of value bits that were filled in.
   NOTE(review): this MASK appears to shadow the MASK declared at the top
   of the function.  The shift count HOST_BITS_PER_INT - NBITS would be
   negative if NBITS could exceed HOST_BITS_PER_INT; CHARS_SEEN can reach
   MAX_CHARS, which is derived from HOST_BITS_PER_WIDE_INT.  Confirm
   whether HOST_WIDE_INT can be wider than int on supported hosts.  */
1982 unsigned int nbits = chars_seen * width;
1983 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
/* True when __CHAR_UNSIGNED__ is defined as a macro, or when the top
   filled bit of RESULT is clear.  */
1985 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
1986 || ((result >> (nbits - 1)) & 1) == 0)
/* Report the (possibly clamped) character count to the caller.  */
1992 *pchars_seen = chars_seen;
/* Default pool alignment: the offset of member u inside struct dummy.  */
2008 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
/* Nonzero when CHUNK is non-null, is not the chunk that POOL currently
   has locked, and spans at least twice SIZE bytes from base to limit.  */
2011 chunk_suitable (pool, chunk, size)
2016 /* Being at least twice SIZE means we can use memcpy in
2017 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2019 return (chunk && pool->locked != chunk
2020 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2023 /* Returns the end of the new pool. PTR points to a char in the old
2024 pool, and is updated to point to the same char in the new pool. */
2026 _cpp_next_chunk (pool, len, ptr)
2029 unsigned char **ptr;
/* The first candidate is the chunk that follows the current one.  */
2031 cpp_chunk *chunk = pool->cur->next;
2033 /* LEN is the minimum size we want in the new pool. */
2034 len += POOL_ROOM (pool);
/* If the candidate does not qualify, allocate a chunk of twice the pool
   size plus LEN and link it in directly after the current chunk.  */
2035 if (! chunk_suitable (pool, chunk, len))
2037 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2039 chunk->next = pool->cur->next;
2040 pool->cur->next = chunk;
2043 /* Update the pointer before changing chunk's front. */
/* NOTE(review): _cpp_pool_reserve passes 0 for PTR, so this store
   presumably sits behind a null check - confirm.  */
2045 *ptr += chunk->base - POOL_FRONT (pool);
/* Copy POOL_ROOM (pool) bytes from the old front to the base of the
   chosen chunk, then reset the front of that chunk to its base.  */
2047 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2048 chunk->front = chunk->base;
2051 return POOL_LIMIT (pool);
/* One xmalloc block holds SIZE bytes of storage followed by the
   cpp_chunk descriptor itself.  */
2058 unsigned char *base;
/* SIZE is first adjusted with POOL_ALIGN to DEFAULT_ALIGNMENT; the
   descriptor is then placed at BASE + SIZE.  */
2061 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2062 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2063 /* Put the chunk descriptor at the end. Then chunk overruns will
2064 cause obvious chaos. */
2065 result = (cpp_chunk *) (base + size);
/* A fresh chunk is empty: front equals base, and limit marks the end of
   the storage area, which is also where the descriptor starts.  */
2066 result->base = base;
2067 result->front = base;
2068 result->limit = base + size;
/* Set up POOL with the given alignment and a first chunk of SIZE bytes.  */
2075 _cpp_init_pool (pool, size, align, temp)
2077 unsigned int size, align, temp;
/* Substitute the default alignment; presumably done only when the
   caller passed a zero ALIGN - confirm.  */
2080 align = DEFAULT_ALIGNMENT;
/* Nonzero exactly when ALIGN has more than one bit set, i.e. when it is
   not a power of two.  */
2081 if (align & (align - 1))
2083 pool->align = align;
2084 pool->cur = new_chunk (size);
/* Link the chunk to itself so that the chain is circular.  */
2088 pool->cur->next = pool->cur;
/* Take one lock on POOL.  Locks nest: only the transition from zero
   records the current chunk as the locked one.  */
2092 _cpp_lock_pool (pool)
2095 if (pool->locks++ == 0)
2096 pool->locked = pool->cur;
/* Release one lock on POOL.  The test below is true when the last
   outstanding lock has just been released.  */
2100 _cpp_unlock_pool (pool)
2103 if (--pool->locks == 0)
/* Walk the chunks of POOL, starting from the current one.  */
2108 _cpp_free_pool (pool)
2111 cpp_chunk *chunk = pool->cur, *next;
/* The walk ends on a null link or once it is back at the starting
   chunk, so both open and circular chains terminate.  */
2119 while (chunk && chunk != pool->cur);
2122 /* Reserve LEN bytes from a memory pool. */
2124 _cpp_pool_reserve (pool, len)
/* Adjust LEN with POOL_ALIGN to the alignment of the pool before
   comparing it with the room that is left.  */
2128 len = POOL_ALIGN (len, pool->align);
/* Not enough room: move on to another chunk.  There is no caller
   pointer to relocate, hence the 0 for the PTR argument.  */
2129 if (len > (unsigned int) POOL_ROOM (pool))
2130 _cpp_next_chunk (pool, len, 0);
/* The front of the pool is returned without being advanced here.  */
2132 return POOL_FRONT (pool);
2135 /* Allocate LEN bytes from a memory pool. */
2137 _cpp_pool_alloc (pool, len)
2141 unsigned char *result = _cpp_pool_reserve (pool, len);
2143 POOL_COMMIT (pool, len);