1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
42 /* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
46 #undef MULTIBYTE_CHARS
49 #ifdef MULTIBYTE_CHARS
54 /* Tokens with SPELL_STRING store their spelling in the token list,
55 and its length in the token->val.name.len. */
67 enum spell_type category;
68 const unsigned char *name;
71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
74 #define OP(e, s) { SPELL_OPERATOR, U s },
75 #define TK(e, s) { s, U STRINGX (e) },
76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
83 static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
85 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
87 static int skip_block_comment PARAMS ((cpp_reader *));
88 static int skip_line_comment PARAMS ((cpp_reader *));
89 static void adjust_column PARAMS ((cpp_reader *));
90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
92 static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
94 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
95 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
96 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
97 static void unterminated PARAMS ((cpp_reader *, int));
98 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
99 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
100 static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
101 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
102 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
103 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
104 const unsigned char *, unsigned int *));
105 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
107 static unsigned int hex_digit_value PARAMS ((unsigned int));
108 static _cpp_buff *new_buff PARAMS ((size_t));
112 Compares the token TOKEN to the NUL-terminated string STRING.
113 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
116 cpp_ideq (token, string)
117 const cpp_token *token;
120 if (token->type != CPP_NAME)
123 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
126 /* Call when meeting a newline. Returns the character after the newline
127 (or carriage-return newline combination), or EOF. */
129 handle_newline (pfile, newline_char)
131 cppchar_t newline_char;
134 cppchar_t next = EOF;
137 buffer = pfile->buffer;
138 buffer->col_adjust = 0;
139 buffer->line_base = buffer->cur;
141 /* Handle CR-LF and LF-CR combinations, get the next character. */
142 if (buffer->cur < buffer->rlimit)
144 next = *buffer->cur++;
145 if (next + newline_char == '\r' + '\n')
147 buffer->line_base = buffer->cur;
148 if (buffer->cur < buffer->rlimit)
149 next = *buffer->cur++;
155 buffer->read_ahead = next;
159 /* Subroutine of skip_escaped_newlines; called when a trigraph is
160 encountered. It warns if necessary, and returns true if the
161 trigraph should be honoured. FROM_CHAR is the third character of a
162 trigraph, and presumed to be the previous character for position
165 trigraph_ok (pfile, from_char)
169 int accept = CPP_OPTION (pfile, trigraphs);
171 /* Don't warn about trigraphs in comments. */
172 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
174 cpp_buffer *buffer = pfile->buffer;
177 cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
178 "trigraph ??%c converted to %c",
180 (int) _cpp_trigraph_map[from_char]);
181 else if (buffer->cur != buffer->last_Wtrigraphs)
183 buffer->last_Wtrigraphs = buffer->cur;
184 cpp_warning_with_line (pfile, pfile->line,
185 CPP_BUF_COL (buffer) - 2,
186 "trigraph ??%c ignored", (int) from_char);
193 /* Assumes local variables buffer and result. */
194 #define ACCEPT_CHAR(t) \
195 do { result->type = t; buffer->read_ahead = EOF; } while (0)
197 /* When we move to multibyte character sets, add to these something
198 that saves and restores the state of the multibyte conversion
199 library. This probably involves saving and restoring a "cookie".
200 In the case of glibc it is an 8-byte structure, so is not a high
201 overhead operation. In any case, it's out of the fast path. */
202 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
203 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
205 /* Skips any escaped newlines introduced by NEXT, which is either a
206 '?' or a '\\'. Returns the next character, which will also have
207 been placed in buffer->read_ahead. This routine performs
208 preprocessing stages 1 and 2 of the ISO C standard. */
210 skip_escaped_newlines (pfile, next)
214 cpp_buffer *buffer = pfile->buffer;
216 /* Only do this if we apply stages 1 and 2. */
217 if (!buffer->from_stage3)
220 const unsigned char *saved_cur;
225 if (buffer->cur == buffer->rlimit)
231 next1 = *buffer->cur++;
232 if (next1 != '?' || buffer->cur == buffer->rlimit)
238 next1 = *buffer->cur++;
239 if (!_cpp_trigraph_map[next1]
240 || !trigraph_ok (pfile, next1))
246 /* We have a full trigraph here. */
247 next = _cpp_trigraph_map[next1];
248 if (next != '\\' || buffer->cur == buffer->rlimit)
253 /* We have a backslash, and room for at least one more character. */
257 next1 = *buffer->cur++;
258 if (!is_nvspace (next1))
262 while (buffer->cur < buffer->rlimit);
264 if (!is_vspace (next1))
270 if (space && !pfile->state.lexing_comment)
271 cpp_warning (pfile, "backslash and newline separated by space");
273 next = handle_newline (pfile, next1);
275 cpp_pedwarn (pfile, "backslash-newline at end of file");
277 while (next == '\\' || next == '?');
280 buffer->read_ahead = next;
284 /* Obtain the next character, after trigraph conversion and skipping
285 an arbitrary string of escaped newlines. The common case of no
286 trigraphs or escaped newlines falls through quickly. */
288 get_effective_char (pfile)
291 cpp_buffer *buffer = pfile->buffer;
292 cppchar_t next = EOF;
294 if (buffer->cur < buffer->rlimit)
296 next = *buffer->cur++;
298 /* '?' can introduce trigraphs (and therefore backslash); '\\'
299 can introduce escaped newlines, which we want to skip, or
300 UCNs, which, depending upon lexer state, we will handle in
302 if (next == '?' || next == '\\')
303 next = skip_escaped_newlines (pfile, next);
306 buffer->read_ahead = next;
310 /* Skip a C-style block comment. We find the end of the comment by
311 seeing if an asterisk is before every '/' we encounter. Returns
312 non-zero if comment terminated by EOF, zero otherwise. */
314 skip_block_comment (pfile)
317 cpp_buffer *buffer = pfile->buffer;
318 cppchar_t c = EOF, prevc = EOF;
320 pfile->state.lexing_comment = 1;
321 while (buffer->cur != buffer->rlimit)
323 prevc = c, c = *buffer->cur++;
326 /* FIXME: For speed, create a new character class of characters
327 of interest inside block comments. */
328 if (c == '?' || c == '\\')
329 c = skip_escaped_newlines (pfile, c);
331 /* People like decorating comments with '*', so check for '/'
332 instead for efficiency. */
338 /* Warn about potential nested comments, but not if the '/'
339 comes immediately before the true comment delimiter.
340 Don't bother to get it right across escaped newlines. */
341 if (CPP_OPTION (pfile, warn_comments)
342 && buffer->cur != buffer->rlimit)
344 prevc = c, c = *buffer->cur++;
345 if (c == '*' && buffer->cur != buffer->rlimit)
347 prevc = c, c = *buffer->cur++;
349 cpp_warning_with_line (pfile, pfile->line,
350 CPP_BUF_COL (buffer) - 2,
351 "\"/*\" within comment");
356 else if (is_vspace (c))
358 prevc = c, c = handle_newline (pfile, c);
362 adjust_column (pfile);
365 pfile->state.lexing_comment = 0;
366 buffer->read_ahead = EOF;
367 return c != '/' || prevc != '*';
370 /* Skip a C++ line comment. Handles escaped newlines. Returns
371 non-zero if a multiline comment. The following new line, if any,
372 is left in buffer->read_ahead. */
374 skip_line_comment (pfile)
377 cpp_buffer *buffer = pfile->buffer;
378 unsigned int orig_line = pfile->line;
381 pfile->state.lexing_comment = 1;
385 if (buffer->cur == buffer->rlimit)
389 if (c == '?' || c == '\\')
390 c = skip_escaped_newlines (pfile, c);
392 while (!is_vspace (c));
394 pfile->state.lexing_comment = 0;
395 buffer->read_ahead = c; /* Leave any newline for caller. */
396 return orig_line != pfile->line;
399 /* pfile->buffer->cur is one beyond the \t character. Update
400 col_adjust so we track the column correctly. */
402 adjust_column (pfile)
405 cpp_buffer *buffer = pfile->buffer;
406 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
408 /* Round it up to multiple of the tabstop, but subtract 1 since the
409 tab itself occupies a character position. */
410 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
411 - col % CPP_OPTION (pfile, tabstop)) - 1;
414 /* Skips whitespace, saving the next non-whitespace character.
415 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
416 tokens might be assigned an incorrect column. */
418 skip_whitespace (pfile, c)
422 cpp_buffer *buffer = pfile->buffer;
423 unsigned int warned = 0;
427 /* Horizontal space always OK. */
431 adjust_column (pfile);
432 /* Just \f \v or \0 left. */
437 cpp_warning (pfile, "null character(s) ignored");
441 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
442 cpp_pedwarn_with_line (pfile, pfile->line,
443 CPP_BUF_COL (buffer),
444 "%s in preprocessing directive",
445 c == '\f' ? "form feed" : "vertical tab");
448 if (buffer->cur == buffer->rlimit)
452 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
453 while (is_nvspace (c));
455 /* Remember the next character. */
456 buffer->read_ahead = c;
459 /* See if the characters of a number token are valid in a name (no
462 name_p (pfile, string)
464 const cpp_string *string;
468 for (i = 0; i < string->len; i++)
469 if (!is_idchar (string->text[i]))
475 /* Parse an identifier, skipping embedded backslash-newlines. This is
476 a critical inner loop. The common case is an identifier which has
477 not been split by backslash-newline, does not contain a dollar
478 sign, and has already been scanned (roughly 10:1 ratio of
479 seen:unseen identifiers in normal code; the distribution is
480 Poisson-like). Second most common case is a new identifier, not
481 split and no dollar sign. The other possibilities are rare and
482 have been relegated to parse_identifier_slow. */
484 static cpp_hashnode *
485 parse_identifier (pfile)
488 cpp_hashnode *result;
489 const U_CHAR *cur, *rlimit;
491 /* Fast-path loop. Skim over a normal identifier.
492 N.B. ISIDNUM does not include $. */
493 cur = pfile->buffer->cur - 1;
494 rlimit = pfile->buffer->rlimit;
497 while (cur < rlimit && ISIDNUM (*cur));
499 /* Check for slow-path cases. */
500 if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
501 result = parse_identifier_slow (pfile, cur);
504 const U_CHAR *base = pfile->buffer->cur - 1;
505 result = (cpp_hashnode *)
506 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
507 pfile->buffer->cur = cur;
510 /* Rarely, identifiers require diagnostics when lexed.
511 XXX Has to be forced out of the fast path. */
512 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
513 && !pfile->state.skipping, 0))
515 /* It is allowed to poison the same identifier twice. */
516 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
517 cpp_error (pfile, "attempt to use poisoned \"%s\"",
520 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
521 replacement list of a variadic macro. */
522 if (result == pfile->spec_nodes.n__VA_ARGS__
523 && !pfile->state.va_args_ok)
525 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
531 /* Slow path. This handles identifiers which have been split, and
532 identifiers which contain dollar signs. The part of the identifier
533 from PFILE->buffer->cur-1 to CUR has already been scanned. */
534 static cpp_hashnode *
535 parse_identifier_slow (pfile, cur)
539 cpp_buffer *buffer = pfile->buffer;
540 const U_CHAR *base = buffer->cur - 1;
541 struct obstack *stack = &pfile->hash_table->stack;
542 unsigned int c, saw_dollar = 0, len;
544 /* Copy the part of the token which is known to be okay. */
545 obstack_grow (stack, base, cur - base);
547 /* Now process the part which isn't. We are looking at one of
548 '$', '\\', or '?' on entry to this loop. */
553 while (is_idchar (c))
555 obstack_1grow (stack, c);
561 if (buffer->cur == buffer->rlimit)
567 /* Potential escaped newline? */
568 if (c != '?' && c != '\\')
570 c = skip_escaped_newlines (pfile, c);
572 while (is_idchar (c));
574 /* Remember the next character. */
575 buffer->read_ahead = c;
577 /* $ is not an identifier character in the standard, but is commonly
578 accepted as an extension. Don't warn about it in skipped
579 conditional blocks. */
580 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
581 cpp_pedwarn (pfile, "'$' character(s) in identifier");
583 /* Identifiers are null-terminated. */
584 len = obstack_object_size (stack);
585 obstack_1grow (stack, '\0');
587 return (cpp_hashnode *)
588 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
591 /* Parse a number, skipping embedded backslash-newlines. */
593 parse_number (pfile, number, c, leading_period)
599 cpp_buffer *buffer = pfile->buffer;
600 unsigned char *dest, *limit;
602 dest = BUFF_FRONT (pfile->u_buff);
603 limit = BUFF_LIMIT (pfile->u_buff);
605 /* Place a leading period. */
610 _cpp_extend_buff (pfile, &pfile->u_buff, 1);
611 dest = BUFF_FRONT (pfile->u_buff);
612 limit = BUFF_LIMIT (pfile->u_buff);
621 /* Need room for terminating null. */
622 if ((size_t) (limit - dest) < 2)
624 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
625 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
626 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
627 limit = BUFF_LIMIT (pfile->u_buff);
632 if (buffer->cur == buffer->rlimit)
637 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
639 /* Potential escaped newline? */
640 if (c != '?' && c != '\\')
642 c = skip_escaped_newlines (pfile, c);
644 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
646 /* Remember the next character. */
647 buffer->read_ahead = c;
649 /* Null-terminate the number. */
652 number->text = BUFF_FRONT (pfile->u_buff);
653 number->len = dest - number->text;
654 BUFF_FRONT (pfile->u_buff) = dest + 1;
657 /* Subroutine of parse_string. Emits error for unterminated strings. */
659 unterminated (pfile, term)
663 cpp_error (pfile, "missing terminating %c character", term);
665 if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
667 cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
668 "possible start of unterminated string literal");
673 /* Subroutine of parse_string. */
675 unescaped_terminator_p (pfile, dest)
677 const unsigned char *dest;
679 const unsigned char *start, *temp;
681 /* In #include-style directives, terminators are not escapable. */
682 if (pfile->state.angled_headers)
685 start = BUFF_FRONT (pfile->u_buff);
687 /* An odd number of consecutive backslashes represents an escaped
689 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
692 return ((dest - temp) & 1) == 0;
695 /* Parses a string, character constant, or angle-bracketed header file
696 name. Handles embedded trigraphs and escaped newlines. The stored
697 string is guaranteed NUL-terminated, but it is not guaranteed that
698 this is the first NUL since embedded NULs are preserved.
700 Multi-line strings are allowed, but they are deprecated. */
702 parse_string (pfile, token, terminator)
705 cppchar_t terminator;
707 cpp_buffer *buffer = pfile->buffer;
708 unsigned char *dest, *limit;
710 bool warned_nulls = false, warned_multi = false;
712 dest = BUFF_FRONT (pfile->u_buff);
713 limit = BUFF_LIMIT (pfile->u_buff);
717 if (buffer->cur == buffer->rlimit)
723 /* We need space for the terminating NUL. */
724 if ((size_t) (limit - dest) < 1)
726 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
727 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
728 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
729 limit = BUFF_LIMIT (pfile->u_buff);
734 unterminated (pfile, terminator);
738 /* Handle trigraphs, escaped newlines etc. */
739 if (c == '?' || c == '\\')
740 c = skip_escaped_newlines (pfile, c);
742 if (c == terminator && unescaped_terminator_p (pfile, dest))
747 else if (is_vspace (c))
749 /* In assembly language, silently terminate string and
750 character literals at end of line. This is a kludge
751 around not knowing where comments are. */
752 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
755 /* Character constants and header names may not extend over
756 multiple lines. In Standard C, neither may strings.
757 Unfortunately, we accept multiline strings as an
758 extension, except in #include family directives. */
759 if (terminator != '"' || pfile->state.angled_headers)
761 unterminated (pfile, terminator);
768 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
771 if (pfile->mls_line == 0)
773 pfile->mls_line = token->line;
774 pfile->mls_col = token->col;
777 c = handle_newline (pfile, c);
781 else if (c == '\0' && !warned_nulls)
784 cpp_warning (pfile, "null character(s) preserved in literal");
790 /* Remember the next character. */
791 buffer->read_ahead = c;
794 token->val.str.text = BUFF_FRONT (pfile->u_buff);
795 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
796 BUFF_FRONT (pfile->u_buff) = dest + 1;
799 /* The stored comment includes the comment start and any terminator. */
801 save_comment (pfile, token, from)
804 const unsigned char *from;
806 unsigned char *buffer;
809 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
810 /* C++ comments probably (not definitely) have moved past a new
811 line, which we don't want to save in the comment. */
812 if (pfile->buffer->read_ahead != EOF)
814 buffer = _cpp_unaligned_alloc (pfile, len);
816 token->type = CPP_COMMENT;
817 token->val.str.len = len;
818 token->val.str.text = buffer;
821 memcpy (buffer + 1, from, len - 1);
824 /* Subroutine of _cpp_lex_direct to handle '%'. A little tricky, since we
825 want to avoid stepping back when lexing %:%X. */
827 lex_percent (pfile, result)
831 cpp_buffer *buffer= pfile->buffer;
834 result->type = CPP_MOD;
835 /* Parsing %:%X could leave an extra character. */
836 if (buffer->extra_char == EOF)
837 c = get_effective_char (pfile);
840 c = buffer->read_ahead = buffer->extra_char;
841 buffer->extra_char = EOF;
845 ACCEPT_CHAR (CPP_MOD_EQ);
846 else if (CPP_OPTION (pfile, digraphs))
850 result->flags |= DIGRAPH;
851 ACCEPT_CHAR (CPP_HASH);
852 if (get_effective_char (pfile) == '%')
854 buffer->extra_char = get_effective_char (pfile);
855 if (buffer->extra_char == ':')
857 buffer->extra_char = EOF;
858 ACCEPT_CHAR (CPP_PASTE);
861 /* We'll catch the extra_char when we're called back. */
862 buffer->read_ahead = '%';
867 result->flags |= DIGRAPH;
868 ACCEPT_CHAR (CPP_CLOSE_BRACE);
873 /* Subroutine of _cpp_lex_direct to handle '.'. This is tricky, since we
874 want to avoid stepping back when lexing '...' or '.123'. In the
875 latter case we should also set a flag for parse_number. */
877 lex_dot (pfile, result)
881 cpp_buffer *buffer = pfile->buffer;
884 /* Parsing ..X could leave an extra character. */
885 if (buffer->extra_char == EOF)
886 c = get_effective_char (pfile);
889 c = buffer->read_ahead = buffer->extra_char;
890 buffer->extra_char = EOF;
893 /* All known character sets have 0...9 contiguous. */
894 if (c >= '0' && c <= '9')
896 result->type = CPP_NUMBER;
897 parse_number (pfile, &result->val.str, c, 1);
901 result->type = CPP_DOT;
904 buffer->extra_char = get_effective_char (pfile);
905 if (buffer->extra_char == '.')
907 buffer->extra_char = EOF;
908 ACCEPT_CHAR (CPP_ELLIPSIS);
911 /* We'll catch the extra_char when we're called back. */
912 buffer->read_ahead = '.';
914 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
915 ACCEPT_CHAR (CPP_DOT_STAR);
919 /* Allocate COUNT tokens for RUN. */
921 _cpp_init_tokenrun (run, count)
925 run->base = xnewvec (cpp_token, count);
926 run->limit = run->base + count;
930 /* Returns the next tokenrun, or creates one if there is none. */
935 if (run->next == NULL)
937 run->next = xnew (tokenrun);
938 run->next->prev = run;
939 _cpp_init_tokenrun (run->next, 250);
945 /* Allocate a single token that is invalidated at the same time as the
946 rest of the tokens on the line. Has its line and col set to the
947 same as the last lexed token, so that diagnostics appear in the
950 _cpp_temp_token (pfile)
953 cpp_token *old, *result;
955 old = pfile->cur_token - 1;
956 if (pfile->cur_token == pfile->cur_run->limit)
958 pfile->cur_run = next_tokenrun (pfile->cur_run);
959 pfile->cur_token = pfile->cur_run->base;
962 result = pfile->cur_token++;
963 result->line = old->line;
964 result->col = old->col;
968 /* Lex a token into RESULT (external interface). Takes care of issues
969 like directive handling, token lookahead, multiple include
970 optimisation and skipping. */
972 _cpp_lex_token (pfile)
979 if (pfile->cur_token == pfile->cur_run->limit)
981 pfile->cur_run = next_tokenrun (pfile->cur_run);
982 pfile->cur_token = pfile->cur_run->base;
985 if (pfile->lookaheads)
988 result = pfile->cur_token++;
991 result = _cpp_lex_direct (pfile);
993 if (result->flags & BOL)
995 /* Is this a directive? If _cpp_handle_directive returns
996 false, it is an assembler #. */
997 if (result->type == CPP_HASH
998 && !pfile->state.parsing_args
999 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1001 if (pfile->cb.line_change && !pfile->state.skipping)
1002 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
1005 /* We don't skip tokens in directives. */
1006 if (pfile->state.in_directive)
1009 /* Outside a directive, invalidate controlling macros. At file
1010 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1011 get here and MI optimisation works. */
1012 pfile->mi_valid = false;
1014 if (!pfile->state.skipping || result->type == CPP_EOF)
1021 /* Lex a token into pfile->cur_token, which is also incremented, to
1022 get diagnostics pointing to the correct location.
1024 Does not handle issues such as token lookahead, multiple-include
1025 optimisation, directives, skipping etc. This function is only
1026 suitable for use by _cpp_lex_token, and in special cases like
1027 lex_expansion_token which doesn't care for any of these issues.
1029 When meeting a newline, returns CPP_EOF if parsing a directive,
1030 otherwise returns to the start of the token buffer if permissible.
1031 Returns the location of the lexed token. */
1033 _cpp_lex_direct (pfile)
1038 const unsigned char *comment_start;
1039 cpp_token *result = pfile->cur_token++;
1042 buffer = pfile->buffer;
1043 result->flags = buffer->saved_flags;
1044 buffer->saved_flags = 0;
1046 result->line = pfile->line;
1049 c = buffer->read_ahead;
1050 if (c == EOF && buffer->cur < buffer->rlimit)
1052 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1053 buffer->read_ahead = EOF;
1059 buffer->saved_flags = BOL;
1060 if (!pfile->state.parsing_args && !pfile->state.in_directive)
1062 if (buffer->cur != buffer->line_base)
1064 /* Non-empty files should end in a newline. Don't warn
1065 for command line and _Pragma buffers. */
1066 if (!buffer->from_stage3)
1067 cpp_pedwarn (pfile, "no newline at end of file");
1068 handle_newline (pfile, '\n');
1071 /* Don't pop the last buffer. */
1074 unsigned char stop = buffer->return_at_eof;
1076 _cpp_pop_buffer (pfile);
1081 result->type = CPP_EOF;
1084 case ' ': case '\t': case '\f': case '\v': case '\0':
1085 skip_whitespace (pfile, c);
1086 result->flags |= PREV_WHITE;
1089 case '\n': case '\r':
1090 handle_newline (pfile, c);
1091 buffer->saved_flags = BOL;
1092 if (! pfile->state.in_directive)
1094 if (pfile->state.parsing_args == 2)
1095 buffer->saved_flags |= PREV_WHITE;
1096 if (!pfile->keep_tokens)
1098 pfile->cur_run = &pfile->base_run;
1099 result = pfile->base_run.base;
1100 pfile->cur_token = result + 1;
1104 result->type = CPP_EOF;
1109 /* These could start an escaped newline, or '?' a trigraph. Let
1110 skip_escaped_newlines do all the work. */
1112 unsigned int line = pfile->line;
1114 c = skip_escaped_newlines (pfile, c);
1115 if (line != pfile->line)
1116 /* We had at least one escaped newline of some sort, and the
1117 next character is in buffer->read_ahead. Update the
1118 token's line and column. */
1119 goto update_tokens_line;
1121 /* We are either the original '?' or '\\', or a trigraph. */
1122 result->type = CPP_QUERY;
1123 buffer->read_ahead = EOF;
1131 case '0': case '1': case '2': case '3': case '4':
1132 case '5': case '6': case '7': case '8': case '9':
1133 result->type = CPP_NUMBER;
1134 parse_number (pfile, &result->val.str, c, 0);
1138 if (!CPP_OPTION (pfile, dollars_in_ident))
1140 /* Fall through... */
1143 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1144 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1145 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1146 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1148 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1149 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1150 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1151 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1153 result->type = CPP_NAME;
1154 result->val.node = parse_identifier (pfile);
1156 /* 'L' may introduce wide characters or strings. */
1157 if (result->val.node == pfile->spec_nodes.n_L)
1159 c = buffer->read_ahead;
1160 if (c == EOF && buffer->cur < buffer->rlimit)
1162 if (c == '\'' || c == '"')
1165 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1169 /* Convert named operators to their proper types. */
1170 else if (result->val.node->flags & NODE_OPERATOR)
1172 result->flags |= NAMED_OP;
1173 result->type = result->val.node->value.operator;
1179 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1181 parse_string (pfile, result, c);
1185 /* A potential block or line comment. */
1186 comment_start = buffer->cur;
1187 result->type = CPP_DIV;
1188 c = get_effective_char (pfile);
1190 ACCEPT_CHAR (CPP_DIV_EQ);
1191 if (c != '/' && c != '*')
1196 if (skip_block_comment (pfile))
1197 cpp_error (pfile, "unterminated comment");
1201 if (!CPP_OPTION (pfile, cplusplus_comments)
1202 && !CPP_IN_SYSTEM_HEADER (pfile))
1205 /* Warn about comments only if pedantically GNUC89, and not
1206 in system headers. */
1207 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1208 && ! buffer->warned_cplusplus_comments)
1211 "C++ style comments are not allowed in ISO C89");
1213 "(this will be reported only once per input file)");
1214 buffer->warned_cplusplus_comments = 1;
1217 /* Skip_line_comment updates buffer->read_ahead. */
1218 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1219 cpp_warning (pfile, "multi-line comment");
1222 /* Skipping the comment has updated buffer->read_ahead. */
1223 if (!pfile->state.save_comments)
1225 result->flags |= PREV_WHITE;
1226 goto update_tokens_line;
1229 /* Save the comment as a token in its own right. */
1230 save_comment (pfile, result, comment_start);
1234 if (pfile->state.angled_headers)
1236 result->type = CPP_HEADER_NAME;
1237 c = '>'; /* terminator. */
1241 result->type = CPP_LESS;
1242 c = get_effective_char (pfile);
1244 ACCEPT_CHAR (CPP_LESS_EQ);
1247 ACCEPT_CHAR (CPP_LSHIFT);
1248 if (get_effective_char (pfile) == '=')
1249 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1251 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1253 ACCEPT_CHAR (CPP_MIN);
1254 if (get_effective_char (pfile) == '=')
1255 ACCEPT_CHAR (CPP_MIN_EQ);
1257 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1259 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1260 result->flags |= DIGRAPH;
1262 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1264 ACCEPT_CHAR (CPP_OPEN_BRACE);
1265 result->flags |= DIGRAPH;
1270 result->type = CPP_GREATER;
1271 c = get_effective_char (pfile);
1273 ACCEPT_CHAR (CPP_GREATER_EQ);
1276 ACCEPT_CHAR (CPP_RSHIFT);
1277 if (get_effective_char (pfile) == '=')
1278 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1280 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1282 ACCEPT_CHAR (CPP_MAX);
1283 if (get_effective_char (pfile) == '=')
1284 ACCEPT_CHAR (CPP_MAX_EQ);
1289 lex_percent (pfile, result);
1293 lex_dot (pfile, result);
1297 result->type = CPP_PLUS;
1298 c = get_effective_char (pfile);
1300 ACCEPT_CHAR (CPP_PLUS_EQ);
1302 ACCEPT_CHAR (CPP_PLUS_PLUS);
1306 result->type = CPP_MINUS;
1307 c = get_effective_char (pfile);
1310 ACCEPT_CHAR (CPP_DEREF);
1311 if (CPP_OPTION (pfile, cplusplus)
1312 && get_effective_char (pfile) == '*')
1313 ACCEPT_CHAR (CPP_DEREF_STAR);
1316 ACCEPT_CHAR (CPP_MINUS_EQ);
1318 ACCEPT_CHAR (CPP_MINUS_MINUS);
1322 result->type = CPP_MULT;
1323 if (get_effective_char (pfile) == '=')
1324 ACCEPT_CHAR (CPP_MULT_EQ);
1328 result->type = CPP_EQ;
1329 if (get_effective_char (pfile) == '=')
1330 ACCEPT_CHAR (CPP_EQ_EQ);
1334 result->type = CPP_NOT;
1335 if (get_effective_char (pfile) == '=')
1336 ACCEPT_CHAR (CPP_NOT_EQ);
1340 result->type = CPP_AND;
1341 c = get_effective_char (pfile);
1343 ACCEPT_CHAR (CPP_AND_EQ);
1345 ACCEPT_CHAR (CPP_AND_AND);
1349 result->type = CPP_HASH;
1350 if (get_effective_char (pfile) == '#')
1351 ACCEPT_CHAR (CPP_PASTE);
1355 result->type = CPP_OR;
1356 c = get_effective_char (pfile);
1358 ACCEPT_CHAR (CPP_OR_EQ);
1360 ACCEPT_CHAR (CPP_OR_OR);
1364 result->type = CPP_XOR;
1365 if (get_effective_char (pfile) == '=')
1366 ACCEPT_CHAR (CPP_XOR_EQ);
1370 result->type = CPP_COLON;
1371 c = get_effective_char (pfile);
1372 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1373 ACCEPT_CHAR (CPP_SCOPE);
1374 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1376 result->flags |= DIGRAPH;
1377 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1381 case '~': result->type = CPP_COMPL; break;
1382 case ',': result->type = CPP_COMMA; break;
1383 case '(': result->type = CPP_OPEN_PAREN; break;
1384 case ')': result->type = CPP_CLOSE_PAREN; break;
1385 case '[': result->type = CPP_OPEN_SQUARE; break;
1386 case ']': result->type = CPP_CLOSE_SQUARE; break;
1387 case '{': result->type = CPP_OPEN_BRACE; break;
1388 case '}': result->type = CPP_CLOSE_BRACE; break;
1389 case ';': result->type = CPP_SEMICOLON; break;
1391 /* @ is a punctuator in Objective C. */
1392 case '@': result->type = CPP_ATSIGN; break;
1396 result->type = CPP_OTHER;
1404 /* An upper bound on the number of bytes needed to spell a token,
1405 including preceding whitespace.  TOKEN is the token to measure; its
   spelling category, TOKEN_SPELL (token), selects the base length.  */
1407 cpp_token_len (token)
1408 const cpp_token *token;
1412 switch (TOKEN_SPELL (token))
/* Any category not listed below starts from a length of zero.  */
1414 default: len = 0; break;
/* Tokens spelled from stored text use the stored length.  */
1415 case SPELL_STRING: len = token->val.str.len; break;
/* Identifiers use the length of the identifier node.  */
1416 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1418 /* 1 for whitespace, 4 for comment delimiters. */
1422 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1423 already contain enough space to hold the token's spelling.
1424 Returns a pointer to the character after the last character written. */
/* Spell TOKEN into BUFFER, dispatching on TOKEN_SPELL (token).  In the
   visible code PFILE is used only to report an unspellable token through
   cpp_ice.  BUFFER is advanced past every byte stored.  */
1427 cpp_spell_token (pfile, token, buffer)
1428 cpp_reader *pfile; /* Would be nice to be rid of this... */
1429 const cpp_token *token;
1430 unsigned char *buffer;
1432 switch (TOKEN_SPELL (token))
1434 case SPELL_OPERATOR:
1436 const unsigned char *spelling;
/* Digraph-flagged operators take their spelling from digraph_spellings,
   indexed relative to CPP_FIRST_DIGRAPH.  */
1439 if (token->flags & DIGRAPH)
1441 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
/* Operators flagged NAMED_OP take a separate path; the statement for
   this branch is not visible in this listing.  */
1442 else if (token->flags & NAMED_OP)
/* Every other operator is spelled with the string from TOKEN_NAME.  */
1445 spelling = TOKEN_NAME (token);
/* Walk the spelling one character at a time up to its terminating NUL.  */
1447 while ((c = *spelling++) != '\0')
/* Identifier text: copy NODE_LEN bytes of the node name and step BUFFER
   past them.  */
1454 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1455 buffer += NODE_LEN (token->val.node);
/* Stored-text tokens: choose the opening and closing delimiters and the
   optional L prefix from the token type.  A zero value means nothing is
   emitted for that position.  */
1460 int left, right, tag;
1461 switch (token->type)
1463 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1464 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1465 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1466 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1467 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1468 default: left = '\0'; right = '\0'; tag = '\0'; break;
/* Output order: prefix, opening delimiter, the stored bytes, closing
   delimiter.  */
1470 if (tag) *buffer++ = tag;
1471 if (left) *buffer++ = left;
1472 memcpy (buffer, token->val.str.text, token->val.str.len);
1473 buffer += token->val.str.len;
1474 if (right) *buffer++ = right;
/* Single-character token: store the character itself.  */
1479 *buffer++ = token->val.c;
/* Anything else is reported through cpp_ice as unspellable.  */
1483 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1490 /* Returns a token as a null-terminated string. The string is
1491 temporary, and automatically freed later. Useful for diagnostics.
   NOTE(review): the storage comes from _cpp_unaligned_alloc, whose own
   comment calls it permanent -- confirm which lifetime actually holds.  */
1493 cpp_token_as_text (pfile, token)
1495 const cpp_token *token;
/* cpp_token_len is documented as an upper bound on the spelling size, so
   LEN bytes are requested for cpp_spell_token to write into.  */
1497 unsigned int len = cpp_token_len (token);
1498 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
/* END receives the position just past the last byte spelled.  */
1500 end = cpp_spell_token (pfile, token, start);
1506 /* Used by C front ends. Should really move to using cpp_token_as_text.
   Returns the name field of the token_spellings entry for TYPE.  */
1508 cpp_type2name (type)
1509 enum cpp_ttype type;
/* TYPE indexes token_spellings directly; no range check is visible.  */
1511 return (const char *) token_spellings[type].name;
1514 /* Writes the spelling of token to FP, without any preceding space.
1515 Separated from cpp_spell_token for efficiency - to avoid stdio
1516 double-buffering.  The cases mirror cpp_spell_token, but write with
   putc and fwrite instead of storing into a buffer.  */
1518 cpp_output_token (token, fp)
1519 const cpp_token *token;
1522 switch (TOKEN_SPELL (token))
1524 case SPELL_OPERATOR:
1526 const unsigned char *spelling;
/* Digraph-flagged operators take their spelling from digraph_spellings,
   indexed relative to CPP_FIRST_DIGRAPH.  */
1529 if (token->flags & DIGRAPH)
1531 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
/* Operators flagged NAMED_OP take a separate path; the statement for
   this branch is not visible in this listing.  */
1532 else if (token->flags & NAMED_OP)
/* Every other operator is spelled with the string from TOKEN_NAME.  */
1535 spelling = TOKEN_NAME (token);
/* NOTE(review): the pre-increment and the trailing semicolon suggest this
   line closes a do-while whose opening lines are absent from this
   listing -- confirm against the full file before changing it.  */
1540 while ((c = *++spelling) != '\0');
/* Identifier text: write NODE_LEN bytes of the node name.  */
1546 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
/* Stored-text tokens: choose the opening and closing delimiters and the
   optional L prefix from the token type.  A zero value means nothing is
   written for that position.  */
1551 int left, right, tag;
1552 switch (token->type)
1554 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1555 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1556 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1557 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1558 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1559 default: left = '\0'; right = '\0'; tag = '\0'; break;
/* Output order: prefix, opening delimiter, the stored bytes, closing
   delimiter.  */
1561 if (tag) putc (tag, fp);
1562 if (left) putc (left, fp);
1563 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1564 if (right) putc (right, fp);
/* Single-character token: write the character itself.  */
1569 putc (token->val.c, fp);
1573 /* An error, most probably. */
1578 /* Compare two tokens.  A and B must agree in both type and flags
   before their values are compared; the value comparison used depends
   on TOKEN_SPELL (a).  Several case labels are absent from this
   listing, so the notes below describe each visible return only.  */
1580 _cpp_equiv_tokens (a, b)
1581 const cpp_token *a, *b;
1583 if (a->type == b->type && a->flags == b->flags)
1584 switch (TOKEN_SPELL (a))
1586 default: /* Keep compiler happy. */
1587 case SPELL_OPERATOR:
1590 return a->val.c == b->val.c; /* Character. */
/* Equal unless the type is CPP_MACRO_ARG, in which case the argument
   numbers must match as well.  */
1592 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
/* Compared by node pointer.  */
1594 return a->val.node == b->val.node;
/* Same length and same bytes; the last memcmp argument is on a line
   that is not visible in this listing.  */
1596 return (a->val.str.len == b->val.str.len
1597 && !memcmp (a->val.str.text, b->val.str.text,
1604 /* Returns nonzero if a space should be inserted to avoid an
1605 accidental token paste for output. For simplicity, it is
1606 conservative, and occasionally advises a space where one is not
1607 needed, e.g. "." and ".2".  TOKEN1 is the left-hand token and TOKEN2
   the token that would follow it directly.  */
1610 cpp_avoid_paste (pfile, token1, token2)
1612 const cpp_token *token1, *token2;
1614 enum cpp_ttype a = token1->type, b = token2->type;
/* NAMED_OP tokens are examined first; the statements guarded by these
   two tests are not visible in this listing.  */
1617 if (token1->flags & NAMED_OP)
1619 if (token2->flags & NAMED_OP)
/* C becomes the first character of the spelling of TOKEN2 when it is an
   operator: from the digraph table for digraphs, otherwise from
   token_spellings.  */
1623 if (token2->flags & DIGRAPH)
1624 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1625 else if (token_spellings[b].category == SPELL_OPERATOR)
1626 c = token_spellings[b].name[0];
1628 /* Quickly get everything that can paste with an '='. */
1629 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
/* For each left-hand token type, the characters listed are the leading
   characters of TOKEN2 that call for a separating space.  */
1634 case CPP_GREATER: return c == '>' || c == '?';
1635 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1636 case CPP_PLUS: return c == '+';
1637 case CPP_MINUS: return c == '-' || c == '>';
1638 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1639 case CPP_MOD: return c == ':' || c == '>';
1640 case CPP_AND: return c == '&';
1641 case CPP_OR: return c == '|';
1642 case CPP_COLON: return c == ':' || c == '>';
1643 case CPP_DEREF: return c == '*';
1644 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1645 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
/* After a name: a number that name_p accepts, or a character or string
   constant (the L noted at the end of the expression).  One line of
   this condition is absent from the listing.  */
1646 case CPP_NAME: return ((b == CPP_NUMBER
1647 && name_p (pfile, &token2->val.str))
1649 || b == CPP_CHAR || b == CPP_STRING); /* L */
1650 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1651 || c == '.' || c == '+' || c == '-');
/* Only with the objc option set: an at-sign followed by a name or a
   string.  */
1652 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1653 && token1->val.c == '@'
1654 && (b == CPP_NAME || b == CPP_STRING));
1661 /* Output all the remaining tokens on the current line, and a newline
1662 character, to FP. Leading whitespace is removed. If there are
1663 macros, special token padding is not performed.  Tokens are obtained
   from PFILE with cpp_get_token and written with cpp_output_token.  */
1665 cpp_output_line (pfile, fp)
1669 const cpp_token *token;
1671 token = cpp_get_token (pfile);
/* Loop until cpp_get_token returns a token of type CPP_EOF.  */
1672 while (token->type != CPP_EOF)
1674 cpp_output_token (token, fp);
/* The next token is fetched before spacing is decided, so PREV_WHITE is
   tested on the token about to be written; the first token of the line
   is written without that test.  The statement guarded by the test is
   not visible in this listing.  */
1675 token = cpp_get_token (pfile);
1676 if (token->flags & PREV_WHITE)
1683 /* Returns the value of a hexadecimal digit.  The line naming this
   function is absent from the listing; the callers visible elsewhere in
   the file call it hex_digit_value.  C is the digit character.  */
/* Lower-case a to f give 10 to 15.  */
1688 if (c >= 'a' && c <= 'f')
1689 return c - 'a' + 10;
/* Upper-case A to F give 10 to 15.  */
1690 if (c >= 'A' && c <= 'F')
1691 return c - 'A' + 10;
/* Decimal digits; the statement guarded by this test and whatever
   follows it for other characters are not visible in this listing.  */
1692 if (c >= '0' && c <= '9')
1697 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1698 failure if cpplib is not parsing C++ or C99. Such failure is
1699 silent, and no variables are updated. Otherwise returns 0, and
1700 warns if -Wtraditional.
1702 [lex.charset]: The character designated by the universal character
1703 name \UNNNNNNNN is that character whose character short name in
1704 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1705 universal character name \uNNNN is that character whose character
1706 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1707 for a universal character name is less than 0x20 or in the range
1708 0x7F-0x9F (inclusive), or if the universal character name
1709 designates a character in the basic source character set, then the
1710 program is ill-formed.
1712 We assume that wchar_t is Unicode, so we don't need to do any
1713 mapping. Is this ever wrong?
1715 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1716 LIMIT is the end of the string or charconst. PSTR is updated to
1717 point after the UCS on return, and the UCS is written into PC. */
1720 maybe_read_ucs (pfile, pstr, limit, pc)
1722 const unsigned char **pstr;
1723 const unsigned char *limit;
/* P is a local cursor over the digits; CODE accumulates the value; C
   starts as the introducer character read through PC.  */
1726 const unsigned char *p = *pstr;
1727 unsigned int code = 0;
1728 unsigned int c = *pc, length;
1730 /* Only attempt to interpret a UCS for C++ and C99. */
1731 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1734 if (CPP_WTRADITIONAL (pfile))
1735 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
/* A lower-case u introducer takes four hex digits; otherwise eight are
   expected.  */
1737 length = (c == 'u' ? 4: 8);
/* Fewer than LENGTH bytes remain before LIMIT.  */
1739 if ((size_t) (limit - p) < length)
1741 cpp_error (pfile, "incomplete universal-character-name");
1742 /* Skip to the end to avoid more diagnostics. */
/* Accumulate the digits four bits at a time.  LENGTH reaches zero only
   when the loop runs to completion, which the out-of-range test further
   down uses to avoid a second diagnostic.  */
1747 for (; length; length--, p++)
1751 code = (code << 4) + hex_digit_value (c);
1755 "non-hex digit '%c' in universal-character-name", c);
1756 /* We shouldn't skip in case there are multibyte chars. */
1762 #ifdef TARGET_EBCDIC
1763 cpp_error (pfile, "universal-character-name on EBCDIC target");
1764 code = 0x3f; /* EBCDIC invalid character */
1766 /* True extended characters are OK. */
/* The first line of this condition is absent from the listing.  The
   visible part excludes codes with bit 0x80000000 set and codes in the
   range 0xD800 to 0xDFFF.  */
1768 && !(code & 0x80000000)
1769 && !(code >= 0xD800 && code <= 0xDFFF))
1771 /* The standard permits $, @ and ` to be specified as UCNs. We use
1772 hex escapes so that this also works with EBCDIC hosts. */
1773 else if (code == 0x24 || code == 0x40 || code == 0x60)
1775 /* Don't give another error if one occurred above. */
1776 else if (length == 0)
1777 cpp_error (pfile, "universal-character-name out of range");
1785 /* Interpret an escape sequence, and return its value. PSTR points to
1786 the input pointer, which is just after the backslash. LIMIT is how
1787 much text we have. MASK is a bitmask for the precision for the
1788 destination type (char or wchar_t). TRADITIONAL, if true, does not
1789 interpret escapes that did not exist in traditional C.
1791 Handles all relevant diagnostics.  Many case labels and short
   statements of this function are absent from the listing; the notes
   below describe the visible lines only.  */
1794 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1796 const unsigned char **pstr;
1797 const unsigned char *limit;
1798 unsigned HOST_WIDE_INT mask;
/* STR is a local cursor; C starts as the character right after the
   backslash and STR is left just past it.  */
1802 const unsigned char *str = *pstr;
1803 unsigned int c = *str++;
/* These escapes stand for the character itself, so C is unchanged.  */
1807 case '\\': case '\'': case '"': case '?': break;
/* Simple escapes map to the matching TARGET_ character constant.  */
1808 case 'b': c = TARGET_BS; break;
1809 case 'f': c = TARGET_FF; break;
1810 case 'n': c = TARGET_NEWLINE; break;
1811 case 'r': c = TARGET_CR; break;
1812 case 't': c = TARGET_TAB; break;
1813 case 'v': c = TARGET_VT; break;
1815 case '(': case '{': case '[': case '%':
1816 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1817 '\%' is used to prevent SCCS from getting confused. */
/* These are treated as unknown only when CPP_PEDANTIC is set.  */
1818 unknown = CPP_PEDANTIC (pfile);
1822 if (CPP_WTRADITIONAL (pfile))
1823 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1829 if (CPP_PEDANTIC (pfile))
1830 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
/* Universal character names are delegated to maybe_read_ucs, whose
   return value says whether the escape is to be treated as unknown.  */
1835 unknown = maybe_read_ucs (pfile, &str, limit, &c);
1839 if (CPP_WTRADITIONAL (pfile))
1840 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
/* Hex escape: I accumulates the value, OVERFLOW records lost bits.  */
1844 unsigned int i = 0, overflow = 0;
1845 int digits_found = 0;
/* The expression is nonzero exactly when the top four bits of I are
   set, i.e. when the shift on the following line would discard them.  */
1853 overflow |= i ^ (i << 4 >> 4);
1854 i = (i << 4) + hex_digit_value (c);
1859 cpp_error (pfile, "\\x used with no following hex digits");
/* Out of range if bits were lost while accumulating, or if the value
   has bits outside MASK.  */
1861 if (overflow | (i != (i & mask)))
1863 cpp_pedwarn (pfile, "hex escape sequence out of range");
1870 case '0': case '1': case '2': case '3':
1871 case '4': case '5': case '6': case '7':
/* Octal escape: the first digit is already in C.  */
1873 unsigned int i = c - '0';
/* The count bound caps how many further digits are read; the initial
   value of count is on a line not visible in this listing.  */
1876 while (str < limit && ++count < 3)
1879 if (c < '0' || c > '7')
1882 i = (i << 3) + c - '0';
1885 if (i != (i & mask))
1887 cpp_pedwarn (pfile, "octal escape sequence out of range");
/* Two message forms exist for an unknown escape: one prints the
   character, the other prints it as three octal digits.  The test that
   chooses between them is not visible in this listing.  */
1902 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1904 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1908 cpp_pedwarn (pfile, "escape sequence out of range for character");
/* Default MAX_CHAR_TYPE_SIZE to CHAR_TYPE_SIZE and MAX_WCHAR_TYPE_SIZE
   to WCHAR_TYPE_SIZE when they are not already defined.
   cpp_interpret_charconst uses them as the width in bits of a narrow or
   wide character.  */
1914 #ifndef MAX_CHAR_TYPE_SIZE
1915 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1918 #ifndef MAX_WCHAR_TYPE_SIZE
1919 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1922 /* Interpret a (possibly wide) character constant in TOKEN.
1923 WARN_MULTI warns about multi-character charconsts, if not
1924 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1925 that did not exist in traditional C. PCHARS_SEEN points to a
1926 variable that is filled in with the number of characters seen.
   PFILE is passed to the diagnostic routines and to cpp_parse_escape.  */
1928 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1930 const cpp_token *token;
1933 unsigned int *pchars_seen;
/* STR and LIMIT delimit the stored text of the constant.  */
1935 const unsigned char *str = token->val.str.text;
1936 const unsigned char *limit = str + token->val.str.len;
1937 unsigned int chars_seen = 0;
1938 unsigned int width, max_chars, c;
1939 unsigned HOST_WIDE_INT mask;
1940 HOST_WIDE_INT result = 0;
/* MULTIBYTE_CHARS is undefined near the top of this file, so the
   sections conditional on it are compiled out as the file stands.  */
1942 #ifdef MULTIBYTE_CHARS
1943 (void) local_mbtowc (NULL, NULL, 0);
1946 /* Width in bits. */
1947 if (token->type == CPP_CHAR)
1948 width = MAX_CHAR_TYPE_SIZE;
1950 width = MAX_WCHAR_TYPE_SIZE;
/* MASK keeps the low WIDTH bits when WIDTH is narrower than
   HOST_WIDE_INT; the other branch is not visible in this listing.  */
1952 if (width < HOST_BITS_PER_WIDE_INT)
1953 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
/* The number of WIDTH-bit characters that fit in one HOST_WIDE_INT.  */
1956 max_chars = HOST_BITS_PER_WIDE_INT / width;
1960 #ifdef MULTIBYTE_CHARS
1964 char_len = local_mbtowc (&wc, str, limit - str);
1967 cpp_warning (pfile, "ignoring invalid multibyte character");
/* Escape sequences are decoded by cpp_parse_escape, which also advances
   STR.  */
1980 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1982 #ifdef MAP_CHARACTER
1984 c = MAP_CHARACTER (c);
1987 /* Merge character into result; ignore excess chars. */
/* CHARS_SEEN is incremented for every character, including those
   beyond MAX_CHARS that are not merged.  */
1988 if (++chars_seen <= max_chars)
1990 if (width < HOST_BITS_PER_WIDE_INT)
1991 result = (result << width) | (c & mask);
1997 if (chars_seen == 0)
1998 cpp_error (pfile, "empty character constant");
1999 else if (chars_seen > max_chars)
/* Clamp the count to the number of characters actually merged.  */
2001 chars_seen = max_chars;
2002 cpp_warning (pfile, "character constant too long");
2004 else if (chars_seen > 1 && !traditional && warn_multi)
2005 cpp_warning (pfile, "multi-character character constant");
2007 /* If char type is signed, sign-extend the constant. The
2008 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
2009 if (token->type == CPP_CHAR && chars_seen)
/* NBITS is the number of significant bits in RESULT.  The mask declared
   here is a second variable, of type unsigned int, distinct from the
   HOST_WIDE_INT mask declared at the top of the function.
   NOTE(review): NBITS can be as large as MAX_CHARS times WIDTH, which is
   derived from HOST_BITS_PER_WIDE_INT; if that exceeds HOST_BITS_PER_INT
   the shift count below is out of range -- confirm for hosts where
   HOST_WIDE_INT is wider than int.  */
2011 unsigned int nbits = chars_seen * width;
2012 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
/* The first branch applies when __CHAR_UNSIGNED__ is defined as a macro
   or the top significant bit of RESULT is clear; the statements of both
   branches are not visible in this listing.  */
2014 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2015 || ((result >> (nbits - 1)) & 1) == 0)
2021 *pchars_seen = chars_seen;
2025 /* Memory buffers. Changing these three constants can have a dramatic
2026 effect on performance. The values here are reasonable defaults,
2027 but might be tuned. If you adjust them, be sure to test across a
2028 range of uses of cpplib, including heavy nested function-like macro
2029 expansion. Also check the change in peak memory usage (NJAMD is a
2030 good tool for this). */
/* Smallest data area that new_buff allocates.  */
2031 #define MIN_BUFF_SIZE 8000
/* Exclusive upper limit on the size of a free-list buffer that
   _cpp_get_buff accepts for a request of MIN_SIZE bytes.  */
2032 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (8000 + (MIN_SIZE) * 3 / 2)
/* Size requested when extending BUFF: MIN_EXTRA plus twice the number
   of bytes between its cur and limit pointers.  */
2033 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
2034 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2)
/* Alignment taken from the offset of member u within struct dummy; the
   definition of that struct is not visible in this listing.  */
2046 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
/* Round SIZE up to a multiple of ALIGN.  The mask arithmetic is only
   correct when ALIGN is a power of two.  */
2047 #define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
2049 /* Create a new allocation buffer. Place the control block at the end
2050 of the buffer, so that buffer overflows will cause immediate chaos.
   The line naming this function is absent from the listing;
   _cpp_get_buff calls it as new_buff with the wanted length.  */
2056 unsigned char *base;
/* Never allocate less than MIN_BUFF_SIZE bytes of data area.  */
2058 if (len < MIN_BUFF_SIZE)
2059 len = MIN_BUFF_SIZE;
/* Round LEN up to DEFAULT_ALIGNMENT; the control block is placed at
   base + len below.  */
2060 len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
/* A single allocation holds LEN bytes of data followed by the _cpp_buff
   control block.  */
2062 base = xmalloc (len + sizeof (_cpp_buff));
2063 result = (_cpp_buff *) (base + len);
2064 result->base = base;
/* The data area ends where the control block begins.  */
2066 result->limit = base + len;
2067 result->next = NULL;
2071 /* Place a chain of unwanted allocation buffers on the free list.
   BUFF is the head of the chain; the free list is pfile->free_buffs.  */
2073 _cpp_release_buff (pfile, buff)
/* END starts at BUFF; the statements that move it along the chain are
   not visible in this listing.  */
2077 _cpp_buff *end = buff;
/* Hang the existing free list after END and make BUFF the new head.  */
2081 end->next = pfile->free_buffs;
2082 pfile->free_buffs = buff;
2085 /* Return a free buffer of size at least MIN_SIZE.  The free list
   pfile->free_buffs is searched first; new_buff supplies a fresh buffer
   otherwise.  */
2087 _cpp_get_buff (pfile, min_size)
2091 _cpp_buff *result, **p;
/* P walks the links of the free list, starting at its head pointer.  */
2093 for (p = &pfile->free_buffs;; p = &(*p)->next)
/* Fall back to a freshly allocated buffer; the condition guarding this
   return is not visible in this listing.  */
2098 return new_buff (min_size);
/* SIZE is the capacity of the candidate buffer.  Further down, a
   candidate is accepted when SIZE is at least MIN_SIZE and below
   BUFF_SIZE_UPPER_BOUND (min_size), and the chosen buffer has its next
   link cleared and its cur pointer reset to base.  */
2100 size = result->limit - result->base;
/* NOTE(review): the comment opened on the next line has lost its closing
   line in this listing, so as written it swallows the statements that
   follow -- restore the missing line from the full file.  */
2101 /* Return a buffer that's big enough, but don't waste one that's
2103 if (size >= min_size && size < BUFF_SIZE_UPPER_BOUND (min_size))
2108 result->next = NULL;
2109 result->cur = result->base;
2113 /* Creates a new buffer with enough space to hold the uncommitted
2114 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2115 the excess bytes to the new buffer. Chains the new buffer after
2116 BUFF, and returns the new buffer. */
2118 _cpp_append_extend_buff (pfile, buff, min_extra)
/* The requested size comes from EXTENDED_BUFF_SIZE; the buffer itself
   comes from _cpp_get_buff.  */
2123 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2124 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
/* Link the new buffer after BUFF, then copy BUFF_ROOM (buff) bytes from
   buff->cur to the start of the new buffer.  BUFF_ROOM is defined
   outside this listing; presumably the bytes between cur and limit --
   confirm.  */
2126 buff->next = new_buff;
2127 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2131 /* Creates a new buffer with enough space to hold the uncommitted
2132 remaining bytes of the buffer pointed to by BUFF, and at least
2133 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2134 Chains the new buffer before the buffer pointed to by BUFF, and
2135 updates the pointer to point to the new buffer. */
2137 _cpp_extend_buff (pfile, pbuff, min_extra)
/* OLD_BUFF is the buffer currently referenced through PBUFF.  */
2142 _cpp_buff *new_buff, *old_buff = *pbuff;
2143 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
/* Copy BUFF_ROOM (old_buff) bytes from old_buff->cur to the start of
   the new buffer, then put the old buffer after the new one.
   BUFF_ROOM is defined outside this listing; presumably the bytes
   between cur and limit -- confirm.  */
2145 new_buff = _cpp_get_buff (pfile, size);
2146 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2147 new_buff->next = old_buff;
2151 /* Free a chain of buffers starting at BUFF. */
2153 _cpp_free_buff (buff)
/* The loop ends when BUFF becomes null.  NEXT is set inside the loop
   body, which is not visible in this listing.  */
2158 for (; buff; buff = next)
2165 /* Allocate permanent, unaligned storage of length LEN.  The bytes
   are carved from the buffer at the head of pfile->u_buff.  */
2167 _cpp_unaligned_alloc (pfile, len)
/* RESULT starts as the next free byte of the current buffer.  */
2171 _cpp_buff *buff = pfile->u_buff;
2172 unsigned char *result = buff->cur;
/* Not enough room left: get a buffer of at least LEN bytes and push it
   on the front of the u_buff chain.  */
2174 if (len > (size_t) (buff->limit - result))
2176 buff = _cpp_get_buff (pfile, len);
2177 buff->next = pfile->u_buff;
2178 pfile->u_buff = buff;
/* Mark LEN bytes starting at RESULT as used.  */
2182 buff->cur = result + len;
2186 /* Allocate permanent storage of length LEN from the a_buff chain of
   PFILE.  This is the counterpart of _cpp_unaligned_alloc, which draws
   from u_buff.  No alignment adjustment of LEN or of the result is made
   in the visible code.  */
2188 _cpp_aligned_alloc (pfile, len)
/* RESULT starts as the next free byte of the current buffer.  */
2192 _cpp_buff *buff = pfile->a_buff;
2193 unsigned char *result = buff->cur;
/* Not enough room left: get a buffer of at least LEN bytes and push it
   on the front of the a_buff chain.  */
2195 if (len > (size_t) (buff->limit - result))
2197 buff = _cpp_get_buff (pfile, len);
2198 buff->next = pfile->a_buff;
2199 pfile->a_buff = buff;
/* Mark LEN bytes starting at RESULT as used.  */
2203 buff->cur = result + len;