1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
29 #define PEEKBUF(BUFFER, N) \
30 ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
31 #define GETBUF(BUFFER) \
32 ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)
33 #define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))
35 #define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
36 #define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
37 #define GETC() GETBUF (CPP_BUFFER (pfile))
38 #define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
40 static void skip_block_comment PARAMS ((cpp_reader *));
41 static void skip_line_comment PARAMS ((cpp_reader *));
42 static int maybe_macroexpand PARAMS ((cpp_reader *, long));
43 static int skip_comment PARAMS ((cpp_reader *, int));
44 static int copy_comment PARAMS ((cpp_reader *, int));
45 static void skip_string PARAMS ((cpp_reader *, int));
46 static void parse_string PARAMS ((cpp_reader *, int));
47 static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
48 static int null_cleanup PARAMS ((cpp_buffer *, cpp_reader *));
49 static void null_warning PARAMS ((cpp_reader *, unsigned int));
51 static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
53 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
55 static void bump_column PARAMS ((cpp_printer *, unsigned int,
57 static void expand_name_space PARAMS ((cpp_toklist *));
58 static void expand_token_space PARAMS ((cpp_toklist *));
59 static void init_token_list PARAMS ((cpp_reader *, cpp_toklist *, int));
60 static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
63 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
66 _cpp_grow_token_buffer (pfile, n)
70 long old_written = CPP_WRITTEN (pfile);
71 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
72 pfile->token_buffer = (U_CHAR *)
73 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
74 CPP_SET_WRITTEN (pfile, old_written);
78 null_cleanup (pbuf, pfile)
79 cpp_buffer *pbuf ATTRIBUTE_UNUSED;
80 cpp_reader *pfile ATTRIBUTE_UNUSED;
85 /* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
86 If BUFFER != NULL, then use the LENGTH characters in BUFFER
87 as the new input buffer.
88 Return the new buffer, or NULL on failure. */
91 cpp_push_buffer (pfile, buffer, length)
96 cpp_buffer *buf = CPP_BUFFER (pfile);
98 if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
100 cpp_fatal (pfile, "macro or `#include' recursion too deep");
104 new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
106 new->if_stack = pfile->if_stack;
107 new->cleanup = null_cleanup;
108 new->buf = new->cur = buffer;
109 new->rlimit = buffer + length;
112 new->line_base = NULL;
114 CPP_BUFFER (pfile) = new;
119 cpp_pop_buffer (pfile)
122 cpp_buffer *buf = CPP_BUFFER (pfile);
123 if (ACTIVE_MARK_P (pfile))
124 cpp_ice (pfile, "mark active in cpp_pop_buffer");
125 (*buf->cleanup) (buf, pfile);
126 CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
128 pfile->buffer_stack_depth--;
129 return CPP_BUFFER (pfile);
132 /* Deal with the annoying semantics of fwrite. */
134 safe_fwrite (pfile, buf, len, fp)
144 count = fwrite (buf, 1, len, fp);
153 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
156 /* Notify the compiler proper that the current line number has jumped,
157 or the current file name has changed. */
160 output_line_command (pfile, print, line)
165 cpp_buffer *ip = cpp_file_buffer (pfile);
166 enum { same = 0, enter, leave, rname } change;
167 static const char * const codes[] = { "", " 1", " 2", "" };
169 if (CPP_OPTION (pfile, no_line_commands))
172 /* Determine whether the current filename has changed, and if so,
173 how. 'nominal_fname' values are unique, so they can be compared
174 by comparing pointers. */
175 if (ip->nominal_fname == print->last_fname)
179 if (pfile->buffer_stack_depth == print->last_bsd)
183 if (pfile->buffer_stack_depth > print->last_bsd)
187 print->last_bsd = pfile->buffer_stack_depth;
189 print->last_fname = ip->nominal_fname;
191 /* If the current file has not changed, we can output a few newlines
192 instead if we want to increase the line number by a small amount.
193 We cannot do this if print->lineno is zero, because that means we
194 haven't output any line commands yet. (The very first line
195 command output is a `same_file' command.) */
196 if (change == same && print->lineno != 0
197 && line >= print->lineno && line < print->lineno + 8)
199 while (line > print->lineno)
201 putc ('\n', print->outf);
207 #ifndef NO_IMPLICIT_EXTERN_C
208 if (CPP_OPTION (pfile, cplusplus))
209 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
211 ip->system_header_p ? " 3" : "",
212 (ip->system_header_p == 2) ? " 4" : "");
215 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
217 ip->system_header_p ? " 3" : "");
218 print->lineno = line;
221 /* Write the contents of the token_buffer to the output stream, and
222 clear the token_buffer. Also handles generating line commands and
223 keeping track of file transitions. */
226 cpp_output_tokens (pfile, print)
232 if (CPP_WRITTEN (pfile) - print->written)
234 if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno)
236 safe_fwrite (pfile, pfile->token_buffer,
237 CPP_WRITTEN (pfile) - print->written, print->outf);
240 ip = cpp_file_buffer (pfile);
242 output_line_command (pfile, print, CPP_BUF_LINE (ip));
244 CPP_SET_WRITTEN (pfile, print->written);
247 /* Helper for cpp_output_list - increases the column number to match
248 what we expect it to be. */
251 bump_column (print, from, to)
253 unsigned int from, to;
255 unsigned int tabs, spcs;
256 unsigned int delta = to - from;
258 /* Only if FROM is 0, advance by tabs. */
260 tabs = delta / 8, spcs = delta % 8;
262 tabs = 0, spcs = delta;
264 while (tabs--) putc ('\t', print->outf);
265 while (spcs--) putc (' ', print->outf);
268 /* Write out the list L onto pfile->token_buffer. This function is incomplete:
271 1) pfile->token_buffer is not going to continue to exist.
272 2) At the moment, tokens don't carry the information described
273 in cpplib.h; they are all strings.
274 3) The list has to be a complete line, and has to be written starting
275 at the beginning of a line. */
278 cpp_output_list (pfile, print, list)
281 const cpp_toklist *list;
284 unsigned int curcol = 1;
286 /* XXX Probably does not do what is intended. */
287 if (print->lineno != list->line)
288 output_line_command (pfile, print, list->line);
290 for (i = 0; i < list->tokens_used; i++)
292 if (list->tokens[i].type == CPP_VSPACE)
294 output_line_command (pfile, print, list->tokens[i].aux);
298 if (curcol < list->tokens[i].col)
300 /* Insert space to bring the column to what it should be. */
301 bump_column (print, curcol - 1, list->tokens[i].col);
302 curcol = list->tokens[i].col;
304 /* XXX We may have to insert space to prevent an accidental token paste. */
306 safe_fwrite (pfile, list->namebuf + list->tokens[i].val.name.offset,
307 list->tokens[i].val.name.len, print->outf);
308 curcol += list->tokens[i].val.name.len;
312 /* Scan a string (which may have escape marks), perform macro expansion,
313 and write the result to the token_buffer. */
316 _cpp_expand_to_buffer (pfile, buf, length)
322 enum cpp_ttype token;
327 cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
331 /* Copy the buffer, because it might be in an unsafe place - for
332 example, a sequence on the token_buffer, where the pointers will
333 be invalidated if we enlarge the token_buffer. */
334 buf1 = alloca (length);
335 memcpy (buf1, buf, length);
337 /* Set up the input on the input stack. */
338 ip = cpp_push_buffer (pfile, buf1, length);
343 /* Scan the input, create the output. */
346 token = cpp_get_token (pfile);
347 if (token == CPP_EOF)
349 if (token == CPP_POP && CPP_BUFFER (pfile) == ip)
351 cpp_pop_buffer (pfile);
357 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output.
358 Then pop the buffer. */
361 cpp_scan_buffer_nooutput (pfile)
364 cpp_buffer *buffer = CPP_BUFFER (pfile);
365 enum cpp_ttype token;
366 unsigned int old_written = CPP_WRITTEN (pfile);
367 /* In no-output mode, we can ignore everything but directives. */
370 if (! pfile->only_seen_white)
371 _cpp_skip_rest_of_line (pfile);
372 token = cpp_get_token (pfile);
373 if (token == CPP_EOF)
375 if (token == CPP_POP && CPP_BUFFER (pfile) == buffer)
377 cpp_pop_buffer (pfile);
381 CPP_SET_WRITTEN (pfile, old_written);
384 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT.
385 Then pop the buffer. */
388 cpp_scan_buffer (pfile, print)
392 cpp_buffer *buffer = CPP_BUFFER (pfile);
393 enum cpp_ttype token;
397 token = cpp_get_token (pfile);
398 if ((token == CPP_POP && !CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
399 || token == CPP_EOF || token == CPP_VSPACE
400 /* XXX Temporary kluge - force flush after #include only */
401 || (token == CPP_DIRECTIVE
402 && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
404 cpp_output_tokens (pfile, print);
405 if (token == CPP_EOF)
407 if (token == CPP_POP && CPP_BUFFER (pfile) == buffer)
409 cpp_pop_buffer (pfile);
416 /* Return the topmost cpp_buffer that corresponds to a file (not a macro). */
419 cpp_file_buffer (pfile)
424 for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
425 if (ip->ihash != NULL)
430 /* Token-buffer helper functions. */
432 /* Expand a token list's string space. */
434 expand_name_space (list)
438 list->namebuf = (unsigned char *) xrealloc (list->namebuf,
442 /* Expand the number of tokens in a list. */
444 expand_token_space (list)
447 list->tokens_cap *= 2;
448 list->tokens = (cpp_token *)
449 xrealloc (list->tokens, list->tokens_cap * sizeof (cpp_token));
452 /* Initialise a token list. */
454 init_token_list (pfile, list, recycle)
459 /* Recycling a used list saves 2 free-malloc pairs. */
462 list->tokens_used = 0;
467 /* Initialise token space. */
468 list->tokens_cap = 256; /* 4K on Intel. */
469 list->tokens_used = 0;
470 list->tokens = (cpp_token *)
471 xmalloc (list->tokens_cap * sizeof (cpp_token));
473 /* Initialise name space. */
474 list->name_cap = 1024;
476 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
480 list->line = pfile->buffer->lineno;
481 list->dir_handler = 0;
485 /* Scan an entire line and create a token list for it. Does not
486 macro-expand or execute directives. */
489 _cpp_scan_line (pfile, list)
498 init_token_list (pfile, list, 1);
500 written = CPP_WRITTEN (pfile);
505 col = CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base;
506 type = _cpp_lex_token (pfile);
507 len = CPP_WRITTEN (pfile) - written;
508 CPP_SET_WRITTEN (pfile, written);
509 if (type == CPP_HSPACE)
511 if (CPP_PEDANTIC (pfile))
512 pedantic_whitespace (pfile, pfile->token_buffer + written, len);
516 else if (type == CPP_COMMENT)
517 /* Only happens when processing -traditional macro definitions.
518 Do not give this a token entry, but do not change space_before either. */
522 if (list->tokens_used >= list->tokens_cap)
523 expand_token_space (list);
524 if (list->name_used + len >= list->name_cap)
525 expand_name_space (list);
527 if (type == CPP_MACRO)
531 list->tokens[i].type = type;
532 list->tokens[i].col = col;
533 list->tokens[i].flags = space_before ? HSPACE_BEFORE : 0;
535 if (type == CPP_VSPACE)
538 list->tokens[i].val.name.len = len;
539 list->tokens[i].val.name.offset = list->name_used;
540 memcpy (list->namebuf + list->name_used, CPP_PWRITTEN (pfile), len);
541 list->name_used += len;
545 list->tokens[i].aux = CPP_BUFFER (pfile)->lineno + 1;
547 /* XXX Temporary kluge: put back the newline. */
552 /* Skip a C-style block comment. We know it's a comment, and point is
553 at the second character of the starter. */
555 skip_block_comment (pfile)
558 unsigned int line, col;
559 const U_CHAR *limit, *cur;
562 line = CPP_BUF_LINE (CPP_BUFFER (pfile));
563 col = CPP_BUF_COL (CPP_BUFFER (pfile));
564 limit = CPP_BUFFER (pfile)->rlimit;
565 cur = CPP_BUFFER (pfile)->cur;
570 if (c == '\n' || c == '\r')
572 /* \r cannot be a macro escape marker here. */
573 if (!ACTIVE_MARK_P (pfile))
574 CPP_BUMP_LINE_CUR (pfile, cur);
578 /* Check for terminator. */
579 if (cur < limit && *cur == '/')
582 /* Warn about comment starter embedded in comment. */
583 if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
584 cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
585 cur - CPP_BUFFER (pfile)->line_base,
586 "'/*' within comment");
590 cpp_error_with_line (pfile, line, col, "unterminated comment");
593 CPP_BUFFER (pfile)->cur = cur + 1;
596 /* Skip a C++/Chill line comment. We know it's a comment, and point
597 is at the second character of the initiator. */
599 skip_line_comment (pfile)
607 /* We don't have to worry about EOF in here. */
610 /* Don't consider final '\n' to be part of comment. */
616 /* \r cannot be a macro escape marker here. */
617 if (!ACTIVE_MARK_P (pfile))
618 CPP_BUMP_LINE (pfile);
619 if (CPP_OPTION (pfile, warn_comments))
620 cpp_warning (pfile, "backslash-newline within line comment");
625 /* Skip a comment - C, C++, or Chill style. M is the first character
626 of the comment marker. If this really is a comment, skip to its
627 end and return ' '. If this is not a comment, return M (which will be '/' or '-'). */
631 skip_comment (pfile, m)
635 if (m == '/' && PEEKC() == '*')
637 skip_block_comment (pfile);
640 else if (m == '/' && PEEKC() == '/')
642 if (CPP_BUFFER (pfile)->system_header_p)
644 /* We silently allow C++ comments in system headers, irrespective
645 of conformance mode, because lots of busted systems do that
646 and trying to clean it up in fixincludes is a nightmare. */
647 skip_line_comment (pfile);
650 else if (CPP_OPTION (pfile, cplusplus_comments))
652 if (! CPP_BUFFER (pfile)->warned_cplusplus_comments)
654 if (CPP_WTRADITIONAL (pfile))
656 "C++ style comments are not allowed in traditional C");
657 else if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))
659 "C++ style comments are not allowed in ISO C89");
660 if (CPP_WTRADITIONAL (pfile)
661 || (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)))
663 "(this will be reported only once per input file)");
664 CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
666 skip_line_comment (pfile);
672 else if (m == '-' && PEEKC() == '-'
673 && CPP_OPTION (pfile, chill))
675 skip_line_comment (pfile);
682 /* Identical to skip_comment except that it copies the comment into the
683 token_buffer. This is used if !discard_comments. */
685 copy_comment (pfile, m)
689 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
692 if (skip_comment (pfile, m) == m)
695 limit = CPP_BUFFER (pfile)->cur;
696 CPP_RESERVE (pfile, limit - start + 2);
697 CPP_PUTC_Q (pfile, m);
698 for (; start <= limit; start++)
700 CPP_PUTC_Q (pfile, *start);
706 null_warning (pfile, count)
711 cpp_warning (pfile, "embedded null character ignored");
713 cpp_warning (pfile, "embedded null characters ignored");
716 /* Skip whitespace \-newline and comments. Does not macro-expand. */
719 _cpp_skip_hspace (pfile)
722 unsigned int null_count = 0;
730 else if (is_hspace(c))
732 if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
733 cpp_pedwarn (pfile, "%s in preprocessing directive",
734 c == '\f' ? "formfeed" : "vertical tab");
740 /* \r is a backslash-newline marker if !has_escapes, and
741 a deletable-whitespace or no-reexpansion marker otherwise. */
742 if (CPP_BUFFER (pfile)->has_escapes)
750 CPP_BUMP_LINE (pfile);
752 else if (c == '/' || c == '-')
754 c = skip_comment (pfile, c);
764 null_warning (pfile, null_count);
767 /* Read and discard the rest of the current line. */
770 _cpp_skip_rest_of_line (pfile)
784 if (! CPP_BUFFER (pfile)->has_escapes)
785 CPP_BUMP_LINE (pfile);
790 skip_string (pfile, c);
795 skip_comment (pfile, c);
800 if (CPP_PEDANTIC (pfile))
801 cpp_pedwarn (pfile, "%s in preprocessing directive",
802 c == '\f' ? "formfeed" : "vertical tab");
809 /* Parse an identifier starting with C. */
812 _cpp_parse_name (pfile, c)
824 if (c == '$' && CPP_PEDANTIC (pfile))
825 cpp_pedwarn (pfile, "`$' in identifier");
827 CPP_RESERVE(pfile, 2); /* One more for final NUL. */
828 CPP_PUTC_Q (pfile, c);
836 /* Parse and skip over a string starting with C. A single quoted
837 string is treated like a double -- some programs (e.g., troff) are
838 perverse this way. (However, a single quoted string is not allowed
839 to extend over multiple lines.) */
841 skip_string (pfile, c)
845 unsigned int start_line, start_column;
846 unsigned int null_count = 0;
848 start_line = CPP_BUF_LINE (CPP_BUFFER (pfile));
849 start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
856 cpp_error_with_line (pfile, start_line, start_column,
857 "unterminated string or character constant");
858 if (pfile->multiline_string_line != start_line
859 && pfile->multiline_string_line != 0)
860 cpp_error_with_line (pfile,
861 pfile->multiline_string_line, -1,
862 "possible real start of unterminated constant");
863 pfile->multiline_string_line = 0;
871 CPP_BUMP_LINE (pfile);
872 /* In Fortran and assembly language, silently terminate
873 strings of either variety at end of line. This is a
874 kludge around not knowing where comments are in these languages. */
876 if (CPP_OPTION (pfile, lang_fortran)
877 || CPP_OPTION (pfile, lang_asm))
882 /* Character constants may not extend over multiple lines.
883 In Standard C, neither may strings. We accept multiline
884 strings as an extension. */
887 cpp_error_with_line (pfile, start_line, start_column,
888 "unterminated character constant");
892 if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
893 cpp_pedwarn_with_line (pfile, start_line, start_column,
894 "string constant runs past end of line");
895 if (pfile->multiline_string_line == 0)
896 pfile->multiline_string_line = start_line;
900 if (CPP_BUFFER (pfile)->has_escapes)
902 cpp_ice (pfile, "\\r escape inside string constant");
906 /* Backslash newline is replaced by nothing at all. */
907 CPP_BUMP_LINE (pfile);
924 cpp_warning (pfile, "null character in string or character constant");
925 else if (null_count > 1)
926 cpp_warning (pfile, "null characters in string or character constant");
929 /* Parse a string and copy it to the output. */
932 parse_string (pfile, c)
936 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
939 skip_string (pfile, c);
941 limit = CPP_BUFFER (pfile)->cur;
942 CPP_RESERVE (pfile, limit - start + 2);
943 CPP_PUTC_Q (pfile, c);
944 for (; start < limit; start++)
946 CPP_PUTC_Q (pfile, *start);
949 /* Read an assertion into the token buffer, converting to
950 canonical form: `#predicate(a n swe r)' The next non-whitespace
951 character to read should be the first letter of the predicate.
952 Returns 0 for syntax error, 1 for bare predicate, 2 for predicate
953 with answer (see callers for why). In case of 0, an error has been printed. */
956 _cpp_parse_assertion (pfile)
960 _cpp_skip_hspace (pfile);
964 cpp_error (pfile, "assertion without predicate");
967 else if (! is_idstart(c))
969 cpp_error (pfile, "assertion predicate is not an identifier");
972 CPP_PUTC(pfile, '#');
974 _cpp_parse_name (pfile, c);
979 if (is_hspace(c) || c == '\r')
980 _cpp_skip_hspace (pfile);
986 CPP_PUTC(pfile, '(');
989 while ((c = GETC()) != ')')
995 CPP_PUTC(pfile, ' ');
999 else if (c == '\n' || c == EOF)
1001 if (c == '\n') FORWARD(-1);
1002 cpp_error (pfile, "un-terminated assertion answer");
1006 /* \r cannot be a macro escape here. */
1007 CPP_BUMP_LINE (pfile);
1010 CPP_PUTC (pfile, c);
1015 if (pfile->limit[-1] == ' ')
1016 pfile->limit[-1] = ')';
1017 else if (pfile->limit[-1] == '(')
1019 cpp_error (pfile, "empty token sequence in assertion");
1023 CPP_PUTC (pfile, ')');
1028 /* Get the next token, and add it to the text in pfile->token_buffer.
1029 Return the kind of token we got. */
1032 _cpp_lex_token (pfile)
1036 enum cpp_ttype token;
1038 if (CPP_BUFFER (pfile) == NULL)
1049 if (PEEKC () == '=')
1053 if (CPP_OPTION (pfile, discard_comments))
1054 c = skip_comment (pfile, c);
1056 c = copy_comment (pfile, c);
1060 /* Comments are equivalent to spaces.
1061 For -traditional, a comment is equivalent to nothing. */
1062 if (!CPP_OPTION (pfile, discard_comments))
1064 else if (CPP_TRADITIONAL (pfile))
1066 if (pfile->parsing_define_directive)
1072 CPP_PUTC (pfile, c);
1077 CPP_PUTC (pfile, c);
1080 if (pfile->parsing_if_directive)
1082 CPP_ADJUST_WRITTEN (pfile, -1);
1083 if (_cpp_parse_assertion (pfile))
1084 return CPP_ASSERTION;
1088 if (pfile->parsing_define_directive)
1094 CPP_PUTC (pfile, c2);
1096 else if (c2 == '%' && PEEKN (1) == ':')
1098 /* Digraph: "%:" == "#". */
1100 CPP_RESERVE (pfile, 2);
1101 CPP_PUTC_Q (pfile, c2);
1102 CPP_PUTC_Q (pfile, GETC ());
1110 if (!pfile->only_seen_white)
1113 /* Remove the "#" or "%:" from the token buffer. */
1114 CPP_ADJUST_WRITTEN (pfile, (c == '#' ? -1 : -2));
1115 return CPP_DIRECTIVE;
1119 parse_string (pfile, c);
1120 return c == '\'' ? CPP_CHAR : CPP_STRING;
1123 if (!CPP_OPTION (pfile, dollars_in_ident))
1129 /* Digraph: ":>" == "]". */
1131 || (c2 == ':' && CPP_OPTION (pfile, cplusplus)))
1139 if (c2 == c || c2 == '=')
1144 /* Digraphs: "%:" == "#", "%>" == "}". */
1149 CPP_RESERVE (pfile, 2);
1150 CPP_PUTC_Q (pfile, c);
1151 CPP_PUTC_Q (pfile, c2);
1157 CPP_RESERVE (pfile, 2);
1158 CPP_PUTC_Q (pfile, c);
1159 CPP_PUTC_Q (pfile, c2);
1160 return CPP_OPEN_BRACE;
1162 /* else fall through */
1168 if (PEEKC () == '=')
1176 if (CPP_OPTION (pfile, chill))
1177 goto comment; /* Chill style comment */
1185 if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
1187 /* In C++, there's a ->* operator. */
1189 CPP_RESERVE (pfile, 4);
1190 CPP_PUTC_Q (pfile, c);
1191 CPP_PUTC_Q (pfile, GETC ());
1192 CPP_PUTC_Q (pfile, GETC ());
1200 if (pfile->parsing_include_directive)
1204 CPP_PUTC (pfile, c);
1208 if (c == '\n' || c == EOF)
1211 "missing '>' in `#include <FILENAME>'");
1216 if (!CPP_BUFFER (pfile)->has_escapes)
1218 /* Backslash newline is replaced by nothing. */
1219 CPP_ADJUST_WRITTEN (pfile, -1);
1220 CPP_BUMP_LINE (pfile);
1224 /* We might conceivably get \r- or \r<space> in
1225 here. Just delete 'em. */
1227 if (d != '-' && d != ' ')
1228 cpp_ice (pfile, "unrecognized escape \\r%c", d);
1229 CPP_ADJUST_WRITTEN (pfile, -1);
1235 /* Digraphs: "<%" == "{", "<:" == "[". */
1240 CPP_RESERVE (pfile, 2);
1241 CPP_PUTC_Q (pfile, c);
1242 CPP_PUTC_Q (pfile, c2);
1243 return CPP_CLOSE_BRACE;
1247 /* else fall through */
1252 /* GNU C++ supports MIN and MAX operators <? and >?. */
1253 if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
1256 CPP_RESERVE (pfile, 3);
1257 CPP_PUTC_Q (pfile, c);
1258 CPP_PUTC_Q (pfile, c2);
1259 if (PEEKC () == '=')
1260 CPP_PUTC_Q (pfile, GETC ());
1267 CPP_PUTC (pfile, c);
1272 /* In C++ there's a .* operator. */
1273 if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
1276 if (c2 == '.' && PEEKN(1) == '.')
1278 CPP_RESERVE (pfile, 3);
1279 CPP_PUTC_Q (pfile, '.');
1280 CPP_PUTC_Q (pfile, '.');
1281 CPP_PUTC_Q (pfile, '.');
1283 return CPP_ELLIPSIS;
1288 CPP_RESERVE (pfile, 2);
1289 CPP_PUTC_Q (pfile, c);
1290 CPP_PUTC_Q (pfile, GETC ());
1295 if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
1297 CPP_PUTC (pfile, c);
1299 parse_string (pfile, c);
1300 return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
1304 case '0': case '1': case '2': case '3': case '4':
1305 case '5': case '6': case '7': case '8': case '9':
1310 CPP_RESERVE (pfile, 2);
1311 CPP_PUTC_Q (pfile, c);
1315 if (!is_numchar(c) && c != '.'
1316 && ((c2 != 'e' && c2 != 'E'
1317 && ((c2 != 'p' && c2 != 'P')
1318 || CPP_OPTION (pfile, c89)))
1319 || (c != '+' && c != '-')))
1325 case 'b': case 'c': case 'd': case 'h': case 'o':
1326 case 'B': case 'C': case 'D': case 'H': case 'O':
1327 if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
1329 CPP_RESERVE (pfile, 2);
1330 CPP_PUTC_Q (pfile, c);
1331 CPP_PUTC_Q (pfile, '\'');
1337 goto chill_number_eof;
1340 CPP_PUTC (pfile, c);
1344 CPP_RESERVE (pfile, 2);
1345 CPP_PUTC_Q (pfile, c);
1358 case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1359 case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1360 case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1361 case 'x': case 'y': case 'z':
1362 case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1363 case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1364 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1367 _cpp_parse_name (pfile, c);
1370 case ' ': case '\t': case '\v': case '\f': case '\0':
1379 CPP_PUTC (pfile, c);
1381 if (c == EOF || !is_hspace(c))
1386 null_warning (pfile, null_count);
1391 if (CPP_BUFFER (pfile)->has_escapes)
1396 if (pfile->output_escapes)
1397 CPP_PUTS (pfile, "\r-", 2);
1398 _cpp_parse_name (pfile, GETC ());
1403 /* "\r " means a space, but only if necessary to prevent
1404 accidental token concatenation. */
1405 CPP_RESERVE (pfile, 2);
1406 if (pfile->output_escapes)
1407 CPP_PUTC_Q (pfile, '\r');
1408 CPP_PUTC_Q (pfile, c);
1413 cpp_ice (pfile, "unrecognized escape \\r%c", c);
1419 /* Backslash newline is ignored. */
1420 if (!ACTIVE_MARK_P (pfile))
1421 CPP_BUMP_LINE (pfile);
1426 CPP_PUTC (pfile, c);
1429 case '(': token = CPP_OPEN_PAREN; goto char1;
1430 case ')': token = CPP_CLOSE_PAREN; goto char1;
1431 case '{': token = CPP_OPEN_BRACE; goto char1;
1432 case '}': token = CPP_CLOSE_BRACE; goto char1;
1433 case ',': token = CPP_COMMA; goto char1;
1434 case ';': token = CPP_SEMICOLON; goto char1;
1440 CPP_PUTC (pfile, c);
1445 /* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1446 Caller is expected to have checked no_macro_expand. */
1448 maybe_macroexpand (pfile, written)
1452 U_CHAR *macro = pfile->token_buffer + written;
1453 size_t len = CPP_WRITTEN (pfile) - written;
1454 HASHNODE *hp = _cpp_lookup (pfile, macro, len);
1458 if (hp->type == T_DISABLED)
1460 if (pfile->output_escapes)
1462 /* Insert a no-reexpand marker before IDENT. */
1463 CPP_RESERVE (pfile, 2);
1464 CPP_ADJUST_WRITTEN (pfile, 2);
1465 macro = pfile->token_buffer + written;
1467 memmove (macro + 2, macro, len);
1473 if (hp->type == T_EMPTY)
1475 /* Special case optimization: macro expands to nothing. */
1476 CPP_SET_WRITTEN (pfile, written);
1477 CPP_PUTC_Q (pfile, ' ');
1481 /* If macro wants an arglist, verify that a '(' follows. */
1482 if (hp->type == T_MACRO && hp->value.defn->nargs >= 0)
1484 int macbuf_whitespace = 0;
1487 while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1489 const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1492 _cpp_skip_hspace (pfile);
1499 if (point != CPP_BUFFER (pfile)->cur)
1500 macbuf_whitespace = 1;
1504 goto not_macro_call;
1505 cpp_pop_buffer (pfile);
1508 CPP_SET_MARK (pfile);
1511 _cpp_skip_hspace (pfile);
1518 CPP_GOTO_MARK (pfile);
1523 if (macbuf_whitespace)
1524 CPP_PUTC (pfile, ' ');
1530 /* This is now known to be a macro call.
1531 Expand the macro, reading arguments as needed,
1532 and push the expansion on the input stack. */
1533 _cpp_macroexpand (pfile, hp);
1534 CPP_SET_WRITTEN (pfile, written);
1538 /* Complain about \v or \f in a preprocessing directive (constraint
1539 violation, C99 6.10 para 5). Caller has checked CPP_PEDANTIC. */
1541 pedantic_whitespace (pfile, p, len)
1549 cpp_pedwarn (pfile, "vertical tab in preprocessing directive");
1550 else if (*p == '\f')
1551 cpp_pedwarn (pfile, "form feed in preprocessing directive");
1559 cpp_get_token (pfile)
1562 enum cpp_ttype token;
1563 long written = CPP_WRITTEN (pfile);
1566 token = _cpp_lex_token (pfile);
1571 pfile->potential_control_macro = 0;
1572 pfile->only_seen_white = 0;
1576 if (pfile->only_seen_white == 0)
1577 pfile->only_seen_white = 1;
1578 CPP_BUMP_LINE (pfile);
1586 pfile->potential_control_macro = 0;
1587 if (_cpp_handle_directive (pfile))
1588 return CPP_DIRECTIVE;
1589 pfile->only_seen_white = 0;
1590 CPP_PUTC (pfile, '#');
1594 pfile->potential_control_macro = 0;
1595 pfile->only_seen_white = 0;
1596 if (! pfile->no_macro_expand
1597 && maybe_macroexpand (pfile, written))
1602 if (CPP_BUFFER (pfile) == NULL)
1604 if (CPP_BUFFER (pfile)->manual_pop)
1605 /* If we've been reading from redirected input, the
1606 frontend will pop the buffer. */
1609 if (CPP_BUFFER (pfile)->seen_eof)
1611 cpp_pop_buffer (pfile);
1616 _cpp_handle_eof (pfile);
1622 /* Like cpp_get_token, but skip spaces and comments. */
1625 cpp_get_non_space_token (pfile)
1628 int old_written = CPP_WRITTEN (pfile);
1631 enum cpp_ttype token = cpp_get_token (pfile);
1632 if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
1634 CPP_SET_WRITTEN (pfile, old_written);
1638 /* Like cpp_get_token, except that it does not execute directives,
1639 does not consume vertical space, discards horizontal space, and
1640 automatically pops off macro buffers. */
1642 _cpp_get_directive_token (pfile)
1646 enum cpp_ttype token;
1649 old_written = CPP_WRITTEN (pfile);
1650 token = _cpp_lex_token (pfile);
1657 /* Put it back and return VSPACE. */
1659 CPP_ADJUST_WRITTEN (pfile, -1);
1663 if (CPP_PEDANTIC (pfile))
1664 pedantic_whitespace (pfile, pfile->token_buffer + old_written,
1665 CPP_WRITTEN (pfile) - old_written);
1666 CPP_SET_WRITTEN (pfile, old_written);
1671 /* Don't execute the directive, but don't smash it to OTHER either. */
1672 CPP_PUTC (pfile, '#');
1673 return CPP_DIRECTIVE;
1676 if (! pfile->no_macro_expand
1677 && maybe_macroexpand (pfile, old_written))
1682 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1684 cpp_pop_buffer (pfile);
1688 /* This can happen for files that don't end with a newline,
1689 and for cpp_define and friends. Pretend they do, so
1690 callers don't have to deal. A warning will be issued by
1691 someone else, if necessary. */
1696 /* Determine the current line and column. Used only by read_and_prescan. */
1698 find_position (start, limit, linep)
1701 unsigned long *linep;
1703 unsigned long line = *linep;
1704 U_CHAR *lbase = start;
1705 while (start < limit)
1707 U_CHAR ch = *start++;
1708 if (ch == '\n' || ch == '\r')
1718 /* The following table is used by _cpp_read_and_prescan. If we have
1719 designated initializers, it can be constant data; otherwise, it is
1720 set up at runtime by _cpp_init_input_buffer. */
1723 #define UCHAR_MAX 255 /* assume 8-bit bytes */
1726 #if (GCC_VERSION >= 2007) || (__STDC_VERSION__ >= 199901L)
1727 #define init_chartab() /* nothing */
1728 #define CHARTAB static const unsigned char chartab[UCHAR_MAX + 1] = {
1730 #define s(p, v) [p] = v,
1732 #define CHARTAB static unsigned char chartab[UCHAR_MAX + 1] = { 0 }; \
1733 static void init_chartab PARAMS ((void)) { \
1734 unsigned char *x = chartab;
1736 #define s(p, v) x[p] = v;
1739 /* Table of characters that can't be handled in the inner loop.
1740 Also contains the mapping between trigraph third characters and their replacements. */
/* Codes stored in chartab[] for the three characters that need
   special handling. */
1742 #define SPECCASE_CR 1
1743 #define SPECCASE_BACKSLASH 2
1744 #define SPECCASE_QUESTION 3
/* Table entries for the special characters themselves. */
1747 s('\r', SPECCASE_CR)
1748 s('\\', SPECCASE_BACKSLASH)
1749 s('?', SPECCASE_QUESTION)
/* Table entries indexed by the third character of a trigraph; the
   stored value is the character the trigraph stands for. */
1751 s('=', '#') s(')', ']') s('!', '|')
1752 s('(', '[') s('\'', '^') s('>', '}')
1753 s('/', '\\') s('<', '{') s('-', '~')
/* NORMAL(c): the table entry for C is either zero or greater than
   SPECCASE_QUESTION, i.e. C is not one of the three special
   characters. The argument is evaluated twice. */
1760 #define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
/* NONTRI(c): compares its argument directly with SPECCASE_QUESTION,
   without indexing chartab[]. NOTE(review): presumably applied to a
   value already fetched from the table, where zero or a SPECCASE
   code means "not a trigraph replacement" - confirm at the use site. */
1761 #define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1763 /* Read the entire contents of file DESC into buffer BUF. LEN is how
1764 much memory to allocate initially; more will be allocated if
1765 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1766 canonical form (\n). If enabled, convert and/or warn about
1767 trigraphs. Convert backslash-newline to a one-character escape
1768 (\r) and remove it from "embarrassing" places (i.e. the middle of a
1769 token). If there is no newline at the end of the file, add one and
1770 warn. Returns -1 on failure, or the actual length of the data to be scanned.
1773 This function does a lot of work, and can be a serious performance
1774 bottleneck. It has been tuned heavily; make sure you understand it
1775 before hacking. The common case - no trigraphs, Unix style line
1776 breaks, backslash-newline set off by whitespace, newline at EOF -
1777 has been optimized at the expense of the others. The performance
1778 penalty for DOS style line breaks (\r\n) is about 15%.
1780 Warnings lose particularly heavily since we have to determine the
1781 line number, which involves scanning from the beginning of the file
1782 or from the last warning. The penalty for the absence of a newline
1783 at the end of reload1.c is about 60%. (reload1.c is 329k.)
1785 If your file has more than one kind of end-of-line marker, you
1786 will get messed-up line numbering.
1788 So that the cases of the switch statement do not have to concern
1789 themselves with the complications of reading beyond the end of the
1790 buffer, the buffer is guaranteed to have at least 3 characters in
1791 it (or however many are left in the file, if less) on entry to the
1792 switch. This is enough to handle trigraphs and the "\\\n\r" and "\\\r\n" cases.
1795 The end of the buffer is marked by a '\\', which, being a special
1796 character, guarantees we will exit the fast-scan loops and perform a refill. */
1800 _cpp_read_and_prescan (pfile, fp, desc, len)
/* NOTE(review): several declarations and statements of this function
   are not present in this excerpt; the comments added below describe
   only the lines that are. */
/* Output buffer: starts at LEN bytes and is grown with xrealloc
   further down. */
1806 U_CHAR *buf = (U_CHAR *) xmalloc (len);
/* IP reads from the input block, OP writes into BUF, and LINE_BASE is
   the position handed to find_position when a warning needs a line
   and column. */
1807 U_CHAR *ip, *op, *line_base;
/* Count of backslash-newlines seen but not yet written out. */
1810 unsigned int deferred_newlines;
1815 deferred_newlines = 0;
/* Data is read in at IBASE, three bytes into the input buffer; the
   allocation in _cpp_init_input_buffer reserves those three bytes
   for pushback during a refill. */
1819 ibase = pfile->input_buffer + 3;
1821 ip[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */
1825 U_CHAR *near_buff_end;
/* Read up to input_buffer_len bytes from DESC. */
1827 count = read (desc, ibase, pfile->input_buffer_len);
1831 ibase[count] = '\\'; /* Marks end of buffer */
/* With IBASE as set above this is three bytes short of the end
   marker; it is compared against IP before the switch below. */
1834 near_buff_end = pfile->input_buffer + count;
1839 size_t delta_line_base;
/* NOTE(review): the next two lines read as the tail of a comment
   whose opening line is not present in this excerpt. */
1843 This could happen if the file is larger than half the
1844 maximum address space of the machine. */
/* xrealloc may move BUF, so remember OP and LINE_BASE as offsets and
   rebuild them from the new base afterwards. */
1847 delta_op = op - buf;
1848 delta_line_base = line_base - buf;
1849 buf = (U_CHAR *) xrealloc (buf, len);
1850 op = buf + delta_op;
1851 line_base = buf + delta_line_base;
1858 /* Allow normal processing of the (at most 2) remaining
1859 characters. The end-of-buffer marker is still present
1860 and prevents false matches within the switch. */
1861 near_buff_end = ibase - 1;
1868 /* Deal with \-newline, potentially in the middle of a token. */
1869 if (deferred_newlines)
1871 if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
1873 /* Previous was not white space. Skip to white
1874 space, if we can, before outputting the \r's */
1876 while (ip[span] != ' '
1879 && NORMAL(ip[span]))
1881 memcpy (op, ip, span);
/* NOTE(review): presumably IP has been advanced past the copied span
   by this point, making this a test of the character that stopped
   the scan; neither the advancing statement nor the statement this
   test guards is visible here. */
1884 if (! NORMAL(ip[0]))
/* Write one '\r' for each deferred backslash-newline. */
1887 while (deferred_newlines)
1888 deferred_newlines--, *op++ = '\r';
1891 /* Copy as much as we can without special treatment. */
1893 while (NORMAL (ip[span])) span++;
1894 memcpy (op, ip, span);
1899 if (ip > near_buff_end) /* Do we have enough chars? */
/* Dispatch on the table class of the next input character, consuming
   it. */
1901 switch (chartab[*ip++])
1903 case SPECCASE_CR: /* \r */
1912 case SPECCASE_BACKSLASH: /* \ */
/* Each increment below records one backslash-newline; the test that
   follows it swallows the second character of a two-character line
   ending. NOTE(review): the test guarding the first increment is
   not visible here. */
1915 deferred_newlines++;
1917 if (*ip == '\r') ip++;
1919 else if (*ip == '\r')
1921 deferred_newlines++;
1923 if (*ip == '\n') ip++;
1929 case SPECCASE_QUESTION: /* ? */
1933 *op++ = '?'; /* Normal non-trigraph case */
/* Warn about the trigraph when asked to, locating it with
   find_position. The wording depends on whether trigraph conversion
   is enabled. */
1942 if (CPP_OPTION (pfile, warn_trigraphs))
1945 line_base = find_position (line_base, op, &line);
1946 col = op - line_base + 1;
1947 if (CPP_OPTION (pfile, trigraphs))
1948 cpp_warning_with_line (pfile, line, col,
1949 "trigraph ??%c converted to %c", d, t);
1951 cpp_warning_with_line (pfile, line, col,
1952 "trigraph ??%c ignored", d);
/* With conversion enabled, replace the character just written by T. */
1956 if (CPP_OPTION (pfile, trigraphs))
1958 op[-1] = t; /* Overwrite '?' */
1963 goto do_speccase; /* May need buffer refill */
1975 /* Copy previous char plus unprocessed (at most 2) chars
1976 to beginning of buffer, refill it with another
1977 read(), and continue processing */
1978 memmove (ip - count - 1, ip - 1, 4 - (ip - near_buff_end));
/* Locate the end of the output and warn that the file did not end in
   a newline. */
1988 line_base = find_position (line_base, op, &line);
1989 col = op - line_base + 1;
1990 cpp_warning_with_line (pfile, line, col, "no newline at end of file");
/* NOTE(review): OFFSET + 1 is compared with LEN twice; the statements
   between the two tests are not visible here. BUF is then
   reallocated to LEN bytes. */
1991 if (offset + 1 > len)
1994 if (offset + 1 > len)
1996 buf = (U_CHAR *) xrealloc (buf, len);
/* Hand the buffer to FP. When LEN - OFFSET is below 20 the buffer is
   kept as is; otherwise it is reallocated to OP - BUF bytes. */
2002 fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
/* Report a file that is too large, giving its name and OFFSET. */
2006 cpp_notice (pfile, "%s is too large (>%lu bytes)", fp->ihash->name,
2007 (unsigned long)offset);
/* Failure path: presumably reports the error recorded in errno
   against this file's name. */
2012 cpp_error_from_errno (pfile, fp->ihash->name);
2017 /* Allocate pfile->input_buffer, and initialize chartab[]
2018 if it hasn't happened already. */
2021 _cpp_init_input_buffer (pfile)
2027 init_token_list (pfile, &pfile->directbuf, 0);
2029 /* Determine the appropriate size for the input buffer. Normal C
2030 source files are smaller than eight K. */
2031 /* 8Kbytes of buffer proper, 1 to detect running off the end without
2032 address arithmetic all the time, and 3 for pushback during buffer
2033 refill, in case there's a potential trigraph or end-of-line
2034 digraph at the end of a block. */
2036 tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
2037 pfile->input_buffer = tmp;
2038 pfile->input_buffer_len = 8192;