1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/* Shorthand accessors for the current input buffer, CPP_BUFFER (pfile).
   Each macro expands to an expression that names pfile, so a variable
   of that name must be in scope wherever they are used.

   PEEKN (N)   - yields cur[N] when rlimit - cur >= N, otherwise EOF.
                 NOTE(review): when exactly N characters remain, cur[N]
                 is the byte located at rlimit itself; confirm that this
                 byte is always readable or whether > was intended.
   FORWARD (N) - applies CPP_FORWARD to the current buffer with N.
   GETC ()     - applies CPP_BUF_GET to the current buffer.
   PEEKC ()    - applies CPP_BUF_PEEK to the current buffer.  */
28 #define PEEKN(N) (CPP_BUFFER (pfile)->rlimit - CPP_BUFFER (pfile)->cur >= (N) \
29 ? CPP_BUFFER (pfile)->cur[N] : EOF)
30 #define FORWARD(N) CPP_FORWARD (CPP_BUFFER (pfile), (N))
31 #define GETC() CPP_BUF_GET (CPP_BUFFER (pfile))
32 #define PEEKC() CPP_BUF_PEEK (CPP_BUFFER (pfile))
/* Forward declarations of the file-local helpers defined below.  The
   parameter lists are wrapped in PARAMS; presumably this lets the
   prototypes vanish for pre-ANSI compilers (TODO confirm against the
   definition of PARAMS).  */
34 static void skip_block_comment PARAMS ((cpp_reader *));
35 static void skip_line_comment PARAMS ((cpp_reader *));
36 static int maybe_macroexpand PARAMS ((cpp_reader *, long));
37 static int skip_comment PARAMS ((cpp_reader *, int));
38 static int copy_comment PARAMS ((cpp_reader *, int));
39 static void skip_string PARAMS ((cpp_reader *, int));
40 static void parse_string PARAMS ((cpp_reader *, int));
41 static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
42 static int null_cleanup PARAMS ((cpp_buffer *, cpp_reader *));
44 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars.
   The new token_buffer_size is N plus twice the previous size.  The
   write offset (CPP_WRITTEN) is read before the xrealloc call and
   re-applied with CPP_SET_WRITTEN afterwards; presumably this is needed
   because the write position is kept as a pointer into the buffer,
   which xrealloc may relocate.  */
47 _cpp_grow_token_buffer (pfile, n)
51 long old_written = CPP_WRITTEN (pfile);
52 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
53 pfile->token_buffer = (U_CHAR *)
54 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
55 CPP_SET_WRITTEN (pfile, old_written);
/* Cleanup hook that cpp_push_buffer installs in every new buffer
   (new->cleanup = null_cleanup).  Both parameters are marked
   ATTRIBUTE_UNUSED; the body is not visible here.  */
59 null_cleanup (pbuf, pfile)
60 cpp_buffer *pbuf ATTRIBUTE_UNUSED;
61 cpp_reader *pfile ATTRIBUTE_UNUSED;
66 /* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
67 If BUFFER != NULL, then use the LENGTH characters in BUFFER
68 as the new input buffer.
69 Return the new buffer, or NULL on failure.

   The stack depth counter is incremented first and then compared with
   CPP_STACK_MAX; reaching the limit raises the fatal recursion-too-deep
   diagnostic.  The new buffer is zero-filled by xcalloc, inherits
   pfile->if_stack, gets null_cleanup as its cleanup hook, has buf and
   cur pointing at BUFFER and alimit and rlimit at BUFFER + LENGTH, and
   starts with a NULL line_base.  It then becomes CPP_BUFFER (pfile).
   NOTE(review): whether the depth counter is decremented again on the
   failure path cannot be seen in the visible lines.  */
72 cpp_push_buffer (pfile, buffer, length)
77 cpp_buffer *buf = CPP_BUFFER (pfile);
79 if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
81 cpp_fatal (pfile, "macro or `#include' recursion too deep");
85 new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
87 new->if_stack = pfile->if_stack;
88 new->cleanup = null_cleanup;
89 new->buf = new->cur = buffer;
90 new->alimit = new->rlimit = buffer + length;
93 new->line_base = NULL;
95 CPP_BUFFER (pfile) = new;
/* Remove the current input buffer from the buffer stack of PFILE.
   An internal error is reported if a mark is still active.  The cleanup
   hook of the buffer is invoked, the previous buffer becomes current,
   buffer_stack_depth is decremented, and the new current buffer is
   returned.  */
100 cpp_pop_buffer (pfile)
103 cpp_buffer *buf = CPP_BUFFER (pfile);
104 if (ACTIVE_MARK_P (pfile))
105 cpp_ice (pfile, "mark active in cpp_pop_buffer");
106 (*buf->cleanup) (buf, pfile);
107 CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
109 pfile->buffer_stack_depth--;
110 return CPP_BUFFER (pfile);
113 /* Scan until CPP_BUFFER (PFILE) is exhausted into PFILE->token_buffer.
114 Pop the buffer when done.

   The buffer that is current on entry is remembered in BUFFER.  Two
   token loops are visible, selected by the no_output option.  In
   no-output mode the rest of the line is skipped whenever something
   other than white space has been seen on it, and the write offset
   saved on entry is re-applied with CPP_SET_WRITTEN.  In both modes a
   CPP_POP token received while BUFFER is still current pops that buffer,
   but only when it has a predecessor on the stack.  The statements that
   leave the loops are not visible here; presumably CPP_EOF and the
   CPP_POP case both end the scan.  */
117 cpp_scan_buffer (pfile)
120 cpp_buffer *buffer = CPP_BUFFER (pfile);
121 enum cpp_token token;
122 if (CPP_OPTION (pfile, no_output))
124 long old_written = CPP_WRITTEN (pfile);
125 /* In no-output mode, we can ignore everything but directives. */
128 if (! pfile->only_seen_white)
129 _cpp_skip_rest_of_line (pfile);
130 token = cpp_get_token (pfile);
131 if (token == CPP_EOF) /* Should not happen ... */
133 if (token == CPP_POP && CPP_BUFFER (pfile) == buffer)
135 if (CPP_PREV_BUFFER (CPP_BUFFER (pfile)) != NULL)
136 cpp_pop_buffer (pfile);
140 CPP_SET_WRITTEN (pfile, old_written);
146 token = cpp_get_token (pfile);
147 if (token == CPP_EOF) /* Should not happen ... */
149 if (token == CPP_POP && CPP_BUFFER (pfile) == buffer)
151 if (CPP_PREV_BUFFER (CPP_BUFFER (pfile)) != NULL)
152 cpp_pop_buffer (pfile);
/*
160 * Rescan a string (which may have escape marks) into pfile's buffer.
161 * Place the result in pfile->token_buffer.
163 * The input is copied before it is scanned, so it is safe to pass
164 * it something from the token_buffer that will get overwritten
165 * (because it follows CPP_WRITTEN). This is used by do_include.
 */
/* Push a private copy of the LENGTH bytes at BUF as a new input buffer,
   scan it with cpp_scan_buffer so that the result is appended to the
   token buffer of PFILE, and NUL-terminate the token buffer.  A negative
   LENGTH is reported as an internal error.  */
169 cpp_expand_to_buffer (pfile, buf, length)
174 register cpp_buffer *ip;
180 cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
184 /* Set up the input on the input stack.  The copy is made in alloca
   storage, so LENGTH + 1 bytes of stack are consumed.
   NOTE(review): no upper bound on LENGTH is visible here; confirm
   that callers cannot pass a value large enough to exhaust the stack. */
186 buf1 = (U_CHAR *) alloca (length + 1);
187 memcpy (buf1, buf, length);
190 ip = cpp_push_buffer (pfile, buf1, length);
195 /* Scan the input, create the output.  For the duration of the scan
   no_output is forced to 0 and no_line_commands is incremented; both
   are put back afterwards. */
196 save_no_output = CPP_OPTION (pfile, no_output);
197 CPP_OPTION (pfile, no_output) = 0;
198 CPP_OPTION (pfile, no_line_commands)++;
199 cpp_scan_buffer (pfile);
200 CPP_OPTION (pfile, no_line_commands)--;
201 CPP_OPTION (pfile, no_output) = save_no_output;
203 CPP_NUL_TERMINATE (pfile);
/* Report the current position of buffer PBUF: *LINEP receives
   pbuf->lineno and *COLP receives the distance of cur from line_base.
   Any guarding conditions around these two stores are not visible
   here.  */
207 cpp_buf_line_and_col (pbuf, linep, colp)
208 register cpp_buffer *pbuf;
213 *linep = pbuf->lineno;
215 *colp = pbuf->cur - pbuf->line_base;
225 /* Return the topmost cpp_buffer that corresponds to a file (not a macro).
   The stack is walked from CPP_BUFFER (pfile) through the
   CPP_PREV_BUFFER links; a buffer counts as a file buffer when its
   ihash member is not NULL. */
228 cpp_file_buffer (pfile)
233 for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
234 if (ip->ihash != NULL)
239 /* Skip a C-style block comment. We know it's a comment, and point is
240 at the second character of the starter.

   The line and column at entry are fetched up front and are used as the
   position of the unterminated-comment error.  A newline or \r bumps the
   line count unless a mark is active.  A slash that follows a star is
   tested next (presumably the terminator), and a star that follows a
   slash produces the nested-opener warning when warn_comments is
   set. */
242 skip_block_comment (pfile)
249 cpp_buf_line_and_col (CPP_BUFFER (pfile), &line, &col);
255 cpp_error_with_line (pfile, line, col, "unterminated comment");
258 else if (c == '\n' || c == '\r')
260 /* \r cannot be a macro escape marker here. */
261 if (!ACTIVE_MARK_P (pfile))
262 CPP_BUMP_LINE (pfile);
264 else if (c == '/' && prev_c == '*')
266 else if (c == '*' && prev_c == '/'
267 && CPP_OPTION (pfile, warn_comments))
268 cpp_warning (pfile, "`/*' within comment");
274 /* Skip a C++/Chill line comment. We know it's a comment, and point
275 is at the second character of the initiator.

   In the visible escape-handling path the line count is bumped unless
   a mark is active, and a backslash-newline inside the comment is
   warned about when warn_comments is set. */
277 skip_line_comment (pfile)
285 /* We don't have to worry about EOF in here. */
288 /* Don't consider final '\n' to be part of comment. */
294 /* \r cannot be a macro escape marker here. */
295 if (!ACTIVE_MARK_P (pfile))
296 CPP_BUMP_LINE (pfile);
297 if (CPP_OPTION (pfile, warn_comments))
298 cpp_warning (pfile, "backslash-newline within line comment");
303 /* Skip a comment - C, C++, or Chill style. M is the first character
304 of the comment marker. If this really is a comment, skip to its
305 end and return ' '. If this is not a comment, return M (which will
be '/' or '-'). */
/* Decide from M and the next character which comment form, if any,
   starts here, and delegate to skip_block_comment or skip_line_comment.
   Slash-star is a block comment.  Slash-slash is a line comment that is
   always skipped in system headers and otherwise only when the
   cplusplus_comments option is set; under c89 with pedantic checking a
   diagnostic is issued once per buffer, tracked by
   warned_cplusplus_comments.  Dash-dash is a line comment only when the
   chill option is set.  */
309 skip_comment (pfile, m)
313 if (m == '/' && PEEKC() == '*')
315 skip_block_comment (pfile);
318 else if (m == '/' && PEEKC() == '/')
320 if (CPP_BUFFER (pfile)->system_header_p)
322 /* We silently allow C++ comments in system headers, irrespective
323 of conformance mode, because lots of busted systems do that
324 and trying to clean it up in fixincludes is a nightmare. */
325 skip_line_comment (pfile);
328 else if (CPP_OPTION (pfile, cplusplus_comments))
330 if (CPP_OPTION (pfile, c89)
331 && CPP_PEDANTIC (pfile)
332 && ! CPP_BUFFER (pfile)->warned_cplusplus_comments)
335 "C++ style comments are not allowed in ISO C89");
337 "(this will be reported only once per input file)");
338 CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
340 skip_line_comment (pfile);
346 else if (m == '-' && PEEKC() == '-'
347 && CPP_OPTION (pfile, chill))
349 skip_line_comment (pfile);
356 /* Identical to skip_comment except that it copies the comment into the
357 token_buffer. This is used if !discard_comments.

   START is the buffer position before skip_comment runs and LIMIT the
   position afterwards.  If skip_comment hands M back, nothing was
   skipped and the copy is bypassed.  Otherwise room for
   LIMIT - START + 2 bytes is reserved, M is stored, and the bytes from
   START through LIMIT inclusive are considered for copying (the
   condition guarding each store is not visible here).
   NOTE(review): the loop bound is inclusive (<=), so the byte at LIMIT,
   which lies just past the skipped text, is also read; parse_string
   uses an exclusive bound (<) for the same pattern.  The reserve size
   does account for the extra byte.  Confirm the inclusive bound is
   intended. */
359 copy_comment (pfile, m)
363 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
366 if (skip_comment (pfile, m) == m)
369 limit = CPP_BUFFER (pfile)->cur;
370 CPP_RESERVE (pfile, limit - start + 2);
371 CPP_PUTC_Q (pfile, m);
372 for (; start <= limit; start++)
374 CPP_PUTC_Q (pfile, *start);
379 /* Skip whitespace \-newline and comments. Does not macro-expand.
   Formfeed and vertical tab draw a pedantic warning.  A slash or dash
   is handed to skip_comment, whose result replaces C. */
382 _cpp_skip_hspace (pfile)
391 else if (is_hspace(c))
393 if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
394 cpp_pedwarn (pfile, "%s in preprocessing directive",
395 c == '\f' ? "formfeed" : "vertical tab");
399 /* \r is a backslash-newline marker if !has_escapes, and
400 a deletable-whitespace or no-reexpansion marker otherwise. */
401 if (CPP_BUFFER (pfile)->has_escapes)
409 CPP_BUMP_LINE (pfile);
411 else if (c == '/' || c == '-')
413 c = skip_comment (pfile, c);
423 /* Read and discard the rest of the current line.
   Strings are passed over with skip_string and comments with
   skip_comment, presumably so that a newline inside either does not
   end the line early.  The line count is bumped for an escape seen in a
   buffer without has_escapes, and formfeed or vertical tab draws a
   pedantic warning. */
426 _cpp_skip_rest_of_line (pfile)
440 if (! CPP_BUFFER (pfile)->has_escapes)
441 CPP_BUMP_LINE (pfile);
446 skip_string (pfile, c);
451 skip_comment (pfile, c);
456 if (CPP_PEDANTIC (pfile))
457 cpp_pedwarn (pfile, "%s in preprocessing directive",
458 c == '\f' ? "formfeed" : "vertical tab");
465 /* Parse an identifier starting with C.
   Each accepted character is appended to the token buffer after
   reserving two bytes (one for the character, one for the terminator),
   and the buffer is NUL-terminated at the end.  A dollar sign draws a
   pedantic warning. */
468 _cpp_parse_name (pfile, c)
480 if (c == '$' && CPP_PEDANTIC (pfile))
481 cpp_pedwarn (pfile, "`$' in identifier");
483 CPP_RESERVE(pfile, 2); /* One more for final NUL. */
484 CPP_PUTC_Q (pfile, c);
489 CPP_NUL_TERMINATE_Q (pfile);
493 /* Parse and skip over a string starting with C. A single quoted
494 string is treated like a double -- some programs (e.g., troff) are
495 perverse this way. (However, a single quoted string is not allowed
496 to extend over multiple lines.)

   The starting line and column are taken from the buffer returned by
   cpp_file_buffer and are used in every diagnostic issued here.
   pfile->multiline_string_line records the line on which a string first
   ran past end of line; it feeds the possible-real-start hint that
   accompanies an unterminated-constant error and is reset to 0 after
   that error. */
498 skip_string (pfile, c)
502 long start_line, start_column;
503 cpp_buf_line_and_col (cpp_file_buffer (pfile), &start_line, &start_column);
511 cpp_error_with_line (pfile, start_line, start_column,
512 "unterminated string or character constant");
513 if (pfile->multiline_string_line != start_line
514 && pfile->multiline_string_line != 0)
515 cpp_error_with_line (pfile,
516 pfile->multiline_string_line, -1,
517 "possible real start of unterminated constant");
518 pfile->multiline_string_line = 0;
522 CPP_BUMP_LINE (pfile);
523 /* In Fortran and assembly language, silently terminate
524 strings of either variety at end of line. This is a
525 kludge around not knowing where comments are in these
527 if (CPP_OPTION (pfile, lang_fortran)
528 || CPP_OPTION (pfile, lang_asm))
533 /* Character constants may not extend over multiple lines.
534 In Standard C, neither may strings. We accept multiline
535 strings as an extension. */
538 cpp_error_with_line (pfile, start_line, start_column,
539 "unterminated character constant");
543 if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
544 cpp_pedwarn_with_line (pfile, start_line, start_column,
545 "string constant runs past end of line");
546 if (pfile->multiline_string_line == 0)
547 pfile->multiline_string_line = start_line;
551 if (CPP_BUFFER (pfile)->has_escapes)
553 cpp_ice (pfile, "\\r escape inside string constant");
557 /* Backslash newline is replaced by nothing at all. */
558 CPP_BUMP_LINE (pfile);
574 /* Parse a string and copy it to the output.
   The extent of the string is found by running skip_string and
   comparing the buffer position before (START) and after (LIMIT).
   Room for LIMIT - START + 2 bytes is reserved, the opening character C
   is stored, and the bytes from START up to but excluding LIMIT are
   considered for copying (the condition guarding each store is not
   visible here). */
577 parse_string (pfile, c)
581 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
584 skip_string (pfile, c);
586 limit = CPP_BUFFER (pfile)->cur;
587 CPP_RESERVE (pfile, limit - start + 2);
588 CPP_PUTC_Q (pfile, c);
589 for (; start < limit; start++)
591 CPP_PUTC_Q (pfile, *start);
594 /* Read an assertion into the token buffer, converting to
595 canonical form: `#predicate(a n swe r)' The next non-whitespace
596 character to read should be the first letter of the predicate.
597 Returns 0 for syntax error, 1 for bare predicate, 2 for predicate
598 with answer (see callers for why). In case of 0, an error has been
reported. */
/* Build the canonical text of an assertion in the token buffer.
   Leading horizontal space is skipped, a hash mark is stored, and the
   predicate name is parsed with _cpp_parse_name.  For an answer, an
   opening parenthesis is stored and characters are consumed up to the
   closing parenthesis; a newline or EOF before that point is an error,
   and a newline is pushed back with FORWARD (-1) first.  Afterwards the
   last byte written is inspected: a trailing space is overwritten with
   the closing parenthesis, a bare opening parenthesis means the answer
   was empty and is an error, and otherwise the closing parenthesis is
   appended.  The buffer is then NUL-terminated.  */
601 _cpp_parse_assertion (pfile)
605 _cpp_skip_hspace (pfile);
609 cpp_error (pfile, "assertion predicate is not an identifier");
612 CPP_PUTC(pfile, '#');
614 _cpp_parse_name (pfile, c);
619 if (is_hspace(c) || c == '\r')
620 _cpp_skip_hspace (pfile);
626 CPP_PUTC(pfile, '(');
629 while ((c = GETC()) != ')')
635 CPP_PUTC(pfile, ' ');
639 else if (c == '\n' || c == EOF)
641 if (c == '\n') FORWARD(-1);
642 cpp_error (pfile, "un-terminated assertion answer");
646 /* \r cannot be a macro escape here. */
647 CPP_BUMP_LINE (pfile);
655 if (pfile->limit[-1] == ' ')
656 pfile->limit[-1] = ')';
657 else if (pfile->limit[-1] == '(')
659 cpp_error (pfile, "empty token sequence in assertion");
663 CPP_PUTC (pfile, ')');
665 CPP_NUL_TERMINATE (pfile);
669 /* Get the next token, and add it to the text in pfile->token_buffer.
670 Return the kind of token we got.

   The visible fragments handle, in order: comments (skipped or copied
   depending on discard_comments); the hash mark, which yields an
   assertion while parsing an if directive, a stringize token while
   parsing a define directive outside traditional mode, and otherwise a
   possible directive; quoted strings and character constants via
   parse_string; multi-character operators; include-style angle-bracket
   file names; numbers; Chill literals; identifiers via _cpp_parse_name;
   horizontal space; \r escape markers; newlines; and single-character
   punctuators.  only_seen_white is cleared whenever a non-white token is
   produced and set again at a newline.  The dispatch statement and most
   case labels are not visible here, so the grouping above is inferred
   from the remaining statements (TODO confirm). */
673 _cpp_lex_token (pfile)
676 register int c, c2, c3;
677 enum cpp_token token;
691 if (CPP_OPTION (pfile, discard_comments))
692 c = skip_comment (pfile, c);
694 c = copy_comment (pfile, c);
698 /* Comments are equivalent to spaces.
699 For -traditional, a comment is equivalent to nothing. */
700 if (CPP_TRADITIONAL (pfile) || !CPP_OPTION (pfile, discard_comments))
709 if (pfile->parsing_if_directive)
711 _cpp_skip_hspace (pfile);
712 _cpp_parse_assertion (pfile);
713 return CPP_ASSERTION;
716 if (pfile->parsing_define_directive && ! CPP_TRADITIONAL (pfile))
718 CPP_RESERVE (pfile, 3);
719 CPP_PUTC_Q (pfile, '#');
720 CPP_NUL_TERMINATE_Q (pfile);
722 return CPP_STRINGIZE;
725 CPP_PUTC_Q (pfile, '#');
726 CPP_NUL_TERMINATE_Q (pfile);
730 if (!pfile->only_seen_white)
732 /* -traditional directives are recognized only with the # in
column 1 (cur - line_base equals 1 once the # has been read).
734 XXX Layering violation. */
735 if (CPP_TRADITIONAL (pfile)
736 && CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base != 1)
738 return CPP_DIRECTIVE;
742 parse_string (pfile, c);
743 pfile->only_seen_white = 0;
744 return c == '\'' ? CPP_CHAR : CPP_STRING;
747 if (!CPP_OPTION (pfile, dollars_in_ident))
752 if (CPP_OPTION (pfile, cplusplus) && PEEKC () == ':')
760 if (c2 == c || c2 == '=')
777 if (CPP_OPTION (pfile, chill))
778 goto comment; /* Chill style comment */
786 if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
788 /* In C++, there's a ->* operator. */
790 pfile->only_seen_white = 0;
791 CPP_RESERVE (pfile, 4);
792 CPP_PUTC_Q (pfile, c);
793 CPP_PUTC_Q (pfile, GETC ());
794 CPP_PUTC_Q (pfile, GETC ());
795 CPP_NUL_TERMINATE_Q (pfile);
803 if (pfile->parsing_include_directive)
811 if (c == '\n' || c == EOF)
814 "missing '>' in `#include <FILENAME>'");
819 if (!CPP_BUFFER (pfile)->has_escapes)
821 /* Backslash newline is replaced by nothing. */
822 CPP_ADJUST_WRITTEN (pfile, -1);
823 CPP_BUMP_LINE (pfile);
827 /* We might conceivably get \r- or \r<space> in
828 here. Just delete 'em. */
830 if (d != '-' && d != ' ')
831 cpp_ice (pfile, "unrecognized escape \\r%c", d);
832 CPP_ADJUST_WRITTEN (pfile, -1);
838 /* else fall through */
843 /* GNU C++ supports MIN and MAX operators <? and >?. */
844 if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
847 CPP_RESERVE (pfile, 4);
849 CPP_PUTC (pfile, c2);
852 CPP_PUTC_Q (pfile, GETC ());
853 CPP_NUL_TERMINATE_Q (pfile);
854 pfile->only_seen_white = 0;
861 CPP_RESERVE(pfile, 2);
862 CPP_PUTC_Q (pfile, '.');
867 /* In C++ there's a .* operator. */
868 if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
871 if (c2 == '.' && PEEKN(1) == '.')
873 CPP_RESERVE(pfile, 4);
874 CPP_PUTC_Q (pfile, '.');
875 CPP_PUTC_Q (pfile, '.');
876 CPP_PUTC_Q (pfile, '.');
878 CPP_NUL_TERMINATE_Q (pfile);
879 pfile->only_seen_white = 0;
886 pfile->only_seen_white = 0;
887 CPP_RESERVE(pfile, 3);
888 CPP_PUTC_Q (pfile, c);
889 CPP_PUTC_Q (pfile, GETC ());
890 CPP_NUL_TERMINATE_Q (pfile);
895 if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
899 parse_string (pfile, c);
900 pfile->only_seen_white = 0;
901 return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
905 case '0': case '1': case '2': case '3': case '4':
906 case '5': case '6': case '7': case '8': case '9':
911 CPP_RESERVE (pfile, 2);
912 CPP_PUTC_Q (pfile, c);
916 if (!is_numchar(c) && c != '.'
917 && ((c2 != 'e' && c2 != 'E'
918 && ((c2 != 'p' && c2 != 'P')
919 || CPP_OPTION (pfile, c89)))
920 || (c != '+' && c != '-')))
925 CPP_NUL_TERMINATE_Q (pfile);
926 pfile->only_seen_white = 0;
928 case 'b': case 'c': case 'd': case 'h': case 'o':
929 case 'B': case 'C': case 'D': case 'H': case 'O':
930 if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
932 pfile->only_seen_white = 0;
933 CPP_RESERVE (pfile, 2);
934 CPP_PUTC_Q (pfile, c);
935 CPP_PUTC_Q (pfile, '\'');
941 goto chill_number_eof;
948 CPP_RESERVE (pfile, 2);
949 CPP_PUTC_Q (pfile, c);
950 CPP_NUL_TERMINATE_Q (pfile);
957 CPP_NUL_TERMINATE (pfile);
964 case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
965 case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
966 case 'r': case 's': case 't': case 'u': case 'v': case 'w':
967 case 'x': case 'y': case 'z':
968 case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
969 case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
970 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
973 pfile->only_seen_white = 0;
974 _cpp_parse_name (pfile, c);
977 case ' ': case '\t': case '\v':
982 if (c == EOF || !is_hspace(c))
989 if (CPP_BUFFER (pfile)->has_escapes)
994 if (pfile->output_escapes)
995 CPP_PUTS (pfile, "\r-", 2);
996 _cpp_parse_name (pfile, GETC ());
1001 CPP_RESERVE (pfile, 2);
1002 if (pfile->output_escapes)
1003 CPP_PUTC_Q (pfile, '\r');
1004 CPP_PUTC_Q (pfile, c);
1009 cpp_ice (pfile, "unrecognized escape \\r%c", c);
1015 /* Backslash newline is ignored. */
1016 CPP_BUMP_LINE (pfile);
1021 CPP_PUTC (pfile, c);
1022 if (pfile->only_seen_white == 0)
1023 pfile->only_seen_white = 1;
1024 CPP_BUMP_LINE (pfile);
1025 if (! CPP_OPTION (pfile, no_line_commands))
1028 if (CPP_BUFFER (pfile)->lineno != pfile->lineno)
1029 _cpp_output_line_command (pfile, same_file);
1033 case '(': token = CPP_LPAREN; goto char1;
1034 case ')': token = CPP_RPAREN; goto char1;
1035 case '{': token = CPP_LBRACE; goto char1;
1036 case '}': token = CPP_RBRACE; goto char1;
1037 case ',': token = CPP_COMMA; goto char1;
1038 case ';': token = CPP_SEMICOLON; goto char1;
1044 pfile->only_seen_white = 0;
1045 CPP_PUTC (pfile, c);
1050 /* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1051 Caller is expected to have checked no_macro_expand.

   The identifier text is looked up with _cpp_lookup.  A node of type
   T_DISABLED is not expanded; when output_escapes is set, a two-byte
   marker is inserted in front of the name instead.  For a T_MACRO whose
   definition has nargs >= 0, the input is examined for the opening
   parenthesis: exhausted macro buffers are popped along the way, with
   macbuf_whitespace noting whether any white space was skipped, and a
   mark is set so the read position can be restored with CPP_GOTO_MARK.
   On a real call the macro is expanded with _cpp_macroexpand and the
   write offset is rewound to WRITTEN, which drops the macro name from
   the output. */
1053 maybe_macroexpand (pfile, written)
1057 U_CHAR *macro = pfile->token_buffer + written;
1058 size_t len = CPP_WRITTEN (pfile) - written;
1059 HASHNODE *hp = _cpp_lookup (pfile, macro, len);
1063 if (hp->type == T_DISABLED)
1065 if (pfile->output_escapes)
1067 /* Insert a no-reexpand marker before IDENT.  Two more bytes are
   reserved and counted as written, MACRO is recomputed from
   token_buffer (presumably because reserving may relocate it), and
   the name is shifted right by two bytes to make room. */
1068 CPP_RESERVE (pfile, 2);
1069 CPP_ADJUST_WRITTEN (pfile, 2);
1070 macro = pfile->token_buffer + written;
1072 memmove (macro + 2, macro, len);
1079 /* If macro wants an arglist, verify that a '(' follows. */
1080 if (hp->type == T_MACRO && hp->value.defn->nargs >= 0)
1082 int macbuf_whitespace = 0;
1085 while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1087 const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1090 _cpp_skip_hspace (pfile);
1097 if (point != CPP_BUFFER (pfile)->cur)
1098 macbuf_whitespace = 1;
1102 goto not_macro_call;
1103 cpp_pop_buffer (pfile);
1106 CPP_SET_MARK (pfile);
1109 _cpp_skip_hspace (pfile);
1116 CPP_GOTO_MARK (pfile);
1121 if (macbuf_whitespace)
1122 CPP_PUTC (pfile, ' ');
1128 /* This is now known to be a macro call.
1129 Expand the macro, reading arguments as needed,
1130 and push the expansion on the input stack. */
1131 _cpp_macroexpand (pfile, hp);
1132 CPP_SET_WRITTEN (pfile, written);
/* Fetch the next token through _cpp_lex_token and post-process it.
   A directive token is offered to _cpp_handle_directive; if that
   declines, a hash mark is emitted as ordinary text.  An identifier is
   offered to maybe_macroexpand unless no_macro_expand is set.  At the
   end of a buffer, a manual_pop buffer is left for the front end to
   pop; a buffer that has already seen EOF is popped here unless it is
   the last one on the stack; otherwise _cpp_handle_eof is called.  The
   token codes that select these paths are not visible here (TODO
   confirm).  */
1137 cpp_get_token (pfile)
1140 enum cpp_token token;
1141 long written = CPP_WRITTEN (pfile);
1144 token = _cpp_lex_token (pfile);
1152 if (_cpp_handle_directive (pfile))
1153 return CPP_DIRECTIVE;
1154 pfile->only_seen_white = 0;
1155 CPP_PUTC (pfile, '#');
1159 if (! pfile->no_macro_expand
1160 && maybe_macroexpand (pfile, written))
1165 if (CPP_BUFFER (pfile)->manual_pop)
1166 /* If we've been reading from redirected input, the
1167 frontend will pop the buffer. */
1169 else if (CPP_BUFFER (pfile)->seen_eof)
1171 if (CPP_PREV_BUFFER (CPP_BUFFER (pfile)) == NULL)
1174 cpp_pop_buffer (pfile);
1179 _cpp_handle_eof (pfile);
1185 /* Like cpp_get_token, but skip spaces and comments.
   Tokens of kind CPP_COMMENT, CPP_POP, CPP_HSPACE and CPP_VSPACE are
   discarded by rewinding the write offset to its value at entry.
   NOTE(review): the saved offset is an int here, while cpp_get_token
   and _cpp_get_directive_token keep it in a long; confirm the narrower
   type is safe. */
1188 cpp_get_non_space_token (pfile)
1191 int old_written = CPP_WRITTEN (pfile);
1194 enum cpp_token token = cpp_get_token (pfile);
1195 if (token != CPP_COMMENT && token != CPP_POP
1196 && token != CPP_HSPACE && token != CPP_VSPACE)
1198 CPP_SET_WRITTEN (pfile, old_written);
1202 /* Like cpp_get_token, except that it does not read past end-of-line.
1203 Also, horizontal space is skipped, and macros are popped.

   Horizontal space is skipped and a pending newline is checked before
   each token is fetched.  CPP_HSPACE and CPP_COMMENT tokens are erased
   by rewinding the write offset.  CPP_VSPACE, and CPP_POP of a buffer
   that is not a macro buffer, are reported as internal errors. */
1206 _cpp_get_directive_token (pfile)
1209 long old_written = CPP_WRITTEN (pfile);
1210 enum cpp_token token;
1214 _cpp_skip_hspace (pfile);
1215 if (PEEKC () == '\n')
1218 token = cpp_get_token (pfile);
1219 /* token could be hspace at the beginning of a macro. */
1220 if (token == CPP_HSPACE || token == CPP_COMMENT)
1222 CPP_SET_WRITTEN (pfile, old_written);
1226 /* token cannot be vspace, it would have been caught above. */
1227 if (token == CPP_VSPACE)
1229 cpp_ice (pfile, "VSPACE in get_directive_token");
1233 /* token cannot be POP unless the buffer is a macro buffer. */
1234 if (token != CPP_POP)
1237 if (! CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1239 cpp_ice (pfile, "POP of file buffer in get_directive_token");
1243 /* We must pop the buffer by hand, or else cpp_get_token might
1244 hand us white space or newline on the next invocation. */
1245 cpp_pop_buffer (pfile);
1249 /* Determine the current line and column. Used only by read_and_prescan.
   Scans the bytes from START up to LIMIT, starting the count from the
   line number already stored in *LINEP, and treats both newline and \r
   as line terminators.  The callers in this file assign the result to
   their line_base and derive the column from it. */
1251 find_position (start, limit, linep)
1254 unsigned long *linep;
1256 unsigned long line = *linep;
1257 U_CHAR *lbase = start;
1258 while (start < limit)
1260 U_CHAR ch = *start++;
1261 if (ch == '\n' || ch == '\r')
1271 /* These are tables used by _cpp_read_and_prescan. If we have
1272 designated initializers, they can be constant data; otherwise, they
1273 are set up at runtime by _cpp_init_input_buffer.

   Two alternative sets of helper macros follow.  In the first set
   CHARTAB declares a static const table, s (p, v) expands to a
   designated initializer, and init_speccase and init_trigraph_map
   expand to nothing.  In the second set CHARTAB declares a writable
   table initialized to zero, SPECCASE and TRIGRAPH_MAP additionally
   open the body of an init function, and s (p, v) becomes an assignment
   through the local pointer x.  Only the opening #if of the conditional
   is visible here. */
1276 #define UCHAR_MAX 255 /* assume 8-bit bytes */
1279 #if (GCC_VERSION >= 2007) || (__STDC_VERSION__ >= 199901L)
1280 #define CHARTAB(name) static const unsigned char name[UCHAR_MAX + 1]
1281 #define init_speccase() /* nothing */
1282 #define init_trigraph_map() /* nothing */
1283 #define SPECCASE CHARTAB(speccase) = {
1284 #define TRIGRAPH_MAP CHARTAB(trigraph_map) = {
1286 #define s(p, v) [p] = v,
1288 #define CHARTAB(name) static unsigned char name[UCHAR_MAX + 1]
1289 #define SPECCASE CHARTAB(speccase) = { 0 }; \
1290 static void init_speccase PARAMS ((void)) { \
1291 unsigned char *x = speccase;
1292 #define TRIGRAPH_MAP CHARTAB(trigraph_map) = { 0 }; \
1293 static void init_trigraph_map PARAMS ((void)) { \
1294 unsigned char *x = trigraph_map;
1296 #define s(p, v) x[p] = v;
1299 /* Table of characters that can't be handled in the inner loop.
1300 Keep these contiguous to optimize the performance of the code generated
1301 for the switch that uses them. */
1302 #define SPECCASE_EMPTY 0
1303 #define SPECCASE_CR 1
1304 #define SPECCASE_BACKSLASH 2
1305 #define SPECCASE_QUESTION 3
1308 s('\r', SPECCASE_CR)
1309 s('\\', SPECCASE_BACKSLASH)
1310 s('?', SPECCASE_QUESTION)
1313 /* Map of trigraph third characters to their replacements.
   Characters without an entry keep the value 0 in both table forms. */
1316 s('=', '#') s(')', ']') s('!', '|')
1317 s('(', '[') s('\'', '^') s('>', '}')
1318 s('/', '\\') s('<', '{') s('-', '~')
1327 /* Read the entire contents of file DESC into buffer BUF. LEN is how
1328 much memory to allocate initially; more will be allocated if
1329 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1330 canonical form (\n). If enabled, convert and/or warn about
1331 trigraphs. Convert backslash-newline to a one-character escape
1332 (\r) and remove it from "embarrassing" places (i.e. the middle of a
1333 token). If there is no newline at the end of the file, add one and
1334 warn. Returns -1 on failure, or the actual length of the data to
1337 This function does a lot of work, and can be a serious performance
1338 bottleneck. It has been tuned heavily; make sure you understand it
1339 before hacking. The common case - no trigraphs, Unix style line
1340 breaks, backslash-newline set off by whitespace, newline at EOF -
1341 has been optimized at the expense of the others. The performance
1342 penalty for DOS style line breaks (\r\n) is about 15%.
1344 Warnings lose particularly heavily since we have to determine the
1345 line number, which involves scanning from the beginning of the file
1346 or from the last warning. The penalty for the absence of a newline
1347 at the end of reload1.c is about 60%. (reload1.c is 329k.)
1349 If your file has more than one kind of end-of-line marker, you
1350 will get messed-up line numbering.
1352 So that the cases of the switch statement do not have to concern
1353 themselves with the complications of reading beyond the end of the
1354 buffer, the buffer is guaranteed to have at least 3 characters in
1355 it (or however many are left in the file, if less) on entry to the
1356 switch. This is enough to handle trigraphs and the "\\\n\r" and
"\\\r\n" cases.
1359 The end of the buffer is marked by a '\\', which, being a special
1360 character, guarantees we will exit the fast-scan loops and perform
a refill. */
/* Outline of the visible code.  The output buffer BUF starts at LEN
   bytes from xmalloc and is grown with xrealloc, after which op and
   line_base are rebased onto the new block.  Input is read from DESC
   into pfile->input_buffer in chunks of input_buffer_len bytes, and a
   backslash is stored after the data as an end marker.  Runs of
   ordinary characters are copied with memcpy; characters whose speccase
   entry is not SPECCASE_EMPTY go through the switch.  Backslash-newline
   pairs are counted in deferred_newlines and emitted later as \r
   markers, after the current run of non-white text where possible.
   Trigraphs are looked up in trigraph_map, warned about under
   warn_trigraphs, and replaced only under the trigraphs option.  At the
   end a missing final newline is warned about, and the result is stored
   in fp->buf, shrunk with xrealloc unless fewer than 20 bytes are
   spare.
   NOTE(review): the two diagnostics issued here (no newline at end of
   file, file is too large) end in a newline escape, unlike the other
   diagnostics in this file; confirm that is intended.  */
1364 _cpp_read_and_prescan (pfile, fp, desc, len)
1370 U_CHAR *buf = (U_CHAR *) xmalloc (len);
1371 U_CHAR *ip, *op, *line_base;
1374 unsigned int deferred_newlines;
1379 deferred_newlines = 0;
1383 ibase = pfile->input_buffer + 3;
1385 ip[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */
1389 U_CHAR *near_buff_end;
1391 /* Copy previous char plus unprocessed (at most 2) chars
1392 to beginning of buffer, refill it with another
1393 read(), and continue processing
   NOTE(review): the source and destination of this three-byte memcpy
   are COUNT bytes apart, so they overlap whenever COUNT is below 3;
   memcpy is undefined for overlapping regions.  Confirm COUNT cannot be
   that small here, or whether memmove is needed. */
1394 memcpy(ip - count - 1, ip - 1, 3);
1397 count = read (desc, ibase, pfile->input_buffer_len);
1401 ibase[count] = '\\'; /* Marks end of buffer */
1404 near_buff_end = pfile->input_buffer + count;
1409 size_t delta_line_base;
1413 This could happen if the file is larger than half the
1414 maximum address space of the machine. */
1417 delta_op = op - buf;
1418 delta_line_base = line_base - buf;
1419 buf = (U_CHAR *) xrealloc (buf, len);
1420 op = buf + delta_op;
1421 line_base = buf + delta_line_base;
1428 /* Allow normal processing of the (at most 2) remaining
1429 characters. The end-of-buffer marker is still present
1430 and prevents false matches within the switch. */
1431 near_buff_end = ibase - 1;
1438 /* Deal with \-newline, potentially in the middle of a token. */
1439 if (deferred_newlines)
1441 if (op != buf && op[-1] != ' ' && op[-1] != '\n' && op[-1] != '\t' && op[-1] != '\r')
1443 /* Previous was not white space. Skip to white
1444 space, if we can, before outputting the \r's */
1446 while (ip[span] != ' '
1449 && speccase[ip[span]] == SPECCASE_EMPTY)
1451 memcpy (op, ip, span);
1454 if (speccase[ip[0]] != SPECCASE_EMPTY)
1457 while (deferred_newlines)
1458 deferred_newlines--, *op++ = '\r';
1461 /* Copy as much as we can without special treatment. */
1463 while (speccase[ip[span]] == SPECCASE_EMPTY) span++;
1464 memcpy (op, ip, span);
1469 if (ip > near_buff_end) /* Do we have enough chars? */
1471 switch (speccase[*ip++])
1473 case SPECCASE_CR: /* \r */
1482 case SPECCASE_BACKSLASH: /* \ */
1485 deferred_newlines++;
1487 if (*ip == '\r') ip++;
1489 else if (*ip == '\r')
1491 deferred_newlines++;
1493 if (*ip == '\n') ip++;
1499 case SPECCASE_QUESTION: /* ? */
1503 *op++ = '?'; /* Normal non-trigraph case */
1508 t = trigraph_map[d];
1512 if (CPP_OPTION (pfile, warn_trigraphs))
1515 line_base = find_position (line_base, op, &line);
1516 col = op - line_base + 1;
1517 if (CPP_OPTION (pfile, trigraphs))
1518 cpp_warning_with_line (pfile, line, col,
1519 "trigraph ??%c converted to %c", d, t);
1521 cpp_warning_with_line (pfile, line, col,
1522 "trigraph ??%c ignored", d);
1526 if (CPP_OPTION (pfile, trigraphs))
1528 op[-1] = t; /* Overwrite '?' */
1533 goto do_speccase; /* May need buffer refill */
1553 line_base = find_position (line_base, op, &line);
1554 col = op - line_base + 1;
1555 cpp_warning_with_line (pfile, line, col, "no newline at end of file\n");
1556 if (offset + 1 > len)
1559 if (offset + 1 > len)
1561 buf = (U_CHAR *) xrealloc (buf, len);
1567 fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
1571 cpp_error (pfile, "file is too large (>%lu bytes)\n", (unsigned long)offset);
1576 cpp_error_from_errno (pfile, fp->ihash->name);
1581 /* Allocate pfile->input_buffer, and initialize speccase[] and
1582 trigraph_map[] if it hasn't happened already.
   The allocation is 8192 + 1 + 3 bytes, while input_buffer_len records
   only the 8192 payload bytes; the four extra bytes are accounted for
   in the comment ahead of the xmalloc call. */
1585 _cpp_init_input_buffer (pfile)
1591 init_trigraph_map ();
1593 /* Determine the appropriate size for the input buffer. Normal C
1594 source files are smaller than eight K. */
1595 /* 8Kbytes of buffer proper, 1 to detect running off the end without
1596 address arithmetic all the time, and 3 for pushback during buffer
1597 refill, in case there's a potential trigraph or end-of-line
1598 digraph at the end of a block. */
1600 tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
1601 pfile->input_buffer = tmp;
1602 pfile->input_buffer_len = 8192;