#include "cpplib.h"
#include "cpphash.h"
+#ifdef HAVE_MMAP_FILE
+# include <sys/mman.h>
+#endif
+
#define PEEKBUF(BUFFER, N) \
((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
#define GETBUF(BUFFER) \
static void bump_column PARAMS ((cpp_printer *, unsigned int,
unsigned int));
static void expand_name_space PARAMS ((cpp_toklist *, unsigned int));
-static void expand_token_space PARAMS ((cpp_toklist *));
-static void init_token_list PARAMS ((cpp_reader *, cpp_toklist *, int));
static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
unsigned int));
#ifdef NEW_LEXER
-static void expand_comment_space PARAMS ((cpp_toklist *));
void init_trigraph_map PARAMS ((void));
static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
unsigned char *));
static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
static void parse_string2 PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
- unsigned int));
+ unsigned int, int));
static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
-static void save_comment PARAMS ((cpp_toklist *, const unsigned char *,
- unsigned int, unsigned int, unsigned int));
+static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
+ const unsigned char *,
+ unsigned int, unsigned int));
void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));
static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
-static unsigned char * spell_token PARAMS ((cpp_reader *, cpp_token *,
+static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
unsigned char *, int));
typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
cpp_token *));
/* Macros on a cpp_name. */
-#define INIT_NAME(list, name) \
- do {(name).len = 0; \
- (name).text = (list)->namebuf + (list)->name_used;} while (0)
-
-#define IS_DIRECTIVE(list) (TOK_TYPE (list, 0) == CPP_HASH)
-#define COLUMN(cur) ((cur) - buffer->line_base)
+/* Begin a new spelling for TOKEN at the current end of LIST's name
+   buffer: zero its length, point its text at namebuf + name_used, and
+   set tokens_used so that TOKEN is the last token counted.  TOKEN must
+   point into LIST's token array.  Both arguments are evaluated more
+   than once, so they must be free of side effects.  */
+#define INIT_TOKEN_NAME(list, token) \
+  do {(token)->val.name.len = 0; \
+      (token)->val.name.text = (list)->namebuf + (list)->name_used; \
+      (list)->tokens_used = (token) - (list)->tokens + 1; \
+  } while (0)
/* Maybe put these in the ISTABLE eventually. */
#define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
(cur)++; \
CPP_BUMP_LINE_CUR (pfile, (cur)); \
+ pfile->col_adjust = 0; \
} while (0)
#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
/* An upper bound on the number of bytes needed to spell a token,
including preceding whitespace. */
-#define TOKEN_LEN(token) (5 + (token_spellings[token->type].type > \
- SPELL_NONE ? token->val.name.len: 0))
+#define TOKEN_LEN(token) (5 + (token_spellings[(token)->type].type > \
+ SPELL_NONE ? (token)->val.name.len: 0))
#endif
/* Order here matters. Those beyond SPELL_NONE store their spelling
in the token list, and its length in the token->val.name.len. */
-#define SPELL_OPERATOR 0
-#define SPELL_CHAR 2 /* FIXME: revert order after transition. */
-#define SPELL_NONE 1
-#define SPELL_IDENT 3
-#define SPELL_STRING 4
+enum spell_type
+{
+ SPELL_OPERATOR = 0,
+ SPELL_NONE,
+ SPELL_CHAR, /* FIXME: revert order of NONE and CHAR after transition. */
+ SPELL_IDENT,
+ SPELL_STRING
+};
#define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
#define I(e, s) {SPELL_IDENT, s},
static const struct token_spelling
{
- U_CHAR type;
+ ENUM_BITFIELD(spell_type) type : CHAR_BIT;
const U_CHAR *spelling;
} token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
}
else if (buf->macro)
{
- HASHNODE *m = buf->macro;
+ cpp_hashnode *m = buf->macro;
m->disabled = 0;
if ((m->type == T_FMACRO && buf->mapped)
for (;;)
{
token = cpp_get_token (pfile);
- if (token == CPP_EOF || token == CPP_VSPACE
+ if (token == CPP_VSPACE || token == CPP_EOF
/* XXX Temporary kluge - force flush after #include only */
|| (token == CPP_DIRECTIVE
&& CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
/* Token-buffer helper functions. */
-/* Expand a token list's string space. */
+/* Expand a token list's string space. It is *vital* that
+ list->tokens_used is correct, to get pointer fix-up right. */
static void
expand_name_space (list, len)
cpp_toklist *list;
unsigned int len;
{
const U_CHAR *old_namebuf;
- ptrdiff_t delta;
old_namebuf = list->namebuf;
list->name_cap += len;
list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
/* Fix up token text pointers. */
- delta = list->namebuf - old_namebuf;
- if (delta)
+ if (list->namebuf != old_namebuf)
{
unsigned int i;
for (i = 0; i < list->tokens_used; i++)
if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
- list->tokens[i].val.name.text += delta;
+ list->tokens[i].val.name.text += (list->namebuf - old_namebuf);
}
}
/* Expand the number of tokens in a list. */
-static void
-expand_token_space (list)
+void
+_cpp_expand_token_space (list, count)
cpp_toklist *list;
+ unsigned int count;
{
- list->tokens_cap *= 2;
+ unsigned int n;
+
+ list->tokens_cap += count;
+ n = list->tokens_cap;
+ if (list->flags & LIST_OFFSET)
+ list->tokens--, n++;
list->tokens = (cpp_token *)
- xrealloc (list->tokens - 1, (list->tokens_cap + 1) * sizeof (cpp_token));
- list->tokens++; /* Skip the dummy. */
+ xrealloc (list->tokens, n * sizeof (cpp_token));
+ if (list->flags & LIST_OFFSET)
+ list->tokens++; /* Skip the dummy. */
}
-/* Initialize a token list. We allocate an extra token in front of
- the token list, as this allows us to always peek at the previous
- token without worrying about underflowing the list. */
-static void
-init_token_list (pfile, list, recycle)
- cpp_reader *pfile;
+/* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
+ an extra token in front of the token list, as this allows the lexer
+ to always peek at the previous token without worrying about
+ underflowing the list, and some initial space. Otherwise, no
+ token- or name-space is allocated, and there is no dummy token. */
+void
+_cpp_init_toklist (list, flags)
cpp_toklist *list;
- int recycle;
+ int flags;
{
- /* Recycling a used list saves 3 free-malloc pairs. */
- if (!recycle)
+ /* We malloc zero bytes because we may want to realloc later, and
+ some old implementations don't like realloc-ing a null pointer. */
+ if (flags == NO_DUMMY_TOKEN)
+ {
+ list->tokens_cap = 0;
+ list->tokens = (cpp_token *) malloc (0);
+ list->name_cap = 0;
+ list->flags = 0;
+ }
+ else
{
/* Initialize token space. Put a dummy token before the start
- that will fail matches. */
+ that will fail matches. */
list->tokens_cap = 256; /* 4K's worth. */
list->tokens = (cpp_token *)
xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
/* Initialize name space. */
list->name_cap = 1024;
- list->namebuf = (unsigned char *) xmalloc (list->name_cap);
-
- /* Only create a comment space on demand. */
- list->comments_cap = 0;
- list->comments = 0;
+ list->flags = LIST_OFFSET;
}
+ /* Allocate name space. */
+ list->namebuf = (unsigned char *) xmalloc (list->name_cap);
+
+ _cpp_clear_toklist (list);
+}
+
+/* Clear a token list so that it can be filled again.  Resets the used
+   counts for tokens and name bytes to zero, sets dirno to -1, and
+   drops every flag except LIST_OFFSET.  The capacities and the
+   allocated buffers themselves are left untouched.  */
+void
+_cpp_clear_toklist (list)
+ cpp_toklist *list;
+{
list->tokens_used = 0;
list->name_used = 0;
- list->comments_used = 0;
- if (pfile->buffer)
- list->line = pfile->buffer->lineno;
- list->dir_handler = 0;
- list->dir_flags = 0;
+ list->dirno = -1;
+ list->flags &= LIST_OFFSET; /* keep only LIST_OFFSET; clear all other flags */
}
-/* Scan an entire line and create a token list for it. Does not
- macro-expand or execute directives. */
+/* Release the storage owned by a token list: its token array and its
+   name buffer.  The cpp_toklist object itself is not freed, since it
+   may be embedded in a larger structure.  */
+void
+_cpp_free_toklist (list)
+     cpp_toklist *list;
+{
+  cpp_token *base = list->tokens;
+
+  /* A LIST_OFFSET list has its tokens pointer advanced past a dummy
+     token, so the allocation really starts one element earlier.  */
+  if (list->flags & LIST_OFFSET)
+    base--;
+
+  free (base);
+  free (list->namebuf);
+}
+
+/* Slice a token list: make COPY a self-contained copy of the tokens
+   in [START, FINISH).  COPY is assumed not to be initialized.  The
+   copy receives exactly-sized token and name buffers, has no flags
+   set, and has a dirno of -1.  */
+void
+_cpp_slice_toklist (copy, start, finish)
+     cpp_toklist *copy;
+     const cpp_token *start, *finish;
+{
+  unsigned int count, idx;
+  size_t name_bytes;
+  unsigned char *dest;
+
+  count = finish - start;
+
+  /* Duplicate the token structures wholesale; the spelling pointers
+     are redirected into the new name buffer below.  */
+  copy->tokens = (cpp_token *) xmalloc (count * sizeof (cpp_token));
+  memcpy (copy->tokens, start, count * sizeof (cpp_token));
+
+  /* First pass: total the spelling bytes that must be stored.  */
+  name_bytes = 0;
+  for (idx = 0; idx < count; idx++)
+    if (token_spellings[start[idx].type].type > SPELL_NONE)
+      name_bytes += start[idx].val.name.len;
+
+  /* Second pass: pack each spelling into the new buffer and point the
+     copied token at it.  */
+  copy->namebuf = xmalloc (name_bytes);
+  dest = copy->namebuf;
+  for (idx = 0; idx < count; idx++)
+    {
+      const cpp_token *src = &start[idx];
+
+      if (token_spellings[src->type].type > SPELL_NONE)
+        {
+          memcpy (dest, src->val.name.text, src->val.name.len);
+          copy->tokens[idx].val.name.text = dest;
+          dest += src->val.name.len;
+        }
+    }
+
+  copy->tokens_cap = count;
+  copy->tokens_used = count;
+  copy->name_cap = name_bytes;
+  copy->name_used = name_bytes;
+
+  copy->flags = 0;
+  copy->dirno = -1;
+}
+/* Shrink a token list down to the minimum size: the token array is
+   reallocated to hold exactly tokens_used tokens and the name buffer
+   to hold exactly name_used bytes.  If the list has a leading dummy
+   token (LIST_OFFSET), the dummy is dropped and the flag cleared, so
+   that afterwards list->tokens is the start of its allocation.
+   Spelling pointers are fixed up if the name buffer moves.  */
void
-_cpp_scan_line (pfile, list)
+_cpp_squeeze_toklist (list)
+     cpp_toklist *list;
+{
+  const U_CHAR *old_namebuf;
+
+  if (list->flags & LIST_OFFSET)
+    {
+      /* Slide the real tokens down over the dummy slot so the array
+         begins at the base of the allocation.  */
+      list->tokens--;
+      memmove (list->tokens, list->tokens + 1,
+               list->tokens_used * sizeof (cpp_token));
+      list->flags &= ~LIST_OFFSET;
+    }
+  list->tokens = xrealloc (list->tokens,
+                           list->tokens_used * sizeof (cpp_token));
+  list->tokens_cap = list->tokens_used;
+
+  old_namebuf = list->namebuf;
+  list->namebuf = xrealloc (list->namebuf, list->name_used);
+  list->name_cap = list->name_used;
+
+  /* Fix up token text pointers.  The difference is applied directly,
+     as expand_name_space does, instead of being stored in a `long',
+     which is narrower than a pointer difference on some hosts.  */
+  if (list->namebuf != old_namebuf)
+    {
+      unsigned int i;
+
+      for (i = 0; i < list->tokens_used; i++)
+        if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
+          list->tokens[i].val.name.text += (list->namebuf - old_namebuf);
+    }
+}
+
+/* Compare two tokens.  Returns 1 if A and B have the same type, flags
+   and aux value and, for token types that store a spelling, the same
+   spelling; returns 0 otherwise.  */
+int
+_cpp_equiv_tokens (a, b)
+     const cpp_token *a, *b;
+{
+  if (a->type != b->type)
+    return 0;
+  if (a->flags != b->flags)
+    return 0;
+  if (a->aux != b->aux)
+    return 0;
+
+  /* Token types at or below SPELL_NONE have no stored spelling, so
+     there is nothing further to compare.  */
+  if (! (token_spellings[a->type].type > SPELL_NONE))
+    return 1;
+
+  if (a->val.name.len != b->val.name.len)
+    return 0;
+  if (ustrncmp (a->val.name.text, b->val.name.text, a->val.name.len))
+    return 0;
+
+  return 1;
+}
+
+/* Compare two token lists.  Returns 1 when A and B hold the same
+   number of tokens and each corresponding pair is equivalent according
+   to _cpp_equiv_tokens; returns 0 otherwise.  */
+int
+_cpp_equiv_toklists (a, b)
+     const cpp_toklist *a, *b;
+{
+  const cpp_token *ta, *tb, *end;
+
+  if (a->tokens_used != b->tokens_used)
+    return 0;
+
+  tb = b->tokens;
+  end = a->tokens + a->tokens_used;
+  for (ta = a->tokens; ta != end; ta++, tb++)
+    if (! _cpp_equiv_tokens (ta, tb))
+      return 0;
+
+  return 1;
+}
+
+/* Scan until we encounter a token of type STOP or a newline, and
+ create a token list for it. Does not macro-expand or execute
+ directives. The final token is not included in the list or
+ consumed from the input. Returns the type of the token stopped at. */
+
+enum cpp_ttype
+_cpp_scan_until (pfile, list, stop)
cpp_reader *pfile;
cpp_toklist *list;
+ enum cpp_ttype stop;
{
int i, col;
long written, len;
enum cpp_ttype type;
int space_before;
- init_token_list (pfile, list, 1);
+ _cpp_clear_toklist (list);
+ list->line = CPP_BUF_LINE (CPP_BUFFER (pfile));
written = CPP_WRITTEN (pfile);
i = 0;
continue;
if (list->tokens_used >= list->tokens_cap)
- expand_token_space (list);
+ _cpp_expand_token_space (list, 256);
if (list->name_used + len >= list->name_cap)
expand_name_space (list, list->name_used + len + 1 - list->name_cap);
if (type == CPP_MACRO)
type = CPP_NAME;
+ if (type == CPP_VSPACE || type == stop)
+ break;
+
list->tokens_used++;
TOK_TYPE (list, i) = type;
TOK_COL (list, i) = col;
+ TOK_AUX (list, i) = 0;
TOK_FLAGS (list, i) = space_before ? PREV_WHITESPACE : 0;
- if (type == CPP_VSPACE)
- break;
-
TOK_LEN (list, i) = len;
if (token_spellings[type].type > SPELL_NONE)
{
i++;
space_before = 0;
}
- TOK_AUX (list, i) = CPP_BUFFER (pfile)->lineno + 1;
- /* XXX Temporary kluge: put back the newline. */
+ /* XXX Temporary kluge: put back the newline (or whatever). */
FORWARD(-1);
-}
+ /* Don't consider the first token to have white before. */
+ TOK_FLAGS (list, 0) &= ~PREV_WHITESPACE;
+ return type;
+}
/* Skip a C-style block comment. We know it's a comment, and point is
at the second character of the starter. */
CPP_PUTC_Q (pfile, *start);
}
-/* Read an assertion into the token buffer, converting to
- canonical form: `#predicate(a n swe r)' The next non-whitespace
- character to read should be the first letter of the predicate.
- Returns 0 for syntax error, 1 for bare predicate, 2 for predicate
- with answer (see callers for why). In case of 0, an error has been
- printed. */
-int
-_cpp_parse_assertion (pfile)
- cpp_reader *pfile;
-{
- int c, dropwhite;
- _cpp_skip_hspace (pfile);
- c = PEEKC();
- if (c == '\n')
- {
- cpp_error (pfile, "assertion without predicate");
- return 0;
- }
- else if (! is_idstart(c))
- {
- cpp_error (pfile, "assertion predicate is not an identifier");
- return 0;
- }
- CPP_PUTC(pfile, '#');
- FORWARD(1);
- _cpp_parse_name (pfile, c);
-
- c = PEEKC();
- if (c != '(')
- {
- if (is_hspace(c) || c == '\r')
- _cpp_skip_hspace (pfile);
- c = PEEKC();
- }
- if (c != '(')
- return 1;
-
- CPP_PUTC(pfile, '(');
- FORWARD(1);
- dropwhite = 1;
- while ((c = GETC()) != ')')
- {
- if (is_space(c))
- {
- if (! dropwhite)
- {
- CPP_PUTC(pfile, ' ');
- dropwhite = 1;
- }
- }
- else if (c == '\n' || c == EOF)
- {
- if (c == '\n') FORWARD(-1);
- cpp_error (pfile, "un-terminated assertion answer");
- return 0;
- }
- else if (c == '\r')
- /* \r cannot be a macro escape here. */
- CPP_BUMP_LINE (pfile);
- else
- {
- CPP_PUTC (pfile, c);
- dropwhite = 0;
- }
- }
-
- if (pfile->limit[-1] == ' ')
- pfile->limit[-1] = ')';
- else if (pfile->limit[-1] == '(')
- {
- cpp_error (pfile, "empty token sequence in assertion");
- return 0;
- }
- else
- CPP_PUTC (pfile, ')');
-
- return 2;
-}
-
/* Get the next token, and add it to the text in pfile->token_buffer.
Return the kind of token we got. */
if (!CPP_OPTION (pfile, discard_comments))
return CPP_COMMENT;
else if (CPP_TRADITIONAL (pfile))
- {
- if (pfile->parsing_define_directive)
- return CPP_COMMENT;
- goto get_next;
- }
+ goto get_next;
else
{
CPP_PUTC (pfile, c);
CPP_PUTC (pfile, c);
hash:
- if (pfile->parsing_if_directive)
+ c2 = PEEKC ();
+ if (c2 == '#')
{
- CPP_ADJUST_WRITTEN (pfile, -1);
- if (_cpp_parse_assertion (pfile))
- return CPP_ASSERTION;
- return CPP_OTHER;
+ FORWARD (1);
+ CPP_PUTC (pfile, c2);
+ return CPP_PASTE;
}
-
- if (pfile->parsing_define_directive)
+ else if (c2 == '%' && PEEKN (1) == ':')
{
- c2 = PEEKC ();
- if (c2 == '#')
- {
- FORWARD (1);
- CPP_PUTC (pfile, c2);
- }
- else if (c2 == '%' && PEEKN (1) == ':')
- {
- /* Digraph: "%:" == "#". */
- FORWARD (1);
- CPP_RESERVE (pfile, 2);
- CPP_PUTC_Q (pfile, c2);
- CPP_PUTC_Q (pfile, GETC ());
- }
- else
- return CPP_HASH;
-
+ /* Digraph: "%:" == "#". */
+ FORWARD (1);
+ CPP_RESERVE (pfile, 2);
+ CPP_PUTC_Q (pfile, c2);
+ CPP_PUTC_Q (pfile, GETC ());
return CPP_PASTE;
}
-
- if (!pfile->only_seen_white)
- return CPP_OTHER;
-
- /* Remove the "#" or "%:" from the token buffer. */
- CPP_ADJUST_WRITTEN (pfile, (c == '#' ? -1 : -2));
- return CPP_DIRECTIVE;
+ else
+ return CPP_HASH;
case '\"':
case '\'':
{
U_CHAR *macro = pfile->token_buffer + written;
size_t len = CPP_WRITTEN (pfile) - written;
- HASHNODE *hp = _cpp_lookup (pfile, macro, len);
+ cpp_hashnode *hp = cpp_lookup (pfile, macro, len);
- /* _cpp_lookup never returns null. */
+ /* cpp_lookup never returns null. */
if (hp->type == T_VOID)
return 0;
if (hp->disabled || hp->type == T_IDENTITY)
not_macro_call:
if (macbuf_whitespace)
CPP_PUTC (pfile, ' ');
+
+ /* K+R treated this as a hard error. */
+ if (CPP_OPTION (pfile, warn_traditional))
+ cpp_warning (pfile,
+ "traditional C rejects function macro %s in non-function context",
+ hp->name);
return 0;
}
}
case CPP_COMMENT:
return token;
- case CPP_DIRECTIVE:
+ case CPP_HASH:
pfile->potential_control_macro = 0;
+ if (!pfile->only_seen_white)
+ return CPP_HASH;
+ /* XXX shouldn't have to do this - remove the hash or %: from
+ the token buffer. */
+ if (CPP_PWRITTEN (pfile)[-1] == '#')
+ CPP_ADJUST_WRITTEN (pfile, -1);
+ else
+ CPP_ADJUST_WRITTEN (pfile, -2);
+
if (_cpp_handle_directive (pfile))
- return CPP_DIRECTIVE;
+ return CPP_DIRECTIVE;
pfile->only_seen_white = 0;
CPP_PUTC (pfile, '#');
- return CPP_OTHER;
+ return CPP_HASH;
case CPP_MACRO:
pfile->potential_control_macro = 0;
goto get_next;
return CPP_HSPACE;
- case CPP_DIRECTIVE:
- /* Don't execute the directive, but don't smash it to OTHER either. */
- CPP_PUTC (pfile, '#');
- return CPP_DIRECTIVE;
-
case CPP_MACRO:
if (! pfile->no_macro_expand
&& maybe_macroexpand (pfile, old_written))
return lbase;
}
-/* The following table is used by _cpp_read_and_prescan. If we have
+/* The following table is used by _cpp_prescan. If we have
designated initializers, it can be constant data; otherwise, it is
set up at runtime by _cpp_init_input_buffer. */
-#ifndef UCHAR_MAX
-#define UCHAR_MAX 255 /* assume 8-bit bytes */
-#endif
-
#if (GCC_VERSION >= 2007)
#define init_chartab() /* nothing */
#define CHARTAB __extension__ static const U_CHAR chartab[UCHAR_MAX + 1] = {
#define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
#define NONTRI(c) ((c) <= SPECCASE_QUESTION)
-/* Read the entire contents of file DESC into buffer BUF. LEN is how
- much memory to allocate initially; more will be allocated if
- necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
+/* Prescan pass over a file already loaded into BUF. This is
+ translation phases 1 and 2 (C99 5.1.1.2).
+
+ Convert end-of-line markers (\n, \r, \r\n, \n\r) to
canonical form (\n). If enabled, convert and/or warn about
trigraphs. Convert backslash-newline to a one-character escape
(\r) and remove it from "embarrassing" places (i.e. the middle of a
at the end of reload1.c is about 60%. (reload1.c is 329k.)
If your file has more than one kind of end-of-line marker, you
- will get messed-up line numbering.
-
- So that the cases of the switch statement do not have to concern
- themselves with the complications of reading beyond the end of the
- buffer, the buffer is guaranteed to have at least 3 characters in
- it (or however many are left in the file, if less) on entry to the
- switch. This is enough to handle trigraphs and the "\\\n\r" and
- "\\\r\n" cases.
-
- The end of the buffer is marked by a '\\', which, being a special
- character, guarantees we will exit the fast-scan loops and perform
- a refill. */
-
-long
-_cpp_read_and_prescan (pfile, fp, desc, len)
+ will get messed-up line numbering. */
+
+ssize_t
+_cpp_prescan (pfile, fp, len)
cpp_reader *pfile;
cpp_buffer *fp;
- int desc;
- size_t len;
+ ssize_t len;
{
- U_CHAR *buf = (U_CHAR *) xmalloc (len);
- U_CHAR *ip, *op, *line_base;
- U_CHAR *ibase;
+ U_CHAR *buf, *op;
+ const U_CHAR *ibase, *ip, *ilimit;
+ U_CHAR *line_base;
unsigned long line;
unsigned int deferred_newlines;
- size_t offset;
- int count = 0;
- offset = 0;
- deferred_newlines = 0;
- op = buf;
- line_base = buf;
+ /* Allocate an extra byte in case we must add a trailing \n. */
+ buf = (U_CHAR *) xmalloc (len + 1);
+ line_base = op = buf;
+ ip = ibase = fp->buf;
+ ilimit = ibase + len;
line = 1;
- ibase = pfile->input_buffer + 3;
- ip = ibase;
- ip[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */
+ deferred_newlines = 0;
for (;;)
{
- U_CHAR *near_buff_end;
+ const U_CHAR *iq;
- count = read (desc, ibase, pfile->input_buffer_len);
- if (count < 0)
- goto error;
-
- ibase[count] = '\\'; /* Marks end of buffer */
- if (count)
+ /* Deal with \-newline, potentially in the middle of a token. */
+ if (deferred_newlines)
{
- near_buff_end = pfile->input_buffer + count;
- offset += count;
- if (offset > len)
+ if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
{
- size_t delta_op;
- size_t delta_line_base;
- len = offset * 2;
- if (offset > len)
- /* len overflowed.
- This could happen if the file is larger than half the
- maximum address space of the machine. */
- goto too_big;
-
- delta_op = op - buf;
- delta_line_base = line_base - buf;
- buf = (U_CHAR *) xrealloc (buf, len);
- op = buf + delta_op;
- line_base = buf + delta_line_base;
+ /* Previous was not white space. Skip to white
+ space, if we can, before outputting the \r's */
+ iq = ip;
+ while (iq < ilimit
+ && *iq != ' '
+ && *iq != '\t'
+ && *iq != '\n'
+ && NORMAL(*iq))
+ iq++;
+ memcpy (op, ip, iq - ip);
+ op += iq - ip;
+ ip += iq - ip;
+ if (! NORMAL(*ip))
+ goto do_speccase;
}
- }
- else
- {
- if (ip == ibase)
- break;
- /* Allow normal processing of the (at most 2) remaining
- characters. The end-of-buffer marker is still present
- and prevents false matches within the switch. */
- near_buff_end = ibase - 1;
+ while (deferred_newlines)
+ deferred_newlines--, *op++ = '\r';
}
- for (;;)
- {
- unsigned int span;
+ /* Copy as much as we can without special treatment. */
+ iq = ip;
+ while (iq < ilimit && NORMAL (*iq)) iq++;
+ memcpy (op, ip, iq - ip);
+ op += iq - ip;
+ ip += iq - ip;
+
+ do_speccase:
+ if (ip >= ilimit)
+ break;
- /* Deal with \-newline, potentially in the middle of a token. */
- if (deferred_newlines)
+ switch (chartab[*ip++])
+ {
+ case SPECCASE_CR: /* \r */
+ if (ip[-2] != '\n')
{
- if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
- {
- /* Previous was not white space. Skip to white
- space, if we can, before outputting the \r's */
- span = 0;
- while (ip[span] != ' '
- && ip[span] != '\t'
- && ip[span] != '\n'
- && NORMAL(ip[span]))
- span++;
- memcpy (op, ip, span);
- op += span;
- ip += span;
- if (! NORMAL(ip[0]))
- goto do_speccase;
- }
- while (deferred_newlines)
- deferred_newlines--, *op++ = '\r';
+ if (ip < ilimit && *ip == '\n')
+ ip++;
+ *op++ = '\n';
}
+ break;
- /* Copy as much as we can without special treatment. */
- span = 0;
- while (NORMAL (ip[span])) span++;
- memcpy (op, ip, span);
- op += span;
- ip += span;
-
- do_speccase:
- if (ip > near_buff_end) /* Do we have enough chars? */
- break;
- switch (chartab[*ip++])
+ case SPECCASE_BACKSLASH: /* \ */
+ backslash:
+ if (ip < ilimit)
{
- case SPECCASE_CR: /* \r */
- if (ip[-2] != '\n')
- {
- if (*ip == '\n')
- ip++;
- *op++ = '\n';
- }
- break;
-
- case SPECCASE_BACKSLASH: /* \ */
if (*ip == '\n')
{
deferred_newlines++;
ip++;
if (*ip == '\r') ip++;
+ break;
}
else if (*ip == '\r')
{
deferred_newlines++;
ip++;
if (*ip == '\n') ip++;
+ break;
}
- else
- *op++ = '\\';
- break;
+ }
- case SPECCASE_QUESTION: /* ? */
- {
- unsigned int d, t;
+ *op++ = '\\';
+ break;
- *op++ = '?'; /* Normal non-trigraph case */
- if (ip[0] != '?')
- break;
-
- d = ip[1];
- t = chartab[d];
- if (NONTRI (t))
- break;
+ case SPECCASE_QUESTION: /* ? */
+ {
+ unsigned int d, t;
- if (CPP_OPTION (pfile, warn_trigraphs))
- {
- unsigned long col;
- line_base = find_position (line_base, op, &line);
- col = op - line_base + 1;
- if (CPP_OPTION (pfile, trigraphs))
- cpp_warning_with_line (pfile, line, col,
- "trigraph ??%c converted to %c", d, t);
- else
- cpp_warning_with_line (pfile, line, col,
- "trigraph ??%c ignored", d);
- }
+ *op++ = '?'; /* Normal non-trigraph case */
+ if (ip > ilimit - 2 || ip[0] != '?')
+ break;
+
+ d = ip[1];
+ t = chartab[d];
+ if (NONTRI (t))
+ break;
- ip += 2;
+ if (CPP_OPTION (pfile, warn_trigraphs))
+ {
+ unsigned long col;
+ line_base = find_position (line_base, op, &line);
+ col = op - line_base + 1;
if (CPP_OPTION (pfile, trigraphs))
- {
- op[-1] = t; /* Overwrite '?' */
- if (t == '\\')
- {
- op--;
- *--ip = '\\';
- goto do_speccase; /* May need buffer refill */
- }
- }
+ cpp_warning_with_line (pfile, line, col,
+ "trigraph ??%c converted to %c", d, t);
else
+ cpp_warning_with_line (pfile, line, col,
+ "trigraph ??%c ignored", d);
+ }
+
+ ip += 2;
+ if (CPP_OPTION (pfile, trigraphs))
+ {
+ op[-1] = t; /* Overwrite '?' */
+ if (t == '\\')
{
- *op++ = '?';
- *op++ = d;
+ op--;
+ goto backslash;
}
}
- break;
- }
+ else
+ {
+ *op++ = '?';
+ *op++ = d;
+ }
+ }
+ break;
}
- /* Copy previous char plus unprocessed (at most 2) chars
- to beginning of buffer, refill it with another
- read(), and continue processing */
- memmove (ip - count - 1, ip - 1, 4 - (ip - near_buff_end));
- ip -= count;
}
- if (offset == 0)
- return 0;
+#ifdef HAVE_MMAP_FILE
+ if (fp->mapped)
+ munmap ((caddr_t) fp->buf, len);
+ else
+#endif
+ free ((PTR) fp->buf);
if (op[-1] != '\n')
{
line_base = find_position (line_base, op, &line);
col = op - line_base + 1;
cpp_warning_with_line (pfile, line, col, "no newline at end of file");
- if (offset + 1 > len)
- {
- len += 1;
- if (offset + 1 > len)
- goto too_big;
- buf = (U_CHAR *) xrealloc (buf, len);
- op = buf + offset;
- }
*op++ = '\n';
}
- fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
+ fp->buf = buf;
return op - buf;
-
- too_big:
- cpp_notice (pfile, "%s is too large (>%lu bytes)", fp->ihash->name,
- (unsigned long)offset);
- free (buf);
- return -1;
-
- error:
- cpp_error_from_errno (pfile, fp->ihash->name);
- free (buf);
- return -1;
}
/* Allocate pfile->input_buffer, and initialize chartab[]
U_CHAR *tmp;
init_chartab ();
- init_token_list (pfile, &pfile->directbuf, 0);
+ _cpp_init_toklist (&pfile->directbuf, NO_DUMMY_TOKEN);
/* Determine the appropriate size for the input buffer. Normal C
source files are smaller than eight K. */
U":>", U"<%", U"%>"};
static unsigned char trigraph_map[256];
-static void
-expand_comment_space (list)
- cpp_toklist *list;
-{
- if (list->comments_cap == 0)
- {
- list->comments_cap = 10;
- list->comments = (cpp_token *)
- xmalloc (list->comments_cap * sizeof (cpp_token));
- }
- else
- {
- list->comments_cap *= 2;
- list->comments = (cpp_token *)
- xrealloc (list->comments, list->comments_cap);
- }
-}
-
-void
-cpp_free_token_list (list)
- cpp_toklist *list;
-{
- if (list->comments)
- free (list->comments);
- free (list->tokens - 1); /* Backup over dummy token. */
- free (list->namebuf);
- free (list);
-}
-
void
init_trigraph_map ()
{
return multiline;
}
-/* Skips whitespace, stopping at next non-whitespace character. */
+/* Skips whitespace, stopping at next non-whitespace character.
+ Adjusts pfile->col_adjust to account for tabs. This enables tokens
+ to be assigned the correct column. */
static void
skip_whitespace (pfile, in_directive)
cpp_reader *pfile;
{
unsigned char c = *cur++;
+ if (c == '\t')
+ {
+ unsigned int col = CPP_BUF_COLUMN (buffer, cur - 1);
+ pfile->col_adjust += (CPP_OPTION (pfile, tabstop) - 1
+ - col % CPP_OPTION(pfile, tabstop));
+ }
if (IS_HSPACE(c)) /* FIXME: Fix ISTABLE. */
continue;
if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines. */
escaped newlines.
Can be used for character constants (terminator = '\''), string
- constants ('"'), angled headers ('>') and assertions (')'). */
+ constants ('"') and angled headers ('>'). Multi-line strings are
+ allowed, except for within directives. */
static void
-parse_string2 (pfile, list, name, terminator)
+parse_string2 (pfile, list, name, terminator, multiline_ok)
cpp_reader *pfile;
cpp_toklist *list;
cpp_name *name;
unsigned int terminator;
+ int multiline_ok;
{
cpp_buffer *buffer = pfile->buffer;
register const unsigned char *cur = buffer->cur;
extend over multiple lines. In Standard C, neither
may strings. We accept multiline strings as an
extension, but not in directives. */
- if (terminator != '"' || IS_DIRECTIVE (list))
+ if (!multiline_ok)
goto unterminated;
cur++; /* Move forwards again. */
#define COMMENT_START_LEN 2
static void
-save_comment (list, from, len, tok_no, type)
+save_comment (list, token, from, len, type)
cpp_toklist *list;
+ cpp_token *token;
const unsigned char *from;
unsigned int len;
- unsigned int tok_no;
unsigned int type;
{
- cpp_token *comment;
unsigned char *buffer;
len += COMMENT_START_LEN;
- if (list->comments_used == list->comments_cap)
- expand_comment_space (list);
-
if (list->name_used + len > list->name_cap)
expand_name_space (list, len);
- buffer = list->namebuf + list->name_used;
+ INIT_TOKEN_NAME (list, token);
+ token->type = CPP_COMMENT;
+ token->val.name.len = len;
- comment = &list->comments[list->comments_used++];
- comment->type = CPP_COMMENT;
- comment->aux = tok_no;
- comment->val.name.len = len;
- comment->val.name.text = buffer;
+ buffer = list->namebuf + list->name_used;
+ list->name_used += len;
+ /* Copy the comment. */
if (type == '*')
{
*buffer++ = '/';
*buffer++ = type;
*buffer++ = type;
}
-
memcpy (buffer, from, len - COMMENT_START_LEN);
- list->name_used += len;
}
/*
* The tokenizer's main loop. Returns a token list, representing a
- * logical line in the input file, terminated with a CPP_VSPACE
- * token. On EOF, a token list containing the single CPP_EOF token
- * is returned.
+ * logical line in the input file. On EOF after some tokens have
+ * been processed, we return immediately. Then in next call, or if
+ * EOF occurred at the beginning of a logical line, a single CPP_EOF
+ * token is placed in the list.
*
* Implementation relies almost entirely on lookback, rather than
* looking forwards. This means that tokenization requires just
* even when enabled.
*/
+#define IS_DIRECTIVE() (list->tokens[first_token].type == CPP_HASH)
+
void
_cpp_lex_line (pfile, list)
cpp_reader *pfile;
cpp_buffer *buffer = pfile->buffer;
register const unsigned char *cur = buffer->cur;
unsigned char flags = 0;
+ unsigned int first_token = list->tokens_used;
+ list->line = CPP_BUF_LINE (buffer);
+ pfile->col_adjust = 0;
expanded:
token_limit = list->tokens + list->tokens_cap;
cur_token = list->tokens + list->tokens_used;
{
unsigned char c = *cur++;
- /* Optimize whitespace skipping, in particular the case of a
- single whitespace character, as every other token is probably
- whitespace. (' ' '\t' '\v' '\f' '\0'). */
+ /* Optimize whitespace skipping, as most tokens are probably
+ separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
+
if (is_hspace ((unsigned int) c))
{
- if (c == '\0' || (cur < buffer->rlimit && is_hspace (*cur)))
- {
- buffer->cur = cur - (c == '\0'); /* Get the null warning. */
- skip_whitespace (pfile, IS_DIRECTIVE (list));
- cur = buffer->cur;
- }
+ /* Step back to get the null warning and tab correction. */
+ buffer->cur = cur - 1;
+ skip_whitespace (pfile, IS_DIRECTIVE ());
+ cur = buffer->cur;
+
flags = PREV_WHITESPACE;
if (cur == buffer->rlimit)
break;
}
/* Initialize current token. Its type is set in the switch. */
- cur_token->col = COLUMN (cur);
+ cur_token->col = CPP_BUF_COLUMN (buffer, cur);
cur_token->flags = flags;
flags = 0;
{
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
- cur--; /* Backup character. */
- if (PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ())
- {
- /* Prepend an immediately previous CPP_DOT token. */
- cur_token--;
- if (list->name_cap == list->name_used)
- auto_expand_name_space (list);
+ {
+ int prev_dot;
- cur_token->val.name.len = 1;
- cur_token->val.name.text = list->namebuf + list->name_used;
- list->namebuf[list->name_used++] = '.';
- }
- else
- INIT_NAME (list, cur_token->val.name);
+ cur--; /* Backup character. */
+ prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
+ if (prev_dot)
+ cur_token--;
+ INIT_TOKEN_NAME (list, cur_token);
+ /* Prepend an immediately previous CPP_DOT token. */
+ if (prev_dot)
+ {
+ if (list->name_cap == list->name_used)
+ auto_expand_name_space (list);
- continue_number:
- buffer->cur = cur;
- parse_number (pfile, list, &cur_token->val.name);
- cur = buffer->cur;
+ cur_token->val.name.len = 1;
+ list->namebuf[list->name_used++] = '.';
+ }
- PUSH_TOKEN (CPP_NUMBER); /* Number not yet interpreted. */
+ continue_number:
+ cur_token->type = CPP_NUMBER; /* Before parse_number. */
+ buffer->cur = cur;
+ parse_number (pfile, list, &cur_token->val.name);
+ cur = buffer->cur;
+ cur_token++;
+ }
break;
letter:
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
case 'Y': case 'Z':
cur--; /* Backup character. */
- INIT_NAME (list, cur_token->val.name);
+ INIT_TOKEN_NAME (list, cur_token);
cur_token->type = CPP_NAME; /* Identifier, macro etc. */
continue_name:
cur = buffer->cur;
/* Find handler for newly created / extended directive. */
- if (IS_DIRECTIVE (list) && cur_token == &list->tokens[1])
+ if (IS_DIRECTIVE () && cur_token == &list->tokens[first_token + 1])
_cpp_check_directive (list, cur_token);
cur_token++;
break;
}
do_parse_string:
- /* Here c is one of ' " > or ). */
- INIT_NAME (list, cur_token->val.name);
+ /* Here c is one of ' " or >. */
+ INIT_TOKEN_NAME (list, cur_token);
buffer->cur = cur;
- parse_string2 (pfile, list, &cur_token->val.name, c);
+ parse_string2 (pfile, list, &cur_token->val.name, c,
+ c == '"' && !IS_DIRECTIVE());
cur = buffer->cur;
cur_token++;
break;
cpp_error_with_line (pfile, list->line,
cur_token[-1].col,
"multi-line comment");
- if (!CPP_OPTION (pfile, discard_comments))
- save_comment (list, cur, buffer->cur - cur,
- cur_token - 1 - list->tokens, c);
- cur = buffer->cur;
/* Back-up to first '-' or '/'. */
- cur_token -= 2;
+ cur_token--;
+ if (!CPP_OPTION (pfile, discard_comments)
+ && (!IS_DIRECTIVE() || list->dirno == 0))
+ save_comment (list, cur_token++, cur,
+ buffer->cur - cur, c);
+ cur = buffer->cur;
+
if (!CPP_OPTION (pfile, traditional))
flags = PREV_WHITESPACE;
+ break;
}
}
}
else if (buffer->cur[-2] != '*')
cpp_warning (pfile,
"comment end '*/' split across lines");
- if (!CPP_OPTION (pfile, discard_comments))
- save_comment (list, cur, buffer->cur - cur,
- cur_token - 1 - list->tokens, c);
- cur = buffer->cur;
+ /* Back up to opening '/'. */
cur_token--;
+ if (!CPP_OPTION (pfile, discard_comments)
+ && (!IS_DIRECTIVE() || list->dirno == 0))
+ save_comment (list, cur_token++, cur,
+ buffer->cur - cur, c);
+ cur = buffer->cur;
+
if (!CPP_OPTION (pfile, traditional))
flags = PREV_WHITESPACE;
break;
buffer->cur = cur;
cpp_warning (pfile, "backslash and newline separated by space");
}
- PUSH_TOKEN (CPP_VSPACE);
- goto out;
+ /* Skip vertical space until we have at least one token to
+ return. */
+ if (cur_token != &list->tokens[first_token])
+ goto out;
+ list->line = CPP_BUF_LINE (buffer);
+ break;
case '-':
if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
break;
}
/* Is this the beginning of a header name? */
- if (list->dir_flags & SYNTAX_INCLUDE)
+ if (list->flags & SYNTAX_INCLUDE)
{
c = '>'; /* Terminator. */
cur_token->type = CPP_HEADER_NAME;
cur_token++;
break;
- case '(':
- /* Is this the beginning of an assertion string? */
- if (list->dir_flags & SYNTAX_ASSERT)
- {
- c = ')'; /* Terminator. */
- cur_token->type = CPP_ASSERTION;
- goto do_parse_string;
- }
- PUSH_TOKEN (CPP_OPEN_PAREN);
- break;
-
case '?':
if (cur + 1 < buffer->rlimit && *cur == '?'
&& trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
case '!': PUSH_TOKEN (CPP_NOT); break;
case ',': PUSH_TOKEN (CPP_COMMA); break;
case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
+ case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
case '$':
if (cur_token == token_limit)
{
list->tokens_used = cur_token - list->tokens;
- expand_token_space (list);
+ _cpp_expand_token_space (list, 256);
goto expanded;
}
- cur_token->type = CPP_EOF;
cur_token->flags = flags;
-
- if (cur_token != &list->tokens[0])
+ if (cur_token == &list->tokens[first_token])
{
- /* Next call back will get just a CPP_EOF. */
- buffer->cur = cur;
- cpp_warning (pfile, "no newline at end of file");
- PUSH_TOKEN (CPP_VSPACE);
+ /* FIXME: move this warning to callers who care. */
+ if (cur > buffer->buf && !IS_NEWLINE (cur[-1]))
+ cpp_warning (pfile, "no newline at end of file");
+ cur_token++->type = CPP_EOF;
}
out:
+ list->tokens[first_token].flags |= BOL;
buffer->cur = cur;
-
list->tokens_used = cur_token - list->tokens;
-
- /* FIXME: take this check out and put it in the caller.
- list->directive == 0 indicates an unknown directive (but null
- directive is OK). This is the first time we can be sure the
- directive is invalid, and thus warn about it, because it might
- have been split by escaped newlines. Also, don't complain about
- invalid directives in assembly source, we don't know where the
- comments are, and # may introduce assembler pseudo-ops. */
-
- if (IS_DIRECTIVE (list) && list->dir_handler == 0
- && list->tokens[1].type != CPP_VSPACE
- && !CPP_OPTION (pfile, lang_asm))
- cpp_error_with_line (pfile, list->line, list->tokens[1].col,
- "invalid preprocessing directive");
}
/* Write the spelling of a token TOKEN to BUFFER. The buffer must
static unsigned char *
spell_token (pfile, token, buffer, whitespace)
cpp_reader *pfile; /* Would be nice to be rid of this... */
- cpp_token *token;
+ const cpp_token *token;
unsigned char *buffer;
int whitespace;
{
_cpp_lex_file (pfile)
cpp_reader* pfile;
{
- int recycle;
cpp_toklist* list;
init_trigraph_map ();
list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
+ _cpp_init_toklist (list, DUMMY_TOKEN);
- for (recycle = 0; ;)
+ for (;;)
{
- init_token_list (pfile, list, recycle);
- recycle = 1;
-
_cpp_lex_line (pfile, list);
if (list->tokens[0].type == CPP_EOF)
break;
- if (list->dir_handler)
- {
- if (list->dir_handler (pfile))
- {
- list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
- recycle = 0;
- }
- }
+#if 0
+ if (list->dirno)
+ _cpp_handle_directive (pfile, list);
else
+#endif
_cpp_output_list (pfile, list);
+ _cpp_clear_toklist (list);
}
}
cpp_reader *pfile;
cpp_toklist *list;
{
- cpp_token *token, *comment, *comment_before = 0;
-
- if (list->comments_used > 0)
- {
- comment = &list->comments[0];
- comment_before = &list->tokens[comment->aux];
- }
+ unsigned int i;
- token = &list->tokens[0];
- do
+ for (i = 0; i < list->tokens_used; i++)
{
- /* Output comments if -C. */
- while (token == comment_before)
- {
- /* Make space for the comment, and copy it out. */
- CPP_RESERVE (pfile, TOKEN_LEN (comment));
- pfile->limit = spell_token (pfile, comment, pfile->limit, 0);
-
- /* Stop if no comments left, or no more comments appear
- before the current token. */
- comment++;
- if (comment == list->comments + list->comments_used)
- break;
- comment_before = &list->tokens[comment->aux];
- }
-
- CPP_RESERVE (pfile, TOKEN_LEN (token));
- pfile->limit = spell_token (pfile, token, pfile->limit, 1);
+ CPP_RESERVE (pfile, TOKEN_LEN (&list->tokens[i]));
+ pfile->limit = spell_token (pfile, &list->tokens[i], pfile->limit, 1);
}
- while (token++->type != CPP_VSPACE);
}
#endif