#include "cpplib.h"
#include "cpphash.h"
+#ifdef HAVE_MMAP_FILE
+# include <sys/mman.h>
+#endif
+
#define PEEKBUF(BUFFER, N) \
((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
#define GETBUF(BUFFER) \
#ifdef NEW_LEXER
-static void expand_comment_space PARAMS ((cpp_toklist *));
void init_trigraph_map PARAMS ((void));
static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
unsigned char *));
static void parse_string2 PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
unsigned int, int));
static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
-static void save_comment PARAMS ((cpp_toklist *, const unsigned char *,
- unsigned int, unsigned int, unsigned int));
+static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
+ const unsigned char *,
+ unsigned int, unsigned int));
void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));
static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
/* An upper bound on the number of bytes needed to spell a token,
including preceding whitespace. */
-#define TOKEN_LEN(token) (5 + (token_spellings[token->type].type > \
- SPELL_NONE ? token->val.name.len: 0))
+#define TOKEN_LEN(token) (5 + (token_spellings[(token)->type].type > \
+ SPELL_NONE ? (token)->val.name.len: 0))
#endif
}
else if (buf->macro)
{
- HASHNODE *m = buf->macro;
+ cpp_hashnode *m = buf->macro;
m->disabled = 0;
if ((m->type == T_FMACRO && buf->mapped)
/* Allocate name space. */
list->namebuf = (unsigned char *) xmalloc (list->name_cap);
- /* Only create a comment space on demand. */
- list->comments_cap = 0;
- list->comments = 0;
-
_cpp_clear_toklist (list);
}
{
list->tokens_used = 0;
list->name_used = 0;
- list->comments_used = 0;
list->dirno = -1;
list->flags &= LIST_OFFSET; /* clear all but that one */
}
_cpp_free_toklist (list)
cpp_toklist *list;
{
- if (list->comments)
- free (list->comments);
if (list->flags & LIST_OFFSET)
free (list->tokens - 1); /* Backup over dummy token. */
else
copy->tokens_used = n;
copy->name_used = bytes;
copy->name_cap = bytes;
- copy->comments = 0;
- copy->comments_cap = 0;
- copy->comments_used = 0;
copy->flags = 0;
copy->dirno = -1;
if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
list->tokens[i].val.name.text += delta;
}
-
- if (list->comments_cap)
- {
- list->comments = xrealloc (list->comments,
- list->comments_used * sizeof (cpp_token));
- list->comments_cap = list->comments_used;
- }
}
/* Compare two tokens. */
{
U_CHAR *macro = pfile->token_buffer + written;
size_t len = CPP_WRITTEN (pfile) - written;
- HASHNODE *hp = _cpp_lookup (pfile, macro, len);
+ cpp_hashnode *hp = cpp_lookup (pfile, macro, len);
- /* _cpp_lookup never returns null. */
+ /* cpp_lookup never returns null. */
if (hp->type == T_VOID)
return 0;
if (hp->disabled || hp->type == T_IDENTITY)
return lbase;
}
-/* The following table is used by _cpp_read_and_prescan. If we have
+/* The following table is used by _cpp_prescan. If we have
designated initializers, it can be constant data; otherwise, it is
set up at runtime by _cpp_init_input_buffer. */
-#ifndef UCHAR_MAX
-#define UCHAR_MAX 255 /* assume 8-bit bytes */
-#endif
-
#if (GCC_VERSION >= 2007)
#define init_chartab() /* nothing */
#define CHARTAB __extension__ static const U_CHAR chartab[UCHAR_MAX + 1] = {
#define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
#define NONTRI(c) ((c) <= SPECCASE_QUESTION)
-/* Read the entire contents of file DESC into buffer BUF. LEN is how
- much memory to allocate initially; more will be allocated if
- necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
+/* Prescan pass over a file already loaded into FP->buf; LEN is its
+ length in bytes. This is translation phases 1 and 2 (C99 5.1.1.2).
+
+ Convert end-of-line markers (\n, \r, \r\n, \n\r) to
canonical form (\n). If enabled, convert and/or warn about
trigraphs. Convert backslash-newline to a one-character escape
(\r) and remove it from "embarrassing" places (i.e. the middle of a
at the end of reload1.c is about 60%. (reload1.c is 329k.)
If your file has more than one kind of end-of-line marker, you
- will get messed-up line numbering.
-
- So that the cases of the switch statement do not have to concern
- themselves with the complications of reading beyond the end of the
- buffer, the buffer is guaranteed to have at least 3 characters in
- it (or however many are left in the file, if less) on entry to the
- switch. This is enough to handle trigraphs and the "\\\n\r" and
- "\\\r\n" cases.
-
- The end of the buffer is marked by a '\\', which, being a special
- character, guarantees we will exit the fast-scan loops and perform
- a refill. */
-
-long
-_cpp_read_and_prescan (pfile, fp, desc, len)
+ will get messed-up line numbering. */
+
+ssize_t
+_cpp_prescan (pfile, fp, len)
cpp_reader *pfile;
cpp_buffer *fp;
- int desc;
- size_t len;
+ ssize_t len;
{
- U_CHAR *buf = (U_CHAR *) xmalloc (len);
- U_CHAR *ip, *op, *line_base;
- U_CHAR *ibase;
+ U_CHAR *buf, *op;
+ const U_CHAR *ibase, *ip, *ilimit;
+ U_CHAR *line_base;
unsigned long line;
unsigned int deferred_newlines;
- size_t offset;
- int count = 0;
- offset = 0;
- deferred_newlines = 0;
- op = buf;
- line_base = buf;
+ /* Allocate an extra byte in case we must add a trailing \n. */
+ buf = (U_CHAR *) xmalloc (len + 1);
+ line_base = op = buf;
+ ip = ibase = fp->buf;
+ ilimit = ibase + len;
line = 1;
- ibase = pfile->input_buffer + 3;
- ip = ibase;
- ip[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */
+ deferred_newlines = 0;
for (;;)
{
- U_CHAR *near_buff_end;
+ const U_CHAR *iq;
- count = read (desc, ibase, pfile->input_buffer_len);
- if (count < 0)
- goto error;
-
- ibase[count] = '\\'; /* Marks end of buffer */
- if (count)
+ /* Deal with \-newline, potentially in the middle of a token. */
+ if (deferred_newlines)
{
- near_buff_end = pfile->input_buffer + count;
- offset += count;
- if (offset > len)
+ if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
{
- size_t delta_op;
- size_t delta_line_base;
- len = offset * 2;
- if (offset > len)
- /* len overflowed.
- This could happen if the file is larger than half the
- maximum address space of the machine. */
- goto too_big;
-
- delta_op = op - buf;
- delta_line_base = line_base - buf;
- buf = (U_CHAR *) xrealloc (buf, len);
- op = buf + delta_op;
- line_base = buf + delta_line_base;
+ /* Previous was not white space. Skip to white
+ space, if we can, before outputting the \r's */
+ iq = ip;
+ while (iq < ilimit
+ && *iq != ' '
+ && *iq != '\t'
+ && *iq != '\n'
+ && NORMAL(*iq))
+ iq++;
+ memcpy (op, ip, iq - ip);
+ op += iq - ip;
+ ip += iq - ip;
+ if (! NORMAL(*ip))
+ goto do_speccase;
}
- }
- else
- {
- if (ip == ibase)
- break;
- /* Allow normal processing of the (at most 2) remaining
- characters. The end-of-buffer marker is still present
- and prevents false matches within the switch. */
- near_buff_end = ibase - 1;
+ while (deferred_newlines)
+ deferred_newlines--, *op++ = '\r';
}
- for (;;)
- {
- unsigned int span;
+ /* Copy as much as we can without special treatment. */
+ iq = ip;
+ while (iq < ilimit && NORMAL (*iq)) iq++;
+ memcpy (op, ip, iq - ip);
+ op += iq - ip;
+ ip += iq - ip;
- /* Deal with \-newline, potentially in the middle of a token. */
- if (deferred_newlines)
+ do_speccase:
+ if (ip >= ilimit)
+ break;
+
+ switch (chartab[*ip++])
+ {
+ case SPECCASE_CR: /* \r */
+ if (ip[-2] != '\n')
{
- if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
- {
- /* Previous was not white space. Skip to white
- space, if we can, before outputting the \r's */
- span = 0;
- while (ip[span] != ' '
- && ip[span] != '\t'
- && ip[span] != '\n'
- && NORMAL(ip[span]))
- span++;
- memcpy (op, ip, span);
- op += span;
- ip += span;
- if (! NORMAL(ip[0]))
- goto do_speccase;
- }
- while (deferred_newlines)
- deferred_newlines--, *op++ = '\r';
+ if (ip < ilimit && *ip == '\n')
+ ip++;
+ *op++ = '\n';
}
+ break;
- /* Copy as much as we can without special treatment. */
- span = 0;
- while (NORMAL (ip[span])) span++;
- memcpy (op, ip, span);
- op += span;
- ip += span;
-
- do_speccase:
- if (ip > near_buff_end) /* Do we have enough chars? */
- break;
- switch (chartab[*ip++])
+ case SPECCASE_BACKSLASH: /* \ */
+ backslash:
+ if (ip < ilimit)
{
- case SPECCASE_CR: /* \r */
- if (ip[-2] != '\n')
- {
- if (*ip == '\n')
- ip++;
- *op++ = '\n';
- }
- break;
-
- case SPECCASE_BACKSLASH: /* \ */
if (*ip == '\n')
{
deferred_newlines++;
ip++;
if (*ip == '\r') ip++;
+ break;
}
else if (*ip == '\r')
{
deferred_newlines++;
ip++;
if (*ip == '\n') ip++;
+ break;
}
- else
- *op++ = '\\';
- break;
+ }
- case SPECCASE_QUESTION: /* ? */
- {
- unsigned int d, t;
+ *op++ = '\\';
+ break;
- *op++ = '?'; /* Normal non-trigraph case */
- if (ip[0] != '?')
- break;
-
- d = ip[1];
- t = chartab[d];
- if (NONTRI (t))
- break;
+ case SPECCASE_QUESTION: /* ? */
+ {
+ unsigned int d, t;
- if (CPP_OPTION (pfile, warn_trigraphs))
- {
- unsigned long col;
- line_base = find_position (line_base, op, &line);
- col = op - line_base + 1;
- if (CPP_OPTION (pfile, trigraphs))
- cpp_warning_with_line (pfile, line, col,
- "trigraph ??%c converted to %c", d, t);
- else
- cpp_warning_with_line (pfile, line, col,
- "trigraph ??%c ignored", d);
- }
+ *op++ = '?'; /* Normal non-trigraph case */
+ if (ip > ilimit - 2 || ip[0] != '?')
+ break;
+
+ d = ip[1];
+ t = chartab[d];
+ if (NONTRI (t))
+ break;
- ip += 2;
+ if (CPP_OPTION (pfile, warn_trigraphs))
+ {
+ unsigned long col;
+ line_base = find_position (line_base, op, &line);
+ col = op - line_base + 1;
if (CPP_OPTION (pfile, trigraphs))
- {
- op[-1] = t; /* Overwrite '?' */
- if (t == '\\')
- {
- op--;
- *--ip = '\\';
- goto do_speccase; /* May need buffer refill */
- }
- }
+ cpp_warning_with_line (pfile, line, col,
+ "trigraph ??%c converted to %c", d, t);
else
+ cpp_warning_with_line (pfile, line, col,
+ "trigraph ??%c ignored", d);
+ }
+
+ ip += 2;
+ if (CPP_OPTION (pfile, trigraphs))
+ {
+ op[-1] = t; /* Overwrite '?' */
+ if (t == '\\')
{
- *op++ = '?';
- *op++ = d;
+ op--;
+ goto backslash;
}
}
- break;
- }
+ else
+ {
+ *op++ = '?';
+ *op++ = d;
+ }
+ }
+ break;
}
- /* Copy previous char plus unprocessed (at most 2) chars
- to beginning of buffer, refill it with another
- read(), and continue processing */
- memmove (ip - count - 1, ip - 1, 4 - (ip - near_buff_end));
- ip -= count;
}
- if (offset == 0)
- return 0;
+#ifdef HAVE_MMAP_FILE
+ if (fp->mapped)
+ munmap ((caddr_t) fp->buf, len);
+ else
+#endif
+ free ((PTR) fp->buf);
if (op[-1] != '\n')
{
line_base = find_position (line_base, op, &line);
col = op - line_base + 1;
cpp_warning_with_line (pfile, line, col, "no newline at end of file");
- if (offset + 1 > len)
- {
- len += 1;
- if (offset + 1 > len)
- goto too_big;
- buf = (U_CHAR *) xrealloc (buf, len);
- op = buf + offset;
- }
*op++ = '\n';
}
- fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
+ fp->buf = buf;
return op - buf;
-
- too_big:
- cpp_notice (pfile, "%s is too large (>%lu bytes)", fp->ihash->name,
- (unsigned long)offset);
- free (buf);
- return -1;
-
- error:
- cpp_error_from_errno (pfile, fp->ihash->name);
- free (buf);
- return -1;
}
/* Allocate pfile->input_buffer, and initialize chartab[]
U":>", U"<%", U"%>"};
static unsigned char trigraph_map[256];
-static void
-expand_comment_space (list)
- cpp_toklist *list;
-{
- if (list->comments_cap == 0)
- {
- list->comments_cap = 10;
- list->comments = (cpp_token *)
- xmalloc (list->comments_cap * sizeof (cpp_token));
- }
- else
- {
- list->comments_cap *= 2;
- list->comments = (cpp_token *)
- xrealloc (list->comments, list->comments_cap);
- }
-}
-
void
init_trigraph_map ()
{
#define COMMENT_START_LEN 2
static void
-save_comment (list, from, len, tok_no, type)
+save_comment (list, token, from, len, type)
cpp_toklist *list;
+ cpp_token *token;
const unsigned char *from;
unsigned int len;
- unsigned int tok_no;
unsigned int type;
{
- cpp_token *comment;
unsigned char *buffer;
len += COMMENT_START_LEN;
- if (list->comments_used == list->comments_cap)
- expand_comment_space (list);
-
if (list->name_used + len > list->name_cap)
expand_name_space (list, len);
- comment = &list->comments[list->comments_used++];
- INIT_TOKEN_NAME (list, comment);
- comment->type = CPP_COMMENT;
- comment->aux = tok_no;
- comment->val.name.len = len;
+ INIT_TOKEN_NAME (list, token);
+ token->type = CPP_COMMENT;
+ token->val.name.len = len;
buffer = list->namebuf + list->name_used;
list->name_used += len;
/*
* The tokenizer's main loop. Returns a token list, representing a
- * logical line in the input file, terminated with a CPP_VSPACE
- * token. On EOF, a token list containing the single CPP_EOF token
- * is returned.
+ * logical line in the input file. If EOF is reached after some
+ * tokens have been processed, we return immediately. Then, on the
+ * next call, or if EOF occurred at the beginning of a logical line,
+ * a single CPP_EOF token is placed in the list.
*
* Implementation relies almost entirely on lookback, rather than
* looking forwards. This means that tokenization requires just
unsigned char flags = 0;
unsigned int first_token = list->tokens_used;
+ list->line = CPP_BUF_LINE (buffer);
pfile->col_adjust = 0;
expanded:
token_limit = list->tokens + list->tokens_cap;
cpp_error_with_line (pfile, list->line,
cur_token[-1].col,
"multi-line comment");
- if (!CPP_OPTION (pfile, discard_comments))
- save_comment (list, cur, buffer->cur - cur,
- cur_token - 1 - list->tokens, c);
- cur = buffer->cur;
/* Back-up to first '-' or '/'. */
- cur_token -= 2;
+ cur_token--;
+ if (!CPP_OPTION (pfile, discard_comments)
+ && (!IS_DIRECTIVE() || list->dirno == 0))
+ save_comment (list, cur_token++, cur,
+ buffer->cur - cur, c);
+ cur = buffer->cur;
+
if (!CPP_OPTION (pfile, traditional))
flags = PREV_WHITESPACE;
+ break;
}
}
}
else if (buffer->cur[-2] != '*')
cpp_warning (pfile,
"comment end '*/' split across lines");
- if (!CPP_OPTION (pfile, discard_comments))
- save_comment (list, cur, buffer->cur - cur,
- cur_token - 1 - list->tokens, c);
- cur = buffer->cur;
+ /* Back up to opening '/'. */
cur_token--;
+ if (!CPP_OPTION (pfile, discard_comments)
+ && (!IS_DIRECTIVE() || list->dirno == 0))
+ save_comment (list, cur_token++, cur,
+ buffer->cur - cur, c);
+ cur = buffer->cur;
+
if (!CPP_OPTION (pfile, traditional))
flags = PREV_WHITESPACE;
break;
buffer->cur = cur;
cpp_warning (pfile, "backslash and newline separated by space");
}
- PUSH_TOKEN (CPP_VSPACE);
- goto out;
+ /* Skip vertical space until we have at least one token to
+ return. */
+ if (cur_token != &list->tokens[first_token])
+ goto out;
+ list->line = CPP_BUF_LINE (buffer);
+ break;
case '-':
if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
goto expanded;
}
- cur_token->type = CPP_EOF;
cur_token->flags = flags;
-
- if (cur_token != &list->tokens[first_token])
+ if (cur_token == &list->tokens[first_token])
{
- /* Next call back will get just a CPP_EOF. */
- buffer->cur = cur;
- cpp_warning (pfile, "no newline at end of file");
- PUSH_TOKEN (CPP_VSPACE);
+ /* FIXME: move this warning to callers who care. */
+ if (cur > buffer->buf && !IS_NEWLINE (cur[-1]))
+ cpp_warning (pfile, "no newline at end of file");
+ cur_token++->type = CPP_EOF;
}
out:
+ list->tokens[first_token].flags |= BOL;
buffer->cur = cur;
-
list->tokens_used = cur_token - list->tokens;
-
- /* FIXME: take this check out and put it in the caller.
- list->directive == 0 indicates an unknown directive (but null
- directive is OK). This is the first time we can be sure the
- directive is invalid, and thus warn about it, because it might
- have been split by escaped newlines. Also, don't complain about
- invalid directives in assembly source, we don't know where the
- comments are, and # may introduce assembler pseudo-ops. */
-
- if (IS_DIRECTIVE (list) && list->dirno == -1
- && list->tokens[1].type != CPP_VSPACE
- && !CPP_OPTION (pfile, lang_asm))
- cpp_error_with_line (pfile, list->line, list->tokens[1].col,
- "invalid preprocessing directive");
}
/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cpp_reader *pfile;
cpp_toklist *list;
{
- cpp_token *token, *comment, *comment_before = 0;
-
- if (list->comments_used > 0)
- {
- comment = &list->comments[0];
- comment_before = &list->tokens[comment->aux];
- }
+ unsigned int i;
- token = &list->tokens[0];
- do
+ for (i = 0; i < list->tokens_used; i++)
{
- /* Output comments if -C. */
- while (token == comment_before)
- {
- /* Make space for the comment, and copy it out. */
- CPP_RESERVE (pfile, TOKEN_LEN (comment));
- pfile->limit = spell_token (pfile, comment, pfile->limit, 0);
-
- /* Stop if no comments left, or no more comments appear
- before the current token. */
- comment++;
- if (comment == list->comments + list->comments_used)
- break;
- comment_before = &list->tokens[comment->aux];
- }
-
- CPP_RESERVE (pfile, TOKEN_LEN (token));
- pfile->limit = spell_token (pfile, token, pfile->limit, 1);
+ CPP_RESERVE (pfile, TOKEN_LEN (&list->tokens[i]));
+ pfile->limit = spell_token (pfile, &list->tokens[i], pfile->limit, 1);
}
- while (token++->type != CPP_VSPACE);
}
#endif