1 /* CPP Library - traditional lexical analysis and macro expansion.
2 Copyright (C) 2002 Free Software Foundation, Inc.
3 Contributed by Neil Booth, May 2002
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 2, or (at your option) any
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
24 /* The replacement text of a function-like macro is stored as a
25 contiguous sequence of aligned blocks, each representing the text
26 between subsequent parameters in that text.
28 Each block comprises the length of text contained therein, the
29 one-based index of the argument that immediately follows that text,
30 and the text itself. The final block in the macro expansion is
31 easily recognizable as it has an argument index of zero. */
35 unsigned int text_len;
36 unsigned short arg_index;
40 #define BLOCK_HEADER_LEN offsetof (struct block, text)
41 #define BLOCK_LEN(TEXT_LEN) CPP_ALIGN (BLOCK_HEADER_LEN + TEXT_LEN)
43 /* Structure holding information about a function-like macro
47 /* Memory buffer holding the trad_arg array. */
50 /* An array of size the number of macro parameters + 1, containing
51 the offsets of the start of each macro argument in the output
52 buffer. The argument continues until the character before the
53 start of the next one. */
56 /* The hashnode of the macro. */
59 /* The offset of the macro name in the output buffer. */
62 /* Zero-based index of argument being currently lexed. */
66 /* Lexing TODO: Maybe handle space in escaped newlines. Stop cpplex.c
67 from recognizing comments and directives during its lexing pass. */
69 static const uchar *handle_newline PARAMS ((cpp_reader *, const uchar *));
70 static const uchar *skip_escaped_newlines PARAMS ((cpp_reader *,
72 static const uchar *skip_whitespace PARAMS ((cpp_reader *, const uchar *,
74 static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *));
75 static const uchar *copy_comment PARAMS ((cpp_reader *, const uchar *, int));
76 static void scan_out_logical_line PARAMS ((cpp_reader *pfile, cpp_macro *));
77 static void check_output_buffer PARAMS ((cpp_reader *, size_t));
78 static void push_replacement_text PARAMS ((cpp_reader *, cpp_hashnode *));
79 static bool scan_parameters PARAMS ((cpp_reader *, cpp_macro *));
80 static bool recursive_macro PARAMS ((cpp_reader *, cpp_hashnode *));
81 static void save_replacement_text PARAMS ((cpp_reader *, cpp_macro *,
83 static void maybe_start_funlike PARAMS ((cpp_reader *, cpp_hashnode *,
84 const uchar *, struct fun_macro *));
85 static void save_argument PARAMS ((struct fun_macro *, size_t));
86 static void replace_args_and_push PARAMS ((cpp_reader *, struct fun_macro *));
87 static size_t canonicalize_text PARAMS ((uchar *, const uchar *, size_t,
90 /* Ensures we have N bytes' space in the output buffer, and
91 reallocates it if not. */
93 check_output_buffer (pfile, n)
97 /* We might need two bytes to terminate an unterminated comment, and
98 one more to terminate the line with a NUL. */
101 if (n > (size_t) (pfile->out.limit - pfile->out.cur))
103 size_t size = pfile->out.cur - pfile->out.base;
104 size_t new_size = (size + n) * 3 / 2;
107 = (uchar *) xrealloc (pfile->out.base, new_size);
108 pfile->out.limit = pfile->out.base + new_size;
109 pfile->out.cur = pfile->out.base + size;
113 /* To be called whenever a newline character is encountered in the
114 input file, at CUR. Handles DOS, Mac and Unix ends of line, and
115 increments pfile->line.
117 Returns a pointer to the character after the newline sequence. */
119 handle_newline (pfile, cur)
124 if (cur[0] + cur[1] == '\r' + '\n')
129 /* CUR points to any character in the buffer, not necessarily a
130 backslash. Advances CUR until all escaped newlines are skipped,
131 and returns the new position.
133 Warns if a file buffer ends in an escaped newline. */
135 skip_escaped_newlines (pfile, cur)
139 const uchar *orig_cur = cur;
141 while (*cur == '\\' && is_vspace (cur[1]))
142 cur = handle_newline (pfile, cur + 1);
144 if (cur != orig_cur && cur == RLIMIT (pfile->context) && pfile->buffer->inc)
145 cpp_error (pfile, DL_PEDWARN, "backslash-newline at end of file");
150 /* CUR points to the asterisk introducing a comment in the input
151 buffer. IN_DEFINE is true if we are in the replacement text
154 The asterisk and following comment is copied to the buffer pointed
155 to by pfile->out.cur, which must be of sufficient size.
156 Unterminated comments are diagnosed, and correctly terminated in
157 the output. pfile->out.cur is updated depending upon IN_DEFINE,
158 -C, -CC and pfile->state.in_directive.
160 Returns a pointer to the first character after the comment in the
163 copy_comment (pfile, cur, in_define)
168 unsigned int from_line = pfile->line;
169 const uchar *limit = RLIMIT (pfile->context);
170 uchar *out = pfile->out.cur;
174 unsigned int c = *cur++;
179 /* An immediate slash does not terminate the comment. */
180 if (out[-2] == '*' && out - 2 > pfile->out.cur)
183 if (*cur == '*' && cur[1] != '/'
184 && CPP_OPTION (pfile, warn_comments))
185 cpp_error_with_line (pfile, DL_WARNING, pfile->line, 0,
186 "\"/*\" within comment");
188 else if (is_vspace (c))
190 cur = handle_newline (pfile, cur - 1);
191 /* Canonicalize newline sequences and skip escaped ones. */
200 cpp_error_with_line (pfile, DL_ERROR, from_line, 0, "unterminated comment");
205 /* Comments in directives become spaces so that tokens are properly
206 separated when the ISO preprocessor re-lexes the line. The
207 exception is #define. */
208 if (pfile->state.in_directive)
212 if (CPP_OPTION (pfile, discard_comments_in_macro_exp))
215 pfile->out.cur = out;
218 pfile->out.cur[-1] = ' ';
220 else if (CPP_OPTION (pfile, discard_comments))
223 pfile->out.cur = out;
228 /* CUR points to any character in the input buffer. Skips over all
229 contiguous horizontal white space and NULs, including comments if
230 SKIP_COMMENTS, until reaching the first non-horizontal-whitespace
231 character or the end of the current context. Escaped newlines are
234 The whitespace is copied verbatim to the output buffer, except that
235 comments are handled as described in copy_comment().
236 pfile->out.cur is updated.
238 Returns a pointer to the first character after the whitespace in
241 skip_whitespace (pfile, cur, skip_comments)
246 uchar *out = pfile->out.cur;
250 unsigned int c = *cur++;
253 if (is_nvspace (c) && c)
256 if (!c && cur != RLIMIT (pfile->context))
259 if (*cur == '/' && skip_comments)
261 const uchar *tmp = skip_escaped_newlines (pfile, cur);
264 pfile->out.cur = out;
265 cur = copy_comment (pfile, tmp, false /* in_define */);
266 out = pfile->out.cur;
272 if (c == '\\' && is_vspace (*cur))
274 cur = skip_escaped_newlines (pfile, cur);
281 pfile->out.cur = out;
285 /* Lexes and outputs an identifier starting at CUR, which is assumed
286 to point to a valid first character of an identifier. Returns
287 the hashnode, and updates out.cur. */
288 static cpp_hashnode *
289 lex_identifier (pfile, cur)
294 uchar *out = pfile->out.cur;
295 cpp_hashnode *result;
301 while (is_numchar (*cur));
302 cur = skip_escaped_newlines (pfile, cur);
304 while (is_numchar (*cur));
306 CUR (pfile->context) = cur;
307 len = out - pfile->out.cur;
308 result = (cpp_hashnode *) ht_lookup (pfile->hash_table, pfile->out.cur,
310 pfile->out.cur = out;
314 /* Overlays the true file buffer temporarily with text of length LEN
315 starting at START. The true buffer is restored upon calling
318 _cpp_overlay_buffer (pfile, start, len)
323 cpp_buffer *buffer = pfile->buffer;
325 buffer->saved_cur = buffer->cur;
326 buffer->saved_rlimit = buffer->rlimit;
329 buffer->rlimit = start + len;
331 pfile->saved_line = pfile->line;
334 /* Restores a buffer overlaid by _cpp_overlay_buffer(). */
336 _cpp_remove_overlay (pfile)
339 cpp_buffer *buffer = pfile->buffer;
341 buffer->cur = buffer->saved_cur;
342 buffer->rlimit = buffer->saved_rlimit;
344 pfile->line = pfile->saved_line;
347 /* Reads a logical line into the output buffer. Returns TRUE if there
348 is more text left in the buffer. */
350 _cpp_read_logical_line_trad (pfile)
353 cpp_buffer *buffer = pfile->buffer;
357 if (buffer->cur == buffer->rlimit)
361 /* Don't pop the last buffer. */
364 stop = buffer->return_at_eof;
365 _cpp_pop_buffer (pfile);
372 CUR (pfile->context) = buffer->cur;
373 RLIMIT (pfile->context) = buffer->rlimit;
374 scan_out_logical_line (pfile, NULL);
375 buffer->cur = CUR (pfile->context);
377 while (pfile->state.skipping);
382 /* Set up state for finding the opening '(' of a function-like
385 maybe_start_funlike (pfile, node, start, macro)
389 struct fun_macro *macro;
391 unsigned int n = node->value.macro->paramc + 1;
394 _cpp_release_buff (pfile, macro->buff);
395 macro->buff = _cpp_get_buff (pfile, n * sizeof (size_t));
396 macro->args = (size_t *) BUFF_FRONT (macro->buff);
398 macro->offset = start - pfile->out.base;
401 pfile->state.parsing_args = 1;
404 /* Save the OFFSET of the start of the next argument to MACRO. */
406 save_argument (macro, offset)
407 struct fun_macro *macro;
411 if (macro->argc <= macro->node->value.macro->paramc)
412 macro->args[macro->argc] = offset;
415 /* Copies the next logical line in the current buffer to the output
416 buffer. The output is guaranteed to terminate with a NUL
419 If MACRO is non-NULL, then we are scanning the replacement list of
420 MACRO, and we call save_replacement_text() every time we meet an
423 scan_out_logical_line (pfile, macro)
427 cpp_context *context;
429 unsigned int c, paren_depth = 0, quote = 0;
431 struct fun_macro fmacro;
436 pfile->out.cur = pfile->out.base;
437 pfile->out.first_line = pfile->line;
439 context = pfile->context;
441 check_output_buffer (pfile, RLIMIT (context) - cur);
442 out = pfile->out.cur;
449 /* There are only a few entities we need to catch: comments,
450 identifiers, newlines, escaped newlines, # and '\0'. */
454 if (cur - 1 != RLIMIT (context))
457 /* If this is a macro's expansion, pop it. */
460 pfile->out.cur = out - 1;
461 _cpp_pop_context (pfile);
465 /* Premature end of file. Fake a new line. */
467 if (!pfile->buffer->from_stage3)
468 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
469 if (pfile->state.parsing_args == 2)
470 cpp_error (pfile, DL_ERROR,
471 "unterminated argument list invoking macro \"%s\"",
472 NODE_NAME (fmacro.node));
476 case '\r': case '\n':
477 cur = handle_newline (pfile, cur - 1);
478 if (pfile->state.parsing_args == 2)
480 /* Newlines in arguments become a space. */
495 if (is_vspace (*cur))
496 out--, cur = skip_escaped_newlines (pfile, cur - 1);
499 /* Skip escaped quotes here, it's easier than above, but
500 take care to first skip escaped newlines. */
501 cur = skip_escaped_newlines (pfile, cur);
502 if (*cur == '\\' || *cur == '"' || *cur == '\'')
508 /* Traditional CPP does not recognize comments within
512 cur = skip_escaped_newlines (pfile, cur);
515 pfile->out.cur = out;
516 cur = copy_comment (pfile, cur, macro != 0);
517 out = pfile->out.cur;
523 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
524 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
525 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
526 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
528 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
529 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
530 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
531 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
533 if (quote == 0 || macro)
537 pfile->out.cur = --out;
538 node = lex_identifier (pfile, cur - 1);
540 if (node->type == NT_MACRO
541 && !pfile->state.skipping
542 && pfile->state.parsing_args != 2
543 && !pfile->state.prevent_expansion
544 && !recursive_macro (pfile, node))
546 if (node->value.macro->fun_like)
547 maybe_start_funlike (pfile, node, out, &fmacro);
550 /* Remove the object-like macro's name from the
551 output, and push its replacement text. */
552 pfile->out.cur = out;
553 push_replacement_text (pfile, node);
557 else if (macro && node->arg_index)
559 /* Found a parameter in the replacement text of a
560 #define. Remove its name from the output. */
561 pfile->out.cur = out;
562 save_replacement_text (pfile, macro, node->arg_index);
565 out = pfile->out.cur;
574 if (pfile->state.parsing_args == 1)
576 const uchar *p = pfile->out.base + fmacro.offset;
578 /* Invoke a prior function-like macro if there is only
579 white space in-between. */
580 while (is_numchar (*p))
582 while (is_space (*p))
587 pfile->state.parsing_args = 2;
589 out = pfile->out.base + fmacro.offset;
590 fmacro.args[0] = fmacro.offset;
593 pfile->state.parsing_args = 0;
599 if (quote == 0 && pfile->state.parsing_args == 2 && paren_depth == 1)
600 save_argument (&fmacro, out - pfile->out.base);
607 if (pfile->state.parsing_args == 2 && paren_depth == 0)
609 cpp_macro *m = fmacro.node->value.macro;
611 pfile->state.parsing_args = 0;
612 save_argument (&fmacro, out - pfile->out.base);
614 /* A single zero-length argument is no argument. */
617 && out == pfile->out.base + 1)
620 if (_cpp_arguments_ok (pfile, m, fmacro.node, fmacro.argc))
622 /* Remove the macro's invocation from the
623 output, and push its replacement text. */
624 pfile->out.cur = (pfile->out.base
627 replace_args_and_push (pfile, &fmacro);
635 /* At start of a line it's a directive. */
636 if (out - 1 == pfile->out.base && !pfile->state.in_directive)
638 /* This is a kludge. We want to have the ISO
639 preprocessor lex the next token. */
640 pfile->buffer->cur = cur;
641 if (_cpp_handle_directive (pfile, false /* indented */))
642 goto start_logical_line;
654 pfile->out.cur = out - 1;
656 _cpp_release_buff (pfile, fmacro.buff);
659 /* Push a context holding the replacement text of the macro NODE on
660 the context stack. NODE is either object-like, or a function-like
661 macro with no arguments. */
663 push_replacement_text (pfile, node)
667 cpp_macro *macro = node->value.macro;
669 _cpp_push_text_context (pfile, node, macro->exp.text, macro->count);
672 /* Returns TRUE if traditional macro recursion is detected. */
674 recursive_macro (pfile, node)
678 bool recursing = node->flags & NODE_DISABLED;
680 /* Object-like macros that are already expanding are necessarily
683 However, it is possible to have traditional function-like macros
684 that are not infinitely recursive but recurse to any given depth.
685 Further, it is easy to construct examples that get ever longer
686 until the point they stop recursing. So there is no easy way to
687 detect true recursion; instead we assume any expansion more than
688 20 deep since the first invocation of this macro must be
690 if (recursing && node->value.macro->fun_like)
693 cpp_context *context = pfile->context;
698 if (context->macro == node && depth > 20)
700 context = context->prev;
703 recursing = context != NULL;
707 cpp_error (pfile, DL_ERROR,
708 "detected recursion whilst expanding macro \"%s\"",
714 /* Push a context holding the replacement text of the function-like
715 macro FMACRO->node on the context stack, after substituting the
716 arguments recorded in FMACRO for the macro's parameters. */
718 replace_args_and_push (pfile, fmacro)
720 struct fun_macro *fmacro;
722 cpp_macro *macro = fmacro->node->value.macro;
724 if (macro->paramc == 0)
725 push_replacement_text (pfile, fmacro->node);
733 /* Calculate the length of the argument-replaced text. */
734 for (exp = macro->exp.text;;)
736 struct block *b = (struct block *) exp;
739 if (b->arg_index == 0)
741 len += (fmacro->args[b->arg_index]
742 - fmacro->args[b->arg_index - 1] - 1);
743 exp += BLOCK_LEN (b->text_len);
746 /* Allocate room for the expansion plus NUL. */
747 buff = _cpp_get_buff (pfile, len + 1);
749 /* Copy the expansion and replace arguments. */
750 p = BUFF_FRONT (buff);
751 for (exp = macro->exp.text;;)
753 struct block *b = (struct block *) exp;
756 memcpy (p, b->text, b->text_len);
758 if (b->arg_index == 0)
760 arglen = (fmacro->args[b->arg_index]
761 - fmacro->args[b->arg_index - 1] - 1);
762 memcpy (p, pfile->out.base + fmacro->args[b->arg_index - 1],
765 exp += BLOCK_LEN (b->text_len);
770 _cpp_push_text_context (pfile, fmacro->node, BUFF_FRONT (buff), len);
772 /* So we free buffer allocation when macro is left. */
773 pfile->context->buff = buff;
777 /* Read and record the parameters, if any, of a function-like macro
778 definition. Destroys pfile->out.cur.
780 Returns true on success, false on failure (syntax error or a
781 duplicate parameter). On success, CUR (pfile->context) is just
782 past the closing parenthesis. */
784 scan_parameters (pfile, macro)
788 const uchar *cur = CUR (pfile->context) + 1;
793 cur = skip_whitespace (pfile, cur, true /* skip_comments */);
795 if (is_idstart (*cur))
798 if (_cpp_save_parameter (pfile, macro, lex_identifier (pfile, cur)))
800 cur = skip_whitespace (pfile, CUR (pfile->context),
801 true /* skip_comments */);
811 ok = (*cur == ')' && macro->paramc == 0);
815 CUR (pfile->context) = cur + (*cur == ')');
820 /* Save the text from pfile->out.base to pfile->out.cur as
821 the replacement text for the current macro, followed by argument
822 ARG_INDEX, with zero indicating the end of the replacement
825 save_replacement_text (pfile, macro, arg_index)
828 unsigned int arg_index;
830 size_t len = pfile->out.cur - pfile->out.base;
833 if (macro->paramc == 0)
835 /* Object-like and function-like macros without parameters
836 simply store their NUL-terminated replacement text. */
837 exp = _cpp_unaligned_alloc (pfile, len + 1);
838 memcpy (exp, pfile->out.base, len);
840 macro->exp.text = exp;
845 /* Store the text's length (unsigned int), the argument index
846 (unsigned short, base 1) and then the text. */
847 size_t blen = BLOCK_LEN (len);
850 if (macro->count + blen > BUFF_ROOM (pfile->a_buff))
851 _cpp_extend_buff (pfile, &pfile->a_buff, macro->count + blen);
853 exp = BUFF_FRONT (pfile->a_buff);
854 block = (struct block *) (exp + macro->count);
855 macro->exp.text = exp;
857 /* Write out the block information. */
858 block->text_len = len;
859 block->arg_index = arg_index;
860 memcpy (block->text, pfile->out.base, len);
862 /* Lex the rest into the start of the output buffer. */
863 pfile->out.cur = pfile->out.base;
865 macro->count += blen;
867 /* If we've finished, commit the memory. */
869 BUFF_FRONT (pfile->a_buff) += macro->count;
873 /* Analyze and save the replacement text of a macro. Returns true on
876 _cpp_create_trad_definition (pfile, macro)
883 CUR (pfile->context) = pfile->buffer->cur;
885 /* Is this a function-like macro? */
886 if (* CUR (pfile->context) == '(')
888 /* Setting macro to NULL indicates an error occurred, and
889 prevents unnecessary work in scan_out_logical_line. */
890 if (!scan_parameters (pfile, macro))
894 /* Success. Commit the parameter array. */
895 macro->params = (cpp_hashnode **) BUFF_FRONT (pfile->a_buff);
/* Advance the front of a_buff to the address just past the last of
   the PARAMC parameter slots, i.e. the address of params[paramc].  */
896 BUFF_FRONT (pfile->a_buff) = (uchar *) &macro->params[macro->paramc];
901 /* Skip leading whitespace in the replacement text. */
903 = skip_whitespace (pfile, CUR (pfile->context),
904 CPP_OPTION (pfile, discard_comments_in_macro_exp));
/* Macro expansion is suppressed while the replacement text is
   scanned: prevent_expansion is raised around the call and lowered
   again afterwards.  */
906 pfile->state.prevent_expansion++;
907 scan_out_logical_line (pfile, macro);
908 pfile->state.prevent_expansion--;
913 /* Skip trailing white space. */
914 cur = pfile->out.base;
915 limit = pfile->out.cur;
916 while (limit > cur && is_space (limit[-1]))
918 pfile->out.cur = limit;
/* An argument index of zero marks the end of the replacement text.  */
919 save_replacement_text (pfile, macro, 0);
924 /* Copy SRC of length LEN to DEST, but convert all contiguous
925 whitespace to a single space, provided it is not in quotes. The
926 quote currently in effect is pointed to by PQUOTE, and is updated
927 by the function. Returns the number of bytes copied. */
929 canonicalize_text (dest, src, len, pquote)
935 uchar *orig_dest = dest;
936 uchar quote = *pquote;
940 if (is_space (*src) && !quote)
944 while (len && is_space (*src));
949 if (*src == '\'' || *src == '"')
953 else if (quote == *src)
956 *dest++ = *src++, len--;
961 return dest - orig_dest;
964 /* Returns true if the expansions of MACRO1 and MACRO2 differ in
965 anything other than the form of their whitespace. */
967 _cpp_expansions_different_trad (macro1, macro2)
968 const cpp_macro *macro1, *macro2;
/* P1 and P2 are scratch areas for the canonicalized text of MACRO1
   and MACRO2 respectively; both live in one allocation.  */
970 uchar *p1 = xmalloc (macro1->count + macro2->count);
971 uchar *p2 = p1 + macro1->count;
/* canonicalize_text reads *PQUOTE on entry, so both quote states
   must start out as "not inside a quote".  */
972 uchar quote1 = 0, quote2 = 0;
976 if (macro1->paramc > 0)
978 const uchar *exp1 = macro1->exp.text, *exp2 = macro2->exp.text;
983 struct block *b1 = (struct block *) exp1;
984 struct block *b2 = (struct block *) exp2;
986 if (b1->arg_index != b2->arg_index)
/* Compare the text of the two blocks after whitespace
   canonicalization; the quote state carries over between blocks.  */
989 len1 = canonicalize_text (p1, b1->text, b1->text_len, &quote1);
990 len2 = canonicalize_text (p2, b2->text, b2->text_len, &quote2);
991 if (len1 != len2 || memcmp (p1, p2, len1))
/* An argument index of zero marks the final block.  */
993 if (b1->arg_index == 0)
998 exp1 += BLOCK_LEN (b1->text_len);
999 exp2 += BLOCK_LEN (b2->text_len);
1004 len1 = canonicalize_text (p1, macro1->exp.text, macro1->count, &quote1);
1005 len2 = canonicalize_text (p2, macro2->exp.text, macro2->count, &quote2);
1006 mismatch = (len1 != len2 || memcmp (p1, p2, len1));
1013 /* Prepare to be able to scan the current buffer. */
1015 _cpp_set_trad_context (pfile)
1018 cpp_buffer *buffer = pfile->buffer;
1019 cpp_context *context = pfile->context;
1021 if (pfile->context->prev)
1024 pfile->out.cur = pfile->out.base;
1025 CUR (context) = buffer->cur;
1026 RLIMIT (context) = buffer->rlimit;
1027 check_output_buffer (pfile, RLIMIT (context) - CUR (context));