1 /* This is the Assembler Pre-Processor
2 Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
4 This file is part of GAS, the GNU Assembler.
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GAS is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
20 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
21 /* App, the assembler pre-processor. This pre-processor strips out excess
22 spaces, turns single-quoted characters into a decimal constant, and turns
23 # <number> <filename> <garbage> into a .line <number>\n.file <filename>
24 pair. This needs better error-handling.
28 #include "as.h" /* For BAD_CASE() only */
30 #if (__STDC__ != 1) && !defined(const)
31 #define const /* Nothing */
35 static const char symbol_chars[] =
36 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
38 #define LEX_IS_SYMBOL_COMPONENT 1 /* Class codes stored in lex[].  This one: every char in symbol_chars.  */
39 #define LEX_IS_WHITESPACE 2 /* Space and tab.  */
40 #define LEX_IS_LINE_SEPARATOR 3 /* Semicolon by default, plus every char in line_separator_chars.  */
41 #define LEX_IS_COMMENT_START 4 /* Every char in comment_chars.  */
42 #define LEX_IS_LINE_COMMENT_START 5 /* Every char in line_comment_chars.  */
43 #define LEX_IS_TWOCHAR_COMMENT_1ST 6 /* Slash, when slash-star comments are allowed.  */
44 #define LEX_IS_TWOCHAR_COMMENT_2ND 7 /* Star, the second char of a slash-star comment opener.  */
45 #define LEX_IS_STRINGQUOTE 8 /* Double quote; also single quote when SINGLE_QUOTE_STRINGS is defined.  */
46 #define LEX_IS_COLON 9 /* Colon.  */
47 #define LEX_IS_NEWLINE 10 /* Newline.  */
48 #define LEX_IS_ONECHAR_QUOTE 11 /* Single quote introducing a one-character constant.  */
49 #define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT) /* Predicates: true when lex[c] holds the named class.  */
50 #define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE) /* NOTE(review): c indexes lex[] directly; visible callers test ch != EOF first.  */
51 #define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR) /* Later table assignments override earlier ones, so a char has one class.  */
52 #define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START) /* Comment char valid anywhere on a line.  */
53 #define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START) /* Comment char honoured only at the start of a line (state 0).  */
54 #define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE) /* True for the newline char.  */
56 static int process_escape PARAMS ((int));
58 /* FIXME-soon: The entire lexer/parser thingy should be
59 built statically at compile time rather than dynamically
60 each and every time the assembler is run. xoxorich. */
67 lex[' '] = LEX_IS_WHITESPACE;
68 lex['\t'] = LEX_IS_WHITESPACE;
69 lex['\n'] = LEX_IS_NEWLINE;
70 lex[';'] = LEX_IS_LINE_SEPARATOR;
71 lex['"'] = LEX_IS_STRINGQUOTE;
73 lex['\''] = LEX_IS_ONECHAR_QUOTE;
75 lex[':'] = LEX_IS_COLON;
79 #ifdef SINGLE_QUOTE_STRINGS
80 lex['\''] = LEX_IS_STRINGQUOTE;
83 /* Note that these override the previous defaults, e.g. if ';'
85 is a comment char, then it isn't a line separator. */
86 for (p = symbol_chars; *p; ++p)
88 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
89 } /* declare symbol characters */
91 for (p = comment_chars; *p; p++)
93 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
94 } /* declare comment chars */
96 for (p = line_comment_chars; *p; p++)
98 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
99 } /* declare line comment chars */
101 for (p = line_separator_chars; *p; p++)
103 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
104 } /* declare line separators */
106 /* Only allow slash-star comments if slash is not in use */
109 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
111 /* FIXME-soon. This is a bad hack but otherwise, we
112 can't do c-style comments when '/' is a line
113 comment char. xoxorich. */
116 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
118 } /* do_scrub_begin() */
125 return getc (scrub_file);
132 ungetc (ch, scrub_file);
133 } /* scrub_to_file() */
136 char *scrub_last_string;
141 return scrub_string == scrub_last_string ? EOF : *scrub_string++;
142 } /* scrub_from_string() */
148 *--scrub_string = ch;
149 } /* scrub_to_string() */
151 /* Saved state of the scrubber */
153 static int old_state; /* State to return to after states 5, -1 and -2 (see the state table in do_scrub_next_char).  */
154 static char *out_string; /* Pending text that is handed back one character per call.  */
155 static char out_buf[20]; /* Receives the decimal text of a one-character quote constant; out_string is then pointed at it.  */
156 static int add_newlines = 0; /* NOTE(review): presumably the number of newlines still owed to the output after comments -- confirm.  */
158 /* Data structure for saving the state of app across #include's. Note that
159 app is called asynchronously to the parsing of the .include's, so our
160 state at the time .include is interpreted is completely unrelated.
161 That's why we have to save it all. */
168 char out_buf[sizeof (out_buf)];
171 char *scrub_last_string;
178 register struct app_save *saved;
180 saved = (struct app_save *) xmalloc (sizeof (*saved));
181 saved->state = state;
182 saved->old_state = old_state;
183 saved->out_string = out_string;
184 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
185 saved->add_newlines = add_newlines;
186 saved->scrub_string = scrub_string;
187 saved->scrub_last_string = scrub_last_string;
188 saved->scrub_file = scrub_file;
190 /* do_scrub_begin() is not useful, just wastes time. */
191 return (char *) saved;
198 register struct app_save *saved = (struct app_save *) arg;
200 /* There is no do_scrub_end (). */
201 state = saved->state;
202 old_state = saved->old_state;
203 out_string = saved->out_string;
204 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
205 add_newlines = saved->add_newlines;
206 scrub_string = saved->scrub_string;
207 scrub_last_string = saved->scrub_last_string;
208 scrub_file = saved->scrub_file;
213 /* @@ This assumes that \n &c are the same on host and target. This is not
240 do_scrub_next_char (get, unget)
244 /*State 0: beginning of normal line
245 1: After first whitespace on line (flush more white)
246 2: After first non-white (opcode) on line (keep 1white)
247 3: after second white on line (into operands) (flush white)
248 4: after putting out a .line, put out digits
249 5: parsing a string, then go to old-state
250 6: putting out \ escape in a quoted string.
251 7: After putting out a .appfile, put out string.
252 8: After putting out a .appfile string, flush until newline.
253 9: After seeing symbol char in state 3 (keep 1white after symchar)
254 10: After seeing whitespace in state 9 (keep white before symchar)
255 -1: output string in out_string and go to the state in old_state
256 -2: flush text until a '*' '/' is seen, then go to state old_state
259 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
260 constructs like ``.loc 1 20''. This was turning into ``.loc
261 120''. States 9 and 10 ensure that a space is never dropped in
262 between characters which could appear in an identifier. Ian
263 Taylor, ian@cygnus.com. */
265 register int ch, ch2 = 0;
266 int not_cpp_line = 0;
272 if (*out_string == 0)
286 while (ch != EOF && ch != '\n' && ch != '*');
287 if (ch == '\n' || ch == EOF)
290 /* At this point, ch must be a '*' */
291 while ((ch = (*get) ()) == '*')
295 if (ch == EOF || ch == '/')
304 if (ch == EOF || (ch >= '0' && ch <= '9'))
308 while (ch != EOF && IS_WHITESPACE (ch))
313 out_string = "\n\t.appfile ";
316 return *out_string++;
320 while (ch != EOF && ch != '\n')
329 if (lex[ch] == LEX_IS_STRINGQUOTE)
341 as_warn ("End of file in string: inserted '\"'");
356 /* Handle strings broken across lines, by turning '\n' into
372 #endif /* BACKSLASH_V */
384 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
386 as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
388 #else /* ONLY_STANDARD_ESCAPES */
390 /* Accept \x as x for any x */
392 #endif /* ONLY_STANDARD_ESCAPES */
395 as_warn ("End of file in string: '\"' inserted");
414 /* OK, we are somewhere in states 0 through 4 or 9 through 10 */
422 as_warn ("End of file not at end of a line: Newline inserted.");
428 case LEX_IS_WHITESPACE:
430 /* Preserve a single whitespace character at the beginning of
439 while (ch != EOF && IS_WHITESPACE (ch));
443 if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
445 /* cpp never outputs a leading space before the #, so try to
446 avoid being confused. */
451 /* If we're in state 2, we've seen a non-white
452 character followed by whitespace. If the next
453 character is ':', this is whitespace after a label
454 name which we can ignore. */
455 if (state == 2 && lex[ch] == LEX_IS_COLON)
461 #if defined (LABELS_WITHOUT_COLONS) || defined (MRI)
462 /* Like above, but handles case where labels are not
463 required to have colons (and therefore must be identified
464 by their *position* in the input stream.) For a testcase
465 see hppa/more.parse/labelbug.s.
467 This also has the effect of sometimes leaving a whitespace
468 before a newline. Instead of trying to rework this horribly
469 broken and hairy code I'm just going to zap the extra space here. */
470 if (state == 2 && lex[ch] == LEX_IS_SYMBOL_COMPONENT)
476 /* Don't emit a space before a newline. */
477 if (state == 2 && lex[ch] == LEX_IS_NEWLINE)
488 goto recycle; /* Punted leading sp */
490 /* We can arrive here if we leave a leading whitespace character
491 at the beginning of a line. */
496 return ' '; /* Sp after opco */
498 goto recycle; /* Sp in operands */
501 state = 10; /* Sp after symbol char */
508 case LEX_IS_TWOCHAR_COMMENT_1ST:
510 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
517 if (ch2 != EOF && IS_NEWLINE (ch2))
521 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
524 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
530 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
535 as_warn ("End of file in multiline comment");
544 if (state == 9 || state == 10)
550 case LEX_IS_STRINGQUOTE:
551 if (state == 9 || state == 10)
559 case LEX_IS_ONECHAR_QUOTE:
563 as_warn ("End-of-file after a one-character quote; \\000 inserted");
569 ch = process_escape (ch);
571 sprintf (out_buf, "%d", (int) (unsigned char) ch);
574 /* None of these 'x constants for us. We want 'x'. */
575 if ((ch = (*get) ()) != '\'')
577 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
578 as_warn ("Missing close quote: (assumed)");
583 if (strlen (out_buf) == 1)
587 if (state == 9 || state == 10)
592 out_string = out_buf;
593 return *out_string++;
597 if (state == 9 || state == 10)
604 /* Roll out a bunch of newlines from inside comments, etc. */
610 /* fall thru into... */
612 case LEX_IS_LINE_SEPARATOR:
616 case LEX_IS_LINE_COMMENT_START:
617 if (state == 0) /* Only comment at start of line. */
619 /* FIXME-someday: The two character comment stuff was badly
620 thought out. On i386, we want '/' as line comment start
621 AND we want C style comments. Hence this hack. The
622 whole lexical process should be reworked. xoxorich. */
629 return (do_scrub_next_char (get, unget));
642 while (ch != EOF && IS_WHITESPACE (ch));
645 as_warn ("EOF in comment: Newline inserted");
648 if (ch < '0' || ch > '9' || not_cpp_line)
650 /* Non-numerics: Eat whole comment line */
651 while (ch != EOF && !IS_NEWLINE (ch))
654 as_warn ("EOF in Comment: Newline inserted");
658 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
662 out_string = "\t.appline ";
663 return *out_string++;
666 /* We have a line comment character which is not at the start of
667 a line. If this is also a normal comment character, fall
668 through. Otherwise treat it as a default character. */
669 if (strchr (comment_chars, ch) == NULL)
672 case LEX_IS_COMMENT_START:
675 while (ch != EOF && !IS_NEWLINE (ch));
677 as_warn ("EOF in comment: Newline inserted");
681 case LEX_IS_SYMBOL_COMPONENT:
684 /* This is a symbol character following another symbol
685 character, with whitespace in between. We skipped the
686 whitespace earlier, so output it now. */
696 /* Some relatively `normal' character. */
699 state = 2; /* Now seeing opcode */
704 state = 2; /* Ditto */
709 if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
713 else if (state == 10)
720 return ch; /* Opcode or operands already */
728 const char comment_chars[] = "|";
729 const char line_comment_chars[] = "#";
736 while ((ch = do_scrub_next_char (stdin)) != EOF)