1 /* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
3 Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
5 /* This is the Assembler Pre-Processor
6 Copyright (C) 1987 Free Software Foundation, Inc.
8 This file is part of GAS, the GNU Assembler.
10 GAS is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
15 GAS is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GAS; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
24 /* App, the assembler pre-processor. This pre-processor strips out excess
25 spaces, turns single-quoted characters into a decimal constant, and turns
26 # <number> <filename> <garbage> into a .line <number>\n.app-file <filename>
27 pair. This needs better error-handling.
31 #include "as.h" /* For BAD_CASE() only */
34 #if (__STDC__ != 1) && !defined(const)
35 #define const /* Nothing */
39 static const char symbol_chars[] =
40 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
/* Character classes.  Each entry of the lex[] table holds one of these
   values for the character that indexes it.  */
42 #define LEX_IS_SYMBOL_COMPONENT 1 /* every character listed in symbol_chars */
43 #define LEX_IS_WHITESPACE 2 /* ' ' and '\t' */
44 #define LEX_IS_LINE_SEPARATOR 3 /* ';' plus line_separator_chars */
45 #define LEX_IS_COMMENT_START 4 /* every character in comment_chars */
46 #define LEX_IS_LINE_COMMENT_START 5 /* every character in line_comment_chars */
47 #define LEX_IS_TWOCHAR_COMMENT_1ST 6 /* '/' */
48 #define LEX_IS_TWOCHAR_COMMENT_2ND 7 /* '*' */
49 #define LEX_IS_STRINGQUOTE 8 /* '"', and '\'' under SINGLE_QUOTE_STRINGS */
50 #define LEX_IS_COLON 9 /* ':' */
51 #define LEX_IS_NEWLINE 10 /* '\n' */
52 #define LEX_IS_ONECHAR_QUOTE 11 /* '\'' by default */
/* Predicates: true when lex[] classifies character C as the named class.
   C is used directly as the table index, so it must be a valid index into
   lex[]; the visible callers test for EOF before applying these.  */
53 #define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
54 #define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
55 #define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
56 #define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
57 #define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
58 #define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
60 /* FIXME-soon: The entire lexer/parser thingy should be
61 built statically at compile time rather than dynamically
62 each and every time the assembler is run. xoxorich. */
69 lex[' '] = LEX_IS_WHITESPACE;
70 lex['\t'] = LEX_IS_WHITESPACE;
71 lex['\n'] = LEX_IS_NEWLINE;
72 lex[';'] = LEX_IS_LINE_SEPARATOR;
73 lex['"'] = LEX_IS_STRINGQUOTE;
74 lex['\''] = LEX_IS_ONECHAR_QUOTE;
75 lex[':'] = LEX_IS_COLON;
79 #ifdef SINGLE_QUOTE_STRINGS
80 lex['\''] = LEX_IS_STRINGQUOTE;
83 /* Note that these override the previous defaults, e.g. if ';'
85 is a comment char, then it isn't a line separator. */
86 for (p = symbol_chars; *p; ++p)
88 lex[*p] = LEX_IS_SYMBOL_COMPONENT;
89 } /* declare symbol characters */
91 for (p = line_comment_chars; *p; p++)
93 lex[*p] = LEX_IS_LINE_COMMENT_START;
94 } /* declare line comment chars */
96 for (p = comment_chars; *p; p++)
98 lex[*p] = LEX_IS_COMMENT_START;
99 } /* declare comment chars */
101 for (p = line_separator_chars; *p; p++)
103 lex[*p] = LEX_IS_LINE_SEPARATOR;
104 } /* declare line separators */
106 /* Only allow slash-star comments if slash is not in use */
109 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
111 /* FIXME-soon. This is a bad hack but otherwise, we
112 can't do c-style comments when '/' is a line
113 comment char. xoxorich. */
116 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
118 } /* do_scrub_begin() */
125 return getc (scrub_file);
132 ungetc (ch, scrub_file);
133 } /* scrub_to_file() */
136 char *scrub_last_string;
141 return scrub_string == scrub_last_string ? EOF : *scrub_string++;
142 } /* scrub_from_string() */
148 *--scrub_string = ch;
149 } /* scrub_to_string() */
151 /* Saved state of the scrubber */
153 static int old_state; /* state to go back to once a temporary state (string, pending output, comment flush) is done */
154 static char *out_string; /* pending output text, handed out one character at a time */
155 static char out_buf[20]; /* scratch buffer; receives the decimal text of a one-character constant */
156 static int add_newlines = 0; /* NOTE(review): presumably counts newlines swallowed inside comments that still have to be emitted -- confirm */
158 /* Data structure for saving the state of app across #include's. Note that
159 app is called asynchronously to the parsing of the .include's, so our
160 state at the time .include is interpreted is completely unrelated.
161 That's why we have to save it all. */
168 char out_buf[sizeof (out_buf)];
171 char *scrub_last_string;
/* Snapshot the complete scrubber state into a freshly allocated
   struct app_save and return it as an opaque char pointer, so that the
   state can be put back later.  */
178 register struct app_save *saved;
180 saved = (struct app_save *) xmalloc (sizeof (*saved));
181 saved->state = state;
182 saved->old_state = old_state;
183 saved->out_string = out_string;
/* memcpy takes the destination first: copy the live out_buf INTO the
   snapshot.  With the arguments the other way round the live buffer was
   overwritten with the uninitialized contents of the new allocation and
   nothing was saved.  */
184 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
185 saved->add_newlines = add_newlines;
186 saved->scrub_string = scrub_string;
187 saved->scrub_last_string = scrub_last_string;
188 saved->scrub_file = scrub_file;
190 /* do_scrub_begin() is not useful, just wastes time. */
191 return (char *) saved;
/* Put the scrubber state back from a struct app_save snapshot, which is
   received as the opaque pointer ARG.  */
198 register struct app_save *saved = (struct app_save *) arg;
200 /* There is no do_scrub_end (). */
201 state = saved->state;
202 old_state = saved->old_state;
203 out_string = saved->out_string;
/* memcpy takes the destination first: copy the snapshot back INTO the
   live out_buf.  With the arguments the other way round the buffer was
   never restored, although out_string may point into it.  */
204 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
205 add_newlines = saved->add_newlines;
206 scrub_string = saved->scrub_string;
207 scrub_last_string = saved->scrub_last_string;
208 scrub_file = saved->scrub_file;
213 /* @@ This assumes that \n &c are the same on host and target. This is not
240 do_scrub_next_char (get, unget)
244 /*State 0: beginning of normal line
245 1: After first whitespace on line (flush more white)
246 2: After first non-white (opcode) on line (keep 1white)
247 3: after second white on line (into operands) (flush white)
248 4: after putting out a .line, put out digits
249 5: parsing a string, then go to old-state
250 6: putting out \ escape in a "d string.
251 7: After putting out a .app-file, put out string.
252 8: After putting out a .app-file string, flush until newline.
253 -1: output string in out_string and go to the state in old_state
254 -2: flush text until a '*' '/' is seen, then go to state old_state
257 register int ch, ch2 = 0;
263 if (*out_string == 0)
277 while (ch != EOF && ch != '\n' && ch != '*');
278 if (ch == '\n' || ch == EOF)
281 /* At this point, ch must be a '*' */
282 while ((ch = (*get) ()) == '*')
286 if (ch == EOF || ch == '/')
295 if (ch == EOF || (ch >= '0' && ch <= '9'))
299 while (ch != EOF && IS_WHITESPACE (ch))
304 out_string = "\n.app-file ";
307 return *out_string++;
311 while (ch != EOF && ch != '\n')
319 if (lex[ch] == LEX_IS_STRINGQUOTE)
331 as_warn ("End of file in string: inserted '\"'");
346 /* Handle strings broken across lines, by turning '\n' into
362 #endif /* BACKSLASH_V */
372 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
374 as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
376 #else /* ONLY_STANDARD_ESCAPES */
378 /* Accept \x as x for any x */
380 #endif /* ONLY_STANDARD_ESCAPES */
383 as_warn ("End of file in string: '\"' inserted");
402 /* OK, we are somewhere in states 0 through 4 */
410 as_warn ("End of file not at end of a line: Newline inserted.");
416 case LEX_IS_WHITESPACE:
419 while (ch != EOF && IS_WHITESPACE (ch));
423 if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
428 (*unget) (ch); /* Put back */
429 return ' '; /* Always return one space at start of line */
432 /* If we're in state 2, we've seen a non-white
433 character followed by whitespace. If the next
434 character is ':', this is whitespace after a label
435 name which we can ignore. */
436 if (state == 2 && lex[ch] == LEX_IS_COLON)
446 goto recycle; /* Punted leading sp */
448 BAD_CASE (state); /* We can't get here */
452 return ' '; /* Sp after opco */
454 goto recycle; /* Sp in operands */
460 case LEX_IS_TWOCHAR_COMMENT_1ST:
462 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
469 if (ch2 != EOF && IS_NEWLINE (ch2))
473 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
476 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
482 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
487 as_warn ("End of file in multiline comment");
500 case LEX_IS_STRINGQUOTE:
506 case LEX_IS_ONECHAR_QUOTE:
510 as_warn ("End-of-file after a one-character quote; \\000 inserted");
516 ch = process_escape (ch);
518 sprintf (out_buf, "%d", (int) (unsigned char) ch);
521 /* None of these 'x constants for us. We want 'x'.
523 if ((ch = (*get) ()) != '\'')
525 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
526 as_warn ("Missing close quote: (assumed)");
531 if (strlen (out_buf) == 1)
537 out_string = out_buf;
538 return *out_string++;
547 /* Roll out a bunch of newlines from inside comments, etc. */
553 /* fall thru into... */
555 case LEX_IS_LINE_SEPARATOR:
559 case LEX_IS_LINE_COMMENT_START:
560 if (state != 0) /* Not at start of line, act normal */
563 /* FIXME-someday: The two character comment stuff was badly
564 thought out. On i386, we want '/' as line comment start
565 AND we want C style comments. hence this hack. The
566 whole lexical process should be reworked. xoxorich. */
568 if (ch == '/' && (ch2 = (*get) ()) == '*')
571 return (do_scrub_next_char (get, unget));
580 while (ch != EOF && IS_WHITESPACE (ch));
583 as_warn ("EOF in comment: Newline inserted");
586 if (ch < '0' || ch > '9')
588 /* Non-numerics: Eat whole comment line */
589 while (ch != EOF && !IS_NEWLINE (ch))
592 as_warn ("EOF in Comment: Newline inserted");
596 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
600 out_string = ".line ";
601 return *out_string++;
603 case LEX_IS_COMMENT_START:
606 while (ch != EOF && !IS_NEWLINE (ch));
608 as_warn ("EOF in comment: Newline inserted");
614 /* Some relatively `normal' character. */
617 state = 2; /* Now seeing opcode */
622 state = 2; /* Ditto */
627 return ch; /* Opcode or operands already */
635 const char comment_chars[] = "|";
636 const char line_comment_chars[] = "#";
643 while ((ch = do_scrub_next_char (stdin)) != EOF)