1 /* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
3 Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
5 /* This is the Assembler Pre-Processor
6 Copyright (C) 1987 Free Software Foundation, Inc.
8 This file is part of GAS, the GNU Assembler.
10 GAS is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
15 GAS is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GAS; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
24 /* App, the assembler pre-processor. This pre-processor strips out excess
25 spaces, turns single-quoted characters into a decimal constant, and turns
26 # <number> <filename> <garbage> into a .line <number>\n.app-file <filename> pair.
27 This needs better error-handling.
31 #include "as.h" /* For BAD_CASE() only */
33 #if (__STDC__ != 1) && !defined(const)
34 #define const /* Nothing */
/* Character-class table indexed by character code.  Each entry holds one
   of the LEX_IS_* codes; entries never assigned keep the static default
   of 0.  The table is filled in by do_scrub_begin(). */
37 static char lex [256];
/* Every character in this string is classified as
   LEX_IS_SYMBOL_COMPONENT by do_scrub_begin(). */
38 static char symbol_chars[] =
39 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
41 /* These will go in BSS if not defined elsewhere, producing empty strings. */
42 extern const char comment_chars[];
43 extern const char line_comment_chars[];
44 extern const char line_separator_chars[];
/* Character classes stored in lex[].  All values are non-zero, so a zero
   entry in lex[] means the character was never given a class. */
46 #define LEX_IS_SYMBOL_COMPONENT 1
47 #define LEX_IS_WHITESPACE 2
48 #define LEX_IS_LINE_SEPARATOR 3
49 #define LEX_IS_COMMENT_START 4
50 #define LEX_IS_LINE_COMMENT_START 5
51 #define LEX_IS_TWOCHAR_COMMENT_1ST 6
52 #define LEX_IS_TWOCHAR_COMMENT_2ND 7
53 #define LEX_IS_STRINGQUOTE 8
54 #define LEX_IS_COLON 9
55 #define LEX_IS_NEWLINE 10
56 #define LEX_IS_ONECHAR_QUOTE 11
/* Class predicates.  The argument is used directly as an index into
   lex[], so it must be a valid index for the 256-entry table; callers
   test for EOF before applying these to a freshly read character. */
57 #define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
58 #define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
59 #define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
60 #define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
61 #define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
62 #define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
64 /* FIXME-soon: The entire lexer/parser thingy should be
65 built statically at compile time rather than dynamically
66 each and every time the assembler is run. xoxorich. */
/* Build the lex[] character-class table.
   Fixed defaults are assigned first, then the characters listed in
   symbol_chars, line_comment_chars, comment_chars and
   line_separator_chars are classified in that order.  Each assignment
   simply overwrites the entry, so when a character appears in more than
   one list the list processed last decides its class. */
68 void do_scrub_begin() {
71 lex[' '] = LEX_IS_WHITESPACE;
72 lex['\t'] = LEX_IS_WHITESPACE;
73 lex['\n'] = LEX_IS_NEWLINE;
74 lex[';'] = LEX_IS_LINE_SEPARATOR;
75 lex['"'] = LEX_IS_STRINGQUOTE;
76 lex['\''] = LEX_IS_ONECHAR_QUOTE;
77 lex[':'] = LEX_IS_COLON;
/* NOTE(review): as shown here this replaces the LEX_IS_ONECHAR_QUOTE
   class assigned to '\'' just above; presumably it is guarded by a
   preprocessor conditional that is not visible in this view -- confirm. */
80 lex['\''] = LEX_IS_STRINGQUOTE;
82 /* Note that these override the previous defaults, e.g. if ';'
83 is a comment char, then it isn't a line separator. */
84 for (p = symbol_chars; *p; ++p) {
85 lex[*p] = LEX_IS_SYMBOL_COMPONENT;
86 } /* declare symbol characters */
88 for (p = line_comment_chars; *p; p++) {
89 lex[*p] = LEX_IS_LINE_COMMENT_START;
90 } /* declare line comment chars */
92 for (p = comment_chars; *p; p++) {
93 lex[*p] = LEX_IS_COMMENT_START;
94 } /* declare comment chars */
96 for (p = line_separator_chars; *p; p++) {
97 lex[*p] = LEX_IS_LINE_SEPARATOR;
98 } /* declare line separators */
/* NOTE(review): the two comments below describe the '/' and '*'
   assignments as conditional on the character not already being in use;
   the guarding tests are not visible in this view -- confirm. */
100 /* Only allow slash-star comments if slash is not in use */
102 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
104 /* FIXME-soon. This is a bad hack but otherwise, we
105 can't do c-style comments when '/' is a line
106 comment char. xoxorich. */
108 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
110 } /* do_scrub_begin() */
/* Return the next character read from scrub_file with getc(); the result
   is whatever getc() returns, including EOF. */
114 int scrub_from_file() {
115 return getc(scrub_file);
/* Push the character ch back onto scrub_file with ungetc(). */
118 void scrub_to_file(ch)
121 ungetc(ch,scrub_file);
122 } /* scrub_to_file() */
125 char *scrub_last_string;
/* Return the next character of the in-memory input, or EOF once the read
   cursor scrub_string has reached scrub_last_string.
   The byte is converted through unsigned char, the same way getc()
   reports characters, so a byte with the high bit set can neither compare
   equal to EOF nor turn into a negative index when the result is used to
   subscript lex[].  (scrub_string is presumably a char *; its declaration
   is not visible in this view.) */
127 int scrub_from_string() {
128 return scrub_string == scrub_last_string ? EOF : (unsigned char) *scrub_string++;
129 } /* scrub_from_string() */
131 void scrub_to_string(ch)
135 } /* scrub_to_string() */
137 /* Saved state of the scrubber */
139 static int old_state;
140 static char *out_string;
141 static char out_buf[20];
142 static int add_newlines = 0;
144 /* Data structure for saving the state of app across #include's. Note that
145 app is called asynchronously to the parsing of the .include's, so our
146 state at the time .include is interpreted is completely unrelated.
147 That's why we have to save it all. */
153 char out_buf[sizeof (out_buf)];
156 char *scrub_last_string;
/* Snapshot the scrubber state into a newly allocated struct app_save and
   hand it back as an opaque char *.  Every file-level state variable is
   copied, including the contents of out_buf. */
161 register struct app_save *saved;
163 saved = (struct app_save *) xmalloc(sizeof (*saved));
164 saved->state = state;
165 saved->old_state = old_state;
166 saved->out_string = out_string;
/* Copy the live buffer INTO the save record (destination first).  The
   previous bcopy() call had source and destination reversed, so it
   overwrote out_buf with uninitialised memory and saved nothing. */
167 memcpy(saved->out_buf, out_buf, sizeof(out_buf));
168 saved->add_newlines = add_newlines;
169 saved->scrub_string = scrub_string;
170 saved->scrub_last_string = scrub_last_string;
171 saved->scrub_file = scrub_file;
173 /* do_scrub_begin() is not useful, just wastes time. */
174 return (char *)saved;
/* Restore the scrubber state from the struct app_save that arg points to:
   every file-level state variable, including the contents of out_buf, is
   set back to the value recorded in the save record. */
180 register struct app_save *saved = (struct app_save *)arg;
182 /* There is no do_scrub_end (). */
183 state = saved->state;
184 old_state = saved->old_state;
185 out_string = saved->out_string;
/* Copy the saved buffer back INTO out_buf (destination first).  The
   previous call had the arguments reversed and overwrote the save record
   with the current buffer instead of restoring it. */
186 memcpy(out_buf, saved->out_buf, sizeof (out_buf));
187 add_newlines = saved->add_newlines;
188 scrub_string = saved->scrub_string;
189 scrub_last_string = saved->scrub_last_string;
190 scrub_file = saved->scrub_file;
195 int process_escape(ch)
/* Produce the next character of scrubbed output.
   get   - called with no arguments to fetch the next raw input character;
           its result is compared against EOF to detect end of input.
   unget - called with one character to push it back onto the input.
   The routine is a state machine over the file-level variables state and
   old_state; the state numbers are listed in the comment that follows.
   Replacement text longer than one character (for example the
   "\n.app-file " string or the decimal value formatted into out_buf) is
   handed out one character per call through out_string. */
218 int do_scrub_next_char(get,unget)
222 /*State 0: beginning of normal line
223 1: After first whitespace on line (flush more white)
224 2: After first non-white (opcode) on line (keep 1white)
225 3: after second white on line (into operands) (flush white)
226 4: after putting out a .line, put out digits
227 5: parsing a string, then go to old-state
228 6: putting out \ escape in a "d string.
229 7: After putting out a .app-file, put out string.
230 8: After putting out a .app-file string, flush until newline.
231 -1: output string in out_string and go to the state in old_state
232 -2: flush text until a '*' '/' is seen, then go to state old_state
235 register int ch, ch2 = 0;
250 } while(ch!=EOF && ch!='\n' && ch!='*');
251 if(ch=='\n' || ch==EOF)
254 /* At this point, ch must be a '*' */
255 while ( (ch=(*get)()) == '*' ){
258 if(ch==EOF || ch=='/')
267 if(ch==EOF || (ch>='0' && ch<='9'))
270 while(ch!=EOF && IS_WHITESPACE(ch))
274 out_string="\n.app-file ";
277 return *out_string++;
279 while(ch!=EOF && ch!='\n')
287 if(lex[ch]==LEX_IS_STRINGQUOTE) {
290 } else if(ch=='\\') {
294 as_warn("End of file in string: inserted '\"'");
306 /* This is neat. Turn "string
307 more string" into "string\n more string"
323 #endif /* BACKSLASH_V */
333 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
335 as_warn("Unknown escape '\\%c' in string: Ignored",ch);
337 #else /* ONLY_STANDARD_ESCAPES */
339 /* Accept \x as x for any x */
341 #endif /* ONLY_STANDARD_ESCAPES */
344 as_warn("End of file in string: '\"' inserted");
362 /* OK, we are somewhere in states 0 through 4 */
369 as_warn("End of file not at end of a line: Newline inserted.");
374 case LEX_IS_WHITESPACE:
376 while(ch!=EOF && IS_WHITESPACE(ch));
380 if(IS_COMMENT(ch) || (state==0 && IS_LINE_COMMENT(ch)) || ch=='/' || IS_LINE_SEPARATOR(ch)) {
384 (*unget)(ch); /* Put back */
385 return ' '; /* Always return one space at start of line */
388 case 0: state++; goto recycle; /* Punted leading sp */
389 case 1: BAD_CASE(state); /* We can't get here */
390 case 2: state++; (*unget)(ch); return ' '; /* Sp after opco */
391 case 3: goto recycle; /* Sp in operands */
392 default: BAD_CASE(state);
396 case LEX_IS_TWOCHAR_COMMENT_1ST:
398 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND) {
402 if(ch2 != EOF && IS_NEWLINE(ch2))
405 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
408 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)){
413 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
418 as_warn("End of file in multiline comment");
429 case LEX_IS_STRINGQUOTE:
435 case LEX_IS_ONECHAR_QUOTE:
438 as_warn("End-of-file after a one-character quote; \000 inserted");
443 ch = process_escape(ch);
445 sprintf(out_buf,"%d", (int)(unsigned char)ch);
447 /* None of these 'x constants for us. We want 'x'.
449 if ( (ch=(*get)()) != '\'' ) {
450 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
451 as_warn("Missing close quote: (assumed)");
460 return *out_string++;
469 /* Roll out a bunch of newlines from inside comments, etc. */
474 /* fall thru into... */
476 case LEX_IS_LINE_SEPARATOR:
480 case LEX_IS_LINE_COMMENT_START:
481 if (state != 0) /* Not at start of line, act normal */
484 /* FIXME-someday: The two character comment stuff was badly
485 thought out. On i386, we want '/' as line comment start
486 AND we want C style comments. hence this hack. The
487 whole lexical process should be reworked. xoxorich. */
489 if (ch == '/' && (ch2 = (*get)()) == '*') {
491 return(do_scrub_next_char(get, unget));
497 while(ch!=EOF && IS_WHITESPACE(ch));
499 as_warn("EOF in comment: Newline inserted");
502 if(ch<'0' || ch>'9') {
503 /* Non-numerics: Eat whole comment line */
504 while(ch!=EOF && !IS_NEWLINE(ch))
507 as_warn("EOF in Comment: Newline inserted");
511 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
516 return *out_string++;
518 case LEX_IS_COMMENT_START:
520 while(ch!=EOF && !IS_NEWLINE(ch));
522 as_warn("EOF in comment: Newline inserted");
528 /* Some relatively `normal' character. */
530 state=2; /* Now seeing opcode */
532 } else if(state==1) {
536 return ch; /* Opcode or operands already */
544 char comment_chars[] = "|";
545 char line_comment_chars[] = "#";
552 while((ch=do_scrub_next_char(stdin))!=EOF)