This commit was manufactured by cvs2svn to create branch 'binutils'.
[external/binutils.git] / gas / app.c
1 /* Copyright (C) 1987, 1990, 1991 Free Software Foundation, Inc.
2
3 Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
4 */
5 /* This is the Assembler Pre-Processor
6    Copyright (C) 1987 Free Software Foundation, Inc.
7
8 This file is part of GAS, the GNU Assembler.
9
10 GAS is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 1, or (at your option)
13 any later version.
14
15 GAS is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GAS; see the file COPYING.  If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
23
24 /* App, the assembler pre-processor.  This pre-processor strips out excess
25    spaces, turns single-quoted characters into a decimal constant, and turns
26    # <number> <filename> <garbage> into a .line <number>\n.app-file <filename> pair.
27    This needs better error-handling.
28  */
29
30 /* static const char rcsid[] = "$Id$"; */
31
32 #include <stdio.h>
33 #include "as.h"         /* For BAD_CASE() only */
34
#if !defined(__STDC__) && !defined(const)
#define const /* Nothing */
#endif

/* Character-class table, indexed by character code.  Each entry is 0
   (no special class) or one of the LEX_IS_* values; do_scrub_begin()
   fills it in.  */
static char lex [256];

/* Characters that do_scrub_begin() classes as LEX_IS_SYMBOL_COMPONENT.  */
static char symbol_chars[] =
	"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Character sets supplied from outside this file (only the TEST harness
   at the bottom defines any of them here).  Each is a NUL-terminated
   list walked by do_scrub_begin():
     comment_chars         start a comment anywhere on a line,
     line_comment_chars    start a comment only at the start of a line,
     line_separator_chars  separate statements, as ';' does by default.  */
extern const char comment_chars[];
extern const char line_comment_chars[];
extern const char line_separator_chars[];
47
/* Character classes stored in lex[].  A value of 0 means "no special
   class"; such characters are passed through as ordinary text.  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define LEX_IS_TWOCHAR_COMMENT_1ST	6
#define LEX_IS_TWOCHAR_COMMENT_2ND	7
#define LEX_IS_STRINGQUOTE		8
#define LEX_IS_COLON			9
#define LEX_IS_NEWLINE			10
#define LEX_IS_ONECHAR_QUOTE		11

/* Class predicates.  The argument is converted to unsigned char before
   it is used as a subscript, so a plain `char' holding a value above 127
   cannot index below the start of lex[] on machines where char is
   signed.  Callers must still test for EOF themselves before using
   these: EOF is not a character and has no class.  */
#define IS_SYMBOL_COMPONENT(c)		(lex [(unsigned char) (c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex [(unsigned char) (c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex [(unsigned char) (c)] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)			(lex [(unsigned char) (c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex [(unsigned char) (c)] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)			(lex [(unsigned char) (c)] == LEX_IS_NEWLINE)
65
66 void do_scrub_begin() {
67         const char *p;
68
69         lex [' ']               = LEX_IS_WHITESPACE;
70         lex ['\t']              = LEX_IS_WHITESPACE;
71         lex ['\n']              = LEX_IS_NEWLINE;
72         lex [';']               = LEX_IS_LINE_SEPARATOR;
73         lex ['"']               = LEX_IS_STRINGQUOTE;
74         lex ['\'']              = LEX_IS_ONECHAR_QUOTE;
75         lex [':']               = LEX_IS_COLON;
76
77         /* Note that these override the previous defaults, e.g. if ';'
78            is a comment char, then it isn't a line separator.  */
79         for (p =symbol_chars;*p;++p)
80                 lex[*p] = LEX_IS_SYMBOL_COMPONENT;
81         for (p=comment_chars;*p;p++)
82                 lex[*p] = LEX_IS_COMMENT_START;
83         for (p=line_comment_chars;*p;p++)
84                 lex[*p] = LEX_IS_LINE_COMMENT_START;
85         for (p=line_separator_chars;*p;p++)
86                 lex[*p] = LEX_IS_LINE_SEPARATOR;
87
88         /* Only allow slash-star comments if slash is not in use */
89         if (lex['/'] == 0) {
90                 lex ['/']       = LEX_IS_TWOCHAR_COMMENT_1ST;
91                 lex ['*']       = LEX_IS_TWOCHAR_COMMENT_2ND;
92         }
93 }
94
/* Stream read by scrub_from_file() and pushed back to by scrub_to_file().  */
FILE *scrub_file;

/* "get" routine for file input: fetch the next character of scrub_file,
   or EOF, exactly as getc() reports it.  */
int scrub_from_file()
{
	int next_ch;

	next_ch = getc(scrub_file);
	return next_ch;
}
100
101 void scrub_to_file(ch)
102 int ch;
103 {
104         ungetc(ch,scrub_file);
105 } /* scrub_to_file() */
106
/* In-memory input: scrub_string points at the next unread character and
   scrub_last_string points just past the final character.  */
char *scrub_string;
char *scrub_last_string;

/* "get" routine for in-memory input.  Returns the next character as a
   non-negative value (like getc()), or EOF once scrub_string has reached
   scrub_last_string.

   The character is converted to unsigned char before being widened to
   int.  Without that, where plain char is signed, the byte 0xFF would
   come back as -1 and be mistaken for EOF, and other bytes above 127
   would come back negative and later be used as table subscripts.  */
int scrub_from_string() {
	if (scrub_string == scrub_last_string)
		return EOF;
	return (unsigned char) *scrub_string++;
} /* scrub_from_string() */

/* "unget" routine for in-memory input: step scrub_string back one place
   and store CH there, so the next scrub_from_string() returns it.

   A request to push back EOF is ignored, matching what ungetc() does for
   the file-based routines; storing it would put a stray byte into the
   buffer that scrub_from_string() would then return as a character.  */
void scrub_to_string(ch)
int ch;
{
	if (ch == EOF)
		return;
	*--scrub_string = ch;
} /* scrub_to_string() */
119
/* Scrubber state kept between calls to do_scrub_next_char().  */

/* Current state number; the table at the top of do_scrub_next_char()
   lists the values.  */
static int state;

/* State to return to when a temporary state (-1, -2 or 5) finishes.  */
static int old_state;

/* Next character to hand out while in state -1.  */
static char *out_string;

/* Holds the decimal text generated for a one-character quote constant.  */
static char out_buf[20];

/* Newlines that were swallowed (inside a comment or a continued string)
   and are still to be re-issued at the next real newline.  */
static int add_newlines;

/* Snapshot of everything above, plus the current input source, taken
   across an #include.  App is called asynchronously to the parsing of
   the .include's, so the state at the moment a .include is interpreted
   is completely unrelated to the new file; hence all of it is saved.
   Filled in by app_push() and consumed by app_pop().  */

struct app_save {
  int   state;
  int   old_state;
  char *out_string;
  char  out_buf[sizeof (out_buf)];
  int   add_newlines;
  char *scrub_string;
  char *scrub_last_string;
  FILE *scrub_file;
};
142
143 char *app_push() {
144   register struct app_save *saved;
145
146   saved = (struct app_save *) xmalloc(sizeof (*saved));
147   saved->state          = state;
148   saved->old_state      = old_state;
149   saved->out_string     = out_string;
150   bcopy(saved->out_buf, out_buf, sizeof(out_buf));
151   saved->add_newlines   = add_newlines;
152   saved->scrub_string   = scrub_string;
153   saved->scrub_last_string = scrub_last_string;
154   saved->scrub_file     = scrub_file;
155
156   /* do_scrub_begin() is not useful, just wastes time. */
157   return (char *)saved;
158 }
159
160 void app_pop(arg)
161 char *arg;
162 {
163   register struct app_save *saved = (struct app_save *)arg;
164
165   /* There is no do_scrub_end (). */
166   state         = saved->state;
167   old_state     = saved->old_state;
168   out_string    = saved->out_string;
169   bcopy (out_buf,  saved->out_buf, sizeof (out_buf));
170   add_newlines  = saved->add_newlines;
171   scrub_string  = saved->scrub_string;
172   scrub_last_string = saved->scrub_last_string;
173   scrub_file    = saved->scrub_file;
174
175   free (arg);
176 } /* app_pop() */
177
178 int do_scrub_next_char(get,unget)
179 int (*get)();
180 void (*unget)();
181 {
182         /*State 0: beginning of normal line
183                 1: After first whitespace on line (flush more white)
184                 2: After first non-white (opcode) on line (keep 1white)
185                 3: after second white on line (into operands) (flush white)
186                 4: after putting out a .line, put out digits
187                 5: parsing a string, then go to old-state
188                 6: putting out \ escape in a "d string.
189                 7: After putting out a .app-file, put out string.
190                 8: After putting out a .app-file string, flush until newline.
191                 -1: output string in out_string and go to the state in old_state
192                 -2: flush text until a '*' '/' is seen, then go to state old_state
193         */
194
195         register int ch, ch2;
196
197         switch (state) {
198         case -1: 
199                 ch= *out_string++;
200                 if(*out_string==0) {
201                         state=old_state;
202                         old_state=3;
203                 }
204                 return ch;
205         
206         case -2:
207                 for(;;) {
208                         do {
209                                 ch=(*get)();
210                         } while(ch!=EOF && ch!='\n' && ch!='*');
211                         if(ch=='\n' || ch==EOF)
212                                 return ch;
213
214                         /* At this point, ch must be a '*' */
215                         while ( (ch=(*get)()) == '*' ){
216                                 ;
217                         }
218                         if(ch==EOF || ch=='/')
219                                 break;
220                         (*unget)(ch);
221                 }
222                 state=old_state;
223                 return ' ';
224
225         case 4:
226                 ch=(*get)();
227                 if(ch==EOF || (ch>='0' && ch<='9'))
228                         return ch;
229                 else {
230                         while(ch!=EOF && IS_WHITESPACE(ch))
231                                 ch=(*get)();
232                         if(ch=='"') {
233                                 (*unget)(ch);
234                                 out_string="\n.app-file ";
235                                 old_state=7;
236                                 state= -1;
237                                 return *out_string++;
238                         } else {
239                                 while(ch!=EOF && ch!='\n')
240                                         ch=(*get)();
241                                 return ch;
242                         }
243                 }
244
245         case 5:
246                 ch=(*get)();
247                 if(ch=='"') {
248                         state=old_state;
249                         return '"';
250                 } else if(ch=='\\') {
251                         state=6;
252                         return ch;
253                 } else if(ch==EOF) {
254                         as_warn("End of file in string: inserted '\"'");
255                         state=old_state;
256                         (*unget)('\n');
257                         return '"';
258                 } else {
259                         return ch;
260                 }
261         
262         case 6:
263                 state=5;
264                 ch=(*get)();
265                 switch(ch) {
266                         /* This is neet.  Turn "string
267                            more string" into "string\n  more string"
268                          */
269                 case '\n':
270                         (*unget)('n');
271                         add_newlines++;
272                         return '\\';
273
274                 case '"':
275                 case '\\':
276                 case 'b':
277                 case 'f':
278                 case 'n':
279                 case 'r':
280                 case 't':
281 #ifdef BACKSLASH_V
282                 case 'v':
283 #endif /* BACKSLASH_V */
284                 case '0':
285                 case '1':
286                 case '2':
287                 case '3':
288                 case '4':
289                 case '5':
290                 case '6':
291                 case '7':
292                         break;
293
294 #ifdef ONLY_STANDARD_ESCAPES
295                 default:
296                         as_warn("Unknown escape '\\%c' in string: Ignored",ch);
297                         break;
298 #else /* ONLY_STANDARD_ESCAPES */
299                 default:
300                         /* Accept \x as x for any x */
301                         break;
302 #endif /* ONLY_STANDARD_ESCAPES */
303
304                 case EOF:
305                         as_warn("End of file in string: '\"' inserted");
306                         return '"';
307                 }
308                 return ch;
309
310         case 7:
311                 ch=(*get)();
312                 state=5;
313                 old_state=8;
314                 return ch;
315
316         case 8:
317                 do ch= (*get)();
318                 while(ch!='\n');
319                 state=0;
320                 return ch;
321         }
322
323         /* OK, we are somewhere in states 0 through 4 */
324
325 /* flushchar: */
326         ch=(*get)();
327  recycle:
328         if (ch == EOF) {
329                 if (state != 0)
330                         as_warn("End of file not at end of a line: Newline inserted.");
331                 return ch;
332         }
333
334         switch (lex[ch]) {
335         case LEX_IS_WHITESPACE:
336                 do ch=(*get)();
337                 while(ch!=EOF && IS_WHITESPACE(ch));
338                 if(ch==EOF)
339                         return ch;
340                 if(IS_COMMENT(ch) || (state==0 && IS_LINE_COMMENT(ch)) || ch=='/' || IS_LINE_SEPARATOR(ch)) {
341                         goto recycle;
342                 }
343                 switch (state) {
344                 case 0: state++; goto recycle;  /* Punted leading sp */
345                 case 1:          BAD_CASE(state); /* We can't get here */
346                 case 2: state++; (*unget)(ch); return ' ';  /* Sp after opco */
347                 case 3:          goto recycle;  /* Sp in operands */
348                 default:        BAD_CASE(state);
349                 }
350                 break;
351
352         case LEX_IS_TWOCHAR_COMMENT_1ST:
353                 ch2=(*get)();
354                 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND) {
355                         for(;;) {
356                                 do {
357                                         ch2=(*get)();
358                                         if(ch2 != EOF && IS_NEWLINE(ch2))
359                                                 add_newlines++;
360                                 } while(ch2!=EOF &&
361                                      (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
362
363                                 while (ch2!=EOF &&
364                                      (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)){
365                                         ch2=(*get)();
366                                 }
367
368                                 if(ch2==EOF 
369                                   || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
370                                         break;
371                                 (*unget)(ch);
372                         }
373                         if(ch2==EOF)
374                                 as_warn("End of file in multiline comment");
375
376                         ch = ' ';
377                         goto recycle;
378                 } else {
379                         if(ch2!=EOF)
380                                 (*unget)(ch2);
381                         return ch;
382                 }
383                 break;
384
385         case LEX_IS_STRINGQUOTE:
386                 old_state=state;
387                 state=5;
388                 return ch;
389
390         case LEX_IS_ONECHAR_QUOTE:
391                 ch=(*get)();
392                 if(ch==EOF) {
393                         as_warn("End-of-file after a one-character quote; \000 inserted");
394                         ch=0;
395                 }
396                 sprintf(out_buf,"%d", (int)(unsigned char)ch);
397
398                 /* None of these 'x constants for us.  We want 'x'.
399                  */
400                 if ( (ch=(*get)()) != '\'' ) {
401 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
402                         as_warn("Missing close quote: (assumed)");
403 #else
404                         (*unget)(ch);
405 #endif
406                 }
407
408                 old_state=state;
409                 state= -1;
410                 out_string=out_buf;
411                 return *out_string++;
412
413         case LEX_IS_COLON:
414                 if(state!=3)
415                         state=0;
416                 return ch;
417
418         case LEX_IS_NEWLINE:
419                 /* Roll out a bunch of newlines from inside comments, etc.  */
420                 if(add_newlines) {
421                         --add_newlines;
422                         (*unget)(ch);
423                 }
424                 /* fall thru into... */
425
426         case LEX_IS_LINE_SEPARATOR:
427                 state=0;
428                 return ch;
429
430         case LEX_IS_LINE_COMMENT_START:
431                 if (state != 0)         /* Not at start of line, act normal */
432                         goto de_fault;
433                 do ch=(*get)();
434                 while(ch!=EOF && IS_WHITESPACE(ch));
435                 if(ch==EOF) {
436                         as_warn("EOF in comment:  Newline inserted");
437                         return '\n';
438                 }
439                 if(ch<'0' || ch>'9') {
440                         /* Non-numerics:  Eat whole comment line */
441                         while(ch!=EOF && !IS_NEWLINE(ch))
442                                 ch=(*get)();
443                         if(ch==EOF)
444                                 as_warn("EOF in Comment: Newline inserted");
445                         state=0;
446                         return '\n';
447                 }
448                 /* Numerics begin comment.  Perhaps CPP `# 123 "filename"' */
449                 (*unget)(ch);
450                 old_state=4;
451                 state= -1;
452                 out_string=".line ";
453                 return *out_string++;
454
455         case LEX_IS_COMMENT_START:
456                 do ch=(*get)();
457                 while(ch!=EOF && !IS_NEWLINE(ch));
458                 if(ch==EOF)
459                         as_warn("EOF in comment:  Newline inserted");
460                 state=0;
461                 return '\n';
462
463         default:
464         de_fault:
465                 /* Some relatively `normal' character.  */
466                 if(state==0) {
467                         state=2;        /* Now seeing opcode */
468                         return ch;
469                 } else if(state==1) {
470                         state=2;        /* Ditto */
471                         return ch;
472                 } else {
473                         return ch;      /* Opcode or operands already */
474                 }
475         }
476         return -1;
477 }
478
#ifdef TEST

/* Stand-alone filter for trying the scrubber out: copies scrubbed stdin
   to stdout.  The three character sets below are the ones declared
   extern near the top of this file; they are defined const here so the
   definitions agree with those declarations.  */
const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
const char line_separator_chars[] = "";

int main()
{
	int	ch;

	/* Build the class table, then read stdin through the file-based
	   get/unget pair.  */
	do_scrub_begin();
	scrub_file = stdin;
	while((ch=do_scrub_next_char(scrub_from_file,scrub_to_file))!=EOF)
		putc(ch,stdout);
	return 0;
}

/* Minimal as_warn() for the test build: print STR and a newline on
   stderr.  Any further arguments are ignored, so a '%' directive in STR
   is printed literally.
   NOTE(review): if as.h declares as_warn() with a different type this
   definition will need adjusting to match -- confirm against as.h.  */
as_warn(str)
char *str;
{
	fputs(str,stderr);
	putc('\n',stderr);
}
#endif
500
501 /*
502  * Local Variables:
503  * comment-column: 0
504  * fill-column: 131
505  * End:
506  */
507
508 /* end of app.c */