gas/app.c

   1 /* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
   2
   3    Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
   4    */
   5 /* This is the Assembler Pre-Processor
   6    Copyright (C) 1987 Free Software Foundation, Inc.
   7
   8    This file is part of GAS, the GNU Assembler.
   9
  10    GAS is free software; you can redistribute it and/or modify
  11    it under the terms of the GNU General Public License as published by
  12    the Free Software Foundation; either version 2, or (at your option)
  13    any later version.
  14
  15    GAS is distributed in the hope that it will be useful,
  16    but WITHOUT ANY WARRANTY; without even the implied warranty of
  17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18    GNU General Public License for more details.
  19
  20    You should have received a copy of the GNU General Public License
  21    along with GAS; see the file COPYING.  If not, write to
  22    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
  23
  24 /* App, the assembler pre-processor.  This pre-processor strips out excess
  25    spaces, turns single-quoted characters into a decimal constant, and turns
  26    # <number> <filename> <garbage> into a .line <number>\n.app-file <filename> pair.
  27    This needs better error-handling.
  28    */
  29
  30 #include <stdio.h>
  31 #include "as.h"         /* For BAD_CASE() only */
  32
  33 #if (__STDC__ != 1) && !defined(const)
  34 #define const /* Nothing */
  35 #endif
  36
  37 static char     lex [256];
  38 static char     symbol_chars[] =
  39     "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
  40
  41 /* These will go in BSS if not defined elsewhere, producing empty strings. */
  42 extern const char comment_chars[];
  43 extern const char line_comment_chars[];
  44 extern const char line_separator_chars[];
  45
  46 #define LEX_IS_SYMBOL_COMPONENT         1
  47 #define LEX_IS_WHITESPACE               2
  48 #define LEX_IS_LINE_SEPARATOR           3
  49 #define LEX_IS_COMMENT_START            4
  50 #define LEX_IS_LINE_COMMENT_START       5
  51 #define LEX_IS_TWOCHAR_COMMENT_1ST      6
  52 #define LEX_IS_TWOCHAR_COMMENT_2ND      7
  53 #define LEX_IS_STRINGQUOTE              8
  54 #define LEX_IS_COLON                    9
  55 #define LEX_IS_NEWLINE                  10
  56 #define LEX_IS_ONECHAR_QUOTE            11
  57 #define IS_SYMBOL_COMPONENT(c)          (lex[c] == LEX_IS_SYMBOL_COMPONENT)
  58 #define IS_WHITESPACE(c)                (lex[c] == LEX_IS_WHITESPACE)
  59 #define IS_LINE_SEPARATOR(c)            (lex[c] == LEX_IS_LINE_SEPARATOR)
  60 #define IS_COMMENT(c)                   (lex[c] == LEX_IS_COMMENT_START)
  61 #define IS_LINE_COMMENT(c)              (lex[c] == LEX_IS_LINE_COMMENT_START)
  62 #define IS_NEWLINE(c)                   (lex[c] == LEX_IS_NEWLINE)
  63
  64 /* FIXME-soon: The entire lexer/parser thingy should be
  65    built statically at compile time rather than dynamically
  66    each and every time the assembler is run.  xoxorich. */
  67
  68 void do_scrub_begin() {
  69         const char *p;
  70
  71         lex[' '] = LEX_IS_WHITESPACE;
  72         lex['\t'] = LEX_IS_WHITESPACE;
  73         lex['\n'] = LEX_IS_NEWLINE;
  74         lex[';'] = LEX_IS_LINE_SEPARATOR;
  75         lex['"'] = LEX_IS_STRINGQUOTE;
  76         lex['\''] = LEX_IS_ONECHAR_QUOTE;
  77         lex[':'] = LEX_IS_COLON;
  78
  79 #ifdef MRI
  80         lex['\''] = LEX_IS_STRINGQUOTE;
  81 #endif
  82         /* Note that these override the previous defaults, e.g. if ';'
  83            is a comment char, then it isn't a line separator.  */
  84         for (p = symbol_chars; *p; ++p) {
  85                 lex[*p] = LEX_IS_SYMBOL_COMPONENT;
  86         } /* declare symbol characters */
  87
  88         for (p = line_comment_chars; *p; p++) {
  89                 lex[*p] = LEX_IS_LINE_COMMENT_START;
  90         } /* declare line comment chars */
  91
  92         for (p = comment_chars; *p; p++) {
  93                 lex[*p] = LEX_IS_COMMENT_START;
  94         } /* declare comment chars */
  95
  96         for (p = line_separator_chars; *p; p++) {
  97                 lex[*p] = LEX_IS_LINE_SEPARATOR;
  98         } /* declare line separators */
  99
 100         /* Only allow slash-star comments if slash is not in use */
 101         if (lex['/'] == 0) {
 102                 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
 103         }
 104         /* FIXME-soon.  This is a bad hack but otherwise, we
 105            can't do c-style comments when '/' is a line
 106            comment char. xoxorich. */
 107         if (lex['*'] == 0) {
 108                 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
 109         }
 110 } /* do_scrub_begin() */
 111
 112 FILE *scrub_file;
 113
 114 int scrub_from_file() {
 115         return getc(scrub_file);
 116 }
 117
 118 void scrub_to_file(ch)
 119 int ch;
 120 {
 121         ungetc(ch,scrub_file);
 122 } /* scrub_to_file() */
 123
 124 char *scrub_string;
 125 char *scrub_last_string;
 126
 127 int scrub_from_string() {
 128         return scrub_string == scrub_last_string ? EOF : *scrub_string++;
 129 } /* scrub_from_string() */
 130
 131 void scrub_to_string(ch)
 132 int ch;
 133 {
 134         *--scrub_string=ch;
 135 } /* scrub_to_string() */
 136
 137 /* Saved state of the scrubber */
 138 static int state;
 139 static int old_state;
 140 static char *out_string;
 141 static char out_buf[20];
 142 static int add_newlines = 0;
 143
 144 /* Data structure for saving the state of app across #include's.  Note that
 145    app is called asynchronously to the parsing of the .include's, so our
 146    state at the time .include is interpreted is completely unrelated.
 147    That's why we have to save it all.  */
 148
 149 struct app_save {
 150         int state;
 151         int old_state;
 152         char *out_string;
 153         char out_buf[sizeof (out_buf)];
 154         int add_newlines;
 155         char *scrub_string;
 156         char *scrub_last_string;
 157         FILE *scrub_file;
 158 };
 159
 160 char *app_push() {
 161   register struct app_save *saved;
 162
 163   saved = (struct app_save *) xmalloc(sizeof (*saved));
 164   saved->state          = state;
 165   saved->old_state      = old_state;
 166   saved->out_string     = out_string;
 167   bcopy(saved->out_buf, out_buf, sizeof(out_buf));
 168   saved->add_newlines   = add_newlines;
 169   saved->scrub_string   = scrub_string;
 170   saved->scrub_last_string = scrub_last_string;
 171   saved->scrub_file     = scrub_file;
 172
 173   /* do_scrub_begin() is not useful, just wastes time. */
 174   return (char *)saved;
 175 }
 176
 177 void app_pop(arg)
 178 char *arg;
 179 {
 180         register struct app_save *saved = (struct app_save *)arg;
 181
 182         /* There is no do_scrub_end (). */
 183         state           = saved->state;
 184         old_state       = saved->old_state;
 185         out_string      = saved->out_string;
 186         memcpy(saved->out_buf, out_buf, sizeof (out_buf));
 187         add_newlines    = saved->add_newlines;
 188         scrub_string    = saved->scrub_string;
 189         scrub_last_string = saved->scrub_last_string;
 190         scrub_file      = saved->scrub_file;
 191
 192         free (arg);
 193 } /* app_pop() */
 194
 195 int process_escape(ch)
 196 char ch;
 197 {
 198   switch (ch)
 199 {
 200    case 'b':
 201     return '\b';
 202    case 'f':
 203     return '\f';
 204    case 'n':
 205     return '\n';
 206    case 'r':
 207     return '\r';
 208    case 't':
 209     return '\t';
 210    case '\'':
 211     return '\'';
 212    case '"':
 213     return '\'';
 214    default:
 215     return ch;
 216   }
 217 }
 218 int do_scrub_next_char(get,unget)
 219 int (*get)();
 220 void (*unget)();
 221 {
 222         /*State 0: beginning of normal line
 223           1: After first whitespace on line (flush more white)
 224           2: After first non-white (opcode) on line (keep 1white)
 225           3: after second white on line (into operands) (flush white)
 226           4: after putting out a .line, put out digits
 227           5: parsing a string, then go to old-state
 228           6: putting out \ escape in a "d string.
 229           7: After putting out a .app-file, put out string.
 230           8: After putting out a .app-file string, flush until newline.
 231           -1: output string in out_string and go to the state in old_state
 232           -2: flush text until a '*' '/' is seen, then go to state old_state
 233           */
 234
 235         register int ch, ch2 = 0;
 236
 237         switch (state) {
 238         case -1:
 239                 ch= *out_string++;
 240                 if(*out_string==0) {
 241                         state=old_state;
 242                         old_state=3;
 243                 }
 244                 return ch;
 245
 246         case -2:
 247                 for(;;) {
 248                         do {
 249                                 ch=(*get)();
 250                         } while(ch!=EOF && ch!='\n' && ch!='*');
 251                         if(ch=='\n' || ch==EOF)
 252                             return ch;
 253
 254                         /* At this point, ch must be a '*' */
 255                         while ( (ch=(*get)()) == '*' ){
 256                                 ;
 257                         }
 258                         if(ch==EOF || ch=='/')
 259                             break;
 260                         (*unget)(ch);
 261                 }
 262                 state=old_state;
 263                 return ' ';
 264
 265         case 4:
 266                 ch=(*get)();
 267                 if(ch==EOF || (ch>='0' && ch<='9'))
 268                     return ch;
 269                 else {
 270                         while(ch!=EOF && IS_WHITESPACE(ch))
 271                             ch=(*get)();
 272                         if(ch=='"') {
 273                                 (*unget)(ch);
 274                                 out_string="\n.app-file ";
 275                                 old_state=7;
 276                                 state= -1;
 277                                 return *out_string++;
 278                         } else {
 279                                 while(ch!=EOF && ch!='\n')
 280                                     ch=(*get)();
 281                                 return ch;
 282                         }
 283                 }
 284
 285         case 5:
 286                 ch=(*get)();
 287                 if(lex[ch]==LEX_IS_STRINGQUOTE) {
 288                         state=old_state;
 289                         return ch;
 290                 } else if(ch=='\\') {
 291                         state=6;
 292                         return ch;
 293                 } else if(ch==EOF) {
 294                         as_warn("End of file in string: inserted '\"'");
 295                         state=old_state;
 296                         (*unget)('\n');
 297                         return '"';
 298                 } else {
 299                         return ch;
 300                 }
 301
 302         case 6:
 303                 state=5;
 304                 ch=(*get)();
 305                 switch(ch) {
 306                         /* This is neet.  Turn "string
 307                            more string" into "string\n  more string"
 308                            */
 309                 case '\n':
 310                         (*unget)('n');
 311                         add_newlines++;
 312                         return '\\';
 313
 314                 case '"':
 315                 case '\\':
 316                 case 'b':
 317                 case 'f':
 318                 case 'n':
 319                 case 'r':
 320                 case 't':
 321 #ifdef BACKSLASH_V
 322                 case 'v':
 323 #endif /* BACKSLASH_V */
 324                 case '0':
 325                 case '1':
 326                 case '2':
 327                 case '3':
 328                 case '4':
 329                 case '5':
 330                 case '6':
 331                 case '7':
 332                         break;
 333 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
 334                 default:
 335                         as_warn("Unknown escape '\\%c' in string: Ignored",ch);
 336                         break;
 337 #else /* ONLY_STANDARD_ESCAPES */
 338                 default:
 339                         /* Accept \x as x for any x */
 340                         break;
 341 #endif /* ONLY_STANDARD_ESCAPES */
 342
 343                 case EOF:
 344                         as_warn("End of file in string: '\"' inserted");
 345                         return '"';
 346                 }
 347                 return ch;
 348
 349         case 7:
 350                 ch=(*get)();
 351                 state=5;
 352                 old_state=8;
 353                 return ch;
 354
 355         case 8:
 356                 do ch= (*get)();
 357                 while(ch!='\n');
 358                 state=0;
 359                 return ch;
 360         }
 361
 362         /* OK, we are somewhere in states 0 through 4 */
 363
 364 /* flushchar: */
 365         ch=(*get)();
 366  recycle:
 367         if (ch == EOF) {
 368                 if (state != 0)
 369                         as_warn("End of file not at end of a line: Newline inserted.");
 370                 return ch;
 371         }
 372
 373         switch (lex[ch]) {
 374         case LEX_IS_WHITESPACE:
 375                 do ch=(*get)();
 376                 while(ch!=EOF && IS_WHITESPACE(ch));
 377                 if(ch==EOF)
 378                         return ch;
 379
 380                 if(IS_COMMENT(ch) || (state==0 && IS_LINE_COMMENT(ch)) || ch=='/' || IS_LINE_SEPARATOR(ch)) {
 381                         goto recycle;
 382                 }
 383 #ifdef MRI
 384                 (*unget)(ch); /* Put back */
 385                 return ' '; /* Always return one space at start of line */
 386 #endif
 387                 switch (state) {
 388                 case 0: state++; goto recycle;  /* Punted leading sp */
 389                 case 1:          BAD_CASE(state); /* We can't get here */
 390                 case 2: state++; (*unget)(ch); return ' ';  /* Sp after opco */
 391                 case 3:          goto recycle;  /* Sp in operands */
 392                 default:        BAD_CASE(state);
 393                 }
 394                 break;
 395
 396         case LEX_IS_TWOCHAR_COMMENT_1ST:
 397                 ch2=(*get)();
 398                 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND) {
 399                         for(;;) {
 400                                 do {
 401                                         ch2=(*get)();
 402                                         if(ch2 != EOF && IS_NEWLINE(ch2))
 403                                                 add_newlines++;
 404                                 } while(ch2!=EOF &&
 405                                         (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
 406
 407                                 while (ch2!=EOF &&
 408                                        (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)){
 409                                         ch2=(*get)();
 410                                 }
 411
 412                                 if(ch2==EOF
 413                                   || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
 414                                         break;
 415                                 (*unget)(ch);
 416                         }
 417                         if(ch2==EOF)
 418                                 as_warn("End of file in multiline comment");
 419
 420                         ch = ' ';
 421                         goto recycle;
 422                 } else {
 423                         if(ch2!=EOF)
 424                             (*unget)(ch2);
 425                         return ch;
 426                 }
 427                 break;
 428
 429         case LEX_IS_STRINGQUOTE:
 430                 old_state=state;
 431                 state=5;
 432                 return ch;
 433 #ifndef MRI
 434 #ifndef IEEE_STYLE
 435         case LEX_IS_ONECHAR_QUOTE:
 436                 ch=(*get)();
 437                 if(ch==EOF) {
 438                         as_warn("End-of-file after a one-character quote; \000 inserted");
 439                         ch=0;
 440                 }
 441                 if (ch == '\\') {
 442                   ch = (*get)();
 443                   ch = process_escape(ch);
 444                 }
 445                 sprintf(out_buf,"%d", (int)(unsigned char)ch);
 446
 447                 /* None of these 'x constants for us.  We want 'x'.
 448                  */
 449                 if ( (ch=(*get)()) != '\'' ) {
 450 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
 451                         as_warn("Missing close quote: (assumed)");
 452 #else
 453                         (*unget)(ch);
 454 #endif
 455                 }
 456
 457                 old_state=state;
 458                 state= -1;
 459                 out_string=out_buf;
 460                 return *out_string++;
 461 #endif
 462 #endif
 463         case LEX_IS_COLON:
 464                 if(state!=3)
 465                         state=0;
 466                 return ch;
 467
 468         case LEX_IS_NEWLINE:
 469                 /* Roll out a bunch of newlines from inside comments, etc.  */
 470                 if(add_newlines) {
 471                         --add_newlines;
 472                         (*unget)(ch);
 473                 }
 474                 /* fall thru into... */
 475
 476         case LEX_IS_LINE_SEPARATOR:
 477                 state=0;
 478                 return ch;
 479
 480         case LEX_IS_LINE_COMMENT_START:
 481                 if (state != 0)         /* Not at start of line, act normal */
 482                         goto de_fault;
 483
 484  /* FIXME-someday: The two character comment stuff was badly
 485     thought out.  On i386, we want '/' as line comment start
 486     AND we want C style comments.  hence this hack.  The
 487     whole lexical process should be reworked.  xoxorich.  */
 488
 489                 if (ch == '/' && (ch2 = (*get)()) == '*') {
 490                         state = -2;
 491                         return(do_scrub_next_char(get, unget));
 492                 } else {
 493                         (*unget)(ch2);
 494                 } /* bad hack */
 495
 496                 do ch=(*get)();
 497                 while(ch!=EOF && IS_WHITESPACE(ch));
 498                 if(ch==EOF) {
 499                         as_warn("EOF in comment:  Newline inserted");
 500                         return '\n';
 501                 }
 502                 if(ch<'0' || ch>'9') {
 503                         /* Non-numerics:  Eat whole comment line */
 504                         while(ch!=EOF && !IS_NEWLINE(ch))
 505                             ch=(*get)();
 506                         if(ch==EOF)
 507                             as_warn("EOF in Comment: Newline inserted");
 508                         state=0;
 509                         return '\n';
 510                 }
 511                 /* Numerics begin comment.  Perhaps CPP `# 123 "filename"' */
 512                 (*unget)(ch);
 513                 old_state=4;
 514                 state= -1;
 515                 out_string=".line ";
 516                 return *out_string++;
 517
 518         case LEX_IS_COMMENT_START:
 519                 do ch=(*get)();
 520                 while(ch!=EOF && !IS_NEWLINE(ch));
 521                 if(ch==EOF)
 522                     as_warn("EOF in comment:  Newline inserted");
 523                 state=0;
 524                 return '\n';
 525
 526         default:
 527         de_fault:
 528                 /* Some relatively `normal' character.  */
 529                 if(state==0) {
 530                         state=2;        /* Now seeing opcode */
 531                         return ch;
 532                 } else if(state==1) {
 533                         state=2;        /* Ditto */
 534                         return ch;
 535                 } else {
 536                         return ch;      /* Opcode or operands already */
 537                 }
 538         }
 539         return -1;
 540 }
 541
 542 #ifdef TEST
 543
 544 char comment_chars[] = "|";
 545 char line_comment_chars[] = "#";
 546
 547 main()
 548 {
 549         int     ch;
 550
 551         app_begin();
 552         while((ch=do_scrub_next_char(stdin))!=EOF)
 553             putc(ch,stdout);
 554 }
 555
 556 as_warn(str)
 557 char *str;
 558 {
 559         fputs(str,stderr);
 560         putc('\n',stderr);
 561 }
 562 #endif
 563
 564 /*
 565  * Local Variables:
 566  * comment-column: 0
 567  * fill-column: 131
 568  * End:
 569  */
 570
 571 /* end of app.c */