gas/app.c

   1 /* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
   2
   3    Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
   4    */
   5 /* This is the Assembler Pre-Processor
   6    Copyright (C) 1987 Free Software Foundation, Inc.
   7
   8    This file is part of GAS, the GNU Assembler.
   9
  10    GAS is free software; you can redistribute it and/or modify
  11    it under the terms of the GNU General Public License as published by
  12    the Free Software Foundation; either version 2, or (at your option)
  13    any later version.
  14
  15    GAS is distributed in the hope that it will be useful,
  16    but WITHOUT ANY WARRANTY; without even the implied warranty of
  17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18    GNU General Public License for more details.
  19
  20    You should have received a copy of the GNU General Public License
  21    along with GAS; see the file COPYING.  If not, write to
  22    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
  23
  24 /* App, the assembler pre-processor.  This pre-processor strips out excess
  25    spaces, turns single-quoted characters into a decimal constant, and turns
  26    # <number> <filename> <garbage> into a .line <number>\n.app-file <filename>
  27    pair.  This needs better error-handling.
  28    */
  29
  30 #include <stdio.h>
  31 #include "as.h"                 /* For BAD_CASE() only */
  32 #include "read.h"
  33
  34 #if (__STDC__ != 1) && !defined(const)
  35 #define const                   /* Nothing */
  36 #endif
  37
  38 static char lex[256];
  39 static const char symbol_chars[] =
  40 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
  41
  42 #define LEX_IS_SYMBOL_COMPONENT         1
  43 #define LEX_IS_WHITESPACE               2
  44 #define LEX_IS_LINE_SEPARATOR           3
  45 #define LEX_IS_COMMENT_START            4
  46 #define LEX_IS_LINE_COMMENT_START       5
  47 #define LEX_IS_TWOCHAR_COMMENT_1ST      6
  48 #define LEX_IS_TWOCHAR_COMMENT_2ND      7
  49 #define LEX_IS_STRINGQUOTE              8
  50 #define LEX_IS_COLON                    9
  51 #define LEX_IS_NEWLINE                  10
  52 #define LEX_IS_ONECHAR_QUOTE            11
  53 #define IS_SYMBOL_COMPONENT(c)          (lex[c] == LEX_IS_SYMBOL_COMPONENT)
  54 #define IS_WHITESPACE(c)                (lex[c] == LEX_IS_WHITESPACE)
  55 #define IS_LINE_SEPARATOR(c)            (lex[c] == LEX_IS_LINE_SEPARATOR)
  56 #define IS_COMMENT(c)                   (lex[c] == LEX_IS_COMMENT_START)
  57 #define IS_LINE_COMMENT(c)              (lex[c] == LEX_IS_LINE_COMMENT_START)
  58 #define IS_NEWLINE(c)                   (lex[c] == LEX_IS_NEWLINE)
  59
  60 /* FIXME-soon: The entire lexer/parser thingy should be
  61    built statically at compile time rather than dynamically
  62    each and every time the assembler is run.  xoxorich. */
  63
  64 void
  65 do_scrub_begin ()
  66 {
  67   const char *p;
  68
  69   lex[' '] = LEX_IS_WHITESPACE;
  70   lex['\t'] = LEX_IS_WHITESPACE;
  71   lex['\n'] = LEX_IS_NEWLINE;
  72   lex[';'] = LEX_IS_LINE_SEPARATOR;
  73   lex['"'] = LEX_IS_STRINGQUOTE;
  74   lex['\''] = LEX_IS_ONECHAR_QUOTE;
  75   lex[':'] = LEX_IS_COLON;
  76
  77
  78
  79 #ifdef SINGLE_QUOTE_STRINGS
  80         lex['\''] = LEX_IS_STRINGQUOTE;
  81 #endif
  82
  83   /* Note that these override the previous defaults, e.g. if ';'
  84
  85            is a comment char, then it isn't a line separator.  */
  86   for (p = symbol_chars; *p; ++p)
  87     {
  88       lex[*p] = LEX_IS_SYMBOL_COMPONENT;
  89     }                           /* declare symbol characters */
  90
  91   for (p = line_comment_chars; *p; p++)
  92     {
  93       lex[*p] = LEX_IS_LINE_COMMENT_START;
  94     }                           /* declare line comment chars */
  95
  96   for (p = comment_chars; *p; p++)
  97     {
  98       lex[*p] = LEX_IS_COMMENT_START;
  99     }                           /* declare comment chars */
 100
 101   for (p = line_separator_chars; *p; p++)
 102     {
 103       lex[*p] = LEX_IS_LINE_SEPARATOR;
 104     }                           /* declare line separators */
 105
 106   /* Only allow slash-star comments if slash is not in use */
 107   if (lex['/'] == 0)
 108     {
 109       lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
 110     }
 111   /* FIXME-soon.  This is a bad hack but otherwise, we
 112            can't do c-style comments when '/' is a line
 113            comment char. xoxorich. */
 114   if (lex['*'] == 0)
 115     {
 116       lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
 117     }
 118 }                               /* do_scrub_begin() */
 119
 120 FILE *scrub_file;
 121
 122 int
 123 scrub_from_file ()
 124 {
 125   return getc (scrub_file);
 126 }
 127
 128 void
 129 scrub_to_file (ch)
 130      int ch;
 131 {
 132   ungetc (ch, scrub_file);
 133 }                               /* scrub_to_file() */
 134
 135 char *scrub_string;
 136 char *scrub_last_string;
 137
 138 int
 139 scrub_from_string ()
 140 {
 141   return scrub_string == scrub_last_string ? EOF : *scrub_string++;
 142 }                               /* scrub_from_string() */
 143
 144 void
 145 scrub_to_string (ch)
 146      int ch;
 147 {
 148   *--scrub_string = ch;
 149 }                               /* scrub_to_string() */
 150
/* Saved state of the scrubber */

/* Current state of do_scrub_next_char (); the meaning of each value is
   listed at the top of that function.  */
static int state;
/* State to resume once a temporary state (string, canned output text,
   comment flushing) has finished.  */
static int old_state;
/* Next character of canned text to emit while in state -1.  */
static char *out_string;
/* Holds the decimal text generated for a one-character quote constant;
   out_string may point into it.  */
static char out_buf[20];
/* Count of newlines consumed inside comments or continued strings that
   still have to be passed through to the output.  */
static int add_newlines = 0;

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.  */

/* One member per file-scope variable captured by app_push () and
   restored by app_pop ().  */
struct app_save
  {
    int state;
    int old_state;
    char *out_string;
    char out_buf[sizeof (out_buf)];
    int add_newlines;
    char *scrub_string;
    char *scrub_last_string;
    FILE *scrub_file;
  };
 174
 175 char *
 176 app_push ()
 177 {
 178   register struct app_save *saved;
 179
 180   saved = (struct app_save *) xmalloc (sizeof (*saved));
 181   saved->state = state;
 182   saved->old_state = old_state;
 183   saved->out_string = out_string;
 184   memcpy (out_buf, saved->out_buf, sizeof (out_buf));
 185   saved->add_newlines = add_newlines;
 186   saved->scrub_string = scrub_string;
 187   saved->scrub_last_string = scrub_last_string;
 188   saved->scrub_file = scrub_file;
 189
 190   /* do_scrub_begin() is not useful, just wastes time. */
 191   return (char *) saved;
 192 }
 193
 194 void
 195 app_pop (arg)
 196      char *arg;
 197 {
 198   register struct app_save *saved = (struct app_save *) arg;
 199
 200   /* There is no do_scrub_end (). */
 201   state = saved->state;
 202   old_state = saved->old_state;
 203   out_string = saved->out_string;
 204   memcpy (saved->out_buf, out_buf, sizeof (out_buf));
 205   add_newlines = saved->add_newlines;
 206   scrub_string = saved->scrub_string;
 207   scrub_last_string = saved->scrub_last_string;
 208   scrub_file = saved->scrub_file;
 209
 210   free (arg);
 211 }                               /* app_pop() */
 212
 213 /* @@ This assumes that \n &c are the same on host and target.  This is not
 214    necessarily true.  */
 215 int
 216 process_escape (ch)
 217      char ch;
 218 {
 219   switch (ch)
 220     {
 221     case 'b':
 222       return '\b';
 223     case 'f':
 224       return '\f';
 225     case 'n':
 226       return '\n';
 227     case 'r':
 228       return '\r';
 229     case 't':
 230       return '\t';
 231     case '\'':
 232       return '\'';
 233     case '"':
 234       return '\"';
 235     default:
 236       return ch;
 237     }
 238 }
 239 int
 240 do_scrub_next_char (get, unget)
 241      int (*get) ();
 242      void (*unget) ();
 243 {
 244   /*State 0: beginning of normal line
 245           1: After first whitespace on line (flush more white)
 246           2: After first non-white (opcode) on line (keep 1white)
 247           3: after second white on line (into operands) (flush white)
 248           4: after putting out a .line, put out digits
 249           5: parsing a string, then go to old-state
 250           6: putting out \ escape in a "d string.
 251           7: After putting out a .app-file, put out string.
 252           8: After putting out a .app-file string, flush until newline.
 253           -1: output string in out_string and go to the state in old_state
 254           -2: flush text until a '*' '/' is seen, then go to state old_state
 255           */
 256
 257   register int ch, ch2 = 0;
 258
 259   switch (state)
 260     {
 261     case -1:
 262       ch = *out_string++;
 263       if (*out_string == 0)
 264         {
 265           state = old_state;
 266           old_state = 3;
 267         }
 268       return ch;
 269
 270     case -2:
 271       for (;;)
 272         {
 273           do
 274             {
 275               ch = (*get) ();
 276             }
 277           while (ch != EOF && ch != '\n' && ch != '*');
 278           if (ch == '\n' || ch == EOF)
 279             return ch;
 280
 281           /* At this point, ch must be a '*' */
 282           while ((ch = (*get) ()) == '*')
 283             {
 284               ;
 285             }
 286           if (ch == EOF || ch == '/')
 287             break;
 288           (*unget) (ch);
 289         }
 290       state = old_state;
 291       return ' ';
 292
 293     case 4:
 294       ch = (*get) ();
 295       if (ch == EOF || (ch >= '0' && ch <= '9'))
 296         return ch;
 297       else
 298         {
 299           while (ch != EOF && IS_WHITESPACE (ch))
 300             ch = (*get) ();
 301           if (ch == '"')
 302             {
 303               (*unget) (ch);
 304               out_string = "\n.app-file ";
 305               old_state = 7;
 306               state = -1;
 307               return *out_string++;
 308             }
 309           else
 310             {
 311               while (ch != EOF && ch != '\n')
 312                 ch = (*get) ();
 313               return ch;
 314             }
 315         }
 316
 317     case 5:
 318       ch = (*get) ();
 319       if (lex[ch] == LEX_IS_STRINGQUOTE)
 320         {
 321           state = old_state;
 322           return ch;
 323         }
 324       else if (ch == '\\')
 325         {
 326           state = 6;
 327           return ch;
 328         }
 329       else if (ch == EOF)
 330         {
 331           as_warn ("End of file in string: inserted '\"'");
 332           state = old_state;
 333           (*unget) ('\n');
 334           return '"';
 335         }
 336       else
 337         {
 338           return ch;
 339         }
 340
 341     case 6:
 342       state = 5;
 343       ch = (*get) ();
 344       switch (ch)
 345         {
 346           /* Handle strings broken across lines, by turning '\n' into
 347              '\\' and 'n'.  */
 348         case '\n':
 349           (*unget) ('n');
 350           add_newlines++;
 351           return '\\';
 352
 353         case '"':
 354         case '\\':
 355         case 'b':
 356         case 'f':
 357         case 'n':
 358         case 'r':
 359         case 't':
 360 #ifdef BACKSLASH_V
 361         case 'v':
 362 #endif /* BACKSLASH_V */
 363         case '0':
 364         case '1':
 365         case '2':
 366         case '3':
 367         case '4':
 368         case '5':
 369         case '6':
 370         case '7':
 371           break;
 372 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
 373         default:
 374           as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
 375           break;
 376 #else /* ONLY_STANDARD_ESCAPES */
 377         default:
 378           /* Accept \x as x for any x */
 379           break;
 380 #endif /* ONLY_STANDARD_ESCAPES */
 381
 382         case EOF:
 383           as_warn ("End of file in string: '\"' inserted");
 384           return '"';
 385         }
 386       return ch;
 387
 388     case 7:
 389       ch = (*get) ();
 390       state = 5;
 391       old_state = 8;
 392       return ch;
 393
 394     case 8:
 395       do
 396         ch = (*get) ();
 397       while (ch != '\n');
 398       state = 0;
 399       return ch;
 400     }
 401
 402   /* OK, we are somewhere in states 0 through 4 */
 403
 404   /* flushchar: */
 405   ch = (*get) ();
 406 recycle:
 407   if (ch == EOF)
 408     {
 409       if (state != 0)
 410         as_warn ("End of file not at end of a line: Newline inserted.");
 411       return ch;
 412     }
 413
 414   switch (lex[ch])
 415     {
 416     case LEX_IS_WHITESPACE:
 417       do
 418         ch = (*get) ();
 419       while (ch != EOF && IS_WHITESPACE (ch));
 420       if (ch == EOF)
 421         return ch;
 422
 423       if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
 424         {
 425           goto recycle;
 426         }
 427 #ifdef MRI
 428       (*unget) (ch);            /* Put back */
 429       return ' ';               /* Always return one space at start of line */
 430 #endif
 431
 432       /* If we're in state 2, we've seen a non-white
 433          character followed by whitespace.  If the next
 434          character is ':', this is whitespace after a label
 435          name which we can ignore.  */
 436       if (state == 2 && lex[ch] == LEX_IS_COLON)
 437         {
 438           state = 0;
 439           return ch;
 440         }
 441
 442       switch (state)
 443         {
 444         case 0:
 445           state++;
 446           goto recycle;         /* Punted leading sp */
 447         case 1:
 448           BAD_CASE (state);     /* We can't get here */
 449         case 2:
 450           state++;
 451           (*unget) (ch);
 452           return ' ';           /* Sp after opco */
 453         case 3:
 454           goto recycle;         /* Sp in operands */
 455         default:
 456           BAD_CASE (state);
 457         }
 458       break;
 459
 460     case LEX_IS_TWOCHAR_COMMENT_1ST:
 461       ch2 = (*get) ();
 462       if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
 463         {
 464           for (;;)
 465             {
 466               do
 467                 {
 468                   ch2 = (*get) ();
 469                   if (ch2 != EOF && IS_NEWLINE (ch2))
 470                     add_newlines++;
 471                 }
 472               while (ch2 != EOF &&
 473                      (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
 474
 475               while (ch2 != EOF &&
 476                      (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
 477                 {
 478                   ch2 = (*get) ();
 479                 }
 480
 481               if (ch2 == EOF
 482                   || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
 483                 break;
 484               (*unget) (ch);
 485             }
 486           if (ch2 == EOF)
 487             as_warn ("End of file in multiline comment");
 488
 489           ch = ' ';
 490           goto recycle;
 491         }
 492       else
 493         {
 494           if (ch2 != EOF)
 495             (*unget) (ch2);
 496           return ch;
 497         }
 498       break;
 499
 500     case LEX_IS_STRINGQUOTE:
 501       old_state = state;
 502       state = 5;
 503       return ch;
 504 #ifndef MRI
 505 #ifndef IEEE_STYLE
 506     case LEX_IS_ONECHAR_QUOTE:
 507       ch = (*get) ();
 508       if (ch == EOF)
 509         {
 510           as_warn ("End-of-file after a one-character quote; \\000 inserted");
 511           ch = 0;
 512         }
 513       if (ch == '\\')
 514         {
 515           ch = (*get) ();
 516           ch = process_escape (ch);
 517         }
 518       sprintf (out_buf, "%d", (int) (unsigned char) ch);
 519
 520
 521       /* None of these 'x constants for us.  We want 'x'.
 522                  */
 523       if ((ch = (*get) ()) != '\'')
 524         {
 525 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
 526           as_warn ("Missing close quote: (assumed)");
 527 #else
 528           (*unget) (ch);
 529 #endif
 530         }
 531       if (strlen (out_buf) == 1)
 532         {
 533           return out_buf[0];
 534         }
 535       old_state = state;
 536       state = -1;
 537       out_string = out_buf;
 538       return *out_string++;
 539 #endif
 540 #endif
 541     case LEX_IS_COLON:
 542       if (state != 3)
 543         state = 0;
 544       return ch;
 545
 546     case LEX_IS_NEWLINE:
 547       /* Roll out a bunch of newlines from inside comments, etc.  */
 548       if (add_newlines)
 549         {
 550           --add_newlines;
 551           (*unget) (ch);
 552         }
 553       /* fall thru into... */
 554
 555     case LEX_IS_LINE_SEPARATOR:
 556       state = 0;
 557       return ch;
 558
 559     case LEX_IS_LINE_COMMENT_START:
 560       if (state != 0)           /* Not at start of line, act normal */
 561         goto de_fault;
 562
 563       /* FIXME-someday: The two character comment stuff was badly
 564     thought out.  On i386, we want '/' as line comment start
 565     AND we want C style comments.  hence this hack.  The
 566     whole lexical process should be reworked.  xoxorich.  */
 567
 568       if (ch == '/' && (ch2 = (*get) ()) == '*')
 569         {
 570           state = -2;
 571           return (do_scrub_next_char (get, unget));
 572         }
 573       else
 574         {
 575           (*unget) (ch2);
 576         }                       /* bad hack */
 577
 578       do
 579         ch = (*get) ();
 580       while (ch != EOF && IS_WHITESPACE (ch));
 581       if (ch == EOF)
 582         {
 583           as_warn ("EOF in comment:  Newline inserted");
 584           return '\n';
 585         }
 586       if (ch < '0' || ch > '9')
 587         {
 588           /* Non-numerics:  Eat whole comment line */
 589           while (ch != EOF && !IS_NEWLINE (ch))
 590             ch = (*get) ();
 591           if (ch == EOF)
 592             as_warn ("EOF in Comment: Newline inserted");
 593           state = 0;
 594           return '\n';
 595         }
 596       /* Numerics begin comment.  Perhaps CPP `# 123 "filename"' */
 597       (*unget) (ch);
 598       old_state = 4;
 599       state = -1;
 600       out_string = ".line ";
 601       return *out_string++;
 602
 603     case LEX_IS_COMMENT_START:
 604       do
 605         ch = (*get) ();
 606       while (ch != EOF && !IS_NEWLINE (ch));
 607       if (ch == EOF)
 608         as_warn ("EOF in comment:  Newline inserted");
 609       state = 0;
 610       return '\n';
 611
 612     default:
 613     de_fault:
 614       /* Some relatively `normal' character.  */
 615       if (state == 0)
 616         {
 617           state = 2;            /* Now seeing opcode */
 618           return ch;
 619         }
 620       else if (state == 1)
 621         {
 622           state = 2;            /* Ditto */
 623           return ch;
 624         }
 625       else
 626         {
 627           return ch;            /* Opcode or operands already */
 628         }
 629     }
 630   return -1;
 631 }
 632
 633 #ifdef TEST
 634
 635 const char comment_chars[] = "|";
 636 const char line_comment_chars[] = "#";
 637
 638 main ()
 639 {
 640   int ch;
 641
 642   app_begin ();
 643   while ((ch = do_scrub_next_char (stdin)) != EOF)
 644     putc (ch, stdout);
 645 }
 646
 647 as_warn (str)
 648      char *str;
 649 {
 650   fputs (str, stderr);
 651   putc ('\n', stderr);
 652 }
 653
 654 #endif
 655
 656 /*
 657  * Local Variables:
 658  * comment-column: 0
 659  * fill-column: 131
 660  * End:
 661  */
 662
 663 /* end of app.c */