gas/app.c

   1 /* This is the Assembler Pre-Processor
   2    Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 96, 97, 98, 1999
   3    Free Software Foundation, Inc.
   4
   5    This file is part of GAS, the GNU Assembler.
   6
   7    GAS is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    GAS is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with GAS; see the file COPYING.  If not, write to the Free
  19    Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20    02111-1307, USA.  */
  21
  22 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
  23 /* App, the assembler pre-processor.  This pre-processor strips out excess
  24    spaces, turns single-quoted characters into a decimal constant, and turns
  25    # <number> <filename> <garbage> into a .line <number>\n.file <filename>
  26    pair.  This needs better error-handling.  */
  27
  28 #include <stdio.h>
  29 #include "as.h"                 /* For BAD_CASE() only */
  30
  31 #if (__STDC__ != 1)
  32 #ifndef const
  33 #define const  /* empty */
  34 #endif
  35 #endif
  36
  37 /* Whether we are scrubbing in m68k MRI mode.  This is different from
  38    flag_m68k_mri, because the two flags will be affected by the .mri
  39    pseudo-op at different times.  */
  40 static int scrub_m68k_mri;
  41
  42 /* The pseudo-op which switches in and out of MRI mode.  See the
  43    comment in do_scrub_chars.  */
  44 static const char mri_pseudo[] = ".mri 0";
  45
  46 #if defined TC_ARM && defined OBJ_ELF
  47 /* The pseudo-op for which we need to special-case `@' characters.
  48    See the comment in do_scrub_chars.  */
  49 static const char   symver_pseudo[] = ".symver";
  50 static const char * symver_state;
  51 #endif
  52
/* Character classification table, indexed by character value (0..255).
   It is filled in by do_scrub_begin; entries that do_scrub_begin does
   not set remain zero.  */
static char lex[256];
/* Characters classified as LEX_IS_SYMBOL_COMPONENT by default.
   do_scrub_begin may reclassify individual characters afterwards
   (e.g. when one of them is also a comment character).  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* The classes that may be stored in lex[].  The value 7 is not
   assigned to any class here.  */
#define LEX_IS_SYMBOL_COMPONENT         1
#define LEX_IS_WHITESPACE               2
#define LEX_IS_LINE_SEPARATOR           3
#define LEX_IS_COMMENT_START            4
#define LEX_IS_LINE_COMMENT_START       5
#define LEX_IS_TWOCHAR_COMMENT_1ST      6
#define LEX_IS_STRINGQUOTE              8
#define LEX_IS_COLON                    9
#define LEX_IS_NEWLINE                  10
#define LEX_IS_ONECHAR_QUOTE            11
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST           12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST            13
#endif
/* Convenience tests on a character's class.  C is used directly as an
   index into lex[], so it must be in the range 0..255; in particular
   callers must rule out EOF before using these.  */
#define IS_SYMBOL_COMPONENT(c)          (lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)                (lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)            (lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)                   (lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)              (lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)                   (lex[c] == LEX_IS_NEWLINE)
  82
  83 static int process_escape PARAMS ((int));
  84
  85 /* FIXME-soon: The entire lexer/parser thingy should be
  86    built statically at compile time rather than dynamically
  87    each and every time the assembler is run.  xoxorich. */
  88
  89 void
  90 do_scrub_begin (m68k_mri)
  91      int m68k_mri;
  92 {
  93   const char *p;
  94   int c;
  95
  96   scrub_m68k_mri = m68k_mri;
  97
  98   lex[' '] = LEX_IS_WHITESPACE;
  99   lex['\t'] = LEX_IS_WHITESPACE;
 100   lex['\r'] = LEX_IS_WHITESPACE;
 101   lex['\n'] = LEX_IS_NEWLINE;
 102   lex[';'] = LEX_IS_LINE_SEPARATOR;
 103   lex[':'] = LEX_IS_COLON;
 104
 105   if (! m68k_mri)
 106     {
 107       lex['"'] = LEX_IS_STRINGQUOTE;
 108
 109 #if ! defined (TC_HPPA) && ! defined (TC_I370)
 110       /* I370 uses single-quotes to delimit integer, float constants */
 111       lex['\''] = LEX_IS_ONECHAR_QUOTE;
 112 #endif
 113
 114 #ifdef SINGLE_QUOTE_STRINGS
 115       lex['\''] = LEX_IS_STRINGQUOTE;
 116 #endif
 117     }
 118
 119   /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
 120      in state 5 of do_scrub_chars must be changed.  */
 121
 122   /* Note that these override the previous defaults, e.g. if ';' is a
 123      comment char, then it isn't a line separator.  */
 124   for (p = symbol_chars; *p; ++p)
 125     {
 126       lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
 127     }                           /* declare symbol characters */
 128
 129   for (c = 128; c < 256; ++c)
 130     lex[c] = LEX_IS_SYMBOL_COMPONENT;
 131
 132 #ifdef tc_symbol_chars
 133   /* This macro permits the processor to specify all characters which
 134      may appears in an operand.  This will prevent the scrubber from
 135      discarding meaningful whitespace in certain cases.  The i386
 136      backend uses this to support prefixes, which can confuse the
 137      scrubber as to whether it is parsing operands or opcodes.  */
 138   for (p = tc_symbol_chars; *p; ++p)
 139     lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
 140 #endif
 141
 142   /* The m68k backend wants to be able to change comment_chars.  */
 143 #ifndef tc_comment_chars
 144 #define tc_comment_chars comment_chars
 145 #endif
 146   for (p = tc_comment_chars; *p; p++)
 147     {
 148       lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
 149     }                           /* declare comment chars */
 150
 151   for (p = line_comment_chars; *p; p++)
 152     {
 153       lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
 154     }                           /* declare line comment chars */
 155
 156   for (p = line_separator_chars; *p; p++)
 157     {
 158       lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
 159     }                           /* declare line separators */
 160
 161   /* Only allow slash-star comments if slash is not in use.
 162      FIXME: This isn't right.  We should always permit them.  */
 163   if (lex['/'] == 0)
 164     {
 165       lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
 166     }
 167
 168   if (m68k_mri)
 169     {
 170       lex['\''] = LEX_IS_STRINGQUOTE;
 171       lex[';'] = LEX_IS_COMMENT_START;
 172       lex['*'] = LEX_IS_LINE_COMMENT_START;
 173       /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
 174          then it can't be used in an expression.  */
 175       lex['!'] = LEX_IS_LINE_COMMENT_START;
 176     }
 177
 178 #ifdef TC_V850
 179   lex['-'] = LEX_IS_DOUBLEDASH_1ST;
 180 #endif
 181 #ifdef DOUBLEBAR_PARALLEL
 182   lex['|'] = LEX_IS_DOUBLEBAR_1ST;
 183 #endif
 184 #ifdef TC_D30V
 185   /* must do this is we want VLIW instruction with "->" or "<-" */
 186   lex['-'] = LEX_IS_SYMBOL_COMPONENT;
 187 #endif
 188 }                               /* do_scrub_begin() */
 189
/* Saved state of the scrubber.  These variables persist between calls
   to do_scrub_chars; app_push and app_pop copy them to and from a
   struct app_save.  */

/* Current state of the state machine; the states are listed at the
   top of do_scrub_chars.  */
static int state;
/* State to switch to when state -1, -2 or 5 finishes.  */
static int old_state;
/* In state -1, the next character of the string being output.  */
static char *out_string;
/* Saved and restored together with out_string.
   NOTE(review): presumably scratch space that out_string can point
   into; its users are not in this part of the file -- confirm.  */
static char out_buf[20];
/* Incremented by do_scrub_chars for each newline it consumes inside a
   continued string or a slash-star comment.  */
static int add_newlines;
/* If not NULL, the input do_scrub_chars should resume from on its next
   call instead of fetching fresh input.  */
static char *saved_input;
/* Number of bytes available at saved_input.  */
static int saved_input_len;
/* Buffer which the GET callback of do_scrub_chars fills with raw
   input.  */
static char input_buffer[32 * 1024];
/* While a .mri pseudo-op is being matched, points at the next
   character of mri_pseudo that is expected; NULL when no match is in
   progress.  */
static const char *mri_state;
/* The most recent input character that matched mri_pseudo.  Once the
   whole pseudo-op has matched this is its '0' or '1' operand.  */
static char mri_last_ch;
 201
 202 /* Data structure for saving the state of app across #include's.  Note that
 203    app is called asynchronously to the parsing of the .include's, so our
 204    state at the time .include is interpreted is completely unrelated.
 205    That's why we have to save it all.  */
 206
struct app_save
  {
    /* Each field holds the value of the file-scope scrubber variable
       of the same name at the time of app_push.  */
    int          state;
    int          old_state;
    char *       out_string;
    char         out_buf[sizeof (out_buf)];
    int          add_newlines;
    /* A separately allocated copy of the pending input, or NULL if
       there was none.  app_pop frees it.  */
    char *       saved_input;
    /* Length of saved_input; app_pop only reads it when saved_input
       is not NULL.  */
    int          saved_input_len;
    int          scrub_m68k_mri;
    const char * mri_state;
    char         mri_last_ch;
#if defined TC_ARM && defined OBJ_ELF
    const char * symver_state;
#endif
  };
 223
 224 char *
 225 app_push ()
 226 {
 227   register struct app_save *saved;
 228
 229   saved = (struct app_save *) xmalloc (sizeof (*saved));
 230   saved->state = state;
 231   saved->old_state = old_state;
 232   saved->out_string = out_string;
 233   memcpy (saved->out_buf, out_buf, sizeof (out_buf));
 234   saved->add_newlines = add_newlines;
 235   if (saved_input == NULL)
 236     saved->saved_input = NULL;
 237   else
 238     {
 239       saved->saved_input = xmalloc (saved_input_len);
 240       memcpy (saved->saved_input, saved_input, saved_input_len);
 241       saved->saved_input_len = saved_input_len;
 242     }
 243   saved->scrub_m68k_mri = scrub_m68k_mri;
 244   saved->mri_state = mri_state;
 245   saved->mri_last_ch = mri_last_ch;
 246 #if defined TC_ARM && defined OBJ_ELF
 247   saved->symver_state = symver_state;
 248 #endif
 249
 250   /* do_scrub_begin() is not useful, just wastes time. */
 251
 252   state = 0;
 253   saved_input = NULL;
 254
 255   return (char *) saved;
 256 }
 257
 258 void
 259 app_pop (arg)
 260      char *arg;
 261 {
 262   register struct app_save *saved = (struct app_save *) arg;
 263
 264   /* There is no do_scrub_end (). */
 265   state = saved->state;
 266   old_state = saved->old_state;
 267   out_string = saved->out_string;
 268   memcpy (out_buf, saved->out_buf, sizeof (out_buf));
 269   add_newlines = saved->add_newlines;
 270   if (saved->saved_input == NULL)
 271     saved_input = NULL;
 272   else
 273     {
 274       assert (saved->saved_input_len <= (int) (sizeof input_buffer));
 275       memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
 276       saved_input = input_buffer;
 277       saved_input_len = saved->saved_input_len;
 278       free (saved->saved_input);
 279     }
 280   scrub_m68k_mri = saved->scrub_m68k_mri;
 281   mri_state = saved->mri_state;
 282   mri_last_ch = saved->mri_last_ch;
 283 #if defined TC_ARM && defined OBJ_ELF
 284   symver_state = saved->symver_state;
 285 #endif
 286
 287   free (arg);
 288 }                               /* app_pop() */
 289
 290 /* @@ This assumes that \n &c are the same on host and target.  This is not
 291    necessarily true.  */
 292 static int
 293 process_escape (ch)
 294      int ch;
 295 {
 296   switch (ch)
 297     {
 298     case 'b':
 299       return '\b';
 300     case 'f':
 301       return '\f';
 302     case 'n':
 303       return '\n';
 304     case 'r':
 305       return '\r';
 306     case 't':
 307       return '\t';
 308     case '\'':
 309       return '\'';
 310     case '"':
 311       return '\"';
 312     default:
 313       return ch;
 314     }
 315 }
 316
 317 /* This function is called to process input characters.  The GET
 318    parameter is used to retrieve more input characters.  GET should
 319    set its parameter to point to a buffer, and return the length of
 320    the buffer; it should return 0 at end of file.  The scrubbed output
 321    characters are put into the buffer starting at TOSTART; the TOSTART
 322    buffer is TOLEN bytes in length.  The function returns the number
 323    of scrubbed characters put into TOSTART.  This will be TOLEN unless
 324    end of file was seen.  This function is arranged as a state
 325    machine, and saves its state so that it may return at any point.
 326    This is the way the old code used to work.  */
 327
 328 int
 329 do_scrub_chars (get, tostart, tolen)
 330      int (*get) PARAMS ((char *, int));
 331      char *tostart;
 332      int tolen;
 333 {
 334   char *to = tostart;
 335   char *toend = tostart + tolen;
 336   char *from;
 337   char *fromend;
 338   int fromlen;
 339   register int ch, ch2 = 0;
 340
 341   /*State 0: beginning of normal line
 342           1: After first whitespace on line (flush more white)
 343           2: After first non-white (opcode) on line (keep 1white)
 344           3: after second white on line (into operands) (flush white)
 345           4: after putting out a .line, put out digits
 346           5: parsing a string, then go to old-state
 347           6: putting out \ escape in a "d string.
 348           7: After putting out a .appfile, put out string.
 349           8: After putting out a .appfile string, flush until newline.
 350           9: After seeing symbol char in state 3 (keep 1white after symchar)
 351          10: After seeing whitespace in state 9 (keep white before symchar)
 352          11: After seeing a symbol character in state 0 (eg a label definition)
 353          -1: output string in out_string and go to the state in old_state
 354          -2: flush text until a '*' '/' is seen, then go to state old_state
 355 #ifdef TC_V850
 356          12: After seeing a dash, looking for a second dash as a start of comment.
 357 #endif
 358 #ifdef DOUBLEBAR_PARALLEL
         13: After seeing a vertical bar, looking for a second vertical bar as a parallel expression separator.
 360 #endif
 361           */
 362
 363   /* I added states 9 and 10 because the MIPS ECOFF assembler uses
 364      constructs like ``.loc 1 20''.  This was turning into ``.loc
 365      120''.  States 9 and 10 ensure that a space is never dropped in
 366      between characters which could appear in a identifier.  Ian
 367      Taylor, ian@cygnus.com.
 368
 369      I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
 370      correctly on the PA (and any other target where colons are optional).
 371      Jeff Law, law@cs.utah.edu.
 372
 373      I added state 13 so that something like "cmp r1, r2 || trap #1" does not
 374      get squashed into "cmp r1,r2||trap#1", with the all important space
 375      between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
 376
 377   /* This macro gets the next input character.  */
 378
 379 #define GET()                                                   \
 380   (from < fromend                                               \
 381    ? * (unsigned char *) (from++)                               \
 382    : (saved_input = NULL,                                       \
 383       fromlen = (*get) (input_buffer, sizeof input_buffer),     \
 384       from = input_buffer,                                      \
 385       fromend = from + fromlen,                                 \
 386       (fromlen == 0                                             \
 387        ? EOF                                                    \
 388        : * (unsigned char *) (from++))))
 389
 390   /* This macro pushes a character back on the input stream.  */
 391
 392 #define UNGET(uch) (*--from = (uch))
 393
 394   /* This macro puts a character into the output buffer.  If this
 395      character fills the output buffer, this macro jumps to the label
 396      TOFULL.  We use this rather ugly approach because we need to
 397      handle two different termination conditions: EOF on the input
 398      stream, and a full output buffer.  It would be simpler if we
 399      always read in the entire input stream before processing it, but
 400      I don't want to make such a significant change to the assembler's
 401      memory usage.  */
 402
 403 #define PUT(pch)                        \
 404   do                                    \
 405     {                                   \
 406       *to++ = (pch);                    \
 407       if (to >= toend)                  \
 408         goto tofull;                    \
 409     }                                   \
 410   while (0)
 411
 412   if (saved_input != NULL)
 413     {
 414       from = saved_input;
 415       fromend = from + saved_input_len;
 416     }
 417   else
 418     {
 419       fromlen = (*get) (input_buffer, sizeof input_buffer);
 420       if (fromlen == 0)
 421         return 0;
 422       from = input_buffer;
 423       fromend = from + fromlen;
 424     }
 425
 426   while (1)
 427     {
 428       /* The cases in this switch end with continue, in order to
 429          branch back to the top of this while loop and generate the
 430          next output character in the appropriate state.  */
 431       switch (state)
 432         {
 433         case -1:
 434           ch = *out_string++;
 435           if (*out_string == '\0')
 436             {
 437               state = old_state;
 438               old_state = 3;
 439             }
 440           PUT (ch);
 441           continue;
 442
 443         case -2:
 444           for (;;)
 445             {
 446               do
 447                 {
 448                   ch = GET ();
 449
 450                   if (ch == EOF)
 451                     {
 452                       as_warn (_("end of file in comment"));
 453                       goto fromeof;
 454                     }
 455
 456                   if (ch == '\n')
 457                     PUT ('\n');
 458                 }
 459               while (ch != '*');
 460
 461               while ((ch = GET ()) == '*')
 462                 ;
 463
 464               if (ch == EOF)
 465                 {
 466                   as_warn (_("end of file in comment"));
 467                   goto fromeof;
 468                 }
 469
 470               if (ch == '/')
 471                 break;
 472
 473               UNGET (ch);
 474             }
 475
 476           state = old_state;
 477           UNGET (' ');
 478           continue;
 479
 480         case 4:
 481           ch = GET ();
 482           if (ch == EOF)
 483             goto fromeof;
 484           else if (ch >= '0' && ch <= '9')
 485             PUT (ch);
 486           else
 487             {
 488               while (ch != EOF && IS_WHITESPACE (ch))
 489                 ch = GET ();
 490               if (ch == '"')
 491                 {
 492                   UNGET (ch);
 493                   if (scrub_m68k_mri)
 494                     out_string = "\n\tappfile ";
 495                   else
 496                     out_string = "\n\t.appfile ";
 497                   old_state = 7;
 498                   state = -1;
 499                   PUT (*out_string++);
 500                 }
 501               else
 502                 {
 503                   while (ch != EOF && ch != '\n')
 504                     ch = GET ();
 505                   state = 0;
 506                   PUT (ch);
 507                 }
 508             }
 509           continue;
 510
 511         case 5:
 512           /* We are going to copy everything up to a quote character,
 513              with special handling for a backslash.  We try to
 514              optimize the copying in the simple case without using the
 515              GET and PUT macros.  */
 516           {
 517             char *s;
 518             int len;
 519
 520             for (s = from; s < fromend; s++)
 521               {
 522                 ch = *s;
 523                 /* This condition must be changed if the type of any
 524                    other character can be LEX_IS_STRINGQUOTE.  */
 525                 if (ch == '\\'
 526                     || ch == '"'
 527                     || ch == '\''
 528                     || ch == '\n')
 529                   break;
 530               }
 531             len = s - from;
 532             if (len > toend - to)
 533               len = toend - to;
 534             if (len > 0)
 535               {
 536                 memcpy (to, from, len);
 537                 to += len;
 538                 from += len;
 539               }
 540           }
 541
 542           ch = GET ();
 543           if (ch == EOF)
 544             {
 545               as_warn (_("end of file in string: inserted '\"'"));
 546               state = old_state;
 547               UNGET ('\n');
 548               PUT ('"');
 549             }
 550           else if (lex[ch] == LEX_IS_STRINGQUOTE)
 551             {
 552               state = old_state;
 553               PUT (ch);
 554             }
 555 #ifndef NO_STRING_ESCAPES
 556           else if (ch == '\\')
 557             {
 558               state = 6;
 559               PUT (ch);
 560             }
 561 #endif
 562           else if (scrub_m68k_mri && ch == '\n')
 563             {
 564               /* Just quietly terminate the string.  This permits lines like
 565                    bne  label   loop if we haven't reach end yet
 566                  */
 567               state = old_state;
 568               UNGET (ch);
 569               PUT ('\'');
 570             }
 571           else
 572             {
 573               PUT (ch);
 574             }
 575           continue;
 576
 577         case 6:
 578           state = 5;
 579           ch = GET ();
 580           switch (ch)
 581             {
 582               /* Handle strings broken across lines, by turning '\n' into
 583                  '\\' and 'n'.  */
 584             case '\n':
 585               UNGET ('n');
 586               add_newlines++;
 587               PUT ('\\');
 588               continue;
 589
 590             case '"':
 591             case '\\':
 592             case 'b':
 593             case 'f':
 594             case 'n':
 595             case 'r':
 596             case 't':
 597             case 'v':
 598             case 'x':
 599             case 'X':
 600             case '0':
 601             case '1':
 602             case '2':
 603             case '3':
 604             case '4':
 605             case '5':
 606             case '6':
 607             case '7':
 608               break;
 609 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
 610             default:
 611               as_warn (_("Unknown escape '\\%c' in string: Ignored"), ch);
 612               break;
 613 #else  /* ONLY_STANDARD_ESCAPES */
 614             default:
 615               /* Accept \x as x for any x */
 616               break;
 617 #endif /* ONLY_STANDARD_ESCAPES */
 618
 619             case EOF:
 620               as_warn (_("End of file in string: '\"' inserted"));
 621               PUT ('"');
 622               continue;
 623             }
 624           PUT (ch);
 625           continue;
 626
 627         case 7:
 628           ch = GET ();
 629           state = 5;
 630           old_state = 8;
 631           if (ch == EOF)
 632             goto fromeof;
 633           PUT (ch);
 634           continue;
 635
 636         case 8:
 637           do
 638             ch = GET ();
 639           while (ch != '\n' && ch != EOF);
 640           if (ch == EOF)
 641             goto fromeof;
 642           state = 0;
 643           PUT (ch);
 644           continue;
 645         }
 646
 647       /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
 648
 649       /* flushchar: */
 650       ch = GET ();
 651
 652     recycle:
 653
 654 #if defined TC_ARM && defined OBJ_ELF
 655       /* We need to watch out for .symver directives.  See the comment later
 656          in this function.  */
 657       if (symver_state == NULL)
 658         {
 659           if ((state == 0 || state == 1) && ch == symver_pseudo[0])
 660             symver_state = symver_pseudo + 1;
 661         }
 662       else
 663         {
 664           /* We advance to the next state if we find the right
 665              character.  */
 666           if (ch != '\0' && (*symver_state == ch))
 667             ++symver_state;
 668           else if (*symver_state != '\0')
 669             /* We did not get the expected character, or we didn't
 670                get a valid terminating character after seeing the
 671                entire pseudo-op, so we must go back to the beginning.  */
 672             symver_state = NULL;
 673           else
 674             {
 675               /* We've read the entire pseudo-op.  If this is the end
 676                  of the line, go back to the beginning.  */
 677               if (IS_NEWLINE (ch))
 678                 symver_state = NULL;
 679             }
 680         }
 681 #endif /* TC_ARM && OBJ_ELF */
 682
 683 #ifdef TC_M68K
 684       /* We want to have pseudo-ops which control whether we are in
 685          MRI mode or not.  Unfortunately, since m68k MRI mode affects
 686          the scrubber, that means that we need a special purpose
 687          recognizer here.  */
 688       if (mri_state == NULL)
 689         {
 690           if ((state == 0 || state == 1)
 691               && ch == mri_pseudo[0])
 692             mri_state = mri_pseudo + 1;
 693         }
 694       else
 695         {
 696           /* We advance to the next state if we find the right
 697              character, or if we need a space character and we get any
 698              whitespace character, or if we need a '0' and we get a
 699              '1' (this is so that we only need one state to handle
 700              ``.mri 0'' and ``.mri 1'').  */
 701           if (ch != '\0'
 702               && (*mri_state == ch
 703                   || (*mri_state == ' '
 704                       && lex[ch] == LEX_IS_WHITESPACE)
 705                   || (*mri_state == '0'
 706                       && ch == '1')))
 707             {
 708               mri_last_ch = ch;
 709               ++mri_state;
 710             }
 711           else if (*mri_state != '\0'
 712                    || (lex[ch] != LEX_IS_WHITESPACE
 713                        && lex[ch] != LEX_IS_NEWLINE))
 714             {
 715               /* We did not get the expected character, or we didn't
 716                  get a valid terminating character after seeing the
 717                  entire pseudo-op, so we must go back to the
 718                  beginning.  */
 719               mri_state = NULL;
 720             }
 721           else
 722             {
              /* We've read the entire pseudo-op.  mri_last_ch is
 724                  either '0' or '1' indicating whether to enter or
 725                  leave MRI mode.  */
 726               do_scrub_begin (mri_last_ch == '1');
 727               mri_state = NULL;
 728
 729               /* We continue handling the character as usual.  The
 730                  main gas reader must also handle the .mri pseudo-op
 731                  to control expression parsing and the like.  */
 732             }
 733         }
 734 #endif
 735
 736       if (ch == EOF)
 737         {
 738           if (state != 0)
 739             {
 740               as_warn (_("end of file not at end of a line; newline inserted"));
 741               state = 0;
 742               PUT ('\n');
 743             }
 744           goto fromeof;
 745         }
 746
 747       switch (lex[ch])
 748         {
 749         case LEX_IS_WHITESPACE:
 750           do
 751             {
 752               ch = GET ();
 753             }
 754           while (ch != EOF && IS_WHITESPACE (ch));
 755           if (ch == EOF)
 756             goto fromeof;
 757
 758           if (state == 0)
 759             {
 760               /* Preserve a single whitespace character at the
 761                  beginning of a line.  */
 762               state = 1;
 763               UNGET (ch);
 764               PUT (' ');
 765               break;
 766             }
 767
 768 #ifdef KEEP_WHITE_AROUND_COLON
 769           if (lex[ch] == LEX_IS_COLON)
 770             {
 771               /* only keep this white if there's no white *after* the colon */
 772               ch2 = GET ();
 773               UNGET (ch2);
 774               if (!IS_WHITESPACE (ch2))
 775                 {
 776                   state = 9;
 777                   UNGET (ch);
 778                   PUT (' ');
 779                   break;
 780                 }
 781             }
 782 #endif
 783           if (IS_COMMENT (ch)
 784               || ch == '/'
 785               || IS_LINE_SEPARATOR (ch))
 786             {
 787               if (scrub_m68k_mri)
 788                 {
 789                   /* In MRI mode, we keep these spaces.  */
 790                   UNGET (ch);
 791                   PUT (' ');
 792                   break;
 793                 }
 794               goto recycle;
 795             }
 796
 797           /* If we're in state 2 or 11, we've seen a non-white
 798              character followed by whitespace.  If the next character
 799              is ':', this is whitespace after a label name which we
 800              normally must ignore.  In MRI mode, though, spaces are
 801              not permitted between the label and the colon.  */
 802           if ((state == 2 || state == 11)
 803               && lex[ch] == LEX_IS_COLON
 804               && ! scrub_m68k_mri)
 805             {
 806               state = 1;
 807               PUT (ch);
 808               break;
 809             }
 810
 811           switch (state)
 812             {
 813             case 0:
 814               state++;
 815               goto recycle;     /* Punted leading sp */
 816             case 1:
 817               /* We can arrive here if we leave a leading whitespace
 818                  character at the beginning of a line.  */
 819               goto recycle;
 820             case 2:
 821               state = 3;
 822               if (to + 1 < toend)
 823                 {
 824                   /* Optimize common case by skipping UNGET/GET.  */
 825                   PUT (' ');    /* Sp after opco */
 826                   goto recycle;
 827                 }
 828               UNGET (ch);
 829               PUT (' ');
 830               break;
 831             case 3:
 832               if (scrub_m68k_mri)
 833                 {
 834                   /* In MRI mode, we keep these spaces.  */
 835                   UNGET (ch);
 836                   PUT (' ');
 837                   break;
 838                 }
 839               goto recycle;     /* Sp in operands */
 840             case 9:
 841             case 10:
 842               if (scrub_m68k_mri)
 843                 {
 844                   /* In MRI mode, we keep these spaces.  */
 845                   state = 3;
 846                   UNGET (ch);
 847                   PUT (' ');
 848                   break;
 849                 }
 850               state = 10;       /* Sp after symbol char */
 851               goto recycle;
 852             case 11:
 853               if (flag_m68k_mri
 854 #ifdef LABELS_WITHOUT_COLONS
 855                   || 1
 856 #endif
 857                   )
 858                 state = 1;
 859               else
 860                 {
 861                   /* We know that ch is not ':', since we tested that
 862                      case above.  Therefore this is not a label, so it
 863                      must be the opcode, and we've just seen the
 864                      whitespace after it.  */
 865                   state = 3;
 866                 }
 867               UNGET (ch);
 868               PUT (' ');        /* Sp after label definition.  */
 869               break;
 870             default:
 871               BAD_CASE (state);
 872             }
 873           break;
 874
 875         case LEX_IS_TWOCHAR_COMMENT_1ST:
 876           ch2 = GET ();
 877           if (ch2 == '*')
 878             {
 879               for (;;)
 880                 {
 881                   do
 882                     {
 883                       ch2 = GET ();
 884                       if (ch2 != EOF && IS_NEWLINE (ch2))
 885                         add_newlines++;
 886                     }
 887                   while (ch2 != EOF && ch2 != '*');
 888
 889                   while (ch2 == '*')
 890                     ch2 = GET ();
 891
 892                   if (ch2 == EOF || ch2 == '/')
 893                     break;
 894
 895                   /* This UNGET will ensure that we count newlines
 896                      correctly.  */
 897                   UNGET (ch2);
 898                 }
 899
 900               if (ch2 == EOF)
 901                 as_warn (_("end of file in multiline comment"));
 902
 903               ch = ' ';
 904               goto recycle;
 905             }
 906           else
 907             {
 908               if (ch2 != EOF)
 909                 UNGET (ch2);
 910               if (state == 9 || state == 10)
 911                 state = 3;
 912               PUT (ch);
 913             }
 914           break;
 915
 916         case LEX_IS_STRINGQUOTE:
 917           if (state == 10)
 918             {
 919               /* Preserve the whitespace in foo "bar" */
 920               UNGET (ch);
 921               state = 3;
 922               PUT (' ');
 923
 924               /* PUT didn't jump out.  We could just break, but we
 925                  know what will happen, so optimize a bit.  */
 926               ch = GET ();
 927               old_state = 3;
 928             }
 929           else if (state == 9)
 930             old_state = 3;
 931           else
 932             old_state = state;
 933           state = 5;
 934           PUT (ch);
 935           break;
 936
 937 #ifndef IEEE_STYLE
 938         case LEX_IS_ONECHAR_QUOTE:
 939           if (state == 10)
 940             {
 941               /* Preserve the whitespace in foo 'b' */
 942               UNGET (ch);
 943               state = 3;
 944               PUT (' ');
 945               break;
 946             }
 947           ch = GET ();
 948           if (ch == EOF)
 949             {
 950               as_warn (_("end of file after a one-character quote; \\0 inserted"));
 951               ch = 0;
 952             }
 953           if (ch == '\\')
 954             {
 955               ch = GET ();
 956               if (ch == EOF)
 957                 {
 958                   as_warn (_("end of file in escape character"));
 959                   ch = '\\';
 960                 }
 961               else
 962                 ch = process_escape (ch);
 963             }
 964           sprintf (out_buf, "%d", (int) (unsigned char) ch);
 965
 966           /* None of these 'x constants for us.  We want 'x'.  */
 967           if ((ch = GET ()) != '\'')
 968             {
 969 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
 970               as_warn (_("Missing close quote: (assumed)"));
 971 #else
 972               if (ch != EOF)
 973                 UNGET (ch);
 974 #endif
 975             }
 976           if (strlen (out_buf) == 1)
 977             {
 978               PUT (out_buf[0]);
 979               break;
 980             }
 981           if (state == 9)
 982             old_state = 3;
 983           else
 984             old_state = state;
 985           state = -1;
 986           out_string = out_buf;
 987           PUT (*out_string++);
 988           break;
 989 #endif
 990
 991         case LEX_IS_COLON:
 992 #ifdef KEEP_WHITE_AROUND_COLON
 993           state = 9;
 994 #else
 995           if (state == 9 || state == 10)
 996             state = 3;
 997           else if (state != 3)
 998             state = 1;
 999 #endif
1000           PUT (ch);
1001           break;
1002
1003         case LEX_IS_NEWLINE:
1004           /* Roll out a bunch of newlines from inside comments, etc.  */
1005           if (add_newlines)
1006             {
1007               --add_newlines;
1008               UNGET (ch);
1009             }
1010           /* fall thru into... */
1011
1012         case LEX_IS_LINE_SEPARATOR:
1013           state = 0;
1014           PUT (ch);
1015           break;
1016
1017 #ifdef TC_V850
1018         case LEX_IS_DOUBLEDASH_1ST:
1019           ch2 = GET();
1020           if (ch2 != '-')
1021             {
1022               UNGET (ch2);
1023               goto de_fault;
1024             }
1025           /* read and skip to end of line */
1026           do
1027             {
1028               ch = GET ();
1029             }
1030           while (ch != EOF && ch != '\n');
1031           if (ch == EOF)
1032             {
1033               as_warn (_("end of file in comment; newline inserted"));
1034             }
1035           state = 0;
1036           PUT ('\n');
1037           break;
1038 #endif
1039 #ifdef DOUBLEBAR_PARALLEL
1040         case LEX_IS_DOUBLEBAR_1ST:
1041           ch2 = GET();
1042           if (ch2 != '|')
1043             {
1044               UNGET (ch2);
1045               goto de_fault;
1046             }
1047           /* Reset back to state 1 and pretend that we are parsing a line from
1048              just after the first white space.  */
1049           state = 1;
1050           PUT ('|');
1051           PUT ('|');
1052           break;
1053 #endif
1054         case LEX_IS_LINE_COMMENT_START:
1055           /* FIXME-someday: The two character comment stuff was badly
1056              thought out.  On i386, we want '/' as line comment start
1057              AND we want C style comments.  hence this hack.  The
1058              whole lexical process should be reworked.  xoxorich.  */
1059           if (ch == '/')
1060             {
1061               ch2 = GET ();
1062               if (ch2 == '*')
1063                 {
1064                   old_state = 3;
1065                   state = -2;
1066                   break;
1067                 }
1068               else
1069                 {
1070                   UNGET (ch2);
1071                 }
1072             } /* bad hack */
1073
1074           if (state == 0 || state == 1) /* Only comment at start of line.  */
1075             {
1076               int startch;
1077
1078               startch = ch;
1079
1080               do
1081                 {
1082                   ch = GET ();
1083                 }
1084               while (ch != EOF && IS_WHITESPACE (ch));
1085               if (ch == EOF)
1086                 {
1087                   as_warn (_("end of file in comment; newline inserted"));
1088                   PUT ('\n');
1089                   break;
1090                 }
1091               if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1092                 {
1093                   /* Not a cpp line.  */
1094                   while (ch != EOF && !IS_NEWLINE (ch))
1095                     ch = GET ();
1096                   if (ch == EOF)
1097                     as_warn (_("EOF in Comment: Newline inserted"));
1098                   state = 0;
1099                   PUT ('\n');
1100                   break;
1101                 }
1102               /* Looks like `# 123 "filename"' from cpp.  */
1103               UNGET (ch);
1104               old_state = 4;
1105               state = -1;
1106               if (scrub_m68k_mri)
1107                 out_string = "\tappline ";
1108               else
1109                 out_string = "\t.appline ";
1110               PUT (*out_string++);
1111               break;
1112             }
1113
1114 #ifdef TC_D10V
1115           /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1116              Trap is the only short insn that has a first operand that is
1117              neither register nor label.
1118              We must prevent exef0f ||trap #1 from degenerating to exef0f ||trap#1 .
1119              We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is already
1120              LEX_IS_LINE_COMMENT_START.  However, it is the only character in
1121              line_comment_chars for d10v, hence we can recognize it as such.  */
1122           /* An alternative approach would be to reset the state to 1 when
1123              we see '||', '<-' or '->', but that seems to be overkill.  */
1124           if (state == 10) PUT (' ');
1125 #endif
1126           /* We have a line comment character which is not at the
1127              start of a line.  If this is also a normal comment
1128              character, fall through.  Otherwise treat it as a default
1129              character.  */
1130           if (strchr (tc_comment_chars, ch) == NULL
1131               && (! scrub_m68k_mri
1132                   || (ch != '!' && ch != '*')))
1133             goto de_fault;
1134           if (scrub_m68k_mri
1135               && (ch == '!' || ch == '*' || ch == '#')
1136               && state != 1
1137               && state != 10)
1138             goto de_fault;
1139           /* Fall through.  */
1140         case LEX_IS_COMMENT_START:
1141 #if defined TC_ARM && defined OBJ_ELF
1142           /* On the ARM, `@' is the comment character.
1143              Unfortunately this is also a special character in ELF .symver
1144              directives (and .type, though we deal with those another way).  So
1145              we check if this line is such a directive, and treat the character
1146              as default if so.  This is a hack.  */
1147           if ((symver_state != NULL) && (*symver_state == 0))
1148             goto de_fault;
1149 #endif
1150           do
1151             {
1152               ch = GET ();
1153             }
1154           while (ch != EOF && !IS_NEWLINE (ch));
1155           if (ch == EOF)
1156             as_warn (_("end of file in comment; newline inserted"));
1157           state = 0;
1158           PUT ('\n');
1159           break;
1160
1161         case LEX_IS_SYMBOL_COMPONENT:
1162           if (state == 10)
1163             {
1164               /* This is a symbol character following another symbol
1165                  character, with whitespace in between.  We skipped
1166                  the whitespace earlier, so output it now.  */
1167               UNGET (ch);
1168               state = 3;
1169               PUT (' ');
1170               break;
1171             }
1172
1173           if (state == 3)
1174             state = 9;
1175
1176           /* This is a common case.  Quickly copy CH and all the
1177              following symbol component or normal characters.  */
1178           if (to + 1 < toend
1179               && mri_state == NULL
1180 #if defined TC_ARM && defined OBJ_ELF
1181               && symver_state == NULL
1182 #endif
1183               )
1184             {
1185               char *s;
1186               int len;
1187
1188               for (s = from; s < fromend; s++)
1189                 {
1190                   int type;
1191
1192                   ch2 = * (unsigned char *) s;
1193                   type = lex[ch2];
1194                   if (type != 0
1195                       && type != LEX_IS_SYMBOL_COMPONENT)
1196                     break;
1197                 }
1198               if (s > from)
1199                 {
1200                   /* Handle the last character normally, for
1201                      simplicity.  */
1202                   --s;
1203                 }
1204               len = s - from;
1205               if (len > (toend - to) - 1)
1206                 len = (toend - to) - 1;
1207               if (len > 0)
1208                 {
1209                   PUT (ch);
1210                   if (len > 8)
1211                     {
1212                       memcpy (to, from, len);
1213                       to += len;
1214                       from += len;
1215                     }
1216                   else
1217                     {
1218                       switch (len)
1219                         {
1220                         case 8: *to++ = *from++;
1221                         case 7: *to++ = *from++;
1222                         case 6: *to++ = *from++;
1223                         case 5: *to++ = *from++;
1224                         case 4: *to++ = *from++;
1225                         case 3: *to++ = *from++;
1226                         case 2: *to++ = *from++;
1227                         case 1: *to++ = *from++;
1228                         }
1229                     }
1230                   ch = GET ();
1231                 }
1232             }
1233
1234           /* Fall through.  */
1235         default:
1236         de_fault:
1237           /* Some relatively `normal' character.  */
1238           if (state == 0)
1239             {
1240               state = 11;       /* Now seeing label definition */
1241             }
1242           else if (state == 1)
1243             {
1244               state = 2;        /* Ditto */
1245             }
1246           else if (state == 9)
1247             {
1248               if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
1249                 state = 3;
1250             }
1251           else if (state == 10)
1252             {
1253               if (ch == '\\')
1254                 {
1255                   /* Special handling for backslash: a backslash may
1256                      be the beginning of a formal parameter (of a
1257                      macro) following another symbol character, with
1258                      whitespace in between.  If that is the case, we
1259                      output a space before the parameter.  Strictly
1260                      speaking, correct handling depends upon what the
1261                      macro parameter expands into; if the parameter
1262                      expands into something which does not start with
1263                      an operand character, then we don't want to keep
1264                      the space.  We don't have enough information to
1265                      make the right choice, so here we are making the
1266                      choice which is more likely to be correct.  */
1267                   PUT (' ');
1268                 }
1269
1270               state = 3;
1271             }
1272           PUT (ch);
1273           break;
1274         }
1275     }
1276
1277   /*NOTREACHED*/
1278
1279  fromeof:
1280   /* We have reached the end of the input.  */
1281   return to - tostart;
1282
1283  tofull:
1284   /* The output buffer is full.  Save any input we have not yet
1285      processed.  */
1286   if (fromend > from)
1287     {
1288       saved_input = from;
1289       saved_input_len = fromend - from;
1290     }
1291   else
1292     saved_input = NULL;
1293
1294   return to - tostart;
1295 }
1296
1297 /* end of app.c */