ragel/rlscan.rl

   1 /*
   2  *  Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca>
   3  */
   4
   5 /*  This file is part of Ragel.
   6  *
   7  *  Ragel is free software; you can redistribute it and/or modify
   8  *  it under the terms of the GNU General Public License as published by
   9  *  the Free Software Foundation; either version 2 of the License, or
  10  *  (at your option) any later version.
  11  *
  12  *  Ragel is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  *  GNU General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU General Public License
  18  *  along with Ragel; if not, write to the Free Software
  19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  */
  21
  22 #include <iostream>
  23 #include <fstream>
  24 #include <string.h>
  25
  26 #include "ragel.h"
  27 #include "rlparse.h"
  28 #include "parsedata.h"
  29 #include "avltree.h"
  30 #include "vector.h"
  31
  32
  33 using std::ifstream;
  34 using std::istream;
  35 using std::ostream;
  36 using std::cout;
  37 using std::cerr;
  38 using std::endl;
  39
  40 /* This is used for tracking the current stack of include file/machine pairs. It is
  41  * is used to detect and recursive include structure. */
  42 struct IncludeStackItem
  43 {
  44         IncludeStackItem( char *fileName, char *sectionName )
  45                 : fileName(fileName), sectionName(sectionName) {}
  46
  47         char *fileName;
  48         char *sectionName;
  49 };
  50
/* The global stack of includes in progress. The handle_include action of the
 * section parser appends the including file/section pair before it scans an
 * included file and removes it again afterwards. */
typedef Vector<IncludeStackItem> IncludeStack;
IncludeStack includeStack;
  53
  54 enum InlineBlockType
  55 {
  56         CurlyDelimited,
  57         SemiTerminated
  58 };
  59
  60 struct Scanner
  61 {
  62         Scanner( char *fileName, istream &input,
  63                         Parser *inclToParser, char *inclSectionTarg,
  64                         int include_depth )
  65         :
  66                 fileName(fileName), input(input),
  67                 inclToParser(inclToParser),
  68                 inclSectionTarg(inclSectionTarg),
  69                 include_depth(include_depth),
  70                 line(1), column(1), lastnl(0),
  71                 parser(0), active(false),
  72                 parserExistsError(false), ragelDefOpen(false),
  73                 whitespaceOn(true)
  74                 {}
  75
  76         bool recursiveInclude( IncludeStack &includeStack,
  77                         char *inclFileName, char *inclSectionName );
  78
  79         char *prepareFileName( char *fileName, int len )
  80         {
  81                 bool caseInsensitive;
  82                 Token tokenFnStr, tokenRes;
  83                 tokenFnStr.data = fileName;
  84                 tokenFnStr.length = len;
  85                 tokenFnStr.prepareLitString( tokenRes, caseInsensitive );
  86                 return tokenRes.data;
  87         }
  88
  89         void init();
  90         void token( int type, char *start, char *end );
  91         void token( int type, char c );
  92         void token( int type );
  93         void updateCol();
  94         void startSection();
  95         void endSection();
  96         void openRagelDef();
  97         void do_scan();
  98         bool parserExists();
  99         ostream &error();
 100
 101         char *fileName;
 102         istream &input;
 103         Parser *inclToParser;
 104         char *inclSectionTarg;
 105         int include_depth;
 106
 107         int cs;
 108         int line;
 109         char *word, *lit;
 110         int word_len, lit_len;
 111         InputLoc sectionLoc;
 112         char *tokstart, *tokend;
 113         int column;
 114         char *lastnl;
 115
 116         /* Set by machine statements, these persist from section to section
 117          * allowing for unnamed sections. */
 118         Parser *parser;
 119         bool active;
 120
 121         /* This is set if ragel has already emitted an error stating that
 122          * no section name has been seen and thus no parser exists. */
 123         bool parserExistsError;
 124         bool ragelDefOpen;
 125
 126         /* This is for inline code. By default it is on. It goes off for
 127          * statements and values in inline blocks which are parsed. */
 128         bool whitespaceOn;
 129 };
 130
/* Declare the section_parse machine and emit its static data. The alphabet
 * type is int because the machine runs over token types, not characters. */
%%{
        machine section_parse;
        alphtype int;
        write data;
}%%
 136
/* Initializes the section_parse machine. The write statement is replaced by
 * the initialization code that ragel generates for the machine. */
void Scanner::init( )
{
        %% write init;
}
 141
 142 bool Scanner::parserExists()
 143 {
 144         if ( parser != 0 )
 145                 return true;
 146
 147         if ( ! parserExistsError ) {
 148                 error() << "include: there is no previous specification name" << endl;
 149                 parserExistsError = true;
 150         }
 151         return false;
 152 }
 153
 154 ostream &Scanner::error()
 155 {
 156         /* Maintain the error count. */
 157         gblErrorCount += 1;
 158
 159         cerr << fileName << ":" << line << ":" << column << ": ";
 160         return cerr;
 161 }
 162
 163 bool Scanner::recursiveInclude( IncludeStack &includeStack,
 164                         char *inclFileName, char *inclSectionName )
 165 {
 166         for ( IncludeStack::Iter si = includeStack; si.lte(); si++ ) {
 167                 if ( strcmp( si->fileName, inclFileName ) == 0 &&
 168                                 strcmp( si->sectionName, inclSectionName ) == 0 )
 169                 {
 170                         return true;
 171                 }
 172         }
 173         return false;
 174 }
 175
 176 void Scanner::updateCol()
 177 {
 178         char *from = lastnl;
 179         if ( from == 0 )
 180                 from = tokstart;
 181         //cerr << "adding " << tokend - from << " to column" << endl;
 182         column += tokend - from;
 183         lastnl = 0;
 184 }
 185
 186 void Scanner::token( int type, char c )
 187 {
 188         token( type, &c, &c + 1 );
 189 }
 190
 191 void Scanner::token( int type )
 192 {
 193         token( type, 0, 0 );
 194 }
 195
 196 %%{
 197         machine section_parse;
 198
 199         # This relies on the the kelbt implementation and the order
 200         # that tokens are declared.
 201         KW_Machine = 128;
 202         KW_Include = 129;
 203         KW_Write = 130;
 204         TK_Word = 131;
 205         TK_Literal = 132;
 206
 207         action clear_words { word = lit = 0; word_len = lit_len = 0; }
 208         action store_word { word = tokdata; word_len = toklen; }
 209         action store_lit { lit = tokdata; lit_len = toklen; }
 210
 211         action mach_err { error() << "bad machine statement" << endl; }
 212         action incl_err { error() << "bad include statement" << endl; }
 213         action write_err { error() << "bad write statement" << endl; }
 214
 215         action handle_machine
 216         {
 217                 /* Assign a name to the machine. */
 218                 char *machine = word;
 219
 220                 if ( inclSectionTarg == 0 ) {
 221                         active = true;
 222
 223                         ParserDictEl *pdEl = parserDict.find( machine );
 224                         if ( pdEl == 0 ) {
 225                                 pdEl = new ParserDictEl( machine );
 226                                 pdEl->value = new Parser( fileName, machine, sectionLoc );
 227                                 pdEl->value->init();
 228                                 parserDict.insert( pdEl );
 229                         }
 230
 231                         parser = pdEl->value;
 232                 }
 233                 else if ( strcmp( inclSectionTarg, machine ) == 0 ) {
 234                         /* found include target */
 235                         active = true;
 236                         parser = inclToParser;
 237                 }
 238                 else {
 239                         /* ignoring section */
 240                         active = false;
 241                         parser = 0;
 242                 }
 243         }
 244
 245         machine_stmt =
 246                 ( KW_Machine TK_Word @store_word ';' ) @handle_machine
 247                 <>err mach_err <>eof mach_err;
 248
 249         action handle_include
 250         {
 251                 if ( active && parserExists() ) {
 252                         char *inclSectionName = word;
 253                         char *inclFileName = 0;
 254
 255                         /* Implement defaults for the input file and section name. */
 256                         if ( inclSectionName == 0 )
 257                                 inclSectionName = parser->sectionName;
 258
 259                         if ( lit != 0 )
 260                                 inclFileName = prepareFileName( lit, lit_len );
 261                         else
 262                                 inclFileName = fileName;
 263
 264                         /* Check for a recursive include structure. Add the current file/section
 265                          * name then check if what we are including is already in the stack. */
 266                         includeStack.append( IncludeStackItem( fileName, parser->sectionName ) );
 267
 268                         if ( recursiveInclude( includeStack, inclFileName, inclSectionName ) )
 269                                 error() << "include: this is a recursive include operation" << endl;
 270                         else {
 271                                 /* Open the input file for reading. */
 272                                 ifstream *inFile = new ifstream( inclFileName );
 273                                 if ( ! inFile->is_open() ) {
 274                                         error() << "include: could not open " <<
 275                                                         inclFileName << " for reading" << endl;
 276                                 }
 277
 278                                 Scanner scanner( inclFileName, *inFile, parser,
 279                                                 inclSectionName, include_depth+1 );
 280                                 scanner.init();
 281                                 scanner.do_scan( );
 282                                 delete inFile;
 283                         }
 284
 285                         /* Remove the last element (len-1) */
 286                         includeStack.remove( -1 );
 287                 }
 288         }
 289
 290         include_names = (
 291                 TK_Word @store_word ( TK_Literal @store_lit )? |
 292                 TK_Literal @store_lit
 293         ) >clear_words;
 294
 295         include_stmt =
 296                 ( KW_Include include_names ';' ) @handle_include
 297                 <>err incl_err <>eof incl_err;
 298
 299         action write_command
 300         {
 301                 if ( active ) {
 302                         openRagelDef();
 303                         if ( strcmp( tokdata, "data" ) != 0 &&
 304                                         strcmp( tokdata, "init" ) != 0 &&
 305                                         strcmp( tokdata, "exec" ) != 0 &&
 306                                         strcmp( tokdata, "eof" ) != 0 )
 307                         {
 308                                 error() << "unknown write command" << endl;
 309                         }
 310                         *outStream << "  <write what=\"" << tokdata << "\">";
 311                 }
 312         }
 313
 314         action write_option
 315         {
 316                 if ( active )
 317                         *outStream << "<option>" << tokdata << "</option>";
 318         }
 319         action write_close
 320         {
 321                 if ( active )
 322                         *outStream << "</write>\n";
 323         }
 324
 325         write_stmt =
 326                 ( KW_Write TK_Word @write_command
 327                         ( TK_Word @write_option )* ';' @write_close )
 328                 <>err write_err <>eof write_err;
 329
 330         action handle_token
 331         {
 332                 /* Send the token off to the parser. */
 333                 if ( active && parserExists() ) {
 334                         InputLoc loc;
 335
 336                         #if 0
 337                         cerr << "scanner:" << line << ":" << column <<
 338                                         ": sending token to the parser " << lelNames[*p];
 339                         cerr << " " << toklen;
 340                         if ( tokdata != 0 )
 341                                 cerr << " " << tokdata;
 342                         cerr << endl;
 343                         #endif
 344
 345                         loc.fileName = fileName;
 346                         loc.line = line;
 347                         loc.col = column;
 348
 349                         parser->token( loc, type, tokdata, toklen );
 350                 }
 351         }
 352
 353         # Catch everything else.
 354         everything_else = ^( KW_Machine | KW_Include | KW_Write ) @handle_token;
 355
 356         main := (
 357                 machine_stmt |
 358                 include_stmt |
 359                 write_stmt |
 360                 everything_else
 361         )*;
 362 }%%
 363
 364 void Scanner::token( int type, char *start, char *end )
 365 {
 366         char *tokdata = 0;
 367         int toklen = 0;
 368         int *p = &type;
 369         int *pe = &type + 1;
 370
 371         if ( start != 0 ) {
 372                 toklen = end-start;
 373                 tokdata = new char[toklen+1];
 374                 memcpy( tokdata, start, toklen );
 375                 tokdata[toklen] = 0;
 376         }
 377
 378         %%{
 379                 machine section_parse;
 380                 write exec;
 381         }%%
 382
 383         updateCol();
 384 }
 385
 386 void Scanner::startSection( )
 387 {
 388         parserExistsError = false;
 389
 390         if ( include_depth == 0 ) {
 391                 if ( machineSpec == 0 && machineName == 0 )
 392                         *outStream << "</host>\n";
 393                 ragelDefOpen = false;
 394         }
 395
 396         sectionLoc.fileName = fileName;
 397         sectionLoc.line = line;
 398         sectionLoc.col = 0;
 399 }
 400
 401 void Scanner::openRagelDef()
 402 {
 403         if ( ! ragelDefOpen ) {
 404                 ragelDefOpen = true;
 405                 *outStream << "<ragel_def name=\"" << parser->sectionName << "\">\n";
 406         }
 407 }
 408
 409 void Scanner::endSection( )
 410 {
 411         /* Execute the eof actions for the section parser. */
 412         %%{
 413                 machine section_parse;
 414                 write eof;
 415         }%%
 416
 417         /* Close off the section with the parser. */
 418         if ( active && parserExists() ) {
 419                 InputLoc loc;
 420                 loc.fileName = fileName;
 421                 loc.line = line;
 422                 loc.col = 0;
 423
 424                 parser->token( loc, TK_EndSection, 0, 0 );
 425         }
 426
 427         if ( include_depth == 0 ) {
 428                 if ( ragelDefOpen ) {
 429                         *outStream << "</ragel_def>\n";
 430                         ragelDefOpen = false;
 431                 }
 432
 433                 if ( machineSpec == 0 && machineName == 0 ) {
 434                         /* The end section may include a newline on the end, so
 435                          * we use the last line, which will count the newline. */
 436                         *outStream << "<host line=\"" << line << "\">";
 437                 }
 438         }
 439 }
 440
 441 %%{
 442         machine rlscan;
 443
 444         # This is sent by the driver code.
 445         EOF = 0;
 446
 447         action inc_nl {
 448                 lastnl = p;
 449                 column = 0;
 450                 line++;
 451         }
 452         NL = '\n' @inc_nl;
 453
 454         # Identifiers, numbers, commetns, and other common things.
 455         ident = ( alpha | '_' ) ( alpha |digit |'_' )*;
 456         number = digit+;
 457         hex_number = '0x' [0-9a-fA-F]+;
 458
 459         c_comment =
 460                 '/*' ( any | NL )* :>> '*/';
 461
 462         cpp_comment =
 463                 '//' [^\n]* NL;
 464
 465         c_cpp_comment = c_comment | cpp_comment;
 466
 467         # These literal forms are common to C-like host code and ragel.
 468         s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'";
 469         d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"';
 470
 471         whitespace = [ \t] | NL;
 472         pound_comment = '#' [^\n]* NL;
 473
 474         # An inline block of code. This is specified as a scanned, but is sent to
 475         # the parser as one long block. The inline_block pointer is used to handle
 476         # the preservation of the data.
 477         inline_code := |*
 478                 # Inline expression keywords.
 479                 "fpc" => { token( KW_PChar ); };
 480                 "fc" => { token( KW_Char ); };
 481                 "fcurs" => { token( KW_CurState ); };
 482                 "ftargs" => { token( KW_TargState ); };
 483                 "fentry" => {
 484                         whitespaceOn = false;
 485                         token( KW_Entry );
 486                 };
 487
 488                 # Inline statement keywords.
 489                 "fhold" => {
 490                         whitespaceOn = false;
 491                         token( KW_Hold );
 492                 };
 493                 "fexec" => { token( KW_Exec, 0, 0 ); };
 494                 "fgoto" => {
 495                         whitespaceOn = false;
 496                         token( KW_Goto );
 497                 };
 498                 "fnext" => {
 499                         whitespaceOn = false;
 500                         token( KW_Next );
 501                 };
 502                 "fcall" => {
 503                         whitespaceOn = false;
 504                         token( KW_Call );
 505                 };
 506                 "fret" => {
 507                         whitespaceOn = false;
 508                         token( KW_Ret );
 509                 };
 510                 "fbreak" => {
 511                         whitespaceOn = false;
 512                         token( KW_Break );
 513                 };
 514
 515                 ident => { token( TK_Word, tokstart, tokend ); };
 516
 517                 number => { token( TK_UInt, tokstart, tokend ); };
 518                 hex_number => { token( TK_Hex, tokstart, tokend ); };
 519
 520                 ( s_literal | d_literal )
 521                         => { token( IL_Literal, tokstart, tokend ); };
 522
 523                 whitespace+ => {
 524                         if ( whitespaceOn )
 525                                 token( IL_WhiteSpace, tokstart, tokend );
 526                 };
 527                 c_cpp_comment => { token( IL_Comment, tokstart, tokend ); };
 528
 529                 "::" => { token( TK_NameSep, tokstart, tokend ); };
 530
 531                 # Some symbols need to go to the parser as with their cardinal value as
 532                 # the token type (as opposed to being sent as anonymous symbols)
 533                 # because they are part of the sequences which we interpret. The * ) ;
 534                 # symbols cause whitespace parsing to come back on. This gets turned
 535                 # off by some keywords.
 536
 537                 ";" => {
 538                         whitespaceOn = true;
 539                         token( *tokstart, tokstart, tokend );
 540                         if ( inlineBlockType == SemiTerminated )
 541                                 fgoto parser_def;
 542                 };
 543
 544                 [*)] => {
 545                         whitespaceOn = true;
 546                         token( *tokstart, tokstart, tokend );
 547                 };
 548
 549                 [,(] => { token( *tokstart, tokstart, tokend ); };
 550
 551                 '{' => {
 552                         token( IL_Symbol, tokstart, tokend );
 553                         curly_count += 1;
 554                 };
 555
 556                 '}' => {
 557                         if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) {
 558                                 /* Inline code block ends. */
 559                                 token( '}' );
 560                                 fgoto parser_def;
 561                         }
 562                         else {
 563                                 /* Either a semi terminated inline block or only the closing
 564                                  * brace of some inner scope, not the block's closing brace. */
 565                                 token( IL_Symbol, tokstart, tokend );
 566                         }
 567                 };
 568
 569                 EOF => {
 570                         error() << "unterminated code block" << endl;
 571                 };
 572
 573                 # Send every other character as a symbol.
 574                 any => { token( IL_Symbol, tokstart, tokend ); };
 575         *|;
 576
 577         or_literal := |*
 578                 # Escape sequences in OR expressions.
 579                 '\\0' => { token( RE_Char, '\0' ); };
 580                 '\\a' => { token( RE_Char, '\a' ); };
 581                 '\\b' => { token( RE_Char, '\b' ); };
 582                 '\\t' => { token( RE_Char, '\t' ); };
 583                 '\\n' => { token( RE_Char, '\n' ); };
 584                 '\\v' => { token( RE_Char, '\v' ); };
 585                 '\\f' => { token( RE_Char, '\f' ); };
 586                 '\\r' => { token( RE_Char, '\r' ); };
 587                 '\\\n' => { updateCol(); };
 588                 '\\' any => { token( RE_Char, tokstart+1, tokend ); };
 589
 590                 # Range dash in an OR expression.
 591                 '-' => { token( RE_Dash, 0, 0 ); };
 592
 593                 # Terminate an OR expression.
 594                 ']'     => { token( RE_SqClose ); fret; };
 595
 596                 EOF => {
 597                         error() << "unterminated OR literal" << endl;
 598                 };
 599
 600                 # Characters in an OR expression.
 601                 [^\]] => { token( RE_Char, tokstart, tokend ); };
 602
 603         *|;
 604
 605         re_literal := |*
 606                 # Escape sequences in regular expressions.
 607                 '\\0' => { token( RE_Char, '\0' ); };
 608                 '\\a' => { token( RE_Char, '\a' ); };
 609                 '\\b' => { token( RE_Char, '\b' ); };
 610                 '\\t' => { token( RE_Char, '\t' ); };
 611                 '\\n' => { token( RE_Char, '\n' ); };
 612                 '\\v' => { token( RE_Char, '\v' ); };
 613                 '\\f' => { token( RE_Char, '\f' ); };
 614                 '\\r' => { token( RE_Char, '\r' ); };
 615                 '\\\n' => { updateCol(); };
 616                 '\\' any => { token( RE_Char, tokstart+1, tokend ); };
 617
 618                 # Terminate an OR expression.
 619                 '/' [i]? => {
 620                         token( RE_Slash, tokstart, tokend );
 621                         fgoto parser_def;
 622                 };
 623
 624                 # Special characters.
 625                 '.' => { token( RE_Dot ); };
 626                 '*' => { token( RE_Star ); };
 627
 628                 '[' => { token( RE_SqOpen ); fcall or_literal; };
 629                 '[^' => { token( RE_SqOpenNeg ); fcall or_literal; };
 630
 631                 EOF => {
 632                         error() << "unterminated regular expression" << endl;
 633                 };
 634
 635                 # Characters in an OR expression.
 636                 [^\/] => { token( RE_Char, tokstart, tokend ); };
 637         *|;
 638
 639         write_statement := |*
 640                 ident => { token( TK_Word, tokstart, tokend ); } ;
 641                 [ \t\n]+ => { updateCol(); };
 642                 ';' => { token( ';' ); fgoto parser_def; };
 643
 644                 EOF => {
 645                         error() << "unterminated write statement" << endl;
 646                 };
 647         *|;
 648
 649         # Parser definitions.
 650         parser_def := |*
 651                 'machine' => { token( KW_Machine ); };
 652                 'include' => { token( KW_Include ); };
 653                 'write' => {
 654                         token( KW_Write );
 655                         fgoto write_statement;
 656                 };
 657                 'action' => { token( KW_Action ); };
 658                 'alphtype' => { token( KW_AlphType ); };
 659
 660                 # FIXME: Enable this post 5.17.
 661                 # 'range' => { token( KW_Range ); };
 662
 663                 'getkey' => {
 664                         token( KW_GetKey );
 665                         inlineBlockType = SemiTerminated;
 666                         fgoto inline_code;
 667                 };
 668                 'access' => {
 669                         token( KW_Access );
 670                         inlineBlockType = SemiTerminated;
 671                         fgoto inline_code;
 672                 };
 673                 'variable' => {
 674                         token( KW_Variable );
 675                         inlineBlockType = SemiTerminated;
 676                         fgoto inline_code;
 677                 };
 678                 'when' => { token( KW_When ); };
 679                 'eof' => { token( KW_Eof ); };
 680                 'err' => { token( KW_Err ); };
 681                 'lerr' => { token( KW_Lerr ); };
 682                 'to' => { token( KW_To ); };
 683                 'from' => { token( KW_From ); };
 684
 685                 # Identifiers.
 686                 ident => { token( TK_Word, tokstart, tokend ); } ;
 687
 688                 # Numbers
 689                 number => { token( TK_UInt, tokstart, tokend ); };
 690                 hex_number => { token( TK_Hex, tokstart, tokend ); };
 691
 692                 # Literals, with optionals.
 693                 ( s_literal | d_literal ) [i]?
 694                         => { token( TK_Literal, tokstart, tokend ); };
 695
 696                 '[' => { token( RE_SqOpen ); fcall or_literal; };
 697                 '[^' => { token( RE_SqOpenNeg ); fcall or_literal; };
 698
 699                 '/' => { token( RE_Slash ); fgoto re_literal; };
 700
 701                 # Ignore.
 702                 pound_comment => { updateCol(); };
 703
 704                 ':=' => { token( TK_ColonEquals ); };
 705
 706                 # To State Actions.
 707                 ">~" => { token( TK_StartToState ); };
 708                 "$~" => { token( TK_AllToState ); };
 709                 "%~" => { token( TK_FinalToState ); };
 710                 "<~" => { token( TK_NotStartToState ); };
 711                 "@~" => { token( TK_NotFinalToState ); };
 712                 "<>~" => { token( TK_MiddleToState ); };
 713
 714                 # From State actions
 715                 ">*" => { token( TK_StartFromState ); };
 716                 "$*" => { token( TK_AllFromState ); };
 717                 "%*" => { token( TK_FinalFromState ); };
 718                 "<*" => { token( TK_NotStartFromState ); };
 719                 "@*" => { token( TK_NotFinalFromState ); };
 720                 "<>*" => { token( TK_MiddleFromState ); };
 721
 722                 # EOF Actions.
 723                 ">/" => { token( TK_StartEOF ); };
 724                 "$/" => { token( TK_AllEOF ); };
 725                 "%/" => { token( TK_FinalEOF ); };
 726                 "</" => { token( TK_NotStartEOF ); };
 727                 "@/" => { token( TK_NotFinalEOF ); };
 728                 "<>/" => { token( TK_MiddleEOF ); };
 729
 730                 # Global Error actions.
 731                 ">!" => { token( TK_StartGblError ); };
 732                 "$!" => { token( TK_AllGblError ); };
 733                 "%!" => { token( TK_FinalGblError ); };
 734                 "<!" => { token( TK_NotStartGblError ); };
 735                 "@!" => { token( TK_NotFinalGblError ); };
 736                 "<>!" => { token( TK_MiddleGblError ); };
 737
 738                 # Local error actions.
 739                 ">^" => { token( TK_StartLocalError ); };
 740                 "$^" => { token( TK_AllLocalError ); };
 741                 "%^" => { token( TK_FinalLocalError ); };
 742                 "<^" => { token( TK_NotStartLocalError ); };
 743                 "@^" => { token( TK_NotFinalLocalError ); };
 744                 "<>^" => { token( TK_MiddleLocalError ); };
 745
 746                 # Middle.
 747                 "<>" => { token( TK_Middle ); };
 748
 749                 # Conditions.
 750                 '>?' => { token( TK_StartCond ); };
 751                 '$?' => { token( TK_AllCond ); };
 752                 '%?' => { token( TK_LeavingCond ); };
 753
 754                 '..' => { token( TK_DotDot ); };
 755                 '**' => { token( TK_StarStar ); };
 756                 '--' => { token( TK_DashDash ); };
 757                 '->' => { token( TK_Arrow ); };
 758                 '=>' => { token( TK_DoubleArrow ); };
 759
 760                 ":>"  => { token( TK_ColonGt ); };
 761                 ":>>" => { token( TK_ColonGtGt ); };
 762                 "<:"  => { token( TK_LtColon ); };
 763
 764                 # Opening of longest match.
 765                 "|*" => { token( TK_BarStar ); };
 766
 767                 '}%%' => {
 768                         /* In order to generate anything we must be in the top level file
 769                          * and the current spec must be active and there must not have been
 770                          * any parse errors. */
 771                         updateCol();
 772                         endSection();
 773                         fgoto main;
 774                 };
 775
 776                 [ \t]+ => { updateCol(); };
 777
 778                 # If we are in a single line machine then newline may end the spec.
 779                 NL => {
 780                         updateCol();
 781                         if ( singleLineSpec ) {
 782                                 /* In order to generate anything we must be in the top level file
 783                                  * and the current spec must be active and there must not have been
 784                                  * any parse errors. */
 785                                 endSection();
 786                                 fgoto main;
 787                         }
 788                 };
 789
 790                 '{' => {
 791                         token( '{' );
 792                         curly_count = 1;
 793                         inlineBlockType = CurlyDelimited;
 794                         fgoto inline_code;
 795                 };
 796
 797                 EOF => {
 798                         error() << "unterminated ragel section" << endl;
 799                 };
 800
 801                 any => { token( *tokstart ); } ;
 802         *|;
 803
 804         action pass {
                     /* Pass-through action: forwards the text of the current token,
                      * the tokend-tokstart bytes starting at tokstart, to *outStream
                      * by way of xmlEscapeHost(). */
 805                 updateCol();
 806
 807                 /* The text is written only when include_depth is zero and neither
 808                  * machineSpec nor machineName is set; otherwise it is dropped.
                      * NOTE(review): presumably this selects the file named on the
                      * command line with no machine explicitly requested; confirm
                      * where these members are assigned. No error state is tested
                      * here. */
 809                 if ( include_depth == 0 && machineSpec == 0 && machineName == 0 )
 810                         xmlEscapeHost( *outStream, tokstart, tokend-tokstart );
 811         }
 812
 813         # Outside code scanner. These tokens get passed through.
             # Handles the host-language text found outside of ragel sections.
             # Every alternative except the two section openers and EOF runs
             # the `pass` action.
 814         main := |*
                     # Host-language lexemes are matched as whole tokens. Presumably
                     # this keeps a '%%' inside a comment or string literal from being
                     # read as a section opener; the definitions of these machines are
                     # outside this part of the file.
 815                 ident => pass;
 816                 number => pass;
 817                 c_cpp_comment => pass;
 818                 s_literal | d_literal => pass;
                     # '%%{' and '%%' share a prefix. A ragel scanner takes the longest
                     # match, so '%%{' is chosen when the brace follows.
 819                 '%%{' => {
                             /* Multi-line section. singleLineSpec is cleared, so the
                              * newline rule (which calls endSection() only when
                              * singleLineSpec is set) will not end it. */
 820                         updateCol();
 821                         singleLineSpec = false;
 822                         startSection();
 823                         fgoto parser_def;
 824                 };
 825                 '%%' => {
                             /* Single-line section. With singleLineSpec set, the
                              * newline rule ends the section at the next newline. */
 826                         updateCol();
 827                         singleLineSpec = true;
 828                         startSection();
 829                         fgoto parser_def;
 830                 };
 831                 whitespace+ => pass;
                     # EOF has no action: it is matched and dropped, not passed through.
 832                 EOF;
                     # Fallback: any other single character is passed through.
 833                 any => pass;
 834         *|;
 835
 836 }%%
 837
 838 %% write data;
 839
 840 void Scanner::do_scan()
 841 {
             /* Feeds `input` through the ragel-generated scanner. Data is read in
              * chunks into a heap buffer. When a chunk ends while a token is still
              * being matched, the unfinished part is moved to the front of the
              * buffer and the next read is appended behind it. Takes no arguments
              * and returns nothing. A scanner failure is reported through error()
              * and ends the process with exit(1). */

             /* Starting capacity of the buffer; doubled each time it fills up. */
 842         int bufsize = 8;
 843         char *buf = new char[bufsize];
             /* Sentinel handed to the scanner once a read delivers no data. */
 844         const char last_char = 0;
             /* have: number of bytes kept at the front of buf from the previous
              * pass. cs is compared against rlscan_error below; cs and act are
              * otherwise presumably used only by the generated scanner code. */
 845         int cs, act, have = 0;
             /* Not referenced by the visible code of this function; presumably
              * needed by the generated scanner code -- confirm. */
 846         int top, stack[1];
             /* State shared with the scanner actions: the '{' action sets
              * curly_count and inlineBlockType, and the newline action tests
              * singleLineSpec. */
 847         int curly_count = 0;
 848         bool execute = true;
 849         bool singleLineSpec = false;
             /* NOTE(review): left uninitialized here. The '{' action assigns it
              * before jumping to inline_code; confirm that no other path reads
              * it first. */
 850         InlineBlockType inlineBlockType;
 851
             /* Ragel directive: expands to the scanner's initialization code. */
 852         %% write init;
 853
 854         while ( execute ) {
                     /* New data goes in behind the `have` bytes that were kept. */
 855                 char *p = buf + have;
 856                 int space = bufsize - have;
 857
 858                 if ( space == 0 ) {
 859                         /* We filled up the buffer trying to scan a token. Grow it. */
 860                         bufsize = bufsize * 2;
 861                         char *newbuf = new char[bufsize];
 862
 863                         /* Recompute p and space. */
 864                         p = newbuf + have;
 865                         space = bufsize - have;
 866
 867                         /* Patch up pointers possibly in use. tokstart and tokend are
                              * not declared in this function (presumably Scanner members).
                              * NOTE(review): tokend is rebased without the null check
                              * that tokstart gets; confirm it cannot be null here. */
 868                         if ( tokstart != 0 )
 869                                 tokstart = newbuf + ( tokstart - buf );
 870                         tokend = newbuf + ( tokend - buf );
 871
 872                         /* Copy the kept bytes into the new buffer and free the old one. */
 873                         memcpy( newbuf, buf, have );
 874                         delete[] buf;
 875                         buf = newbuf;
 876                 }
 877
                     /* space is at least 1 here: either have < bufsize already, or
                      * the buffer was just doubled. gcount() reports how many bytes
                      * the read actually stored. */
 878                 input.read( p, space );
 879                 int len = input.gcount();
 880
 881                 /* A read that delivers nothing is treated as end of input: give
                      * the scanner the single sentinel char and make this the last
                      * pass of the loop. */
 882                 if ( len == 0 ) {
 883                         p[0] = last_char, len = 1;
 884                         execute = false;
 885                 }
 886
                     /* pe marks the end of the data available in this pass. The
                      * directive below expands to the scanner's execution code. */
 887                 char *pe = p + len;
 888                 %% write exec;
 889
 890                 /* Check if we failed. */
 891                 if ( cs == rlscan_error ) {
 892                         /* Machine failed before finding a token. I'm not yet sure if this
 893                          * is reachable. */
 894                         error() << "scanner error" << endl;
 895                         exit(1);
 896                 }
 897
 898                 /* Decide if we need to preserve anything. A non-null tokstart is
                      * taken to mean that a token is still in progress at the end of
                      * this chunk. */
 899                 char *preserve = tokstart;
 900
 901                 /* Now set up the prefix. */
 902                 if ( preserve == 0 )
 903                         have = 0;
 904                 else {
 905                         /* There is data that needs to be shifted over. The bytes from
                              * preserve up to pe move to the start of buf, and the token
                              * pointers are lowered by the same distance. */
 906                         have = pe - preserve;
 907                         memmove( buf, preserve, have );
 908                         unsigned int shiftback = preserve - buf;
                             /* This test is always true in this branch, since preserve
                              * is a copy of tokstart and is non-null here. */
 909                         if ( tokstart != 0 )
 910                                 tokstart -= shiftback;
 911                         tokend -= shiftback;
 912
                             /* NOTE(review): preserve is not read again before it goes
                              * out of scope, so this assignment has no effect. */
 913                         preserve = buf;
 914                 }
 915         }
 916
 917         delete[] buf;
 918 }
 919
 920 void scan( char *fileName, istream &input )
 921 {
             /* Scans one input stream: builds a Scanner for `fileName` / `input`,
              * initializes it and runs do_scan(). The zero arguments include
              * inclToParser and inclSectionTarg from the Scanner constructor; the
              * meaning of the final zero is not visible from here. */
 922         Scanner scanner( fileName, input, 0, 0, 0 );
 923         scanner.init();
 924         scanner.do_scan();
 925
             /* Location of the end of input: column 1 of the line the scanner
              * stopped on. NOTE(review): eofLoc is filled in but never used
              * before the function returns. */
 926         InputLoc eofLoc;
 927         eofLoc.fileName = fileName;
 928         eofLoc.col = 1;
 929         eofLoc.line = scanner.line;
 930 }
 929         eofLoc.line = scanner.line;
 930 }