ragel/rlscan.rl

   1 /*
   2  *  Copyright 2006-2007 Adrian Thurston <thurston@cs.queensu.ca>
   3  */
   4
   5 /*  This file is part of Ragel.
   6  *
   7  *  Ragel is free software; you can redistribute it and/or modify
   8  *  it under the terms of the GNU General Public License as published by
   9  *  the Free Software Foundation; either version 2 of the License, or
  10  *  (at your option) any later version.
  11  *
  12  *  Ragel is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  *  GNU General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU General Public License
  18  *  along with Ragel; if not, write to the Free Software
  19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  */
  21
  22 #include <iostream>
  23 #include <fstream>
  24 #include <string.h>
  25
  26 #include "ragel.h"
  27 #include "rlscan.h"
  28
  29 using std::ifstream;
  30 using std::istream;
  31 using std::ostream;
  32 using std::cout;
  33 using std::cerr;
  34 using std::endl;
  35
  36 enum InlineBlockType
  37 {
             /* How the inline code block being scanned by inline_code is terminated. */
             /* Set when parser_def sees '{'; the block ends at the matching '}'. */
  38         CurlyDelimited,
             /* Set after getkey, access or variable; the block ends at the first ';'. */
  39         SemiTerminated
  40 };
  41
  42 %%{
  43         machine section_parse;
             # The input symbols of this machine are ints: Scanner::token() runs it
             # over the token type, not over characters.
  44         alphtype int;
             # Only the data is written here; the machine itself is defined in a
             # later section_parse section of this file.
  45         write data;
  46 }%%
  47
  48 void Scanner::init( )
  49 {
             /* The write statement below belongs to section_parse, the machine named
              * in the preceding Ragel section; do_scan() calls init() before it
              * initializes the character scanner. */
  50         %% write init;
  51 }
  52
  53 bool Scanner::active()
  54 {
  55         if ( ignoreSection )
  56                 return false;
  57
  58         if ( parser == 0 && ! parserExistsError ) {
  59                 scan_error() << "there is no previous specification name" << endl;
  60                 parserExistsError = true;
  61         }
  62
  63         if ( parser == 0 )
  64                 return false;
  65
  66         return true;
  67 }
  68
  69 ostream &Scanner::scan_error()
  70 {
  71         /* Count the error, then hand back cerr primed with file:line:column. */
  72         ++gblErrorCount;
  73         ostream &errOut = cerr;
  74         return errOut << fileName << ":" << line << ":" << column << ": ";
  75 }
  76
  77 bool Scanner::recursiveInclude( char *inclFileName, char *inclSectionName )
  78 {
  79         for ( IncludeStack::Iter si = includeStack; si.lte(); si++ ) {
  80                 if ( strcmp( si->fileName, inclFileName ) == 0 &&
  81                                 strcmp( si->sectionName, inclSectionName ) == 0 )
  82                 {
  83                         return true;
  84                 }
  85         }
  86         return false;
  87 }
  88
  89 void Scanner::updateCol()
  90 {
  91         /* Measure from the last newline recorded by inc_nl when there is one,
  92          * otherwise from the start of the token. */
  93         char *origin = ( lastnl != 0 ) ? lastnl : tokstart;
  94         column += tokend - origin;
  95         /* The recorded newline has been used up. */
  96         lastnl = 0;
  97 }
  98
  99 void Scanner::token( int type, char c )
 100 {
             /* Send a token whose data is the single character c. The range
              * [&c, &c + 1) is handed to the three argument overload, which copies it. */
 101         token( type, &c, &c + 1 );
 102 }
 103
 104 void Scanner::token( int type )
 105 {
             /* Send a token that carries no data: null start and end pointers make
              * the three argument overload skip the copy. */
 106         token( type, 0, 0 );
 107 }
 108
 109 %%{
 110         machine section_parse;
 111
 112         # This relies on the kelbt implementation and the order
 113         # that tokens are declared.
             # NOTE(review): the numbers below presumably have to equal the token ids
             # produced for the parser -- confirm against the parser's declarations.
 114         KW_Machine = 128;
 115         KW_Include = 129;
 116         KW_Write = 130;
 117         TK_Word = 131;
 118         TK_Literal = 132;
 119
 120         action clear_words { word = lit = 0; word_len = lit_len = 0; }
 121         action store_word { word = tokdata; word_len = toklen; }
 122         action store_lit { lit = tokdata; lit_len = toklen; }
             # The actions above clear, and then record, the word and literal operands
             # of a statement from the data of the token being consumed.
 123
             # Each of the following reports a malformed statement via scan_error().
 124         action mach_err { scan_error() << "bad machine statement" << endl; }
 125         action incl_err { scan_error() << "bad include statement" << endl; }
 126         action write_err { scan_error() << "bad write statement" << endl; }
 127
 128         action handle_machine
 129         {
 130                 /* Assign a name to the machine. The name is the word recorded by
                      * store_word. */
 131                 char *machine = word;
 132
                     /* No include target is set: use the parser registered under this
                      * name, creating and registering one when the lookup fails. */
 133                 if ( inclSectionTarg == 0 ) {
 134                         ignoreSection = false;
 135
 136                         ParserDictEl *pdEl = parserDict.find( machine );
 137                         if ( pdEl == 0 ) {
 138                                 pdEl = new ParserDictEl( machine );
 139                                 pdEl->value = new Parser( fileName, machine, sectionLoc );
 140                                 pdEl->value->init();
 141                                 parserDict.insert( pdEl );
 142                         }
 143
 144                         parser = pdEl->value;
 145                 }
 146                 else if ( strcmp( inclSectionTarg, machine ) == 0 ) {
 147                         /* Found the include target: this section goes to the
                              * parser that requested the include. */
 148                         ignoreSection = false;
 149                         parser = inclToParser;
 150                 }
 151                 else {
 152                         /* Some other machine: ignore the section. active() returns
                              * false while ignoreSection is set. */
 153                         ignoreSection = true;
 154                         parser = 0;
 155                 }
 156         }
 157
             # A machine statement is the keyword, a name and a semicolon. mach_err is
             # attached as both the error and the eof action of the statement.
 158         machine_stmt =
 159                 ( KW_Machine TK_Word @store_word ';' ) @handle_machine
 160                 <>err mach_err <>eof mach_err;
 161
 162         action handle_include
 163         {
                     /* Scan the included file (or another section of this file) with a
                      * nested Scanner that feeds the current parser. Does nothing when
                      * the section is not active. */
 164                 if ( active() ) {
 165                         char *inclSectionName = word;
 166                         char *inclFileName = 0;
 167
 168                         /* Implement defaults for the input file and section name. */
 169                         if ( inclSectionName == 0 )
 170                                 inclSectionName = parser->sectionName;
 171
 172                         if ( lit != 0 )
 173                                 inclFileName = prepareFileName( lit, lit_len );
 174                         else
 175                                 inclFileName = fileName;
 176
 177                         /* Check for a recursive include structure. Add the current file/section
 178                          * name then check if what we are including is already in the stack. */
 179                         includeStack.append( IncludeStackItem( fileName, parser->sectionName ) );
 180
 181                         if ( recursiveInclude( inclFileName, inclSectionName ) )
 182                                 scan_error() << "include: this is a recursive include operation" << endl;
 183                         else {
 184                                 /* Open the input file for reading. */
 185                                 ifstream *inFile = new ifstream( inclFileName );
 186                                 if ( ! inFile->is_open() ) {
 187                                         scan_error() << "include: could not open " <<
 188                                                         inclFileName << " for reading" << endl;
 189                                 }
 190
                                     /* NOTE(review): a failed open is reported above but
                                      * the nested scan still runs on the unopened stream
                                      * -- confirm that this is intended. */
 191                                 Scanner scanner( inclFileName, *inFile, output, parser,
 192                                                 inclSectionName, includeDepth+1 );
 193                                 scanner.do_scan( );
 194                                 delete inFile;
 195                         }
 196
 197                         /* Remove the last element (len-1) */
 198                         includeStack.remove( -1 );
 199                 }
 200         }
 201
             # The operands of an include: a section name, a file name literal, or a
             # section name followed by a file name. Both operands are cleared on entry.
 202         include_names = (
 203                 TK_Word @store_word ( TK_Literal @store_lit )? |
 204                 TK_Literal @store_lit
 205         ) >clear_words;
 206
             # incl_err is attached as both the error and the eof action.
 207         include_stmt =
 208                 ( KW_Include include_names ';' ) @handle_include
 209                 <>err incl_err <>eof incl_err;
 210
 211         action write_command
 212         {
                     /* Open a <write> element naming the current parser's section. As
                      * in the two actions below, nothing is written unless the section
                      * is active and machineSpec and machineName are both null. */
 213                 if ( active() && machineSpec == 0 && machineName == 0 ) {
 214                         output << "<write"
 215                                         " def_name=\"" << parser->sectionName << "\""
 216                                         " line=\"" << line << "\""
 217                                         " col=\"" << column << "\""
 218                                         ">";
 219                 }
 220         }
 221
 222         action write_arg
 223         {
                     /* Emit one argument word. The token data is written as is, with
                      * no escaping applied here. */
 224                 if ( active() && machineSpec == 0 && machineName == 0 )
 225                         output << "<arg>" << tokdata << "</arg>";
 226         }
 227
 228         action write_close
 229         {
                     /* Close the element opened by write_command. */
 230                 if ( active() && machineSpec == 0 && machineName == 0 )
 231                         output << "</write>\n";
 232         }
 233
             # A write statement is the keyword, one or more words and a semicolon.
             # write_err is attached as both the error and the eof action.
 234         write_stmt =
 235                 ( KW_Write @write_command
 236                 ( TK_Word @write_arg )+ ';' @write_close )
 237                 <>err write_err <>eof write_err;
 238
 239         action handle_token
 240         {
 241                 /* Send the token off to the parser. Tokens are dropped when the
                      * section is not active. */
 242                 if ( active() ) {
 243                         InputLoc loc;
 244
                             /* Disabled debugging trace. */
 245                         #if 0
 246                         cerr << "scanner:" << line << ":" << column <<
 247                                         ": sending token to the parser " << Parser_lelNames[*p];
 248                         cerr << " " << toklen;
 249                         if ( tokdata != 0 )
 250                                 cerr << " " << tokdata;
 251                         cerr << endl;
 252                         #endif
 253
                             /* The token is stamped with the scanner's current position. */
 254                         loc.fileName = fileName;
 255                         loc.line = line;
 256                         loc.col = column;
 257
 258                         parser->token( loc, type, tokdata, toklen );
 259                 }
 260         }
 261
 262         # Catch everything else: any token other than the three statement keywords
             # is handed to the parser.
 263         everything_else = ^( KW_Machine | KW_Include | KW_Write ) @handle_token;
 264
 265         main := (
                     # A section is any sequence of machine, include and write
                     # statements mixed with tokens that go to the parser.
 266                 machine_stmt |
 267                 include_stmt |
 268                 write_stmt |
 269                 everything_else
 270         )*;
 271 }%%
 272
 273 void Scanner::token( int type, char *start, char *end )
 274 {
             /* Run the section_parse machine over a single token. The machine's
              * input is the one element range [p, pe) holding the token type; the
              * actions see the token text through tokdata and toklen. */
 275         char *tokdata = 0;
 276         int toklen = 0;
 277         int *p = &type;
 278         int *pe = &type + 1;
 279
             /* Copy the token text into a fresh NUL-terminated buffer.
              * NOTE(review): the buffer is not freed in this function; presumably
              * the actions or the parser keep the pointer -- confirm ownership. */
 280         if ( start != 0 ) {
 281                 toklen = end-start;
 282                 tokdata = new char[toklen+1];
 283                 memcpy( tokdata, start, toklen );
 284                 tokdata[toklen] = 0;
 285         }
 286
 287         %%{
 288                 machine section_parse;
 289                 write exec;
 290         }%%
 291
             /* Advance the column past the token just sent. */
 292         updateCol();
 293
 294         /* Record the last token for use in controlling the scan of subsequent
 295          * tokens. */
 296         lastToken = type;
 297 }
 298
 299 void Scanner::startSection( )
 300 {
 301         parserExistsError = false;
 302
 303         if ( includeDepth == 0 ) {
 304                 if ( machineSpec == 0 && machineName == 0 )
 305                         output << "</host>\n";
 306         }
 307
 308         sectionLoc.fileName = fileName;
 309         sectionLoc.line = line;
 310         sectionLoc.col = 0;
 311 }
 312
 313 void Scanner::endSection( )
 314 {
             /* Finish a Ragel section: run the section parser's eof actions, tell the
              * parser that the section is over, and reopen the host element in the
              * output. */
 315         /* Execute the eof actions for the section parser. */
 316         %%{
 317                 machine section_parse;
 318                 write eof;
 319         }%%
 320
 321         /* Close off the section with the parser. */
 322         if ( active() ) {
 323                 InputLoc loc;
 324                 loc.fileName = fileName;
 325                 loc.line = line;
 326                 loc.col = 0;
 327
 328                 parser->token( loc, TK_EndSection, 0, 0 );
 329         }
 330
             /* Mirrors startSection(): the same condition that closed the host
              * element there opens a new one here. */
 331         if ( includeDepth == 0 ) {
 332                 if ( machineSpec == 0 && machineName == 0 ) {
 333                         /* The end section may include a newline on the end, so
 334                          * we use the last line, which will count the newline. */
 335                         output << "<host line=\"" << line << "\">";
 336                 }
 337         }
 338 }
 339
 340 %%{
 341         machine rlscan;
 342
 343         # This is sent by the driver code: do_scan() appends a zero byte once the
             # input stream yields no more data.
 344         EOF = 0;
 345
             # Newline bookkeeping: remember where the newline was seen (updateCol()
             # measures the column from there), restart the column and count the line.
 346         action inc_nl {
 347                 lastnl = p;
 348                 column = 0;
 349                 line++;
 350         }
 351         NL = '\n' @inc_nl;
 352
 353         # Identifiers, numbers, comments, and other common things.
 354         ident = ( alpha | '_' ) ( alpha |digit |'_' )*;
 355         number = digit+;
 356         hex_number = '0x' [0-9a-fA-F]+;
 357
             # Comments use NL for their newlines so that lines are still counted.
 358         c_comment =
 359                 '/*' ( any | NL )* :>> '*/';
 360
 361         cpp_comment =
 362                 '//' [^\n]* NL;
 363
 364         c_cpp_comment = c_comment | cpp_comment;
 365
 366         # These literal forms are common to C-like host code and ragel. A
             # backslash escapes the character that follows it, newline included.
 367         s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'";
 368         d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"';
 369
 370         whitespace = [ \t] | NL;
             # A ragel comment runs from '#' through the end of the line.
 371         pound_comment = '#' [^\n]* NL;
 372
 373         # An inline block of code. This is specified as a scanner, but is sent to
 374         # the parser as one long block. The inline_block pointer is used to handle
 375         # the preservation of the data.
             # NOTE(review): the actions below send individual tokens and no
             # inline_block pointer appears in this file -- the comment above may be
             # stale.
 376         inline_code := |*
 377                 # Inline expression keywords.
 378                 "fpc" => { token( KW_PChar ); };
 379                 "fc" => { token( KW_Char ); };
 380                 "fcurs" => { token( KW_CurState ); };
 381                 "ftargs" => { token( KW_TargState ); };
 382                 "fentry" => {
 383                         whitespaceOn = false;
 384                         token( KW_Entry );
 385                 };
 386
 387                 # Inline statement keywords. All but fexec turn whitespace tokens
                     # off until one of the symbols ; * ) turns them back on.
 388                 "fhold" => {
 389                         whitespaceOn = false;
 390                         token( KW_Hold );
 391                 };
 392                 "fexec" => { token( KW_Exec, 0, 0 ); };
 393                 "fgoto" => {
 394                         whitespaceOn = false;
 395                         token( KW_Goto );
 396                 };
 397                 "fnext" => {
 398                         whitespaceOn = false;
 399                         token( KW_Next );
 400                 };
 401                 "fcall" => {
 402                         whitespaceOn = false;
 403                         token( KW_Call );
 404                 };
 405                 "fret" => {
 406                         whitespaceOn = false;
 407                         token( KW_Ret );
 408                 };
 409                 "fbreak" => {
 410                         whitespaceOn = false;
 411                         token( KW_Break );
 412                 };
 413
 414                 ident => { token( TK_Word, tokstart, tokend ); };
 415
 416                 number => { token( TK_UInt, tokstart, tokend ); };
 417                 hex_number => { token( TK_Hex, tokstart, tokend ); };
 418
 419                 ( s_literal | d_literal )
 420                         => { token( IL_Literal, tokstart, tokend ); };
 421
                     # Whitespace is dropped while whitespaceOn is false.
 422                 whitespace+ => {
 423                         if ( whitespaceOn )
 424                                 token( IL_WhiteSpace, tokstart, tokend );
 425                 };
 426                 c_cpp_comment => { token( IL_Comment, tokstart, tokend ); };
 427
 428                 "::" => { token( TK_NameSep, tokstart, tokend ); };
 429
 430                 # Some symbols need to go to the parser as with their cardinal value as
 431                 # the token type (as opposed to being sent as anonymous symbols)
 432                 # because they are part of the sequences which we interpret. The * ) ;
 433                 # symbols cause whitespace parsing to come back on. This gets turned
 434                 # off by some keywords.
 435
                     # A semicolon also ends a semicolon terminated block.
 436                 ";" => {
 437                         whitespaceOn = true;
 438                         token( *tokstart, tokstart, tokend );
 439                         if ( inlineBlockType == SemiTerminated )
 440                                 fgoto parser_def;
 441                 };
 442
 443                 [*)] => {
 444                         whitespaceOn = true;
 445                         token( *tokstart, tokstart, tokend );
 446                 };
 447
 448                 [,(] => { token( *tokstart, tokstart, tokend ); };
 449
                     # Nested braces are counted so that the brace closing the block
                     # can be told apart from an inner one.
 450                 '{' => {
 451                         token( IL_Symbol, tokstart, tokend );
 452                         curly_count += 1;
 453                 };
 454
 455                 '}' => {
 456                         if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) {
 457                                 /* Inline code block ends. */
 458                                 token( '}' );
 459                                 fgoto parser_def;
 460                         }
 461                         else {
 462                                 /* Either a semi terminated inline block or only the closing
 463                                  * brace of some inner scope, not the block's closing brace. */
 464                                 token( IL_Symbol, tokstart, tokend );
 465                         }
 466                 };
 467
 468                 EOF => {
 469                         scan_error() << "unterminated code block" << endl;
 470                 };
 471
 472                 # Send every other character as a symbol.
 473                 any => { token( IL_Symbol, tokstart, tokend ); };
 474         *|;
 475
 476         or_literal := |*
                     # Scanner for the inside of a [...] expression. It is entered with
                     # fcall and returns to its caller with fret at the closing bracket.
 477                 # Escape sequences in OR expressions.
 478                 '\\0' => { token( RE_Char, '\0' ); };
 479                 '\\a' => { token( RE_Char, '\a' ); };
 480                 '\\b' => { token( RE_Char, '\b' ); };
 481                 '\\t' => { token( RE_Char, '\t' ); };
 482                 '\\n' => { token( RE_Char, '\n' ); };
 483                 '\\v' => { token( RE_Char, '\v' ); };
 484                 '\\f' => { token( RE_Char, '\f' ); };
 485                 '\\r' => { token( RE_Char, '\r' ); };
                     # A backslash followed by a newline sends no token.
 486                 '\\\n' => { updateCol(); };
                     # Any other escaped character is sent as itself, without the backslash.
 487                 '\\' any => { token( RE_Char, tokstart+1, tokend ); };
 488
 489                 # Range dash in an OR expression.
 490                 '-' => { token( RE_Dash, 0, 0 ); };
 491
 492                 # Terminate an OR expression.
 493                 ']'     => { token( RE_SqClose ); fret; };
 494
 495                 EOF => {
 496                         scan_error() << "unterminated OR literal" << endl;
 497                 };
 498
 499                 # Characters in an OR expression.
 500                 [^\]] => { token( RE_Char, tokstart, tokend ); };
 501
 502         *|;
 503
 504         re_literal := |*
                     # Scanner for the inside of a /.../ regular expression, entered from
                     # parser_def at the opening slash.
 505                 # Escape sequences in regular expressions.
 506                 '\\0' => { token( RE_Char, '\0' ); };
 507                 '\\a' => { token( RE_Char, '\a' ); };
 508                 '\\b' => { token( RE_Char, '\b' ); };
 509                 '\\t' => { token( RE_Char, '\t' ); };
 510                 '\\n' => { token( RE_Char, '\n' ); };
 511                 '\\v' => { token( RE_Char, '\v' ); };
 512                 '\\f' => { token( RE_Char, '\f' ); };
 513                 '\\r' => { token( RE_Char, '\r' ); };
                     # A backslash followed by a newline sends no token.
 514                 '\\\n' => { updateCol(); };
                     # Any other escaped character is sent as itself, without the backslash.
 515                 '\\' any => { token( RE_Char, tokstart+1, tokend ); };
 516
 517                 # Terminate a regular expression. An optional trailing i is sent as
                     # part of the closing slash token.
 518                 '/' [i]? => {
 519                         token( RE_Slash, tokstart, tokend );
 520                         fgoto parser_def;
 521                 };
 522
 523                 # Special characters.
 524                 '.' => { token( RE_Dot ); };
 525                 '*' => { token( RE_Star ); };
 526
                     # A bracket expression is scanned by or_literal, which returns here.
 527                 '[' => { token( RE_SqOpen ); fcall or_literal; };
 528                 '[^' => { token( RE_SqOpenNeg ); fcall or_literal; };
 529
 530                 EOF => {
 531                         scan_error() << "unterminated regular expression" << endl;
 532                 };
 533
 534                 # Characters in a regular expression.
 535                 [^\/] => { token( RE_Char, tokstart, tokend ); };
 536         *|;
 537
 538         # We need a separate token space here to avoid the ragel keywords. Every
             # word is sent as TK_Word and the semicolon returns to parser_def.
 539         write_statement := |*
 540                 ident => { token( TK_Word, tokstart, tokend ); } ;
 541                 [ \t\n]+ => { updateCol(); };
 542                 ';' => { token( ';' ); fgoto parser_def; };
 543
 544                 EOF => {
 545                         scan_error() << "unterminated write statement" << endl;
 546                 };
 547         *|;
 548
 549         # Parser definitions. This scanner handles the inside of a ragel section
             # and hands off to the other scanners for the constructs they cover.
 550         parser_def := |*
 551                 'machine' => { token( KW_Machine ); };
 552                 'include' => { token( KW_Include ); };
                     # The arguments of a write statement have a scanner of their own.
 553                 'write' => {
 554                         token( KW_Write );
 555                         fgoto write_statement;
 556                 };
 557                 'action' => { token( KW_Action ); };
 558                 'alphtype' => { token( KW_AlphType ); };
 559
 560                 # FIXME: Enable this post 5.17.
 561                 # 'range' => { token( KW_Range ); };
 562
                     # The rest of a getkey, access or variable statement is scanned as
                     # inline code ending at the first semicolon.
 563                 'getkey' => {
 564                         token( KW_GetKey );
 565                         inlineBlockType = SemiTerminated;
 566                         fgoto inline_code;
 567                 };
 568                 'access' => {
 569                         token( KW_Access );
 570                         inlineBlockType = SemiTerminated;
 571                         fgoto inline_code;
 572                 };
 573                 'variable' => {
 574                         token( KW_Variable );
 575                         inlineBlockType = SemiTerminated;
 576                         fgoto inline_code;
 577                 };
 578                 'when' => { token( KW_When ); };
 579                 'eof' => { token( KW_Eof ); };
 580                 'err' => { token( KW_Err ); };
 581                 'lerr' => { token( KW_Lerr ); };
 582                 'to' => { token( KW_To ); };
 583                 'from' => { token( KW_From ); };
 584                 'export' => { token( KW_Export ); };
 585
 586                 # Identifiers.
 587                 ident => { token( TK_Word, tokstart, tokend ); } ;
 588
 589                 # Numbers
 590                 number => { token( TK_UInt, tokstart, tokend ); };
 591                 hex_number => { token( TK_Hex, tokstart, tokend ); };
 592
 593                 # Literals, with optionals.
 594                 ( s_literal | d_literal ) [i]?
 595                         => { token( TK_Literal, tokstart, tokend ); };
 596
                     # A bracket expression is scanned by or_literal, which returns here.
 597                 '[' => { token( RE_SqOpen ); fcall or_literal; };
 598                 '[^' => { token( RE_SqOpenNeg ); fcall or_literal; };
 599
                     # A slash opens a regular expression, scanned by re_literal.
 600                 '/' => { token( RE_Slash ); fgoto re_literal; };
 601
 602                 # Ignore.
 603                 pound_comment => { updateCol(); };
 604
 605                 ':=' => { token( TK_ColonEquals ); };
 606
 607                 # To State Actions.
 608                 ">~" => { token( TK_StartToState ); };
 609                 "$~" => { token( TK_AllToState ); };
 610                 "%~" => { token( TK_FinalToState ); };
 611                 "<~" => { token( TK_NotStartToState ); };
 612                 "@~" => { token( TK_NotFinalToState ); };
 613                 "<>~" => { token( TK_MiddleToState ); };
 614
 615                 # From State actions
 616                 ">*" => { token( TK_StartFromState ); };
 617                 "$*" => { token( TK_AllFromState ); };
 618                 "%*" => { token( TK_FinalFromState ); };
 619                 "<*" => { token( TK_NotStartFromState ); };
 620                 "@*" => { token( TK_NotFinalFromState ); };
 621                 "<>*" => { token( TK_MiddleFromState ); };
 622
 623                 # EOF Actions.
 624                 ">/" => { token( TK_StartEOF ); };
 625                 "$/" => { token( TK_AllEOF ); };
 626                 "%/" => { token( TK_FinalEOF ); };
 627                 "</" => { token( TK_NotStartEOF ); };
 628                 "@/" => { token( TK_NotFinalEOF ); };
 629                 "<>/" => { token( TK_MiddleEOF ); };
 630
 631                 # Global Error actions.
 632                 ">!" => { token( TK_StartGblError ); };
 633                 "$!" => { token( TK_AllGblError ); };
 634                 "%!" => { token( TK_FinalGblError ); };
 635                 "<!" => { token( TK_NotStartGblError ); };
 636                 "@!" => { token( TK_NotFinalGblError ); };
 637                 "<>!" => { token( TK_MiddleGblError ); };
 638
 639                 # Local error actions.
 640                 ">^" => { token( TK_StartLocalError ); };
 641                 "$^" => { token( TK_AllLocalError ); };
 642                 "%^" => { token( TK_FinalLocalError ); };
 643                 "<^" => { token( TK_NotStartLocalError ); };
 644                 "@^" => { token( TK_NotFinalLocalError ); };
 645                 "<>^" => { token( TK_MiddleLocalError ); };
 646
 647                 # Middle.
 648                 "<>" => { token( TK_Middle ); };
 649
 650                 # Conditions.
 651                 '>?' => { token( TK_StartCond ); };
 652                 '$?' => { token( TK_AllCond ); };
 653                 '%?' => { token( TK_LeavingCond ); };
 654
 655                 '..' => { token( TK_DotDot ); };
 656                 '**' => { token( TK_StarStar ); };
 657                 '--' => { token( TK_DashDash ); };
 658                 '->' => { token( TK_Arrow ); };
 659                 '=>' => { token( TK_DoubleArrow ); };
 660
 661                 ":>"  => { token( TK_ColonGt ); };
 662                 ":>>" => { token( TK_ColonGtGt ); };
 663                 "<:"  => { token( TK_LtColon ); };
 664
 665                 # Opening of longest match.
 666                 "|*" => { token( TK_BarStar ); };
 667
 668                 # Separator for name references.
 669                 "::" => { token( TK_NameSep, tokstart, tokend ); };
 670
                     # End of a multi-line section: back to scanning host code.
 671                 '}%%' => {
 672                         updateCol();
 673                         endSection();
 674                         fgoto main;
 675                 };
 676
 677                 [ \t\r]+ => { updateCol(); };
 678
 679                 # If we are in a single line machine then newline may end the spec.
 680                 NL => {
 681                         updateCol();
 682                         if ( singleLineSpec ) {
 683                                 endSection();
 684                                 fgoto main;
 685                         }
 686                 };
 687
                     # Directly after export or entry the brace is sent as a plain
                     # token. Otherwise it opens a curly delimited inline code block.
 688                 '{' => {
 689                         if ( lastToken == KW_Export || lastToken == KW_Entry )
 690                                 token( '{' );
 691                         else {
 692                                 token( '{' );
 693                                 curly_count = 1;
 694                                 inlineBlockType = CurlyDelimited;
 695                                 fgoto inline_code;
 696                         }
 697                 };
 698
 699                 EOF => {
 700                         scan_error() << "unterminated ragel section" << endl;
 701                 };
 702
                     # Any other character is sent with its own value as the token type
                     # and with no data.
 703                 any => { token( *tokstart ); } ;
 704         *|;
 705
 706         action pass {
                     /* Host code: keep the column current and copy the text through to
                      * the output, escaped by xmlEscapeHost(). */
 707                 updateCol();
 708
 709                 /* If no errors and we are at the bottom of the include stack (the
 710                  * source file listed on the command line) then write out the data. */
 711                 if ( includeDepth == 0 && machineSpec == 0 && machineName == 0 )
 712                         xmlEscapeHost( output, tokstart, tokend-tokstart );
 713         }
 714
 715         # Outside code scanner. These tokens get passed through.
 716         main := |*
 717                 ident => pass;
 718                 number => pass;
 719                 c_cpp_comment => pass;
 720                 s_literal | d_literal => pass;
                     # Start of a multi-line ragel section, ended by its closing marker.
 721                 '%%{' => {
 722                         updateCol();
 723                         singleLineSpec = false;
 724                         startSection();
 725                         fgoto parser_def;
 726                 };
                     # Start of a single-line ragel section, ended by the newline.
 727                 '%%' => {
 728                         updateCol();
 729                         singleLineSpec = true;
 730                         startSection();
 731                         fgoto parser_def;
 732                 };
 733                 whitespace+ => pass;
                     # The end of input marker is consumed without any action.
 734                 EOF;
 735                 any => pass;
 736         *|;
 737
 738 }%%
 737
 738 }%%
 739
 740 %% write data;
     /* The statement above writes the data of rlscan, the machine named in the
      * preceding section; do_scan() below refers to it (e.g. rlscan_error). */
 741
 742 void Scanner::do_scan()
 743 {
             /* Run the rlscan character scanner over the input stream. Data is read
              * into a buffer that doubles in size when a token does not fit, and a
              * partially scanned token is moved to the front before the next read. */
 744         int bufsize = 8;
 745         char *buf = new char[bufsize];
             /* The byte fed to the scanner at end of input; the machine defines
              * EOF as 0. */
 746         const char last_char = 0;
             /* have is the number of bytes carried over at the start of buf. */
 747         int cs, act, have = 0;
             /* Storage for fcall and fret. One slot: or_literal is the only scanner
              * entered with fcall and it makes no calls of its own. */
 748         int top, stack[1];
 749         int curly_count = 0;
 750         bool execute = true;
 751         bool singleLineSpec = false;
 752         InlineBlockType inlineBlockType = CurlyDelimited;
 753
 754         /* Init the section parser and the character scanner. */
 755         init();
 756         %% write init;
 757
 758         while ( execute ) {
 759                 char *p = buf + have;
 760                 int space = bufsize - have;
 761
 762                 if ( space == 0 ) {
 763                         /* We filled up the buffer trying to scan a token. Grow it. */
 764                         bufsize = bufsize * 2;
 765                         char *newbuf = new char[bufsize];
 766
 767                         /* Recompute p and space. */
 768                         p = newbuf + have;
 769                         space = bufsize - have;
 770
 771                         /* Patch up pointers possibly in use. */
                             /* NOTE(review): lastnl, which updateCol() reads, is set
                              * from p and so also points into buf, but it is not
                              * adjusted here or in the shift further down -- confirm
                              * whether the column can be computed from a stale
                              * pointer. */
 772                         if ( tokstart != 0 )
 773                                 tokstart = newbuf + ( tokstart - buf );
 774                         tokend = newbuf + ( tokend - buf );
 775
 776                         /* Copy the new buffer in. */
 777                         memcpy( newbuf, buf, have );
 778                         delete[] buf;
 779                         buf = newbuf;
 780                 }
 781
 782                 input.read( p, space );
 783                 int len = input.gcount();
 784
 785                 /* If we see eof then append the EOF char. This is also the last
                      * pass through the loop. */
 786                 if ( len == 0 ) {
 787                         p[0] = last_char, len = 1;
 788                         execute = false;
 789                 }
 790
 791                 char *pe = p + len;
 792                 %% write exec;
 793
 794                 /* Check if we failed. */
 795                 if ( cs == rlscan_error ) {
 796                         /* Machine failed before finding a token. I'm not yet sure if this
 797                          * is reachable. */
 798                         scan_error() << "scanner error" << endl;
 799                         exit(1);
 800                 }
 801
 802                 /* Decide if we need to preserve anything. A non-null tokstart
                      * means a token is still being scanned. */
 803                 char *preserve = tokstart;
 804
 805                 /* Now set up the prefix. */
 806                 if ( preserve == 0 )
 807                         have = 0;
 808                 else {
 809                         /* There is data that needs to be shifted over. */
 810                         have = pe - preserve;
 811                         memmove( buf, preserve, have );
 812                         unsigned int shiftback = preserve - buf;
 813                         if ( tokstart != 0 )
 814                                 tokstart -= shiftback;
 815                         tokend -= shiftback;
 816
                             /* NOTE(review): preserve is not read again after this
                              * assignment, so it has no effect. */
 817                         preserve = buf;
 818                 }
 819         }
 820
 821         delete[] buf;
 822 }
 823
 824 void scan( char *fileName, istream &input, ostream &output )
 825 {
             /* NOTE(review): the body is empty and none of the parameters are used
              * -- confirm whether this stub is still needed. */
 826 }