Line directives need to use the fileName stored in the InputLoc structures from
[external/ragel.git] / ragel / rlscan.rl
index 70e97fd..3c325c3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- *  Copyright 2006-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ *  Copyright 2006-2007 Adrian Thurston <thurston@complang.org>
  */
 
 /*  This file is part of Ragel.
@@ -25,6 +25,7 @@
 
 #include "ragel.h"
 #include "rlscan.h"
+#include "inputdata.h"
 
 //#define LOG_TOKENS
 
@@ -41,30 +42,29 @@ enum InlineBlockType
        SemiTerminated
 };
 
+#ifdef _WIN32
+#define PATH_SEP '\\'
+#else
+#define PATH_SEP '/'
+#endif
+
 
 /*
  * The Scanner for Importing
  */
 
-#define IMP_Word 128
-#define IMP_Literal 129
-#define IMP_UInt 130
-#define IMP_Define 131
-
 %%{
        machine inline_token_scan;
        alphtype int;
        access tok_;
 
-       IMP_Word = 128;
-       IMP_Literal = 129;
-       IMP_UInt = 130;
-       IMP_Define = 131;
+       # Import scanner tokens.
+       import "rlparse.h"; 
 
        main := |*
                # Define of number.
                IMP_Define IMP_Word IMP_UInt => { 
-                       int base = tok_tokstart - token_data;
+                       int base = tok_ts - token_data;
                        int nameOff = 1;
                        int numOff = 2;
 
@@ -78,7 +78,7 @@ enum InlineBlockType
 
                # Assignment of number.
                IMP_Word '=' IMP_UInt => { 
-                       int base = tok_tokstart - token_data;
+                       int base = tok_ts - token_data;
                        int nameOff = 0;
                        int numOff = 2;
 
@@ -92,7 +92,7 @@ enum InlineBlockType
 
                # Define of literal.
                IMP_Define IMP_Word IMP_Literal => { 
-                       int base = tok_tokstart - token_data;
+                       int base = tok_ts - token_data;
                        int nameOff = 1;
                        int litOff = 2;
 
@@ -106,7 +106,7 @@ enum InlineBlockType
 
                # Assignment of literal.
                IMP_Word '=' IMP_Literal => { 
-                       int base = tok_tokstart - token_data;
+                       int base = tok_ts - token_data;
                        int nameOff = 0;
                        int litOff = 2;
 
@@ -129,22 +129,26 @@ void Scanner::flushImport()
 {
        int *p = token_data;
        int *pe = token_data + cur_token;
+       int *eof = 0;
 
-       %% write init;
-       %% write exec;
+       %%{
+               machine inline_token_scan;
+               write init;
+               write exec;
+       }%%
 
-       if ( tok_tokstart == 0 )
+       if ( tok_ts == 0 )
                cur_token = 0;
        else {
-               cur_token = pe - tok_tokstart;
-               int ts_offset = tok_tokstart - token_data;
+               cur_token = pe - tok_ts;
+               int ts_offset = tok_ts - token_data;
                memmove( token_data, token_data+ts_offset, cur_token*sizeof(token_data[0]) );
                memmove( token_strings, token_strings+ts_offset, cur_token*sizeof(token_strings[0]) );
                memmove( token_lens, token_lens+ts_offset, cur_token*sizeof(token_lens[0]) );
        }
 }
 
-void Scanner::directToParser( Parser *toParser, char *tokFileName, int tokLine, 
+void Scanner::directToParser( Parser *toParser, const char *tokFileName, int tokLine, 
                int tokColumn, int type, char *tokdata, int toklen )
 {
        InputLoc loc;
@@ -199,7 +203,7 @@ void Scanner::pass()
        /* If no errors and we are at the bottom of the include stack (the
         * source file listed on the command line) then write out the data. */
        if ( includeDepth == 0 && machineSpec == 0 && machineName == 0 )
-               xmlEscapeHost( output, tokstart, tokend-tokstart );
+               id.inputItems.tail->data.write( ts, te-ts );
 }
 
 /*
@@ -224,7 +228,8 @@ bool Scanner::active()
                return false;
 
        if ( parser == 0 && ! parserExistsError ) {
-               scan_error() << "there is no previous specification name" << endl;
+               scan_error() << "this specification has no name, nor does any previous"
+                       " specification" << endl;
                parserExistsError = true;
        }
 
@@ -238,15 +243,17 @@ ostream &Scanner::scan_error()
 {
        /* Maintain the error count. */
        gblErrorCount += 1;
-       cerr << fileName << ":" << line << ":" << column << ": ";
+       cerr << makeInputLoc( fileName, line, column ) << ": ";
        return cerr;
 }
 
-bool Scanner::recursiveInclude( char *inclFileName, char *inclSectionName )
+/* An approximate check for duplicate includes. Due to aliasing of files it's
+ * possible for duplicates to creep in. */
+bool Scanner::duplicateInclude( char *inclFileName, char *inclSectionName )
 {
-       for ( IncludeStack::Iter si = includeStack; si.lte(); si++ ) {
-               if ( strcmp( si->fileName, inclFileName ) == 0 &&
-                               strcmp( si->sectionName, inclSectionName ) == 0 )
+       for ( IncludeHistory::Iter hi = parser->includeHistory; hi.lte(); hi++ ) {
+               if ( strcmp( hi->fileName, inclFileName ) == 0 &&
+                               strcmp( hi->sectionName, inclSectionName ) == 0 )
                {
                        return true;
                }
@@ -258,106 +265,135 @@ void Scanner::updateCol()
 {
        char *from = lastnl;
        if ( from == 0 )
-               from = tokstart;
-       //cerr << "adding " << tokend - from << " to column" << endl;
-       column += tokend - from;
+               from = ts;
+       //cerr << "adding " << te - from << " to column" << endl;
+       column += te - from;
        lastnl = 0;
 }
 
-%%{
-       machine section_parse;
+/* Runs when a "machine <name>;" statement has been recognized. Chooses the
+ * parser that the rest of the section is sent to, or marks the section as
+ * ignored. */
+void Scanner::handleMachine()
+{
+       /* The name given in the machine statement. */
+       char *machName = word;
+
+       if ( importMachines ) {
+               /* Scanning on behalf of an import: the section is ignored. */
+               ignoreSection = true;
+               parser = 0;
+               return;
+       }
+
+       if ( inclSectionTarg != 0 ) {
+               /* Scanning on behalf of an include: only the targeted section is
+                * kept, and it is sent to the including parser. */
+               if ( strcmp( inclSectionTarg, machName ) == 0 ) {
+                       ignoreSection = false;
+                       parser = inclToParser;
+               }
+               else {
+                       ignoreSection = true;
+                       parser = 0;
+               }
+               return;
+       }
+
+       /* Ordinary section: reuse the parser registered under this name, creating
+        * and registering a new one the first time the name is seen. */
+       ignoreSection = false;
+       ParserDictEl *entry = id.parserDict.find( machName );
+       if ( entry == 0 ) {
+               entry = new ParserDictEl( machName );
+               entry->value = new Parser( fileName, machName, sectionLoc );
+               entry->value->init();
+               id.parserDict.insert( entry );
+               id.parserList.append( entry->value );
+       }
+       parser = entry->value;
+}
 
-       # This relies on the the kelbt implementation and the order
-       # that tokens are declared.
-       KW_Machine = 128;
-       KW_Include = 129;
-       KW_Import = 130;
-       KW_Write = 131;
-       TK_Word = 132;
-       TK_Literal = 133;
+/* Runs when an include statement has been recognized. Builds the list of
+ * candidate file names, opens the first one that exists and, unless the same
+ * file/section pair is already in the parser's include history, scans it with
+ * a nested Scanner that feeds the current parser. The section name defaults
+ * to the current parser's section name and the file name defaults to the
+ * file being scanned. */
+void Scanner::handleInclude()
+{
+       if ( active() ) {
+               char *inclSectionName = word;
+               char **includeChecks = 0;
 
-       action clear_words { word = lit = 0; word_len = lit_len = 0; }
-       action store_word { word = tokdata; word_len = toklen; }
-       action store_lit { lit = tokdata; lit_len = toklen; }
+               /* Implement defaults for the input file and section name. */
+               if ( inclSectionName == 0 )
+                       inclSectionName = parser->sectionName;
 
-       action mach_err { scan_error() << "bad machine statement" << endl; }
-       action incl_err { scan_error() << "bad include statement" << endl; }
-       action import_err { scan_error() << "bad import statement" << endl; }
-       action write_err { scan_error() << "bad write statement" << endl; }
+               if ( lit != 0 )
+                       includeChecks = makeIncludePathChecks( fileName, lit, lit_len );
+               else {
+                       /* No file given: the only candidate is a copy of the name of the
+                        * file currently being scanned. */
+                       char *test = new char[strlen(fileName)+1];
+                       strcpy( test, fileName );
 
-       action handle_machine
-       {
-               /* Assign a name to the machine. */
-               char *machine = word;
-
-               if ( !importMachines && inclSectionTarg == 0 ) {
-                       ignoreSection = false;
-
-                       ParserDictEl *pdEl = parserDict.find( machine );
-                       if ( pdEl == 0 ) {
-                               pdEl = new ParserDictEl( machine );
-                               pdEl->value = new Parser( fileName, machine, sectionLoc );
-                               pdEl->value->init();
-                               parserDict.insert( pdEl );
-                       }
+                       includeChecks = new char*[2];
 
-                       parser = pdEl->value;
+                       includeChecks[0] = test;
+                       includeChecks[1] = 0;
                }
-               else if ( !importMachines && strcmp( inclSectionTarg, machine ) == 0 ) {
-                       /* found include target */
-                       ignoreSection = false;
-                       parser = inclToParser;
+
+               long found = 0;
+               ifstream *inFile = tryOpenInclude( includeChecks, found );
+               if ( inFile == 0 ) {
+                       scan_error() << "include: failed to locate file" << endl;
+                       char **tried = includeChecks;
+                       while ( *tried != 0 )
+                               scan_error() << "include: attempted: \"" << *tried++ << '\"' << endl;
                }
                else {
-                       /* ignoring section */
-                       ignoreSection = true;
-                       parser = 0;
+                       /* Don't include anything that's already been included. */
+                       if ( !duplicateInclude( includeChecks[found], inclSectionName ) ) {
+                               parser->includeHistory.append( IncludeHistoryItem( 
+                                               includeChecks[found], inclSectionName ) );
+
+                               Scanner scanner( id, includeChecks[found], *inFile, parser,
+                                               inclSectionName, includeDepth+1, false );
+                               scanner.do_scan( );
+                       }
+
+                       /* Release the stream whether or not it was scanned; a duplicate
+                        * include must not leak the file that was opened for it. */
+                       delete inFile;
                        }
        }
+}
 
-       machine_stmt =
-               ( KW_Machine TK_Word @store_word ';' ) @handle_machine
-               <>err mach_err <>eof mach_err;
+/* Runs when an import statement has been recognized. Resolves the literal
+ * file name to a list of candidate paths, opens the first one that exists
+ * and scans it with a nested Scanner in import mode, then flushes the
+ * imported tokens. If no candidate can be opened the errors are reported and
+ * nothing is scanned. */
+void Scanner::handleImport()
+{
+       if ( active() ) {
+               char **importChecks = makeIncludePathChecks( fileName, lit, lit_len );
+
+               /* Open the input file for reading. */
+               long found = 0;
+               ifstream *inFile = tryOpenInclude( importChecks, found );
+               if ( inFile == 0 ) {
+                       scan_error() << "import: could not open import file " <<
+                                       "for reading" << endl;
+                       char **tried = importChecks;
+                       while ( *tried != 0 )
+                               scan_error() << "import: attempted: \"" << *tried++ << '\"' << endl;
+
+                       /* There is no stream and found is -1 here, so constructing the
+                        * nested scanner would dereference null and index out of range. */
+                       return;
+               }
 
-       action handle_include
-       {
-               if ( active() ) {
-                       char *inclSectionName = word;
-                       char *inclFileName = 0;
+               Scanner scanner( id, importChecks[found], *inFile, parser,
+                               0, includeDepth+1, true );
+               scanner.do_scan( );
+               scanner.importToken( 0, 0, 0 );
+               scanner.flushImport();
+               delete inFile;
+       }
+}
 
-                       /* Implement defaults for the input file and section name. */
-                       if ( inclSectionName == 0 )
-                               inclSectionName = parser->sectionName;
+%%{
+       machine section_parse;
 
-                       if ( lit != 0 ) 
-                               inclFileName = prepareFileName( lit, lit_len );
-                       else
-                               inclFileName = fileName;
+       # Need the defines representing tokens.
+       import "rlparse.h"; 
 
-                       /* Check for a recursive include structure. Add the current file/section
-                        * name then check if what we are including is already in the stack. */
-                       includeStack.append( IncludeStackItem( fileName, parser->sectionName ) );
+       action clear_words { word = lit = 0; word_len = lit_len = 0; }
+       action store_word { word = tokdata; word_len = toklen; }
+       action store_lit { lit = tokdata; lit_len = toklen; }
 
-                       if ( recursiveInclude( inclFileName, inclSectionName ) )
-                               scan_error() << "include: this is a recursive include operation" << endl;
-                       else {
-                               /* Open the input file for reading. */
-                               ifstream *inFile = new ifstream( inclFileName );
-                               if ( ! inFile->is_open() ) {
-                                       scan_error() << "include: could not open " << 
-                                                       inclFileName << " for reading" << endl;
-                               }
-
-                               Scanner scanner( inclFileName, *inFile, output, parser,
-                                               inclSectionName, includeDepth+1, false );
-                               scanner.do_scan( );
-                               delete inFile;
-                       }
+       action mach_err { scan_error() << "bad machine statement" << endl; }
+       action incl_err { scan_error() << "bad include statement" << endl; }
+       action import_err { scan_error() << "bad import statement" << endl; }
+       action write_err { scan_error() << "bad write statement" << endl; }
 
-                       /* Remove the last element (len-1) */
-                       includeStack.remove( -1 );
-               }
-       }
+       action handle_machine { handleMachine(); }
+       action handle_include { handleInclude(); }
+       action handle_import { handleImport(); }
+
+       machine_stmt =
+               ( KW_Machine TK_Word @store_word ';' ) @handle_machine
+               <>err mach_err <>eof mach_err;
 
        include_names = (
                TK_Word @store_word ( TK_Literal @store_lit )? |
@@ -368,27 +404,6 @@ void Scanner::updateCol()
                ( KW_Include include_names ';' ) @handle_include
                <>err incl_err <>eof incl_err;
 
-       action handle_import
-       {
-               if ( active() ) {
-                       char *importFileName = prepareFileName( lit, lit_len );
-
-                       /* Open the input file for reading. */
-                       ifstream *inFile = new ifstream( importFileName );
-                       if ( ! inFile->is_open() ) {
-                               scan_error() << "import: could not open " << 
-                                               importFileName << " for reading" << endl;
-                       }
-
-                       Scanner scanner( importFileName, *inFile, output, parser,
-                                       0, includeDepth+1, true );
-                       scanner.do_scan( );
-                       scanner.importToken( 0, 0, 0 );
-                       scanner.flushImport();
-                       delete inFile;
-               }
-       }
-
        import_stmt =
                ( KW_Import TK_Literal @store_lit ';' ) @handle_import
                <>err import_err <>eof import_err;
@@ -396,24 +411,26 @@ void Scanner::updateCol()
        action write_command
        {
                if ( active() && machineSpec == 0 && machineName == 0 ) {
-                       output << "<write"
-                                       " def_name=\"" << parser->sectionName << "\""
-                                       " line=\"" << line << "\""
-                                       " col=\"" << column << "\""
-                                       ">";
+                       InputItem *inputItem = new InputItem;
+                       inputItem->type = InputItem::Write;
+                       inputItem->loc.line = line;
+                       inputItem->loc.col = column;
+                       inputItem->name = parser->sectionName;
+                       inputItem->pd = parser->pd;
+                       id.inputItems.append( inputItem );
                }
        }
 
        action write_arg
        {
                if ( active() && machineSpec == 0 && machineName == 0 )
-                       output << "<arg>" << tokdata << "</arg>";
+                       id.inputItems.tail->writeArgs.append( strdup(tokdata) );
        }
 
        action write_close
        {
                if ( active() && machineSpec == 0 && machineName == 0 )
-                       output << "</write>\n";
+                       id.inputItems.tail->writeArgs.append( 0 );
        }
 
        write_stmt =
@@ -467,8 +484,15 @@ void Scanner::token( int type, char *start, char *end )
 
 void Scanner::processToken( int type, char *tokdata, int toklen )
 {
-       int *p = &type;
-       int *pe = &type + 1;
+       int *p, *pe, *eof;
+
+       if ( type < 0 )
+               p = pe = eof = 0;
+       else {
+               p = &type;
+               pe = &type + 1;
+               eof = 0;
+       }
 
        %%{
                machine section_parse;
@@ -486,30 +510,22 @@ void Scanner::startSection( )
 {
        parserExistsError = false;
 
-       if ( includeDepth == 0 ) {
-               if ( machineSpec == 0 && machineName == 0 )
-                       output << "</host>\n";
-       }
-
        sectionLoc.fileName = fileName;
        sectionLoc.line = line;
-       sectionLoc.col = 0;
+       sectionLoc.col = column;
 }
 
 void Scanner::endSection( )
 {
        /* Execute the eof actions for the section parser. */
-       %%{
-               machine section_parse;
-               write eof;
-       }%%
+       processToken( -1, 0, 0 );
 
        /* Close off the section with the parser. */
        if ( active() ) {
                InputLoc loc;
                loc.fileName = fileName;
                loc.line = line;
-               loc.col = 0;
+               loc.col = column;
 
                parser->token( loc, TK_EndSection, 0, 0 );
        }
@@ -518,9 +534,87 @@ void Scanner::endSection( )
                if ( machineSpec == 0 && machineName == 0 ) {
                        /* The end section may include a newline on the end, so
                         * we use the last line, which will count the newline. */
-                       output << "<host line=\"" << line << "\">";
+                       InputItem *inputItem = new InputItem;
+                       inputItem->type = InputItem::HostData;
+                       inputItem->loc.line = line;
+                       inputItem->loc.col = column;
+                       id.inputItems.append( inputItem );
+               }
+       }
+}
+
+/* Returns true if path is absolute. On Windows that means a drive letter,
+ * a colon and then either kind of slash; elsewhere it means a leading '/'.
+ * The path must be a null-terminated string; the checks short-circuit so
+ * that nothing past the terminator is read. */
+bool isAbsolutePath( const char *path )
+{
+#ifdef _WIN32
+       /* isalpha requires a value representable as unsigned char. */
+       return isalpha( static_cast<unsigned char>( path[0] ) ) && path[1] == ':' &&
+                       ( path[2] == '\\' || path[2] == '/' );
+#else
+       return path[0] == '/';
+#endif
+}
+
+/* Builds the null-terminated array of file names to try for an include or
+ * import.
+ *
+ * thisFileName: name of the file containing the statement; its directory part
+ *     (up to the last PATH_SEP) is used as the first search location.
+ * fileName, fnlen: the file name literal as written in the source; it is
+ *     converted with prepareLitString before use.
+ *
+ * An absolute name yields a single candidate. Otherwise the candidates are
+ * the name relative to thisFileName's directory, followed by the name under
+ * each include path given on the command line, in order. The array and the
+ * strings in it are allocated with new[] and handed to the caller. */
+char **Scanner::makeIncludePathChecks( const char *thisFileName, 
+               const char *fileName, int fnlen )
+{
+       bool caseInsensitive = false;
+       long length = 0;
+       char *data = prepareLitString( InputLoc(), fileName, fnlen, 
+                       length, caseInsensitive );
+
+       /* Size the array for the worst case: one candidate relative to the
+        * current file, one per include path, and the null terminator. A fixed
+        * size of two overflows as soon as an include path is given. */
+       long numIncludePaths = 0;
+       for ( ArgsVector::Iter incp = id.includePaths; incp.lte(); incp++ )
+               numIncludePaths += 1;
+
+       char **checks = new char*[numIncludePaths + 2];
+       long nextCheck = 0;
+
+       /* Absolute path? */
+       if ( isAbsolutePath( data ) )
+               checks[nextCheck++] = data;
+       else {
+               /* Search from the location of the current file. */
+               const char *lastSlash = strrchr( thisFileName, PATH_SEP );
+               if ( lastSlash == 0 )
+                       checks[nextCheck++] = data;
+               else {
+                       /* Keep the directory part including its trailing separator. */
+                       long givenPathLen = (lastSlash - thisFileName) + 1;
+                       long checklen = givenPathLen + length;
+                       char *check = new char[checklen+1];
+                       memcpy( check, thisFileName, givenPathLen );
+                       memcpy( check+givenPathLen, data, length );
+                       check[checklen] = 0;
+                       checks[nextCheck++] = check;
+               }
+
+               /* Search from the include paths given on the command line. */
+               for ( ArgsVector::Iter incp = id.includePaths; incp.lte(); incp++ ) {
+                       long pathLen = strlen( *incp );
+                       long checkLen = pathLen + 1 + length;
+                       char *check = new char[checkLen+1];
+                       memcpy( check, *incp, pathLen );
+                       check[pathLen] = PATH_SEP;
+                       memcpy( check+pathLen+1, data, length );
+                       check[checkLen] = 0;
+                       checks[nextCheck++] = check;
                }
        }
+
+       checks[nextCheck] = 0;
+       return checks;
+}
+
+/* Tries each name in the null-terminated array pathChecks in order and
+ * returns a newly allocated stream for the first one that opens, with found
+ * set to its index. Returns 0 with found set to -1 if none can be opened.
+ * The caller deletes the returned stream. */
+ifstream *Scanner::tryOpenInclude( char **pathChecks, long &found )
+{
+       char **check = pathChecks;
+       ifstream *inFile = new ifstream;
+       
+       while ( *check != 0 ) {
+               /* A failed open leaves failbit set. Older library implementations
+                * do not reset it on a later successful open, so clear it first. */
+               inFile->clear();
+               inFile->open( *check );
+               if ( inFile->is_open() ) {
+                       found = check - pathChecks;
+                       return inFile;
+               }
+               check += 1;
+       }
+
+       found = -1;
+       delete inFile;
+       return 0;
 }
 
 %%{
@@ -549,16 +643,120 @@ void Scanner::endSection( )
 
        c_cpp_comment = c_comment | cpp_comment;
 
-       # These literal forms are common to C-like host code and ragel.
+       ruby_comment = '#' [^\n]* NL;
+
+       # These literal forms are common to host code and ragel.
        s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'";
        d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"';
+       host_re_literal = '/' ([^/\\] | NL | '\\' (any | NL))* '/';
 
        whitespace = [ \t] | NL;
        pound_comment = '#' [^\n]* NL;
 
-       # An inline block of code. This is specified as a scanned, but is sent to
-       # the parser as one long block. The inline_block pointer is used to handle
-       # the preservation of the data.
+       # An inline block of code for Ruby. This scanner is entered with fcall
+       # from parser_def when the host language is Ruby and hands control back
+       # with fret once the block ends. Unlike the scanner used for the other
+       # host languages, it accepts host_re_literal as a literal form and
+       # recognizes ruby_comment rather than c_cpp_comment.
+       inline_code_ruby := |*
+               # Inline expression keywords.
+               "fpc" => { token( KW_PChar ); };
+               "fc" => { token( KW_Char ); };
+               "fcurs" => { token( KW_CurState ); };
+               "ftargs" => { token( KW_TargState ); };
+               "fentry" => { 
+                       whitespaceOn = false; 
+                       token( KW_Entry );
+               };
+
+               # Inline statement keywords. Most of these turn off the sending of
+               # whitespace tokens; it is turned back on by the * ) ; symbols below.
+               "fhold" => { 
+                       whitespaceOn = false; 
+                       token( KW_Hold );
+               };
+               "fexec" => { token( KW_Exec, 0, 0 ); };
+               "fgoto" => { 
+                       whitespaceOn = false; 
+                       token( KW_Goto );
+               };
+               "fnext" => { 
+                       whitespaceOn = false; 
+                       token( KW_Next );
+               };
+               "fcall" => { 
+                       whitespaceOn = false; 
+                       token( KW_Call );
+               };
+               "fret" => { 
+                       whitespaceOn = false; 
+                       token( KW_Ret );
+               };
+               "fbreak" => { 
+                       whitespaceOn = false; 
+                       token( KW_Break );
+               };
+
+               ident => { token( TK_Word, ts, te ); };
+
+               number => { token( TK_UInt, ts, te ); };
+               hex_number => { token( TK_Hex, ts, te ); };
+
+               # Host literals, including /.../ regular expression literals, are
+               # sent as a single token.
+               ( s_literal | d_literal | host_re_literal ) 
+                       => { token( IL_Literal, ts, te ); };
+
+               # Whitespace is only sent while whitespaceOn is set.
+               whitespace+ => { 
+                       if ( whitespaceOn ) 
+                               token( IL_WhiteSpace, ts, te );
+               };
+
+               ruby_comment => { token( IL_Comment, ts, te ); };
+
+               "::" => { token( TK_NameSep, ts, te ); };
+
+               # Some symbols need to go to the parser with their cardinal value as
+               # the token type (as opposed to being sent as anonymous symbols)
+               # because they are part of the sequences which we interpret. The * ) ;
+               # symbols cause whitespace parsing to come back on. This gets turned
+               # off by some keywords.
+
+               # A semicolon ends a semi-terminated inline block.
+               ";" => {
+                       whitespaceOn = true;
+                       token( *ts, ts, te );
+                       if ( inlineBlockType == SemiTerminated )
+                               fret;
+               };
+
+               [*)] => { 
+                       whitespaceOn = true;
+                       token( *ts, ts, te );
+               };
+
+               [,(] => { token( *ts, ts, te ); };
+
+               # Track brace nesting so the closing brace of a curly-delimited block
+               # can be told apart from the braces of inner scopes.
+               '{' => { 
+                       token( IL_Symbol, ts, te );
+                       curly_count += 1; 
+               };
+
+               '}' => { 
+                       if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) {
+                               /* Inline code block ends. */
+                               token( '}' );
+                               fret;
+                       }
+                       else {
+                               /* Either a semi terminated inline block or only the closing
+                                * brace of some inner scope, not the block's closing brace. */
+                               token( IL_Symbol, ts, te );
+                       }
+               };
+
+               # Reaching the end of input inside the block is an error.
+               EOF => {
+                       scan_error() << "unterminated code block" << endl;
+               };
+
+               # Send every other character as a symbol.
+               any => { token( IL_Symbol, ts, te ); };
+       *|;
+
+
+       # An inline block of code for languages other than Ruby.
        inline_code := |*
                # Inline expression keywords.
                "fpc" => { token( KW_PChar ); };
@@ -597,21 +795,22 @@ void Scanner::endSection( )
                        token( KW_Break );
                };
 
-               ident => { token( TK_Word, tokstart, tokend ); };
+               ident => { token( TK_Word, ts, te ); };
 
-               number => { token( TK_UInt, tokstart, tokend ); };
-               hex_number => { token( TK_Hex, tokstart, tokend ); };
+               number => { token( TK_UInt, ts, te ); };
+               hex_number => { token( TK_Hex, ts, te ); };
 
                ( s_literal | d_literal ) 
-                       => { token( IL_Literal, tokstart, tokend ); };
+                       => { token( IL_Literal, ts, te ); };
 
                whitespace+ => { 
                        if ( whitespaceOn ) 
-                               token( IL_WhiteSpace, tokstart, tokend );
+                               token( IL_WhiteSpace, ts, te );
                };
-               c_cpp_comment => { token( IL_Comment, tokstart, tokend ); };
 
-               "::" => { token( TK_NameSep, tokstart, tokend ); };
+               c_cpp_comment => { token( IL_Comment, ts, te ); };
+
+               "::" => { token( TK_NameSep, ts, te ); };
 
                # Some symbols need to go to the parser as with their cardinal value as
                # the token type (as opposed to being sent as anonymous symbols)
@@ -621,20 +820,20 @@ void Scanner::endSection( )
 
                ";" => {
                        whitespaceOn = true;
-                       token( *tokstart, tokstart, tokend );
+                       token( *ts, ts, te );
                        if ( inlineBlockType == SemiTerminated )
-                               fgoto parser_def;
+                               fret;
                };
 
                [*)] => { 
                        whitespaceOn = true;
-                       token( *tokstart, tokstart, tokend );
+                       token( *ts, ts, te );
                };
 
-               [,(] => { token( *tokstart, tokstart, tokend ); };
+               [,(] => { token( *ts, ts, te ); };
 
                '{' => { 
-                       token( IL_Symbol, tokstart, tokend );
+                       token( IL_Symbol, ts, te );
                        curly_count += 1; 
                };
 
@@ -642,12 +841,12 @@ void Scanner::endSection( )
                        if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) {
                                /* Inline code block ends. */
                                token( '}' );
-                               fgoto parser_def;
+                               fret;
                        }
                        else {
                                /* Either a semi terminated inline block or only the closing
                                 * brace of some inner scope, not the block's closing brace. */
-                               token( IL_Symbol, tokstart, tokend );
+                               token( IL_Symbol, ts, te );
                        }
                };
 
@@ -656,7 +855,7 @@ void Scanner::endSection( )
                };
 
                # Send every other character as a symbol.
-               any => { token( IL_Symbol, tokstart, tokend ); };
+               any => { token( IL_Symbol, ts, te ); };
        *|;
 
        or_literal := |*
@@ -670,7 +869,7 @@ void Scanner::endSection( )
                '\\f' => { token( RE_Char, '\f' ); };
                '\\r' => { token( RE_Char, '\r' ); };
                '\\\n' => { updateCol(); };
-               '\\' any => { token( RE_Char, tokstart+1, tokend ); };
+               '\\' any => { token( RE_Char, ts+1, te ); };
 
                # Range dash in an OR expression.
                '-' => { token( RE_Dash, 0, 0 ); };
@@ -683,11 +882,11 @@ void Scanner::endSection( )
                };
 
                # Characters in an OR expression.
-               [^\]] => { token( RE_Char, tokstart, tokend ); };
+               [^\]] => { token( RE_Char, ts, te ); };
 
        *|;
 
-       re_literal := |*
+       ragel_re_literal := |*
                # Escape sequences in regular expressions.
                '\\0' => { token( RE_Char, '\0' ); };
                '\\a' => { token( RE_Char, '\a' ); };
@@ -698,11 +897,11 @@ void Scanner::endSection( )
                '\\f' => { token( RE_Char, '\f' ); };
                '\\r' => { token( RE_Char, '\r' ); };
                '\\\n' => { updateCol(); };
-               '\\' any => { token( RE_Char, tokstart+1, tokend ); };
+               '\\' any => { token( RE_Char, ts+1, te ); };
 
                # Terminate an OR expression.
                '/' [i]? => { 
-                       token( RE_Slash, tokstart, tokend ); 
+                       token( RE_Slash, ts, te ); 
                        fgoto parser_def;
                };
 
@@ -718,12 +917,12 @@ void Scanner::endSection( )
                };
 
                # Characters in an OR expression.
-               [^\/] => { token( RE_Char, tokstart, tokend ); };
+               [^\/] => { token( RE_Char, ts, te ); };
        *|;
 
        # We need a separate token space here to avoid the ragel keywords.
        write_statement := |*
-               ident => { token( TK_Word, tokstart, tokend ); } ;
+               ident => { token( TK_Word, ts, te ); } ;
                [ \t\n]+ => { updateCol(); };
                ';' => { token( ';' ); fgoto parser_def; };
 
@@ -734,6 +933,7 @@ void Scanner::endSection( )
 
        # Parser definitions. 
        parser_def := |*
+               'length_cond' => { token( KW_Length ); };
                'machine' => { token( KW_Machine ); };
                'include' => { token( KW_Include ); };
                'import' => { token( KW_Import ); };
@@ -743,6 +943,8 @@ void Scanner::endSection( )
                };
                'action' => { token( KW_Action ); };
                'alphtype' => { token( KW_AlphType ); };
+               'prepush' => { token( KW_PrePush ); };
+               'postpop' => { token( KW_PostPop ); };
 
                # FIXME: Enable this post 5.17.
                # 'range' => { token( KW_Range ); };
@@ -750,19 +952,30 @@ void Scanner::endSection( )
                'getkey' => { 
                        token( KW_GetKey );
                        inlineBlockType = SemiTerminated;
-                       fgoto inline_code;
+                       if ( hostLang->lang == HostLang::Ruby )
+                               fcall inline_code_ruby;
+                       else
+                               fcall inline_code;
                };
                'access' => { 
                        token( KW_Access );
                        inlineBlockType = SemiTerminated;
-                       fgoto inline_code;
+                       if ( hostLang->lang == HostLang::Ruby )
+                               fcall inline_code_ruby;
+                       else
+                               fcall inline_code;
                };
                'variable' => { 
                        token( KW_Variable );
                        inlineBlockType = SemiTerminated;
-                       fgoto inline_code;
+                       if ( hostLang->lang == HostLang::Ruby )
+                               fcall inline_code_ruby;
+                       else
+                               fcall inline_code;
                };
                'when' => { token( KW_When ); };
+               'inwhen' => { token( KW_InWhen ); };
+               'outwhen' => { token( KW_OutWhen ); };
                'eof' => { token( KW_Eof ); };
                'err' => { token( KW_Err ); };
                'lerr' => { token( KW_Lerr ); };
@@ -771,20 +984,20 @@ void Scanner::endSection( )
                'export' => { token( KW_Export ); };
 
                # Identifiers.
-               ident => { token( TK_Word, tokstart, tokend ); } ;
+               ident => { token( TK_Word, ts, te ); } ;
 
                # Numbers
-               number => { token( TK_UInt, tokstart, tokend ); };
-               hex_number => { token( TK_Hex, tokstart, tokend ); };
+               number => { token( TK_UInt, ts, te ); };
+               hex_number => { token( TK_Hex, ts, te ); };
 
                # Literals, with optionals.
                ( s_literal | d_literal ) [i]? 
-                       => { token( TK_Literal, tokstart, tokend ); };
+                       => { token( TK_Literal, ts, te ); };
 
                '[' => { token( RE_SqOpen ); fcall or_literal; };
                '[^' => { token( RE_SqOpenNeg ); fcall or_literal; };
 
-               '/' => { token( RE_Slash ); fgoto re_literal; };
+               '/' => { token( RE_Slash ); fgoto ragel_re_literal; };
 
                # Ignore.
                pound_comment => { updateCol(); };
@@ -853,12 +1066,12 @@ void Scanner::endSection( )
                "|*" => { token( TK_BarStar ); };
 
                # Separater for name references.
-               "::" => { token( TK_NameSep, tokstart, tokend ); };
+               "::" => { token( TK_NameSep, ts, te ); };
 
                '}%%' => { 
                        updateCol();
                        endSection();
-                       fgoto main;
+                       fret;
                };
 
                [ \t\r]+ => { updateCol(); };
@@ -868,7 +1081,7 @@ void Scanner::endSection( )
                        updateCol();
                        if ( singleLineSpec ) {
                                endSection();
-                               fgoto main;
+                               fret;
                        }
                };
 
@@ -879,7 +1092,10 @@ void Scanner::endSection( )
                                token( '{' );
                                curly_count = 1; 
                                inlineBlockType = CurlyDelimited;
-                               fgoto inline_code;
+                               if ( hostLang->lang == HostLang::Ruby )
+                                       fcall inline_code_ruby;
+                               else
+                                       fcall inline_code;
                        }
                };
 
@@ -887,32 +1103,57 @@ void Scanner::endSection( )
                        scan_error() << "unterminated ragel section" << endl;
                };
 
-               any => { token( *tokstart ); } ;
+               any => { token( *ts ); } ;
+       *|;
+
+       # Outside code scanner. These tokens get passed through.
+       main_ruby := |*
+               ident => { pass( IMP_Word, ts, te ); };
+               number => { pass( IMP_UInt, ts, te ); };
+               ruby_comment => { pass(); };
+               ( s_literal | d_literal | host_re_literal ) 
+                       => { pass( IMP_Literal, ts, te ); };
+
+               '%%{' => { 
+                       updateCol();
+                       singleLineSpec = false;
+                       startSection();
+                       fcall parser_def;
+               };
+               '%%' => { 
+                       updateCol();
+                       singleLineSpec = true;
+                       startSection();
+                       fcall parser_def;
+               };
+               whitespace+ => { pass(); };
+               EOF;
+               any => { pass( *ts, 0, 0 ); };
        *|;
 
        # Outside code scanner. These tokens get passed through.
        main := |*
                'define' => { pass( IMP_Define, 0, 0 ); };
-               ident => { pass( IMP_Word, tokstart, tokend ); };
-               number => { pass( IMP_UInt, tokstart, tokend ); };
+               ident => { pass( IMP_Word, ts, te ); };
+               number => { pass( IMP_UInt, ts, te ); };
                c_cpp_comment => { pass(); };
-               ( s_literal | d_literal ) => { pass( IMP_Literal, tokstart, tokend ); };
+               ( s_literal | d_literal ) => { pass( IMP_Literal, ts, te ); };
 
                '%%{' => { 
                        updateCol();
                        singleLineSpec = false;
                        startSection();
-                       fgoto parser_def;
+                       fcall parser_def;
                };
                '%%' => { 
                        updateCol();
                        singleLineSpec = true;
                        startSection();
-                       fgoto parser_def;
+                       fcall parser_def;
                };
                whitespace+ => { pass(); };
                EOF;
-               any => { pass( *tokstart, 0, 0 ); };
+               any => { pass( *ts, 0, 0 ); };
        *|;
 }%%
 
@@ -922,9 +1163,13 @@ void Scanner::do_scan()
 {
        int bufsize = 8;
        char *buf = new char[bufsize];
-       const char last_char = 0;
        int cs, act, have = 0;
-       int top, stack[1];
+       int top;
+
+       /* The stack is two deep: one level for entering ragel definitions from the
+        * main machines, which process outside code, and another for entering OR
+        * literals from either a ragel spec or a regular expression. */
+       int stack[2];
        int curly_count = 0;
        bool execute = true;
        bool singleLineSpec = false;
@@ -934,6 +1179,14 @@ void Scanner::do_scan()
        init();
        %% write init;
 
+       /* Set up the start state. FIXME: After 5.20 is released the nocs write
+        * init option should be used, the main machine eliminated and this statement moved
+        * above the write init. */
+       if ( hostLang->lang == HostLang::Ruby )
+               cs = rlscan_en_main_ruby;
+       else
+               cs = rlscan_en_main;
+       
        while ( execute ) {
                char *p = buf + have;
                int space = bufsize - have;
@@ -948,9 +1201,9 @@ void Scanner::do_scan()
                        space = bufsize - have;
 
                        /* Patch up pointers possibly in use. */
-                       if ( tokstart != 0 )
-                               tokstart = newbuf + ( tokstart - buf );
-                       tokend = newbuf + ( tokend - buf );
+                       if ( ts != 0 )
+                               ts = newbuf + ( ts - buf );
+                       te = newbuf + ( te - buf );
 
                        /* Copy the new buffer in. */
                        memcpy( newbuf, buf, have );
@@ -960,14 +1213,15 @@ void Scanner::do_scan()
 
                input.read( p, space );
                int len = input.gcount();
+               char *pe = p + len;
 
-               /* If we see eof then append the EOF char. */
+               /* If we see eof then set the eof var; no EOF char is appended to the buffer. */
+               char *eof = 0;
                if ( len == 0 ) {
-                       p[0] = last_char, len = 1;
+                       eof = pe;
                        execute = false;
                }
 
-               char *pe = p + len;
                %% write exec;
 
                /* Check if we failed. */
@@ -979,7 +1233,7 @@ void Scanner::do_scan()
                }
 
                /* Decide if we need to preserve anything. */
-               char *preserve = tokstart;
+               char *preserve = ts;
 
                /* Now set up the prefix. */
                if ( preserve == 0 )
@@ -989,9 +1243,9 @@ void Scanner::do_scan()
                        have = pe - preserve;
                        memmove( buf, preserve, have );
                        unsigned int shiftback = preserve - buf;
-                       if ( tokstart != 0 )
-                               tokstart -= shiftback;
-                       tokend -= shiftback;
+                       if ( ts != 0 )
+                               ts -= shiftback;
+                       te -= shiftback;
 
                        preserve = buf;
                }
@@ -999,7 +1253,3 @@ void Scanner::do_scan()
 
        delete[] buf;
 }
-
-void scan( char *fileName, istream &input, ostream &output )
-{
-}