Line directives need to use the fileName stored in the InputLoc structures from
[external/ragel.git] / ragel / rlscan.rl
index db2c7ef..3c325c3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- *  Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ *  Copyright 2006-2007 Adrian Thurston <thurston@complang.org>
  */
 
 /*  This file is part of Ragel.
 #include <string.h>
 
 #include "ragel.h"
-#include "rlparse.h"
-#include "parsedata.h"
-#include "avltree.h"
-#include "vector.h"
+#include "rlscan.h"
+#include "inputdata.h"
 
+//#define LOG_TOKENS
 
 using std::ifstream;
 using std::istream;
@@ -37,96 +36,179 @@ using std::cout;
 using std::cerr;
 using std::endl;
 
-/* This is used for tracking the current stack of include file/machine pairs. It is
- * is used to detect and recursive include structure. */
-struct IncludeStackItem
-{
-       IncludeStackItem( char *fileName, char *sectionName )
-               : fileName(fileName), sectionName(sectionName) {}
-
-       char *fileName;
-       char *sectionName;
-};
-
-typedef Vector<IncludeStackItem> IncludeStack;
-
 enum InlineBlockType
 {
        CurlyDelimited,
        SemiTerminated
 };
 
-struct Scanner
+#ifdef _WIN32
+#define PATH_SEP '\\'
+#else
+#define PATH_SEP '/'
+#endif
+
+
+/*
+ * The Scanner for Importing
+ */
+
+%%{
+       machine inline_token_scan;
+       alphtype int;
+       access tok_;
+
+       # Import scanner tokens.
+       import "rlparse.h"; 
+
+       main := |*
+               # Define of number.
+               IMP_Define IMP_Word IMP_UInt => { 
+                       int base = tok_ts - token_data;
+                       int nameOff = 1;
+                       int numOff = 2;
+
+                       directToParser( inclToParser, fileName, line, column, TK_Word, 
+                                       token_strings[base+nameOff], token_lens[base+nameOff] );
+                       directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
+                       directToParser( inclToParser, fileName, line, column, TK_UInt,
+                                       token_strings[base+numOff], token_lens[base+numOff] );
+                       directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
+               };
+
+               # Assignment of number.
+               IMP_Word '=' IMP_UInt => { 
+                       int base = tok_ts - token_data;
+                       int nameOff = 0;
+                       int numOff = 2;
+
+                       directToParser( inclToParser, fileName, line, column, TK_Word, 
+                                       token_strings[base+nameOff], token_lens[base+nameOff] );
+                       directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
+                       directToParser( inclToParser, fileName, line, column, TK_UInt,
+                                       token_strings[base+numOff], token_lens[base+numOff] );
+                       directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
+               };
+
+               # Define of literal.
+               IMP_Define IMP_Word IMP_Literal => { 
+                       int base = tok_ts - token_data;
+                       int nameOff = 1;
+                       int litOff = 2;
+
+                       directToParser( inclToParser, fileName, line, column, TK_Word, 
+                                       token_strings[base+nameOff], token_lens[base+nameOff] );
+                       directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
+                       directToParser( inclToParser, fileName, line, column, TK_Literal,
+                                       token_strings[base+litOff], token_lens[base+litOff] );
+                       directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
+               };
+
+               # Assignment of literal.
+               IMP_Word '=' IMP_Literal => { 
+                       int base = tok_ts - token_data;
+                       int nameOff = 0;
+                       int litOff = 2;
+
+                       directToParser( inclToParser, fileName, line, column, TK_Word, 
+                                       token_strings[base+nameOff], token_lens[base+nameOff] );
+                       directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
+                       directToParser( inclToParser, fileName, line, column, TK_Literal,
+                                       token_strings[base+litOff], token_lens[base+litOff] );
+                       directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
+               };
+
+               # Catch everything else.
+               any;
+       *|;
+}%%
+
+%% write data;
+
+void Scanner::flushImport()
 {
-       Scanner( char *fileName, istream &input, ostream &output,
-                       Parser *inclToParser, char *inclSectionTarg,
-                       int includeDepth )
-       : 
-               fileName(fileName), input(input), output(output),
-               inclToParser(inclToParser),
-               inclSectionTarg(inclSectionTarg),
-               includeDepth(includeDepth),
-               line(1), column(1), lastnl(0), 
-               parser(0), active(false), 
-               parserExistsError(false), ragelDefOpen(false),
-               whitespaceOn(true)
-               {}
-
-       bool recursiveInclude( char *inclFileName, char *inclSectionName );
-
-       char *prepareFileName( char *fileName, int len )
-       {
-               bool caseInsensitive;
-               Token tokenFnStr, tokenRes;
-               tokenFnStr.data = fileName;
-               tokenFnStr.length = len;
-               tokenFnStr.prepareLitString( tokenRes, caseInsensitive );
-               return tokenRes.data;
+       int *p = token_data;
+       int *pe = token_data + cur_token;
+       int *eof = 0;
+
+       %%{
+               machine inline_token_scan;
+               write init;
+               write exec;
+       }%%
+
+       if ( tok_ts == 0 )
+               cur_token = 0;
+       else {
+               cur_token = pe - tok_ts;
+               int ts_offset = tok_ts - token_data;
+               memmove( token_data, token_data+ts_offset, cur_token*sizeof(token_data[0]) );
+               memmove( token_strings, token_strings+ts_offset, cur_token*sizeof(token_strings[0]) );
+               memmove( token_lens, token_lens+ts_offset, cur_token*sizeof(token_lens[0]) );
+       }
+}
+
+void Scanner::directToParser( Parser *toParser, const char *tokFileName, int tokLine, 
+               int tokColumn, int type, char *tokdata, int toklen )
+{
+       InputLoc loc;
+
+       #ifdef LOG_TOKENS
+       cerr << "scanner:" << tokLine << ":" << tokColumn << 
+                       ": sending token to the parser " << Parser_lelNames[type];
+       cerr << " " << toklen;
+       if ( tokdata != 0 )
+               cerr << " " << tokdata;
+       cerr << endl;
+       #endif
+
+       loc.fileName = tokFileName;
+       loc.line = tokLine;
+       loc.col = tokColumn;
+
+       toParser->token( loc, type, tokdata, toklen );
+}
+
+void Scanner::importToken( int token, char *start, char *end )
+{
+       if ( cur_token == max_tokens )
+               flushImport();
+
+       token_data[cur_token] = token;
+       if ( start == 0 ) {
+               token_strings[cur_token] = 0;
+               token_lens[cur_token] = 0;
        }
+       else {
+               int toklen = end-start;
+               token_lens[cur_token] = toklen;
+               token_strings[cur_token] = new char[toklen+1];
+               memcpy( token_strings[cur_token], start, toklen );
+               token_strings[cur_token][toklen] = 0;
+       }
+       cur_token++;
+}
 
-       void init();
-       void token( int type, char *start, char *end );
-       void token( int type, char c );
-       void token( int type );
-       void updateCol();
-       void startSection();
-       void endSection();
-       void openRagelDef();
-       void do_scan();
-       bool parserExists();
-       ostream &scan_error();
-
-       char *fileName;
-       istream &input;
-       ostream &output;
-       Parser *inclToParser;
-       char *inclSectionTarg;
-       int includeDepth;
-
-       int cs;
-       int line;
-       char *word, *lit;
-       int word_len, lit_len;
-       InputLoc sectionLoc;
-       char *tokstart, *tokend;
-       int column;
-       char *lastnl;
-
-       /* Set by machine statements, these persist from section to section
-        * allowing for unnamed sections. */
-       Parser *parser;
-       bool active;
-       IncludeStack includeStack;
-
-       /* This is set if ragel has already emitted an error stating that
-        * no section name has been seen and thus no parser exists. */
-       bool parserExistsError;
-       bool ragelDefOpen;
-
-       /* This is for inline code. By default it is on. It goes off for
-        * statements and values in inline blocks which are parsed. */
-       bool whitespaceOn;
-};
+void Scanner::pass( int token, char *start, char *end )
+{
+       if ( importMachines )
+               importToken( token, start, end );
+       pass();
+}
+
+void Scanner::pass()
+{
+       updateCol();
+
+       /* If no errors and we are at the bottom of the include stack (the
+        * source file listed on the command line) then write out the data. */
+       if ( includeDepth == 0 && machineSpec == 0 && machineName == 0 )
+               id.inputItems.tail->data.write( ts, te-ts );
+}
+
+/*
+ * The scanner for processing sections, includes, imports, etc.
+ */
 
 %%{
        machine section_parse;
@@ -134,36 +216,44 @@ struct Scanner
        write data;
 }%%
 
+
 void Scanner::init( )
 {
        %% write init;
 }
 
-bool Scanner::parserExists()
+bool Scanner::active()
 {
-       if ( parser != 0 )
-               return true;
+       if ( ignoreSection )
+               return false;
 
-       if ( ! parserExistsError ) {
-               scan_error() << "include: there is no previous specification name" << endl;
+       if ( parser == 0 && ! parserExistsError ) {
+               scan_error() << "this specification has no name, nor does any previous"
+                       " specification" << endl;
                parserExistsError = true;
        }
-       return false;
+
+       if ( parser == 0 )
+               return false;
+
+       return true;
 }
 
 ostream &Scanner::scan_error()
 {
        /* Maintain the error count. */
        gblErrorCount += 1;
-       cerr << fileName << ":" << line << ":" << column << ": ";
+       cerr << makeInputLoc( fileName, line, column ) << ": ";
        return cerr;
 }
 
-bool Scanner::recursiveInclude( char *inclFileName, char *inclSectionName )
+/* An approximate check for duplicate includes. Due to aliasing of files it's
+ * possible for duplicates to creep in. */
+bool Scanner::duplicateInclude( char *inclFileName, char *inclSectionName )
 {
-       for ( IncludeStack::Iter si = includeStack; si.lte(); si++ ) {
-               if ( strcmp( si->fileName, inclFileName ) == 0 &&
-                               strcmp( si->sectionName, inclSectionName ) == 0 )
+       for ( IncludeHistory::Iter hi = parser->includeHistory; hi.lte(); hi++ ) {
+               if ( strcmp( hi->fileName, inclFileName ) == 0 &&
+                               strcmp( hi->sectionName, inclSectionName ) == 0 )
                {
                        return true;
                }
@@ -175,32 +265,118 @@ void Scanner::updateCol()
 {
        char *from = lastnl;
        if ( from == 0 )
-               from = tokstart;
-       //cerr << "adding " << tokend - from << " to column" << endl;
-       column += tokend - from;
+               from = ts;
+       //cerr << "adding " << te - from << " to column" << endl;
+       column += te - from;
        lastnl = 0;
 }
 
-void Scanner::token( int type, char c )
+void Scanner::handleMachine()
 {
-       token( type, &c, &c + 1 );
+       /* Assign a name to the machine. */
+       char *machine = word;
+
+       if ( !importMachines && inclSectionTarg == 0 ) {
+               ignoreSection = false;
+
+               ParserDictEl *pdEl = id.parserDict.find( machine );
+               if ( pdEl == 0 ) {
+                       pdEl = new ParserDictEl( machine );
+                       pdEl->value = new Parser( fileName, machine, sectionLoc );
+                       pdEl->value->init();
+                       id.parserDict.insert( pdEl );
+                       id.parserList.append( pdEl->value );
+               }
+
+               parser = pdEl->value;
+       }
+       else if ( !importMachines && strcmp( inclSectionTarg, machine ) == 0 ) {
+               /* found include target */
+               ignoreSection = false;
+               parser = inclToParser;
+       }
+       else {
+               /* ignoring section */
+               ignoreSection = true;
+               parser = 0;
+       }
 }
 
-void Scanner::token( int type )
+void Scanner::handleInclude()
 {
-       token( type, 0, 0 );
+       if ( active() ) {
+               char *inclSectionName = word;
+               char **includeChecks = 0;
+
+               /* Implement defaults for the input file and section name. */
+               if ( inclSectionName == 0 )
+                       inclSectionName = parser->sectionName;
+
+               if ( lit != 0 )
+                       includeChecks = makeIncludePathChecks( fileName, lit, lit_len );
+               else {
+                       char *test = new char[strlen(fileName)+1];
+                       strcpy( test, fileName );
+
+                       includeChecks = new char*[2];
+
+                       includeChecks[0] = test;
+                       includeChecks[1] = 0;
+               }
+
+               long found = 0;
+               ifstream *inFile = tryOpenInclude( includeChecks, found );
+               if ( inFile == 0 ) {
+                       scan_error() << "include: failed to locate file" << endl;
+                       char **tried = includeChecks;
+                       while ( *tried != 0 )
+                               scan_error() << "include: attempted: \"" << *tried++ << '\"' << endl;
+               }
+               else {
+                       /* Don't include anything that's already been included. */
+                       if ( !duplicateInclude( includeChecks[found], inclSectionName ) ) {
+                               parser->includeHistory.append( IncludeHistoryItem( 
+                                               includeChecks[found], inclSectionName ) );
+
+                               Scanner scanner( id, includeChecks[found], *inFile, parser,
+                                               inclSectionName, includeDepth+1, false );
+                               scanner.do_scan( );
+                               delete inFile;
+                       }
+               }
+       }
+}
+
+void Scanner::handleImport()
+{
+       if ( active() ) {
+               char **importChecks = makeIncludePathChecks( fileName, lit, lit_len );
+
+               /* Open the input file for reading. */
+               long found = 0;
+               ifstream *inFile = tryOpenInclude( importChecks, found );
+               if ( inFile == 0 ) {
+                       scan_error() << "import: could not open import file " <<
+                                       "for reading" << endl;
+                       char **tried = importChecks;
+                       while ( *tried != 0 )
+                               scan_error() << "import: attempted: \"" << *tried++ << '\"' << endl;
+               }
+
+               Scanner scanner( id, importChecks[found], *inFile, parser,
+                               0, includeDepth+1, true );
+               scanner.do_scan( );
+               scanner.importToken( 0, 0, 0 );
+               scanner.flushImport();
+               delete inFile;
+       }
 }
 
 %%{
        machine section_parse;
 
-       # This relies on the the kelbt implementation and the order
-       # that tokens are declared.
-       KW_Machine = 128;
-       KW_Include = 129;
-       KW_Write = 130;
-       TK_Word = 131;
-       TK_Literal = 132;
+       # Need the defines representing tokens.
+       import "rlparse.h"; 
 
        action clear_words { word = lit = 0; word_len = lit_len = 0; }
        action store_word { word = tokdata; word_len = toklen; }
@@ -208,83 +384,17 @@ void Scanner::token( int type )
 
        action mach_err { scan_error() << "bad machine statement" << endl; }
        action incl_err { scan_error() << "bad include statement" << endl; }
+       action import_err { scan_error() << "bad import statement" << endl; }
        action write_err { scan_error() << "bad write statement" << endl; }
 
-       action handle_machine
-       {
-               /* Assign a name to the machine. */
-               char *machine = word;
-
-               if ( inclSectionTarg == 0 ) {
-                       active = true;
-
-                       ParserDictEl *pdEl = parserDict.find( machine );
-                       if ( pdEl == 0 ) {
-                               pdEl = new ParserDictEl( machine );
-                               pdEl->value = new Parser( fileName, machine, sectionLoc );
-                               pdEl->value->init();
-                               parserDict.insert( pdEl );
-                       }
-
-                       parser = pdEl->value;
-               }
-               else if ( strcmp( inclSectionTarg, machine ) == 0 ) {
-                       /* found include target */
-                       active = true;
-                       parser = inclToParser;
-               }
-               else {
-                       /* ignoring section */
-                       active = false;
-                       parser = 0;
-               }
-       }
+       action handle_machine { handleMachine(); }
+       action handle_include { handleInclude(); }
+       action handle_import { handleImport(); }
 
        machine_stmt =
                ( KW_Machine TK_Word @store_word ';' ) @handle_machine
                <>err mach_err <>eof mach_err;
 
-       action handle_include
-       {
-               if ( active && parserExists() ) {
-                       char *inclSectionName = word;
-                       char *inclFileName = 0;
-
-                       /* Implement defaults for the input file and section name. */
-                       if ( inclSectionName == 0 )
-                               inclSectionName = parser->sectionName;
-
-                       if ( lit != 0 ) 
-                               inclFileName = prepareFileName( lit, lit_len );
-                       else
-                               inclFileName = fileName;
-
-                       /* Check for a recursive include structure. Add the current file/section
-                        * name then check if what we are including is already in the stack. */
-                       includeStack.append( IncludeStackItem( fileName, parser->sectionName ) );
-
-                       if ( recursiveInclude( inclFileName, inclSectionName ) )
-                               scan_error() << "include: this is a recursive include operation" << endl;
-                       else {
-                               /* Open the input file for reading. */
-                               ifstream *inFile = new ifstream( inclFileName );
-                               if ( ! inFile->is_open() ) {
-                                       scan_error() << "include: could not open " << 
-                                                       inclFileName << " for reading" << endl;
-                               }
-
-                               Scanner scanner( inclFileName, *inFile, output, parser,
-                                               inclSectionName, includeDepth+1 );
-                               scanner.init();
-                               scanner.do_scan( );
-                               delete inFile;
-                       }
-
-                       /* Remove the last element (len-1) */
-                       includeStack.remove( -1 );
-               }
-       }
-
        include_names = (
                TK_Word @store_word ( TK_Literal @store_lit )? |
                TK_Literal @store_lit
@@ -294,78 +404,74 @@ void Scanner::token( int type )
                ( KW_Include include_names ';' ) @handle_include
                <>err incl_err <>eof incl_err;
 
+       import_stmt =
+               ( KW_Import TK_Literal @store_lit ';' ) @handle_import
+               <>err import_err <>eof import_err;
+
        action write_command
        {
-               if ( active ) {
-                       openRagelDef();
-                       if ( strcmp( tokdata, "data" ) != 0 &&
-                                       strcmp( tokdata, "init" ) != 0 &&
-                                       strcmp( tokdata, "exec" ) != 0 &&
-                                       strcmp( tokdata, "eof" ) != 0 )
-                       {
-                               scan_error() << "unknown write command" << endl;
-                       }
-                       output << "  <write what=\"" << tokdata << "\">";
+               if ( active() && machineSpec == 0 && machineName == 0 ) {
+                       InputItem *inputItem = new InputItem;
+                       inputItem->type = InputItem::Write;
+                       inputItem->loc.line = line;
+                       inputItem->loc.col = column;
+                       inputItem->name = parser->sectionName;
+                       inputItem->pd = parser->pd;
+                       id.inputItems.append( inputItem );
                }
        }
 
-       action write_option
+       action write_arg
        {
-               if ( active )
-                       output << "<option>" << tokdata << "</option>";
+               if ( active() && machineSpec == 0 && machineName == 0 )
+                       id.inputItems.tail->writeArgs.append( strdup(tokdata) );
        }
+
        action write_close
        {
-               if ( active )
-                       output << "</write>\n";
+               if ( active() && machineSpec == 0 && machineName == 0 )
+                       id.inputItems.tail->writeArgs.append( 0 );
        }
 
        write_stmt =
-               ( KW_Write TK_Word @write_command 
-                       ( TK_Word @write_option )* ';' @write_close )
+               ( KW_Write @write_command 
+               ( TK_Word @write_arg )+ ';' @write_close )
                <>err write_err <>eof write_err;
 
        action handle_token
        {
                /* Send the token off to the parser. */
-               if ( active && parserExists() ) {
-                       InputLoc loc;
-
-                       #if 0
-                       cerr << "scanner:" << line << ":" << column << 
-                                       ": sending token to the parser " << lelNames[*p];
-                       cerr << " " << toklen;
-                       if ( tokdata != 0 )
-                               cerr << " " << tokdata;
-                       cerr << endl;
-                       #endif
-
-                       loc.fileName = fileName;
-                       loc.line = line;
-                       loc.col = column;
-
-                       parser->token( loc, type, tokdata, toklen );
-               }
+               if ( active() )
+                       directToParser( parser, fileName, line, column, type, tokdata, toklen );
        }
 
        # Catch everything else.
-       everything_else = ^( KW_Machine | KW_Include | KW_Write ) @handle_token;
+       everything_else = 
+               ^( KW_Machine | KW_Include | KW_Import | KW_Write ) @handle_token;
 
        main := ( 
                machine_stmt |
                include_stmt |
+               import_stmt |
                write_stmt |
                everything_else
        )*;
 }%%
 
+void Scanner::token( int type, char c )
+{
+       token( type, &c, &c + 1 );
+}
+
+void Scanner::token( int type )
+{
+       token( type, 0, 0 );
+}
+
 void Scanner::token( int type, char *start, char *end )
 {
        char *tokdata = 0;
        int toklen = 0;
-       int *p = &type;
-       int *pe = &type + 1;
-
        if ( start != 0 ) {
                toklen = end-start;
                tokdata = new char[toklen+1];
@@ -373,67 +479,142 @@ void Scanner::token( int type, char *start, char *end )
                tokdata[toklen] = 0;
        }
 
+       processToken( type, tokdata, toklen );
+}
+
+void Scanner::processToken( int type, char *tokdata, int toklen )
+{
+       int *p, *pe, *eof;
+
+       if ( type < 0 )
+               p = pe = eof = 0;
+       else {
+               p = &type;
+               pe = &type + 1;
+               eof = 0;
+       }
+
        %%{
                machine section_parse;
                write exec;
        }%%
 
        updateCol();
+
+       /* Record the last token for use in controlling the scan of subsequent
+        * tokens. */
+       lastToken = type;
 }
 
 void Scanner::startSection( )
 {
        parserExistsError = false;
 
-       if ( includeDepth == 0 ) {
-               if ( machineSpec == 0 && machineName == 0 )
-                       output << "</host>\n";
-               ragelDefOpen = false;
-       }
-
        sectionLoc.fileName = fileName;
        sectionLoc.line = line;
-       sectionLoc.col = 0;
-}
-
-void Scanner::openRagelDef()
-{
-       if ( ! ragelDefOpen ) {
-               ragelDefOpen = true;
-               output << "<ragel_def name=\"" << parser->sectionName << "\">\n";
-       }
+       sectionLoc.col = column;
 }
 
 void Scanner::endSection( )
 {
        /* Execute the eof actions for the section parser. */
-       %%{
-               machine section_parse;
-               write eof;
-       }%%
+       processToken( -1, 0, 0 );
 
        /* Close off the section with the parser. */
-       if ( active && parserExists() ) {
+       if ( active() ) {
                InputLoc loc;
                loc.fileName = fileName;
                loc.line = line;
-               loc.col = 0;
+               loc.col = column;
 
                parser->token( loc, TK_EndSection, 0, 0 );
        }
 
        if ( includeDepth == 0 ) {
-               if ( ragelDefOpen ) {
-                       output << "</ragel_def>\n";
-                       ragelDefOpen = false;
-               }
-
                if ( machineSpec == 0 && machineName == 0 ) {
                        /* The end section may include a newline on the end, so
                         * we use the last line, which will count the newline. */
-                       output << "<host line=\"" << line << "\">";
+                       InputItem *inputItem = new InputItem;
+                       inputItem->type = InputItem::HostData;
+                       inputItem->loc.line = line;
+                       inputItem->loc.col = column;
+                       id.inputItems.append( inputItem );
+               }
+       }
+}
+
+bool isAbsolutePath( const char *path )
+{
+#ifdef _WIN32
+       return isalpha( path[0] ) && path[1] == ':' && path[2] == '\\';
+#else
+       return path[0] == '/';
+#endif
+}
+
+char **Scanner::makeIncludePathChecks( const char *thisFileName, 
+               const char *fileName, int fnlen )
+{
+       char **checks = new char*[2];
+       long nextCheck = 0;
+
+       bool caseInsensitive = false;
+       long length = 0;
+       char *data = prepareLitString( InputLoc(), fileName, fnlen, 
+                       length, caseInsensitive );
+
+       /* Absolute path? */
+       if ( isAbsolutePath( data ) )
+               checks[nextCheck++] = data;
+       else {
+               /* Search from the the location of the current file. */
+               const char *lastSlash = strrchr( thisFileName, PATH_SEP );
+               if ( lastSlash == 0 )
+                       checks[nextCheck++] = data;
+               else {
+                       long givenPathLen = (lastSlash - thisFileName) + 1;
+                       long checklen = givenPathLen + length;
+                       char *check = new char[checklen+1];
+                       memcpy( check, thisFileName, givenPathLen );
+                       memcpy( check+givenPathLen, data, length );
+                       check[checklen] = 0;
+                       checks[nextCheck++] = check;
+               }
+
+               /* Search from the include paths given on the command line. */
+               for ( ArgsVector::Iter incp = id.includePaths; incp.lte(); incp++ ) {
+                       long pathLen = strlen( *incp );
+                       long checkLen = pathLen + 1 + length;
+                       char *check = new char[checkLen+1];
+                       memcpy( check, *incp, pathLen );
+                       check[pathLen] = PATH_SEP;
+                       memcpy( check+pathLen+1, data, length );
+                       check[checkLen] = 0;
+                       checks[nextCheck++] = check;
                }
        }
+
+       checks[nextCheck] = 0;
+       return checks;
+}
+
+ifstream *Scanner::tryOpenInclude( char **pathChecks, long &found )
+{
+       char **check = pathChecks;
+       ifstream *inFile = new ifstream;
+       
+       while ( *check != 0 ) {
+               inFile->open( *check );
+               if ( inFile->is_open() ) {
+                       found = check - pathChecks;
+                       return inFile;
+               }
+               check += 1;
+       }
+
+       found = -1;
+       delete inFile;
+       return 0;
 }
 
 %%{
@@ -462,16 +643,120 @@ void Scanner::endSection( )
 
        c_cpp_comment = c_comment | cpp_comment;
 
-       # These literal forms are common to C-like host code and ragel.
+       ruby_comment = '#' [^\n]* NL;
+
+       # These literal forms are common to host code and ragel.
        s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'";
        d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"';
+       host_re_literal = '/' ([^/\\] | NL | '\\' (any | NL))* '/';
 
        whitespace = [ \t] | NL;
        pound_comment = '#' [^\n]* NL;
 
-       # An inline block of code. This is specified as a scanned, but is sent to
-       # the parser as one long block. The inline_block pointer is used to handle
-       # the preservation of the data.
+       # An inline block of code for Ruby. parser_def enters this scanner with
+       # fcall when the host language is Ruby. Unlike inline_code it recognizes
+       # ruby_comment rather than c_cpp_comment, and it also accepts
+       # host_re_literal (/.../) as a literal. The scanner returns with fret on
+       # the terminating ';' of a SemiTerminated block or on the closing '}' that
+       # brings curly_count back to zero in a CurlyDelimited block.
+       inline_code_ruby := |*
+               # Inline expression keywords.
+               "fpc" => { token( KW_PChar ); };
+               "fc" => { token( KW_Char ); };
+               "fcurs" => { token( KW_CurState ); };
+               "ftargs" => { token( KW_TargState ); };
+               # Turns whitespace tokens off; they come back on at the next * ) or ;
+               "fentry" => { 
+                       whitespaceOn = false; 
+                       token( KW_Entry );
+               };
+
+               # Inline statement keywords. All but fexec turn whitespace tokens
+               # off until the next * ) or ; symbol.
+               "fhold" => { 
+                       whitespaceOn = false; 
+                       token( KW_Hold );
+               };
+               "fexec" => { token( KW_Exec, 0, 0 ); };
+               "fgoto" => { 
+                       whitespaceOn = false; 
+                       token( KW_Goto );
+               };
+               "fnext" => { 
+                       whitespaceOn = false; 
+                       token( KW_Next );
+               };
+               "fcall" => { 
+                       whitespaceOn = false; 
+                       token( KW_Call );
+               };
+               "fret" => { 
+                       whitespaceOn = false; 
+                       token( KW_Ret );
+               };
+               "fbreak" => { 
+                       whitespaceOn = false; 
+                       token( KW_Break );
+               };
+
+               ident => { token( TK_Word, ts, te ); };
+
+               number => { token( TK_UInt, ts, te ); };
+               hex_number => { token( TK_Hex, ts, te ); };
+
+               # Single quoted, double quoted and /.../ host literals are sent as
+               # one literal token.
+               ( s_literal | d_literal | host_re_literal ) 
+                       => { token( IL_Literal, ts, te ); };
+
+               # Whitespace is only sent on while whitespaceOn is set.
+               whitespace+ => { 
+                       if ( whitespaceOn ) 
+                               token( IL_WhiteSpace, ts, te );
+               };
+
+               ruby_comment => { token( IL_Comment, ts, te ); };
+
+               "::" => { token( TK_NameSep, ts, te ); };
+
+               # Some symbols need to go to the parser with their cardinal value as
+               # the token type (as opposed to being sent as anonymous symbols)
+               # because they are part of the sequences which we interpret. The * ) ;
+               # symbols cause whitespace parsing to come back on. This gets turned
+               # off by some keywords.
+
+               # A semicolon also ends the block when it is semi terminated.
+               ";" => {
+                       whitespaceOn = true;
+                       token( *ts, ts, te );
+                       if ( inlineBlockType == SemiTerminated )
+                               fret;
+               };
+
+               [*)] => { 
+                       whitespaceOn = true;
+                       token( *ts, ts, te );
+               };
+
+               [,(] => { token( *ts, ts, te ); };
+
+               # Track brace nesting so the block's own closing brace can be found.
+               '{' => { 
+                       token( IL_Symbol, ts, te );
+                       curly_count += 1; 
+               };
+
+               '}' => { 
+                       if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) {
+                               /* Inline code block ends. */
+                               token( '}' );
+                               fret;
+                       }
+                       else {
+                               /* Either a semi terminated inline block or only the closing
+                                * brace of some inner scope, not the block's closing brace. */
+                               token( IL_Symbol, ts, te );
+                       }
+               };
+
+               # Reaching the end of input inside an inline block is an error.
+               EOF => {
+                       scan_error() << "unterminated code block" << endl;
+               };
+
+               # Send every other character as a symbol.
+               any => { token( IL_Symbol, ts, te ); };
+       *|;
+
+
+       # An inline block of code for languages other than Ruby.
        inline_code := |*
                # Inline expression keywords.
                "fpc" => { token( KW_PChar ); };
@@ -510,21 +795,22 @@ void Scanner::endSection( )
                        token( KW_Break );
                };
 
-               ident => { token( TK_Word, tokstart, tokend ); };
+               ident => { token( TK_Word, ts, te ); };
 
-               number => { token( TK_UInt, tokstart, tokend ); };
-               hex_number => { token( TK_Hex, tokstart, tokend ); };
+               number => { token( TK_UInt, ts, te ); };
+               hex_number => { token( TK_Hex, ts, te ); };
 
                ( s_literal | d_literal ) 
-                       => { token( IL_Literal, tokstart, tokend ); };
+                       => { token( IL_Literal, ts, te ); };
 
                whitespace+ => { 
                        if ( whitespaceOn ) 
-                               token( IL_WhiteSpace, tokstart, tokend );
+                               token( IL_WhiteSpace, ts, te );
                };
-               c_cpp_comment => { token( IL_Comment, tokstart, tokend ); };
 
-               "::" => { token( TK_NameSep, tokstart, tokend ); };
+               c_cpp_comment => { token( IL_Comment, ts, te ); };
+
+               "::" => { token( TK_NameSep, ts, te ); };
 
                # Some symbols need to go to the parser as with their cardinal value as
                # the token type (as opposed to being sent as anonymous symbols)
@@ -534,20 +820,20 @@ void Scanner::endSection( )
 
                ";" => {
                        whitespaceOn = true;
-                       token( *tokstart, tokstart, tokend );
+                       token( *ts, ts, te );
                        if ( inlineBlockType == SemiTerminated )
-                               fgoto parser_def;
+                               fret;
                };
 
                [*)] => { 
                        whitespaceOn = true;
-                       token( *tokstart, tokstart, tokend );
+                       token( *ts, ts, te );
                };
 
-               [,(] => { token( *tokstart, tokstart, tokend ); };
+               [,(] => { token( *ts, ts, te ); };
 
                '{' => { 
-                       token( IL_Symbol, tokstart, tokend );
+                       token( IL_Symbol, ts, te );
                        curly_count += 1; 
                };
 
@@ -555,12 +841,12 @@ void Scanner::endSection( )
                        if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) {
                                /* Inline code block ends. */
                                token( '}' );
-                               fgoto parser_def;
+                               fret;
                        }
                        else {
                                /* Either a semi terminated inline block or only the closing
                                 * brace of some inner scope, not the block's closing brace. */
-                               token( IL_Symbol, tokstart, tokend );
+                               token( IL_Symbol, ts, te );
                        }
                };
 
@@ -569,7 +855,7 @@ void Scanner::endSection( )
                };
 
                # Send every other character as a symbol.
-               any => { token( IL_Symbol, tokstart, tokend ); };
+               any => { token( IL_Symbol, ts, te ); };
        *|;
 
        or_literal := |*
@@ -583,7 +869,7 @@ void Scanner::endSection( )
                '\\f' => { token( RE_Char, '\f' ); };
                '\\r' => { token( RE_Char, '\r' ); };
                '\\\n' => { updateCol(); };
-               '\\' any => { token( RE_Char, tokstart+1, tokend ); };
+               '\\' any => { token( RE_Char, ts+1, te ); };
 
                # Range dash in an OR expression.
                '-' => { token( RE_Dash, 0, 0 ); };
@@ -596,11 +882,11 @@ void Scanner::endSection( )
                };
 
                # Characters in an OR expression.
-               [^\]] => { token( RE_Char, tokstart, tokend ); };
+               [^\]] => { token( RE_Char, ts, te ); };
 
        *|;
 
-       re_literal := |*
+       ragel_re_literal := |*
                # Escape sequences in regular expressions.
                '\\0' => { token( RE_Char, '\0' ); };
                '\\a' => { token( RE_Char, '\a' ); };
@@ -611,11 +897,11 @@ void Scanner::endSection( )
                '\\f' => { token( RE_Char, '\f' ); };
                '\\r' => { token( RE_Char, '\r' ); };
                '\\\n' => { updateCol(); };
-               '\\' any => { token( RE_Char, tokstart+1, tokend ); };
+               '\\' any => { token( RE_Char, ts+1, te ); };
 
                # Terminate an OR expression.
                '/' [i]? => { 
-                       token( RE_Slash, tokstart, tokend ); 
+                       token( RE_Slash, ts, te ); 
                        fgoto parser_def;
                };
 
@@ -631,12 +917,12 @@ void Scanner::endSection( )
                };
 
                # Characters in an OR expression.
-               [^\/] => { token( RE_Char, tokstart, tokend ); };
+               [^\/] => { token( RE_Char, ts, te ); };
        *|;
 
        # We need a separate token space here to avoid the ragel keywords.
        write_statement := |*
-               ident => { token( TK_Word, tokstart, tokend ); } ;
+               ident => { token( TK_Word, ts, te ); } ;
                [ \t\n]+ => { updateCol(); };
                ';' => { token( ';' ); fgoto parser_def; };
 
@@ -647,14 +933,18 @@ void Scanner::endSection( )
 
        # Parser definitions. 
        parser_def := |*
+               'length_cond' => { token( KW_Length ); };
                'machine' => { token( KW_Machine ); };
                'include' => { token( KW_Include ); };
+               'import' => { token( KW_Import ); };
                'write' => { 
                        token( KW_Write );
                        fgoto write_statement;
                };
                'action' => { token( KW_Action ); };
                'alphtype' => { token( KW_AlphType ); };
+               'prepush' => { token( KW_PrePush ); };
+               'postpop' => { token( KW_PostPop ); };
 
                # FIXME: Enable this post 5.17.
                # 'range' => { token( KW_Range ); };
@@ -662,40 +952,52 @@ void Scanner::endSection( )
                'getkey' => { 
                        token( KW_GetKey );
                        inlineBlockType = SemiTerminated;
-                       fgoto inline_code;
+                       if ( hostLang->lang == HostLang::Ruby )
+                               fcall inline_code_ruby;
+                       else
+                               fcall inline_code;
                };
                'access' => { 
                        token( KW_Access );
                        inlineBlockType = SemiTerminated;
-                       fgoto inline_code;
+                       if ( hostLang->lang == HostLang::Ruby )
+                               fcall inline_code_ruby;
+                       else
+                               fcall inline_code;
                };
                'variable' => { 
                        token( KW_Variable );
                        inlineBlockType = SemiTerminated;
-                       fgoto inline_code;
+                       if ( hostLang->lang == HostLang::Ruby )
+                               fcall inline_code_ruby;
+                       else
+                               fcall inline_code;
                };
                'when' => { token( KW_When ); };
+               'inwhen' => { token( KW_InWhen ); };
+               'outwhen' => { token( KW_OutWhen ); };
                'eof' => { token( KW_Eof ); };
                'err' => { token( KW_Err ); };
                'lerr' => { token( KW_Lerr ); };
                'to' => { token( KW_To ); };
                'from' => { token( KW_From ); };
+               'export' => { token( KW_Export ); };
 
                # Identifiers.
-               ident => { token( TK_Word, tokstart, tokend ); } ;
+               ident => { token( TK_Word, ts, te ); } ;
 
                # Numbers
-               number => { token( TK_UInt, tokstart, tokend ); };
-               hex_number => { token( TK_Hex, tokstart, tokend ); };
+               number => { token( TK_UInt, ts, te ); };
+               hex_number => { token( TK_Hex, ts, te ); };
 
                # Literals, with optionals.
                ( s_literal | d_literal ) [i]? 
-                       => { token( TK_Literal, tokstart, tokend ); };
+                       => { token( TK_Literal, ts, te ); };
 
                '[' => { token( RE_SqOpen ); fcall or_literal; };
                '[^' => { token( RE_SqOpenNeg ); fcall or_literal; };
 
-               '/' => { token( RE_Slash ); fgoto re_literal; };
+               '/' => { token( RE_Slash ); fgoto ragel_re_literal; };
 
                # Ignore.
                pound_comment => { updateCol(); };
@@ -763,69 +1065,96 @@ void Scanner::endSection( )
                # Opening of longest match.
                "|*" => { token( TK_BarStar ); };
 
+               # Separator for name references.
+               "::" => { token( TK_NameSep, ts, te ); };
+
                '}%%' => { 
                        updateCol();
                        endSection();
-                       fgoto main;
+                       fret;
                };
 
-               [ \t]+ => { updateCol(); };
+               [ \t\r]+ => { updateCol(); };
 
                # If we are in a single line machine then newline may end the spec.
                NL => {
                        updateCol();
                        if ( singleLineSpec ) {
                                endSection();
-                               fgoto main;
+                               fret;
                        }
                };
 
                '{' => { 
-                       token( '{' );
-                       curly_count = 1; 
-                       inlineBlockType = CurlyDelimited;
-                       fgoto inline_code;
+                       if ( lastToken == KW_Export || lastToken == KW_Entry )
+                               token( '{' );
+                       else {
+                               token( '{' );
+                               curly_count = 1; 
+                               inlineBlockType = CurlyDelimited;
+                               if ( hostLang->lang == HostLang::Ruby )
+                                       fcall inline_code_ruby;
+                               else
+                                       fcall inline_code;
+                       }
                };
 
                EOF => {
                        scan_error() << "unterminated ragel section" << endl;
                };
 
-               any => { token( *tokstart ); } ;
+               any => { token( *ts ); } ;
        *|;
 
-       action pass {
-               updateCol();
+       # Outside code scanner. These tokens get passed through.
+       # This is the Ruby variant: do_scan starts in this machine when the host
+       # language is Ruby. It recognizes ruby_comment and host_re_literal so that
+       # a section opener inside a comment or literal is not taken as the start
+       # of a ragel section.
+       # NOTE(review): pass() is defined outside this chunk; the IMP_* arguments
+       # look like token types for the import scanner -- confirm.
+       main_ruby := |*
+               ident => { pass( IMP_Word, ts, te ); };
+               number => { pass( IMP_UInt, ts, te ); };
+               ruby_comment => { pass(); };
+               ( s_literal | d_literal | host_re_literal ) 
+                       => { pass( IMP_Literal, ts, te ); };
 
-               /* If no errors and we are at the bottom of the include stack (the
-                * source file listed on the command line) then write out the data. */
-               if ( includeDepth == 0 && machineSpec == 0 && machineName == 0 )
-                       xmlEscapeHost( output, tokstart, tokend-tokstart );
-       }
+               # Multi-line ragel section. parser_def is entered with fcall and
+               # comes back here with fret once the section is closed.
+               '%%{' => { 
+                       updateCol();
+                       singleLineSpec = false;
+                       startSection();
+                       fcall parser_def;
+               };
+               # Single-line ragel spec. With singleLineSpec set, parser_def ends
+               # the section at the next newline.
+               '%%' => { 
+                       updateCol();
+                       singleLineSpec = true;
+                       startSection();
+                       fcall parser_def;
+               };
+               whitespace+ => { pass(); };
+               # EOF is matched with no action.
+               EOF;
+               # Any other character is passed through with its own value as the
+               # first argument.
+               any => { pass( *ts, 0, 0 ); };
+       *|;
 
        # Outside code scanner. These tokens get passed through.
+       # This is the variant for host languages other than Ruby: do_scan starts
+       # in this machine unless the host language is Ruby. It recognizes
+       # c_cpp_comment and the quoted literal forms so that a section opener
+       # inside a comment or literal is not taken as the start of a ragel section.
+       # NOTE(review): pass() is defined outside this chunk; the IMP_* arguments
+       # look like token types for the import scanner -- confirm.
        main := |*
-               ident => pass;
-               number => pass;
-               c_cpp_comment => pass;
-               s_literal | d_literal => pass;
+               # 'define' is listed before ident so the keyword wins the tie.
+               'define' => { pass( IMP_Define, 0, 0 ); };
+               ident => { pass( IMP_Word, ts, te ); };
+               number => { pass( IMP_UInt, ts, te ); };
+               c_cpp_comment => { pass(); };
+               ( s_literal | d_literal ) => { pass( IMP_Literal, ts, te ); };
+
+               # Multi-line ragel section. parser_def is entered with fcall and
+               # comes back here with fret once the section is closed.
                '%%{' => { 
                        updateCol();
                        singleLineSpec = false;
                        startSection();
-                       fgoto parser_def;
+                       fcall parser_def;
                };
+               # Single-line ragel spec. With singleLineSpec set, parser_def ends
+               # the section at the next newline.
                '%%' => { 
                        updateCol();
                        singleLineSpec = true;
                        startSection();
-                       fgoto parser_def;
+                       fcall parser_def;
                };
-               whitespace+ => pass;
+               whitespace+ => { pass(); };
+               # EOF is matched with no action.
                EOF;
-               any => pass;
+               # Any other character is passed through with its own value as the
+               # first argument.
+               any => { pass( *ts, 0, 0 ); };
        *|;
-
 }%%
 
 %% write data;
@@ -834,16 +1163,30 @@ void Scanner::do_scan()
 {
        int bufsize = 8;
        char *buf = new char[bufsize];
-       const char last_char = 0;
        int cs, act, have = 0;
-       int top, stack[1];
+       int top;
+
+       /* The stack is two deep, one level for going into ragel defs from the main
+        * machines which process outside code, and another for going into or literals
+        * from either a ragel spec, or a regular expression. */
+       int stack[2];
        int curly_count = 0;
        bool execute = true;
        bool singleLineSpec = false;
-       InlineBlockType inlineBlockType;
+       InlineBlockType inlineBlockType = CurlyDelimited;
 
+       /* Init the section parser and the character scanner. */
+       init();
        %% write init;
 
+       /* Set up the start state. FIXME: After 5.20 is released the nocs write
+        * init option should be used, the main machine eliminated and this statement moved
+        * above the write init. */
+       if ( hostLang->lang == HostLang::Ruby )
+               cs = rlscan_en_main_ruby;
+       else
+               cs = rlscan_en_main;
+       
        while ( execute ) {
                char *p = buf + have;
                int space = bufsize - have;
@@ -858,9 +1201,9 @@ void Scanner::do_scan()
                        space = bufsize - have;
 
                        /* Patch up pointers possibly in use. */
-                       if ( tokstart != 0 )
-                               tokstart = newbuf + ( tokstart - buf );
-                       tokend = newbuf + ( tokend - buf );
+                       if ( ts != 0 )
+                               ts = newbuf + ( ts - buf );
+                       te = newbuf + ( te - buf );
 
                        /* Copy the new buffer in. */
                        memcpy( newbuf, buf, have );
@@ -870,14 +1213,15 @@ void Scanner::do_scan()
 
                input.read( p, space );
                int len = input.gcount();
+               char *pe = p + len;
 
-               /* If we see eof then append the EOF char. */
+               /* If we see eof then append the eof var. */
+               char *eof = 0;
                if ( len == 0 ) {
-                       p[0] = last_char, len = 1;
+                       eof = pe;
                        execute = false;
                }
 
-               char *pe = p + len;
                %% write exec;
 
                /* Check if we failed. */
@@ -889,7 +1233,7 @@ void Scanner::do_scan()
                }
 
                /* Decide if we need to preserve anything. */
-               char *preserve = tokstart;
+               char *preserve = ts;
 
                /* Now set up the prefix. */
                if ( preserve == 0 )
@@ -899,9 +1243,9 @@ void Scanner::do_scan()
                        have = pe - preserve;
                        memmove( buf, preserve, have );
                        unsigned int shiftback = preserve - buf;
-                       if ( tokstart != 0 )
-                               tokstart -= shiftback;
-                       tokend -= shiftback;
+                       if ( ts != 0 )
+                               ts -= shiftback;
+                       te -= shiftback;
 
                        preserve = buf;
                }
@@ -909,15 +1253,3 @@ void Scanner::do_scan()
 
        delete[] buf;
 }
-
-void scan( char *fileName, istream &input, ostream &output )
-{
-       Scanner scanner( fileName, input, output, 0, 0, 0 );
-       scanner.init();
-       scanner.do_scan();
-
-       InputLoc eofLoc;
-       eofLoc.fileName = fileName;
-       eofLoc.col = 1;
-       eofLoc.line = scanner.line;
-}