/*
- * Copyright 2006-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ * Copyright 2006-2007 Adrian Thurston <thurston@complang.org>
*/
/* This file is part of Ragel.
#include "ragel.h"
#include "rlscan.h"
+#include "inputdata.h"
+
+//#define LOG_TOKENS
using std::ifstream;
using std::istream;
using std::cerr;
using std::endl;
+enum InlineBlockType
+{
+ CurlyDelimited,
+ SemiTerminated
+};
+
+#ifdef _WIN32
+#define PATH_SEP '\\'
+#else
+#define PATH_SEP '/'
+#endif
+
+
/*
* The Scanner for Importing
*/
-#define IMP_Word 128
-#define IMP_Literal 129
-#define IMP_Number 130
-#define IMP_Define 131
-
%%{
machine inline_token_scan;
alphtype int;
access tok_;
- IMP_Word = 128;
- IMP_Literal = 129;
- IMP_Number = 130;
- IMP_Define = 131;
+ # Import scanner tokens.
+ import "rlparse.h";
main := |*
- IMP_Define IMP_Word IMP_Number => { cerr << ( "define" ) << endl; };
- IMP_Word '=' IMP_Number => { cerr << ( "const1" ) << endl; };
- IMP_Word '=' IMP_Literal => { cerr << ( "const2" ) << endl; };
- any;
- *|;
-}%%
-
-%% write data;
-
-void ImportScanner::token( int token, char *start, char *end )
-{
- if ( cur_token == max_tokens ) {
- int *p = token_data;
- int *pe = token_data + cur_token;
-
- %% write init;
- %% write exec;
-
- if ( tok_tokstart == 0 )
- cur_token = 0;
- else {
- cerr << "BLOCK BREAK" << endl;
- cur_token = pe - tok_tokstart;
- memmove( token_data, tok_tokstart, cur_token*sizeof(int) );
- }
- }
-
- token_data[cur_token++] = token;
-}
-
-%%{
- machine inline_scan;
- access chr_;
-
- # This is sent by the driver code.
- EOF = 0;
- NL = '\n';
-
- # Identifiers, numbers, commetns, and other common things.
- ident = ( alpha | '_' ) ( alpha |digit |'_' )*;
- number = digit+;
- hex_number = '0x' [0-9a-fA-F]+;
-
- c_comment =
- '/*' ( any | NL )* :>> '*/';
-
- cpp_comment =
- '//' [^\n]* NL;
-
- c_cpp_comment = c_comment | cpp_comment;
+ # Define of number.
+ IMP_Define IMP_Word IMP_UInt => {
+ int base = tok_ts - token_data;
+ int nameOff = 1;
+ int numOff = 2;
+
+ directToParser( inclToParser, fileName, line, column, TK_Word,
+ token_strings[base+nameOff], token_lens[base+nameOff] );
+ directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
+ directToParser( inclToParser, fileName, line, column, TK_UInt,
+ token_strings[base+numOff], token_lens[base+numOff] );
+ directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
+ };
- # These literal forms are common to C-like host code and ragel.
- s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'";
- d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"';
+ # Assignment of number.
+ IMP_Word '=' IMP_UInt => {
+ int base = tok_ts - token_data;
+ int nameOff = 0;
+ int numOff = 2;
+
+ directToParser( inclToParser, fileName, line, column, TK_Word,
+ token_strings[base+nameOff], token_lens[base+nameOff] );
+ directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
+ directToParser( inclToParser, fileName, line, column, TK_UInt,
+ token_strings[base+numOff], token_lens[base+numOff] );
+ directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
+ };
- whitespace = [ \t] | NL;
+ # Define of literal.
+ IMP_Define IMP_Word IMP_Literal => {
+ int base = tok_ts - token_data;
+ int nameOff = 1;
+ int litOff = 2;
+
+ directToParser( inclToParser, fileName, line, column, TK_Word,
+ token_strings[base+nameOff], token_lens[base+nameOff] );
+ directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
+ directToParser( inclToParser, fileName, line, column, TK_Literal,
+ token_strings[base+litOff], token_lens[base+litOff] );
+ directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
+ };
+ # Assignment of literal.
+ IMP_Word '=' IMP_Literal => {
+ int base = tok_ts - token_data;
+ int nameOff = 0;
+ int litOff = 2;
+
+ directToParser( inclToParser, fileName, line, column, TK_Word,
+ token_strings[base+nameOff], token_lens[base+nameOff] );
+ directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
+ directToParser( inclToParser, fileName, line, column, TK_Literal,
+ token_strings[base+litOff], token_lens[base+litOff] );
+ directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
+ };
- # Outside code scanner. These tokens get passed through.
- main := |*
- 'define' => { token( IMP_Define, 0, 0 ); };
- ident => { token( IMP_Word, chr_tokstart, chr_tokend ); };
- number => { token( IMP_Number, chr_tokstart, chr_tokend ); };
- c_cpp_comment;
- s_literal | d_literal => { token( IMP_Literal, chr_tokstart, chr_tokend ); };
- whitespace+;
- EOF;
- any => { token( *chr_tokstart, 0, 0 ); };
+ # Catch everything else.
+ any;
*|;
}%%
%% write data;
-void ImportScanner::do_scan()
+void Scanner::flushImport()
{
- int bufsize = 8;
- char *buf = new char[bufsize];
- const char last_char = 0;
- int chr_cs, chr_act, have = 0;
- bool execute = true;
-
- /* Init the section parser and the character scanner. */
- %% write init;
-
- while ( execute ) {
- char *p = buf + have;
- int space = bufsize - have;
-
- if ( space == 0 ) {
- /* We filled up the buffer trying to scan a token. Grow it. */
- bufsize = bufsize * 2;
- char *newbuf = new char[bufsize];
-
- /* Recompute p and space. */
- p = newbuf + have;
- space = bufsize - have;
-
- /* Patch up pointers possibly in use. */
- if ( chr_tokstart != 0 )
- chr_tokstart = newbuf + ( chr_tokstart - buf );
- chr_tokend = newbuf + ( chr_tokend - buf );
-
- /* Copy the new buffer in. */
- memcpy( newbuf, buf, have );
- delete[] buf;
- buf = newbuf;
- }
-
- input.read( p, space );
- int len = input.gcount();
-
- /* If we see eof then append the EOF char. */
- if ( len == 0 ) {
- p[0] = last_char, len = 1;
- execute = false;
- }
+ int *p = token_data;
+ int *pe = token_data + cur_token;
+ int *eof = 0;
- char *pe = p + len;
- %% write exec;
+ %%{
+ machine inline_token_scan;
+ write init;
+ write exec;
+ }%%
- /* Check if we failed. */
- if ( chr_cs == inline_scan_error ) {
- /* Machine failed before finding a token. I'm not yet sure if this
- * is reachable. */
- scan_error() << "scanner error" << endl;
- exit(1);
- }
+ if ( tok_ts == 0 )
+ cur_token = 0;
+ else {
+ cur_token = pe - tok_ts;
+ int ts_offset = tok_ts - token_data;
+ memmove( token_data, token_data+ts_offset, cur_token*sizeof(token_data[0]) );
+ memmove( token_strings, token_strings+ts_offset, cur_token*sizeof(token_strings[0]) );
+ memmove( token_lens, token_lens+ts_offset, cur_token*sizeof(token_lens[0]) );
+ }
+}
- /* Decide if we need to preserve anything. */
- char *preserve = chr_tokstart;
+void Scanner::directToParser( Parser *toParser, const char *tokFileName, int tokLine,
+ int tokColumn, int type, char *tokdata, int toklen )
+{
+ InputLoc loc;
+
+ #ifdef LOG_TOKENS
+ cerr << "scanner:" << tokLine << ":" << tokColumn <<
+ ": sending token to the parser " << Parser_lelNames[type];
+ cerr << " " << toklen;
+ if ( tokdata != 0 )
+ cerr << " " << tokdata;
+ cerr << endl;
+ #endif
+
+ loc.fileName = tokFileName;
+ loc.line = tokLine;
+ loc.col = tokColumn;
+
+ toParser->token( loc, type, tokdata, toklen );
+}
- /* Now set up the prefix. */
- if ( preserve == 0 )
- have = 0;
- else {
- /* There is data that needs to be shifted over. */
- have = pe - preserve;
- memmove( buf, preserve, have );
- unsigned int shiftback = preserve - buf;
- if ( chr_tokstart != 0 )
- chr_tokstart -= shiftback;
- chr_tokend -= shiftback;
+void Scanner::importToken( int token, char *start, char *end )
+{
+ if ( cur_token == max_tokens )
+ flushImport();
- preserve = buf;
- }
+ token_data[cur_token] = token;
+ if ( start == 0 ) {
+ token_strings[cur_token] = 0;
+ token_lens[cur_token] = 0;
}
-
- delete[] buf;
+ else {
+ int toklen = end-start;
+ token_lens[cur_token] = toklen;
+ token_strings[cur_token] = new char[toklen+1];
+ memcpy( token_strings[cur_token], start, toklen );
+ token_strings[cur_token][toklen] = 0;
+ }
+ cur_token++;
}
-ostream &ImportScanner::scan_error()
+void Scanner::pass( int token, char *start, char *end )
{
- /* Maintain the error count. */
- gblErrorCount += 1;
- cerr << fileName << ":" << line << ":" << column << ": ";
- return cerr;
+ if ( importMachines )
+ importToken( token, start, end );
+ pass();
}
+void Scanner::pass()
+{
+ updateCol();
-/*
- * The Ragel Scanner
- */
+ /* If no errors and we are at the bottom of the include stack (the
+ * source file listed on the command line) then write out the data. */
+ if ( includeDepth == 0 && machineSpec == 0 && machineName == 0 )
+ id.inputItems.tail->data.write( ts, te-ts );
+}
-enum InlineBlockType
-{
- CurlyDelimited,
- SemiTerminated
-};
+/*
+ * The scanner for processing sections, includes, imports, etc.
+ */
%%{
machine section_parse;
return false;
if ( parser == 0 && ! parserExistsError ) {
- scan_error() << "there is no previous specification name" << endl;
+ scan_error() << "this specification has no name, nor does any previous"
+ " specification" << endl;
parserExistsError = true;
}
{
/* Maintain the error count. */
gblErrorCount += 1;
- cerr << fileName << ":" << line << ":" << column << ": ";
+ cerr << makeInputLoc( fileName, line, column ) << ": ";
return cerr;
}
-bool Scanner::recursiveInclude( char *inclFileName, char *inclSectionName )
+/* An approximate check for duplicate includes. Due to aliasing of files it's
+ * possible for duplicates to creep in. */
+bool Scanner::duplicateInclude( char *inclFileName, char *inclSectionName )
{
- for ( IncludeStack::Iter si = includeStack; si.lte(); si++ ) {
- if ( strcmp( si->fileName, inclFileName ) == 0 &&
- strcmp( si->sectionName, inclSectionName ) == 0 )
+ for ( IncludeHistory::Iter hi = parser->includeHistory; hi.lte(); hi++ ) {
+ if ( strcmp( hi->fileName, inclFileName ) == 0 &&
+ strcmp( hi->sectionName, inclSectionName ) == 0 )
{
return true;
}
{
char *from = lastnl;
if ( from == 0 )
- from = tokstart;
- //cerr << "adding " << tokend - from << " to column" << endl;
- column += tokend - from;
+ from = ts;
+ //cerr << "adding " << te - from << " to column" << endl;
+ column += te - from;
lastnl = 0;
}
-void Scanner::token( int type, char c )
+void Scanner::handleMachine()
{
- token( type, &c, &c + 1 );
+ /* Assign a name to the machine. */
+ char *machine = word;
+
+ if ( !importMachines && inclSectionTarg == 0 ) {
+ ignoreSection = false;
+
+ ParserDictEl *pdEl = id.parserDict.find( machine );
+ if ( pdEl == 0 ) {
+ pdEl = new ParserDictEl( machine );
+ pdEl->value = new Parser( fileName, machine, sectionLoc );
+ pdEl->value->init();
+ id.parserDict.insert( pdEl );
+ id.parserList.append( pdEl->value );
+ }
+
+ parser = pdEl->value;
+ }
+ else if ( !importMachines && strcmp( inclSectionTarg, machine ) == 0 ) {
+ /* found include target */
+ ignoreSection = false;
+ parser = inclToParser;
+ }
+ else {
+ /* ignoring section */
+ ignoreSection = true;
+ parser = 0;
+ }
}
-void Scanner::token( int type )
+void Scanner::handleInclude()
{
- token( type, 0, 0 );
+ if ( active() ) {
+ char *inclSectionName = word;
+ char **includeChecks = 0;
+
+ /* Implement defaults for the input file and section name. */
+ if ( inclSectionName == 0 )
+ inclSectionName = parser->sectionName;
+
+ if ( lit != 0 )
+ includeChecks = makeIncludePathChecks( fileName, lit, lit_len );
+ else {
+ char *test = new char[strlen(fileName)+1];
+ strcpy( test, fileName );
+
+ includeChecks = new char*[2];
+
+ includeChecks[0] = test;
+ includeChecks[1] = 0;
+ }
+
+ long found = 0;
+ ifstream *inFile = tryOpenInclude( includeChecks, found );
+ if ( inFile == 0 ) {
+ scan_error() << "include: failed to locate file" << endl;
+ char **tried = includeChecks;
+ while ( *tried != 0 )
+ scan_error() << "include: attempted: \"" << *tried++ << '\"' << endl;
+ }
+ else {
+ /* Don't include anything that's already been included. */
+ if ( !duplicateInclude( includeChecks[found], inclSectionName ) ) {
+ parser->includeHistory.append( IncludeHistoryItem(
+ includeChecks[found], inclSectionName ) );
+
+ Scanner scanner( id, includeChecks[found], *inFile, parser,
+ inclSectionName, includeDepth+1, false );
+ scanner.do_scan( );
+ delete inFile;
+ }
+ }
+ }
+}
+
+void Scanner::handleImport()
+{
+ if ( active() ) {
+ char **importChecks = makeIncludePathChecks( fileName, lit, lit_len );
+
+ /* Open the input file for reading. */
+ long found = 0;
+ ifstream *inFile = tryOpenInclude( importChecks, found );
+ if ( inFile == 0 ) {
+ scan_error() << "import: could not open import file " <<
+ "for reading" << endl;
+ char **tried = importChecks;
+ while ( *tried != 0 )
+ scan_error() << "import: attempted: \"" << *tried++ << '\"' << endl;
+ }
+
+ Scanner scanner( id, importChecks[found], *inFile, parser,
+ 0, includeDepth+1, true );
+ scanner.do_scan( );
+ scanner.importToken( 0, 0, 0 );
+ scanner.flushImport();
+ delete inFile;
+ }
}
%%{
machine section_parse;
- # This relies on the the kelbt implementation and the order
- # that tokens are declared.
- KW_Machine = 128;
- KW_Include = 129;
- KW_Write = 130;
- TK_Word = 131;
- TK_Literal = 132;
+ # Need the defines representing tokens.
+ import "rlparse.h";
action clear_words { word = lit = 0; word_len = lit_len = 0; }
action store_word { word = tokdata; word_len = toklen; }
action mach_err { scan_error() << "bad machine statement" << endl; }
action incl_err { scan_error() << "bad include statement" << endl; }
+ action import_err { scan_error() << "bad import statement" << endl; }
action write_err { scan_error() << "bad write statement" << endl; }
- action handle_machine
- {
- /* Assign a name to the machine. */
- char *machine = word;
-
- if ( inclSectionTarg == 0 ) {
- ignoreSection = false;
-
- ParserDictEl *pdEl = parserDict.find( machine );
- if ( pdEl == 0 ) {
- pdEl = new ParserDictEl( machine );
- pdEl->value = new Parser( fileName, machine, sectionLoc );
- pdEl->value->init();
- parserDict.insert( pdEl );
- }
-
- parser = pdEl->value;
- }
- else if ( strcmp( inclSectionTarg, machine ) == 0 ) {
- /* found include target */
- ignoreSection = false;
- parser = inclToParser;
- }
- else {
- /* ignoring section */
- ignoreSection = true;
- parser = 0;
- }
- }
+ action handle_machine { handleMachine(); }
+ action handle_include { handleInclude(); }
+ action handle_import { handleImport(); }
machine_stmt =
( KW_Machine TK_Word @store_word ';' ) @handle_machine
<>err mach_err <>eof mach_err;
- action handle_include
- {
- if ( active() ) {
- char *inclSectionName = word;
- char *inclFileName = 0;
-
- /* Implement defaults for the input file and section name. */
- if ( inclSectionName == 0 )
- inclSectionName = parser->sectionName;
-
- if ( lit != 0 )
- inclFileName = prepareFileName( lit, lit_len );
- else
- inclFileName = fileName;
-
- /* Check for a recursive include structure. Add the current file/section
- * name then check if what we are including is already in the stack. */
- includeStack.append( IncludeStackItem( fileName, parser->sectionName ) );
-
- if ( recursiveInclude( inclFileName, inclSectionName ) )
- scan_error() << "include: this is a recursive include operation" << endl;
- else {
- /* Open the input file for reading. */
- ifstream *inFile = new ifstream( inclFileName );
- if ( ! inFile->is_open() ) {
- scan_error() << "include: could not open " <<
- inclFileName << " for reading" << endl;
- }
-
- Scanner scanner( inclFileName, *inFile, output, parser,
- inclSectionName, includeDepth+1 );
- scanner.do_scan( );
- delete inFile;
- }
-
- /* Remove the last element (len-1) */
- includeStack.remove( -1 );
- }
- }
-
include_names = (
TK_Word @store_word ( TK_Literal @store_lit )? |
TK_Literal @store_lit
( KW_Include include_names ';' ) @handle_include
<>err incl_err <>eof incl_err;
+ import_stmt =
+ ( KW_Import TK_Literal @store_lit ';' ) @handle_import
+ <>err import_err <>eof import_err;
+
action write_command
{
if ( active() && machineSpec == 0 && machineName == 0 ) {
- output << "<write"
- " def_name=\"" << parser->sectionName << "\""
- " line=\"" << line << "\""
- " col=\"" << column << "\""
- ">";
+ InputItem *inputItem = new InputItem;
+ inputItem->type = InputItem::Write;
+ inputItem->loc.line = line;
+ inputItem->loc.col = column;
+ inputItem->name = parser->sectionName;
+ inputItem->pd = parser->pd;
+ id.inputItems.append( inputItem );
}
}
action write_arg
{
if ( active() && machineSpec == 0 && machineName == 0 )
- output << "<arg>" << tokdata << "</arg>";
+ id.inputItems.tail->writeArgs.append( strdup(tokdata) );
}
action write_close
{
if ( active() && machineSpec == 0 && machineName == 0 )
- output << "</write>\n";
+ id.inputItems.tail->writeArgs.append( 0 );
}
write_stmt =
action handle_token
{
/* Send the token off to the parser. */
- if ( active() ) {
- InputLoc loc;
-
- #if 0
- cerr << "scanner:" << line << ":" << column <<
- ": sending token to the parser " << Parser_lelNames[*p];
- cerr << " " << toklen;
- if ( tokdata != 0 )
- cerr << " " << tokdata;
- cerr << endl;
- #endif
-
- loc.fileName = fileName;
- loc.line = line;
- loc.col = column;
-
- parser->token( loc, type, tokdata, toklen );
- }
+ if ( active() )
+ directToParser( parser, fileName, line, column, type, tokdata, toklen );
}
# Catch everything else.
- everything_else = ^( KW_Machine | KW_Include | KW_Write ) @handle_token;
+ everything_else =
+ ^( KW_Machine | KW_Include | KW_Import | KW_Write ) @handle_token;
main := (
machine_stmt |
include_stmt |
+ import_stmt |
write_stmt |
everything_else
)*;
}%%
+void Scanner::token( int type, char c )
+{
+ token( type, &c, &c + 1 );
+}
+
+void Scanner::token( int type )
+{
+ token( type, 0, 0 );
+}
+
void Scanner::token( int type, char *start, char *end )
{
char *tokdata = 0;
int toklen = 0;
- int *p = &type;
- int *pe = &type + 1;
-
if ( start != 0 ) {
toklen = end-start;
tokdata = new char[toklen+1];
tokdata[toklen] = 0;
}
+ processToken( type, tokdata, toklen );
+}
+
+void Scanner::processToken( int type, char *tokdata, int toklen )
+{
+ int *p, *pe, *eof;
+
+ if ( type < 0 )
+ p = pe = eof = 0;
+ else {
+ p = &type;
+ pe = &type + 1;
+ eof = 0;
+ }
+
%%{
machine section_parse;
write exec;
{
parserExistsError = false;
- if ( includeDepth == 0 ) {
- if ( machineSpec == 0 && machineName == 0 )
- output << "</host>\n";
- }
-
sectionLoc.fileName = fileName;
sectionLoc.line = line;
- sectionLoc.col = 0;
+ sectionLoc.col = column;
}
void Scanner::endSection( )
{
/* Execute the eof actions for the section parser. */
- %%{
- machine section_parse;
- write eof;
- }%%
+ processToken( -1, 0, 0 );
/* Close off the section with the parser. */
if ( active() ) {
InputLoc loc;
loc.fileName = fileName;
loc.line = line;
- loc.col = 0;
+ loc.col = column;
parser->token( loc, TK_EndSection, 0, 0 );
}
if ( machineSpec == 0 && machineName == 0 ) {
/* The end section may include a newline on the end, so
* we use the last line, which will count the newline. */
- output << "<host line=\"" << line << "\">";
+ InputItem *inputItem = new InputItem;
+ inputItem->type = InputItem::HostData;
+ inputItem->loc.line = line;
+ inputItem->loc.col = column;
+ id.inputItems.append( inputItem );
+ }
+ }
+}
+
+bool isAbsolutePath( const char *path )
+{
+#ifdef _WIN32
+ return isalpha( path[0] ) && path[1] == ':' && path[2] == '\\';
+#else
+ return path[0] == '/';
+#endif
+}
+
+char **Scanner::makeIncludePathChecks( const char *thisFileName,
+ const char *fileName, int fnlen )
+{
+ char **checks = new char*[2];
+ long nextCheck = 0;
+
+ bool caseInsensitive = false;
+ long length = 0;
+ char *data = prepareLitString( InputLoc(), fileName, fnlen,
+ length, caseInsensitive );
+
+ /* Absolute path? */
+ if ( isAbsolutePath( data ) )
+ checks[nextCheck++] = data;
+ else {
+		/* Search from the location of the current file. */
+ const char *lastSlash = strrchr( thisFileName, PATH_SEP );
+ if ( lastSlash == 0 )
+ checks[nextCheck++] = data;
+ else {
+ long givenPathLen = (lastSlash - thisFileName) + 1;
+ long checklen = givenPathLen + length;
+ char *check = new char[checklen+1];
+ memcpy( check, thisFileName, givenPathLen );
+ memcpy( check+givenPathLen, data, length );
+ check[checklen] = 0;
+ checks[nextCheck++] = check;
+ }
+
+ /* Search from the include paths given on the command line. */
+ for ( ArgsVector::Iter incp = id.includePaths; incp.lte(); incp++ ) {
+ long pathLen = strlen( *incp );
+ long checkLen = pathLen + 1 + length;
+ char *check = new char[checkLen+1];
+ memcpy( check, *incp, pathLen );
+ check[pathLen] = PATH_SEP;
+ memcpy( check+pathLen+1, data, length );
+ check[checkLen] = 0;
+ checks[nextCheck++] = check;
+ }
+ }
+
+ checks[nextCheck] = 0;
+ return checks;
+}
+
+ifstream *Scanner::tryOpenInclude( char **pathChecks, long &found )
+{
+ char **check = pathChecks;
+ ifstream *inFile = new ifstream;
+
+ while ( *check != 0 ) {
+ inFile->open( *check );
+ if ( inFile->is_open() ) {
+ found = check - pathChecks;
+ return inFile;
}
+ check += 1;
}
+
+ found = -1;
+ delete inFile;
+ return 0;
}
%%{
c_cpp_comment = c_comment | cpp_comment;
- # These literal forms are common to C-like host code and ragel.
+ ruby_comment = '#' [^\n]* NL;
+
+ # These literal forms are common to host code and ragel.
s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'";
d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"';
+ host_re_literal = '/' ([^/\\] | NL | '\\' (any | NL))* '/';
whitespace = [ \t] | NL;
pound_comment = '#' [^\n]* NL;
- # An inline block of code. This is specified as a scanned, but is sent to
- # the parser as one long block. The inline_block pointer is used to handle
- # the preservation of the data.
+ # An inline block of code for Ruby.
+ inline_code_ruby := |*
+ # Inline expression keywords.
+ "fpc" => { token( KW_PChar ); };
+ "fc" => { token( KW_Char ); };
+ "fcurs" => { token( KW_CurState ); };
+ "ftargs" => { token( KW_TargState ); };
+ "fentry" => {
+ whitespaceOn = false;
+ token( KW_Entry );
+ };
+
+ # Inline statement keywords.
+ "fhold" => {
+ whitespaceOn = false;
+ token( KW_Hold );
+ };
+ "fexec" => { token( KW_Exec, 0, 0 ); };
+ "fgoto" => {
+ whitespaceOn = false;
+ token( KW_Goto );
+ };
+ "fnext" => {
+ whitespaceOn = false;
+ token( KW_Next );
+ };
+ "fcall" => {
+ whitespaceOn = false;
+ token( KW_Call );
+ };
+ "fret" => {
+ whitespaceOn = false;
+ token( KW_Ret );
+ };
+ "fbreak" => {
+ whitespaceOn = false;
+ token( KW_Break );
+ };
+
+ ident => { token( TK_Word, ts, te ); };
+
+ number => { token( TK_UInt, ts, te ); };
+ hex_number => { token( TK_Hex, ts, te ); };
+
+ ( s_literal | d_literal | host_re_literal )
+ => { token( IL_Literal, ts, te ); };
+
+ whitespace+ => {
+ if ( whitespaceOn )
+ token( IL_WhiteSpace, ts, te );
+ };
+
+ ruby_comment => { token( IL_Comment, ts, te ); };
+
+ "::" => { token( TK_NameSep, ts, te ); };
+
+ # Some symbols need to go to the parser as with their cardinal value as
+ # the token type (as opposed to being sent as anonymous symbols)
+ # because they are part of the sequences which we interpret. The * ) ;
+ # symbols cause whitespace parsing to come back on. This gets turned
+ # off by some keywords.
+
+ ";" => {
+ whitespaceOn = true;
+ token( *ts, ts, te );
+ if ( inlineBlockType == SemiTerminated )
+ fret;
+ };
+
+ [*)] => {
+ whitespaceOn = true;
+ token( *ts, ts, te );
+ };
+
+ [,(] => { token( *ts, ts, te ); };
+
+ '{' => {
+ token( IL_Symbol, ts, te );
+ curly_count += 1;
+ };
+
+ '}' => {
+ if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) {
+ /* Inline code block ends. */
+ token( '}' );
+ fret;
+ }
+ else {
+ /* Either a semi terminated inline block or only the closing
+ * brace of some inner scope, not the block's closing brace. */
+ token( IL_Symbol, ts, te );
+ }
+ };
+
+ EOF => {
+ scan_error() << "unterminated code block" << endl;
+ };
+
+ # Send every other character as a symbol.
+ any => { token( IL_Symbol, ts, te ); };
+ *|;
+
+
+ # An inline block of code for languages other than Ruby.
inline_code := |*
# Inline expression keywords.
"fpc" => { token( KW_PChar ); };
token( KW_Break );
};
- ident => { token( TK_Word, tokstart, tokend ); };
+ ident => { token( TK_Word, ts, te ); };
- number => { token( TK_UInt, tokstart, tokend ); };
- hex_number => { token( TK_Hex, tokstart, tokend ); };
+ number => { token( TK_UInt, ts, te ); };
+ hex_number => { token( TK_Hex, ts, te ); };
( s_literal | d_literal )
- => { token( IL_Literal, tokstart, tokend ); };
+ => { token( IL_Literal, ts, te ); };
whitespace+ => {
if ( whitespaceOn )
- token( IL_WhiteSpace, tokstart, tokend );
+ token( IL_WhiteSpace, ts, te );
};
- c_cpp_comment => { token( IL_Comment, tokstart, tokend ); };
- "::" => { token( TK_NameSep, tokstart, tokend ); };
+ c_cpp_comment => { token( IL_Comment, ts, te ); };
+
+ "::" => { token( TK_NameSep, ts, te ); };
# Some symbols need to go to the parser as with their cardinal value as
# the token type (as opposed to being sent as anonymous symbols)
";" => {
whitespaceOn = true;
- token( *tokstart, tokstart, tokend );
+ token( *ts, ts, te );
if ( inlineBlockType == SemiTerminated )
- fgoto parser_def;
+ fret;
};
[*)] => {
whitespaceOn = true;
- token( *tokstart, tokstart, tokend );
+ token( *ts, ts, te );
};
- [,(] => { token( *tokstart, tokstart, tokend ); };
+ [,(] => { token( *ts, ts, te ); };
'{' => {
- token( IL_Symbol, tokstart, tokend );
+ token( IL_Symbol, ts, te );
curly_count += 1;
};
if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) {
/* Inline code block ends. */
token( '}' );
- fgoto parser_def;
+ fret;
}
else {
/* Either a semi terminated inline block or only the closing
* brace of some inner scope, not the block's closing brace. */
- token( IL_Symbol, tokstart, tokend );
+ token( IL_Symbol, ts, te );
}
};
};
# Send every other character as a symbol.
- any => { token( IL_Symbol, tokstart, tokend ); };
+ any => { token( IL_Symbol, ts, te ); };
*|;
or_literal := |*
'\\f' => { token( RE_Char, '\f' ); };
'\\r' => { token( RE_Char, '\r' ); };
'\\\n' => { updateCol(); };
- '\\' any => { token( RE_Char, tokstart+1, tokend ); };
+ '\\' any => { token( RE_Char, ts+1, te ); };
# Range dash in an OR expression.
'-' => { token( RE_Dash, 0, 0 ); };
};
# Characters in an OR expression.
- [^\]] => { token( RE_Char, tokstart, tokend ); };
+ [^\]] => { token( RE_Char, ts, te ); };
*|;
- re_literal := |*
+ ragel_re_literal := |*
# Escape sequences in regular expressions.
'\\0' => { token( RE_Char, '\0' ); };
'\\a' => { token( RE_Char, '\a' ); };
'\\f' => { token( RE_Char, '\f' ); };
'\\r' => { token( RE_Char, '\r' ); };
'\\\n' => { updateCol(); };
- '\\' any => { token( RE_Char, tokstart+1, tokend ); };
+ '\\' any => { token( RE_Char, ts+1, te ); };
# Terminate an OR expression.
'/' [i]? => {
- token( RE_Slash, tokstart, tokend );
+ token( RE_Slash, ts, te );
fgoto parser_def;
};
};
# Characters in an OR expression.
- [^\/] => { token( RE_Char, tokstart, tokend ); };
+ [^\/] => { token( RE_Char, ts, te ); };
*|;
# We need a separate token space here to avoid the ragel keywords.
write_statement := |*
- ident => { token( TK_Word, tokstart, tokend ); } ;
+ ident => { token( TK_Word, ts, te ); } ;
[ \t\n]+ => { updateCol(); };
';' => { token( ';' ); fgoto parser_def; };
# Parser definitions.
parser_def := |*
+ 'length_cond' => { token( KW_Length ); };
'machine' => { token( KW_Machine ); };
'include' => { token( KW_Include ); };
+ 'import' => { token( KW_Import ); };
'write' => {
token( KW_Write );
fgoto write_statement;
};
'action' => { token( KW_Action ); };
'alphtype' => { token( KW_AlphType ); };
+ 'prepush' => { token( KW_PrePush ); };
+ 'postpop' => { token( KW_PostPop ); };
# FIXME: Enable this post 5.17.
# 'range' => { token( KW_Range ); };
'getkey' => {
token( KW_GetKey );
inlineBlockType = SemiTerminated;
- fgoto inline_code;
+ if ( hostLang->lang == HostLang::Ruby )
+ fcall inline_code_ruby;
+ else
+ fcall inline_code;
};
'access' => {
token( KW_Access );
inlineBlockType = SemiTerminated;
- fgoto inline_code;
+ if ( hostLang->lang == HostLang::Ruby )
+ fcall inline_code_ruby;
+ else
+ fcall inline_code;
};
'variable' => {
token( KW_Variable );
inlineBlockType = SemiTerminated;
- fgoto inline_code;
+ if ( hostLang->lang == HostLang::Ruby )
+ fcall inline_code_ruby;
+ else
+ fcall inline_code;
};
'when' => { token( KW_When ); };
+ 'inwhen' => { token( KW_InWhen ); };
+ 'outwhen' => { token( KW_OutWhen ); };
'eof' => { token( KW_Eof ); };
'err' => { token( KW_Err ); };
'lerr' => { token( KW_Lerr ); };
'export' => { token( KW_Export ); };
# Identifiers.
- ident => { token( TK_Word, tokstart, tokend ); } ;
+ ident => { token( TK_Word, ts, te ); } ;
# Numbers
- number => { token( TK_UInt, tokstart, tokend ); };
- hex_number => { token( TK_Hex, tokstart, tokend ); };
+ number => { token( TK_UInt, ts, te ); };
+ hex_number => { token( TK_Hex, ts, te ); };
# Literals, with optionals.
( s_literal | d_literal ) [i]?
- => { token( TK_Literal, tokstart, tokend ); };
+ => { token( TK_Literal, ts, te ); };
'[' => { token( RE_SqOpen ); fcall or_literal; };
'[^' => { token( RE_SqOpenNeg ); fcall or_literal; };
- '/' => { token( RE_Slash ); fgoto re_literal; };
+ '/' => { token( RE_Slash ); fgoto ragel_re_literal; };
# Ignore.
pound_comment => { updateCol(); };
"|*" => { token( TK_BarStar ); };
# Separater for name references.
- "::" => { token( TK_NameSep, tokstart, tokend ); };
+ "::" => { token( TK_NameSep, ts, te ); };
'}%%' => {
updateCol();
endSection();
- fgoto main;
+ fret;
};
[ \t\r]+ => { updateCol(); };
updateCol();
if ( singleLineSpec ) {
endSection();
- fgoto main;
+ fret;
}
};
token( '{' );
curly_count = 1;
inlineBlockType = CurlyDelimited;
- fgoto inline_code;
+ if ( hostLang->lang == HostLang::Ruby )
+ fcall inline_code_ruby;
+ else
+ fcall inline_code;
}
};
scan_error() << "unterminated ragel section" << endl;
};
- any => { token( *tokstart ); } ;
+ any => { token( *ts ); } ;
*|;
- action pass {
- updateCol();
+ # Outside code scanner. These tokens get passed through.
+ main_ruby := |*
+ ident => { pass( IMP_Word, ts, te ); };
+ number => { pass( IMP_UInt, ts, te ); };
+ ruby_comment => { pass(); };
+ ( s_literal | d_literal | host_re_literal )
+ => { pass( IMP_Literal, ts, te ); };
- /* If no errors and we are at the bottom of the include stack (the
- * source file listed on the command line) then write out the data. */
- if ( includeDepth == 0 && machineSpec == 0 && machineName == 0 )
- xmlEscapeHost( output, tokstart, tokend-tokstart );
- }
+ '%%{' => {
+ updateCol();
+ singleLineSpec = false;
+ startSection();
+ fcall parser_def;
+ };
+ '%%' => {
+ updateCol();
+ singleLineSpec = true;
+ startSection();
+ fcall parser_def;
+ };
+ whitespace+ => { pass(); };
+ EOF;
+ any => { pass( *ts, 0, 0 ); };
+ *|;
# Outside code scanner. These tokens get passed through.
main := |*
- ident => pass;
- number => pass;
- c_cpp_comment => pass;
- s_literal | d_literal => pass;
+ 'define' => { pass( IMP_Define, 0, 0 ); };
+ ident => { pass( IMP_Word, ts, te ); };
+ number => { pass( IMP_UInt, ts, te ); };
+ c_cpp_comment => { pass(); };
+ ( s_literal | d_literal ) => { pass( IMP_Literal, ts, te ); };
+
'%%{' => {
updateCol();
singleLineSpec = false;
startSection();
- fgoto parser_def;
+ fcall parser_def;
};
'%%' => {
updateCol();
singleLineSpec = true;
startSection();
- fgoto parser_def;
+ fcall parser_def;
};
- whitespace+ => pass;
+ whitespace+ => { pass(); };
EOF;
- any => pass;
+ any => { pass( *ts, 0, 0 ); };
*|;
-
}%%
%% write data;
{
int bufsize = 8;
char *buf = new char[bufsize];
- const char last_char = 0;
int cs, act, have = 0;
- int top, stack[1];
+ int top;
+
+ /* The stack is two deep, one level for going into ragel defs from the main
+ * machines which process outside code, and another for going into or literals
+ * from either a ragel spec, or a regular expression. */
+ int stack[2];
int curly_count = 0;
bool execute = true;
bool singleLineSpec = false;
init();
%% write init;
+ /* Set up the start state. FIXME: After 5.20 is released the nocs write
+ * init option should be used, the main machine eliminated and this statement moved
+ * above the write init. */
+ if ( hostLang->lang == HostLang::Ruby )
+ cs = rlscan_en_main_ruby;
+ else
+ cs = rlscan_en_main;
+
while ( execute ) {
char *p = buf + have;
int space = bufsize - have;
space = bufsize - have;
/* Patch up pointers possibly in use. */
- if ( tokstart != 0 )
- tokstart = newbuf + ( tokstart - buf );
- tokend = newbuf + ( tokend - buf );
+ if ( ts != 0 )
+ ts = newbuf + ( ts - buf );
+ te = newbuf + ( te - buf );
/* Copy the new buffer in. */
memcpy( newbuf, buf, have );
input.read( p, space );
int len = input.gcount();
+ char *pe = p + len;
- /* If we see eof then append the EOF char. */
+		/* If we see eof then set the eof var. */
+ char *eof = 0;
if ( len == 0 ) {
- p[0] = last_char, len = 1;
+ eof = pe;
execute = false;
}
- char *pe = p + len;
%% write exec;
/* Check if we failed. */
}
/* Decide if we need to preserve anything. */
- char *preserve = tokstart;
+ char *preserve = ts;
/* Now set up the prefix. */
if ( preserve == 0 )
have = pe - preserve;
memmove( buf, preserve, have );
unsigned int shiftback = preserve - buf;
- if ( tokstart != 0 )
- tokstart -= shiftback;
- tokend -= shiftback;
+ if ( ts != 0 )
+ ts -= shiftback;
+ te -= shiftback;
preserve = buf;
}
delete[] buf;
}
-
-void scan( char *fileName, istream &input, ostream &output )
-{
-}