X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;ds=sidebyside;f=ragel%2Frlscan.rl;h=3c325c31e193bb3ce3e00b933a6f36c714d1310b;hb=9f3c2baa91083bb5b33b4f3ec07f58d900157e32;hp=e8154a15971252dc6ec134f60574a36d6572afda;hpb=58fb5a6af378fca241c794d6da5d4090835e94ba;p=external%2Fragel.git diff --git a/ragel/rlscan.rl b/ragel/rlscan.rl index e8154a1..3c325c3 100644 --- a/ragel/rlscan.rl +++ b/ragel/rlscan.rl @@ -1,5 +1,5 @@ /* - * Copyright 2006 Adrian Thurston + * Copyright 2006-2007 Adrian Thurston */ /* This file is part of Ragel. @@ -24,11 +24,10 @@ #include #include "ragel.h" -#include "rlparse.h" -#include "parsedata.h" -#include "avltree.h" -#include "vector.h" +#include "rlscan.h" +#include "inputdata.h" +//#define LOG_TOKENS using std::ifstream; using std::istream; @@ -37,96 +36,179 @@ using std::cout; using std::cerr; using std::endl; -/* This is used for tracking the current stack of include file/machine pairs. It is - * is used to detect and recursive include structure. */ -struct IncludeStackItem -{ - IncludeStackItem( char *fileName, char *sectionName ) - : fileName(fileName), sectionName(sectionName) {} - - char *fileName; - char *sectionName; -}; - -typedef Vector IncludeStack; -IncludeStack includeStack; - enum InlineBlockType { CurlyDelimited, SemiTerminated }; -struct Scanner +#ifdef _WIN32 +#define PATH_SEP '\\' +#else +#define PATH_SEP '/' +#endif + + +/* + * The Scanner for Importing + */ + +%%{ + machine inline_token_scan; + alphtype int; + access tok_; + + # Import scanner tokens. + import "rlparse.h"; + + main := |* + # Define of number. + IMP_Define IMP_Word IMP_UInt => { + int base = tok_ts - token_data; + int nameOff = 1; + int numOff = 2; + + directToParser( inclToParser, fileName, line, column, TK_Word, + token_strings[base+nameOff], token_lens[base+nameOff] ); + directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); + directToParser( inclToParser, fileName, line, column, TK_UInt, + token_strings[base+numOff], token_lens[base+numOff] ); + directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); + }; + + # Assignment of number. + IMP_Word '=' IMP_UInt => { + int base = tok_ts - token_data; + int nameOff = 0; + int numOff = 2; + + directToParser( inclToParser, fileName, line, column, TK_Word, + token_strings[base+nameOff], token_lens[base+nameOff] ); + directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); + directToParser( inclToParser, fileName, line, column, TK_UInt, + token_strings[base+numOff], token_lens[base+numOff] ); + directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); + }; + + # Define of literal. + IMP_Define IMP_Word IMP_Literal => { + int base = tok_ts - token_data; + int nameOff = 1; + int litOff = 2; + + directToParser( inclToParser, fileName, line, column, TK_Word, + token_strings[base+nameOff], token_lens[base+nameOff] ); + directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); + directToParser( inclToParser, fileName, line, column, TK_Literal, + token_strings[base+litOff], token_lens[base+litOff] ); + directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); + }; + + # Assignment of literal. 
+ IMP_Word '=' IMP_Literal => { + int base = tok_ts - token_data; + int nameOff = 0; + int litOff = 2; + + directToParser( inclToParser, fileName, line, column, TK_Word, + token_strings[base+nameOff], token_lens[base+nameOff] ); + directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); + directToParser( inclToParser, fileName, line, column, TK_Literal, + token_strings[base+litOff], token_lens[base+litOff] ); + directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); + }; + + # Catch everything else. + any; + *|; +}%% + +%% write data; + +void Scanner::flushImport() { - Scanner( char *fileName, istream &input, - Parser *inclToParser, char *inclSectionTarg, - int include_depth ) - : - fileName(fileName), input(input), - inclToParser(inclToParser), - inclSectionTarg(inclSectionTarg), - include_depth(include_depth), - line(1), column(1), lastnl(0), - parser(0), active(false), - parserExistsError(false), ragelDefOpen(false), - whitespaceOn(true) - {} - - bool recursiveInclude( IncludeStack &includeStack, - char *inclFileName, char *inclSectionName ); - - char *prepareFileName( char *fileName, int len ) - { - bool caseInsensitive; - Token tokenFnStr, tokenRes; - tokenFnStr.data = fileName; - tokenFnStr.length = len; - tokenFnStr.prepareLitString( tokenRes, caseInsensitive ); - return tokenRes.data; + int *p = token_data; + int *pe = token_data + cur_token; + int *eof = 0; + + %%{ + machine inline_token_scan; + write init; + write exec; + }%% + + if ( tok_ts == 0 ) + cur_token = 0; + else { + cur_token = pe - tok_ts; + int ts_offset = tok_ts - token_data; + memmove( token_data, token_data+ts_offset, cur_token*sizeof(token_data[0]) ); + memmove( token_strings, token_strings+ts_offset, cur_token*sizeof(token_strings[0]) ); + memmove( token_lens, token_lens+ts_offset, cur_token*sizeof(token_lens[0]) ); + } +} + +void Scanner::directToParser( Parser *toParser, const char *tokFileName, int tokLine, + int tokColumn, int type, char *tokdata, int toklen ) +{ + InputLoc loc; + + #ifdef LOG_TOKENS + cerr << "scanner:" << tokLine << ":" << tokColumn << + ": sending token to the parser " << Parser_lelNames[type]; + cerr << " " << toklen; + if ( tokdata != 0 ) + cerr << " " << tokdata; + cerr << endl; + #endif + + loc.fileName = tokFileName; + loc.line = tokLine; + loc.col = tokColumn; + + toParser->token( loc, type, tokdata, toklen ); +} + +void Scanner::importToken( int token, char *start, char *end ) +{ + if ( cur_token == max_tokens ) + flushImport(); + + token_data[cur_token] = token; + if ( start == 0 ) { + token_strings[cur_token] = 0; + token_lens[cur_token] = 0; } + else { + int toklen = end-start; + token_lens[cur_token] = toklen; + token_strings[cur_token] = new char[toklen+1]; + memcpy( token_strings[cur_token], start, toklen ); + token_strings[cur_token][toklen] = 0; + } + cur_token++; +} - void init(); - void token( int type, char *start, char *end ); - void token( int type, char c ); - void token( int type ); - void updateCol(); - void startSection(); - void endSection(); - void openRagelDef(); - void do_scan(); - bool parserExists(); - ostream &error(); - - char *fileName; - istream &input; - Parser *inclToParser; - char *inclSectionTarg; - int include_depth; - - int cs; - int line; - char *word, *lit; - int word_len, lit_len; - InputLoc sectionLoc; - char *tokstart, *tokend; - int column; - char *lastnl; - - /* Set by machine statements, these persist from section to section - * allowing for unnamed sections. 
*/ - Parser *parser; - bool active; - - /* This is set if ragel has already emitted an error stating that - * no section name has been seen and thus no parser exists. */ - bool parserExistsError; - bool ragelDefOpen; - - /* This is for inline code. By default it is on. It goes off for - * statements and values in inline blocks which are parsed. */ - bool whitespaceOn; -}; +void Scanner::pass( int token, char *start, char *end ) +{ + if ( importMachines ) + importToken( token, start, end ); + pass(); +} + +void Scanner::pass() +{ + updateCol(); + + /* If no errors and we are at the bottom of the include stack (the + * source file listed on the command line) then write out the data. */ + if ( includeDepth == 0 && machineSpec == 0 && machineName == 0 ) + id.inputItems.tail->data.write( ts, te-ts ); +} + +/* + * The scanner for processing sections, includes, imports, etc. + */ %%{ machine section_parse; @@ -134,38 +216,44 @@ struct Scanner write data; }%% + void Scanner::init( ) { %% write init; } -bool Scanner::parserExists() +bool Scanner::active() { - if ( parser != 0 ) - return true; + if ( ignoreSection ) + return false; - if ( ! parserExistsError ) { - error() << "include: there is no previous specification name" << endl; + if ( parser == 0 && ! parserExistsError ) { + scan_error() << "this specification has no name, nor does any previous" + " specification" << endl; parserExistsError = true; } - return false; + + if ( parser == 0 ) + return false; + + return true; } -ostream &Scanner::error() +ostream &Scanner::scan_error() { /* Maintain the error count. */ gblErrorCount += 1; - - cerr << fileName << ":" << line << ":" << column << ": "; + cerr << makeInputLoc( fileName, line, column ) << ": "; return cerr; } -bool Scanner::recursiveInclude( IncludeStack &includeStack, - char *inclFileName, char *inclSectionName ) +/* An approximate check for duplicate includes. Due to aliasing of files it's + * possible for duplicates to creep in. */ +bool Scanner::duplicateInclude( char *inclFileName, char *inclSectionName ) { - for ( IncludeStack::Iter si = includeStack; si.lte(); si++ ) { - if ( strcmp( si->fileName, inclFileName ) == 0 && - strcmp( si->sectionName, inclSectionName ) == 0 ) + for ( IncludeHistory::Iter hi = parser->includeHistory; hi.lte(); hi++ ) { + if ( strcmp( hi->fileName, inclFileName ) == 0 && + strcmp( hi->sectionName, inclSectionName ) == 0 ) { return true; } @@ -177,115 +265,135 @@ void Scanner::updateCol() { char *from = lastnl; if ( from == 0 ) - from = tokstart; - //cerr << "adding " << tokend - from << " to column" << endl; - column += tokend - from; + from = ts; + //cerr << "adding " << te - from << " to column" << endl; + column += te - from; lastnl = 0; } -void Scanner::token( int type, char c ) +void Scanner::handleMachine() { - token( type, &c, &c + 1 ); -} + /* Assign a name to the machine. 
*/ + char *machine = word; + + if ( !importMachines && inclSectionTarg == 0 ) { + ignoreSection = false; + + ParserDictEl *pdEl = id.parserDict.find( machine ); + if ( pdEl == 0 ) { + pdEl = new ParserDictEl( machine ); + pdEl->value = new Parser( fileName, machine, sectionLoc ); + pdEl->value->init(); + id.parserDict.insert( pdEl ); + id.parserList.append( pdEl->value ); + } -void Scanner::token( int type ) -{ - token( type, 0, 0 ); + parser = pdEl->value; + } + else if ( !importMachines && strcmp( inclSectionTarg, machine ) == 0 ) { + /* found include target */ + ignoreSection = false; + parser = inclToParser; + } + else { + /* ignoring section */ + ignoreSection = true; + parser = 0; + } } -%%{ - machine section_parse; +void Scanner::handleInclude() +{ + if ( active() ) { + char *inclSectionName = word; + char **includeChecks = 0; - # This relies on the the kelbt implementation and the order - # that tokens are declared. - KW_Machine = 128; - KW_Include = 129; - KW_Write = 130; - TK_Word = 131; - TK_Literal = 132; + /* Implement defaults for the input file and section name. */ + if ( inclSectionName == 0 ) + inclSectionName = parser->sectionName; - action clear_words { word = lit = 0; word_len = lit_len = 0; } - action store_word { word = tokdata; word_len = toklen; } - action store_lit { lit = tokdata; lit_len = toklen; } - - action mach_err { error() << "bad machine statement" << endl; } - action incl_err { error() << "bad include statement" << endl; } - action write_err { error() << "bad write statement" << endl; } + if ( lit != 0 ) + includeChecks = makeIncludePathChecks( fileName, lit, lit_len ); + else { + char *test = new char[strlen(fileName)+1]; + strcpy( test, fileName ); - action handle_machine - { - /* Assign a name to the machine. */ - char *machine = word; - - if ( inclSectionTarg == 0 ) { - active = true; - - ParserDictEl *pdEl = parserDict.find( machine ); - if ( pdEl == 0 ) { - pdEl = new ParserDictEl( machine ); - pdEl->value = new Parser( fileName, machine, sectionLoc ); - pdEl->value->init(); - parserDict.insert( pdEl ); - } + includeChecks = new char*[2]; - parser = pdEl->value; + includeChecks[0] = test; + includeChecks[1] = 0; } - else if ( strcmp( inclSectionTarg, machine ) == 0 ) { - /* found include target */ - active = true; - parser = inclToParser; + + long found = 0; + ifstream *inFile = tryOpenInclude( includeChecks, found ); + if ( inFile == 0 ) { + scan_error() << "include: failed to locate file" << endl; + char **tried = includeChecks; + while ( *tried != 0 ) + scan_error() << "include: attempted: \"" << *tried++ << '\"' << endl; } else { - /* ignoring section */ - active = false; - parser = 0; + /* Don't include anything that's already been included. */ + if ( !duplicateInclude( includeChecks[found], inclSectionName ) ) { + parser->includeHistory.append( IncludeHistoryItem( + includeChecks[found], inclSectionName ) ); + + Scanner scanner( id, includeChecks[found], *inFile, parser, + inclSectionName, includeDepth+1, false ); + scanner.do_scan( ); + delete inFile; + } } } +} - machine_stmt = - ( KW_Machine TK_Word @store_word ';' ) @handle_machine - <>err mach_err <>eof mach_err; +void Scanner::handleImport() +{ + if ( active() ) { + char **importChecks = makeIncludePathChecks( fileName, lit, lit_len ); + + /* Open the input file for reading. 
*/ + long found = 0; + ifstream *inFile = tryOpenInclude( importChecks, found ); + if ( inFile == 0 ) { + scan_error() << "import: could not open import file " << + "for reading" << endl; + char **tried = importChecks; + while ( *tried != 0 ) + scan_error() << "import: attempted: \"" << *tried++ << '\"' << endl; + } - action handle_include - { - if ( active && parserExists() ) { - char *inclSectionName = word; - char *inclFileName = 0; + Scanner scanner( id, importChecks[found], *inFile, parser, + 0, includeDepth+1, true ); + scanner.do_scan( ); + scanner.importToken( 0, 0, 0 ); + scanner.flushImport(); + delete inFile; + } +} - /* Implement defaults for the input file and section name. */ - if ( inclSectionName == 0 ) - inclSectionName = parser->sectionName; +%%{ + machine section_parse; - if ( lit != 0 ) - inclFileName = prepareFileName( lit, lit_len ); - else - inclFileName = fileName; + # Need the defines representing tokens. + import "rlparse.h"; - /* Check for a recursive include structure. Add the current file/section - * name then check if what we are including is already in the stack. */ - includeStack.append( IncludeStackItem( fileName, parser->sectionName ) ); + action clear_words { word = lit = 0; word_len = lit_len = 0; } + action store_word { word = tokdata; word_len = toklen; } + action store_lit { lit = tokdata; lit_len = toklen; } - if ( recursiveInclude( includeStack, inclFileName, inclSectionName ) ) - error() << "include: this is a recursive include operation" << endl; - else { - /* Open the input file for reading. */ - ifstream *inFile = new ifstream( inclFileName ); - if ( ! inFile->is_open() ) { - error() << "include: could not open " << - inclFileName << " for reading" << endl; - } - - Scanner scanner( inclFileName, *inFile, parser, - inclSectionName, include_depth+1 ); - scanner.init(); - scanner.do_scan( ); - delete inFile; - } + action mach_err { scan_error() << "bad machine statement" << endl; } + action incl_err { scan_error() << "bad include statement" << endl; } + action import_err { scan_error() << "bad import statement" << endl; } + action write_err { scan_error() << "bad write statement" << endl; } - /* Remove the last element (len-1) */ - includeStack.remove( -1 ); - } - } + action handle_machine { handleMachine(); } + action handle_include { handleInclude(); } + action handle_import { handleImport(); } + + machine_stmt = + ( KW_Machine TK_Word @store_word ';' ) @handle_machine + <>err mach_err <>eof mach_err; include_names = ( TK_Word @store_word ( TK_Literal @store_lit )? 
| @@ -296,78 +404,74 @@ void Scanner::token( int type ) ( KW_Include include_names ';' ) @handle_include <>err incl_err <>eof incl_err; + import_stmt = + ( KW_Import TK_Literal @store_lit ';' ) @handle_import + <>err import_err <>eof import_err; + action write_command { - if ( active ) { - openRagelDef(); - if ( strcmp( tokdata, "data" ) != 0 && - strcmp( tokdata, "init" ) != 0 && - strcmp( tokdata, "exec" ) != 0 && - strcmp( tokdata, "eof" ) != 0 ) - { - error() << "unknown write command" << endl; - } - *outStream << " "; + if ( active() && machineSpec == 0 && machineName == 0 ) { + InputItem *inputItem = new InputItem; + inputItem->type = InputItem::Write; + inputItem->loc.line = line; + inputItem->loc.col = column; + inputItem->name = parser->sectionName; + inputItem->pd = parser->pd; + id.inputItems.append( inputItem ); } } - action write_option + action write_arg { - if ( active ) - *outStream << ""; + if ( active() && machineSpec == 0 && machineName == 0 ) + id.inputItems.tail->writeArgs.append( strdup(tokdata) ); } + action write_close { - if ( active ) - *outStream << "\n"; + if ( active() && machineSpec == 0 && machineName == 0 ) + id.inputItems.tail->writeArgs.append( 0 ); } write_stmt = - ( KW_Write TK_Word @write_command - ( TK_Word @write_option )* ';' @write_close ) + ( KW_Write @write_command + ( TK_Word @write_arg )+ ';' @write_close ) <>err write_err <>eof write_err; action handle_token { /* Send the token off to the parser. */ - if ( active && parserExists() ) { - InputLoc loc; - - #if 0 - cerr << "scanner:" << line << ":" << column << - ": sending token to the parser " << lelNames[*p]; - cerr << " " << toklen; - if ( tokdata != 0 ) - cerr << " " << tokdata; - cerr << endl; - #endif - - loc.fileName = fileName; - loc.line = line; - loc.col = column; - - parser->token( loc, type, tokdata, toklen ); - } + if ( active() ) + directToParser( parser, fileName, line, column, type, tokdata, toklen ); } # Catch everything else. - everything_else = ^( KW_Machine | KW_Include | KW_Write ) @handle_token; + everything_else = + ^( KW_Machine | KW_Include | KW_Import | KW_Write ) @handle_token; main := ( machine_stmt | include_stmt | + import_stmt | write_stmt | everything_else )*; }%% +void Scanner::token( int type, char c ) +{ + token( type, &c, &c + 1 ); +} + +void Scanner::token( int type ) +{ + token( type, 0, 0 ); +} + void Scanner::token( int type, char *start, char *end ) { char *tokdata = 0; int toklen = 0; - int *p = &type; - int *pe = &type + 1; - if ( start != 0 ) { toklen = end-start; tokdata = new char[toklen+1]; @@ -375,67 +479,142 @@ void Scanner::token( int type, char *start, char *end ) tokdata[toklen] = 0; } + processToken( type, tokdata, toklen ); +} + +void Scanner::processToken( int type, char *tokdata, int toklen ) +{ + int *p, *pe, *eof; + + if ( type < 0 ) + p = pe = eof = 0; + else { + p = &type; + pe = &type + 1; + eof = 0; + } + %%{ machine section_parse; write exec; }%% updateCol(); + + /* Record the last token for use in controlling the scan of subsequent + * tokens. */ + lastToken = type; } void Scanner::startSection( ) { parserExistsError = false; - if ( include_depth == 0 ) { - if ( machineSpec == 0 && machineName == 0 ) - *outStream << "\n"; - ragelDefOpen = false; - } - sectionLoc.fileName = fileName; sectionLoc.line = line; - sectionLoc.col = 0; -} - -void Scanner::openRagelDef() -{ - if ( ! 
ragelDefOpen ) { - ragelDefOpen = true; - *outStream << "sectionName << "\">\n"; - } + sectionLoc.col = column; } void Scanner::endSection( ) { /* Execute the eof actions for the section parser. */ - %%{ - machine section_parse; - write eof; - }%% + processToken( -1, 0, 0 ); /* Close off the section with the parser. */ - if ( active && parserExists() ) { + if ( active() ) { InputLoc loc; loc.fileName = fileName; loc.line = line; - loc.col = 0; + loc.col = column; parser->token( loc, TK_EndSection, 0, 0 ); } - if ( include_depth == 0 ) { - if ( ragelDefOpen ) { - *outStream << "\n"; - ragelDefOpen = false; - } - + if ( includeDepth == 0 ) { if ( machineSpec == 0 && machineName == 0 ) { /* The end section may include a newline on the end, so * we use the last line, which will count the newline. */ - *outStream << ""; + InputItem *inputItem = new InputItem; + inputItem->type = InputItem::HostData; + inputItem->loc.line = line; + inputItem->loc.col = column; + id.inputItems.append( inputItem ); + } + } +} + +bool isAbsolutePath( const char *path ) +{ +#ifdef _WIN32 + return isalpha( path[0] ) && path[1] == ':' && path[2] == '\\'; +#else + return path[0] == '/'; +#endif +} + +char **Scanner::makeIncludePathChecks( const char *thisFileName, + const char *fileName, int fnlen ) +{ + char **checks = new char*[2]; + long nextCheck = 0; + + bool caseInsensitive = false; + long length = 0; + char *data = prepareLitString( InputLoc(), fileName, fnlen, + length, caseInsensitive ); + + /* Absolute path? */ + if ( isAbsolutePath( data ) ) + checks[nextCheck++] = data; + else { + /* Search from the the location of the current file. */ + const char *lastSlash = strrchr( thisFileName, PATH_SEP ); + if ( lastSlash == 0 ) + checks[nextCheck++] = data; + else { + long givenPathLen = (lastSlash - thisFileName) + 1; + long checklen = givenPathLen + length; + char *check = new char[checklen+1]; + memcpy( check, thisFileName, givenPathLen ); + memcpy( check+givenPathLen, data, length ); + check[checklen] = 0; + checks[nextCheck++] = check; + } + + /* Search from the include paths given on the command line. */ + for ( ArgsVector::Iter incp = id.includePaths; incp.lte(); incp++ ) { + long pathLen = strlen( *incp ); + long checkLen = pathLen + 1 + length; + char *check = new char[checkLen+1]; + memcpy( check, *incp, pathLen ); + check[pathLen] = PATH_SEP; + memcpy( check+pathLen+1, data, length ); + check[checkLen] = 0; + checks[nextCheck++] = check; } } + + checks[nextCheck] = 0; + return checks; +} + +ifstream *Scanner::tryOpenInclude( char **pathChecks, long &found ) +{ + char **check = pathChecks; + ifstream *inFile = new ifstream; + + while ( *check != 0 ) { + inFile->open( *check ); + if ( inFile->is_open() ) { + found = check - pathChecks; + return inFile; + } + check += 1; + } + + found = -1; + delete inFile; + return 0; } %%{ @@ -464,16 +643,120 @@ void Scanner::endSection( ) c_cpp_comment = c_comment | cpp_comment; - # These literal forms are common to C-like host code and ragel. + ruby_comment = '#' [^\n]* NL; + + # These literal forms are common to host code and ragel. s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'"; d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"'; + host_re_literal = '/' ([^/\\] | NL | '\\' (any | NL))* '/'; whitespace = [ \t] | NL; pound_comment = '#' [^\n]* NL; - # An inline block of code. This is specified as a scanned, but is sent to - # the parser as one long block. The inline_block pointer is used to handle - # the preservation of the data. 
+ # An inline block of code for Ruby. + inline_code_ruby := |* + # Inline expression keywords. + "fpc" => { token( KW_PChar ); }; + "fc" => { token( KW_Char ); }; + "fcurs" => { token( KW_CurState ); }; + "ftargs" => { token( KW_TargState ); }; + "fentry" => { + whitespaceOn = false; + token( KW_Entry ); + }; + + # Inline statement keywords. + "fhold" => { + whitespaceOn = false; + token( KW_Hold ); + }; + "fexec" => { token( KW_Exec, 0, 0 ); }; + "fgoto" => { + whitespaceOn = false; + token( KW_Goto ); + }; + "fnext" => { + whitespaceOn = false; + token( KW_Next ); + }; + "fcall" => { + whitespaceOn = false; + token( KW_Call ); + }; + "fret" => { + whitespaceOn = false; + token( KW_Ret ); + }; + "fbreak" => { + whitespaceOn = false; + token( KW_Break ); + }; + + ident => { token( TK_Word, ts, te ); }; + + number => { token( TK_UInt, ts, te ); }; + hex_number => { token( TK_Hex, ts, te ); }; + + ( s_literal | d_literal | host_re_literal ) + => { token( IL_Literal, ts, te ); }; + + whitespace+ => { + if ( whitespaceOn ) + token( IL_WhiteSpace, ts, te ); + }; + + ruby_comment => { token( IL_Comment, ts, te ); }; + + "::" => { token( TK_NameSep, ts, te ); }; + + # Some symbols need to go to the parser as with their cardinal value as + # the token type (as opposed to being sent as anonymous symbols) + # because they are part of the sequences which we interpret. The * ) ; + # symbols cause whitespace parsing to come back on. This gets turned + # off by some keywords. + + ";" => { + whitespaceOn = true; + token( *ts, ts, te ); + if ( inlineBlockType == SemiTerminated ) + fret; + }; + + [*)] => { + whitespaceOn = true; + token( *ts, ts, te ); + }; + + [,(] => { token( *ts, ts, te ); }; + + '{' => { + token( IL_Symbol, ts, te ); + curly_count += 1; + }; + + '}' => { + if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) { + /* Inline code block ends. */ + token( '}' ); + fret; + } + else { + /* Either a semi terminated inline block or only the closing + * brace of some inner scope, not the block's closing brace. */ + token( IL_Symbol, ts, te ); + } + }; + + EOF => { + scan_error() << "unterminated code block" << endl; + }; + + # Send every other character as a symbol. + any => { token( IL_Symbol, ts, te ); }; + *|; + + + # An inline block of code for languages other than Ruby. inline_code := |* # Inline expression keywords. 
"fpc" => { token( KW_PChar ); }; @@ -512,21 +795,22 @@ void Scanner::endSection( ) token( KW_Break ); }; - ident => { token( TK_Word, tokstart, tokend ); }; + ident => { token( TK_Word, ts, te ); }; - number => { token( TK_UInt, tokstart, tokend ); }; - hex_number => { token( TK_Hex, tokstart, tokend ); }; + number => { token( TK_UInt, ts, te ); }; + hex_number => { token( TK_Hex, ts, te ); }; ( s_literal | d_literal ) - => { token( IL_Literal, tokstart, tokend ); }; + => { token( IL_Literal, ts, te ); }; whitespace+ => { if ( whitespaceOn ) - token( IL_WhiteSpace, tokstart, tokend ); + token( IL_WhiteSpace, ts, te ); }; - c_cpp_comment => { token( IL_Comment, tokstart, tokend ); }; - "::" => { token( TK_NameSep, tokstart, tokend ); }; + c_cpp_comment => { token( IL_Comment, ts, te ); }; + + "::" => { token( TK_NameSep, ts, te ); }; # Some symbols need to go to the parser as with their cardinal value as # the token type (as opposed to being sent as anonymous symbols) @@ -536,20 +820,20 @@ void Scanner::endSection( ) ";" => { whitespaceOn = true; - token( *tokstart, tokstart, tokend ); + token( *ts, ts, te ); if ( inlineBlockType == SemiTerminated ) - fgoto parser_def; + fret; }; [*)] => { whitespaceOn = true; - token( *tokstart, tokstart, tokend ); + token( *ts, ts, te ); }; - [,(] => { token( *tokstart, tokstart, tokend ); }; + [,(] => { token( *ts, ts, te ); }; '{' => { - token( IL_Symbol, tokstart, tokend ); + token( IL_Symbol, ts, te ); curly_count += 1; }; @@ -557,21 +841,21 @@ void Scanner::endSection( ) if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) { /* Inline code block ends. */ token( '}' ); - fgoto parser_def; + fret; } else { /* Either a semi terminated inline block or only the closing * brace of some inner scope, not the block's closing brace. */ - token( IL_Symbol, tokstart, tokend ); + token( IL_Symbol, ts, te ); } }; EOF => { - error() << "unterminated code block" << endl; + scan_error() << "unterminated code block" << endl; }; # Send every other character as a symbol. - any => { token( IL_Symbol, tokstart, tokend ); }; + any => { token( IL_Symbol, ts, te ); }; *|; or_literal := |* @@ -585,7 +869,7 @@ void Scanner::endSection( ) '\\f' => { token( RE_Char, '\f' ); }; '\\r' => { token( RE_Char, '\r' ); }; '\\\n' => { updateCol(); }; - '\\' any => { token( RE_Char, tokstart+1, tokend ); }; + '\\' any => { token( RE_Char, ts+1, te ); }; # Range dash in an OR expression. '-' => { token( RE_Dash, 0, 0 ); }; @@ -594,15 +878,15 @@ void Scanner::endSection( ) ']' => { token( RE_SqClose ); fret; }; EOF => { - error() << "unterminated OR literal" << endl; + scan_error() << "unterminated OR literal" << endl; }; # Characters in an OR expression. - [^\]] => { token( RE_Char, tokstart, tokend ); }; + [^\]] => { token( RE_Char, ts, te ); }; *|; - re_literal := |* + ragel_re_literal := |* # Escape sequences in regular expressions. '\\0' => { token( RE_Char, '\0' ); }; '\\a' => { token( RE_Char, '\a' ); }; @@ -613,11 +897,11 @@ void Scanner::endSection( ) '\\f' => { token( RE_Char, '\f' ); }; '\\r' => { token( RE_Char, '\r' ); }; '\\\n' => { updateCol(); }; - '\\' any => { token( RE_Char, tokstart+1, tokend ); }; + '\\' any => { token( RE_Char, ts+1, te ); }; # Terminate an OR expression. '/' [i]? 
=> { - token( RE_Slash, tokstart, tokend ); + token( RE_Slash, ts, te ); fgoto parser_def; }; @@ -629,34 +913,38 @@ void Scanner::endSection( ) '[^' => { token( RE_SqOpenNeg ); fcall or_literal; }; EOF => { - error() << "unterminated regular expression" << endl; + scan_error() << "unterminated regular expression" << endl; }; # Characters in an OR expression. - [^\/] => { token( RE_Char, tokstart, tokend ); }; + [^\/] => { token( RE_Char, ts, te ); }; *|; # We need a separate token space here to avoid the ragel keywords. write_statement := |* - ident => { token( TK_Word, tokstart, tokend ); } ; + ident => { token( TK_Word, ts, te ); } ; [ \t\n]+ => { updateCol(); }; ';' => { token( ';' ); fgoto parser_def; }; EOF => { - error() << "unterminated write statement" << endl; + scan_error() << "unterminated write statement" << endl; }; *|; # Parser definitions. parser_def := |* + 'length_cond' => { token( KW_Length ); }; 'machine' => { token( KW_Machine ); }; 'include' => { token( KW_Include ); }; + 'import' => { token( KW_Import ); }; 'write' => { token( KW_Write ); fgoto write_statement; }; 'action' => { token( KW_Action ); }; 'alphtype' => { token( KW_AlphType ); }; + 'prepush' => { token( KW_PrePush ); }; + 'postpop' => { token( KW_PostPop ); }; # FIXME: Enable this post 5.17. # 'range' => { token( KW_Range ); }; @@ -664,40 +952,52 @@ void Scanner::endSection( ) 'getkey' => { token( KW_GetKey ); inlineBlockType = SemiTerminated; - fgoto inline_code; + if ( hostLang->lang == HostLang::Ruby ) + fcall inline_code_ruby; + else + fcall inline_code; }; 'access' => { token( KW_Access ); inlineBlockType = SemiTerminated; - fgoto inline_code; + if ( hostLang->lang == HostLang::Ruby ) + fcall inline_code_ruby; + else + fcall inline_code; }; 'variable' => { token( KW_Variable ); inlineBlockType = SemiTerminated; - fgoto inline_code; + if ( hostLang->lang == HostLang::Ruby ) + fcall inline_code_ruby; + else + fcall inline_code; }; 'when' => { token( KW_When ); }; + 'inwhen' => { token( KW_InWhen ); }; + 'outwhen' => { token( KW_OutWhen ); }; 'eof' => { token( KW_Eof ); }; 'err' => { token( KW_Err ); }; 'lerr' => { token( KW_Lerr ); }; 'to' => { token( KW_To ); }; 'from' => { token( KW_From ); }; + 'export' => { token( KW_Export ); }; # Identifiers. - ident => { token( TK_Word, tokstart, tokend ); } ; + ident => { token( TK_Word, ts, te ); } ; # Numbers - number => { token( TK_UInt, tokstart, tokend ); }; - hex_number => { token( TK_Hex, tokstart, tokend ); }; + number => { token( TK_UInt, ts, te ); }; + hex_number => { token( TK_Hex, ts, te ); }; # Literals, with optionals. ( s_literal | d_literal ) [i]? - => { token( TK_Literal, tokstart, tokend ); }; + => { token( TK_Literal, ts, te ); }; '[' => { token( RE_SqOpen ); fcall or_literal; }; '[^' => { token( RE_SqOpenNeg ); fcall or_literal; }; - '/' => { token( RE_Slash ); fgoto re_literal; }; + '/' => { token( RE_Slash ); fgoto ragel_re_literal; }; # Ignore. pound_comment => { updateCol(); }; @@ -765,69 +1065,96 @@ void Scanner::endSection( ) # Opening of longest match. "|*" => { token( TK_BarStar ); }; + # Separater for name references. + "::" => { token( TK_NameSep, ts, te ); }; + '}%%' => { updateCol(); endSection(); - fgoto main; + fret; }; - [ \t]+ => { updateCol(); }; + [ \t\r]+ => { updateCol(); }; # If we are in a single line machine then newline may end the spec. 
NL => { updateCol(); if ( singleLineSpec ) { endSection(); - fgoto main; + fret; } }; '{' => { - token( '{' ); - curly_count = 1; - inlineBlockType = CurlyDelimited; - fgoto inline_code; + if ( lastToken == KW_Export || lastToken == KW_Entry ) + token( '{' ); + else { + token( '{' ); + curly_count = 1; + inlineBlockType = CurlyDelimited; + if ( hostLang->lang == HostLang::Ruby ) + fcall inline_code_ruby; + else + fcall inline_code; + } }; EOF => { - error() << "unterminated ragel section" << endl; + scan_error() << "unterminated ragel section" << endl; }; - any => { token( *tokstart ); } ; + any => { token( *ts ); } ; *|; - action pass { - updateCol(); + # Outside code scanner. These tokens get passed through. + main_ruby := |* + ident => { pass( IMP_Word, ts, te ); }; + number => { pass( IMP_UInt, ts, te ); }; + ruby_comment => { pass(); }; + ( s_literal | d_literal | host_re_literal ) + => { pass( IMP_Literal, ts, te ); }; - /* If no errors and we are at the bottom of the include stack (the - * source file listed on the command line) then write out the data. */ - if ( include_depth == 0 && machineSpec == 0 && machineName == 0 ) - xmlEscapeHost( *outStream, tokstart, tokend-tokstart ); - } + '%%{' => { + updateCol(); + singleLineSpec = false; + startSection(); + fcall parser_def; + }; + '%%' => { + updateCol(); + singleLineSpec = true; + startSection(); + fcall parser_def; + }; + whitespace+ => { pass(); }; + EOF; + any => { pass( *ts, 0, 0 ); }; + *|; # Outside code scanner. These tokens get passed through. main := |* - ident => pass; - number => pass; - c_cpp_comment => pass; - s_literal | d_literal => pass; + 'define' => { pass( IMP_Define, 0, 0 ); }; + ident => { pass( IMP_Word, ts, te ); }; + number => { pass( IMP_UInt, ts, te ); }; + c_cpp_comment => { pass(); }; + ( s_literal | d_literal ) => { pass( IMP_Literal, ts, te ); }; + '%%{' => { updateCol(); singleLineSpec = false; startSection(); - fgoto parser_def; + fcall parser_def; }; '%%' => { updateCol(); singleLineSpec = true; startSection(); - fgoto parser_def; + fcall parser_def; }; - whitespace+ => pass; + whitespace+ => { pass(); }; EOF; - any => pass; + any => { pass( *ts, 0, 0 ); }; *|; - }%% %% write data; @@ -836,16 +1163,30 @@ void Scanner::do_scan() { int bufsize = 8; char *buf = new char[bufsize]; - const char last_char = 0; int cs, act, have = 0; - int top, stack[1]; + int top; + + /* The stack is two deep, one level for going into ragel defs from the main + * machines which process outside code, and another for going into or literals + * from either a ragel spec, or a regular expression. */ + int stack[2]; int curly_count = 0; bool execute = true; bool singleLineSpec = false; - InlineBlockType inlineBlockType; + InlineBlockType inlineBlockType = CurlyDelimited; + /* Init the section parser and the character scanner. */ + init(); %% write init; + /* Set up the start state. FIXME: After 5.20 is released the nocs write + * init option should be used, the main machine eliminated and this statement moved + * above the write init. */ + if ( hostLang->lang == HostLang::Ruby ) + cs = rlscan_en_main_ruby; + else + cs = rlscan_en_main; + while ( execute ) { char *p = buf + have; int space = bufsize - have; @@ -860,9 +1201,9 @@ void Scanner::do_scan() space = bufsize - have; /* Patch up pointers possibly in use. */ - if ( tokstart != 0 ) - tokstart = newbuf + ( tokstart - buf ); - tokend = newbuf + ( tokend - buf ); + if ( ts != 0 ) + ts = newbuf + ( ts - buf ); + te = newbuf + ( te - buf ); /* Copy the new buffer in. 
*/ memcpy( newbuf, buf, have ); @@ -872,26 +1213,27 @@ void Scanner::do_scan() input.read( p, space ); int len = input.gcount(); + char *pe = p + len; - /* If we see eof then append the EOF char. */ + /* If we see eof then append the eof var. */ + char *eof = 0; if ( len == 0 ) { - p[0] = last_char, len = 1; + eof = pe; execute = false; } - char *pe = p + len; %% write exec; /* Check if we failed. */ if ( cs == rlscan_error ) { /* Machine failed before finding a token. I'm not yet sure if this * is reachable. */ - error() << "scanner error" << endl; + scan_error() << "scanner error" << endl; exit(1); } /* Decide if we need to preserve anything. */ - char *preserve = tokstart; + char *preserve = ts; /* Now set up the prefix. */ if ( preserve == 0 ) @@ -901,9 +1243,9 @@ void Scanner::do_scan() have = pe - preserve; memmove( buf, preserve, have ); unsigned int shiftback = preserve - buf; - if ( tokstart != 0 ) - tokstart -= shiftback; - tokend -= shiftback; + if ( ts != 0 ) + ts -= shiftback; + te -= shiftback; preserve = buf; } @@ -911,15 +1253,3 @@ void Scanner::do_scan() delete[] buf; } - -void scan( char *fileName, istream &input ) -{ - Scanner scanner( fileName, input, 0, 0, 0 ); - scanner.init(); - scanner.do_scan(); - - InputLoc eofLoc; - eofLoc.fileName = fileName; - eofLoc.col = 1; - eofLoc.line = scanner.line; -}
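
Note on the new include/import path resolution added in this patch: the old scanner opened the literal include file name directly, while the new handleInclude()/handleImport() paths build a list of candidate locations with makeIncludePathChecks() and open the first one that exists with tryOpenInclude(). The sketch below illustrates that search order only; it is not the patch's code. The names makeIncludeChecks and tryOpen, and the std::string/std::vector interface, are illustrative stand-ins for the raw char* arrays and the Vector/ArgsVector types used in the Ragel sources, and the literal-string unquoting done by prepareLitString() is assumed to have happened already.

// Minimal sketch, assuming the include name has already been unquoted.
#include <cctype>
#include <fstream>
#include <string>
#include <vector>

#ifdef _WIN32
#define PATH_SEP '\\'
static bool isAbsolutePath( const std::string &p )
	{ return p.size() > 2 && isalpha( p[0] ) && p[1] == ':' && p[2] == '\\'; }
#else
#define PATH_SEP '/'
static bool isAbsolutePath( const std::string &p )
	{ return !p.empty() && p[0] == '/'; }
#endif

/* Build the candidate paths for an include/import target: the name itself if
 * absolute, otherwise relative to the directory of the including file, then
 * relative to each -I path given on the command line. */
std::vector<std::string> makeIncludeChecks( const std::string &thisFileName,
		const std::string &inclName, const std::vector<std::string> &includePaths )
{
	std::vector<std::string> checks;
	if ( isAbsolutePath( inclName ) )
		checks.push_back( inclName );
	else {
		/* Search from the location of the current file. */
		std::string::size_type lastSlash = thisFileName.rfind( PATH_SEP );
		if ( lastSlash == std::string::npos )
			checks.push_back( inclName );
		else
			checks.push_back( thisFileName.substr( 0, lastSlash + 1 ) + inclName );

		/* Then search from the include paths given on the command line. */
		for ( const std::string &dir : includePaths )
			checks.push_back( dir + PATH_SEP + inclName );
	}
	return checks;
}

/* Open the first candidate that exists; return its index, or -1 if none opened. */
long tryOpen( const std::vector<std::string> &checks, std::ifstream &inFile )
{
	for ( size_t i = 0; i < checks.size(); i++ ) {
		inFile.open( checks[i].c_str() );
		if ( inFile.is_open() )
			return (long)i;
		inFile.clear();
	}
	return -1;
}

A caller would walk the returned list exactly as handleInclude() does above: on failure it reports every attempted path, and on success it records the resolved file name in the parser's includeHistory so duplicateInclude() can skip repeat inclusions.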