From: thurston <thurston@052ea7fc-9027-0410-9066-f65837a77df0>
Date: Fri, 11 Jan 2008 23:14:17 +0000 (+0000)
Subject: Self-host with 6.0.
X-Git-Tag: 2.0_alpha~169
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ae3dcd13c5f3bcec05f5f5f94d2df629d8679bed;p=external%2Fragel.git

Self-host with 6.0.


git-svn-id: http://svn.complang.org/ragel/trunk@401 052ea7fc-9027-0410-9066-f65837a77df0
---

diff --git a/ragel/Makefile.in b/ragel/Makefile.in
index 78b396f..8bff749 100644
--- a/ragel/Makefile.in
+++ b/ragel/Makefile.in
@@ -69,7 +69,7 @@ rlparse.cpp: rlparse.kl rlparse.kh
 rlscan.cpp: rlparse.h
 
 rlscan.cpp: rlscan.rl
-	ragel $< | rlgen-cd -G2 -o $@
+	ragel -G2 -o $@ $<
 
 endif
 
diff --git a/ragel/rlscan.h b/ragel/rlscan.h
index 3729889..80ed52f 100644
--- a/ragel/rlscan.h
+++ b/ragel/rlscan.h
@@ -107,7 +107,7 @@ struct Scanner
 
 	/* For import parsing. */
 	int tok_cs, tok_act;
-	int *tok_tokstart, *tok_tokend;
+	int *tok_ts, *tok_te;
 	int cur_token;
 	static const int max_tokens = 32;
 	int token_data[max_tokens];
@@ -122,7 +122,7 @@ struct Scanner
 	/* For character scanning. */
 	int line;
 	InputLoc sectionLoc;
-	char *tokstart, *tokend;
+	char *ts, *te;
 	int column;
 	char *lastnl;
 
diff --git a/ragel/rlscan.rl b/ragel/rlscan.rl
index ab23d86..d450fe1 100644
--- a/ragel/rlscan.rl
+++ b/ragel/rlscan.rl
@@ -57,7 +57,7 @@ enum InlineBlockType
 	main := |*
 		# Define of number.
 		IMP_Define IMP_Word IMP_UInt => { 
-			int base = tok_tokstart - token_data;
+			int base = tok_ts - token_data;
 			int nameOff = 1;
 			int numOff = 2;
 
@@ -71,7 +71,7 @@ enum InlineBlockType
 
 		# Assignment of number.
 		IMP_Word '=' IMP_UInt => { 
-			int base = tok_tokstart - token_data;
+			int base = tok_ts - token_data;
 			int nameOff = 0;
 			int numOff = 2;
 
@@ -85,7 +85,7 @@ enum InlineBlockType
 
 		# Define of literal.
 		IMP_Define IMP_Word IMP_Literal => { 
-			int base = tok_tokstart - token_data;
+			int base = tok_ts - token_data;
 			int nameOff = 1;
 			int litOff = 2;
 
@@ -99,7 +99,7 @@ enum InlineBlockType
 
 		# Assignment of literal.
 		IMP_Word '=' IMP_Literal => { 
-			int base = tok_tokstart - token_data;
+			int base = tok_ts - token_data;
 			int nameOff = 0;
 			int litOff = 2;
 
@@ -122,15 +122,19 @@ void Scanner::flushImport()
 {
 	int *p = token_data;
 	int *pe = token_data + cur_token;
+	int *eof = 0;
 
-	%% write init;
-	%% write exec;
+	%%{
+		machine inline_token_scan;
+		write init;
+		write exec;
+	}%%
 
-	if ( tok_tokstart == 0 )
+	if ( tok_ts == 0 )
 		cur_token = 0;
 	else {
-		cur_token = pe - tok_tokstart;
-		int ts_offset = tok_tokstart - token_data;
+		cur_token = pe - tok_ts;
+		int ts_offset = tok_ts - token_data;
 		memmove( token_data, token_data+ts_offset, cur_token*sizeof(token_data[0]) );
 		memmove( token_strings, token_strings+ts_offset, cur_token*sizeof(token_strings[0]) );
 		memmove( token_lens, token_lens+ts_offset, cur_token*sizeof(token_lens[0]) );
@@ -192,7 +196,7 @@ void Scanner::pass()
 	/* If no errors and we are at the bottom of the include stack (the
 	 * source file listed on the command line) then write out the data. */
 	if ( includeDepth == 0 && machineSpec == 0 && machineName == 0 )
-		xmlEscapeHost( output, tokstart, tokend-tokstart );
+		xmlEscapeHost( output, ts, te-ts );
 }
 
 /*
@@ -252,9 +256,9 @@ void Scanner::updateCol()
 {
 	char *from = lastnl;
 	if ( from == 0 )
-		from = tokstart;
-	//cerr << "adding " << tokend - from << " to column" << endl;
-	column += tokend - from;
+		from = ts;
+	//cerr << "adding " << te - from << " to column" << endl;
+	column += te - from;
 	lastnl = 0;
 }
 
@@ -455,8 +459,16 @@ void Scanner::token( int type, char *start, char *end )
 
 void Scanner::processToken( int type, char *tokdata, int toklen )
 {
-	int *p = &type;
-	int *pe = &type + 1;
+	int *p, *pe, *eof;
+	
+
+	if ( type < 0 )
+		p = pe = eof = 0;
+	else {
+		p = &type;
+		pe = &type + 1;
+		eof = 0;
+	}
 
 	%%{
 		machine section_parse;
@@ -487,10 +499,7 @@ void Scanner::startSection( )
 void Scanner::endSection( )
 {
 	/* Execute the eof actions for the section parser. */
-	%%{
-		machine section_parse;
-		write eof;
-	}%%
+	processToken( -1, 0, 0 );
 
 	/* Close off the section with the parser. */
 	if ( active() ) {
@@ -586,22 +595,22 @@ void Scanner::endSection( )
 			token( KW_Break );
 		};
 
-		ident => { token( TK_Word, tokstart, tokend ); };
+		ident => { token( TK_Word, ts, te ); };
 
-		number => { token( TK_UInt, tokstart, tokend ); };
-		hex_number => { token( TK_Hex, tokstart, tokend ); };
+		number => { token( TK_UInt, ts, te ); };
+		hex_number => { token( TK_Hex, ts, te ); };
 
 		( s_literal | d_literal | host_re_literal ) 
-			=> { token( IL_Literal, tokstart, tokend ); };
+			=> { token( IL_Literal, ts, te ); };
 
 		whitespace+ => { 
 			if ( whitespaceOn ) 
-				token( IL_WhiteSpace, tokstart, tokend );
+				token( IL_WhiteSpace, ts, te );
 		};
 
-		ruby_comment => { token( IL_Comment, tokstart, tokend ); };
+		ruby_comment => { token( IL_Comment, ts, te ); };
 
-		"::" => { token( TK_NameSep, tokstart, tokend ); };
+		"::" => { token( TK_NameSep, ts, te ); };
 
 		# Some symbols need to go to the parser as with their cardinal value as
 		# the token type (as opposed to being sent as anonymous symbols)
@@ -611,20 +620,20 @@ void Scanner::endSection( )
 
 		";" => {
 			whitespaceOn = true;
-			token( *tokstart, tokstart, tokend );
+			token( *ts, ts, te );
 			if ( inlineBlockType == SemiTerminated )
 				fret;
 		};
 
 		[*)] => { 
 			whitespaceOn = true;
-			token( *tokstart, tokstart, tokend );
+			token( *ts, ts, te );
 		};
 
-		[,(] => { token( *tokstart, tokstart, tokend ); };
+		[,(] => { token( *ts, ts, te ); };
 
 		'{' => { 
-			token( IL_Symbol, tokstart, tokend );
+			token( IL_Symbol, ts, te );
 			curly_count += 1; 
 		};
 
@@ -637,7 +646,7 @@ void Scanner::endSection( )
 			else {
 				/* Either a semi terminated inline block or only the closing
 				 * brace of some inner scope, not the block's closing brace. */
-				token( IL_Symbol, tokstart, tokend );
+				token( IL_Symbol, ts, te );
 			}
 		};
 
@@ -646,7 +655,7 @@ void Scanner::endSection( )
 		};
 
 		# Send every other character as a symbol.
-		any => { token( IL_Symbol, tokstart, tokend ); };
+		any => { token( IL_Symbol, ts, te ); };
 	*|;
 
 
@@ -689,22 +698,22 @@ void Scanner::endSection( )
 			token( KW_Break );
 		};
 
-		ident => { token( TK_Word, tokstart, tokend ); };
+		ident => { token( TK_Word, ts, te ); };
 
-		number => { token( TK_UInt, tokstart, tokend ); };
-		hex_number => { token( TK_Hex, tokstart, tokend ); };
+		number => { token( TK_UInt, ts, te ); };
+		hex_number => { token( TK_Hex, ts, te ); };
 
 		( s_literal | d_literal ) 
-			=> { token( IL_Literal, tokstart, tokend ); };
+			=> { token( IL_Literal, ts, te ); };
 
 		whitespace+ => { 
 			if ( whitespaceOn ) 
-				token( IL_WhiteSpace, tokstart, tokend );
+				token( IL_WhiteSpace, ts, te );
 		};
 
-		c_cpp_comment => { token( IL_Comment, tokstart, tokend ); };
+		c_cpp_comment => { token( IL_Comment, ts, te ); };
 
-		"::" => { token( TK_NameSep, tokstart, tokend ); };
+		"::" => { token( TK_NameSep, ts, te ); };
 
 		# Some symbols need to go to the parser as with their cardinal value as
 		# the token type (as opposed to being sent as anonymous symbols)
@@ -714,20 +723,20 @@ void Scanner::endSection( )
 
 		";" => {
 			whitespaceOn = true;
-			token( *tokstart, tokstart, tokend );
+			token( *ts, ts, te );
 			if ( inlineBlockType == SemiTerminated )
 				fret;
 		};
 
 		[*)] => { 
 			whitespaceOn = true;
-			token( *tokstart, tokstart, tokend );
+			token( *ts, ts, te );
 		};
 
-		[,(] => { token( *tokstart, tokstart, tokend ); };
+		[,(] => { token( *ts, ts, te ); };
 
 		'{' => { 
-			token( IL_Symbol, tokstart, tokend );
+			token( IL_Symbol, ts, te );
 			curly_count += 1; 
 		};
 
@@ -740,7 +749,7 @@ void Scanner::endSection( )
 			else {
 				/* Either a semi terminated inline block or only the closing
 				 * brace of some inner scope, not the block's closing brace. */
-				token( IL_Symbol, tokstart, tokend );
+				token( IL_Symbol, ts, te );
 			}
 		};
 
@@ -749,7 +758,7 @@ void Scanner::endSection( )
 		};
 
 		# Send every other character as a symbol.
-		any => { token( IL_Symbol, tokstart, tokend ); };
+		any => { token( IL_Symbol, ts, te ); };
 	*|;
 
 	or_literal := |*
@@ -763,7 +772,7 @@ void Scanner::endSection( )
 		'\\f' => { token( RE_Char, '\f' ); };
 		'\\r' => { token( RE_Char, '\r' ); };
 		'\\\n' => { updateCol(); };
-		'\\' any => { token( RE_Char, tokstart+1, tokend ); };
+		'\\' any => { token( RE_Char, ts+1, te ); };
 
 		# Range dash in an OR expression.
 		'-' => { token( RE_Dash, 0, 0 ); };
@@ -776,7 +785,7 @@ void Scanner::endSection( )
 		};
 
 		# Characters in an OR expression.
-		[^\]] => { token( RE_Char, tokstart, tokend ); };
+		[^\]] => { token( RE_Char, ts, te ); };
 
 	*|;
 
@@ -791,11 +800,11 @@ void Scanner::endSection( )
 		'\\f' => { token( RE_Char, '\f' ); };
 		'\\r' => { token( RE_Char, '\r' ); };
 		'\\\n' => { updateCol(); };
-		'\\' any => { token( RE_Char, tokstart+1, tokend ); };
+		'\\' any => { token( RE_Char, ts+1, te ); };
 
 		# Terminate an OR expression.
 		'/' [i]? => { 
-			token( RE_Slash, tokstart, tokend ); 
+			token( RE_Slash, ts, te ); 
 			fgoto parser_def;
 		};
 
@@ -811,12 +820,12 @@ void Scanner::endSection( )
 		};
 
 		# Characters in an OR expression.
-		[^\/] => { token( RE_Char, tokstart, tokend ); };
+		[^\/] => { token( RE_Char, ts, te ); };
 	*|;
 
 	# We need a separate token space here to avoid the ragel keywords.
 	write_statement := |*
-		ident => { token( TK_Word, tokstart, tokend ); } ;
+		ident => { token( TK_Word, ts, te ); } ;
 		[ \t\n]+ => { updateCol(); };
 		';' => { token( ';' ); fgoto parser_def; };
 
@@ -877,15 +886,15 @@ void Scanner::endSection( )
 		'export' => { token( KW_Export ); };
 
 		# Identifiers.
-		ident => { token( TK_Word, tokstart, tokend ); } ;
+		ident => { token( TK_Word, ts, te ); } ;
 
 		# Numbers
-		number => { token( TK_UInt, tokstart, tokend ); };
-		hex_number => { token( TK_Hex, tokstart, tokend ); };
+		number => { token( TK_UInt, ts, te ); };
+		hex_number => { token( TK_Hex, ts, te ); };
 
 		# Literals, with optionals.
 		( s_literal | d_literal ) [i]? 
-			=> { token( TK_Literal, tokstart, tokend ); };
+			=> { token( TK_Literal, ts, te ); };
 
 		'[' => { token( RE_SqOpen ); fcall or_literal; };
 		'[^' => { token( RE_SqOpenNeg ); fcall or_literal; };
@@ -959,7 +968,7 @@ void Scanner::endSection( )
 		"|*" => { token( TK_BarStar ); };
 
 		# Separater for name references.
-		"::" => { token( TK_NameSep, tokstart, tokend ); };
+		"::" => { token( TK_NameSep, ts, te ); };
 
 		'}%%' => { 
 			updateCol();
@@ -996,16 +1005,16 @@ void Scanner::endSection( )
 			scan_error() << "unterminated ragel section" << endl;
 		};
 
-		any => { token( *tokstart ); } ;
+		any => { token( *ts ); } ;
 	*|;
 
 	# Outside code scanner. These tokens get passed through.
 	main_ruby := |*
-		ident => { pass( IMP_Word, tokstart, tokend ); };
-		number => { pass( IMP_UInt, tokstart, tokend ); };
+		ident => { pass( IMP_Word, ts, te ); };
+		number => { pass( IMP_UInt, ts, te ); };
 		ruby_comment => { pass(); };
 		( s_literal | d_literal | host_re_literal ) 
-			=> { pass( IMP_Literal, tokstart, tokend ); };
+			=> { pass( IMP_Literal, ts, te ); };
 
 		'%%{' => { 
 			updateCol();
@@ -1021,16 +1030,16 @@ void Scanner::endSection( )
 		};
 		whitespace+ => { pass(); };
 		EOF;
-		any => { pass( *tokstart, 0, 0 ); };
+		any => { pass( *ts, 0, 0 ); };
 	*|;
 
 	# Outside code scanner. These tokens get passed through.
 	main := |*
 		'define' => { pass( IMP_Define, 0, 0 ); };
-		ident => { pass( IMP_Word, tokstart, tokend ); };
-		number => { pass( IMP_UInt, tokstart, tokend ); };
+		ident => { pass( IMP_Word, ts, te ); };
+		number => { pass( IMP_UInt, ts, te ); };
 		c_cpp_comment => { pass(); };
-		( s_literal | d_literal ) => { pass( IMP_Literal, tokstart, tokend ); };
+		( s_literal | d_literal ) => { pass( IMP_Literal, ts, te ); };
 
 		'%%{' => { 
 			updateCol();
@@ -1046,7 +1055,7 @@ void Scanner::endSection( )
 		};
 		whitespace+ => { pass(); };
 		EOF;
-		any => { pass( *tokstart, 0, 0 ); };
+		any => { pass( *ts, 0, 0 ); };
 	*|;
 }%%
 
@@ -1056,7 +1065,6 @@ void Scanner::do_scan()
 {
 	int bufsize = 8;
 	char *buf = new char[bufsize];
-	const char last_char = 0;
 	int cs, act, have = 0;
 	int top;
 
@@ -1095,9 +1103,9 @@ void Scanner::do_scan()
 			space = bufsize - have;
 
 			/* Patch up pointers possibly in use. */
-			if ( tokstart != 0 )
-				tokstart = newbuf + ( tokstart - buf );
-			tokend = newbuf + ( tokend - buf );
+			if ( ts != 0 )
+				ts = newbuf + ( ts - buf );
+			te = newbuf + ( te - buf );
 
 			/* Copy the new buffer in. */
 			memcpy( newbuf, buf, have );
@@ -1107,14 +1115,15 @@ void Scanner::do_scan()
 
 		input.read( p, space );
 		int len = input.gcount();
+		char *pe = p + len;
 
-		/* If we see eof then append the EOF char. */
+		/* If we see eof then point the eof var at pe (the end of the data). */
+		char *eof = 0;
 	 	if ( len == 0 ) {
-			p[0] = last_char, len = 1;
+			eof = pe;
 			execute = false;
 		}
 
-		char *pe = p + len;
 		%% write exec;
 
 		/* Check if we failed. */
@@ -1126,7 +1135,7 @@ void Scanner::do_scan()
 		}
 
 		/* Decide if we need to preserve anything. */
-		char *preserve = tokstart;
+		char *preserve = ts;
 
 		/* Now set up the prefix. */
 		if ( preserve == 0 )
@@ -1136,9 +1145,9 @@ void Scanner::do_scan()
 			have = pe - preserve;
 			memmove( buf, preserve, have );
 			unsigned int shiftback = preserve - buf;
-			if ( tokstart != 0 )
-				tokstart -= shiftback;
-			tokend -= shiftback;
+			if ( ts != 0 )
+				ts -= shiftback;
+			te -= shiftback;
 
 			preserve = buf;
 		}
diff --git a/redfsm/Makefile.in b/redfsm/Makefile.in
index d59eef1..bdc5ca7 100644
--- a/redfsm/Makefile.in
+++ b/redfsm/Makefile.in
@@ -58,7 +58,7 @@ xmlparse.cpp: xmlparse.kl xmlparse.kh
 	kelbt -o $@ $<
 
 xmlscan.cpp: xmlscan.rl
-	ragel xmlscan.rl | rlgen-cd -G2 -o xmlscan.cpp
+	ragel -G2 -o xmlscan.cpp xmlscan.rl
 
 xmltags.cpp: xmltags.gperf
 	gperf -L C++ -t $< > $@
diff --git a/redfsm/xmlscan.rl b/redfsm/xmlscan.rl
index 0976ed4..d1eeb72 100644
--- a/redfsm/xmlscan.rl
+++ b/redfsm/xmlscan.rl
@@ -72,7 +72,7 @@ struct Scanner
 
 	/* Scanner State. */
 	int cs, act, have, curline, curcol;
-	char *tokstart, *tokend;
+	char *ts, *te;
 	char *p, *pe;
 	int done;
 
@@ -97,7 +97,7 @@ struct Scanner
 #define TK_OpenTag 4
 #define TK_CloseTag 5
 
-#define ret_tok( _tok ) token = (_tok); data = tokstart
+#define ret_tok( _tok ) token = (_tok); data = ts
 
 void Scanner::adjustAttrPointers( int distance )
 {
@@ -193,21 +193,21 @@ int Scanner::scan( )
 		if ( p == pe ) {
 			//printf("scanner: need more data\n");
 
-			if ( tokstart == 0 )
+			if ( ts == 0 )
 				have = 0;
 			else {
 				/* There is data that needs to be shifted over. */
 				//printf("scanner: buffer broken mid token\n");
-				have = pe - tokstart;
-				memmove( buf, tokstart, have );
+				have = pe - ts;
+				memmove( buf, ts, have );
 
-				int distance = tokstart - buf;
-				tokend -= distance;
+				int distance = ts - buf;
+				te -= distance;
 				tag_id_start -= distance;
 				attr_id_start -= distance;
 				attr_value_start -= distance;
 				adjustAttrPointers( distance );
-				tokstart = buf;
+				ts = buf;
 			}
 
 			p = buf + have;
@@ -241,8 +241,6 @@ int Scanner::scan( )
 			return TK_ERR;
 
 		if ( token != TK_NO_TOKEN ) {
-			/* fbreak does not advance p, so we do it manually. */
-			p = p + 1;
 			data_len = p - data;
 			return token;
 		}