/*
- * Copyright 2006-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ * Copyright 2006-2007 Adrian Thurston <thurston@complang.org>
*/
/* This file is part of Ragel.
#include <string.h>
#include "ragel.h"
-#include "rlparse.h"
-#include "parsedata.h"
-#include "avltree.h"
-#include "vector.h"
+#include "rlscan.h"
+#include "inputdata.h"
+
+//#define LOG_TOKENS
using std::ifstream;
using std::istream;
using std::cerr;
using std::endl;
-extern char *Parser_lelNames[];
+enum InlineBlockType
+{
+ CurlyDelimited,
+ SemiTerminated
+};
+
+#ifdef _WIN32
+#define PATH_SEP '\\'
+#else
+#define PATH_SEP '/'
+#endif
+
+
+/*
+ * The Scanner for Importing
+ */
+
+%%{
+ machine inline_token_scan;
+ alphtype int;
+ access tok_;
+
+ # Import scanner tokens.
+ import "rlparse.h";
+
+ main := |*
+ # Define of number.
+ IMP_Define IMP_Word IMP_UInt => {
+ int base = tok_ts - token_data;
+ int nameOff = 1;
+ int numOff = 2;
+
+ directToParser( inclToParser, fileName, line, column, TK_Word,
+ token_strings[base+nameOff], token_lens[base+nameOff] );
+ directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
+ directToParser( inclToParser, fileName, line, column, TK_UInt,
+ token_strings[base+numOff], token_lens[base+numOff] );
+ directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
+ };
-/* This is used for tracking the current stack of include file/machine pairs. It is
- * is used to detect and recursive include structure. */
-struct IncludeStackItem
+ # Assignment of number.
+ IMP_Word '=' IMP_UInt => {
+ int base = tok_ts - token_data;
+ int nameOff = 0;
+ int numOff = 2;
+
+ directToParser( inclToParser, fileName, line, column, TK_Word,
+ token_strings[base+nameOff], token_lens[base+nameOff] );
+ directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
+ directToParser( inclToParser, fileName, line, column, TK_UInt,
+ token_strings[base+numOff], token_lens[base+numOff] );
+ directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
+ };
+
+ # Define of literal.
+ IMP_Define IMP_Word IMP_Literal => {
+ int base = tok_ts - token_data;
+ int nameOff = 1;
+ int litOff = 2;
+
+ directToParser( inclToParser, fileName, line, column, TK_Word,
+ token_strings[base+nameOff], token_lens[base+nameOff] );
+ directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
+ directToParser( inclToParser, fileName, line, column, TK_Literal,
+ token_strings[base+litOff], token_lens[base+litOff] );
+ directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
+ };
+
+ # Assignment of literal.
+ IMP_Word '=' IMP_Literal => {
+ int base = tok_ts - token_data;
+ int nameOff = 0;
+ int litOff = 2;
+
+ directToParser( inclToParser, fileName, line, column, TK_Word,
+ token_strings[base+nameOff], token_lens[base+nameOff] );
+ directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
+ directToParser( inclToParser, fileName, line, column, TK_Literal,
+ token_strings[base+litOff], token_lens[base+litOff] );
+ directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
+ };
+
+ # Catch everything else.
+ any;
+ *|;
+}%%
+
+%% write data;
+
+void Scanner::flushImport()
{
- IncludeStackItem( char *fileName, char *sectionName )
- : fileName(fileName), sectionName(sectionName) {}
+ int *p = token_data;
+ int *pe = token_data + cur_token;
+ int *eof = 0;
- char *fileName;
- char *sectionName;
-};
+ %%{
+ machine inline_token_scan;
+ write init;
+ write exec;
+ }%%
-typedef Vector<IncludeStackItem> IncludeStack;
+ if ( tok_ts == 0 )
+ cur_token = 0;
+ else {
+ cur_token = pe - tok_ts;
+ int ts_offset = tok_ts - token_data;
+ memmove( token_data, token_data+ts_offset, cur_token*sizeof(token_data[0]) );
+ memmove( token_strings, token_strings+ts_offset, cur_token*sizeof(token_strings[0]) );
+ memmove( token_lens, token_lens+ts_offset, cur_token*sizeof(token_lens[0]) );
+ }
+}
-enum InlineBlockType
+void Scanner::directToParser( Parser *toParser, const char *tokFileName, int tokLine,
+ int tokColumn, int type, char *tokdata, int toklen )
{
- CurlyDelimited,
- SemiTerminated
-};
+ InputLoc loc;
+
+ #ifdef LOG_TOKENS
+ cerr << "scanner:" << tokLine << ":" << tokColumn <<
+ ": sending token to the parser " << Parser_lelNames[type];
+ cerr << " " << toklen;
+ if ( tokdata != 0 )
+ cerr << " " << tokdata;
+ cerr << endl;
+ #endif
+
+ loc.fileName = tokFileName;
+ loc.line = tokLine;
+ loc.col = tokColumn;
+
+ toParser->token( loc, type, tokdata, toklen );
+}
-struct Scanner
+void Scanner::importToken( int token, char *start, char *end )
{
- Scanner( char *fileName, istream &input, ostream &output,
- Parser *inclToParser, char *inclSectionTarg,
- int includeDepth )
- :
- fileName(fileName), input(input), output(output),
- inclToParser(inclToParser),
- inclSectionTarg(inclSectionTarg),
- includeDepth(includeDepth),
- line(1), column(1), lastnl(0),
- parser(0), active(false),
- parserExistsError(false),
- whitespaceOn(true)
- {}
-
- bool recursiveInclude( char *inclFileName, char *inclSectionName );
-
- char *prepareFileName( char *fileName, int len )
- {
- bool caseInsensitive;
- Token tokenFnStr, tokenRes;
- tokenFnStr.data = fileName;
- tokenFnStr.length = len;
- tokenFnStr.prepareLitString( tokenRes, caseInsensitive );
- return tokenRes.data;
+ if ( cur_token == max_tokens )
+ flushImport();
+
+ token_data[cur_token] = token;
+ if ( start == 0 ) {
+ token_strings[cur_token] = 0;
+ token_lens[cur_token] = 0;
}
+ else {
+ int toklen = end-start;
+ token_lens[cur_token] = toklen;
+ token_strings[cur_token] = new char[toklen+1];
+ memcpy( token_strings[cur_token], start, toklen );
+ token_strings[cur_token][toklen] = 0;
+ }
+ cur_token++;
+}
- void init();
- void token( int type, char *start, char *end );
- void token( int type, char c );
- void token( int type );
- void updateCol();
- void startSection();
- void endSection();
- void do_scan();
- bool parserExists();
- ostream &scan_error();
-
- char *fileName;
- istream &input;
- ostream &output;
- Parser *inclToParser;
- char *inclSectionTarg;
- int includeDepth;
-
- int cs;
- int line;
- char *word, *lit;
- int word_len, lit_len;
- InputLoc sectionLoc;
- char *tokstart, *tokend;
- int column;
- char *lastnl;
-
- /* Set by machine statements, these persist from section to section
- * allowing for unnamed sections. */
- Parser *parser;
- bool active;
- IncludeStack includeStack;
-
- /* This is set if ragel has already emitted an error stating that
- * no section name has been seen and thus no parser exists. */
- bool parserExistsError;
-
- /* This is for inline code. By default it is on. It goes off for
- * statements and values in inline blocks which are parsed. */
- bool whitespaceOn;
-};
+void Scanner::pass( int token, char *start, char *end )
+{
+ if ( importMachines )
+ importToken( token, start, end );
+ pass();
+}
+
+void Scanner::pass()
+{
+ updateCol();
+
+ /* If no errors and we are at the bottom of the include stack (the
+ * source file listed on the command line) then write out the data. */
+ if ( includeDepth == 0 && machineSpec == 0 && machineName == 0 )
+ id.inputItems.tail->data.write( ts, te-ts );
+}
+
+/*
+ * The scanner for processing sections, includes, imports, etc.
+ */
%%{
machine section_parse;
write data;
}%%
+
void Scanner::init( )
{
%% write init;
}
-bool Scanner::parserExists()
+bool Scanner::active()
{
- if ( parser != 0 )
- return true;
+ if ( ignoreSection )
+ return false;
- if ( ! parserExistsError ) {
- scan_error() << "include: there is no previous specification name" << endl;
+ if ( parser == 0 && ! parserExistsError ) {
+ scan_error() << "this specification has no name, nor does any previous"
+ " specification" << endl;
parserExistsError = true;
}
- return false;
+
+ if ( parser == 0 )
+ return false;
+
+ return true;
}
ostream &Scanner::scan_error()
{
/* Maintain the error count. */
gblErrorCount += 1;
- cerr << fileName << ":" << line << ":" << column << ": ";
+ cerr << makeInputLoc( fileName, line, column ) << ": ";
return cerr;
}
-bool Scanner::recursiveInclude( char *inclFileName, char *inclSectionName )
+/* An approximate check for duplicate includes. Due to aliasing of files it's
+ * possible for duplicates to creep in. */
+bool Scanner::duplicateInclude( char *inclFileName, char *inclSectionName )
{
- for ( IncludeStack::Iter si = includeStack; si.lte(); si++ ) {
- if ( strcmp( si->fileName, inclFileName ) == 0 &&
- strcmp( si->sectionName, inclSectionName ) == 0 )
+ for ( IncludeHistory::Iter hi = parser->includeHistory; hi.lte(); hi++ ) {
+ if ( strcmp( hi->fileName, inclFileName ) == 0 &&
+ strcmp( hi->sectionName, inclSectionName ) == 0 )
{
return true;
}
{
char *from = lastnl;
if ( from == 0 )
- from = tokstart;
- //cerr << "adding " << tokend - from << " to column" << endl;
- column += tokend - from;
+ from = ts;
+ //cerr << "adding " << te - from << " to column" << endl;
+ column += te - from;
lastnl = 0;
}
-void Scanner::token( int type, char c )
+void Scanner::handleMachine()
{
- token( type, &c, &c + 1 );
+ /* Assign a name to the machine. */
+ char *machine = word;
+
+ if ( !importMachines && inclSectionTarg == 0 ) {
+ ignoreSection = false;
+
+ ParserDictEl *pdEl = id.parserDict.find( machine );
+ if ( pdEl == 0 ) {
+ pdEl = new ParserDictEl( machine );
+ pdEl->value = new Parser( fileName, machine, sectionLoc );
+ pdEl->value->init();
+ id.parserDict.insert( pdEl );
+ id.parserList.append( pdEl->value );
+ }
+
+ parser = pdEl->value;
+ }
+ else if ( !importMachines && strcmp( inclSectionTarg, machine ) == 0 ) {
+ /* found include target */
+ ignoreSection = false;
+ parser = inclToParser;
+ }
+ else {
+ /* ignoring section */
+ ignoreSection = true;
+ parser = 0;
+ }
}
-void Scanner::token( int type )
+void Scanner::handleInclude()
{
- token( type, 0, 0 );
+ if ( active() ) {
+ char *inclSectionName = word;
+ char **includeChecks = 0;
+
+ /* Implement defaults for the input file and section name. */
+ if ( inclSectionName == 0 )
+ inclSectionName = parser->sectionName;
+
+ if ( lit != 0 )
+ includeChecks = makeIncludePathChecks( fileName, lit, lit_len );
+ else {
+ char *test = new char[strlen(fileName)+1];
+ strcpy( test, fileName );
+
+ includeChecks = new char*[2];
+
+ includeChecks[0] = test;
+ includeChecks[1] = 0;
+ }
+
+ long found = 0;
+ ifstream *inFile = tryOpenInclude( includeChecks, found );
+ if ( inFile == 0 ) {
+ scan_error() << "include: failed to locate file" << endl;
+ char **tried = includeChecks;
+ while ( *tried != 0 )
+ scan_error() << "include: attempted: \"" << *tried++ << '\"' << endl;
+ }
+ else {
+ /* Don't include anything that's already been included. */
+ if ( !duplicateInclude( includeChecks[found], inclSectionName ) ) {
+ parser->includeHistory.append( IncludeHistoryItem(
+ includeChecks[found], inclSectionName ) );
+
+ Scanner scanner( id, includeChecks[found], *inFile, parser,
+ inclSectionName, includeDepth+1, false );
+ scanner.do_scan( );
+ delete inFile;
+ }
+ }
+ }
+}
+
+void Scanner::handleImport()
+{
+ if ( active() ) {
+ char **importChecks = makeIncludePathChecks( fileName, lit, lit_len );
+
+ /* Open the input file for reading. */
+ long found = 0;
+ ifstream *inFile = tryOpenInclude( importChecks, found );
+ if ( inFile == 0 ) {
+ scan_error() << "import: could not open import file " <<
+ "for reading" << endl;
+ char **tried = importChecks;
+ while ( *tried != 0 )
+ scan_error() << "import: attempted: \"" << *tried++ << '\"' << endl;
+ }
+
+ Scanner scanner( id, importChecks[found], *inFile, parser,
+ 0, includeDepth+1, true );
+ scanner.do_scan( );
+ scanner.importToken( 0, 0, 0 );
+ scanner.flushImport();
+ delete inFile;
+ }
}
%%{
machine section_parse;
- # This relies on the the kelbt implementation and the order
- # that tokens are declared.
- KW_Machine = 128;
- KW_Include = 129;
- KW_Write = 130;
- TK_Word = 131;
- TK_Literal = 132;
+ # Need the defines representing tokens.
+ import "rlparse.h";
action clear_words { word = lit = 0; word_len = lit_len = 0; }
action store_word { word = tokdata; word_len = toklen; }
action mach_err { scan_error() << "bad machine statement" << endl; }
action incl_err { scan_error() << "bad include statement" << endl; }
+ action import_err { scan_error() << "bad import statement" << endl; }
action write_err { scan_error() << "bad write statement" << endl; }
- action handle_machine
- {
- /* Assign a name to the machine. */
- char *machine = word;
-
- if ( inclSectionTarg == 0 ) {
- active = true;
-
- ParserDictEl *pdEl = parserDict.find( machine );
- if ( pdEl == 0 ) {
- pdEl = new ParserDictEl( machine );
- pdEl->value = new Parser( fileName, machine, sectionLoc );
- pdEl->value->init();
- parserDict.insert( pdEl );
- }
-
- parser = pdEl->value;
- }
- else if ( strcmp( inclSectionTarg, machine ) == 0 ) {
- /* found include target */
- active = true;
- parser = inclToParser;
- }
- else {
- /* ignoring section */
- active = false;
- parser = 0;
- }
- }
+ action handle_machine { handleMachine(); }
+ action handle_include { handleInclude(); }
+ action handle_import { handleImport(); }
machine_stmt =
( KW_Machine TK_Word @store_word ';' ) @handle_machine
<>err mach_err <>eof mach_err;
- action handle_include
- {
- if ( active && parserExists() ) {
- char *inclSectionName = word;
- char *inclFileName = 0;
-
- /* Implement defaults for the input file and section name. */
- if ( inclSectionName == 0 )
- inclSectionName = parser->sectionName;
-
- if ( lit != 0 )
- inclFileName = prepareFileName( lit, lit_len );
- else
- inclFileName = fileName;
-
- /* Check for a recursive include structure. Add the current file/section
- * name then check if what we are including is already in the stack. */
- includeStack.append( IncludeStackItem( fileName, parser->sectionName ) );
-
- if ( recursiveInclude( inclFileName, inclSectionName ) )
- scan_error() << "include: this is a recursive include operation" << endl;
- else {
- /* Open the input file for reading. */
- ifstream *inFile = new ifstream( inclFileName );
- if ( ! inFile->is_open() ) {
- scan_error() << "include: could not open " <<
- inclFileName << " for reading" << endl;
- }
-
- Scanner scanner( inclFileName, *inFile, output, parser,
- inclSectionName, includeDepth+1 );
- scanner.init();
- scanner.do_scan( );
- delete inFile;
- }
-
- /* Remove the last element (len-1) */
- includeStack.remove( -1 );
- }
- }
-
include_names = (
TK_Word @store_word ( TK_Literal @store_lit )? |
TK_Literal @store_lit
( KW_Include include_names ';' ) @handle_include
<>err incl_err <>eof incl_err;
+ import_stmt =
+ ( KW_Import TK_Literal @store_lit ';' ) @handle_import
+ <>err import_err <>eof import_err;
+
action write_command
{
- if ( active && machineSpec == 0 && machineName == 0 ) {
- output << "<write"
- " def_name=\"" << parser->sectionName << "\""
- " line=\"" << line << "\""
- " col=\"" << column << "\""
- ">";
+ if ( active() && machineSpec == 0 && machineName == 0 ) {
+ InputItem *inputItem = new InputItem;
+ inputItem->type = InputItem::Write;
+ inputItem->loc.fileName = fileName;
+ inputItem->loc.line = line;
+ inputItem->loc.col = column;
+ inputItem->name = parser->sectionName;
+ inputItem->pd = parser->pd;
+ id.inputItems.append( inputItem );
}
}
action write_arg
{
- if ( active && machineSpec == 0 && machineName == 0 )
- output << "<arg>" << tokdata << "</arg>";
+ if ( active() && machineSpec == 0 && machineName == 0 )
+ id.inputItems.tail->writeArgs.append( strdup(tokdata) );
}
action write_close
{
- if ( active && machineSpec == 0 && machineName == 0 )
- output << "</write>\n";
+ if ( active() && machineSpec == 0 && machineName == 0 )
+ id.inputItems.tail->writeArgs.append( 0 );
}
write_stmt =
action handle_token
{
/* Send the token off to the parser. */
- if ( active && parserExists() ) {
- InputLoc loc;
-
- #if 0
- cerr << "scanner:" << line << ":" << column <<
- ": sending token to the parser " << Parser_lelNames[*p];
- cerr << " " << toklen;
- if ( tokdata != 0 )
- cerr << " " << tokdata;
- cerr << endl;
- #endif
-
- loc.fileName = fileName;
- loc.line = line;
- loc.col = column;
-
- parser->token( loc, type, tokdata, toklen );
- }
+ if ( active() )
+ directToParser( parser, fileName, line, column, type, tokdata, toklen );
}
# Catch everything else.
- everything_else = ^( KW_Machine | KW_Include | KW_Write ) @handle_token;
+ everything_else =
+ ^( KW_Machine | KW_Include | KW_Import | KW_Write ) @handle_token;
main := (
machine_stmt |
include_stmt |
+ import_stmt |
write_stmt |
everything_else
)*;
}%%
+void Scanner::token( int type, char c )
+{
+ token( type, &c, &c + 1 );
+}
+
+void Scanner::token( int type )
+{
+ token( type, 0, 0 );
+}
+
void Scanner::token( int type, char *start, char *end )
{
char *tokdata = 0;
int toklen = 0;
- int *p = &type;
- int *pe = &type + 1;
-
if ( start != 0 ) {
toklen = end-start;
tokdata = new char[toklen+1];
tokdata[toklen] = 0;
}
+ processToken( type, tokdata, toklen );
+}
+
+void Scanner::processToken( int type, char *tokdata, int toklen )
+{
+ int *p, *pe, *eof;
+
+ if ( type < 0 )
+ p = pe = eof = 0;
+ else {
+ p = &type;
+ pe = &type + 1;
+ eof = 0;
+ }
+
%%{
machine section_parse;
write exec;
}%%
updateCol();
+
+ /* Record the last token for use in controlling the scan of subsequent
+ * tokens. */
+ lastToken = type;
}
void Scanner::startSection( )
{
parserExistsError = false;
- if ( includeDepth == 0 ) {
- if ( machineSpec == 0 && machineName == 0 )
- output << "</host>\n";
- }
-
sectionLoc.fileName = fileName;
sectionLoc.line = line;
- sectionLoc.col = 0;
+ sectionLoc.col = column;
}
void Scanner::endSection( )
{
/* Execute the eof actions for the section parser. */
- %%{
- machine section_parse;
- write eof;
- }%%
+ processToken( -1, 0, 0 );
/* Close off the section with the parser. */
- if ( active && parserExists() ) {
+ if ( active() ) {
InputLoc loc;
loc.fileName = fileName;
loc.line = line;
- loc.col = 0;
+ loc.col = column;
parser->token( loc, TK_EndSection, 0, 0 );
}
if ( machineSpec == 0 && machineName == 0 ) {
/* The end section may include a newline on the end, so
* we use the last line, which will count the newline. */
- output << "<host line=\"" << line << "\">";
+ InputItem *inputItem = new InputItem;
+ inputItem->type = InputItem::HostData;
+ inputItem->loc.line = line;
+ inputItem->loc.col = column;
+ id.inputItems.append( inputItem );
+ }
+ }
+}
+
+bool isAbsolutePath( const char *path )
+{
+#ifdef _WIN32
+ return isalpha( path[0] ) && path[1] == ':' && path[2] == '\\';
+#else
+ return path[0] == '/';
+#endif
+}
+
+char **Scanner::makeIncludePathChecks( const char *thisFileName,
+ const char *fileName, int fnlen )
+{
+ char **checks = 0;
+ long nextCheck = 0;
+ long length = 0;
+ bool caseInsensitive = false;
+ char *data = prepareLitString( InputLoc(), fileName, fnlen,
+ length, caseInsensitive );
+
+ /* Absolute path? */
+ if ( isAbsolutePath( data ) ) {
+ checks = new char*[2];
+ checks[nextCheck++] = data;
+ }
+ else {
+ checks = new char*[2 + id.includePaths.length()];
+
+		/* Search from the location of the current file. */
+ const char *lastSlash = strrchr( thisFileName, PATH_SEP );
+ if ( lastSlash == 0 )
+ checks[nextCheck++] = data;
+ else {
+ long givenPathLen = (lastSlash - thisFileName) + 1;
+ long checklen = givenPathLen + length;
+ char *check = new char[checklen+1];
+ memcpy( check, thisFileName, givenPathLen );
+ memcpy( check+givenPathLen, data, length );
+ check[checklen] = 0;
+ checks[nextCheck++] = check;
+ }
+
+ /* Search from the include paths given on the command line. */
+ for ( ArgsVector::Iter incp = id.includePaths; incp.lte(); incp++ ) {
+ long pathLen = strlen( *incp );
+ long checkLen = pathLen + 1 + length;
+ char *check = new char[checkLen+1];
+ memcpy( check, *incp, pathLen );
+ check[pathLen] = PATH_SEP;
+ memcpy( check+pathLen+1, data, length );
+ check[checkLen] = 0;
+ checks[nextCheck++] = check;
}
}
+
+ checks[nextCheck] = 0;
+ return checks;
+}
+
+ifstream *Scanner::tryOpenInclude( char **pathChecks, long &found )
+{
+ char **check = pathChecks;
+ ifstream *inFile = new ifstream;
+
+ while ( *check != 0 ) {
+ inFile->open( *check );
+ if ( inFile->is_open() ) {
+ found = check - pathChecks;
+ return inFile;
+ }
+ check += 1;
+ }
+
+ found = -1;
+ delete inFile;
+ return 0;
}
%%{
c_cpp_comment = c_comment | cpp_comment;
- # These literal forms are common to C-like host code and ragel.
+ ruby_comment = '#' [^\n]* NL;
+
+ # These literal forms are common to host code and ragel.
s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'";
d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"';
+ host_re_literal = '/' ([^/\\] | NL | '\\' (any | NL))* '/';
whitespace = [ \t] | NL;
pound_comment = '#' [^\n]* NL;
- # An inline block of code. This is specified as a scanned, but is sent to
- # the parser as one long block. The inline_block pointer is used to handle
- # the preservation of the data.
+ # An inline block of code for Ruby.
+ inline_code_ruby := |*
+ # Inline expression keywords.
+ "fpc" => { token( KW_PChar ); };
+ "fc" => { token( KW_Char ); };
+ "fcurs" => { token( KW_CurState ); };
+ "ftargs" => { token( KW_TargState ); };
+ "fentry" => {
+ whitespaceOn = false;
+ token( KW_Entry );
+ };
+
+ # Inline statement keywords.
+ "fhold" => {
+ whitespaceOn = false;
+ token( KW_Hold );
+ };
+ "fexec" => { token( KW_Exec, 0, 0 ); };
+ "fgoto" => {
+ whitespaceOn = false;
+ token( KW_Goto );
+ };
+ "fnext" => {
+ whitespaceOn = false;
+ token( KW_Next );
+ };
+ "fcall" => {
+ whitespaceOn = false;
+ token( KW_Call );
+ };
+ "fret" => {
+ whitespaceOn = false;
+ token( KW_Ret );
+ };
+ "fbreak" => {
+ whitespaceOn = false;
+ token( KW_Break );
+ };
+
+ ident => { token( TK_Word, ts, te ); };
+
+ number => { token( TK_UInt, ts, te ); };
+ hex_number => { token( TK_Hex, ts, te ); };
+
+ ( s_literal | d_literal | host_re_literal )
+ => { token( IL_Literal, ts, te ); };
+
+ whitespace+ => {
+ if ( whitespaceOn )
+ token( IL_WhiteSpace, ts, te );
+ };
+
+ ruby_comment => { token( IL_Comment, ts, te ); };
+
+ "::" => { token( TK_NameSep, ts, te ); };
+
+	# Some symbols need to go to the parser with their cardinal value as
+ # the token type (as opposed to being sent as anonymous symbols)
+ # because they are part of the sequences which we interpret. The * ) ;
+ # symbols cause whitespace parsing to come back on. This gets turned
+ # off by some keywords.
+
+ ";" => {
+ whitespaceOn = true;
+ token( *ts, ts, te );
+ if ( inlineBlockType == SemiTerminated )
+ fret;
+ };
+
+ [*)] => {
+ whitespaceOn = true;
+ token( *ts, ts, te );
+ };
+
+ [,(] => { token( *ts, ts, te ); };
+
+ '{' => {
+ token( IL_Symbol, ts, te );
+ curly_count += 1;
+ };
+
+ '}' => {
+ if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) {
+ /* Inline code block ends. */
+ token( '}' );
+ fret;
+ }
+ else {
+ /* Either a semi terminated inline block or only the closing
+ * brace of some inner scope, not the block's closing brace. */
+ token( IL_Symbol, ts, te );
+ }
+ };
+
+ EOF => {
+ scan_error() << "unterminated code block" << endl;
+ };
+
+ # Send every other character as a symbol.
+ any => { token( IL_Symbol, ts, te ); };
+ *|;
+
+
+ # An inline block of code for languages other than Ruby.
inline_code := |*
# Inline expression keywords.
"fpc" => { token( KW_PChar ); };
token( KW_Break );
};
- ident => { token( TK_Word, tokstart, tokend ); };
+ ident => { token( TK_Word, ts, te ); };
- number => { token( TK_UInt, tokstart, tokend ); };
- hex_number => { token( TK_Hex, tokstart, tokend ); };
+ number => { token( TK_UInt, ts, te ); };
+ hex_number => { token( TK_Hex, ts, te ); };
( s_literal | d_literal )
- => { token( IL_Literal, tokstart, tokend ); };
+ => { token( IL_Literal, ts, te ); };
whitespace+ => {
if ( whitespaceOn )
- token( IL_WhiteSpace, tokstart, tokend );
+ token( IL_WhiteSpace, ts, te );
};
- c_cpp_comment => { token( IL_Comment, tokstart, tokend ); };
- "::" => { token( TK_NameSep, tokstart, tokend ); };
+ c_cpp_comment => { token( IL_Comment, ts, te ); };
+
+ "::" => { token( TK_NameSep, ts, te ); };
# Some symbols need to go to the parser as with their cardinal value as
# the token type (as opposed to being sent as anonymous symbols)
";" => {
whitespaceOn = true;
- token( *tokstart, tokstart, tokend );
+ token( *ts, ts, te );
if ( inlineBlockType == SemiTerminated )
- fgoto parser_def;
+ fret;
};
[*)] => {
whitespaceOn = true;
- token( *tokstart, tokstart, tokend );
+ token( *ts, ts, te );
};
- [,(] => { token( *tokstart, tokstart, tokend ); };
+ [,(] => { token( *ts, ts, te ); };
'{' => {
- token( IL_Symbol, tokstart, tokend );
+ token( IL_Symbol, ts, te );
curly_count += 1;
};
if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) {
/* Inline code block ends. */
token( '}' );
- fgoto parser_def;
+ fret;
}
else {
/* Either a semi terminated inline block or only the closing
* brace of some inner scope, not the block's closing brace. */
- token( IL_Symbol, tokstart, tokend );
+ token( IL_Symbol, ts, te );
}
};
};
# Send every other character as a symbol.
- any => { token( IL_Symbol, tokstart, tokend ); };
+ any => { token( IL_Symbol, ts, te ); };
*|;
or_literal := |*
'\\f' => { token( RE_Char, '\f' ); };
'\\r' => { token( RE_Char, '\r' ); };
'\\\n' => { updateCol(); };
- '\\' any => { token( RE_Char, tokstart+1, tokend ); };
+ '\\' any => { token( RE_Char, ts+1, te ); };
# Range dash in an OR expression.
'-' => { token( RE_Dash, 0, 0 ); };
};
# Characters in an OR expression.
- [^\]] => { token( RE_Char, tokstart, tokend ); };
+ [^\]] => { token( RE_Char, ts, te ); };
*|;
- re_literal := |*
+ ragel_re_literal := |*
# Escape sequences in regular expressions.
'\\0' => { token( RE_Char, '\0' ); };
'\\a' => { token( RE_Char, '\a' ); };
'\\f' => { token( RE_Char, '\f' ); };
'\\r' => { token( RE_Char, '\r' ); };
'\\\n' => { updateCol(); };
- '\\' any => { token( RE_Char, tokstart+1, tokend ); };
+ '\\' any => { token( RE_Char, ts+1, te ); };
# Terminate an OR expression.
'/' [i]? => {
- token( RE_Slash, tokstart, tokend );
+ token( RE_Slash, ts, te );
fgoto parser_def;
};
};
# Characters in an OR expression.
- [^\/] => { token( RE_Char, tokstart, tokend ); };
+ [^\/] => { token( RE_Char, ts, te ); };
*|;
# We need a separate token space here to avoid the ragel keywords.
write_statement := |*
- ident => { token( TK_Word, tokstart, tokend ); } ;
+ ident => { token( TK_Word, ts, te ); } ;
[ \t\n]+ => { updateCol(); };
';' => { token( ';' ); fgoto parser_def; };
# Parser definitions.
parser_def := |*
+ #'length_cond' => { token( KW_Length ); };
'machine' => { token( KW_Machine ); };
'include' => { token( KW_Include ); };
+ 'import' => { token( KW_Import ); };
'write' => {
token( KW_Write );
fgoto write_statement;
};
'action' => { token( KW_Action ); };
'alphtype' => { token( KW_AlphType ); };
+ 'prepush' => { token( KW_PrePush ); };
+ 'postpop' => { token( KW_PostPop ); };
# FIXME: Enable this post 5.17.
# 'range' => { token( KW_Range ); };
'getkey' => {
token( KW_GetKey );
inlineBlockType = SemiTerminated;
- fgoto inline_code;
+ if ( hostLang->lang == HostLang::Ruby )
+ fcall inline_code_ruby;
+ else
+ fcall inline_code;
};
'access' => {
token( KW_Access );
inlineBlockType = SemiTerminated;
- fgoto inline_code;
+ if ( hostLang->lang == HostLang::Ruby )
+ fcall inline_code_ruby;
+ else
+ fcall inline_code;
};
'variable' => {
token( KW_Variable );
inlineBlockType = SemiTerminated;
- fgoto inline_code;
+ if ( hostLang->lang == HostLang::Ruby )
+ fcall inline_code_ruby;
+ else
+ fcall inline_code;
};
'when' => { token( KW_When ); };
+ 'inwhen' => { token( KW_InWhen ); };
+ 'outwhen' => { token( KW_OutWhen ); };
'eof' => { token( KW_Eof ); };
'err' => { token( KW_Err ); };
'lerr' => { token( KW_Lerr ); };
'to' => { token( KW_To ); };
'from' => { token( KW_From ); };
+ 'export' => { token( KW_Export ); };
# Identifiers.
- ident => { token( TK_Word, tokstart, tokend ); } ;
+ ident => { token( TK_Word, ts, te ); } ;
# Numbers
- number => { token( TK_UInt, tokstart, tokend ); };
- hex_number => { token( TK_Hex, tokstart, tokend ); };
+ number => { token( TK_UInt, ts, te ); };
+ hex_number => { token( TK_Hex, ts, te ); };
# Literals, with optionals.
( s_literal | d_literal ) [i]?
- => { token( TK_Literal, tokstart, tokend ); };
+ => { token( TK_Literal, ts, te ); };
'[' => { token( RE_SqOpen ); fcall or_literal; };
'[^' => { token( RE_SqOpenNeg ); fcall or_literal; };
- '/' => { token( RE_Slash ); fgoto re_literal; };
+ '/' => { token( RE_Slash ); fgoto ragel_re_literal; };
# Ignore.
pound_comment => { updateCol(); };
# Opening of longest match.
"|*" => { token( TK_BarStar ); };
+	# Separator for name references.
+ "::" => { token( TK_NameSep, ts, te ); };
+
'}%%' => {
updateCol();
endSection();
- fgoto main;
+ fret;
};
[ \t\r]+ => { updateCol(); };
updateCol();
if ( singleLineSpec ) {
endSection();
- fgoto main;
+ fret;
}
};
'{' => {
- token( '{' );
- curly_count = 1;
- inlineBlockType = CurlyDelimited;
- fgoto inline_code;
+ if ( lastToken == KW_Export || lastToken == KW_Entry )
+ token( '{' );
+ else {
+ token( '{' );
+ curly_count = 1;
+ inlineBlockType = CurlyDelimited;
+ if ( hostLang->lang == HostLang::Ruby )
+ fcall inline_code_ruby;
+ else
+ fcall inline_code;
+ }
};
EOF => {
scan_error() << "unterminated ragel section" << endl;
};
- any => { token( *tokstart ); } ;
+ any => { token( *ts ); } ;
*|;
- action pass {
- updateCol();
+ # Outside code scanner. These tokens get passed through.
+ main_ruby := |*
+ ident => { pass( IMP_Word, ts, te ); };
+ number => { pass( IMP_UInt, ts, te ); };
+ ruby_comment => { pass(); };
+ ( s_literal | d_literal | host_re_literal )
+ => { pass( IMP_Literal, ts, te ); };
- /* If no errors and we are at the bottom of the include stack (the
- * source file listed on the command line) then write out the data. */
- if ( includeDepth == 0 && machineSpec == 0 && machineName == 0 )
- xmlEscapeHost( output, tokstart, tokend-tokstart );
- }
+ '%%{' => {
+ updateCol();
+ singleLineSpec = false;
+ startSection();
+ fcall parser_def;
+ };
+ '%%' => {
+ updateCol();
+ singleLineSpec = true;
+ startSection();
+ fcall parser_def;
+ };
+ whitespace+ => { pass(); };
+ EOF;
+ any => { pass( *ts, 0, 0 ); };
+ *|;
# Outside code scanner. These tokens get passed through.
main := |*
- ident => pass;
- number => pass;
- c_cpp_comment => pass;
- s_literal | d_literal => pass;
+ 'define' => { pass( IMP_Define, 0, 0 ); };
+ ident => { pass( IMP_Word, ts, te ); };
+ number => { pass( IMP_UInt, ts, te ); };
+ c_cpp_comment => { pass(); };
+ ( s_literal | d_literal ) => { pass( IMP_Literal, ts, te ); };
+
'%%{' => {
updateCol();
singleLineSpec = false;
startSection();
- fgoto parser_def;
+ fcall parser_def;
};
'%%' => {
updateCol();
singleLineSpec = true;
startSection();
- fgoto parser_def;
+ fcall parser_def;
};
- whitespace+ => pass;
+ whitespace+ => { pass(); };
EOF;
- any => pass;
+ any => { pass( *ts, 0, 0 ); };
*|;
-
}%%
%% write data;
{
int bufsize = 8;
char *buf = new char[bufsize];
- const char last_char = 0;
int cs, act, have = 0;
- int top, stack[1];
+ int top;
+
+ /* The stack is two deep, one level for going into ragel defs from the main
+ * machines which process outside code, and another for going into or literals
+ * from either a ragel spec, or a regular expression. */
+ int stack[2];
int curly_count = 0;
bool execute = true;
bool singleLineSpec = false;
- InlineBlockType inlineBlockType;
+ InlineBlockType inlineBlockType = CurlyDelimited;
+ /* Init the section parser and the character scanner. */
+ init();
%% write init;
+ /* Set up the start state. FIXME: After 5.20 is released the nocs write
+ * init option should be used, the main machine eliminated and this statement moved
+ * above the write init. */
+ if ( hostLang->lang == HostLang::Ruby )
+ cs = rlscan_en_main_ruby;
+ else
+ cs = rlscan_en_main;
+
while ( execute ) {
char *p = buf + have;
int space = bufsize - have;
space = bufsize - have;
/* Patch up pointers possibly in use. */
- if ( tokstart != 0 )
- tokstart = newbuf + ( tokstart - buf );
- tokend = newbuf + ( tokend - buf );
+ if ( ts != 0 )
+ ts = newbuf + ( ts - buf );
+ te = newbuf + ( te - buf );
/* Copy the new buffer in. */
memcpy( newbuf, buf, have );
input.read( p, space );
int len = input.gcount();
+ char *pe = p + len;
- /* If we see eof then append the EOF char. */
+	/* If we see eof then set the eof marker. */
+ char *eof = 0;
if ( len == 0 ) {
- p[0] = last_char, len = 1;
+ eof = pe;
execute = false;
}
- char *pe = p + len;
%% write exec;
/* Check if we failed. */
}
/* Decide if we need to preserve anything. */
- char *preserve = tokstart;
+ char *preserve = ts;
/* Now set up the prefix. */
if ( preserve == 0 )
have = pe - preserve;
memmove( buf, preserve, have );
unsigned int shiftback = preserve - buf;
- if ( tokstart != 0 )
- tokstart -= shiftback;
- tokend -= shiftback;
+ if ( ts != 0 )
+ ts -= shiftback;
+ te -= shiftback;
preserve = buf;
}
delete[] buf;
}
-
-void scan( char *fileName, istream &input, ostream &output )
-{
- Scanner scanner( fileName, input, output, 0, 0, 0 );
- scanner.init();
- scanner.do_scan();
-
- InputLoc eofLoc;
- eofLoc.fileName = fileName;
- eofLoc.col = 1;
- eofLoc.line = scanner.line;
-}