/*
- * Copyright 2006-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ * Copyright 2006-2007 Adrian Thurston <thurston@complang.org>
*/
/* This file is part of Ragel.
#include "ragel.h"
#include "rlscan.h"
+#include "inputdata.h"
+
+//#define LOG_TOKENS
using std::ifstream;
using std::istream;
using std::cerr;
using std::endl;
+enum InlineBlockType
+{
+ CurlyDelimited,
+ SemiTerminated
+};
+
+#ifdef _WIN32
+#define PATH_SEP '\\'
+#else
+#define PATH_SEP '/'
+#endif
+
+
/*
* The Scanner for Importing
*/
-#define IMP_Word 128
-#define IMP_Literal 129
-#define IMP_Number 130
-#define IMP_Define 131
-
%%{
machine inline_token_scan;
alphtype int;
access tok_;
- IMP_Word = 128;
- IMP_Literal = 129;
- IMP_Number = 130;
- IMP_Define = 131;
+ # Import scanner tokens.
+ import "rlparse.h";
main := |*
- IMP_Define IMP_Word IMP_Number => { cerr << ( "define" ) << endl; };
- IMP_Word '=' IMP_Number => { cerr << ( "const1" ) << endl; };
- IMP_Word '=' IMP_Literal => { cerr << ( "const2" ) << endl; };
- any;
- *|;
-}%%
-
-%% write data;
-
-void ImportScanner::token( int token, char *start, char *end )
-{
- if ( cur_token == max_tokens ) {
- int *p = token_data;
- int *pe = token_data + cur_token;
-
- %% write init;
- %% write exec;
-
- if ( tok_tokstart == 0 )
- cur_token = 0;
- else {
- cerr << "BLOCK BREAK" << endl;
- cur_token = pe - tok_tokstart;
- memmove( token_data, tok_tokstart, cur_token*sizeof(int) );
- }
- }
-
- token_data[cur_token++] = token;
-}
-
-%%{
- machine inline_scan;
- access chr_;
-
- # This is sent by the driver code.
- EOF = 0;
- NL = '\n';
-
- # Identifiers, numbers, commetns, and other common things.
- ident = ( alpha | '_' ) ( alpha |digit |'_' )*;
- number = digit+;
- hex_number = '0x' [0-9a-fA-F]+;
-
- c_comment =
- '/*' ( any | NL )* :>> '*/';
-
- cpp_comment =
- '//' [^\n]* NL;
-
- c_cpp_comment = c_comment | cpp_comment;
+ # Define of number.
+ IMP_Define IMP_Word IMP_UInt => {
+ int base = tok_ts - token_data;
+ int nameOff = 1;
+ int numOff = 2;
+
+ directToParser( inclToParser, fileName, line, column, TK_Word,
+ token_strings[base+nameOff], token_lens[base+nameOff] );
+ directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
+ directToParser( inclToParser, fileName, line, column, TK_UInt,
+ token_strings[base+numOff], token_lens[base+numOff] );
+ directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
+ };
- # These literal forms are common to C-like host code and ragel.
- s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'";
- d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"';
+ # Assignment of number.
+ IMP_Word '=' IMP_UInt => {
+ int base = tok_ts - token_data;
+ int nameOff = 0;
+ int numOff = 2;
+
+ directToParser( inclToParser, fileName, line, column, TK_Word,
+ token_strings[base+nameOff], token_lens[base+nameOff] );
+ directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
+ directToParser( inclToParser, fileName, line, column, TK_UInt,
+ token_strings[base+numOff], token_lens[base+numOff] );
+ directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
+ };
- whitespace = [ \t] | NL;
+ # Define of literal.
+ IMP_Define IMP_Word IMP_Literal => {
+ int base = tok_ts - token_data;
+ int nameOff = 1;
+ int litOff = 2;
+
+ directToParser( inclToParser, fileName, line, column, TK_Word,
+ token_strings[base+nameOff], token_lens[base+nameOff] );
+ directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
+ directToParser( inclToParser, fileName, line, column, TK_Literal,
+ token_strings[base+litOff], token_lens[base+litOff] );
+ directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
+ };
+ # Assignment of literal.
+ IMP_Word '=' IMP_Literal => {
+ int base = tok_ts - token_data;
+ int nameOff = 0;
+ int litOff = 2;
+
+ directToParser( inclToParser, fileName, line, column, TK_Word,
+ token_strings[base+nameOff], token_lens[base+nameOff] );
+ directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
+ directToParser( inclToParser, fileName, line, column, TK_Literal,
+ token_strings[base+litOff], token_lens[base+litOff] );
+ directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
+ };
- # Outside code scanner. These tokens get passed through.
- main := |*
- 'define' => { token( IMP_Define, 0, 0 ); };
- ident => { token( IMP_Word, chr_tokstart, chr_tokend ); };
- number => { token( IMP_Number, chr_tokstart, chr_tokend ); };
- c_cpp_comment;
- s_literal | d_literal => { token( IMP_Literal, chr_tokstart, chr_tokend ); };
- whitespace+;
- EOF;
- any => { token( *chr_tokstart, 0, 0 ); };
+ # Catch everything else.
+ any;
*|;
}%%
%% write data;
-void ImportScanner::do_scan()
+void Scanner::flushImport()
{
- int bufsize = 8;
- char *buf = new char[bufsize];
- const char last_char = 0;
- int chr_cs, chr_act, have = 0;
- bool execute = true;
-
- /* Init the section parser and the character scanner. */
- %% write init;
-
- while ( execute ) {
- char *p = buf + have;
- int space = bufsize - have;
-
- if ( space == 0 ) {
- /* We filled up the buffer trying to scan a token. Grow it. */
- bufsize = bufsize * 2;
- char *newbuf = new char[bufsize];
-
- /* Recompute p and space. */
- p = newbuf + have;
- space = bufsize - have;
-
- /* Patch up pointers possibly in use. */
- if ( chr_tokstart != 0 )
- chr_tokstart = newbuf + ( chr_tokstart - buf );
- chr_tokend = newbuf + ( chr_tokend - buf );
-
- /* Copy the new buffer in. */
- memcpy( newbuf, buf, have );
- delete[] buf;
- buf = newbuf;
- }
-
- input.read( p, space );
- int len = input.gcount();
-
- /* If we see eof then append the EOF char. */
- if ( len == 0 ) {
- p[0] = last_char, len = 1;
- execute = false;
- }
+ int *p = token_data;
+ int *pe = token_data + cur_token;
+ int *eof = 0;
- char *pe = p + len;
- %% write exec;
+ %%{
+ machine inline_token_scan;
+ write init;
+ write exec;
+ }%%
- /* Check if we failed. */
- if ( chr_cs == inline_scan_error ) {
- /* Machine failed before finding a token. I'm not yet sure if this
- * is reachable. */
- scan_error() << "scanner error" << endl;
- exit(1);
- }
+ if ( tok_ts == 0 )
+ cur_token = 0;
+ else {
+ cur_token = pe - tok_ts;
+ int ts_offset = tok_ts - token_data;
+ memmove( token_data, token_data+ts_offset, cur_token*sizeof(token_data[0]) );
+ memmove( token_strings, token_strings+ts_offset, cur_token*sizeof(token_strings[0]) );
+ memmove( token_lens, token_lens+ts_offset, cur_token*sizeof(token_lens[0]) );
+ }
+}
- /* Decide if we need to preserve anything. */
- char *preserve = chr_tokstart;
+void Scanner::directToParser( Parser *toParser, const char *tokFileName, int tokLine,
+ int tokColumn, int type, char *tokdata, int toklen )
+{
+ InputLoc loc;
+
+ #ifdef LOG_TOKENS
+ cerr << "scanner:" << tokLine << ":" << tokColumn <<
+ ": sending token to the parser " << Parser_lelNames[type];
+ cerr << " " << toklen;
+ if ( tokdata != 0 )
+ cerr << " " << tokdata;
+ cerr << endl;
+ #endif
+
+ loc.fileName = tokFileName;
+ loc.line = tokLine;
+ loc.col = tokColumn;
+
+ toParser->token( loc, type, tokdata, toklen );
+}
- /* Now set up the prefix. */
- if ( preserve == 0 )
- have = 0;
- else {
- /* There is data that needs to be shifted over. */
- have = pe - preserve;
- memmove( buf, preserve, have );
- unsigned int shiftback = preserve - buf;
- if ( chr_tokstart != 0 )
- chr_tokstart -= shiftback;
- chr_tokend -= shiftback;
+void Scanner::importToken( int token, char *start, char *end )
+{
+ if ( cur_token == max_tokens )
+ flushImport();
- preserve = buf;
- }
+ token_data[cur_token] = token;
+ if ( start == 0 ) {
+ token_strings[cur_token] = 0;
+ token_lens[cur_token] = 0;
}
-
- delete[] buf;
+ else {
+ int toklen = end-start;
+ token_lens[cur_token] = toklen;
+ token_strings[cur_token] = new char[toklen+1];
+ memcpy( token_strings[cur_token], start, toklen );
+ token_strings[cur_token][toklen] = 0;
+ }
+ cur_token++;
}
-ostream &ImportScanner::scan_error()
+void Scanner::pass( int token, char *start, char *end )
{
- /* Maintain the error count. */
- gblErrorCount += 1;
- cerr << fileName << ":" << line << ":" << column << ": ";
- return cerr;
+ if ( importMachines )
+ importToken( token, start, end );
+ pass();
}
+void Scanner::pass()
+{
+ updateCol();
-/*
- * The Ragel Scanner
- */
+ /* If no errors and we are at the bottom of the include stack (the
+ * source file listed on the command line) then write out the data. */
+ if ( includeDepth == 0 && machineSpec == 0 && machineName == 0 )
+ id.inputItems.tail->data.write( ts, te-ts );
+}
-enum InlineBlockType
-{
- CurlyDelimited,
- SemiTerminated
-};
+/*
+ * The scanner for processing sections, includes, imports, etc.
+ */
%%{
machine section_parse;
return false;
if ( parser == 0 && ! parserExistsError ) {
- scan_error() << "there is no previous specification name" << endl;
+ scan_error() << "this specification has no name, nor does any previous"
+ " specification" << endl;
parserExistsError = true;
}
{
/* Maintain the error count. */
gblErrorCount += 1;
- cerr << fileName << ":" << line << ":" << column << ": ";
+ cerr << makeInputLoc( fileName, line, column ) << ": ";
return cerr;
}
-bool Scanner::recursiveInclude( char *inclFileName, char *inclSectionName )
+/* An approximate check for duplicate includes. Due to aliasing of files it's
+ * possible for duplicates to creep in. */
+bool Scanner::duplicateInclude( char *inclFileName, char *inclSectionName )
{
- for ( IncludeStack::Iter si = includeStack; si.lte(); si++ ) {
- if ( strcmp( si->fileName, inclFileName ) == 0 &&
- strcmp( si->sectionName, inclSectionName ) == 0 )
+ for ( IncludeHistory::Iter hi = parser->includeHistory; hi.lte(); hi++ ) {
+ if ( strcmp( hi->fileName, inclFileName ) == 0 &&
+ strcmp( hi->sectionName, inclSectionName ) == 0 )
{
return true;
}
{
char *from = lastnl;
if ( from == 0 )
- from = tokstart;
- //cerr << "adding " << tokend - from << " to column" << endl;
- column += tokend - from;
+ from = ts;
+ //cerr << "adding " << te - from << " to column" << endl;
+ column += te - from;
lastnl = 0;
}
-void Scanner::token( int type, char c )
+void Scanner::handleMachine()
{
- token( type, &c, &c + 1 );
+ /* Assign a name to the machine. */
+ char *machine = word;
+
+ if ( !importMachines && inclSectionTarg == 0 ) {
+ ignoreSection = false;
+
+ ParserDictEl *pdEl = id.parserDict.find( machine );
+ if ( pdEl == 0 ) {
+ pdEl = new ParserDictEl( machine );
+ pdEl->value = new Parser( fileName, machine, sectionLoc );
+ pdEl->value->init();
+ id.parserDict.insert( pdEl );
+ id.parserList.append( pdEl->value );
+ }
+
+ parser = pdEl->value;
+ }
+ else if ( !importMachines && strcmp( inclSectionTarg, machine ) == 0 ) {
+ /* found include target */
+ ignoreSection = false;
+ parser = inclToParser;
+ }
+ else {
+ /* ignoring section */
+ ignoreSection = true;
+ parser = 0;
+ }
}
-void Scanner::token( int type )
+void Scanner::handleInclude()
{
- token( type, 0, 0 );
+ if ( active() ) {
+ char *inclSectionName = word;
+ char **includeChecks = 0;
+
+ /* Implement defaults for the input file and section name. */
+ if ( inclSectionName == 0 )
+ inclSectionName = parser->sectionName;
+
+ if ( lit != 0 )
+ includeChecks = makeIncludePathChecks( fileName, lit, lit_len );
+ else {
+ char *test = new char[strlen(fileName)+1];
+ strcpy( test, fileName );
+
+ includeChecks = new char*[2];
+
+ includeChecks[0] = test;
+ includeChecks[1] = 0;
+ }
+
+ long found = 0;
+ ifstream *inFile = tryOpenInclude( includeChecks, found );
+ if ( inFile == 0 ) {
+ scan_error() << "include: failed to locate file" << endl;
+ char **tried = includeChecks;
+ while ( *tried != 0 )
+ scan_error() << "include: attempted: \"" << *tried++ << '\"' << endl;
+ }
+ else {
+ /* Don't include anything that's already been included. */
+ if ( !duplicateInclude( includeChecks[found], inclSectionName ) ) {
+ parser->includeHistory.append( IncludeHistoryItem(
+ includeChecks[found], inclSectionName ) );
+
+ Scanner scanner( id, includeChecks[found], *inFile, parser,
+ inclSectionName, includeDepth+1, false );
+ scanner.do_scan( );
+ delete inFile;
+ }
+ }
+ }
+}
+
+void Scanner::handleImport()
+{
+ if ( active() ) {
+ char **importChecks = makeIncludePathChecks( fileName, lit, lit_len );
+
+ /* Open the input file for reading. */
+ long found = 0;
+ ifstream *inFile = tryOpenInclude( importChecks, found );
+ if ( inFile == 0 ) {
+ scan_error() << "import: could not open import file " <<
+ "for reading" << endl;
+ char **tried = importChecks;
+ while ( *tried != 0 )
+ scan_error() << "import: attempted: \"" << *tried++ << '\"' << endl;
+ }
+
+ Scanner scanner( id, importChecks[found], *inFile, parser,
+ 0, includeDepth+1, true );
+ scanner.do_scan( );
+ scanner.importToken( 0, 0, 0 );
+ scanner.flushImport();
+ delete inFile;
+ }
}
%%{
machine section_parse;
- # This relies on the the kelbt implementation and the order
- # that tokens are declared.
- KW_Machine = 128;
- KW_Include = 129;
- KW_Write = 130;
- TK_Word = 131;
- TK_Literal = 132;
+ # Need the defines representing tokens.
+ import "rlparse.h";
action clear_words { word = lit = 0; word_len = lit_len = 0; }
action store_word { word = tokdata; word_len = toklen; }
action mach_err { scan_error() << "bad machine statement" << endl; }
action incl_err { scan_error() << "bad include statement" << endl; }
+ action import_err { scan_error() << "bad import statement" << endl; }
action write_err { scan_error() << "bad write statement" << endl; }
- action handle_machine
- {
- /* Assign a name to the machine. */
- char *machine = word;
-
- if ( inclSectionTarg == 0 ) {
- ignoreSection = false;
-
- ParserDictEl *pdEl = parserDict.find( machine );
- if ( pdEl == 0 ) {
- pdEl = new ParserDictEl( machine );
- pdEl->value = new Parser( fileName, machine, sectionLoc );
- pdEl->value->init();
- parserDict.insert( pdEl );
- }
-
- parser = pdEl->value;
- }
- else if ( strcmp( inclSectionTarg, machine ) == 0 ) {
- /* found include target */
- ignoreSection = false;
- parser = inclToParser;
- }
- else {
- /* ignoring section */
- ignoreSection = true;
- parser = 0;
- }
- }
+ action handle_machine { handleMachine(); }
+ action handle_include { handleInclude(); }
+ action handle_import { handleImport(); }
machine_stmt =
( KW_Machine TK_Word @store_word ';' ) @handle_machine
<>err mach_err <>eof mach_err;
- action handle_include
- {
- if ( active() ) {
- char *inclSectionName = word;
- char *inclFileName = 0;
-
- /* Implement defaults for the input file and section name. */
- if ( inclSectionName == 0 )
- inclSectionName = parser->sectionName;
-
- if ( lit != 0 )
- inclFileName = prepareFileName( lit, lit_len );
- else
- inclFileName = fileName;
-
- /* Check for a recursive include structure. Add the current file/section
- * name then check if what we are including is already in the stack. */
- includeStack.append( IncludeStackItem( fileName, parser->sectionName ) );
-
- if ( recursiveInclude( inclFileName, inclSectionName ) )
- scan_error() << "include: this is a recursive include operation" << endl;
- else {
- /* Open the input file for reading. */
- ifstream *inFile = new ifstream( inclFileName );
- if ( ! inFile->is_open() ) {
- scan_error() << "include: could not open " <<
- inclFileName << " for reading" << endl;
- }
-
- Scanner scanner( inclFileName, *inFile, output, parser,
- inclSectionName, includeDepth+1 );
- scanner.do_scan( );
- delete inFile;
- }
-
- /* Remove the last element (len-1) */
- includeStack.remove( -1 );
- }
- }
-
include_names = (
TK_Word @store_word ( TK_Literal @store_lit )? |
TK_Literal @store_lit
( KW_Include include_names ';' ) @handle_include
<>err incl_err <>eof incl_err;
+ import_stmt =
+ ( KW_Import TK_Literal @store_lit ';' ) @handle_import
+ <>err import_err <>eof import_err;
+
action write_command
{
if ( active() && machineSpec == 0 && machineName == 0 ) {
- output << "<write"
- " def_name=\"" << parser->sectionName << "\""
- " line=\"" << line << "\""
- " col=\"" << column << "\""
- ">";
+ InputItem *inputItem = new InputItem;
+ inputItem->type = InputItem::Write;
+ inputItem->loc.line = line;
+ inputItem->loc.col = column;
+ inputItem->name = parser->sectionName;
+ inputItem->pd = parser->pd;
+ id.inputItems.append( inputItem );
}
}
action write_arg
{
if ( active() && machineSpec == 0 && machineName == 0 )
- output << "<arg>" << tokdata << "</arg>";
+ id.inputItems.tail->writeArgs.append( strdup(tokdata) );
}
action write_close
{
if ( active() && machineSpec == 0 && machineName == 0 )
- output << "</write>\n";
+ id.inputItems.tail->writeArgs.append( 0 );
}
write_stmt =
action handle_token
{
/* Send the token off to the parser. */
- if ( active() ) {
- InputLoc loc;
-
- #if 0
- cerr << "scanner:" << line << ":" << column <<
- ": sending token to the parser " << Parser_lelNames[*p];
- cerr << " " << toklen;
- if ( tokdata != 0 )
- cerr << " " << tokdata;
- cerr << endl;
- #endif
-
- loc.fileName = fileName;
- loc.line = line;
- loc.col = column;
-
- parser->token( loc, type, tokdata, toklen );
- }
+ if ( active() )
+ directToParser( parser, fileName, line, column, type, tokdata, toklen );
}
# Catch everything else.
- everything_else = ^( KW_Machine | KW_Include | KW_Write ) @handle_token;
+ everything_else =
+ ^( KW_Machine | KW_Include | KW_Import | KW_Write ) @handle_token;
main := (
machine_stmt |
include_stmt |
+ import_stmt |
write_stmt |
everything_else
)*;
}%%
+void Scanner::token( int type, char c )
+{
+ token( type, &c, &c + 1 );
+}
+
+void Scanner::token( int type )
+{
+ token( type, 0, 0 );
+}
+
void Scanner::token( int type, char *start, char *end )
{
char *tokdata = 0;
int toklen = 0;
- int *p = &type;
- int *pe = &type + 1;
-
if ( start != 0 ) {
toklen = end-start;
tokdata = new char[toklen+1];
tokdata[toklen] = 0;
}
+ processToken( type, tokdata, toklen );
+}
+
+void Scanner::processToken( int type, char *tokdata, int toklen )
+{
+ int *p, *pe, *eof;
+
+ if ( type < 0 )
+ p = pe = eof = 0;
+ else {
+ p = &type;
+ pe = &type + 1;
+ eof = 0;
+ }
+
%%{
machine section_parse;
write exec;
{
parserExistsError = false;
- if ( includeDepth == 0 ) {
- if ( machineSpec == 0 && machineName == 0 )
- output << "</host>\n";
- }
-
sectionLoc.fileName = fileName;
sectionLoc.line = line;
- sectionLoc.col = 0;
+ sectionLoc.col = column;
}
void Scanner::endSection( )
{
/* Execute the eof actions for the section parser. */
- %%{
- machine section_parse;
- write eof;
- }%%
+ processToken( -1, 0, 0 );
/* Close off the section with the parser. */
if ( active() ) {
InputLoc loc;
loc.fileName = fileName;
loc.line = line;
- loc.col = 0;
+ loc.col = column;
parser->token( loc, TK_EndSection, 0, 0 );
}
if ( machineSpec == 0 && machineName == 0 ) {
/* The end section may include a newline on the end, so
* we use the last line, which will count the newline. */
- output << "<host line=\"" << line << "\">";
+ InputItem *inputItem = new InputItem;
+ inputItem->type = InputItem::HostData;
+ inputItem->loc.line = line;
+ inputItem->loc.col = column;
+ id.inputItems.append( inputItem );
+ }
+ }
+}
+
+bool isAbsolutePath( const char *path )
+{
+#ifdef _WIN32
+ return isalpha( path[0] ) && path[1] == ':' && path[2] == '\\';
+#else
+ return path[0] == '/';
+#endif
+}
+
+char **Scanner::makeIncludePathChecks( const char *thisFileName,
+ const char *fileName, int fnlen )
+{
+ char **checks = new char*[2];
+ long nextCheck = 0;
+
+ bool caseInsensitive = false;
+ long length = 0;
+ char *data = prepareLitString( InputLoc(), fileName, fnlen,
+ length, caseInsensitive );
+
+ /* Absolute path? */
+ if ( isAbsolutePath( data ) )
+ checks[nextCheck++] = data;
+ else {
+		/* Search from the location of the current file. */
+ const char *lastSlash = strrchr( thisFileName, PATH_SEP );
+ if ( lastSlash == 0 )
+ checks[nextCheck++] = data;
+ else {
+ long givenPathLen = (lastSlash - thisFileName) + 1;
+ long checklen = givenPathLen + length;
+ char *check = new char[checklen+1];
+ memcpy( check, thisFileName, givenPathLen );
+ memcpy( check+givenPathLen, data, length );
+ check[checklen] = 0;
+ checks[nextCheck++] = check;
+ }
+
+ /* Search from the include paths given on the command line. */
+ for ( ArgsVector::Iter incp = id.includePaths; incp.lte(); incp++ ) {
+ long pathLen = strlen( *incp );
+ long checkLen = pathLen + 1 + length;
+ char *check = new char[checkLen+1];
+ memcpy( check, *incp, pathLen );
+ check[pathLen] = PATH_SEP;
+ memcpy( check+pathLen+1, data, length );
+ check[checkLen] = 0;
+ checks[nextCheck++] = check;
+ }
+ }
+
+ checks[nextCheck] = 0;
+ return checks;
+}
+
+ifstream *Scanner::tryOpenInclude( char **pathChecks, long &found )
+{
+ char **check = pathChecks;
+ ifstream *inFile = new ifstream;
+
+ while ( *check != 0 ) {
+ inFile->open( *check );
+ if ( inFile->is_open() ) {
+ found = check - pathChecks;
+ return inFile;
}
+ check += 1;
}
+
+ found = -1;
+ delete inFile;
+ return 0;
}
%%{
c_cpp_comment = c_comment | cpp_comment;
- # These literal forms are common to C-like host code and ragel.
+ ruby_comment = '#' [^\n]* NL;
+
+ # These literal forms are common to host code and ragel.
s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'";
d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"';
+ host_re_literal = '/' ([^/\\] | NL | '\\' (any | NL))* '/';
whitespace = [ \t] | NL;
pound_comment = '#' [^\n]* NL;
- # An inline block of code. This is specified as a scanned, but is sent to
- # the parser as one long block. The inline_block pointer is used to handle
- # the preservation of the data.
+ # An inline block of code for Ruby.
+ inline_code_ruby := |*
+ # Inline expression keywords.
+ "fpc" => { token( KW_PChar ); };
+ "fc" => { token( KW_Char ); };
+ "fcurs" => { token( KW_CurState ); };
+ "ftargs" => { token( KW_TargState ); };
+ "fentry" => {
+ whitespaceOn = false;
+ token( KW_Entry );
+ };
+
+ # Inline statement keywords.
+ "fhold" => {
+ whitespaceOn = false;
+ token( KW_Hold );
+ };
+ "fexec" => { token( KW_Exec, 0, 0 ); };
+ "fgoto" => {
+ whitespaceOn = false;
+ token( KW_Goto );
+ };
+ "fnext" => {
+ whitespaceOn = false;
+ token( KW_Next );
+ };
+ "fcall" => {
+ whitespaceOn = false;
+ token( KW_Call );
+ };
+ "fret" => {
+ whitespaceOn = false;
+ token( KW_Ret );
+ };
+ "fbreak" => {
+ whitespaceOn = false;
+ token( KW_Break );
+ };
+
+ ident => { token( TK_Word, ts, te ); };
+
+ number => { token( TK_UInt, ts, te ); };
+ hex_number => { token( TK_Hex, ts, te ); };
+
+ ( s_literal | d_literal | host_re_literal )
+ => { token( IL_Literal, ts, te ); };
+
+ whitespace+ => {
+ if ( whitespaceOn )
+ token( IL_WhiteSpace, ts, te );
+ };
+
+ ruby_comment => { token( IL_Comment, ts, te ); };
+
+ "::" => { token( TK_NameSep, ts, te ); };
+
+ # Some symbols need to go to the parser as with their cardinal value as
+ # the token type (as opposed to being sent as anonymous symbols)
+ # because they are part of the sequences which we interpret. The * ) ;
+ # symbols cause whitespace parsing to come back on. This gets turned
+ # off by some keywords.
+
+ ";" => {
+ whitespaceOn = true;
+ token( *ts, ts, te );
+ if ( inlineBlockType == SemiTerminated )
+ fret;
+ };
+
+ [*)] => {
+ whitespaceOn = true;
+ token( *ts, ts, te );
+ };
+
+ [,(] => { token( *ts, ts, te ); };
+
+ '{' => {
+ token( IL_Symbol, ts, te );
+ curly_count += 1;
+ };
+
+ '}' => {
+ if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) {
+ /* Inline code block ends. */
+ token( '}' );
+ fret;
+ }
+ else {
+ /* Either a semi terminated inline block or only the closing
+ * brace of some inner scope, not the block's closing brace. */
+ token( IL_Symbol, ts, te );
+ }
+ };
+
+ EOF => {
+ scan_error() << "unterminated code block" << endl;
+ };
+
+ # Send every other character as a symbol.
+ any => { token( IL_Symbol, ts, te ); };
+ *|;
+
+
+ # An inline block of code for languages other than Ruby.
inline_code := |*
# Inline expression keywords.
"fpc" => { token( KW_PChar ); };
token( KW_Break );
};
- ident => { token( TK_Word, tokstart, tokend ); };
+ ident => { token( TK_Word, ts, te ); };
- number => { token( TK_UInt, tokstart, tokend ); };
- hex_number => { token( TK_Hex, tokstart, tokend ); };
+ number => { token( TK_UInt, ts, te ); };
+ hex_number => { token( TK_Hex, ts, te ); };
( s_literal | d_literal )
- => { token( IL_Literal, tokstart, tokend ); };
+ => { token( IL_Literal, ts, te ); };
whitespace+ => {
if ( whitespaceOn )
- token( IL_WhiteSpace, tokstart, tokend );
+ token( IL_WhiteSpace, ts, te );
};
- c_cpp_comment => { token( IL_Comment, tokstart, tokend ); };
- "::" => { token( TK_NameSep, tokstart, tokend ); };
+ c_cpp_comment => { token( IL_Comment, ts, te ); };
+
+ "::" => { token( TK_NameSep, ts, te ); };
# Some symbols need to go to the parser as with their cardinal value as
# the token type (as opposed to being sent as anonymous symbols)
";" => {
whitespaceOn = true;
- token( *tokstart, tokstart, tokend );
+ token( *ts, ts, te );
if ( inlineBlockType == SemiTerminated )
- fgoto parser_def;
+ fret;
};
[*)] => {
whitespaceOn = true;
- token( *tokstart, tokstart, tokend );
+ token( *ts, ts, te );
};
- [,(] => { token( *tokstart, tokstart, tokend ); };
+ [,(] => { token( *ts, ts, te ); };
'{' => {
- token( IL_Symbol, tokstart, tokend );
+ token( IL_Symbol, ts, te );
curly_count += 1;
};
if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) {
/* Inline code block ends. */
token( '}' );
- fgoto parser_def;
+ fret;
}
else {
/* Either a semi terminated inline block or only the closing
* brace of some inner scope, not the block's closing brace. */
- token( IL_Symbol, tokstart, tokend );
+ token( IL_Symbol, ts, te );
}
};
};
# Send every other character as a symbol.
- any => { token( IL_Symbol, tokstart, tokend ); };
+ any => { token( IL_Symbol, ts, te ); };
*|;
or_literal := |*
'\\f' => { token( RE_Char, '\f' ); };
'\\r' => { token( RE_Char, '\r' ); };
'\\\n' => { updateCol(); };
- '\\' any => { token( RE_Char, tokstart+1, tokend ); };
+ '\\' any => { token( RE_Char, ts+1, te ); };
# Range dash in an OR expression.
'-' => { token( RE_Dash, 0, 0 ); };
};
# Characters in an OR expression.
- [^\]] => { token( RE_Char, tokstart, tokend ); };
+ [^\]] => { token( RE_Char, ts, te ); };
*|;
- re_literal := |*
+ ragel_re_literal := |*
# Escape sequences in regular expressions.
'\\0' => { token( RE_Char, '\0' ); };
'\\a' => { token( RE_Char, '\a' ); };
'\\f' => { token( RE_Char, '\f' ); };
'\\r' => { token( RE_Char, '\r' ); };
'\\\n' => { updateCol(); };
- '\\' any => { token( RE_Char, tokstart+1, tokend ); };
+ '\\' any => { token( RE_Char, ts+1, te ); };
# Terminate an OR expression.
'/' [i]? => {
- token( RE_Slash, tokstart, tokend );
+ token( RE_Slash, ts, te );
fgoto parser_def;
};
};
# Characters in an OR expression.
- [^\/] => { token( RE_Char, tokstart, tokend ); };
+ [^\/] => { token( RE_Char, ts, te ); };
*|;
# We need a separate token space here to avoid the ragel keywords.
write_statement := |*
- ident => { token( TK_Word, tokstart, tokend ); } ;
+ ident => { token( TK_Word, ts, te ); } ;
[ \t\n]+ => { updateCol(); };
';' => { token( ';' ); fgoto parser_def; };
# Parser definitions.
parser_def := |*
+ 'length_cond' => { token( KW_Length ); };
'machine' => { token( KW_Machine ); };
'include' => { token( KW_Include ); };
+ 'import' => { token( KW_Import ); };
'write' => {
token( KW_Write );
fgoto write_statement;
};
'action' => { token( KW_Action ); };
'alphtype' => { token( KW_AlphType ); };
+ 'prepush' => { token( KW_PrePush ); };
+ 'postpop' => { token( KW_PostPop ); };
# FIXME: Enable this post 5.17.
# 'range' => { token( KW_Range ); };
'getkey' => {
token( KW_GetKey );
inlineBlockType = SemiTerminated;
- fgoto inline_code;
+ if ( hostLang->lang == HostLang::Ruby )
+ fcall inline_code_ruby;
+ else
+ fcall inline_code;
};
'access' => {
token( KW_Access );
inlineBlockType = SemiTerminated;
- fgoto inline_code;
+ if ( hostLang->lang == HostLang::Ruby )
+ fcall inline_code_ruby;
+ else
+ fcall inline_code;
};
'variable' => {
token( KW_Variable );
inlineBlockType = SemiTerminated;
- fgoto inline_code;
+ if ( hostLang->lang == HostLang::Ruby )
+ fcall inline_code_ruby;
+ else
+ fcall inline_code;
};
'when' => { token( KW_When ); };
+ 'inwhen' => { token( KW_InWhen ); };
+ 'outwhen' => { token( KW_OutWhen ); };
'eof' => { token( KW_Eof ); };
'err' => { token( KW_Err ); };
'lerr' => { token( KW_Lerr ); };
'export' => { token( KW_Export ); };
# Identifiers.
- ident => { token( TK_Word, tokstart, tokend ); } ;
+ ident => { token( TK_Word, ts, te ); } ;
# Numbers
- number => { token( TK_UInt, tokstart, tokend ); };
- hex_number => { token( TK_Hex, tokstart, tokend ); };
+ number => { token( TK_UInt, ts, te ); };
+ hex_number => { token( TK_Hex, ts, te ); };
# Literals, with optionals.
( s_literal | d_literal ) [i]?
- => { token( TK_Literal, tokstart, tokend ); };
+ => { token( TK_Literal, ts, te ); };
'[' => { token( RE_SqOpen ); fcall or_literal; };
'[^' => { token( RE_SqOpenNeg ); fcall or_literal; };
- '/' => { token( RE_Slash ); fgoto re_literal; };
+ '/' => { token( RE_Slash ); fgoto ragel_re_literal; };
# Ignore.
pound_comment => { updateCol(); };
"|*" => { token( TK_BarStar ); };
# Separater for name references.
- "::" => { token( TK_NameSep, tokstart, tokend ); };
+ "::" => { token( TK_NameSep, ts, te ); };
'}%%' => {
updateCol();
endSection();
- fgoto main;
+ fret;
};
[ \t\r]+ => { updateCol(); };
updateCol();
if ( singleLineSpec ) {
endSection();
- fgoto main;
+ fret;
}
};
token( '{' );
curly_count = 1;
inlineBlockType = CurlyDelimited;
- fgoto inline_code;
+ if ( hostLang->lang == HostLang::Ruby )
+ fcall inline_code_ruby;
+ else
+ fcall inline_code;
}
};
scan_error() << "unterminated ragel section" << endl;
};
- any => { token( *tokstart ); } ;
+ any => { token( *ts ); } ;
*|;
- action pass {
- updateCol();
+ # Outside code scanner. These tokens get passed through.
+ main_ruby := |*
+ ident => { pass( IMP_Word, ts, te ); };
+ number => { pass( IMP_UInt, ts, te ); };
+ ruby_comment => { pass(); };
+ ( s_literal | d_literal | host_re_literal )
+ => { pass( IMP_Literal, ts, te ); };
- /* If no errors and we are at the bottom of the include stack (the
- * source file listed on the command line) then write out the data. */
- if ( includeDepth == 0 && machineSpec == 0 && machineName == 0 )
- xmlEscapeHost( output, tokstart, tokend-tokstart );
- }
+ '%%{' => {
+ updateCol();
+ singleLineSpec = false;
+ startSection();
+ fcall parser_def;
+ };
+ '%%' => {
+ updateCol();
+ singleLineSpec = true;
+ startSection();
+ fcall parser_def;
+ };
+ whitespace+ => { pass(); };
+ EOF;
+ any => { pass( *ts, 0, 0 ); };
+ *|;
# Outside code scanner. These tokens get passed through.
main := |*
- ident => pass;
- number => pass;
- c_cpp_comment => pass;
- s_literal | d_literal => pass;
+ 'define' => { pass( IMP_Define, 0, 0 ); };
+ ident => { pass( IMP_Word, ts, te ); };
+ number => { pass( IMP_UInt, ts, te ); };
+ c_cpp_comment => { pass(); };
+ ( s_literal | d_literal ) => { pass( IMP_Literal, ts, te ); };
+
'%%{' => {
updateCol();
singleLineSpec = false;
startSection();
- fgoto parser_def;
+ fcall parser_def;
};
'%%' => {
updateCol();
singleLineSpec = true;
startSection();
- fgoto parser_def;
+ fcall parser_def;
};
- whitespace+ => pass;
+ whitespace+ => { pass(); };
EOF;
- any => pass;
+ any => { pass( *ts, 0, 0 ); };
*|;
-
}%%
%% write data;
{
int bufsize = 8;
char *buf = new char[bufsize];
- const char last_char = 0;
int cs, act, have = 0;
- int top, stack[1];
+ int top;
+
+ /* The stack is two deep, one level for going into ragel defs from the main
+ * machines which process outside code, and another for going into or literals
+ * from either a ragel spec, or a regular expression. */
+ int stack[2];
int curly_count = 0;
bool execute = true;
bool singleLineSpec = false;
init();
%% write init;
+ /* Set up the start state. FIXME: After 5.20 is released the nocs write
+ * init option should be used, the main machine eliminated and this statement moved
+ * above the write init. */
+ if ( hostLang->lang == HostLang::Ruby )
+ cs = rlscan_en_main_ruby;
+ else
+ cs = rlscan_en_main;
+
while ( execute ) {
char *p = buf + have;
int space = bufsize - have;
space = bufsize - have;
/* Patch up pointers possibly in use. */
- if ( tokstart != 0 )
- tokstart = newbuf + ( tokstart - buf );
- tokend = newbuf + ( tokend - buf );
+ if ( ts != 0 )
+ ts = newbuf + ( ts - buf );
+ te = newbuf + ( te - buf );
/* Copy the new buffer in. */
memcpy( newbuf, buf, have );
input.read( p, space );
int len = input.gcount();
+ char *pe = p + len;
- /* If we see eof then append the EOF char. */
+		/* If we see eof then set the eof var. */
+ char *eof = 0;
if ( len == 0 ) {
- p[0] = last_char, len = 1;
+ eof = pe;
execute = false;
}
- char *pe = p + len;
%% write exec;
/* Check if we failed. */
}
/* Decide if we need to preserve anything. */
- char *preserve = tokstart;
+ char *preserve = ts;
/* Now set up the prefix. */
if ( preserve == 0 )
have = pe - preserve;
memmove( buf, preserve, have );
unsigned int shiftback = preserve - buf;
- if ( tokstart != 0 )
- tokstart -= shiftback;
- tokend -= shiftback;
+ if ( ts != 0 )
+ ts -= shiftback;
+ te -= shiftback;
preserve = buf;
}
delete[] buf;
}
-
-void scan( char *fileName, istream &input, ostream &output )
-{
-}