X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;ds=sidebyside;f=ragel%2Frlscan.rl;h=3c325c31e193bb3ce3e00b933a6f36c714d1310b;hb=9f3c2baa91083bb5b33b4f3ec07f58d900157e32;hp=e8154a15971252dc6ec134f60574a36d6572afda;hpb=58fb5a6af378fca241c794d6da5d4090835e94ba;p=external%2Fragel.git diff --git a/ragel/rlscan.rl b/ragel/rlscan.rl index e8154a1..3c325c3 100644 --- a/ragel/rlscan.rl +++ b/ragel/rlscan.rl @@ -1,5 +1,5 @@ /* - * Copyright 2006 Adrian Thurston + * Copyright 2006-2007 Adrian Thurston */ /* This file is part of Ragel. @@ -24,11 +24,10 @@ #include #include "ragel.h" -#include "rlparse.h" -#include "parsedata.h" -#include "avltree.h" -#include "vector.h" +#include "rlscan.h" +#include "inputdata.h" +//#define LOG_TOKENS using std::ifstream; using std::istream; @@ -37,96 +36,179 @@ using std::cout; using std::cerr; using std::endl; -/* This is used for tracking the current stack of include file/machine pairs. It is - * is used to detect and recursive include structure. */ -struct IncludeStackItem -{ - IncludeStackItem( char *fileName, char *sectionName ) - : fileName(fileName), sectionName(sectionName) {} - - char *fileName; - char *sectionName; -}; - -typedef Vector IncludeStack; -IncludeStack includeStack; - enum InlineBlockType { CurlyDelimited, SemiTerminated }; -struct Scanner +#ifdef _WIN32 +#define PATH_SEP '\\' +#else +#define PATH_SEP '/' +#endif + + +/* + * The Scanner for Importing + */ + +%%{ + machine inline_token_scan; + alphtype int; + access tok_; + + # Import scanner tokens. + import "rlparse.h"; + + main := |* + # Define of number. + IMP_Define IMP_Word IMP_UInt => { + int base = tok_ts - token_data; + int nameOff = 1; + int numOff = 2; + + directToParser( inclToParser, fileName, line, column, TK_Word, + token_strings[base+nameOff], token_lens[base+nameOff] ); + directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); + directToParser( inclToParser, fileName, line, column, TK_UInt, + token_strings[base+numOff], token_lens[base+numOff] ); + directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); + }; + + # Assignment of number. + IMP_Word '=' IMP_UInt => { + int base = tok_ts - token_data; + int nameOff = 0; + int numOff = 2; + + directToParser( inclToParser, fileName, line, column, TK_Word, + token_strings[base+nameOff], token_lens[base+nameOff] ); + directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); + directToParser( inclToParser, fileName, line, column, TK_UInt, + token_strings[base+numOff], token_lens[base+numOff] ); + directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); + }; + + # Define of literal. + IMP_Define IMP_Word IMP_Literal => { + int base = tok_ts - token_data; + int nameOff = 1; + int litOff = 2; + + directToParser( inclToParser, fileName, line, column, TK_Word, + token_strings[base+nameOff], token_lens[base+nameOff] ); + directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); + directToParser( inclToParser, fileName, line, column, TK_Literal, + token_strings[base+litOff], token_lens[base+litOff] ); + directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); + }; + + # Assignment of literal. 
+ IMP_Word '=' IMP_Literal => { + int base = tok_ts - token_data; + int nameOff = 0; + int litOff = 2; + + directToParser( inclToParser, fileName, line, column, TK_Word, + token_strings[base+nameOff], token_lens[base+nameOff] ); + directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); + directToParser( inclToParser, fileName, line, column, TK_Literal, + token_strings[base+litOff], token_lens[base+litOff] ); + directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); + }; + + # Catch everything else. + any; + *|; +}%% + +%% write data; + +void Scanner::flushImport() { - Scanner( char *fileName, istream &input, - Parser *inclToParser, char *inclSectionTarg, - int include_depth ) - : - fileName(fileName), input(input), - inclToParser(inclToParser), - inclSectionTarg(inclSectionTarg), - include_depth(include_depth), - line(1), column(1), lastnl(0), - parser(0), active(false), - parserExistsError(false), ragelDefOpen(false), - whitespaceOn(true) - {} - - bool recursiveInclude( IncludeStack &includeStack, - char *inclFileName, char *inclSectionName ); - - char *prepareFileName( char *fileName, int len ) - { - bool caseInsensitive; - Token tokenFnStr, tokenRes; - tokenFnStr.data = fileName; - tokenFnStr.length = len; - tokenFnStr.prepareLitString( tokenRes, caseInsensitive ); - return tokenRes.data; + int *p = token_data; + int *pe = token_data + cur_token; + int *eof = 0; + + %%{ + machine inline_token_scan; + write init; + write exec; + }%% + + if ( tok_ts == 0 ) + cur_token = 0; + else { + cur_token = pe - tok_ts; + int ts_offset = tok_ts - token_data; + memmove( token_data, token_data+ts_offset, cur_token*sizeof(token_data[0]) ); + memmove( token_strings, token_strings+ts_offset, cur_token*sizeof(token_strings[0]) ); + memmove( token_lens, token_lens+ts_offset, cur_token*sizeof(token_lens[0]) ); + } +} + +void Scanner::directToParser( Parser *toParser, const char *tokFileName, int tokLine, + int tokColumn, int type, char *tokdata, int toklen ) +{ + InputLoc loc; + + #ifdef LOG_TOKENS + cerr << "scanner:" << tokLine << ":" << tokColumn << + ": sending token to the parser " << Parser_lelNames[type]; + cerr << " " << toklen; + if ( tokdata != 0 ) + cerr << " " << tokdata; + cerr << endl; + #endif + + loc.fileName = tokFileName; + loc.line = tokLine; + loc.col = tokColumn; + + toParser->token( loc, type, tokdata, toklen ); +} + +void Scanner::importToken( int token, char *start, char *end ) +{ + if ( cur_token == max_tokens ) + flushImport(); + + token_data[cur_token] = token; + if ( start == 0 ) { + token_strings[cur_token] = 0; + token_lens[cur_token] = 0; } + else { + int toklen = end-start; + token_lens[cur_token] = toklen; + token_strings[cur_token] = new char[toklen+1]; + memcpy( token_strings[cur_token], start, toklen ); + token_strings[cur_token][toklen] = 0; + } + cur_token++; +} - void init(); - void token( int type, char *start, char *end ); - void token( int type, char c ); - void token( int type ); - void updateCol(); - void startSection(); - void endSection(); - void openRagelDef(); - void do_scan(); - bool parserExists(); - ostream &error(); - - char *fileName; - istream &input; - Parser *inclToParser; - char *inclSectionTarg; - int include_depth; - - int cs; - int line; - char *word, *lit; - int word_len, lit_len; - InputLoc sectionLoc; - char *tokstart, *tokend; - int column; - char *lastnl; - - /* Set by machine statements, these persist from section to section - * allowing for unnamed sections. 
*/ - Parser *parser; - bool active; - - /* This is set if ragel has already emitted an error stating that - * no section name has been seen and thus no parser exists. */ - bool parserExistsError; - bool ragelDefOpen; - - /* This is for inline code. By default it is on. It goes off for - * statements and values in inline blocks which are parsed. */ - bool whitespaceOn; -}; +void Scanner::pass( int token, char *start, char *end ) +{ + if ( importMachines ) + importToken( token, start, end ); + pass(); +} + +void Scanner::pass() +{ + updateCol(); + + /* If no errors and we are at the bottom of the include stack (the + * source file listed on the command line) then write out the data. */ + if ( includeDepth == 0 && machineSpec == 0 && machineName == 0 ) + id.inputItems.tail->data.write( ts, te-ts ); +} + +/* + * The scanner for processing sections, includes, imports, etc. + */ %%{ machine section_parse; @@ -134,38 +216,44 @@ struct Scanner write data; }%% + void Scanner::init( ) { %% write init; } -bool Scanner::parserExists() +bool Scanner::active() { - if ( parser != 0 ) - return true; + if ( ignoreSection ) + return false; - if ( ! parserExistsError ) { - error() << "include: there is no previous specification name" << endl; + if ( parser == 0 && ! parserExistsError ) { + scan_error() << "this specification has no name, nor does any previous" + " specification" << endl; parserExistsError = true; } - return false; + + if ( parser == 0 ) + return false; + + return true; } -ostream &Scanner::error() +ostream &Scanner::scan_error() { /* Maintain the error count. */ gblErrorCount += 1; - - cerr << fileName << ":" << line << ":" << column << ": "; + cerr << makeInputLoc( fileName, line, column ) << ": "; return cerr; } -bool Scanner::recursiveInclude( IncludeStack &includeStack, - char *inclFileName, char *inclSectionName ) +/* An approximate check for duplicate includes. Due to aliasing of files it's + * possible for duplicates to creep in. */ +bool Scanner::duplicateInclude( char *inclFileName, char *inclSectionName ) { - for ( IncludeStack::Iter si = includeStack; si.lte(); si++ ) { - if ( strcmp( si->fileName, inclFileName ) == 0 && - strcmp( si->sectionName, inclSectionName ) == 0 ) + for ( IncludeHistory::Iter hi = parser->includeHistory; hi.lte(); hi++ ) { + if ( strcmp( hi->fileName, inclFileName ) == 0 && + strcmp( hi->sectionName, inclSectionName ) == 0 ) { return true; } @@ -177,115 +265,135 @@ void Scanner::updateCol() { char *from = lastnl; if ( from == 0 ) - from = tokstart; - //cerr << "adding " << tokend - from << " to column" << endl; - column += tokend - from; + from = ts; + //cerr << "adding " << te - from << " to column" << endl; + column += te - from; lastnl = 0; } -void Scanner::token( int type, char c ) +void Scanner::handleMachine() { - token( type, &c, &c + 1 ); -} + /* Assign a name to the machine. 
*/ + char *machine = word; + + if ( !importMachines && inclSectionTarg == 0 ) { + ignoreSection = false; + + ParserDictEl *pdEl = id.parserDict.find( machine ); + if ( pdEl == 0 ) { + pdEl = new ParserDictEl( machine ); + pdEl->value = new Parser( fileName, machine, sectionLoc ); + pdEl->value->init(); + id.parserDict.insert( pdEl ); + id.parserList.append( pdEl->value ); + } -void Scanner::token( int type ) -{ - token( type, 0, 0 ); + parser = pdEl->value; + } + else if ( !importMachines && strcmp( inclSectionTarg, machine ) == 0 ) { + /* found include target */ + ignoreSection = false; + parser = inclToParser; + } + else { + /* ignoring section */ + ignoreSection = true; + parser = 0; + } } -%%{ - machine section_parse; +void Scanner::handleInclude() +{ + if ( active() ) { + char *inclSectionName = word; + char **includeChecks = 0; - # This relies on the the kelbt implementation and the order - # that tokens are declared. - KW_Machine = 128; - KW_Include = 129; - KW_Write = 130; - TK_Word = 131; - TK_Literal = 132; + /* Implement defaults for the input file and section name. */ + if ( inclSectionName == 0 ) + inclSectionName = parser->sectionName; - action clear_words { word = lit = 0; word_len = lit_len = 0; } - action store_word { word = tokdata; word_len = toklen; } - action store_lit { lit = tokdata; lit_len = toklen; } - - action mach_err { error() << "bad machine statement" << endl; } - action incl_err { error() << "bad include statement" << endl; } - action write_err { error() << "bad write statement" << endl; } + if ( lit != 0 ) + includeChecks = makeIncludePathChecks( fileName, lit, lit_len ); + else { + char *test = new char[strlen(fileName)+1]; + strcpy( test, fileName ); - action handle_machine - { - /* Assign a name to the machine. */ - char *machine = word; - - if ( inclSectionTarg == 0 ) { - active = true; - - ParserDictEl *pdEl = parserDict.find( machine ); - if ( pdEl == 0 ) { - pdEl = new ParserDictEl( machine ); - pdEl->value = new Parser( fileName, machine, sectionLoc ); - pdEl->value->init(); - parserDict.insert( pdEl ); - } + includeChecks = new char*[2]; - parser = pdEl->value; + includeChecks[0] = test; + includeChecks[1] = 0; } - else if ( strcmp( inclSectionTarg, machine ) == 0 ) { - /* found include target */ - active = true; - parser = inclToParser; + + long found = 0; + ifstream *inFile = tryOpenInclude( includeChecks, found ); + if ( inFile == 0 ) { + scan_error() << "include: failed to locate file" << endl; + char **tried = includeChecks; + while ( *tried != 0 ) + scan_error() << "include: attempted: \"" << *tried++ << '\"' << endl; } else { - /* ignoring section */ - active = false; - parser = 0; + /* Don't include anything that's already been included. */ + if ( !duplicateInclude( includeChecks[found], inclSectionName ) ) { + parser->includeHistory.append( IncludeHistoryItem( + includeChecks[found], inclSectionName ) ); + + Scanner scanner( id, includeChecks[found], *inFile, parser, + inclSectionName, includeDepth+1, false ); + scanner.do_scan( ); + delete inFile; + } } } +} - machine_stmt = - ( KW_Machine TK_Word @store_word ';' ) @handle_machine - <>err mach_err <>eof mach_err; +void Scanner::handleImport() +{ + if ( active() ) { + char **importChecks = makeIncludePathChecks( fileName, lit, lit_len ); + + /* Open the input file for reading. 
*/ + long found = 0; + ifstream *inFile = tryOpenInclude( importChecks, found ); + if ( inFile == 0 ) { + scan_error() << "import: could not open import file " << + "for reading" << endl; + char **tried = importChecks; + while ( *tried != 0 ) + scan_error() << "import: attempted: \"" << *tried++ << '\"' << endl; + } - action handle_include - { - if ( active && parserExists() ) { - char *inclSectionName = word; - char *inclFileName = 0; + Scanner scanner( id, importChecks[found], *inFile, parser, + 0, includeDepth+1, true ); + scanner.do_scan( ); + scanner.importToken( 0, 0, 0 ); + scanner.flushImport(); + delete inFile; + } +} - /* Implement defaults for the input file and section name. */ - if ( inclSectionName == 0 ) - inclSectionName = parser->sectionName; +%%{ + machine section_parse; - if ( lit != 0 ) - inclFileName = prepareFileName( lit, lit_len ); - else - inclFileName = fileName; + # Need the defines representing tokens. + import "rlparse.h"; - /* Check for a recursive include structure. Add the current file/section - * name then check if what we are including is already in the stack. */ - includeStack.append( IncludeStackItem( fileName, parser->sectionName ) ); + action clear_words { word = lit = 0; word_len = lit_len = 0; } + action store_word { word = tokdata; word_len = toklen; } + action store_lit { lit = tokdata; lit_len = toklen; } - if ( recursiveInclude( includeStack, inclFileName, inclSectionName ) ) - error() << "include: this is a recursive include operation" << endl; - else { - /* Open the input file for reading. */ - ifstream *inFile = new ifstream( inclFileName ); - if ( ! inFile->is_open() ) { - error() << "include: could not open " << - inclFileName << " for reading" << endl; - } - - Scanner scanner( inclFileName, *inFile, parser, - inclSectionName, include_depth+1 ); - scanner.init(); - scanner.do_scan( ); - delete inFile; - } + action mach_err { scan_error() << "bad machine statement" << endl; } + action incl_err { scan_error() << "bad include statement" << endl; } + action import_err { scan_error() << "bad import statement" << endl; } + action write_err { scan_error() << "bad write statement" << endl; } - /* Remove the last element (len-1) */ - includeStack.remove( -1 ); - } - } + action handle_machine { handleMachine(); } + action handle_include { handleInclude(); } + action handle_import { handleImport(); } + + machine_stmt = + ( KW_Machine TK_Word @store_word ';' ) @handle_machine + <>err mach_err <>eof mach_err; include_names = ( TK_Word @store_word ( TK_Literal @store_lit )? 
| @@ -296,78 +404,74 @@ void Scanner::token( int type ) ( KW_Include include_names ';' ) @handle_include <>err incl_err <>eof incl_err; + import_stmt = + ( KW_Import TK_Literal @store_lit ';' ) @handle_import + <>err import_err <>eof import_err; + action write_command { - if ( active ) { - openRagelDef(); - if ( strcmp( tokdata, "data" ) != 0 && - strcmp( tokdata, "init" ) != 0 && - strcmp( tokdata, "exec" ) != 0 && - strcmp( tokdata, "eof" ) != 0 ) - { - error() << "unknown write command" << endl; - } - *outStream << " "; + if ( active() && machineSpec == 0 && machineName == 0 ) { + InputItem *inputItem = new InputItem; + inputItem->type = InputItem::Write; + inputItem->loc.line = line; + inputItem->loc.col = column; + inputItem->name = parser->sectionName; + inputItem->pd = parser->pd; + id.inputItems.append( inputItem ); } } - action write_option + action write_arg { - if ( active ) - *outStream << ""; + if ( active() && machineSpec == 0 && machineName == 0 ) + id.inputItems.tail->writeArgs.append( strdup(tokdata) ); } + action write_close { - if ( active ) - *outStream << "\n"; + if ( active() && machineSpec == 0 && machineName == 0 ) + id.inputItems.tail->writeArgs.append( 0 ); } write_stmt = - ( KW_Write TK_Word @write_command - ( TK_Word @write_option )* ';' @write_close ) + ( KW_Write @write_command + ( TK_Word @write_arg )+ ';' @write_close ) <>err write_err <>eof write_err; action handle_token { /* Send the token off to the parser. */ - if ( active && parserExists() ) { - InputLoc loc; - - #if 0 - cerr << "scanner:" << line << ":" << column << - ": sending token to the parser " << lelNames[*p]; - cerr << " " << toklen; - if ( tokdata != 0 ) - cerr << " " << tokdata; - cerr << endl; - #endif - - loc.fileName = fileName; - loc.line = line; - loc.col = column; - - parser->token( loc, type, tokdata, toklen ); - } + if ( active() ) + directToParser( parser, fileName, line, column, type, tokdata, toklen ); } # Catch everything else. - everything_else = ^( KW_Machine | KW_Include | KW_Write ) @handle_token; + everything_else = + ^( KW_Machine | KW_Include | KW_Import | KW_Write ) @handle_token; main := ( machine_stmt | include_stmt | + import_stmt | write_stmt | everything_else )*; }%% +void Scanner::token( int type, char c ) +{ + token( type, &c, &c + 1 ); +} + +void Scanner::token( int type ) +{ + token( type, 0, 0 ); +} + void Scanner::token( int type, char *start, char *end ) { char *tokdata = 0; int toklen = 0; - int *p = &type; - int *pe = &type + 1; - if ( start != 0 ) { toklen = end-start; tokdata = new char[toklen+1]; @@ -375,67 +479,142 @@ void Scanner::token( int type, char *start, char *end ) tokdata[toklen] = 0; } + processToken( type, tokdata, toklen ); +} + +void Scanner::processToken( int type, char *tokdata, int toklen ) +{ + int *p, *pe, *eof; + + if ( type < 0 ) + p = pe = eof = 0; + else { + p = &type; + pe = &type + 1; + eof = 0; + } + %%{ machine section_parse; write exec; }%% updateCol(); + + /* Record the last token for use in controlling the scan of subsequent + * tokens. */ + lastToken = type; } void Scanner::startSection( ) { parserExistsError = false; - if ( include_depth == 0 ) { - if ( machineSpec == 0 && machineName == 0 ) - *outStream << "\n"; - ragelDefOpen = false; - } - sectionLoc.fileName = fileName; sectionLoc.line = line; - sectionLoc.col = 0; -} - -void Scanner::openRagelDef() -{ - if ( ! 
ragelDefOpen ) { - ragelDefOpen = true; - *outStream << "sectionName << "\">\n"; - } + sectionLoc.col = column; } void Scanner::endSection( ) { /* Execute the eof actions for the section parser. */ - %%{ - machine section_parse; - write eof; - }%% + processToken( -1, 0, 0 ); /* Close off the section with the parser. */ - if ( active && parserExists() ) { + if ( active() ) { InputLoc loc; loc.fileName = fileName; loc.line = line; - loc.col = 0; + loc.col = column; parser->token( loc, TK_EndSection, 0, 0 ); } - if ( include_depth == 0 ) { - if ( ragelDefOpen ) { - *outStream << "\n"; - ragelDefOpen = false; - } - + if ( includeDepth == 0 ) { if ( machineSpec == 0 && machineName == 0 ) { /* The end section may include a newline on the end, so * we use the last line, which will count the newline. */ - *outStream << ""; + InputItem *inputItem = new InputItem; + inputItem->type = InputItem::HostData; + inputItem->loc.line = line; + inputItem->loc.col = column; + id.inputItems.append( inputItem ); + } + } +} + +bool isAbsolutePath( const char *path ) +{ +#ifdef _WIN32 + return isalpha( path[0] ) && path[1] == ':' && path[2] == '\\'; +#else + return path[0] == '/'; +#endif +} + +char **Scanner::makeIncludePathChecks( const char *thisFileName, + const char *fileName, int fnlen ) +{ + char **checks = new char*[2]; + long nextCheck = 0; + + bool caseInsensitive = false; + long length = 0; + char *data = prepareLitString( InputLoc(), fileName, fnlen, + length, caseInsensitive ); + + /* Absolute path? */ + if ( isAbsolutePath( data ) ) + checks[nextCheck++] = data; + else { + /* Search from the the location of the current file. */ + const char *lastSlash = strrchr( thisFileName, PATH_SEP ); + if ( lastSlash == 0 ) + checks[nextCheck++] = data; + else { + long givenPathLen = (lastSlash - thisFileName) + 1; + long checklen = givenPathLen + length; + char *check = new char[checklen+1]; + memcpy( check, thisFileName, givenPathLen ); + memcpy( check+givenPathLen, data, length ); + check[checklen] = 0; + checks[nextCheck++] = check; + } + + /* Search from the include paths given on the command line. */ + for ( ArgsVector::Iter incp = id.includePaths; incp.lte(); incp++ ) { + long pathLen = strlen( *incp ); + long checkLen = pathLen + 1 + length; + char *check = new char[checkLen+1]; + memcpy( check, *incp, pathLen ); + check[pathLen] = PATH_SEP; + memcpy( check+pathLen+1, data, length ); + check[checkLen] = 0; + checks[nextCheck++] = check; } } + + checks[nextCheck] = 0; + return checks; +} + +ifstream *Scanner::tryOpenInclude( char **pathChecks, long &found ) +{ + char **check = pathChecks; + ifstream *inFile = new ifstream; + + while ( *check != 0 ) { + inFile->open( *check ); + if ( inFile->is_open() ) { + found = check - pathChecks; + return inFile; + } + check += 1; + } + + found = -1; + delete inFile; + return 0; } %%{ @@ -464,16 +643,120 @@ void Scanner::endSection( ) c_cpp_comment = c_comment | cpp_comment; - # These literal forms are common to C-like host code and ragel. + ruby_comment = '#' [^\n]* NL; + + # These literal forms are common to host code and ragel. s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'"; d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"'; + host_re_literal = '/' ([^/\\] | NL | '\\' (any | NL))* '/'; whitespace = [ \t] | NL; pound_comment = '#' [^\n]* NL; - # An inline block of code. This is specified as a scanned, but is sent to - # the parser as one long block. The inline_block pointer is used to handle - # the preservation of the data. 
+ # An inline block of code for Ruby. + inline_code_ruby := |* + # Inline expression keywords. + "fpc" => { token( KW_PChar ); }; + "fc" => { token( KW_Char ); }; + "fcurs" => { token( KW_CurState ); }; + "ftargs" => { token( KW_TargState ); }; + "fentry" => { + whitespaceOn = false; + token( KW_Entry ); + }; + + # Inline statement keywords. + "fhold" => { + whitespaceOn = false; + token( KW_Hold ); + }; + "fexec" => { token( KW_Exec, 0, 0 ); }; + "fgoto" => { + whitespaceOn = false; + token( KW_Goto ); + }; + "fnext" => { + whitespaceOn = false; + token( KW_Next ); + }; + "fcall" => { + whitespaceOn = false; + token( KW_Call ); + }; + "fret" => { + whitespaceOn = false; + token( KW_Ret ); + }; + "fbreak" => { + whitespaceOn = false; + token( KW_Break ); + }; + + ident => { token( TK_Word, ts, te ); }; + + number => { token( TK_UInt, ts, te ); }; + hex_number => { token( TK_Hex, ts, te ); }; + + ( s_literal | d_literal | host_re_literal ) + => { token( IL_Literal, ts, te ); }; + + whitespace+ => { + if ( whitespaceOn ) + token( IL_WhiteSpace, ts, te ); + }; + + ruby_comment => { token( IL_Comment, ts, te ); }; + + "::" => { token( TK_NameSep, ts, te ); }; + + # Some symbols need to go to the parser as with their cardinal value as + # the token type (as opposed to being sent as anonymous symbols) + # because they are part of the sequences which we interpret. The * ) ; + # symbols cause whitespace parsing to come back on. This gets turned + # off by some keywords. + + ";" => { + whitespaceOn = true; + token( *ts, ts, te ); + if ( inlineBlockType == SemiTerminated ) + fret; + }; + + [*)] => { + whitespaceOn = true; + token( *ts, ts, te ); + }; + + [,(] => { token( *ts, ts, te ); }; + + '{' => { + token( IL_Symbol, ts, te ); + curly_count += 1; + }; + + '}' => { + if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) { + /* Inline code block ends. */ + token( '}' ); + fret; + } + else { + /* Either a semi terminated inline block or only the closing + * brace of some inner scope, not the block's closing brace. */ + token( IL_Symbol, ts, te ); + } + }; + + EOF => { + scan_error() << "unterminated code block" << endl; + }; + + # Send every other character as a symbol. + any => { token( IL_Symbol, ts, te ); }; + *|; + + + # An inline block of code for languages other than Ruby. inline_code := |* # Inline expression keywords. 
"fpc" => { token( KW_PChar ); }; @@ -512,21 +795,22 @@ void Scanner::endSection( ) token( KW_Break ); }; - ident => { token( TK_Word, tokstart, tokend ); }; + ident => { token( TK_Word, ts, te ); }; - number => { token( TK_UInt, tokstart, tokend ); }; - hex_number => { token( TK_Hex, tokstart, tokend ); }; + number => { token( TK_UInt, ts, te ); }; + hex_number => { token( TK_Hex, ts, te ); }; ( s_literal | d_literal ) - => { token( IL_Literal, tokstart, tokend ); }; + => { token( IL_Literal, ts, te ); }; whitespace+ => { if ( whitespaceOn ) - token( IL_WhiteSpace, tokstart, tokend ); + token( IL_WhiteSpace, ts, te ); }; - c_cpp_comment => { token( IL_Comment, tokstart, tokend ); }; - "::" => { token( TK_NameSep, tokstart, tokend ); }; + c_cpp_comment => { token( IL_Comment, ts, te ); }; + + "::" => { token( TK_NameSep, ts, te ); }; # Some symbols need to go to the parser as with their cardinal value as # the token type (as opposed to being sent as anonymous symbols) @@ -536,20 +820,20 @@ void Scanner::endSection( ) ";" => { whitespaceOn = true; - token( *tokstart, tokstart, tokend ); + token( *ts, ts, te ); if ( inlineBlockType == SemiTerminated ) - fgoto parser_def; + fret; }; [*)] => { whitespaceOn = true; - token( *tokstart, tokstart, tokend ); + token( *ts, ts, te ); }; - [,(] => { token( *tokstart, tokstart, tokend ); }; + [,(] => { token( *ts, ts, te ); }; '{' => { - token( IL_Symbol, tokstart, tokend ); + token( IL_Symbol, ts, te ); curly_count += 1; }; @@ -557,21 +841,21 @@ void Scanner::endSection( ) if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) { /* Inline code block ends. */ token( '}' ); - fgoto parser_def; + fret; } else { /* Either a semi terminated inline block or only the closing * brace of some inner scope, not the block's closing brace. */ - token( IL_Symbol, tokstart, tokend ); + token( IL_Symbol, ts, te ); } }; EOF => { - error() << "unterminated code block" << endl; + scan_error() << "unterminated code block" << endl; }; # Send every other character as a symbol. - any => { token( IL_Symbol, tokstart, tokend ); }; + any => { token( IL_Symbol, ts, te ); }; *|; or_literal := |* @@ -585,7 +869,7 @@ void Scanner::endSection( ) '\\f' => { token( RE_Char, '\f' ); }; '\\r' => { token( RE_Char, '\r' ); }; '\\\n' => { updateCol(); }; - '\\' any => { token( RE_Char, tokstart+1, tokend ); }; + '\\' any => { token( RE_Char, ts+1, te ); }; # Range dash in an OR expression. '-' => { token( RE_Dash, 0, 0 ); }; @@ -594,15 +878,15 @@ void Scanner::endSection( ) ']' => { token( RE_SqClose ); fret; }; EOF => { - error() << "unterminated OR literal" << endl; + scan_error() << "unterminated OR literal" << endl; }; # Characters in an OR expression. - [^\]] => { token( RE_Char, tokstart, tokend ); }; + [^\]] => { token( RE_Char, ts, te ); }; *|; - re_literal := |* + ragel_re_literal := |* # Escape sequences in regular expressions. '\\0' => { token( RE_Char, '\0' ); }; '\\a' => { token( RE_Char, '\a' ); }; @@ -613,11 +897,11 @@ void Scanner::endSection( ) '\\f' => { token( RE_Char, '\f' ); }; '\\r' => { token( RE_Char, '\r' ); }; '\\\n' => { updateCol(); }; - '\\' any => { token( RE_Char, tokstart+1, tokend ); }; + '\\' any => { token( RE_Char, ts+1, te ); }; # Terminate an OR expression. '/' [i]? 
=> { - token( RE_Slash, tokstart, tokend ); + token( RE_Slash, ts, te ); fgoto parser_def; }; @@ -629,34 +913,38 @@ void Scanner::endSection( ) '[^' => { token( RE_SqOpenNeg ); fcall or_literal; }; EOF => { - error() << "unterminated regular expression" << endl; + scan_error() << "unterminated regular expression" << endl; }; # Characters in an OR expression. - [^\/] => { token( RE_Char, tokstart, tokend ); }; + [^\/] => { token( RE_Char, ts, te ); }; *|; # We need a separate token space here to avoid the ragel keywords. write_statement := |* - ident => { token( TK_Word, tokstart, tokend ); } ; + ident => { token( TK_Word, ts, te ); } ; [ \t\n]+ => { updateCol(); }; ';' => { token( ';' ); fgoto parser_def; }; EOF => { - error() << "unterminated write statement" << endl; + scan_error() << "unterminated write statement" << endl; }; *|; # Parser definitions. parser_def := |* + 'length_cond' => { token( KW_Length ); }; 'machine' => { token( KW_Machine ); }; 'include' => { token( KW_Include ); }; + 'import' => { token( KW_Import ); }; 'write' => { token( KW_Write ); fgoto write_statement; }; 'action' => { token( KW_Action ); }; 'alphtype' => { token( KW_AlphType ); }; + 'prepush' => { token( KW_PrePush ); }; + 'postpop' => { token( KW_PostPop ); }; # FIXME: Enable this post 5.17. # 'range' => { token( KW_Range ); }; @@ -664,40 +952,52 @@ void Scanner::endSection( ) 'getkey' => { token( KW_GetKey ); inlineBlockType = SemiTerminated; - fgoto inline_code; + if ( hostLang->lang == HostLang::Ruby ) + fcall inline_code_ruby; + else + fcall inline_code; }; 'access' => { token( KW_Access ); inlineBlockType = SemiTerminated; - fgoto inline_code; + if ( hostLang->lang == HostLang::Ruby ) + fcall inline_code_ruby; + else + fcall inline_code; }; 'variable' => { token( KW_Variable ); inlineBlockType = SemiTerminated; - fgoto inline_code; + if ( hostLang->lang == HostLang::Ruby ) + fcall inline_code_ruby; + else + fcall inline_code; }; 'when' => { token( KW_When ); }; + 'inwhen' => { token( KW_InWhen ); }; + 'outwhen' => { token( KW_OutWhen ); }; 'eof' => { token( KW_Eof ); }; 'err' => { token( KW_Err ); }; 'lerr' => { token( KW_Lerr ); }; 'to' => { token( KW_To ); }; 'from' => { token( KW_From ); }; + 'export' => { token( KW_Export ); }; # Identifiers. - ident => { token( TK_Word, tokstart, tokend ); } ; + ident => { token( TK_Word, ts, te ); } ; # Numbers - number => { token( TK_UInt, tokstart, tokend ); }; - hex_number => { token( TK_Hex, tokstart, tokend ); }; + number => { token( TK_UInt, ts, te ); }; + hex_number => { token( TK_Hex, ts, te ); }; # Literals, with optionals. ( s_literal | d_literal ) [i]? - => { token( TK_Literal, tokstart, tokend ); }; + => { token( TK_Literal, ts, te ); }; '[' => { token( RE_SqOpen ); fcall or_literal; }; '[^' => { token( RE_SqOpenNeg ); fcall or_literal; }; - '/' => { token( RE_Slash ); fgoto re_literal; }; + '/' => { token( RE_Slash ); fgoto ragel_re_literal; }; # Ignore. pound_comment => { updateCol(); }; @@ -765,69 +1065,96 @@ void Scanner::endSection( ) # Opening of longest match. "|*" => { token( TK_BarStar ); }; + # Separater for name references. + "::" => { token( TK_NameSep, ts, te ); }; + '}%%' => { updateCol(); endSection(); - fgoto main; + fret; }; - [ \t]+ => { updateCol(); }; + [ \t\r]+ => { updateCol(); }; # If we are in a single line machine then newline may end the spec. 
NL => { updateCol(); if ( singleLineSpec ) { endSection(); - fgoto main; + fret; } }; '{' => { - token( '{' ); - curly_count = 1; - inlineBlockType = CurlyDelimited; - fgoto inline_code; + if ( lastToken == KW_Export || lastToken == KW_Entry ) + token( '{' ); + else { + token( '{' ); + curly_count = 1; + inlineBlockType = CurlyDelimited; + if ( hostLang->lang == HostLang::Ruby ) + fcall inline_code_ruby; + else + fcall inline_code; + } }; EOF => { - error() << "unterminated ragel section" << endl; + scan_error() << "unterminated ragel section" << endl; }; - any => { token( *tokstart ); } ; + any => { token( *ts ); } ; *|; - action pass { - updateCol(); + # Outside code scanner. These tokens get passed through. + main_ruby := |* + ident => { pass( IMP_Word, ts, te ); }; + number => { pass( IMP_UInt, ts, te ); }; + ruby_comment => { pass(); }; + ( s_literal | d_literal | host_re_literal ) + => { pass( IMP_Literal, ts, te ); }; - /* If no errors and we are at the bottom of the include stack (the - * source file listed on the command line) then write out the data. */ - if ( include_depth == 0 && machineSpec == 0 && machineName == 0 ) - xmlEscapeHost( *outStream, tokstart, tokend-tokstart ); - } + '%%{' => { + updateCol(); + singleLineSpec = false; + startSection(); + fcall parser_def; + }; + '%%' => { + updateCol(); + singleLineSpec = true; + startSection(); + fcall parser_def; + }; + whitespace+ => { pass(); }; + EOF; + any => { pass( *ts, 0, 0 ); }; + *|; # Outside code scanner. These tokens get passed through. main := |* - ident => pass; - number => pass; - c_cpp_comment => pass; - s_literal | d_literal => pass; + 'define' => { pass( IMP_Define, 0, 0 ); }; + ident => { pass( IMP_Word, ts, te ); }; + number => { pass( IMP_UInt, ts, te ); }; + c_cpp_comment => { pass(); }; + ( s_literal | d_literal ) => { pass( IMP_Literal, ts, te ); }; + '%%{' => { updateCol(); singleLineSpec = false; startSection(); - fgoto parser_def; + fcall parser_def; }; '%%' => { updateCol(); singleLineSpec = true; startSection(); - fgoto parser_def; + fcall parser_def; }; - whitespace+ => pass; + whitespace+ => { pass(); }; EOF; - any => pass; + any => { pass( *ts, 0, 0 ); }; *|; - }%% %% write data; @@ -836,16 +1163,30 @@ void Scanner::do_scan() { int bufsize = 8; char *buf = new char[bufsize]; - const char last_char = 0; int cs, act, have = 0; - int top, stack[1]; + int top; + + /* The stack is two deep, one level for going into ragel defs from the main + * machines which process outside code, and another for going into or literals + * from either a ragel spec, or a regular expression. */ + int stack[2]; int curly_count = 0; bool execute = true; bool singleLineSpec = false; - InlineBlockType inlineBlockType; + InlineBlockType inlineBlockType = CurlyDelimited; + /* Init the section parser and the character scanner. */ + init(); %% write init; + /* Set up the start state. FIXME: After 5.20 is released the nocs write + * init option should be used, the main machine eliminated and this statement moved + * above the write init. */ + if ( hostLang->lang == HostLang::Ruby ) + cs = rlscan_en_main_ruby; + else + cs = rlscan_en_main; + while ( execute ) { char *p = buf + have; int space = bufsize - have; @@ -860,9 +1201,9 @@ void Scanner::do_scan() space = bufsize - have; /* Patch up pointers possibly in use. */ - if ( tokstart != 0 ) - tokstart = newbuf + ( tokstart - buf ); - tokend = newbuf + ( tokend - buf ); + if ( ts != 0 ) + ts = newbuf + ( ts - buf ); + te = newbuf + ( te - buf ); /* Copy the new buffer in. 
*/ memcpy( newbuf, buf, have ); @@ -872,26 +1213,27 @@ void Scanner::do_scan() input.read( p, space ); int len = input.gcount(); + char *pe = p + len; - /* If we see eof then append the EOF char. */ + /* If we see eof then append the eof var. */ + char *eof = 0; if ( len == 0 ) { - p[0] = last_char, len = 1; + eof = pe; execute = false; } - char *pe = p + len; %% write exec; /* Check if we failed. */ if ( cs == rlscan_error ) { /* Machine failed before finding a token. I'm not yet sure if this * is reachable. */ - error() << "scanner error" << endl; + scan_error() << "scanner error" << endl; exit(1); } /* Decide if we need to preserve anything. */ - char *preserve = tokstart; + char *preserve = ts; /* Now set up the prefix. */ if ( preserve == 0 ) @@ -901,9 +1243,9 @@ void Scanner::do_scan() have = pe - preserve; memmove( buf, preserve, have ); unsigned int shiftback = preserve - buf; - if ( tokstart != 0 ) - tokstart -= shiftback; - tokend -= shiftback; + if ( ts != 0 ) + ts -= shiftback; + te -= shiftback; preserve = buf; } @@ -911,15 +1253,3 @@ void Scanner::do_scan() delete[] buf; } - -void scan( char *fileName, istream &input ) -{ - Scanner scanner( fileName, input, 0, 0, 0 ); - scanner.init(); - scanner.do_scan(); - - InputLoc eofLoc; - eofLoc.fileName = fileName; - eofLoc.col = 1; - eofLoc.line = scanner.line; -}
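
Note on the new include/import path resolution added in this patch: the old scanner opened the literal include file name directly, while the new handleInclude()/handleImport() paths build a list of candidate locations with makeIncludePathChecks() and open the first one that exists with tryOpenInclude(). The sketch below illustrates that search order only; it is not the patch's code. The names makeIncludeChecks and tryOpen, and the std::string/std::vector interface, are illustrative stand-ins for the raw char* arrays and the Vector/ArgsVector types used in the Ragel sources, and the literal-string unquoting done by prepareLitString() is assumed to have happened already.

// Minimal sketch, assuming the include name has already been unquoted.
#include <cctype>
#include <fstream>
#include <string>
#include <vector>

#ifdef _WIN32
#define PATH_SEP '\\'
static bool isAbsolutePath( const std::string &p )
	{ return p.size() > 2 && isalpha( p[0] ) && p[1] == ':' && p[2] == '\\'; }
#else
#define PATH_SEP '/'
static bool isAbsolutePath( const std::string &p )
	{ return !p.empty() && p[0] == '/'; }
#endif

/* Build the candidate paths for an include/import target: the name itself if
 * absolute, otherwise relative to the directory of the including file, then
 * relative to each -I path given on the command line. */
std::vector<std::string> makeIncludeChecks( const std::string &thisFileName,
		const std::string &inclName, const std::vector<std::string> &includePaths )
{
	std::vector<std::string> checks;
	if ( isAbsolutePath( inclName ) )
		checks.push_back( inclName );
	else {
		/* Search from the location of the current file. */
		std::string::size_type lastSlash = thisFileName.rfind( PATH_SEP );
		if ( lastSlash == std::string::npos )
			checks.push_back( inclName );
		else
			checks.push_back( thisFileName.substr( 0, lastSlash + 1 ) + inclName );

		/* Then search from the include paths given on the command line. */
		for ( const std::string &dir : includePaths )
			checks.push_back( dir + PATH_SEP + inclName );
	}
	return checks;
}

/* Open the first candidate that exists; return its index, or -1 if none opened. */
long tryOpen( const std::vector<std::string> &checks, std::ifstream &inFile )
{
	for ( size_t i = 0; i < checks.size(); i++ ) {
		inFile.open( checks[i].c_str() );
		if ( inFile.is_open() )
			return (long)i;
		inFile.clear();
	}
	return -1;
}

A caller would walk the returned list exactly as handleInclude() does above: on failure it reports every attempted path, and on success it records the resolved file name in the parser's includeHistory so duplicateInclude() can skip repeat inclusions.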