From 15b56beda97efc88af9faf8d04d666e7ef842712 Mon Sep 17 00:00:00 2001 From: thurston Date: Mon, 22 Jan 2007 19:22:01 +0000 Subject: [PATCH] Removed the old flex and bison files. Fixed a bug in regexes and OR literals: \0 was not properly recognized. Improved error reporting. git-svn-id: http://svn.complang.org/ragel/trunk@7 052ea7fc-9027-0410-9066-f65837a77df0 --- ragel/rlparse.kl | 10 +- ragel/rlparse.y | 1456 ------------------------------------------------- ragel/rlscan.lex | 1212 ---------------------------------------- ragel/rlscan.rl | 78 ++- rlcodegen/xmlparse.y | 978 --------------------------------- rlcodegen/xmlscan.lex | 433 --------------- rlcodegen/xmlscan.rl | 14 +- 7 files changed, 67 insertions(+), 4114 deletions(-) delete mode 100644 ragel/rlparse.y delete mode 100644 ragel/rlscan.lex delete mode 100644 rlcodegen/xmlparse.y delete mode 100644 rlcodegen/xmlscan.lex diff --git a/ragel/rlparse.kl b/ragel/rlparse.kl index b39fa5c..28fcfcc 100644 --- a/ragel/rlparse.kl +++ b/ragel/rlparse.kl @@ -35,7 +35,10 @@ parser Parser; include "rlparse.kh"; -start: statement_list; +start: section_list; + +section_list: section_list statement_list TK_EndSection; +section_list: ; statement_list: statement_list statement; statement_list: ; @@ -49,9 +52,6 @@ statement: getkey_spec commit; statement: access_spec commit; statement: variable_spec commit; -# We use end section tokens to draw firm boundaries between sections. -statement: TK_EndSection; - assignment: machine_name '=' join ';' final { /* Main machine must be an instance. */ @@ -1374,7 +1374,7 @@ ostream &Parser::parser_error( int tokId, Token &token ) { gblErrorCount += 1; - cerr << token.loc.fileName << ":" << token.loc.line << ": "; + cerr << token.loc.fileName << ":" << token.loc.line << ":" << token.loc.col << ": "; cerr << "at token "; if ( tokId < 128 ) cerr << "\"" << lelNames[tokId] << "\""; diff --git a/ragel/rlparse.y b/ragel/rlparse.y deleted file mode 100644 index b0fc3df..0000000 --- a/ragel/rlparse.y +++ /dev/null @@ -1,1456 +0,0 @@ -/* - * Copyright 2001-2005 Adrian Thurston - */ - -/* This file is part of Ragel. - * - * Ragel is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Ragel is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Ragel; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -%{ - -#include -#include -#include -#include -#include "ragel.h" -#include "parsetree.h" -#include "rlparse.h" - -using std::cerr; -using std::endl; - -InputData *id = 0; -int includeDepth = 0; - -extern bool inlineWhitespace; - -/* These come from the scanner and point back into the parser. We will borrow - * them for error reporting. */ -extern YYSTYPE *yylval; -extern YYLTYPE *yylloc; - -/* The include stack pointer from the scanner. Used to determine if we are - * currently processing an included file. */ -extern int inc_stack_ptr; - -/* Try to do a definition, common to assignment and instantiation. */ -void tryMachineDef( const YYLTYPE &loc, char *name, - JoinOrLm *joinOrLm, bool isInstance ); -void beginOutsideCode(); -void doInclude( const InputLoc &loc, char *sectionName, char *inputFile ); -int yylex( YYSTYPE *yylval, YYLTYPE *yylloc ); - -bool sectionOpened; -void openSection(); - -#define WO_NOEND 0x01 - -%} - -%pure-parser - -%union { - /* General data types. */ - char c; - TokenData data; - int integer; - Literal *literal; - - /* Tree nodes. */ - Term *term; - FactorWithAug *factorWithAug; - FactorWithRep *factorWithRep; - FactorWithNeg *factorWithNeg; - Factor *factor; - Expression *expression; - Join *join; - JoinOrLm *joinOrLm; - LmPartList *longestMatchList; - LongestMatchPart *longestMatchPart; - - /* Priorities and actions. */ - AugType augType; - StateAugType stateAugType; - Action *action; - PriorDesc *priorDesc; - - /* Regular expression items. */ - RegExpr *regExp; - ReItem *reItem; - ReOrBlock *reOrBlock; - ReOrItem *reOrItem; - - /* Inline parse tree items. */ - InlineItem *ilitem; - InlineList *illist; -} - -%token TK_Section -%token TK_SectionNL - -/* General tokens. */ -%token TK_UInt -%token TK_Hex -%token TK_Word -%token TK_Literal -%token TK_CiLiteral -%token TK_BaseClause -%token TK_DotDot -%token TK_ColonGt -%token TK_ColonGtGt -%token TK_LtColon -%token TK_Arrow -%token TK_DoubleArrow -%token TK_StarStar -%token TK_ColonEquals -%token TK_NameSep -%token TK_BarStar -%token TK_RepOpOpen -%token TK_DashDash - -%token TK_StartCond -%token TK_AllCond -%token TK_LeavingCond - -%token TK_Middle - -/* Global error actions. */ -%token TK_StartGblError -%token TK_AllGblError -%token TK_FinalGblError -%token TK_NotFinalGblError -%token TK_NotStartGblError -%token TK_MiddleGblError - -/* Local error actions. */ -%token TK_StartLocalError -%token TK_AllLocalError -%token TK_FinalLocalError -%token TK_NotFinalLocalError -%token TK_NotStartLocalError -%token TK_MiddleLocalError - -/* EOF Action embedding. */ -%token TK_StartEOF -%token TK_AllEOF -%token TK_FinalEOF -%token TK_NotFinalEOF -%token TK_NotStartEOF -%token TK_MiddleEOF - -/* To State Actions. */ -%token TK_StartToState -%token TK_AllToState -%token TK_FinalToState -%token TK_NotFinalToState -%token TK_NotStartToState -%token TK_MiddleToState - -/* In State Actions. */ -%token TK_StartFromState -%token TK_AllFromState -%token TK_FinalFromState -%token TK_NotFinalFromState -%token TK_NotStartFromState -%token TK_MiddleFromState - -/* Regular expression tokens. */ -%token RE_Slash -%token RE_SqOpen -%token RE_SqOpenNeg -%token RE_SqClose -%token RE_Dot -%token RE_Star -%token RE_Dash -%token RE_Char - -/* Tokens specific to inline code. */ -%token IL_WhiteSpace -%token IL_Comment -%token IL_Literal -%token IL_Symbol - -/* Keywords. */ -%token KW_Action -%token KW_AlphType -%token KW_Range -%token KW_GetKey -%token KW_Include -%token KW_Write -%token KW_Machine -%token KW_When -%token KW_Eof -%token KW_Err -%token KW_Lerr -%token KW_To -%token KW_From - -/* Specials in code blocks. */ -%token KW_Break -%token KW_Exec -%token KW_Hold -%token KW_PChar -%token KW_Char -%token KW_Goto -%token KW_Call -%token KW_Ret -%token KW_CurState -%token KW_TargState -%token KW_Entry -%token KW_Next -%token KW_Exec -%token KW_Variable -%token KW_Access - -/* Special token for terminating semi-terminated code blocks. Needed because - * semi is sent as a token in the code block rather than as a generic symbol. */ -%token TK_Semi - -/* Symbols. In ragel lexical space, the scanner does not pass - * any data along with the symbols, in inline code lexical - * space it does. */ -%token '*' '?' '+' '!' '^' '(' ')' ';' ',' '=' -%token ':' '@' '%' '$' '-' '|' '&' '.' '>' - -/* Precedence information. Lower is a higher precedence. We need only two - * precedence groups. Shifting the minus sign in front of a literal number - * conflicts with the reduction of Expression and the subsequent shifting of a - * subtraction operator when a '-' is seen. Since we want subtraction to take - * precedence, we give EXPR_MINUS the higher priority. */ -%nonassoc '-' -%nonassoc EXPR_MINUS - -%type AugTypeBase -%type AugTypeGblError -%type AugTypeLocalError -%type AugTypeEOF -%type AugTypeToState -%type AugTypeFromState -%type AugTypeCond -%type PriorityAug -%type PriorityAugNum -%type ActionEmbed -%type ActionEmbedWord -%type ActionEmbedBlock -%type OptLmPartAction -%type LmPartList -%type LongestMatchPart -%type Join -%type JoinOrLm -%type Expression -%type Term -%type FactorWithLabel -%type FactorWithEp -%type FactorWithAug -%type FactorWithTransAction -%type FactorWithPriority -%type FactorWithCond -%type FactorWithToStateAction -%type FactorWithFromStateAction -%type FactorWithEOFAction -%type FactorWithGblErrorAction -%type FactorWithLocalErrorAction -%type FactorWithRep -%type FactorRepNum -%type FactorWithNeg -%type Factor -%type RangeLit -%type AlphabetNum -%type MachineName -%type PriorityName -%type LocalErrName -%type SectionName -%type OptSection -%type OptFileName -%type EndSection - -%type InlineBlock -%type InlineBlockItem -%type InlineBlockInterpret -%type InlineBlockAny -%type InlineBlockSymbol - -%type InlineExpr -%type InlineExprItem -%type InlineExprInterpret -%type InlineExprSymbol -%type InlineExprAny - -%type RegularExpr -%type RegularExprItem -%type RegularExprChar -%type RegularExprOrData -%type RegularExprOrChar - -%% - -/* Input is any number of input sections. An empty file is accepted. */ -input: FsmSpecList; -FsmSpecList: - FsmSpecList FsmSpec | - /* Nothing */; - -/* Fsm Specification. Fsms are begin with '%%' and may be a {} delimited - * list of Fsm statements or may be a single statement. If no name is - * given the last name given in a machine is used. */ -FsmSpec: - StartSection SectionName StatementList EndSection { - if ( includeDepth == 0 ) { - if ( sectionOpened ) - *outStream << "\n"; - - if ( machineSpec == 0 && machineName == 0 ) { - /* The end section may include a newline on the end, so - * we use the last line, which will count the newline. */ - *outStream << ""; - } - } - }; - -StartSection: - TK_Section { - id->sectionLoc = InputLoc(@1); - - if ( includeDepth == 0 ) { - if ( machineSpec == 0 && machineName == 0 ) - *outStream << "\n"; - sectionOpened = false; - } - }; - -SectionName: - KW_Machine TK_Word ';' { - /* By default active until found not active. */ - id->active = true; - id->sectionName = $2.data; - - if ( id->includeSpec != 0 ) { - if ( strcmp( id->sectionName, id->includeSpec ) == 0 ) - id->sectionName = id->includeTo; - else - id->active = false; - } - - /* Lookup the parse data, if it is not there then create it. */ - SectionMapEl *sectionMapEl = sectionMap.find( id->sectionName ); - if ( sectionMapEl == 0 ) { - ParseData *newPd = new ParseData( id->fileName, id->sectionName, - id->sectionLoc ); - sectionMapEl = sectionMap.insert( id->sectionName, newPd ); - } - id->pd = sectionMapEl->value; - } | - /* Empty */ { - /* No machine name. Just use the previous section setup. Report an - * error if there is no previous section */ - if ( id->pd == 0 ) { - error(id->sectionLoc) << "the first ragel section does not have a name" << endl; - id->pd = new ParseData( id->fileName, "", id->sectionLoc ); - } - }; - -EndSection: - TK_Section { $$ = @1.last_line; } | - TK_SectionNL { $$ = @1.last_line + 1; }; - -/* A NonEmpty list of statements in a fsm. */ -StatementList: - StatementList Statement | - /* Nothing */; - -/* The differnt types of statements in a fsm spec. */ -Statement: - Assignment | - Instantiation | - ActionSpec | - AlphSpec | - GetKeySpec | - RangeSpec | - Include | - Write | - Access | - Variable; - -/* Garble up to the next ; */ -Statement: error ';' { yyerrok; }; - -/* Allow the user to create a named fsm action that can be referenced when - * building a machine. */ -ActionSpec: - KW_Action TK_Word '{' InlineBlock '}' { - if ( id->active ) { - if ( id->pd->actionDict.find( $2.data ) ) { - /* Recover by just ignoring the duplicate. */ - error(@2) << "action \"" << $2.data << "\" already defined" << endl; - } - else { - /* Add the action to the list of actions. */ - Action *newAction = new Action( InputLoc(@3), $2.data, $4, id->nameRefList ); - - /* Insert to list and dict. */ - id->pd->actionList.append( newAction ); - id->pd->actionDict.insert( newAction ); - } - } - }; - -/* Specifies the data type of the input alphabet. One or two words - * followed by a semi-colon. */ -AlphSpec: - KW_AlphType TK_Word TK_Word TK_Semi { - if ( id->active ) { - if ( ! id->pd->setAlphType( $2.data, $3.data ) ) { - // Recover by ignoring the alphtype statement. - error(@2) << "\"" << $2.data << - " " << $3.data << "\" is not a valid alphabet type" << endl; - } - } - } | - KW_AlphType TK_Word TK_Semi { - if ( id->active ) { - if ( ! id->pd->setAlphType( $2.data ) ) { - // Recover by ignoring the alphtype statement. - error(@2) << "\"" << $2.data << "\" is not a valid alphabet type" << endl; - } - } - }; - -GetKeySpec: - KW_GetKey InlineBlock TK_Semi { - if ( id->active ) - id->pd->getKeyExpr = $2; - }; - -/* Specifies a range to assume that the input characters will fall into. */ -RangeSpec: - KW_Range AlphabetNum AlphabetNum ';' { - if ( id->active ) { - // Save the upper and lower ends of the range and emit the line number. - id->pd->lowerNum = $2.data; - id->pd->upperNum = $3.data; - id->pd->rangeLowLoc = InputLoc(@2); - id->pd->rangeHighLoc = InputLoc(@3); - } - }; - - -Write: - WriteOpen WriteOptions ';' { - if ( id->active ) - *outStream << "\n"; - }; - -WriteOpen: - KW_Write TK_Word { - if ( id->active ) { - openSection(); - if ( strcmp( $2.data, "data" ) != 0 && - strcmp( $2.data, "init" ) != 0 && - strcmp( $2.data, "exec" ) != 0 && - strcmp( $2.data, "eof" ) != 0 ) - { - error( @2 ) << "unknown write command" << endl; - } - *outStream << " "; - } - }; - -WriteOptions: - WriteOptions TK_Word { - if ( id->active ) - *outStream << ""; - } | - /* Nothing */; - -Access: - KW_Access InlineBlock TK_Semi { - if ( id->active ) - id->pd->accessExpr = $2; - }; - -Variable: - KW_Variable InlineBlock TK_Semi { - if ( id->active ) { - if ( strcmp( $1.data, "curstate" ) == 0 ) { - id->pd->curStateExpr = $2; - } - } - }; - -/* Include statements are processed by both the scanner and the parser. */ -Include: - IncludeKeyword OptSection OptFileName ';' { - if ( id->active ) - doInclude( @1, $2.data, $3.data ); - }; - -IncludeKeyword: - KW_Include { - /* Do this immediately so that the scanner has a correct sense of the - * value in id->active when it reaches the end of the statement before - * the above action executes. */ - //getParseData( @1 ); - }; - -OptSection: TK_Word { $$ = $1; } | { $$.data = 0; $$.length = 0; }; -OptFileName: TK_Literal { $$ = $1; } | { $$.data = 0; $$.length = 0; }; - -/* An assignement statement. Assigns the definition of a machine to a variable name. */ -Assignment: - MachineName '=' Join ';' { - if ( id->active ) { - /* Main machine must be an instance. */ - bool isInstance = false; - if ( strcmp($1.data, machineMain) == 0 ) { - warning(@1) << "main machine will be implicitly instantiated" << endl; - isInstance = true; - } - - /* Generic creation of machine for instantiation and assignment. */ - JoinOrLm *joinOrLm = new JoinOrLm( $3 ); - tryMachineDef( @1, $1.data, joinOrLm, isInstance ); - } - }; - -/* An instantiation statement. Instantiates a machine and assigns it to a - * variable name. */ -Instantiation: - MachineName TK_ColonEquals JoinOrLm ';' { - /* Generic creation of machine for instantiation and assignment. */ - if ( id->active ) - tryMachineDef( @1, $1.data, $3, true ); - }; - -/* Capture the machine name for making the machine's priority name. */ -MachineName: - TK_Word { - if ( id->active ) { - /* Make/get the priority key. The name may have already been referenced - * and therefore exist. */ - PriorDictEl *priorDictEl; - if ( id->pd->priorDict.insert( $1.data, id->pd->nextPriorKey, &priorDictEl ) ) - id->pd->nextPriorKey += 1; - id->pd->curDefPriorKey = priorDictEl->value; - - /* Make/get the local error key. */ - LocalErrDictEl *localErrDictEl; - if ( id->pd->localErrDict.insert( $1.data, id->pd->nextLocalErrKey, &localErrDictEl ) ) - id->pd->nextLocalErrKey += 1; - id->pd->curDefLocalErrKey = localErrDictEl->value; - } - }; - -JoinOrLm: - Join { - $$ = new JoinOrLm( $1 ); - } | - TK_BarStar LmPartList '*' '|' { - /* Create a new factor going to a longest match structure. Record - * in the parse data that we have a longest match. */ - LongestMatch *lm = new LongestMatch( @1, $2 ); - if ( id->active ) - id->pd->lmList.append( lm ); - for ( LmPartList::Iter lmp = *($2); lmp.lte(); lmp++ ) - lmp->longestMatch = lm; - $$ = new JoinOrLm( lm ); - }; - -Join: - Join ',' Expression { - /* Append the expression to the list and return it. */ - $1->exprList.append( $3 ); - $$ = $1; - } | - Expression { - /* Create the expression list with the intial expression. */ - $$ = new Join( InputLoc(@1), $1 ); - }; - -/* Top level production in the parse of a fsm. The lowest precedence - * is the '|' (or), '&' (intersection), and '-' (subtraction) operators. */ -Expression: - Expression '|' Term { - $$ = new Expression( $1, $3, Expression::OrType ); - } %prec EXPR_MINUS | - Expression '&' Term { - $$ = new Expression( $1, $3, Expression::IntersectType ); - } %prec EXPR_MINUS | - Expression '-' Term { - $$ = new Expression( $1, $3, Expression::SubtractType ); - } %prec EXPR_MINUS | - Expression TK_DashDash Term { - $$ = new Expression( $1, $3, Expression::StrongSubtractType ); - } %prec EXPR_MINUS | - Term { - $$ = new Expression( $1 ); - } %prec EXPR_MINUS; - -Term: - Term FactorWithLabel { - $$ = new Term( $1, $2 ); - } | - Term '.' FactorWithLabel { - $$ = new Term( $1, $3 ); - } | - Term TK_ColonGt FactorWithLabel { - $$ = new Term( $1, $3, Term::RightStartType ); - } | - Term TK_ColonGtGt FactorWithLabel { - $$ = new Term( $1, $3, Term::RightFinishType ); - } | - Term TK_LtColon FactorWithLabel { - $$ = new Term( $1, $3, Term::LeftType ); - } | - FactorWithLabel { - $$ = new Term( $1 ); - }; - -FactorWithLabel: - TK_Word ':' FactorWithLabel { - /* Add the label to the list and pass the factor up. */ - $3->labels.prepend( Label(@1, $1.data) ); - $$ = $3; - } | - FactorWithEp; - -FactorWithEp: - FactorWithEp TK_Arrow LocalStateRef { - /* Add the target to the list and return the factor object. */ - $1->epsilonLinks.append( EpsilonLink( InputLoc(@2), id->nameRef ) ); - $$ = $1; - } | - FactorWithAug; - -/* A local state reference. Qualified name witout :: prefix. */ -LocalStateRef: - NoNameSep StateRefNames; - -/* Clear the name ref structure. */ -NoNameSep: - /* Nothing */ { - id->nameRef.empty(); - }; - -/* A qualified state reference. */ -StateRef: - OptNameSep StateRefNames; - -/* Optional leading name separator. */ -OptNameSep: - TK_NameSep { - /* Insert an inition null pointer val to indicate the existence of the - * initial name seperator. */ - id->nameRef.setAs( 0 ); - } | - /* Nothing. */ { - id->nameRef.empty(); - }; - -/* List of names separated by :: */ -StateRefNames: - StateRefNames TK_NameSep TK_Word { - id->nameRef.append( $3.data ); - } | - TK_Word { - id->nameRef.append( $1.data ); - }; - -/* Third group up in precedence. Allow users to embed actions and priorities */ -FactorWithAug: - FactorWithTransAction | - FactorWithPriority | - FactorWithCond | - FactorWithToStateAction | - FactorWithFromStateAction | - FactorWithEOFAction | - FactorWithGblErrorAction | - FactorWithLocalErrorAction | - FactorWithRep { - $$ = new FactorWithAug( $1 ); - }; - -FactorWithTransAction: - FactorWithAug AugTypeBase ActionEmbed { - /* Append the action to the factorWithAug, record the refernce from - * factorWithAug to the action and pass up the factorWithAug. */ - $1->actions.append( ParserAction( @2, $2, 0, $3 ) ); - $$ = $1; - }; - -FactorWithPriority: - FactorWithAug AugTypeBase PriorityAug { - if ( id->active ) { - /* Append the named priority to the factorWithAug and pass it up. */ - $1->priorityAugs.append( PriorityAug( $2, id->pd->curDefPriorKey, $3 ) ); - } - $$ = $1; - } | - FactorWithAug AugTypeBase '(' PriorityName ',' PriorityAug ')' { - /* Append the priority using a default name. */ - $1->priorityAugs.append( PriorityAug( $2, $4, $6 ) ); - $$ = $1; - }; - -FactorWithCond: - FactorWithAug AugTypeCond ActionEmbed { - $$->conditions.append( ParserAction( @2, $2, 0, $3 ) ); - $$ = $1; - }; - -AugTypeCond: - TK_StartCond { $$ = at_start; } | - '>' KW_When { $$ = at_start; } | - TK_AllCond { $$ = at_all; } | - '$' KW_When { $$ = at_all; } | - TK_LeavingCond { $$ = at_leave; } | - '%' KW_When { $$ = at_all; } | - KW_When { $$ = at_all; }; - -FactorWithToStateAction: - FactorWithAug AugTypeToState ActionEmbed { - /* Append the action, pass it up. */ - $1->actions.append( ParserAction( @2, $2, 0, $3 ) ); - $$ = $1; - }; - -FactorWithFromStateAction: - FactorWithAug AugTypeFromState ActionEmbed { - /* Append the action, pass it up. */ - $1->actions.append( ParserAction( @2, $2, 0, $3 ) ); - $$ = $1; - }; - -FactorWithEOFAction: - FactorWithAug AugTypeEOF ActionEmbed { - /* Append the action, pass it up. */ - $1->actions.append( ParserAction( @2, $2, 0, $3 ) ); - $$ = $1; - }; - -FactorWithGblErrorAction: - FactorWithAug AugTypeGblError ActionEmbed { - if ( id->active ) { - /* Append the action to the factorWithAug, record the refernce from - * factorWithAug to the action and pass up the factorWithAug. */ - $1->actions.append( ParserAction( @2, $2, id->pd->curDefLocalErrKey, $3 ) ); - } - $$ = $1; - }; - -FactorWithLocalErrorAction: - FactorWithAug AugTypeLocalError ActionEmbed { - if ( id->active ) { - /* Append the action to the factorWithAug, record the refernce from - * factorWithAug to the action and pass up the factorWithAug. */ - $1->actions.append( ParserAction( @2, $2, id->pd->curDefLocalErrKey, $3 ) ); - } - $$ = $1; - } | - FactorWithAug AugTypeLocalError '(' LocalErrName ',' ActionEmbed ')' { - /* Append the action to the factorWithAug, record the refernce from - * factorWithAug to the action and pass up the factorWithAug. */ - $1->actions.append( ParserAction( @2, $2, $4, $6 ) ); - $$ = $1; - }; - -/* A specified priority name. Looks up the name in the current priority - * dictionary. */ -PriorityName: - TK_Word { - if ( id->active ) { - // Lookup/create the priority key. - PriorDictEl *priorDictEl; - if ( id->pd->priorDict.insert( $1.data, id->pd->nextPriorKey, &priorDictEl ) ) - id->pd->nextPriorKey += 1; - - // Use the inserted/found priority key. - $$ = priorDictEl->value; - } - }; - -LocalErrName: - TK_Word { - if ( id->active ) { - /* Lookup/create the priority key. */ - LocalErrDictEl *localErrDictEl; - if ( id->pd->localErrDict.insert( $1.data, id->pd->nextLocalErrKey, &localErrDictEl ) ) - id->pd->nextLocalErrKey += 1; - - /* Use the inserted/found priority key. */ - $$ = localErrDictEl->value; - } - }; - -/* Priority change specs. */ -PriorityAug: - PriorityAugNum { - // Convert the priority number to a long. Check for overflow. - errno = 0; - int aug = strtol( $1.data, 0, 10 ); - if ( errno == ERANGE && aug == LONG_MAX ) { - // Priority number too large. Recover by setting the priority to 0. - error(@1) << "priority number " << $1.data << " overflows" << endl; - $$ = 0; - } - else if ( errno == ERANGE && aug == LONG_MIN ) { - // Priority number too large in the neg. Recover by using 0. - error(@1) << "priority number " << $1.data << " underflows" << endl; - $$ = 0; - } - else { - // No overflow or underflow. - $$ = aug; - } - }; - -PriorityAugNum: - TK_UInt | - '+' TK_UInt { - $$ = $2; - } | - '-' TK_UInt { - $$.data = "-"; - $$.length = 1; - $$.append( $2 ); - }; - -/* Classes of transtions on which to embed actions or change priorities. */ -AugTypeBase: - '@' { $$ = at_finish; } | - '%' { $$ = at_leave; } | - '$' { $$ = at_all; } | - '>' { $$ = at_start; }; - -/* Global error actions. */ -AugTypeGblError: - TK_StartGblError { $$ = at_start_gbl_error; } | - '>' KW_Err { $$ = at_start_gbl_error; } | - - TK_NotStartGblError { $$ = at_not_start_gbl_error; } | - '<' KW_Err { $$ = at_not_start_gbl_error; } | - - TK_AllGblError { $$ = at_all_gbl_error; } | - '$' KW_Err { $$ = at_all_gbl_error; } | - - TK_FinalGblError { $$ = at_final_gbl_error; } | - '%' KW_Err { $$ = at_final_gbl_error; } | - - TK_NotFinalGblError { $$ = at_not_final_gbl_error; } | - '@' KW_Err { $$ = at_not_final_gbl_error; } | - - TK_MiddleGblError { $$ = at_middle_gbl_error; } | - TK_Middle KW_Err { $$ = at_middle_gbl_error; }; - -/* Local error actions. */ -AugTypeLocalError: - TK_StartLocalError { $$ = at_start_local_error; } | - '>' KW_Lerr { $$ = at_start_local_error; } | - - TK_NotStartLocalError { $$ = at_not_start_local_error; } | - '<' KW_Lerr { $$ = at_not_start_local_error; } | - - TK_AllLocalError { $$ = at_all_local_error; } | - '$' KW_Lerr { $$ = at_all_local_error; } | - - TK_FinalLocalError { $$ = at_final_local_error; } | - '%' KW_Lerr { $$ = at_final_local_error; } | - - TK_NotFinalLocalError { $$ = at_not_final_local_error; } | - '@' KW_Lerr { $$ = at_not_final_local_error; } | - - TK_MiddleLocalError { $$ = at_middle_local_error; } | - TK_Middle KW_Lerr { $$ = at_middle_local_error; }; - -/* Eof state actions. */ -AugTypeEOF: - TK_StartEOF { $$ = at_start_eof; } | - '>' KW_Eof { $$ = at_start_eof; } | - - TK_NotStartEOF { $$ = at_not_start_eof; } | - '<' KW_Eof { $$ = at_not_start_eof; } | - - TK_AllEOF { $$ = at_all_eof; } | - '$' KW_Eof { $$ = at_all_eof; } | - - TK_FinalEOF { $$ = at_final_eof; } | - '%' KW_Eof { $$ = at_final_eof; } | - - TK_NotFinalEOF { $$ = at_not_final_eof; } | - '@' KW_Eof { $$ = at_not_final_eof; } | - - TK_MiddleEOF { $$ = at_middle_eof; } | - TK_Middle KW_Eof { $$ = at_middle_eof; }; - -/* To state actions. */ -AugTypeToState: - TK_StartToState { $$ = at_start_to_state; } | - '>' KW_To { $$ = at_start_to_state; } | - - TK_NotStartToState { $$ = at_not_start_to_state; } | - '<' KW_To { $$ = at_not_start_to_state; } | - - TK_AllToState { $$ = at_all_to_state; } | - '$' KW_To { $$ = at_all_to_state; } | - - TK_FinalToState { $$ = at_final_to_state; } | - '%' KW_To { $$ = at_final_to_state; } | - - TK_NotFinalToState { $$ = at_not_final_to_state; } | - '@' KW_To { $$ = at_not_final_to_state; } | - - TK_MiddleToState { $$ = at_middle_to_state; } | - TK_Middle KW_To { $$ = at_middle_to_state; }; - -/* From state actions. */ -AugTypeFromState: - TK_StartFromState { $$ = at_start_from_state; } | - '>' KW_From { $$ = at_start_from_state; } | - - TK_NotStartFromState { $$ = at_not_start_from_state; } | - '<' KW_From { $$ = at_not_start_from_state; } | - - TK_AllFromState { $$ = at_all_from_state; } | - '$' KW_From { $$ = at_all_from_state; } | - - TK_FinalFromState { $$ = at_final_from_state; } | - '%' KW_From { $$ = at_final_from_state; } | - - TK_NotFinalFromState { $$ = at_not_final_from_state; } | - '@' KW_From { $$ = at_not_final_from_state; } | - - TK_MiddleFromState { $$ = at_middle_from_state; } | - TK_Middle KW_From { $$ = at_middle_from_state; }; - - -/* Different ways to embed actions. A TK_Word is reference to an action given by - * the user as a statement in the fsm specification. An action can also be - * specified immediately. */ -ActionEmbed: - ActionEmbedWord | ActionEmbedBlock; - -ActionEmbedWord: - TK_Word { - if ( id->active ) { - /* Set the name in the actionDict. */ - Action *action = id->pd->actionDict.find( $1.data ); - if ( action != 0 ) { - /* Pass up the action element */ - $$ = action; - } - else { - /* Will recover by returning null as the action. */ - error(@1) << "action lookup of \"" << $1.data << "\" failed" << endl; - $$ = 0; - } - } - }; - -ActionEmbedBlock: - '{' InlineBlock '}' { - if ( id->active ) { - /* Create the action, add it to the list and pass up. */ - Action *newAction = new Action( InputLoc(@1), 0, $2, id->nameRefList ); - id->pd->actionList.append( newAction ); - $$ = newAction; - } - }; - -/* The fourth level of precedence. These are the trailing unary operators that - * allow for repetition. */ -FactorWithRep: - FactorWithRep '*' { - $$ = new FactorWithRep( InputLoc(@2), $1, 0, 0, - FactorWithRep::StarType ); - } | - FactorWithRep TK_StarStar { - $$ = new FactorWithRep( InputLoc(@2), $1, 0, 0, - FactorWithRep::StarStarType ); - } | - FactorWithRep '?' { - $$ = new FactorWithRep( InputLoc(@2), $1, 0, 0, - FactorWithRep::OptionalType ); - } | - FactorWithRep '+' { - $$ = new FactorWithRep( InputLoc(@2), $1, 0, 0, - FactorWithRep::PlusType ); - } | - FactorWithRep TK_RepOpOpen FactorRepNum '}' { - $$ = new FactorWithRep( InputLoc(@2), $1, $3, 0, - FactorWithRep::ExactType ); - } | - FactorWithRep TK_RepOpOpen ',' FactorRepNum '}' { - $$ = new FactorWithRep( InputLoc(@2), $1, 0, $4, - FactorWithRep::MaxType ); - } | - FactorWithRep TK_RepOpOpen FactorRepNum ',' '}' { - $$ = new FactorWithRep( InputLoc(@2), $1, $3, 0, - FactorWithRep::MinType ); - } | - FactorWithRep TK_RepOpOpen FactorRepNum ',' FactorRepNum '}' { - $$ = new FactorWithRep( InputLoc(@2), $1, $3, $5, - FactorWithRep::RangeType ); - } | - FactorWithNeg { - $$ = new FactorWithRep( InputLoc(@1), $1 ); - }; - -FactorRepNum: - TK_UInt { - // Convert the priority number to a long. Check for overflow. - errno = 0; - int rep = strtol( $1.data, 0, 10 ); - if ( errno == ERANGE && rep == LONG_MAX ) { - // Repetition too large. Recover by returing repetition 1. */ - error(@1) << "repetition number " << $1.data << " overflows" << endl; - $$ = 1; - } - else { - // Cannot be negative, so no overflow. - $$ = rep; - } - }; - -/* The fifth level up in precedence. Negation. */ -FactorWithNeg: - '!' FactorWithNeg { - $$ = new FactorWithNeg( InputLoc(@1), $2, FactorWithNeg::NegateType ); - } | - '^' FactorWithNeg { - $$ = new FactorWithNeg( InputLoc(@1), $2, FactorWithNeg::CharNegateType ); - } | - Factor { - $$ = new FactorWithNeg( InputLoc(@1), $1 ); - }; - -/* The highest level in precedence. Atomic machines such as references to other - * machines, literal machines, regular expressions or Expressions in side of - * parenthesis. */ -Factor: - TK_Literal { - // Create a new factor node going to a concat literal. */ - $$ = new Factor( new Literal( InputLoc(@1), $1, Literal::LitString ) ); - } | - TK_CiLiteral { - // Create a new factor node going to a concat literal. */ - $$ = new Factor( new Literal( InputLoc(@1), $1, Literal::LitString ) ); - $$->literal->caseInsensitive = true; - } | - AlphabetNum { - // Create a new factor node going to a literal number. */ - $$ = new Factor( new Literal( InputLoc(@1), $1, Literal::Number ) ); - } | - TK_Word { - if ( id->active ) { - // Find the named graph. - GraphDictEl *gdNode = id->pd->graphDict.find( $1.data ); - if ( gdNode == 0 ) { - // Recover by returning null as the factor node. - error(@1) << "graph lookup of \"" << $1.data << "\" failed" << endl; - $$ = 0; - } - else if ( gdNode->isInstance ) { - // Recover by retuning null as the factor node. - error(@1) << "references to graph instantiations not allowed " - "in expressions" << endl; - $$ = 0; - } - else { - // Create a factor node that is a lookup of an expression. - $$ = new Factor( InputLoc(@1), gdNode->value ); - } - } - } | - RE_SqOpen RegularExprOrData RE_SqClose { - // Create a new factor node going to an OR expression. */ - $$ = new Factor( new ReItem( InputLoc(@1), $2, ReItem::OrBlock ) ); - } | - RE_SqOpenNeg RegularExprOrData RE_SqClose { - // Create a new factor node going to a negated OR expression. */ - $$ = new Factor( new ReItem( InputLoc(@1), $2, ReItem::NegOrBlock ) ); - } | - RE_Slash RegularExpr RE_Slash { - if ( $3.length > 1 ) { - for ( char *p = $3.data; *p != 0; p++ ) { - if ( *p == 'i' ) - $2->caseInsensitive = true; - } - } - - // Create a new factor node going to a regular exp. - $$ = new Factor( $2 ); - } | - RangeLit TK_DotDot RangeLit { - // Create a new factor node going to a range. */ - $$ = new Factor( new Range( $1, $3 ) ); - } | - '(' Join ')' { - /* Create a new factor going to a parenthesized join. */ - $$ = new Factor( $2 ); - }; - -/* Garble up to the closing brace of a parenthesized expression. */ -Factor: '(' error ')' { $$ = 0; yyerrok; }; - -LmPartList: - LmPartList LongestMatchPart { - if ( $2 != 0 ) - $1->append( $2 ); - $$ = $1; - } | - LongestMatchPart { - /* Create a new list with the part. */ - $$ = new LmPartList; - if ( $1 != 0 ) - $$->append( $1 ); - }; - -LongestMatchPart: - ActionSpec { $$ = 0; } | - Assignment { $$ = 0; } | - Join OptLmPartAction ';' { - $$ = 0; - if ( id->active ) { - Action *action = $2; - if ( action != 0 ) - action->isLmAction = true; - $$ = new LongestMatchPart( $1, action, id->pd->nextLongestMatchId++ ); - } - }; - -OptLmPartAction: - TK_DoubleArrow ActionEmbed { $$ = $2; } | - ActionEmbedBlock { $$ = $1; } | - /* Nothing */ { $$ = 0; }; - - -/* Any form of a number that can be used as a basic machine. */ -AlphabetNum: - TK_UInt | - '-' TK_UInt { - $$.data = "-"; - $$.length = 1; - $$.append( $2 ); - } | - TK_Hex; - -InlineBlock: - InlineBlock InlineBlockItem { - /* Append the item to the list, return the list. */ - $1->append( $2 ); - $$ = $1; - } | - /* Empty */ { - /* Start with empty list. */ - $$ = new InlineList; - }; - -/* Items in a struct block. */ -InlineBlockItem: - InlineBlockAny { - /* Add a text segment. */ - $$ = new InlineItem( @1, $1.data, InlineItem::Text ); - } | - InlineBlockSymbol { - /* Add a text segment, need string on heap. */ - $$ = new InlineItem( @1, strdup($1.data), InlineItem::Text ); - } | - InlineBlockInterpret { - /* Pass the inline item up. */ - $$ = $1; - }; - -/* Uninteresting tokens in a struct block. Data allocated by scanner. */ -InlineBlockAny: - IL_WhiteSpace | IL_Comment | IL_Literal | IL_Symbol | - TK_UInt | TK_Hex | TK_Word; - -/* Symbols in a struct block, no data allocated. */ -InlineBlockSymbol: - ',' { $$.data = ","; $$.length = 1; } | - ';' { $$.data = ";"; $$.length = 1; } | - '(' { $$.data = "("; $$.length = 1; } | - ')' { $$.data = ")"; $$.length = 1; } | - '*' { $$.data = "*"; $$.length = 1; } | - TK_NameSep { $$.data = "::"; $$.length = 2; }; - -/* Interpreted statements in a struct block. */ -InlineBlockInterpret: - InlineExprInterpret { - /* Pass up interpreted items of inline expressions. */ - $$ = $1; - } | - KW_Hold SetNoWs ';' SetWs { - $$ = new InlineItem( @1, InlineItem::Hold ); - } | - KW_Exec SetNoWs InlineExpr ';' SetWs { - $$ = new InlineItem( @1, InlineItem::Exec ); - $$->children = $3; - } | - KW_Goto SetNoWs StateRef ';' SetWs { - $$ = new InlineItem( @1, new NameRef(id->nameRef), InlineItem::Goto ); - } | - KW_Goto SetNoWs '*' SetWs InlineExpr ';' { - $$ = new InlineItem( @1, InlineItem::GotoExpr ); - $$->children = $5; - } | - KW_Next SetNoWs StateRef ';' SetWs { - $$ = new InlineItem( @1, new NameRef(id->nameRef), InlineItem::Next ); - } | - KW_Next SetNoWs '*' SetWs InlineExpr ';' { - $$ = new InlineItem( @1, InlineItem::NextExpr ); - $$->children = $5; - } | - KW_Call SetNoWs StateRef ';' SetWs { - $$ = new InlineItem( @1, new NameRef(id->nameRef), InlineItem::Call ); - } | - KW_Call SetNoWs '*' SetWs InlineExpr ';' { - $$ = new InlineItem( @1, InlineItem::CallExpr ); - $$->children = $5; - } | - KW_Ret SetNoWs ';' SetWs { - $$ = new InlineItem( @1, InlineItem::Ret ); - } | - KW_Break SetNoWs ';' SetWs { - $$ = new InlineItem( @1, InlineItem::Break ); - }; - -/* Turn off whitspace collecting when scanning inline blocks. */ -SetNoWs: { inlineWhitespace = false; }; - -/* Turn on whitespace collecting when scanning inline blocks. */ -SetWs: { inlineWhitespace = true; }; - -InlineExpr: - InlineExpr InlineExprItem { - $1->append( $2 ); - $$ = $1; - } | - /* Empty */ { - /* Init the list used for this expr. */ - $$ = new InlineList; - }; - -InlineExprItem: - InlineExprAny { - /* Return a text segment. */ - $$ = new InlineItem( @1, $1.data, InlineItem::Text ); - } | - InlineExprSymbol { - /* Return a text segment, must heap alloc the text. */ - $$ = new InlineItem( @1, strdup($1.data), InlineItem::Text ); - } | - InlineExprInterpret { - /* Pass the inline item up. */ - $$ = $1; - }; - -InlineExprInterpret: - KW_PChar { - $$ = new InlineItem( @1, InlineItem::PChar ); - } | - KW_Char { - $$ = new InlineItem( @1, InlineItem::Char ); - } | - KW_CurState { - $$ = new InlineItem( @1, InlineItem::Curs ); - } | - KW_TargState { - $$ = new InlineItem( @1, InlineItem::Targs ); - } | - KW_Entry SetNoWs '(' StateRef ')' SetWs { - $$ = new InlineItem( @1, new NameRef(id->nameRef), InlineItem::Entry ); - }; - -InlineExprAny: - IL_WhiteSpace | IL_Comment | IL_Literal | IL_Symbol | - TK_UInt | TK_Hex | TK_Word; - -/* Anything in a ExecValExpr that is not dynamically allocated. This includes - * all special symbols caught in inline code except the semi. */ -InlineExprSymbol: - '(' { $$.data = "("; $$.length = 1; } | - ')' { $$.data = ")"; $$.length = 1; } | - '*' { $$.data = "*"; $$.length = 1; } | - TK_NameSep { $$.data = "::"; $$.length = 1; }; - -/* Parser for regular expression fsms. Any number of expression items which - * generally gives a machine one character long or one character long stared. */ -RegularExpr: - RegularExpr RegularExprItem { - // An optimization to lessen the tree size. If a non-starred char is directly - // under the left side on the right and the right side is another non-starred - // char then paste them together and return the left side. Otherwise - // just put the two under a new reg exp node. - if ( $2->type == ReItem::Data && !$2->star && - $1->type == RegExpr::RecurseItem && - $1->item->type == ReItem::Data && !$1->item->star ) - { - // Append the right side to the right side of the left and toss - // the right side. - $1->item->data.append( $2->data ); - delete $2; - $$ = $1; - } - else { - $$ = new RegExpr( $1, $2 ); - } - } | - /* Nothing */ { - // Can't optimize the tree. - $$ = new RegExpr(); - }; - -/* RegularExprItems can be a character spec with an optional staring of the char. */ -RegularExprItem: - RegularExprChar RE_Star { - $1->star = true; - $$ = $1; - } | - RegularExprChar { - $$ = $1; - }; - -/* A character spec can be a set of characters inside of square parenthesis, - * a dot specifying any character or some explicitly stated character. */ -RegularExprChar: - RE_SqOpen RegularExprOrData RE_SqClose { - $$ = new ReItem( InputLoc(@1), $2, ReItem::OrBlock ); - } | - RE_SqOpenNeg RegularExprOrData RE_SqClose { - $$ = new ReItem( InputLoc(@1), $2, ReItem::NegOrBlock ); - } | - RE_Dot { - $$ = new ReItem( InputLoc(@1), ReItem::Dot ); - } | - RE_Char { - $$ = new ReItem( InputLoc(@1), $1.data[0] ); - }; - -/* The data inside of a [] expression in a regular expression. Accepts any - * number of characters or ranges. */ -RegularExprOrData: - RegularExprOrData RegularExprOrChar { - // An optimization to lessen the tree size. If an or char is directly - // under the left side on the right and the right side is another or - // char then paste them together and return the left side. Otherwise - // just put the two under a new or data node. - if ( $2->type == ReOrItem::Data && - $1->type == ReOrBlock::RecurseItem && - $1->item->type == ReOrItem::Data ) - { - // Append the right side to right side of the left and toss - // the right side. - $1->item->data.append( $2->data ); - delete $2; - $$ = $1; - } - else { - // Can't optimize, put the left and right under a new node. - $$ = new ReOrBlock( $1, $2 ); - } - } | - /* Nothing */ { - $$ = new ReOrBlock(); - }; - - -/* A single character inside of an or expression. Can either be a character - * or a set of characters. */ -RegularExprOrChar: - RE_Char { - $$ = new ReOrItem( InputLoc(@1), $1.data[0] ); - } | - RE_Char RE_Dash RE_Char { - $$ = new ReOrItem( InputLoc(@2), $1.data[0], $3.data[0] ); - }; - -RangeLit: - TK_Literal { - // Range literas must have only one char. - if ( strlen($1.data) != 1 ) { - // Recover by using the literal anyways. - error(@1) << "literal used in range must be of length 1" << endl; - } - $$ = new Literal( InputLoc(@1), $1, Literal::LitString ); - } | - AlphabetNum { - // Create a new literal number. - $$ = new Literal( InputLoc(@1), $1, Literal::Number ); - }; - -%% - -/* Try to do a definition, common to assignment and instantiation. Warns about - * instances other than main not being implemented yet. */ -void tryMachineDef( const YYLTYPE &loc, char *name, JoinOrLm *joinOrLm, bool isInstance ) -{ - GraphDictEl *newEl = id->pd->graphDict.insert( name ); - if ( newEl != 0 ) { - /* New element in the dict, all good. */ - newEl->value = new VarDef( name, joinOrLm ); - newEl->isInstance = isInstance; - newEl->loc = loc; - - /* It it is an instance, put on the instance list. */ - if ( isInstance ) - id->pd->instanceList.append( newEl ); - } - else { - // Recover by ignoring the duplicate. - error(loc) << "fsm \"" << name << "\" previously defined" << endl; - } -} - -void doInclude( const InputLoc &loc, char *sectionName, char *inputFile ) -{ - /* Bail if we hit the max include depth. */ - if ( includeDepth == INCLUDE_STACK_SIZE ) { - error(loc) << "hit maximum include depth of " << INCLUDE_STACK_SIZE << endl; - } - else { - char *includeTo = id->pd->fsmName; - - /* Implement defaults for the input file and section name. */ - if ( inputFile == 0 ) - inputFile = id->fileName; - if ( sectionName == 0 ) - sectionName = id->pd->fsmName; - - /* Parse the included file. */ - InputData *oldId = id; - id = new InputData( inputFile, sectionName, includeTo ); - includeDepth += 1; - yyparse(); - includeDepth -= 1; - delete id; - id = oldId; - } -} - -void openSection() -{ - if ( ! sectionOpened ) { - sectionOpened = true; - *outStream << "pd->fsmName << "\">\n"; - } -} - -void yyerror( char *err ) -{ - /* Bison won't give us the location, but in the last call to the scanner we - * saved a pointer to the location variable. Use that. instead. */ - error(::yylloc->first_line, ::yylloc->first_column) << err << endl; -} diff --git a/ragel/rlscan.lex b/ragel/rlscan.lex deleted file mode 100644 index 8116c92..0000000 --- a/ragel/rlscan.lex +++ /dev/null @@ -1,1212 +0,0 @@ -/* - * Copyright 2001-2006 Adrian Thurston - */ - -/* This file is part of Ragel. - * - * Ragel is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Ragel is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Ragel; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -%{ - -#define YY_NEVER_INTERACTIVE 1 -//#define WANT_TOKEN_WRITE - -#include -#include "ragel.h" -#include "rlparse.h" -#include "parsedata.h" -#include "buffer.h" - -using std::cout; -using std::cerr; -using std::endl; - -Buffer tokbuf; -int builtinBrace = 0; -bool inlineWhitespace = true; -bool handlingInclude = false; -bool multiline = false; - -/* Used for recognising host language code blocks, init with anything not - * involved in the host lang test. */ -int previous_tokens[2] = { TK_Section, TK_Section }; - -/* These keep track of the start of an inline comment or literal string for - * reporting unterminated comments or strings. */ -int il_comm_lit_first_line; -int il_comm_lit_first_column; - -/* These keep track of the start of a code block for reporting unterminated - * code blocks. */ -int il_code_first_line; -int il_code_first_column; - -/* Include Stack data. */ -YY_BUFFER_STATE buff_stack[INCLUDE_STACK_SIZE]; -bool multiline_stack[INCLUDE_STACK_SIZE]; -int inc_stack_ptr = 0; - -YYSTYPE *yylval; -YYLTYPE *yylloc; - -extern InputData *id; -extern int includeDepth; - -void garble(); - -void extendToken( char *data, int len ); -void extendToken(); - -int emitToken( int token, char *data, int len ); -int emitNoData( int token ); -void passThrough( char *data ); -bool openMachineSpecBlock(); -void popInclude(); - -enum InlineBlockType { - CurlyDelimited, - SemiTerminated -} inlineBlockType; - -/* Using a wrapper for the parser, must the lex declaration. */ -#define YY_DECL int ragel_lex() - -%} - -/* Outside an fsm machine specification ("outside code"). */ -%x OC_SGL_LIT -%x OC_DBL_LIT -%x OC_C_COM -%x OC_CXX_COM - -/* Inside a fsm machine specification. */ -%x RL_INITIAL -%x RL_SLIT -%x RL_DLIT -%x RL_OREXP -%x RL_REGEXP -%x RL_REGEXP_OR -%x RL_SHELL_COM -%x RL_VERBOSE_EMBED -%x RL_WRITE - -/* Inline code. */ -%x IL_INITIAL -%x IL_SGL_LIT -%x IL_DBL_LIT -%x IL_C_COM -%x IL_CXX_COM - -WSCHAR [\t\n\v\f\r ] -IDENT [a-zA-Z_][a-zA-Z_0-9]* - -%% - - /* Numbers in outter code. */ -[0-9]+ { - garble(); - passThrough( yytext ); -} - - /* Words in outter code. */ -{IDENT} { - garble(); - passThrough( yytext ); -} - - /* Begin a c style comment. */ -"/*" { - BEGIN(OC_C_COM); - extendToken(); - passThrough( yytext ); -} - /* Data in a C style comment. */ -. extendToken(); passThrough( yytext ); -\n extendToken(); passThrough( yytext ); - - /* Terminate a C style comment. */ -"*/" { - BEGIN(INITIAL); - garble(); - passThrough( yytext ); -} - - /* Begin a C++ style comment. */ -"//" { - BEGIN(OC_CXX_COM); - extendToken(); - passThrough( yytext ); -} - /* Data in a C++ style comment. */ -[^\n]+ { - extendToken(); - passThrough( yytext ); -} - /* Terminate a C++ style comment. */ -\n { - BEGIN(INITIAL); - garble(); - passThrough( yytext ); -} - - - /* Start literals. */ -\' { - BEGIN(OC_SGL_LIT); - extendToken(); - passThrough( yytext ); -} -\" { - BEGIN(OC_DBL_LIT); - extendToken(); - passThrough( yytext ); -} - /* Various escape sequences in literals. We don't need to get them - * all here. We just need to pick off the ones that could confuse us - * about the literal we are matchine */ -\\\' extendToken(); passThrough( yytext ); -\\\" extendToken(); passThrough( yytext ); -\\\\ extendToken(); passThrough( yytext ); - /* Characters in literals. */ -[^\"] extendToken(); passThrough( yytext ); -[^\'] extendToken(); passThrough( yytext ); - /* Terminate a double literal */ -\" { - BEGIN(INITIAL); - garble(); - passThrough( yytext ); -} - /* Terminate a single literal. */ -\' { - BEGIN(INITIAL); - garble(); - passThrough( yytext ); -} - - /* Whitespace. */ -{WSCHAR}+ { - garble(); - passThrough( yytext ); -} - - /* Section Deliminator */ -"%%" { - BEGIN(RL_INITIAL); - multiline = false; - return emitNoData( TK_Section ); -} - - /* Section Deliminator */ -"%%{" { - BEGIN(RL_INITIAL); - multiline = true; - return emitNoData( TK_Section ); -} - -"{" { - garble(); - passThrough( yytext ); -} - -"}" { - garble(); - passThrough( yytext ); -} - -";" { - garble(); - passThrough( yytext ); -} - - /* Any other characters. */ -. { - garble(); - passThrough( yytext ); -} - - /* Numbers. */ -[0-9][0-9]* { - return emitToken( TK_UInt, yytext, yyleng ); -} -0x[0-9a-fA-F][0-9a-fA-F]* { - return emitToken( TK_Hex, yytext, yyleng ); -} - - /* Keywords in RL and IL. */ -variable\ [a-zA-Z_]+ { - BEGIN(IL_INITIAL); - inlineBlockType = SemiTerminated; - return emitToken( KW_Variable, yytext+9, yyleng-9 ); -} -access { - BEGIN(IL_INITIAL); - inlineBlockType = SemiTerminated; - return emitNoData( KW_Access ); -} -action { - return emitNoData( KW_Action ); -} -alphtype { - BEGIN(IL_INITIAL); - inlineWhitespace = false; - inlineBlockType = SemiTerminated; - return emitNoData( KW_AlphType ); -} -getkey { - BEGIN(IL_INITIAL); - inlineBlockType = SemiTerminated; - return emitNoData( KW_GetKey ); -} -when { - return emitNoData( KW_When ); -} -eof { - return emitNoData( KW_Eof ); -} -err { - return emitNoData( KW_Err ); -} -lerr { - return emitNoData( KW_Lerr ); -} -to { - return emitNoData( KW_To ); -} -from { - return emitNoData( KW_From ); -} - - - /* -range { - return emitNoData( KW_Range ); -}*/ - -write { - BEGIN(RL_WRITE); - return emitNoData( KW_Write ); -} -machine { - return emitNoData( KW_Machine ); -} -include { - /* Include tokens statments are processed by both the scanner and the - * parser. The scanner opens the include file and switches to it and the - * parser invokes a new parser for handling the tokens. We use - * handlingInclude to indicate that the scanner is processing an include - * directive. Ends at ; */ - handlingInclude = true; - return emitNoData( KW_Include ); -} - -{WSCHAR}+ garble(); -; { - BEGIN(RL_INITIAL); - return emitNoData( ';' ); -} - - /* These must be synced in rlparse.y */ -fpc { - return emitNoData( KW_PChar ); -} -fc { - return emitNoData( KW_Char ); -} -fhold { - return emitNoData( KW_Hold ); -} -fgoto { - return emitNoData( KW_Goto ); -} -fcall { - return emitNoData( KW_Call ); -} -fret { - return emitNoData( KW_Ret ); -} -fcurs { - return emitNoData( KW_CurState ); -} -ftargs { - return emitNoData( KW_TargState ); -} -fentry { - return emitNoData( KW_Entry ); -} -fnext { - return emitNoData( KW_Next ); -} -fexec { - return emitNoData( KW_Exec ); -} -fbreak { - return emitNoData( KW_Break ); -} - - /* Words. */ -{IDENT} { - return emitToken( TK_Word, yytext, yyleng ); -} - - /* Begin a shell style comment. */ -# { - BEGIN(RL_SHELL_COM); - extendToken(); -} - /* Data in a shell style comment. */ -[^\n]+ { - extendToken(); -} - /* Terminate a C++ style comment. */ -\n { - BEGIN(RL_INITIAL); - garble(); -} - - /* - * Start single and double literals. - */ -' { - BEGIN(RL_SLIT); - extendToken(); -} -\" { - BEGIN(RL_DLIT); - extendToken(); -} - - /* Escape sequences in single and double literals. */ -\\0 extendToken( "\0", 1 ); -\\a extendToken( "\a", 1 ); -\\b extendToken( "\b", 1 ); -\\t extendToken( "\t", 1 ); -\\n extendToken( "\n", 1 ); -\\v extendToken( "\v", 1 ); -\\f extendToken( "\f", 1 ); -\\r extendToken( "\r", 1 ); -\\\n extendToken(); -\\. extendToken( yytext+1, 1 ); - - /* Characters in literals. */ -[^'] extendToken( yytext, 1 ); -[^"] extendToken( yytext, 1 ); - - /* Terminate a single literal. */ -'[i]* { - BEGIN(RL_INITIAL); - return emitToken( yytext[1] == 'i' ? TK_CiLiteral : TK_Literal, 0, 0 ); -} - /* Terminate a double literal */ -\"[i]* { - BEGIN(RL_INITIAL); - return emitToken( yytext[1] == 'i' ? TK_CiLiteral : TK_Literal, 0, 0 ); -} - - /* - * Start an OR expression. - */ -"[" { - BEGIN(RL_OREXP); - return emitNoData( RE_SqOpen ); -} - -"\[^" { - BEGIN(RL_OREXP); - return emitNoData( RE_SqOpenNeg ); -} - - /* Escape sequences in OR expressions. */ -\\0 { return emitToken( RE_Char, "\0", 1 ); } -\\a { return emitToken( RE_Char, "\a", 1 ); } -\\b { return emitToken( RE_Char, "\b", 1 ); } -\\t { return emitToken( RE_Char, "\t", 1 ); } -\\n { return emitToken( RE_Char, "\n", 1 ); } -\\v { return emitToken( RE_Char, "\v", 1 ); } -\\f { return emitToken( RE_Char, "\f", 1 ); } -\\r { return emitToken( RE_Char, "\r", 1 ); } -\\\n { garble(); } -\\. { return emitToken( RE_Char, yytext+1, 1 ); } - - /* Range dash in an OR expression. */ -- { - return emitNoData( RE_Dash ); -} - - /* Characters in an OR expression. */ -[^\]] { - return emitToken( RE_Char, yytext, 1 ); -} - - /* Terminate an OR expression. */ -\] { - BEGIN(RL_INITIAL); - return emitNoData( RE_SqClose ); -} - - /* - * Start a regular expression. - */ -\/ { - BEGIN(RL_REGEXP); - return emitNoData( RE_Slash ); -} - - /* Escape sequences in regular expressions. */ -\\0 { - return emitToken( RE_Char, "\0", 1 ); -} -\\a { - return emitToken( RE_Char, "\a", 1 ); -} -\\b { - return emitToken( RE_Char, "\b", 1 ); -} -\\t { - return emitToken( RE_Char, "\t", 1 ); -} -\\n { - return emitToken( RE_Char, "\n", 1 ); -} -\\v { - return emitToken( RE_Char, "\v", 1 ); -} -\\f { - return emitToken( RE_Char, "\f", 1 ); -} -\\r { - return emitToken( RE_Char, "\r", 1 ); -} -\\\n { - garble(); -} -\\. { - return emitToken( RE_Char, yytext+1, 1 ); -} - - /* Special characters in a regular expression. */ -\. { - return emitNoData( RE_Dot ); -} -\* { - return emitNoData( RE_Star ); -} -"\[^" { - BEGIN(RL_REGEXP_OR); - return emitNoData( RE_SqOpenNeg ); -} -"\[" { - BEGIN(RL_REGEXP_OR); - return emitNoData( RE_SqOpen ); -} - - /* Range dash in a regular expression or set. */ -- { - return emitNoData( RE_Dash ); -} - - /* Terminate an or set or a regular expression. */ -\] { - BEGIN(RL_REGEXP); - return emitNoData( RE_SqClose ); -} - - /* Characters in a regular expression. */ -[^/] { - return emitToken( RE_Char, yytext, 1 ); -} - - /* Terminate a regular expression */ -\/[i]* { - BEGIN(RL_INITIAL); - return emitToken( RE_Slash, yytext, yyleng ); -} - - /* Builtin code move to Builtin initial. */ -"{" { - if ( openMachineSpecBlock() ) { - /* Plain bracket. */ - return emitNoData( *yytext ); - } - else { - /* Start an inline code block. Keep track of where it started in case - * it terminates prematurely. Return the open bracket. */ - BEGIN(IL_INITIAL); - inlineBlockType = CurlyDelimited; - il_code_first_line = id->last_line; - il_code_first_column = id->last_column+1; - builtinBrace++; - return emitNoData( *yytext ); - } -} - -\.\. { - return emitNoData( TK_DotDot ); -} - -:> { - return emitNoData( TK_ColonGt ); -} - -:>> { - return emitNoData( TK_ColonGtGt ); -} - -<: { - return emitNoData( TK_LtColon ); -} - --- { - return emitNoData( TK_DashDash ); -} - - /* The instantiation operator. */ -:= { - return emitNoData( TK_ColonEquals ); -} - - /* Error actions. */ -\>\! { - return emitNoData( TK_StartGblError ); -} -\$\! { - return emitNoData( TK_AllGblError ); -} -%\! { - return emitNoData( TK_FinalGblError ); -} -<\! { - return emitNoData( TK_NotStartGblError ); -} -@\! { - return emitNoData( TK_NotFinalGblError ); -} -<>\! { - return emitNoData( TK_MiddleGblError ); -} - - /* Local error actions. */ -\>\^ { - return emitNoData( TK_StartLocalError ); -} -\$\^ { - return emitNoData( TK_AllLocalError ); -} -%\^ { - return emitNoData( TK_FinalLocalError ); -} -<\^ { - return emitNoData( TK_NotStartLocalError ); -} -@\^ { - return emitNoData( TK_NotFinalLocalError ); -} -<>\^ { - return emitNoData( TK_MiddleLocalError ); -} - - /* EOF Actions. */ -\>\/ { - return emitNoData( TK_StartEOF ); -} -\$\/ { - return emitNoData( TK_AllEOF ); -} -%\/ { - return emitNoData( TK_FinalEOF ); -} -<\/ { - return emitNoData( TK_NotStartEOF ); -} -@\/ { - return emitNoData( TK_NotFinalEOF ); -} -<>\/ { - return emitNoData( TK_MiddleEOF ); -} - - /* To State Actions. */ -\>~ { - return emitNoData( TK_StartToState ); -} -\$~ { - return emitNoData( TK_AllToState ); -} -%~ { - return emitNoData( TK_FinalToState ); -} -<~ { - return emitNoData( TK_NotStartToState ); -} -@~ { - return emitNoData( TK_NotFinalToState ); -} -<>~ { - return emitNoData( TK_MiddleToState ); -} - - /* From State Actions. */ -\>\* { - return emitNoData( TK_StartFromState ); -} -\$\* { - return emitNoData( TK_AllFromState ); -} -%\* { - return emitNoData( TK_FinalFromState ); -} -<\* { - return emitNoData( TK_NotStartFromState ); -} -@\* { - return emitNoData( TK_NotFinalFromState ); -} -<>\* { - return emitNoData( TK_MiddleFromState ); -} - -<> { - return emitNoData( TK_Middle ); -} - -\>\? { - return emitNoData( TK_StartCond ); -} -\$\? { - return emitNoData( TK_AllCond ); -} -%\? { - return emitNoData( TK_LeavingCond ); -} - - /* The Arrow operator. */ --> { - return emitNoData( TK_Arrow ); -} - - /* The double arrow operator. */ -=> { - return emitNoData( TK_DoubleArrow ); -} - - /* Double star (longest match kleene star). */ -\*\* { - return emitNoData( TK_StarStar ); -} - - /* Name separator. */ -:: { - return emitNoData( TK_NameSep ); -} - - /* Opening of longest match. */ -\|\* { - return emitNoData( TK_BarStar ); -} - - /* Catch the repetition operator now to free up the parser. Once caught, - * Send only the opening brace and rescan the rest so it can be broken - * up for the parser. */ -\{([0-9]+(,[0-9]*)?|,[0-9]+)\} { - yyless(1); - return emitNoData( TK_RepOpOpen ); -} - - /* Section Deliminator */ -"}%%" { - BEGIN(INITIAL); - return emitNoData( TK_Section ); -} - - /* Whitespace. */ -[\t\v\f\r ] garble(); -\n { - if ( multiline ) - garble(); - else { - BEGIN(INITIAL); - return emitNoData( TK_SectionNL ); - } -} - - /* Any other characters. */ -. { - return emitNoData( *yytext ); -} - - /* End of input in a literal is an error. */ -<> { - error(id->first_line, id->first_column) << "unterminated literal" << endl; - exit(1); -} - - /* End of input in a comment is an error. */ -<> { - error(id->first_line, id->first_column) << "unterminated comment" << endl; - exit(1); -} - - /* Begin a C style comment. */ -"/*" { - BEGIN(IL_C_COM); - il_comm_lit_first_line = id->last_line; - il_comm_lit_first_column = id->last_column+1; - extendToken( yytext, yyleng ); -} - /* Data in a C style comment. */ -\n extendToken( yytext, 1 ); -. extendToken( yytext, 1 ); - - /* Terminate a C style comment. */ -"*/" { - BEGIN(IL_INITIAL); - return emitToken( IL_Comment, yytext, 2 ); -} - - /* Begin a C++ style comment. */ -"//" { - BEGIN(IL_CXX_COM); - il_comm_lit_first_line = id->last_line; - il_comm_lit_first_column = id->last_column+1; - extendToken( yytext, yyleng ); -} - /* Data in a C++ style comment. */ -[^\n]+ { - extendToken( yytext, yyleng ); -} - /* Terminate a C++ style comment. */ -\n { - BEGIN(IL_INITIAL); - return emitToken( IL_Comment, yytext, 1 ); -} - - - /* Start literals. */ -' { - BEGIN(IL_SGL_LIT); - il_comm_lit_first_line = id->last_line; - il_comm_lit_first_column = id->last_column+1; - extendToken( yytext, 1 ); -} -\" { - BEGIN(IL_DBL_LIT); - il_comm_lit_first_line = id->last_line; - il_comm_lit_first_column = id->last_column+1; - extendToken( yytext, 1 ); -} - /* Various escape sequences in literals. We don't need to get them - * all here. We just need to pick off the ones that could confuse us - * about the literal we are matching */ -\\' extendToken( yytext, yyleng ); -\\\" extendToken( yytext, yyleng ); -\\\\ extendToken( yytext, yyleng ); - /* Characters in literals. */ -[^\"] extendToken( yytext, 1 ); -[^'] extendToken( yytext, 1 ); - - /* Terminate a double literal */ -\" { - BEGIN(IL_INITIAL); - return emitToken( IL_Literal, yytext, 1 ); -} - /* Terminate a single literal. */ -' { - BEGIN(IL_INITIAL); - return emitToken( IL_Literal, yytext, 1 ); -} - - /* Open Brace, increment count of open braces. */ -"{" { - builtinBrace++; - return emitToken( IL_Symbol, yytext, 1 ); -} - - /* Close brace, decrement count of open braces. */ -"}" { - builtinBrace--; - if ( inlineBlockType == CurlyDelimited && builtinBrace == 0 ) { - /* Inline code block ends. */ - BEGIN(RL_INITIAL); - inlineWhitespace = true; - return emitNoData( *yytext ); - } - else { - /* Either a semi terminated inline block or only the closing brace of - * some inner scope, not the block's closing brace. */ - return emitToken( IL_Symbol, yytext, 1 ); - } -} - - /* May need to terminate the inline block. */ -; { - if ( inlineBlockType == SemiTerminated ) { - /* Inline code block ends. */ - BEGIN(RL_INITIAL); - inlineWhitespace = true; - return emitNoData( TK_Semi ); - } - else { - /* Not ending. The semi is sent as a token, not a generic symbol. */ - return emitNoData( *yytext ); - } -} - - /* Catch some symbols so they can be - * sent as tokens instead as generic symbols. */ -[*()] { - return emitNoData( *yytext ); -} -:: { - return emitNoData( TK_NameSep ); -} - - /* Whitespace. */ -{WSCHAR}+ { - if ( inlineWhitespace ) - return emitToken( IL_WhiteSpace, yytext, yyleng ); -} - - /* Any other characters. */ -. { - return emitToken( IL_Symbol, yytext, 1 ); -} - -<> { - /* If we are not at the bottom of the include stack, then pop the current - * file that we are scanning. Since we are always returning 0 to the parser - * it will exit and return to the parser that called it. */ - if ( inc_stack_ptr > 0 ) - popInclude(); - return 0; -} - - /* End of input in a literal is an error. */ -<> { - error(il_comm_lit_first_line, il_comm_lit_first_column) << - "unterminated literal" << endl; - exit(1); -} - - /* End of input in a comment is an error. */ -<> { - error(il_comm_lit_first_line, il_comm_lit_first_column) << - "unterminated comment" << endl; - exit(1); -} - - /* End of intput in a code block. */ -<> { - error(il_code_first_line, il_code_first_column) << - "unterminated code block" << endl; - exit(1); -} - -%% - -/* Write out token data, escaping special charachters. */ -#ifdef WANT_TOKEN_WRITE -void writeToken( int token, char *data ) -{ - cout << "token id " << token << " at " << id->fileName << ":" << - yylloc->first_line << ":" << yylloc->first_column << "-" << - yylloc->last_line << ":" << yylloc->last_column << " "; - - if ( data != 0 ) { - while ( *data != 0 ) { - switch ( *data ) { - case '\n': cout << "\\n"; break; - case '\t': cout << "\\t"; break; - default: cout << *data; break; - } - data += 1; - } - } - cout << endl; -} -#endif - -/* Caclulate line info from yytext. Called on every pattern match. */ -void updateLineInfo() -{ - /* yytext should always have at least one char. */ - assert( yytext[0] != 0 ); - - /* Scan through yytext up to the last character. */ - char *p = yytext; - for ( ; p[1] != 0; p++ ) { - if ( p[0] == '\n' ) { - id->last_line += 1; - id->last_column = 0; - } - else { - id->last_column += 1; - } - } - - /* Always consider the last character as not a newline. Newlines at the - * end of a token are as any old character at the end of the line. */ - id->last_column += 1; - - /* The caller may be about to emit a token, be prepared to pass the line - * info to the parser. */ - yylloc->first_line = id->first_line; - yylloc->first_column = id->first_column; - yylloc->last_line = id->last_line; - yylloc->last_column = id->last_column; - - /* If the last character was indeed a newline, then wrap ahead now. */ - if ( p[0] == '\n' ) { - id->last_line += 1; - id->last_column = 0; - } -} - -/* Eat up a matched pattern that will not be part of a token. */ -void garble() -{ - /* Update line information from yytext. */ - updateLineInfo(); - - /* The next token starts ahead of the last token. */ - id->first_line = id->last_line; - id->first_column = id->last_column + 1; -} - -/* Append data to the end of the token. More token data expected. */ -void extendToken( char *data, int len ) -{ - if ( data != 0 && len > 0 ) - tokbuf.append( data, len ); - - /* Update line information from yytext. */ - updateLineInfo(); -} - -/* Extend, but with no data, more data to come. */ -void extendToken() -{ - /* Update line information from yytext. */ - updateLineInfo(); -} - - -/* Possibly process include data. */ -void processInclude( int token ) -{ - static char *incFileName = 0; - - if ( handlingInclude ) { - if ( token == KW_Include ) - incFileName = 0; - else if ( token == TK_Literal ) - incFileName = yylval->data.data; - else if ( token == ';' ) { - /* Terminate the include statement. Start reading from included file. */ - handlingInclude = false; - - if ( id->active && includeDepth < INCLUDE_STACK_SIZE ) { - /* If there is no section name or input file, default to the curren values. */ - if ( incFileName == 0 ) - incFileName = id->fileName; - - /* Make the new buffer and switch to it. */ - FILE *incFile = fopen( incFileName, "rt" ); - if ( incFile != 0 ) { - buff_stack[inc_stack_ptr] = YY_CURRENT_BUFFER; - multiline_stack[inc_stack_ptr] = multiline; - inc_stack_ptr += 1; - yy_switch_to_buffer( yy_create_buffer( incFile, YY_BUF_SIZE ) ); - BEGIN(INITIAL); - } - else { - error(*yylloc) << "could not locate include file \"" << incFileName - << "\"" << endl; - } - } - } - } -} - -void popInclude() -{ - /* Free the current buffer and move to the previous. */ - yy_delete_buffer( YY_CURRENT_BUFFER ); - inc_stack_ptr -= 1; - yy_switch_to_buffer( buff_stack[inc_stack_ptr] ); - multiline = multiline_stack[inc_stack_ptr]; - - /* Includes get called only from RL_INITIAL. */ - BEGIN(RL_INITIAL); -} - - -/* Append data to the end of a token and emitToken it to the parser. */ -int emitToken( int token, char *data, int len ) -{ - /* Append any new data. */ - if ( data != 0 && len > 0 ) - tokbuf.append( data, len ); - - /* Duplicate the buffer. */ - yylval->data.length = tokbuf.length; - yylval->data.data = new char[tokbuf.length+1]; - memcpy( yylval->data.data, tokbuf.data, tokbuf.length ); - yylval->data.data[tokbuf.length] = 0; - - /* Update line information from yytext. */ - updateLineInfo(); - - /* Write token info. */ -#ifdef WANT_TOKEN_WRITE - writeToken( token, tokbuf.data ); -#endif - - /* Clear out the buffer. */ - tokbuf.clear(); - - /* The next token starts ahead of the last token. */ - id->first_line = id->last_line; - id->first_column = id->last_column + 1; - - /* Maintain a record of two tokens back. */ - previous_tokens[1] = previous_tokens[0]; - previous_tokens[0] = token; - - /* Possibly process the include statement; */ - processInclude( token ); - - return token; -} - -/* Emit a token with no data to the parser. */ -int emitNoData( int token ) -{ - /* Return null to the parser. */ - yylval->data.data = 0; - yylval->data.length = 0; - - /* Update line information from yytext. */ - updateLineInfo(); - - /* Write token info. */ -#ifdef WANT_TOKEN_WRITE - writeToken( token, 0 ); -#endif - - /* Clear out the buffer. */ - tokbuf.clear(); - - /* The next token starts ahead of the last token. */ - id->first_line = id->last_line; - id->first_column = id->last_column + 1; - - /* Maintain a record of two tokens back. */ - previous_tokens[1] = previous_tokens[0]; - previous_tokens[0] = token; - - /* Possibly process the include statement; */ - processInclude( token ); - - return token; -} - -/* Pass tokens in outter code through to the output. */ -void passThrough( char *data ) -{ - /* If no errors and we are at the bottom of the include stack (the source - * file listed on the command line) then write out the data. */ - if ( gblErrorCount == 0 && inc_stack_ptr == 0 && - machineSpec == 0 && machineName == 0 ) - { - xmlEscapeHost( *outStream, data ); - } -} - -/* Init a buffer. */ -Buffer::Buffer() -: - data(0), - length(0), - allocated(0) -{ -} - -/* Empty out a buffer on destruction. */ -Buffer::~Buffer() -{ - empty(); -} - -/* Free the space allocated for the buffer. */ -void Buffer::empty() -{ - if ( data != 0 ) { - free( data ); - - data = 0; - length = 0; - allocated = 0; - } -} - -/* Grow the buffer when to len allocation. */ -void Buffer::upAllocate( int len ) -{ - if ( data == 0 ) - data = (char*) malloc( len ); - else - data = (char*) realloc( data, len ); - allocated = len; -} - -int yywrap() -{ - /* Once processessing of the input is done, signal no more. */ - return 1; -} - -/* Here simply to suppress the unused yyunpt warning. */ -void thisFuncIsNeverCalled() -{ - yyunput(0, 0); -} - -/* Put the scannner back into the outside code start state. */ -void beginOutsideCode() -{ - BEGIN(INITIAL); -} - -/* Determine if we are opening a machine specification block. */ -bool openMachineSpecBlock() -{ - if ( previous_tokens[1] == TK_Section && previous_tokens[0] == TK_Word ) - return true; - else if ( previous_tokens[0] == TK_Section ) - return true; - return false; -} - -/* Wrapper for the lexer which stores the locations of the value and location - * variables of the parser into globals. The parser is reentrant, however the scanner - * does not need to be, so globals work fine. This saves us passing them around - * all the helper functions. */ -int yylex( YYSTYPE *yylval, YYLTYPE *yylloc ) -{ - ::yylval = yylval; - ::yylloc = yylloc; - return ragel_lex(); -} - diff --git a/ragel/rlscan.rl b/ragel/rlscan.rl index 226bbc7..d35ff7c 100644 --- a/ragel/rlscan.rl +++ b/ragel/rlscan.rl @@ -88,7 +88,7 @@ struct Scanner void init(); void token( int type, char *start, char *end ); - void token( int type, char *string ); + void token( int type, char c ); void token( int type ); void updateCol(); void startSection(); @@ -183,9 +183,9 @@ void Scanner::updateCol() lastnl = 0; } -void Scanner::token( int type, char *string ) +void Scanner::token( int type, char c ) { - token( type, string, string + strlen(string) ); + token( type, &c, &c + 1 ); } void Scanner::token( int type ) @@ -341,11 +341,14 @@ void Scanner::token( int type ) if ( active && parserExists() ) { InputLoc loc; - //cerr << "scanner:" << line << ":" << column << - // ": sending token to the parser " << lelNames[*p]; - //if ( tokdata != 0 ) - // cerr << " " << tokdata; - //cerr << endl; + #if 0 + cerr << "scanner:" << line << ":" << column << + ": sending token to the parser " << lelNames[*p]; + cerr << " " << toklen; + if ( tokdata != 0 ) + cerr << " " << tokdata; + cerr << endl; + #endif loc.fileName = fileName; loc.line = line; @@ -571,20 +574,24 @@ void Scanner::endSection( ) } }; + EOF => { + error() << "unterminated code block" << endl; + }; + # Send every other character as a symbol. any => { token( IL_Symbol, tokstart, tokend ); }; *|; or_literal := |* # Escape sequences in OR expressions. - '\\0' => { token( RE_Char, "\0" ); }; - '\\a' => { token( RE_Char, "\a" ); }; - '\\b' => { token( RE_Char, "\b" ); }; - '\\t' => { token( RE_Char, "\t" ); }; - '\\n' => { token( RE_Char, "\n" ); }; - '\\v' => { token( RE_Char, "\v" ); }; - '\\f' => { token( RE_Char, "\f" ); }; - '\\r' => { token( RE_Char, "\r" ); }; + '\\0' => { token( RE_Char, '\0' ); }; + '\\a' => { token( RE_Char, '\a' ); }; + '\\b' => { token( RE_Char, '\b' ); }; + '\\t' => { token( RE_Char, '\t' ); }; + '\\n' => { token( RE_Char, '\n' ); }; + '\\v' => { token( RE_Char, '\v' ); }; + '\\f' => { token( RE_Char, '\f' ); }; + '\\r' => { token( RE_Char, '\r' ); }; '\\\n' => { updateCol(); }; '\\' any => { token( RE_Char, tokstart+1, tokend ); }; @@ -594,20 +601,25 @@ void Scanner::endSection( ) # Terminate an OR expression. ']' => { token( RE_SqClose ); fret; }; + EOF => { + error() << "unterminated OR literal" << endl; + }; + # Characters in an OR expression. [^\]] => { token( RE_Char, tokstart, tokend ); }; + *|; re_literal := |* # Escape sequences in regular expressions. - '\\0' => { token( RE_Char, "\0" ); }; - '\\a' => { token( RE_Char, "\a" ); }; - '\\b' => { token( RE_Char, "\b" ); }; - '\\t' => { token( RE_Char, "\t" ); }; - '\\n' => { token( RE_Char, "\n" ); }; - '\\v' => { token( RE_Char, "\v" ); }; - '\\f' => { token( RE_Char, "\f" ); }; - '\\r' => { token( RE_Char, "\r" ); }; + '\\0' => { token( RE_Char, '\0' ); }; + '\\a' => { token( RE_Char, '\a' ); }; + '\\b' => { token( RE_Char, '\b' ); }; + '\\t' => { token( RE_Char, '\t' ); }; + '\\n' => { token( RE_Char, '\n' ); }; + '\\v' => { token( RE_Char, '\v' ); }; + '\\f' => { token( RE_Char, '\f' ); }; + '\\r' => { token( RE_Char, '\r' ); }; '\\\n' => { updateCol(); }; '\\' any => { token( RE_Char, tokstart+1, tokend ); }; @@ -624,6 +636,10 @@ void Scanner::endSection( ) '[' => { token( RE_SqOpen ); fcall or_literal; }; '[^' => { token( RE_SqOpenNeg ); fcall or_literal; }; + EOF => { + error() << "unterminated regular expression" << endl; + }; + # Characters in an OR expression. [^\/] => { token( RE_Char, tokstart, tokend ); }; *|; @@ -632,6 +648,10 @@ void Scanner::endSection( ) ident => { token( TK_Word, tokstart, tokend ); } ; [ \t\n]+ => { updateCol(); }; ';' => { token( ';' ); fgoto parser_def; }; + + EOF => { + error() << "unterminated write statement" << endl; + }; *|; # Parser definitions. @@ -782,6 +802,10 @@ void Scanner::endSection( ) fgoto inline_code; }; + EOF => { + error() << "unterminated ragel section" << endl; + }; + any => { token( *tokstart ); } ; *|; @@ -906,5 +930,9 @@ void scan( char *fileName, istream &input ) Scanner scanner( fileName, input, 0, 0, 0 ); scanner.init(); scanner.do_scan(); -} + InputLoc eofLoc; + eofLoc.fileName = fileName; + eofLoc.col = 1; + eofLoc.line = scanner.line; +} diff --git a/rlcodegen/xmlparse.y b/rlcodegen/xmlparse.y deleted file mode 100644 index a837c87..0000000 --- a/rlcodegen/xmlparse.y +++ /dev/null @@ -1,978 +0,0 @@ -/* - * Copyright 2005-2006 Adrian Thurston - */ - -/* This file is part of Ragel. - * - * Ragel is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Ragel is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Ragel; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -%{ - -#include -#include -#include -#include -#include "rlcodegen.h" -#include "vector.h" -#include "xmlparse.h" -#include "gendata.h" - -using std::cerr; -using std::endl; - -char *sourceFileName; -char *attrKey; -char *attrValue; -int curAction; -int curActionTable; -int curTrans; -int curState; -int curCondSpace; -int curStateCond; - -Key readKey( char *td, char **end ); -long readOffsetPtr( char *td, char **end ); -unsigned long readLength( char *td ); - -CodeGenMap codeGenMap; - -%} - -%pure-parser - -%union { - /* General data types. */ - char c; - char *data; - int integer; - AttrList *attrList; - - /* Inline parse tree items. */ - InlineItem *ilitem; - InlineList *illist; -} - -%token TAG_unknown -%token TAG_ragel -%token TAG_ragel_def -%token TAG_host -%token TAG_state_list -%token TAG_state -%token TAG_trans_list -%token TAG_t -%token TAG_machine -%token TAG_start_state -%token TAG_action_list -%token TAG_action_table_list -%token TAG_action -%token TAG_action_table -%token TAG_alphtype -%token TAG_element -%token TAG_getkey -%token TAG_state_actions -%token TAG_entry_points -%token TAG_sub_action -%token TAG_cond_space_list -%token TAG_cond_space -%token TAG_cond_list -%token TAG_c - -/* Inline block tokens. */ -%token TAG_text -%token TAG_goto -%token TAG_call -%token TAG_next -%token TAG_goto_expr -%token TAG_call_expr -%token TAG_next_expr -%token TAG_ret -%token TAG_pchar -%token TAG_char -%token TAG_hold -%token TAG_exec -%token TAG_holdte -%token TAG_execte -%token TAG_curs -%token TAG_targs -%token TAG_entry -%token TAG_data -%token TAG_lm_switch -%token TAG_init_act -%token TAG_set_act -%token TAG_set_tokend -%token TAG_get_tokend -%token TAG_init_tokstart -%token TAG_set_tokstart -%token TAG_write -%token TAG_curstate -%token TAG_access -%token TAG_break -%token TAG_option - -%token XML_Word -%token XML_Literal -%type AttributeList - -%type InlineList -%type InlineItem -%type LmActionList - -%type TagText -%type TagGoto -%type TagCall -%type TagNext -%type TagGotoExpr -%type TagCallExpr -%type TagNextExpr -%type TagRet -%type TagBreak -%type TagPChar -%type TagChar -%type TagHold -%type TagExec -%type TagHoldTE -%type TagExecTE -%type TagCurs -%type TagTargs -%type TagIlEntry -%type TagLmSwitch -%type TagLmSetActId -%type TagLmGetTokEnd -%type TagLmSetTokEnd -%type TagLmInitTokStart -%type TagLmInitAct -%type TagLmSetTokStart -%type TagInlineAction -%type TagSubAction - -%% - -/* Input is any number of input sections. An empty file is accepted. */ -input: - TagRagel | - /* Nothing */ { - /* Assume the frontend died if we get no input. It will emit an error. - * Cause us to return an error code. */ - gblErrorCount += 1; - }; - -TagRagel: - TagRagelHead - HostOrDefList - '<' '/' TAG_ragel '>'; - -TagRagelHead: - '<' TAG_ragel AttributeList '>' { - Attribute *fileNameAttr = $3->find( "filename" ); - if ( fileNameAttr == 0 ) - xml_error(@2) << "tag requires a filename attribute" << endl; - else - sourceFileName = fileNameAttr->value; - - Attribute *langAttr = $3->find( "lang" ); - if ( langAttr == 0 ) - xml_error(@2) << "tag requires a lang attribute" << endl; - else { - if ( strcmp( langAttr->value, "C" ) == 0 ) { - hostLangType = CCode; - hostLang = &hostLangC; - } - else if ( strcmp( langAttr->value, "D" ) == 0 ) { - hostLangType = DCode; - hostLang = &hostLangD; - } - else if ( strcmp( langAttr->value, "Java" ) == 0 ) { - hostLangType = JavaCode; - hostLang = &hostLangJava; - } - } - - /* Eventually more types will be supported. */ - if ( hostLangType == JavaCode && codeStyle != GenTables ) { - error() << "java: only the table code style -T0 is " - "currently supported" << endl; - } - - openOutput( sourceFileName ); - }; - -AttributeList: - AttributeList Attribute { - $$ = $1; - $$->append( Attribute( attrKey, attrValue ) ); - } | - /* Nothing */ { - $$ = new AttrList; - }; - -Attribute: - XML_Word '=' XML_Literal { - attrKey = $1; - attrValue = $3; - }; - -HostOrDefList: - HostOrDefList HostOrDef | - /* Nothing */; - -HostOrDef: - TagHost | TagRagelDef; - -TagHost: - TagHostHead - '<' '/' TAG_host '>' { - if ( outputFormat == OutCode ) - *outStream << xmlData.data; - }; - -TagHostHead: - '<' TAG_host AttributeList '>' { - Attribute *lineAttr = $3->find( "line" ); - if ( lineAttr == 0 ) - xml_error(@2) << "tag requires a line attribute" << endl; - else { - int line = atoi( lineAttr->value ); - if ( outputFormat == OutCode ) - lineDirective( *outStream, sourceFileName, line ); - } - }; - -TagRagelDef: - RagelDefHead - RagelDefItemList - '<' '/' TAG_ragel_def '>' { - if ( gblErrorCount == 0 ) - cgd->generate(); - }; - -RagelDefHead: - '<' TAG_ragel_def AttributeList '>' { - bool wantComplete = outputFormat != OutGraphvizDot; - - char *fsmName = 0; - Attribute *nameAttr = $3->find( "name" ); - if ( nameAttr != 0 ) { - fsmName = nameAttr->value; - - CodeGenMapEl *mapEl = codeGenMap.find( fsmName ); - if ( mapEl != 0 ) - cgd = mapEl->value; - else { - cgd = new CodeGenData( sourceFileName, fsmName, wantComplete ); - codeGenMap.insert( fsmName, cgd ); - } - } - else { - cgd = new CodeGenData( sourceFileName, fsmName, wantComplete ); - } - - cgd->writeOps = 0; - cgd->writeData = false; - cgd->writeInit = false; - cgd->writeExec = false; - cgd->writeEOF = false; - ::keyOps = &cgd->thisKeyOps; - }; - -RagelDefItemList: - RagelDefItemList RagelDefItem | - /* Nothing */; - -RagelDefItem: - TagAlphType | - TagGetKeyExpr | - TagAccessExpr | - TagCurStateExpr | - TagMachine | - TagWrite; - -TagWrite: - '<' TAG_write AttributeList '>' - OptionList - '<' '/' TAG_write '>' { - Attribute *what = $3->find( "what" ); - if ( what == 0 ) { - xml_error(@2) << "tag requires a what attribute" << endl; - } - else { - if ( strcmp( what->value, "data" ) == 0 ) - cgd->writeData = true; - else if ( strcmp( what->value, "init" ) == 0 ) - cgd->writeInit = true; - else if ( strcmp( what->value, "exec" ) == 0 ) - cgd->writeExec = true; - else if ( strcmp( what->value, "eof" ) == 0 ) - cgd->writeEOF = true; - } - }; - -OptionList: - OptionList TagOption | - /* Nothing */; - -TagOption: - '<' TAG_option '>' - '<' '/' TAG_option '>' { - if ( strcmp( xmlData.data, "noend" ) == 0 ) - cgd->writeOps |= WO_NOEND; - else if ( strcmp( xmlData.data, "noerror" ) == 0 ) - cgd->writeOps |= WO_NOERROR; - else if ( strcmp( xmlData.data, "noprefix" ) == 0 ) - cgd->writeOps |= WO_NOPREFIX; - else if ( strcmp( xmlData.data, "nofinal" ) == 0 ) - cgd->writeOps |= WO_NOFF; - else { - warning() << "unrecognized write option" << endl; - } - }; - - -TagAlphType: - '<' TAG_alphtype '>' - '<' '/' TAG_alphtype '>' { - if ( ! cgd->setAlphType( xmlData.data ) ) - xml_error(@2) << "tag specifies unknown alphabet type" << endl; - }; - -TagGetKeyExpr: - '<' TAG_getkey '>' - InlineList - '<' '/' TAG_getkey '>' { - cgd->getKeyExpr = $4; - }; - -TagAccessExpr: - '<' TAG_access '>' - InlineList - '<' '/' TAG_access '>' { - cgd->accessExpr = $4; - }; - -TagCurStateExpr: - '<' TAG_curstate '>' - InlineList - '<' '/' TAG_curstate '>' { - cgd->curStateExpr = $4; - }; - -TagMachine: - TagMachineHead - MachineItemList - '<' '/' TAG_machine '>' { - cgd->finishMachine(); - }; - -TagMachineHead: - '<' TAG_machine '>' { - cgd->createMachine(); - }; - -MachineItemList: - MachineItemList MachineItem | - /* Nothing */; - -MachineItem: - TagStartState | - TagEntryPoints | - TagStateList | - TagActionList | - TagActionTableList | - TagCondSpaceList; - -TagStartState: - '<' TAG_start_state '>' - '<' '/' TAG_start_state '>' { - unsigned long startState = strtoul( xmlData.data, 0, 10 ); - cgd->setStartState( startState ); - }; - -TagEntryPoints: - '<' TAG_entry_points AttributeList '>' - EntryPointList - '<' '/' TAG_entry_points '>' { - Attribute *errorAttr = $3->find( "error" ); - if ( errorAttr != 0 ) - cgd->setForcedErrorState(); - }; - -EntryPointList: - EntryPointList TagEntry | - /* Nothing */; - -TagEntry: - '<' TAG_entry AttributeList '>' - '<' '/' TAG_entry '>' { - Attribute *nameAttr = $3->find( "name" ); - if ( nameAttr == 0 ) - xml_error(@2) << "tag :: requires a name attribute" << endl; - else { - char *data = xmlData.data; - unsigned long entry = strtoul( data, &data, 10 ); - cgd->addEntryPoint( nameAttr->value, entry ); - } - }; - -TagStateList: - TagStateListHead - StateList - '<' '/' TAG_state_list '>'; - -TagStateListHead: - '<' TAG_state_list AttributeList '>' { - Attribute *lengthAttr = $3->find( "length" ); - if ( lengthAttr == 0 ) - xml_error(@2) << "tag requires a length attribute" << endl; - else { - unsigned long length = strtoul( lengthAttr->value, 0, 10 ); - cgd->initStateList( length ); - curState = 0; - } - }; - -StateList: - StateList TagState | - /* Nothing */; - -TagState: - TagStateHead - StateItemList - '<' '/' TAG_state '>' { - curState += 1; - }; - -TagStateHead: - '<' TAG_state AttributeList '>' { - Attribute *lengthAttr = $3->find( "final" ); - if ( lengthAttr != 0 ) - cgd->setFinal( curState ); - }; - -StateItemList: - StateItemList StateItem | - /* Nothing */; - -StateItem: - TagStateActions | - TagStateCondList | - TagTransList; - -TagStateActions: - '<' TAG_state_actions '>' - '<' '/' TAG_state_actions '>' { - char *ad = xmlData.data; - - long toStateAction = readOffsetPtr( ad, &ad ); - long fromStateAction = readOffsetPtr( ad, &ad ); - long eofAction = readOffsetPtr( ad, &ad ); - - cgd->setStateActions( curState, toStateAction, - fromStateAction, eofAction ); - }; - -TagStateCondList: - TagStateCondListHead - StateCondList - '<' '/' TAG_cond_list '>'; - -TagStateCondListHead: - '<' TAG_cond_list AttributeList '>' { - Attribute *lengthAttr = $3->find( "length" ); - if ( lengthAttr == 0 ) - xml_error(@2) << "tag requires a length attribute" << endl; - else { - ulong length = readLength( lengthAttr->value ); - cgd->initStateCondList( curState, length ); - curStateCond = 0; - } - } - -StateCondList: - StateCondList StateCond | - /* Empty */; - -StateCond: - '<' TAG_c '>' - '<' '/' TAG_c '>' { - char *td = xmlData.data; - Key lowKey = readKey( td, &td ); - Key highKey = readKey( td, &td ); - long condId = readOffsetPtr( td, &td ); - cgd->addStateCond( curState, lowKey, highKey, condId ); - } - -TagTransList: - TagTransListHead - TransList - '<' '/' TAG_trans_list '>' { - cgd->finishTransList( curState ); - }; - -TagTransListHead: - '<' TAG_trans_list AttributeList '>' { - Attribute *lengthAttr = $3->find( "length" ); - if ( lengthAttr == 0 ) - xml_error(@2) << "tag requires a length attribute" << endl; - else { - unsigned long length = strtoul( lengthAttr->value, 0, 10 ); - cgd->initTransList( curState, length ); - curTrans = 0; - } - }; - -TransList: - TransList TagTrans | - /* Nothing */; - -TagTrans: - '<' TAG_t AttributeList '>' - '<' '/' TAG_t '>' { - char *td = xmlData.data; - Key lowKey = readKey( td, &td ); - Key highKey = readKey( td, &td ); - long targ = readOffsetPtr( td, &td ); - long action = readOffsetPtr( td, &td ); - - cgd->newTrans( curState, curTrans++, lowKey, highKey, targ, action ); - }; - -TagActionList: - TagActionListHead - ActionList - '<' '/' TAG_action_list '>'; - -TagActionListHead: - '<' TAG_action_list AttributeList '>' { - Attribute *lengthAttr = $3->find( "length" ); - if ( lengthAttr == 0 ) - xml_error(@2) << "tag requires a length attribute" << endl; - else { - unsigned long length = strtoul( lengthAttr->value, 0, 10 ); - cgd->initActionList( length ); - curAction = 0; - } - }; - -ActionList: - ActionList TagAction | - /* Nothing */; - -TagAction: - '<' TAG_action AttributeList '>' - InlineList - '<' '/' TAG_action '>' { - Attribute *lineAttr = $3->find( "line" ); - Attribute *colAttr = $3->find( "col" ); - Attribute *nameAttr = $3->find( "name" ); - if ( lineAttr == 0 || colAttr == 0) - xml_error(@2) << "tag requires a line and col attributes" << endl; - else { - unsigned long line = strtoul( lineAttr->value, 0, 10 ); - unsigned long col = strtoul( colAttr->value, 0, 10 ); - - char *name = 0; - if ( nameAttr != 0 ) - name = nameAttr->value; - - cgd->newAction( curAction++, name, line, col, $5 ); - } - }; - -InlineList: - InlineList InlineItem { - /* Append the item to the list, return the list. */ - $1->append( $2 ); - $$ = $1; - } | - /* Nothing */ { - /* Start with empty list. */ - $$ = new InlineList; - }; - -InlineItem: - TagText | - TagGoto | - TagCall | - TagNext | - TagGotoExpr | - TagCallExpr | - TagNextExpr | - TagRet | - TagBreak | - TagPChar | - TagChar | - TagHold | - TagExec | - TagHoldTE | - TagExecTE | - TagCurs | - TagTargs | - TagIlEntry | - TagLmSwitch | - TagLmSetActId | - TagLmSetTokEnd | - TagLmGetTokEnd | - TagSubAction | - TagLmInitTokStart | - TagLmInitAct | - TagLmSetTokStart; - -TagText: - '<' TAG_text AttributeList '>' - '<' '/' TAG_text '>' { - $$ = new InlineItem( InputLoc(), InlineItem::Text ); - $$->data = strdup(xmlData.data); - }; - -TagGoto: - '<' TAG_goto '>' - '<' '/' TAG_goto '>' { - int targ = strtol( xmlData.data, 0, 10 ); - $$ = new InlineItem( InputLoc(), InlineItem::Goto ); - $$->targId = targ; - }; - -TagCall: - '<' TAG_call '>' - '<' '/' TAG_call '>' { - int targ = strtol( xmlData.data, 0, 10 ); - $$ = new InlineItem( InputLoc(), InlineItem::Call ); - $$->targId = targ; - }; - -TagNext: - '<' TAG_next '>' - '<' '/' TAG_next '>' { - int targ = strtol( xmlData.data, 0, 10 ); - $$ = new InlineItem( InputLoc(), InlineItem::Next ); - $$->targId = targ; - }; - -TagGotoExpr: - '<' TAG_goto_expr '>' - InlineList - '<' '/' TAG_goto_expr '>' { - $$ = new InlineItem( InputLoc(), InlineItem::GotoExpr ); - $$->children = $4; - }; - -TagCallExpr: - '<' TAG_call_expr '>' - InlineList - '<' '/' TAG_call_expr '>' { - $$ = new InlineItem( InputLoc(), InlineItem::CallExpr ); - $$->children = $4; - }; - -TagNextExpr: - '<' TAG_next_expr '>' - InlineList - '<' '/' TAG_next_expr '>' { - $$ = new InlineItem( InputLoc(), InlineItem::NextExpr ); - $$->children = $4; - }; - -TagRet: - '<' TAG_ret '>' - '<' '/' TAG_ret '>' { - $$ = new InlineItem( InputLoc(), InlineItem::Ret ); - }; - -TagPChar: - '<' TAG_pchar '>' - '<' '/' TAG_pchar '>' { - $$ = new InlineItem( InputLoc(), InlineItem::PChar ); - }; - -TagChar: - '<' TAG_char '>' - '<' '/' TAG_char '>' { - $$ = new InlineItem( InputLoc(), InlineItem::Char ); - }; - -TagHold: - '<' TAG_hold '>' - '<' '/' TAG_hold '>' { - $$ = new InlineItem( InputLoc(), InlineItem::Hold ); - }; - -TagExec: - '<' TAG_exec '>' - InlineList - '<' '/' TAG_exec '>' { - $$ = new InlineItem( InputLoc(), InlineItem::Exec ); - $$->children = $4; - }; - -TagHoldTE: - '<' TAG_holdte '>' - '<' '/' TAG_holdte '>' { - $$ = new InlineItem( InputLoc(), InlineItem::HoldTE ); - }; - -TagExecTE: - '<' TAG_execte '>' - InlineList - '<' '/' TAG_execte '>' { - $$ = new InlineItem( InputLoc(), InlineItem::ExecTE ); - $$->children = $4; - }; - -TagCurs: - '<' TAG_curs '>' - '<' '/' TAG_curs '>' { - $$ = new InlineItem( InputLoc(), InlineItem::Curs ); - }; - -TagTargs: - '<' TAG_targs '>' - '<' '/' TAG_targs '>' { - $$ = new InlineItem( InputLoc(), InlineItem::Targs ); - }; - -TagIlEntry: - '<' TAG_entry '>' - '<' '/' TAG_entry '>' { - int targ = strtol( xmlData.data, 0, 10 ); - $$ = new InlineItem( InputLoc(), InlineItem::Entry ); - $$->targId = targ; - }; - -TagBreak: - '<' TAG_break '>' - '<' '/' TAG_break '>' { - $$ = new InlineItem( InputLoc(), InlineItem::Break ); - }; - - -TagLmSwitch: - '<' TAG_lm_switch AttributeList '>' - LmActionList - '<' '/' TAG_lm_switch '>' { - bool handlesError = false; - Attribute *handlesErrorAttr = $3->find( "handles_error" ); - if ( handlesErrorAttr != 0 ) - handlesError = true; - - $$ = new InlineItem( InputLoc(), InlineItem::LmSwitch ); - $$->children = $5; - $$->handlesError = handlesError; - }; - -LmActionList: - LmActionList TagInlineAction { - $$ = $1; - $$->append( $2 ); - } | - /* Nothing */ { - $$ = new InlineList; - }; - -TagInlineAction: - '<' TAG_sub_action AttributeList '>' - InlineList - '<' '/' TAG_sub_action '>' { - $$ = new InlineItem( InputLoc(), InlineItem::SubAction ); - $$->children = $5; - - Attribute *idAttr = $3->find( "id" ); - if ( idAttr != 0 ) { - unsigned long id = strtoul( idAttr->value, 0, 10 ); - $$->lmId = id; - } - }; - -TagLmSetActId: - '<' TAG_set_act '>' - '<' '/' TAG_set_act '>' { - $$ = new InlineItem( InputLoc(), InlineItem::LmSetActId ); - $$->lmId = strtol( xmlData.data, 0, 10 ); - }; - -TagLmGetTokEnd: - '<' TAG_get_tokend '>' - '<' '/' TAG_get_tokend '>' { - $$ = new InlineItem( InputLoc(), InlineItem::LmGetTokEnd ); - }; - -TagLmSetTokEnd: - '<' TAG_set_tokend '>' - '<' '/' TAG_set_tokend '>' { - $$ = new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ); - $$->offset = strtol( xmlData.data, 0, 10 ); - }; - -TagSubAction: - '<' TAG_sub_action '>' - InlineList - '<' '/' TAG_sub_action '>' { - $$ = new InlineItem( InputLoc(), InlineItem::SubAction ); - $$->children = $4; - }; - -TagLmInitTokStart: - '<' TAG_init_tokstart '>' - '<' '/' TAG_init_tokstart '>' { - $$ = new InlineItem( InputLoc(), InlineItem::LmInitTokStart ); - }; - -TagLmInitAct: - '<' TAG_init_act '>' - '<' '/' TAG_init_act '>' { - $$ = new InlineItem( InputLoc(), InlineItem::LmInitAct ); - }; - -TagLmSetTokStart: - '<' TAG_set_tokstart '>' - '<' '/' TAG_set_tokstart '>' { - $$ = new InlineItem( InputLoc(), InlineItem::LmSetTokStart ); - cgd->hasLongestMatch = true; - }; - -TagActionTableList: - TagActionTableListHead - ActionTableList - '<' '/' TAG_action_table_list '>'; - -TagActionTableListHead: - '<' TAG_action_table_list AttributeList '>' { - Attribute *lengthAttr = $3->find( "length" ); - if ( lengthAttr == 0 ) - xml_error(@2) << "tag requires a length attribute" << endl; - else { - unsigned long length = strtoul( lengthAttr->value, 0, 10 ); - cgd->initActionTableList( length ); - curActionTable = 0; - } - }; - -ActionTableList: - ActionTableList TagActionTable | - /* Nothing */; - -TagActionTable: - '<' TAG_action_table AttributeList '>' - '<' '/' TAG_action_table '>' { - /* Find the length of the action table. */ - Attribute *lengthAttr = $3->find( "length" ); - if ( lengthAttr == 0 ) - xml_error(@2) << "tag requires a length attribute" << endl; - else { - unsigned long length = strtoul( lengthAttr->value, 0, 10 ); - - /* Collect the action table. */ - RedAction *redAct = cgd->allActionTables + curActionTable; - redAct->actListId = curActionTable; - redAct->key.setAsNew( length ); - char *ptr = xmlData.data; - int pos = 0; - while ( *ptr != 0 ) { - unsigned long actionId = strtoul( ptr, &ptr, 10 ); - redAct->key[pos].key = 0; - redAct->key[pos].value = cgd->allActions+actionId; - pos += 1; - } - - /* Insert into the action table map. */ - cgd->redFsm->actionMap.insert( redAct ); - } - - curActionTable += 1; - }; - -TagCondSpaceList: - TagCondSpaceListHead - CondSpaceList - '<' '/' TAG_cond_space_list '>'; - -TagCondSpaceListHead: - '<' TAG_cond_space_list AttributeList '>' { - Attribute *lengthAttr = $3->find( "length" ); - if ( lengthAttr == 0 ) - xml_error(@2) << "tag requires a length attribute" << endl; - else { - ulong length = readLength( lengthAttr->value ); - cgd->initCondSpaceList( length ); - curCondSpace = 0; - } - }; - -CondSpaceList: - CondSpaceList TagCondSpace | - TagCondSpace; - -TagCondSpace: - '<' TAG_cond_space AttributeList '>' - '<' '/' TAG_cond_space '>' { - Attribute *lengthAttr = $3->find( "length" ); - Attribute *idAttr = $3->find( "id" ); - if ( lengthAttr == 0 ) - xml_error(@2) << "tag requires a length attribute" << endl; - else { - if ( lengthAttr == 0 ) - xml_error(@2) << "tag requires an id attribute" << endl; - else { - unsigned long condSpaceId = strtoul( idAttr->value, 0, 10 ); - ulong length = readLength( lengthAttr->value ); - - char *td = xmlData.data; - Key baseKey = readKey( td, &td ); - - cgd->newCondSpace( curCondSpace, condSpaceId, baseKey ); - for ( ulong a = 0; a < length; a++ ) { - long actionOffset = readOffsetPtr( td, &td ); - cgd->condSpaceItem( curCondSpace, actionOffset ); - } - curCondSpace += 1; - } - } - }; - -%% - -unsigned long readLength( char *td ) -{ - return strtoul( td, 0, 10 ); -} - -Key readKey( char *td, char **end ) -{ - if ( keyOps->isSigned ) - return Key( strtol( td, end, 10 ) ); - else - return Key( strtoul( td, end, 10 ) ); -} - -long readOffsetPtr( char *td, char **end ) -{ - while ( *td == ' ' || *td == '\t' ) - td++; - - if ( *td == 'x' ) { - if ( end != 0 ) - *end = td + 1; - return -1; - } - - return strtol( td, end, 10 ); -} - -void yyerror( char *err ) -{ - /* Bison won't give us the location, but in the last call to the scanner we - * saved a pointer to the locationn variable. Use that. instead. */ - error(::yylloc->first_line, ::yylloc->first_column) << err << endl; -} - diff --git a/rlcodegen/xmlscan.lex b/rlcodegen/xmlscan.lex deleted file mode 100644 index 4ebd70a..0000000 --- a/rlcodegen/xmlscan.lex +++ /dev/null @@ -1,433 +0,0 @@ -/* - * Copyright 2001-2006 Adrian Thurston - */ - -/* This file is part of Ragel. - * - * Ragel is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Ragel is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Ragel; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -%{ - -#define YY_NEVER_INTERACTIVE 1 -//#define WANT_TOKEN_WRITE - -#include -#include "vector.h" -#include "rlcodegen.h" -#include "xmlparse.h" -#include "buffer.h" - -using std::cout; -using std::cerr; -using std::endl; - -Buffer tokbuf; -int builtinBrace = 0; -bool inlineWhitespace = true; -bool handlingInclude = false; - -YYSTYPE *yylval; -YYLTYPE *yylloc; - -void garble(); - -void extendToken(); -void extendToken( char *data, int len ); - -int emitToken( int token, char *data, int len ); -int emitNoData( int token ); -int emitTag( char *data, int len, bool isOpen ); -void passThrough( char *data ); -void popInclude(); -void scannerInit(); - -enum InlineBlockType { - CurlyDelimited, - SemiTerminated -} inlineBlockType; - -/* Using a wrapper for the parser, must the lex declaration. */ -#define YY_DECL int rlcodegen_lex() - -class Perfect_Hash -{ -private: - static inline unsigned int hash (const char *str, unsigned int len); - -public: - static struct XMLTagHashPair *in_word_set (const char *str, unsigned int len); -}; - -Vector shouldEmitXMLData; - -int first_line = 1; -int first_column = 1; -int last_line = 1; -int last_column = 0; - -Buffer xmlData; - -%} - -%x OPEN_TAG -%x CLOSE_TAG1 -%x CLOSE_TAG2 -%x ATTR_LIST -%x ATTR_LITERAL - -WSCHAR [\t\n\v\f\r ] -IDENT [a-zA-Z_][a-zA-Z_0-9\-]* - -%% - - /* Numbers in outter code. */ -"<" { - BEGIN(OPEN_TAG); - shouldEmitXMLData.prepend( false ); - return emitNoData( *yytext ); -} - -[^<&]+ { - if ( shouldEmitXMLData[0] ) - xmlData.append( yytext, yyleng ); - garble(); -} -"&" { - if ( shouldEmitXMLData[0] ) - xmlData.append( "&", 1 ); - garble(); -} -"<" { - if ( shouldEmitXMLData[0] ) - xmlData.append( "<", 1 ); - garble(); -} -">" { - if ( shouldEmitXMLData[0] ) - xmlData.append( ">", 1 ); - garble(); -} - - /* - * Tags - */ - -"/" { - BEGIN(CLOSE_TAG1); - xmlData.append(0); - return emitNoData( *yytext ); -} - -{IDENT} { - BEGIN( ATTR_LIST ); - return emitTag( yytext, yyleng, true ); -} - -{WSCHAR}+ { - garble(); -} - -{IDENT} { - BEGIN( CLOSE_TAG2 ); - return emitTag( yytext, yyleng, false ); -} - -">" { - shouldEmitXMLData.remove( 0 ); - BEGIN(INITIAL); - return emitNoData( *yytext ); -} - -{IDENT} { - return emitToken( XML_Word, yytext, yyleng ); -} - -\" { - BEGIN(ATTR_LITERAL); - extendToken(); -} -\\. extendToken( yytext+1, 1 ); -\\\n extendToken( yytext+1, 1 ); -[^\\"]+ extendToken( yytext, yyleng ); - - /* Terminate a double literal */ -\" { - BEGIN(ATTR_LIST); - return emitToken( XML_Literal, 0, 0 ); -} - -{WSCHAR}+ { - garble(); -} - -">" { - BEGIN(INITIAL); - return emitNoData( *yytext ); -} - -. { - return emitNoData( *yytext ); -} - -%% - -/* Write out token data, escaping special charachters. */ -#ifdef WANT_TOKEN_WRITE -void writeToken( int token, char *data ) -{ - cout << "token id " << token << " at " << id->fileName << ":" << - yylloc->first_line << ":" << yylloc->first_column << "-" << - yylloc->last_line << ":" << yylloc->last_column << " "; - - if ( data != 0 ) { - while ( *data != 0 ) { - switch ( *data ) { - case '\n': cout << "\\n"; break; - case '\t': cout << "\\t"; break; - default: cout << *data; break; - } - data += 1; - } - } - cout << endl; -} -#endif - -/* Caclulate line info from yytext. Called on every pattern match. */ -void updateLineInfo() -{ - /* yytext should always have at least wone char. */ - assert( yytext[0] != 0 ); - - /* Scan through yytext up to the last character. */ - char *p = yytext; - for ( ; p[1] != 0; p++ ) { - if ( p[0] == '\n' ) { - last_line += 1; - last_column = 0; - } - else { - last_column += 1; - } - } - - /* Always consider the last character as not a newline. Newlines at the - * end of a token are as any old character at the end of the line. */ - last_column += 1; - - /* The caller may be about to emit a token, be prepared to pass the line - * info to the parser. */ - yylloc->first_line = first_line; - yylloc->first_column = first_column; - yylloc->last_line = last_line; - yylloc->last_column = last_column; - - /* If the last character was indeed a newline, then wrap ahead now. */ - if ( p[0] == '\n' ) { - last_line += 1; - last_column = 0; - } -} - - -/* Eat up a matched pattern that will not be part of a token. */ -void garble() -{ - /* Update line information from yytext. */ - updateLineInfo(); - - /* The next token starts ahead of the last token. */ - first_line = last_line; - first_column = last_column + 1; -} - -/* Extend a token, but don't add any data to it, more token data expected. */ -void extendToken() -{ - /* Update line information from yytext. */ - updateLineInfo(); -} - -/* Append data to the end of the token. More token data expected. */ -void extendToken( char *data, int len ) -{ - if ( data != 0 && len > 0 ) - tokbuf.append( data, len ); - - /* Update line information from yytext. */ - updateLineInfo(); -} - - -/* Append data to the end of a token and emitToken it to the parser. */ -int emitToken( int token, char *data, int len ) -{ - /* Append the data and null terminate. */ - if ( data != 0 && len > 0 ) - tokbuf.append( data, len ); - tokbuf.append( 0 ); - - /* Duplicate the buffer. */ - yylval->data = new char[tokbuf.length]; - strcpy( yylval->data, tokbuf.data ); - - /* Update line information from yytext. */ - updateLineInfo(); - - /* Write token info. */ -#ifdef WANT_TOKEN_WRITE - writeToken( token, tokbuf.data ); -#endif - - /* Clear out the buffer. */ - tokbuf.clear(); - - /* The next token starts ahead of the last token. */ - first_line = last_line; - first_column = last_column + 1; - - return token; -} - -/* Append data to the end of a token and emitToken it to the parser. */ -int emitTag( char *data, int len, bool isOpen ) -{ - /* Lookup the tag. */ - int token = TAG_unknown; - - XMLTagHashPair *tag = Perfect_Hash::in_word_set( data, len ); - if ( tag != 0 ) - token = tag->id; - - if ( isOpen ) { - switch ( token ) { - case TAG_host: case TAG_t: case TAG_start_state: - case TAG_action_table: - case TAG_alphtype: case TAG_state_actions: - case TAG_entry_points: - case TAG_text: case TAG_goto: - case TAG_call: case TAG_next: - case TAG_set_act: case TAG_set_tokend: - case TAG_entry: case TAG_option: - case TAG_cond_space: case TAG_c: - shouldEmitXMLData[0] = true; - xmlData.clear(); - } - } - - return emitToken( token, data, len ); -} - -/* Emit a token with no data to the parser. */ -int emitNoData( int token ) -{ - /* Return null to the parser. */ - yylval->data = 0; - - /* Update line information from yytext. */ - updateLineInfo(); - - /* Write token info. */ -#ifdef WANT_TOKEN_WRITE - writeToken( token, 0 ); -#endif - - /* Clear out the buffer. */ - tokbuf.clear(); - - /* The next token starts ahead of the last token. */ - first_line = last_line; - first_column = last_column + 1; - - return token; -} - -/* Pass tokens in outter code through to the output. */ -void passThrough( char *data ) -{ - /* If no errors, we are emitting code and we are at the bottom of the - * include stack (the source file listed on the command line) then write - * out the data. */ - if ( gblErrorCount == 0 && outputFormat == OutCode ) - *outStream << data; -} - -/* Init a buffer. */ -Buffer::Buffer() -: - data(0), - length(0), - allocated(0) -{ -} - -/* Empty out a buffer on destruction. */ -Buffer::~Buffer() -{ - empty(); -} - -/* Free the space allocated for the buffer. */ -void Buffer::empty() -{ - if ( data != 0 ) { - free( data ); - - data = 0; - length = 0; - allocated = 0; - } -} - -/* Grow the buffer when to len allocation. */ -void Buffer::upAllocate( int len ) -{ - if ( data == 0 ) - data = (char*) malloc( len ); - else - data = (char*) realloc( data, len ); - allocated = len; -} - -int yywrap() -{ - /* Once processessing of the input is done, signal no more. */ - return 1; -} - -/* Here simply to suppress the unused yyunpt warning. */ -void thisFuncIsNeverCalled() -{ - yyunput(0, 0); -} - -void scannerInit() -{ - /* Set this up in case we are initially given something other - * than an opening tag. */ - shouldEmitXMLData.prepend( false ); -} - -/* Wrapper for the lexer which stores the locations of the value and location - * variables of the parser into globals. The parser is reentrant, however the scanner - * does not need to be, so globals work fine. This saves us passing them around - * all the helper functions. */ -int yylex( YYSTYPE *yylval, YYLTYPE *yylloc ) -{ - ::yylval = yylval; - ::yylloc = yylloc; - return rlcodegen_lex(); -} diff --git a/rlcodegen/xmlscan.rl b/rlcodegen/xmlscan.rl index c42b504..3440f2b 100644 --- a/rlcodegen/xmlscan.rl +++ b/rlcodegen/xmlscan.rl @@ -260,7 +260,11 @@ int xml_parse( istream &input, char *fileName ) while ( 1 ) { int token = scanner.scan(); - if ( token == TK_EOF ) { + if ( token == TK_NO_TOKEN ) { + cerr << PROGNAME << ": interal error: scanner returned NO_TOKEN" << endl; + exit(1); + } + else if ( token == TK_EOF ) { parser.token( _eof, scanner.token_col, scanner.token_line ); break; } @@ -322,14 +326,14 @@ int xml_parse( istream &input, char *fileName ) } #if 0 - cout << "parser_driver: " << (tag->type == XMLTag::Open ? "open" : "close") << - ": " << tag->tagId->name << endl; + cerr << "parser_driver: " << (tag->type == XMLTag::Open ? "open" : "close") << + ": " << (tag->tagId != 0 ? tag->tagId->name : "") << endl; if ( tag->attrList != 0 ) { for ( AttrList::Iter attr = *tag->attrList; attr.lte(); attr++ ) - cout << " " << attr->id << ": " << attr->value << endl; + cerr << " " << attr->id << ": " << attr->value << endl; } if ( tag->content != 0 ) - cout << " content: " << tag->content << endl; + cerr << " content: " << tag->content << endl; #endif parser.token( tag, scanner.token_col, scanner.token_line ); -- 2.7.4