2 * Copyright 2001-2005 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 #include "parsetree.h"
38 extern bool inlineWhitespace;
40 /* These come from the scanner and point back into the parser. We will borrow
41 * them for error reporting. */
42 extern YYSTYPE *yylval;
43 extern YYLTYPE *yylloc;
45 /* The include stack pointer from the scanner. Used to determine if we are
46 * currently processing an included file. */
47 extern int inc_stack_ptr;
49 /* Try to do a definition, common to assignment and instantiation. */
50 void tryMachineDef( const YYLTYPE &loc, char *name,
51 JoinOrLm *joinOrLm, bool isInstance );
52 void beginOutsideCode();
53 void doInclude( const InputLoc &loc, char *sectionName, char *inputFile );
54 int yylex( YYSTYPE *yylval, YYLTYPE *yylloc );
66 /* General data types. */
74 FactorWithAug *factorWithAug;
75 FactorWithRep *factorWithRep;
76 FactorWithNeg *factorWithNeg;
78 Expression *expression;
81 LmPartList *longestMatchList;
82 LongestMatchPart *longestMatchPart;
84 /* Priorities and actions. */
86 StateAugType stateAugType;
90 /* Regular expression items. */
96 /* Inline parse tree items. */
104 /* General tokens. */
105 %token <data> TK_UInt
107 %token <data> TK_Word
108 %token <data> TK_Literal
109 %token <data> TK_CiLiteral
110 %token <data> TK_BaseClause
116 %token TK_DoubleArrow
118 %token TK_ColonEquals
126 %token TK_LeavingCond
130 /* Global error actions. */
131 %token TK_StartGblError
132 %token TK_AllGblError
133 %token TK_FinalGblError
134 %token TK_NotFinalGblError
135 %token TK_NotStartGblError
136 %token TK_MiddleGblError
138 /* Local error actions. */
139 %token TK_StartLocalError
140 %token TK_AllLocalError
141 %token TK_FinalLocalError
142 %token TK_NotFinalLocalError
143 %token TK_NotStartLocalError
144 %token TK_MiddleLocalError
146 /* EOF Action embedding. */
150 %token TK_NotFinalEOF
151 %token TK_NotStartEOF
154 /* To State Actions. */
155 %token TK_StartToState
157 %token TK_FinalToState
158 %token TK_NotFinalToState
159 %token TK_NotStartToState
160 %token TK_MiddleToState
162 /* In State Actions. */
163 %token TK_StartFromState
164 %token TK_AllFromState
165 %token TK_FinalFromState
166 %token TK_NotFinalFromState
167 %token TK_NotStartFromState
168 %token TK_MiddleFromState
170 /* Regular expression tokens. */
171 %token <data> RE_Slash
178 %token <data> RE_Char
180 /* Tokens specific to inline code. */
181 %token <data> IL_WhiteSpace
182 %token <data> IL_Comment
183 %token <data> IL_Literal
184 %token <data> IL_Symbol
201 /* Specials in code blocks. */
215 %token<data> KW_Variable
218 /* Special token for terminating semi-terminated code blocks. Needed because
219 * semi is sent as a token in the code block rather than as a generic symbol. */
222 /* Symbols. In ragel lexical space, the scanner does not pass
223 * any data along with the symbols, in inline code lexical
225 %token '*' '?' '+' '!' '^' '(' ')' ';' ',' '='
226 %token ':' '@' '%' '$' '-' '|' '&' '.' '>'
228 /* Precedence information. Lower is a higher precedence. We need only two
229 * precedence groups. Shifting the minus sign in front of a literal number
230 * conflicts with the reduction of Expression and the subsequent shifting of a
231 * subtraction operator when a '-' is seen. Since we want subtraction to take
232 * precedence, we give EXPR_MINUS the higher priority. */
236 %type <augType> AugTypeBase
237 %type <augType> AugTypeGblError
238 %type <augType> AugTypeLocalError
239 %type <augType> AugTypeEOF
240 %type <augType> AugTypeToState
241 %type <augType> AugTypeFromState
242 %type <augType> AugTypeCond
243 %type <integer> PriorityAug
244 %type <data> PriorityAugNum
245 %type <action> ActionEmbed
246 %type <action> ActionEmbedWord
247 %type <action> ActionEmbedBlock
248 %type <action> OptLmPartAction
249 %type <longestMatchList> LmPartList
250 %type <longestMatchPart> LongestMatchPart
252 %type <joinOrLm> JoinOrLm
253 %type <expression> Expression
255 %type <factorWithAug> FactorWithLabel
256 %type <factorWithAug> FactorWithEp
257 %type <factorWithAug> FactorWithAug
258 %type <factorWithAug> FactorWithTransAction
259 %type <factorWithAug> FactorWithPriority
260 %type <factorWithAug> FactorWithCond
261 %type <factorWithAug> FactorWithToStateAction
262 %type <factorWithAug> FactorWithFromStateAction
263 %type <factorWithAug> FactorWithEOFAction
264 %type <factorWithAug> FactorWithGblErrorAction
265 %type <factorWithAug> FactorWithLocalErrorAction
266 %type <factorWithRep> FactorWithRep
267 %type <integer> FactorRepNum
268 %type <factorWithNeg> FactorWithNeg
269 %type <factor> Factor
270 %type <literal> RangeLit
271 %type <data> AlphabetNum
272 %type <data> MachineName
273 %type <integer> PriorityName
274 %type <integer> LocalErrName
275 %type <data> SectionName
276 %type <data> OptSection
277 %type <data> OptFileName
278 %type <integer> EndSection
280 %type <illist> InlineBlock
281 %type <ilitem> InlineBlockItem
282 %type <ilitem> InlineBlockInterpret
283 %type <data> InlineBlockAny
284 %type <data> InlineBlockSymbol
286 %type <illist> InlineExpr
287 %type <ilitem> InlineExprItem
288 %type <ilitem> InlineExprInterpret
289 %type <data> InlineExprSymbol
290 %type <data> InlineExprAny
292 %type <regExp> RegularExpr
293 %type <reItem> RegularExprItem
294 %type <reItem> RegularExprChar
295 %type <reOrBlock> RegularExprOrData
296 %type <reOrItem> RegularExprOrChar
300 /* Input is any number of input sections. An empty file is accepted. */
303 FsmSpecList FsmSpec |
306 /* Fsm Specification. Fsms begin with '%%' and may be a {} delimited
307 * list of Fsm statements or may be a single statement. If no name is
308 * given the last name given in a machine is used. */
310 StartSection SectionName StatementList EndSection {
311 if ( includeDepth == 0 ) {
313 *outStream << "</ragel_def>\n";
315 if ( machineSpec == 0 && machineName == 0 ) {
316 /* The end section may include a newline on the end, so
317 * we use the last line, which will count the newline. */
318 *outStream << "<host line=\"" << $4 << "\">";
325 id->sectionLoc = InputLoc(@1);
327 if ( includeDepth == 0 ) {
328 if ( machineSpec == 0 && machineName == 0 )
329 *outStream << "</host>\n";
330 sectionOpened = false;
335 KW_Machine TK_Word ';' {
336 /* By default active until found not active. */
338 id->sectionName = $2.data;
340 if ( id->includeSpec != 0 ) {
341 if ( strcmp( id->sectionName, id->includeSpec ) == 0 )
342 id->sectionName = id->includeTo;
347 /* Lookup the parse data, if it is not there then create it. */
348 SectionMapEl *sectionMapEl = sectionMap.find( id->sectionName );
349 if ( sectionMapEl == 0 ) {
350 ParseData *newPd = new ParseData( id->fileName, id->sectionName,
352 sectionMapEl = sectionMap.insert( id->sectionName, newPd );
354 id->pd = sectionMapEl->value;
357 /* No machine name. Just use the previous section setup. Report an
358 * error if there is no previous section */
360 error(id->sectionLoc) << "the first ragel section does not have a name" << endl;
361 id->pd = new ParseData( id->fileName, "<DUMMY>", id->sectionLoc );
366 TK_Section { $$ = @1.last_line; } |
367 TK_SectionNL { $$ = @1.last_line + 1; };
369 /* A NonEmpty list of statements in a fsm. */
371 StatementList Statement |
374 /* The different types of statements in a fsm spec. */
387 /* Garble up to the next ; */
388 Statement: error ';' { yyerrok; };
390 /* Allow the user to create a named fsm action that can be referenced when
391 * building a machine. */
393 KW_Action TK_Word '{' InlineBlock '}' {
395 if ( id->pd->actionDict.find( $2.data ) ) {
396 /* Recover by just ignoring the duplicate. */
397 error(@2) << "action \"" << $2.data << "\" already defined" << endl;
400 /* Add the action to the list of actions. */
401 Action *newAction = new Action( InputLoc(@3), $2.data, $4, id->nameRefList );
403 /* Insert to list and dict. */
404 id->pd->actionList.append( newAction );
405 id->pd->actionDict.insert( newAction );
410 /* Specifies the data type of the input alphabet. One or two words
411 * followed by a semi-colon. */
413 KW_AlphType TK_Word TK_Word TK_Semi {
415 if ( ! id->pd->setAlphType( $2.data, $3.data ) ) {
416 // Recover by ignoring the alphtype statement.
417 error(@2) << "\"" << $2.data <<
418 " " << $3.data << "\" is not a valid alphabet type" << endl;
422 KW_AlphType TK_Word TK_Semi {
424 if ( ! id->pd->setAlphType( $2.data ) ) {
425 // Recover by ignoring the alphtype statement.
426 error(@2) << "\"" << $2.data << "\" is not a valid alphabet type" << endl;
432 KW_GetKey InlineBlock TK_Semi {
434 id->pd->getKeyExpr = $2;
437 /* Specifies a range to assume that the input characters will fall into. */
439 KW_Range AlphabetNum AlphabetNum ';' {
441 // Save the upper and lower ends of the range and emit the line number.
442 id->pd->lowerNum = $2.data;
443 id->pd->upperNum = $3.data;
444 id->pd->rangeLowLoc = InputLoc(@2);
445 id->pd->rangeHighLoc = InputLoc(@3);
451 WriteOpen WriteOptions ';' {
453 *outStream << "</write>\n";
460 if ( strcmp( $2.data, "data" ) != 0 &&
461 strcmp( $2.data, "init" ) != 0 &&
462 strcmp( $2.data, "exec" ) != 0 &&
463 strcmp( $2.data, "eof" ) != 0 )
465 error( @2 ) << "unknown write command" << endl;
467 *outStream << " <write what=\"" << $2.data << "\">";
472 WriteOptions TK_Word {
474 *outStream << "<option>" << $2.data << "</option>";
479 KW_Access InlineBlock TK_Semi {
481 id->pd->accessExpr = $2;
485 KW_Variable InlineBlock TK_Semi {
487 if ( strcmp( $1.data, "curstate" ) == 0 ) {
488 id->pd->curStateExpr = $2;
493 /* Include statements are processed by both the scanner and the parser. */
495 IncludeKeyword OptSection OptFileName ';' {
497 doInclude( @1, $2.data, $3.data );
502 /* Do this immediately so that the scanner has a correct sense of the
503 * value in id->active when it reaches the end of the statement before
504 * the above action executes. */
505 //getParseData( @1 );
508 OptSection: TK_Word { $$ = $1; } | { $$.data = 0; $$.length = 0; };
509 OptFileName: TK_Literal { $$ = $1; } | { $$.data = 0; $$.length = 0; };
511 /* An assignment statement. Assigns the definition of a machine to a variable name. */
513 MachineName '=' Join ';' {
515 /* Main machine must be an instance. */
516 bool isInstance = false;
517 if ( strcmp($1.data, machineMain) == 0 ) {
518 warning(@1) << "main machine will be implicitly instantiated" << endl;
522 /* Generic creation of machine for instantiation and assignment. */
523 JoinOrLm *joinOrLm = new JoinOrLm( $3 );
524 tryMachineDef( @1, $1.data, joinOrLm, isInstance );
528 /* An instantiation statement. Instantiates a machine and assigns it to a
531 MachineName TK_ColonEquals JoinOrLm ';' {
532 /* Generic creation of machine for instantiation and assignment. */
534 tryMachineDef( @1, $1.data, $3, true );
537 /* Capture the machine name for making the machine's priority name. */
541 /* Make/get the priority key. The name may have already been referenced
542 * and therefore exist. */
543 PriorDictEl *priorDictEl;
544 if ( id->pd->priorDict.insert( $1.data, id->pd->nextPriorKey, &priorDictEl ) )
545 id->pd->nextPriorKey += 1;
546 id->pd->curDefPriorKey = priorDictEl->value;
548 /* Make/get the local error key. */
549 LocalErrDictEl *localErrDictEl;
550 if ( id->pd->localErrDict.insert( $1.data, id->pd->nextLocalErrKey, &localErrDictEl ) )
551 id->pd->nextLocalErrKey += 1;
552 id->pd->curDefLocalErrKey = localErrDictEl->value;
558 $$ = new JoinOrLm( $1 );
560 TK_BarStar LmPartList '*' '|' {
561 /* Create a new factor going to a longest match structure. Record
562 * in the parse data that we have a longest match. */
563 LongestMatch *lm = new LongestMatch( @1, $2 );
565 id->pd->lmList.append( lm );
566 for ( LmPartList::Iter lmp = *($2); lmp.lte(); lmp++ )
567 lmp->longestMatch = lm;
568 $$ = new JoinOrLm( lm );
572 Join ',' Expression {
573 /* Append the expression to the list and return it. */
574 $1->exprList.append( $3 );
578 /* Create the expression list with the initial expression. */
579 $$ = new Join( InputLoc(@1), $1 );
582 /* Top level production in the parse of a fsm. The lowest precedence
583 * is the '|' (or), '&' (intersection), and '-' (subtraction) operators. */
585 Expression '|' Term {
586 $$ = new Expression( $1, $3, Expression::OrType );
588 Expression '&' Term {
589 $$ = new Expression( $1, $3, Expression::IntersectType );
591 Expression '-' Term {
592 $$ = new Expression( $1, $3, Expression::SubtractType );
594 Expression TK_DashDash Term {
595 $$ = new Expression( $1, $3, Expression::StrongSubtractType );
598 $$ = new Expression( $1 );
602 Term FactorWithLabel {
603 $$ = new Term( $1, $2 );
605 Term '.' FactorWithLabel {
606 $$ = new Term( $1, $3 );
608 Term TK_ColonGt FactorWithLabel {
609 $$ = new Term( $1, $3, Term::RightStartType );
611 Term TK_ColonGtGt FactorWithLabel {
612 $$ = new Term( $1, $3, Term::RightFinishType );
614 Term TK_LtColon FactorWithLabel {
615 $$ = new Term( $1, $3, Term::LeftType );
622 TK_Word ':' FactorWithLabel {
623 /* Add the label to the list and pass the factor up. */
624 $3->labels.prepend( Label(@1, $1.data) );
630 FactorWithEp TK_Arrow LocalStateRef {
631 /* Add the target to the list and return the factor object. */
632 $1->epsilonLinks.append( EpsilonLink( InputLoc(@2), id->nameRef ) );
637 /* A local state reference. Qualified name without :: prefix. */
639 NoNameSep StateRefNames;
641 /* Clear the name ref structure. */
647 /* A qualified state reference. */
649 OptNameSep StateRefNames;
651 /* Optional leading name separator. */
654 /* Insert an initial null pointer val to indicate the existence of the
655 * initial name separator. */
656 id->nameRef.setAs( 0 );
662 /* List of names separated by :: */
664 StateRefNames TK_NameSep TK_Word {
665 id->nameRef.append( $3.data );
668 id->nameRef.append( $1.data );
671 /* Third group up in precedence. Allow users to embed actions and priorities */
673 FactorWithTransAction |
676 FactorWithToStateAction |
677 FactorWithFromStateAction |
678 FactorWithEOFAction |
679 FactorWithGblErrorAction |
680 FactorWithLocalErrorAction |
682 $$ = new FactorWithAug( $1 );
685 FactorWithTransAction:
686 FactorWithAug AugTypeBase ActionEmbed {
687 /* Append the action to the factorWithAug, record the reference from
688 * factorWithAug to the action and pass up the factorWithAug. */
689 $1->actions.append( ParserAction( @2, $2, 0, $3 ) );
694 FactorWithAug AugTypeBase PriorityAug {
696 /* Append the priority, keyed by the current definition's priority key. */
697 $1->priorityAugs.append( PriorityAug( $2, id->pd->curDefPriorKey, $3 ) );
701 FactorWithAug AugTypeBase '(' PriorityName ',' PriorityAug ')' {
702 /* Append the priority, keyed by the explicitly given priority name. */
703 $1->priorityAugs.append( PriorityAug( $2, $4, $6 ) );
708 FactorWithAug AugTypeCond ActionEmbed {
/* Append the condition to the factorWithAug through $1, exactly as the
 * other augmentation rules do. Dereferencing $$ before it has been
 * assigned depends on Bison pre-loading $$ with $1, which the Bison
 * skeleton marks as undocumented behaviour. */
709 $1->conditions.append( ParserAction( @2, $2, 0, $3 ) );
/* Condition embedding operators. Each symbolic operator and its
 * "<symbol> when" spelling must yield the same augmentation type:
 * '>' is start, '$' is all and '%' is leaving. A bare KW_When is
 * treated as all. */
714 TK_StartCond { $$ = at_start; } |
715 '>' KW_When { $$ = at_start; } |
716 TK_AllCond { $$ = at_all; } |
717 '$' KW_When { $$ = at_all; } |
718 TK_LeavingCond { $$ = at_leave; } |
719 '%' KW_When { $$ = at_leave; } |
720 KW_When { $$ = at_all; };
722 FactorWithToStateAction:
723 FactorWithAug AugTypeToState ActionEmbed {
724 /* Append the action, pass it up. */
725 $1->actions.append( ParserAction( @2, $2, 0, $3 ) );
729 FactorWithFromStateAction:
730 FactorWithAug AugTypeFromState ActionEmbed {
731 /* Append the action, pass it up. */
732 $1->actions.append( ParserAction( @2, $2, 0, $3 ) );
737 FactorWithAug AugTypeEOF ActionEmbed {
738 /* Append the action, pass it up. */
739 $1->actions.append( ParserAction( @2, $2, 0, $3 ) );
743 FactorWithGblErrorAction:
744 FactorWithAug AugTypeGblError ActionEmbed {
746 /* Append the action to the factorWithAug, record the reference from
747 * factorWithAug to the action and pass up the factorWithAug. */
748 $1->actions.append( ParserAction( @2, $2, id->pd->curDefLocalErrKey, $3 ) );
753 FactorWithLocalErrorAction:
754 FactorWithAug AugTypeLocalError ActionEmbed {
756 /* Append the action to the factorWithAug, record the reference from
757 * factorWithAug to the action and pass up the factorWithAug. */
758 $1->actions.append( ParserAction( @2, $2, id->pd->curDefLocalErrKey, $3 ) );
762 FactorWithAug AugTypeLocalError '(' LocalErrName ',' ActionEmbed ')' {
763 /* Append the action to the factorWithAug, record the reference from
764 * factorWithAug to the action and pass up the factorWithAug. */
765 $1->actions.append( ParserAction( @2, $2, $4, $6 ) );
769 /* A specified priority name. Looks up the name in the current priority
774 // Lookup/create the priority key.
775 PriorDictEl *priorDictEl;
776 if ( id->pd->priorDict.insert( $1.data, id->pd->nextPriorKey, &priorDictEl ) )
777 id->pd->nextPriorKey += 1;
779 // Use the inserted/found priority key.
780 $$ = priorDictEl->value;
787 /* Lookup/create the local error key. */
788 LocalErrDictEl *localErrDictEl;
789 if ( id->pd->localErrDict.insert( $1.data, id->pd->nextLocalErrKey, &localErrDictEl ) )
790 id->pd->nextLocalErrKey += 1;
792 /* Use the inserted/found local error key. */
793 $$ = localErrDictEl->value;
797 /* Priority change specs. */
800 // Convert the priority number to a long. Check for overflow.
// strtol returns long; holding the result in a long keeps the LONG_MAX and
// LONG_MIN comparisons below meaningful where long is wider than int.
802 long aug = strtol( $1.data, 0, 10 );
803 if ( errno == ERANGE && aug == LONG_MAX ) {
804 // Priority number too large. Recover by setting the priority to 0.
805 error(@1) << "priority number " << $1.data << " overflows" << endl;
808 else if ( errno == ERANGE && aug == LONG_MIN ) {
809 // Priority number too large in the neg. Recover by using 0.
810 error(@1) << "priority number " << $1.data << " underflows" << endl;
814 // No overflow or underflow.
830 /* Classes of transitions on which to embed actions or change priorities. */
832 '@' { $$ = at_finish; } |
833 '%' { $$ = at_leave; } |
834 '$' { $$ = at_all; } |
835 '>' { $$ = at_start; };
837 /* Global error actions. */
839 TK_StartGblError { $$ = at_start_gbl_error; } |
840 '>' KW_Err { $$ = at_start_gbl_error; } |
842 TK_NotStartGblError { $$ = at_not_start_gbl_error; } |
843 '<' KW_Err { $$ = at_not_start_gbl_error; } |
845 TK_AllGblError { $$ = at_all_gbl_error; } |
846 '$' KW_Err { $$ = at_all_gbl_error; } |
848 TK_FinalGblError { $$ = at_final_gbl_error; } |
849 '%' KW_Err { $$ = at_final_gbl_error; } |
851 TK_NotFinalGblError { $$ = at_not_final_gbl_error; } |
852 '@' KW_Err { $$ = at_not_final_gbl_error; } |
854 TK_MiddleGblError { $$ = at_middle_gbl_error; } |
855 TK_Middle KW_Err { $$ = at_middle_gbl_error; };
857 /* Local error actions. */
859 TK_StartLocalError { $$ = at_start_local_error; } |
860 '>' KW_Lerr { $$ = at_start_local_error; } |
862 TK_NotStartLocalError { $$ = at_not_start_local_error; } |
863 '<' KW_Lerr { $$ = at_not_start_local_error; } |
865 TK_AllLocalError { $$ = at_all_local_error; } |
866 '$' KW_Lerr { $$ = at_all_local_error; } |
868 TK_FinalLocalError { $$ = at_final_local_error; } |
869 '%' KW_Lerr { $$ = at_final_local_error; } |
871 TK_NotFinalLocalError { $$ = at_not_final_local_error; } |
872 '@' KW_Lerr { $$ = at_not_final_local_error; } |
874 TK_MiddleLocalError { $$ = at_middle_local_error; } |
875 TK_Middle KW_Lerr { $$ = at_middle_local_error; };
877 /* Eof state actions. */
879 TK_StartEOF { $$ = at_start_eof; } |
880 '>' KW_Eof { $$ = at_start_eof; } |
882 TK_NotStartEOF { $$ = at_not_start_eof; } |
883 '<' KW_Eof { $$ = at_not_start_eof; } |
885 TK_AllEOF { $$ = at_all_eof; } |
886 '$' KW_Eof { $$ = at_all_eof; } |
888 TK_FinalEOF { $$ = at_final_eof; } |
889 '%' KW_Eof { $$ = at_final_eof; } |
891 TK_NotFinalEOF { $$ = at_not_final_eof; } |
892 '@' KW_Eof { $$ = at_not_final_eof; } |
894 TK_MiddleEOF { $$ = at_middle_eof; } |
895 TK_Middle KW_Eof { $$ = at_middle_eof; };
897 /* To state actions. */
899 TK_StartToState { $$ = at_start_to_state; } |
900 '>' KW_To { $$ = at_start_to_state; } |
902 TK_NotStartToState { $$ = at_not_start_to_state; } |
903 '<' KW_To { $$ = at_not_start_to_state; } |
905 TK_AllToState { $$ = at_all_to_state; } |
906 '$' KW_To { $$ = at_all_to_state; } |
908 TK_FinalToState { $$ = at_final_to_state; } |
909 '%' KW_To { $$ = at_final_to_state; } |
911 TK_NotFinalToState { $$ = at_not_final_to_state; } |
912 '@' KW_To { $$ = at_not_final_to_state; } |
914 TK_MiddleToState { $$ = at_middle_to_state; } |
915 TK_Middle KW_To { $$ = at_middle_to_state; };
917 /* From state actions. */
919 TK_StartFromState { $$ = at_start_from_state; } |
920 '>' KW_From { $$ = at_start_from_state; } |
922 TK_NotStartFromState { $$ = at_not_start_from_state; } |
923 '<' KW_From { $$ = at_not_start_from_state; } |
925 TK_AllFromState { $$ = at_all_from_state; } |
926 '$' KW_From { $$ = at_all_from_state; } |
928 TK_FinalFromState { $$ = at_final_from_state; } |
929 '%' KW_From { $$ = at_final_from_state; } |
931 TK_NotFinalFromState { $$ = at_not_final_from_state; } |
932 '@' KW_From { $$ = at_not_final_from_state; } |
934 TK_MiddleFromState { $$ = at_middle_from_state; } |
935 TK_Middle KW_From { $$ = at_middle_from_state; };
938 /* Different ways to embed actions. A TK_Word is a reference to an action given by
939 * the user as a statement in the fsm specification. An action can also be
940 * specified immediately. */
942 ActionEmbedWord | ActionEmbedBlock;
947 /* Set the name in the actionDict. */
948 Action *action = id->pd->actionDict.find( $1.data );
950 /* Pass up the action element */
954 /* Will recover by returning null as the action. */
955 error(@1) << "action lookup of \"" << $1.data << "\" failed" << endl;
962 '{' InlineBlock '}' {
964 /* Create the action, add it to the list and pass up. */
965 Action *newAction = new Action( InputLoc(@1), 0, $2, id->nameRefList );
966 id->pd->actionList.append( newAction );
971 /* The fourth level of precedence. These are the trailing unary operators that
972 * allow for repetition. */
975 $$ = new FactorWithRep( InputLoc(@2), $1, 0, 0,
976 FactorWithRep::StarType );
978 FactorWithRep TK_StarStar {
979 $$ = new FactorWithRep( InputLoc(@2), $1, 0, 0,
980 FactorWithRep::StarStarType );
983 $$ = new FactorWithRep( InputLoc(@2), $1, 0, 0,
984 FactorWithRep::OptionalType );
987 $$ = new FactorWithRep( InputLoc(@2), $1, 0, 0,
988 FactorWithRep::PlusType );
990 FactorWithRep TK_RepOpOpen FactorRepNum '}' {
991 $$ = new FactorWithRep( InputLoc(@2), $1, $3, 0,
992 FactorWithRep::ExactType );
994 FactorWithRep TK_RepOpOpen ',' FactorRepNum '}' {
995 $$ = new FactorWithRep( InputLoc(@2), $1, 0, $4,
996 FactorWithRep::MaxType );
998 FactorWithRep TK_RepOpOpen FactorRepNum ',' '}' {
999 $$ = new FactorWithRep( InputLoc(@2), $1, $3, 0,
1000 FactorWithRep::MinType );
1002 FactorWithRep TK_RepOpOpen FactorRepNum ',' FactorRepNum '}' {
1003 $$ = new FactorWithRep( InputLoc(@2), $1, $3, $5,
1004 FactorWithRep::RangeType );
1007 $$ = new FactorWithRep( InputLoc(@1), $1 );
1012 // Convert the repetition number to a long. Check for overflow.
// strtol returns long; holding the result in a long keeps the LONG_MAX
// comparison below meaningful where long is wider than int.
1014 long rep = strtol( $1.data, 0, 10 );
1015 if ( errno == ERANGE && rep == LONG_MAX ) {
1016 // Repetition too large. Recover by returning repetition 1.
1017 error(@1) << "repetition number " << $1.data << " overflows" << endl;
1021 // Cannot be negative, so no underflow.
1026 /* The fifth level up in precedence. Negation. */
1029 $$ = new FactorWithNeg( InputLoc(@1), $2, FactorWithNeg::NegateType );
1032 $$ = new FactorWithNeg( InputLoc(@1), $2, FactorWithNeg::CharNegateType );
1035 $$ = new FactorWithNeg( InputLoc(@1), $1 );
1038 /* The highest level in precedence. Atomic machines such as references to other
1039 * machines, literal machines, regular expressions or Expressions inside of
1043 // Create a new factor node going to a concat literal.
1044 $$ = new Factor( new Literal( InputLoc(@1), $1, Literal::LitString ) );
1047 // Create a new factor node going to a case-insensitive concat literal.
1048 $$ = new Factor( new Literal( InputLoc(@1), $1, Literal::LitString ) );
1049 $$->literal->caseInsensitive = true;
1052 // Create a new factor node going to a literal number.
1053 $$ = new Factor( new Literal( InputLoc(@1), $1, Literal::Number ) );
1057 // Find the named graph.
1058 GraphDictEl *gdNode = id->pd->graphDict.find( $1.data );
1059 if ( gdNode == 0 ) {
1060 // Recover by returning null as the factor node.
1061 error(@1) << "graph lookup of \"" << $1.data << "\" failed" << endl;
1064 else if ( gdNode->isInstance ) {
1065 // Recover by returning null as the factor node.
1066 error(@1) << "references to graph instantiations not allowed "
1067 "in expressions" << endl;
1071 // Create a factor node that is a lookup of an expression.
1072 $$ = new Factor( InputLoc(@1), gdNode->value );
1076 RE_SqOpen RegularExprOrData RE_SqClose {
1077 // Create a new factor node going to an OR expression.
1078 $$ = new Factor( new ReItem( InputLoc(@1), $2, ReItem::OrBlock ) );
1080 RE_SqOpenNeg RegularExprOrData RE_SqClose {
1081 // Create a new factor node going to a negated OR expression.
1082 $$ = new Factor( new ReItem( InputLoc(@1), $2, ReItem::NegOrBlock ) );
1084 RE_Slash RegularExpr RE_Slash {
1085 if ( $3.length > 1 ) {
1086 for ( char *p = $3.data; *p != 0; p++ ) {
1088 $2->caseInsensitive = true;
1092 // Create a new factor node going to a regular exp.
1093 $$ = new Factor( $2 );
1095 RangeLit TK_DotDot RangeLit {
1096 // Create a new factor node going to a range.
1097 $$ = new Factor( new Range( $1, $3 ) );
1100 /* Create a new factor going to a parenthesized join. */
1101 $$ = new Factor( $2 );
1104 /* Garble up to the closing brace of a parenthesized expression. */
1105 Factor: '(' error ')' { $$ = 0; yyerrok; };
1108 LmPartList LongestMatchPart {
1114 /* Create a new list with the part. */
1115 $$ = new LmPartList;
1121 ActionSpec { $$ = 0; } |
1122 Assignment { $$ = 0; } |
1123 Join OptLmPartAction ';' {
1126 Action *action = $2;
1128 action->isLmAction = true;
1129 $$ = new LongestMatchPart( $1, action, id->pd->nextLongestMatchId++ );
1134 TK_DoubleArrow ActionEmbed { $$ = $2; } |
1135 ActionEmbedBlock { $$ = $1; } |
1136 /* Nothing */ { $$ = 0; };
1139 /* Any form of a number that can be used as a basic machine. */
1150 InlineBlock InlineBlockItem {
1151 /* Append the item to the list, return the list. */
1156 /* Start with empty list. */
1157 $$ = new InlineList;
1160 /* Items in a struct block. */
1163 /* Add a text segment. */
1164 $$ = new InlineItem( @1, $1.data, InlineItem::Text );
1167 /* Add a text segment, need string on heap. */
1168 $$ = new InlineItem( @1, strdup($1.data), InlineItem::Text );
1170 InlineBlockInterpret {
1171 /* Pass the inline item up. */
1175 /* Uninteresting tokens in a struct block. Data allocated by scanner. */
1177 IL_WhiteSpace | IL_Comment | IL_Literal | IL_Symbol |
1178 TK_UInt | TK_Hex | TK_Word;
1180 /* Symbols in a struct block, no data allocated. */
1182 ',' { $$.data = ","; $$.length = 1; } |
1183 ';' { $$.data = ";"; $$.length = 1; } |
1184 '(' { $$.data = "("; $$.length = 1; } |
1185 ')' { $$.data = ")"; $$.length = 1; } |
1186 '*' { $$.data = "*"; $$.length = 1; } |
1187 TK_NameSep { $$.data = "::"; $$.length = 2; };
1189 /* Interpreted statements in a struct block. */
1190 InlineBlockInterpret:
1191 InlineExprInterpret {
1192 /* Pass up interpreted items of inline expressions. */
1195 KW_Hold SetNoWs ';' SetWs {
1196 $$ = new InlineItem( @1, InlineItem::Hold );
1198 KW_Exec SetNoWs InlineExpr ';' SetWs {
1199 $$ = new InlineItem( @1, InlineItem::Exec );
1202 KW_Goto SetNoWs StateRef ';' SetWs {
1203 $$ = new InlineItem( @1, new NameRef(id->nameRef), InlineItem::Goto );
1205 KW_Goto SetNoWs '*' SetWs InlineExpr ';' {
1206 $$ = new InlineItem( @1, InlineItem::GotoExpr );
1209 KW_Next SetNoWs StateRef ';' SetWs {
1210 $$ = new InlineItem( @1, new NameRef(id->nameRef), InlineItem::Next );
1212 KW_Next SetNoWs '*' SetWs InlineExpr ';' {
1213 $$ = new InlineItem( @1, InlineItem::NextExpr );
1216 KW_Call SetNoWs StateRef ';' SetWs {
1217 $$ = new InlineItem( @1, new NameRef(id->nameRef), InlineItem::Call );
1219 KW_Call SetNoWs '*' SetWs InlineExpr ';' {
1220 $$ = new InlineItem( @1, InlineItem::CallExpr );
1223 KW_Ret SetNoWs ';' SetWs {
1224 $$ = new InlineItem( @1, InlineItem::Ret );
1226 KW_Break SetNoWs ';' SetWs {
1227 $$ = new InlineItem( @1, InlineItem::Break );
1230 /* Turn off whitespace collecting when scanning inline blocks. */
1231 SetNoWs: { inlineWhitespace = false; };
1233 /* Turn on whitespace collecting when scanning inline blocks. */
1234 SetWs: { inlineWhitespace = true; };
1237 InlineExpr InlineExprItem {
1242 /* Init the list used for this expr. */
1243 $$ = new InlineList;
1248 /* Return a text segment. */
1249 $$ = new InlineItem( @1, $1.data, InlineItem::Text );
1252 /* Return a text segment, must heap alloc the text. */
1253 $$ = new InlineItem( @1, strdup($1.data), InlineItem::Text );
1255 InlineExprInterpret {
1256 /* Pass the inline item up. */
1260 InlineExprInterpret:
1262 $$ = new InlineItem( @1, InlineItem::PChar );
1265 $$ = new InlineItem( @1, InlineItem::Char );
1268 $$ = new InlineItem( @1, InlineItem::Curs );
1271 $$ = new InlineItem( @1, InlineItem::Targs );
1273 KW_Entry SetNoWs '(' StateRef ')' SetWs {
1274 $$ = new InlineItem( @1, new NameRef(id->nameRef), InlineItem::Entry );
1278 IL_WhiteSpace | IL_Comment | IL_Literal | IL_Symbol |
1279 TK_UInt | TK_Hex | TK_Word;
1281 /* Anything in a ExecValExpr that is not dynamically allocated. This includes
1282 * all special symbols caught in inline code except the semi. */
/* Symbols in an inline expression. No data is allocated; each alternative
 * points at a string literal and records that literal's length. */
1284 '(' { $$.data = "("; $$.length = 1; } |
1285 ')' { $$.data = ")"; $$.length = 1; } |
1286 '*' { $$.data = "*"; $$.length = 1; } |
1287 TK_NameSep { $$.data = "::"; $$.length = 2; };
1289 /* Parser for regular expression fsms. Any number of expression items which
1290 * generally gives a machine one character long or one character long starred. */
1292 RegularExpr RegularExprItem {
1293 // An optimization to lessen the tree size. If a non-starred char is directly
1294 // under the left side on the right and the right side is another non-starred
1295 // char then paste them together and return the left side. Otherwise
1296 // just put the two under a new reg exp node.
1297 if ( $2->type == ReItem::Data && !$2->star &&
1298 $1->type == RegExpr::RecurseItem &&
1299 $1->item->type == ReItem::Data && !$1->item->star )
1301 // Append the right side to the right side of the left and toss
1303 $1->item->data.append( $2->data );
1308 $$ = new RegExpr( $1, $2 );
1312 // Can't optimize the tree.
1316 /* RegularExprItems can be a character spec with an optional starring of the char. */
1318 RegularExprChar RE_Star {
1326 /* A character spec can be a set of characters inside of square parenthesis,
1327 * a dot specifying any character or some explicitly stated character. */
1329 RE_SqOpen RegularExprOrData RE_SqClose {
1330 $$ = new ReItem( InputLoc(@1), $2, ReItem::OrBlock );
1332 RE_SqOpenNeg RegularExprOrData RE_SqClose {
1333 $$ = new ReItem( InputLoc(@1), $2, ReItem::NegOrBlock );
1336 $$ = new ReItem( InputLoc(@1), ReItem::Dot );
1339 $$ = new ReItem( InputLoc(@1), $1.data[0] );
1342 /* The data inside of a [] expression in a regular expression. Accepts any
1343 * number of characters or ranges. */
1345 RegularExprOrData RegularExprOrChar {
1346 // An optimization to lessen the tree size. If an or char is directly
1347 // under the left side on the right and the right side is another or
1348 // char then paste them together and return the left side. Otherwise
1349 // just put the two under a new or data node.
1350 if ( $2->type == ReOrItem::Data &&
1351 $1->type == ReOrBlock::RecurseItem &&
1352 $1->item->type == ReOrItem::Data )
1354 // Append the right side to right side of the left and toss
1356 $1->item->data.append( $2->data );
1361 // Can't optimize, put the left and right under a new node.
1362 $$ = new ReOrBlock( $1, $2 );
1366 $$ = new ReOrBlock();
1370 /* A single character inside of an or expression. Can either be a character
1371 * or a set of characters. */
1374 $$ = new ReOrItem( InputLoc(@1), $1.data[0] );
1376 RE_Char RE_Dash RE_Char {
1377 $$ = new ReOrItem( InputLoc(@2), $1.data[0], $3.data[0] );
1382 // Range literals must have only one char.
1383 if ( strlen($1.data) != 1 ) {
1384 // Recover by using the literal anyways.
1385 error(@1) << "literal used in range must be of length 1" << endl;
1387 $$ = new Literal( InputLoc(@1), $1, Literal::LitString );
1390 // Create a new literal number.
1391 $$ = new Literal( InputLoc(@1), $1, Literal::Number );
1396 /* Try to do a definition, common to assignment and instantiation. Warns about
1397 * instances other than main not being implemented yet. */
1398 void tryMachineDef( const YYLTYPE &loc, char *name, JoinOrLm *joinOrLm, bool isInstance )
1400 GraphDictEl *newEl = id->pd->graphDict.insert( name );
1402 /* New element in the dict, all good. */
1403 newEl->value = new VarDef( name, joinOrLm );
1404 newEl->isInstance = isInstance;
1407 /* If it is an instance, put on the instance list. */
1409 id->pd->instanceList.append( newEl );
1412 // Recover by ignoring the duplicate.
1413 error(loc) << "fsm \"" << name << "\" previously defined" << endl;
1417 void doInclude( const InputLoc &loc, char *sectionName, char *inputFile )
1419 /* Bail if we hit the max include depth. */
1420 if ( includeDepth == INCLUDE_STACK_SIZE ) {
1421 error(loc) << "hit maximum include depth of " << INCLUDE_STACK_SIZE << endl;
1424 char *includeTo = id->pd->fsmName;
1426 /* Implement defaults for the input file and section name. */
1427 if ( inputFile == 0 )
1428 inputFile = id->fileName;
1429 if ( sectionName == 0 )
1430 sectionName = id->pd->fsmName;
1432 /* Parse the included file. */
1433 InputData *oldId = id;
1434 id = new InputData( inputFile, sectionName, includeTo );
1445 if ( ! sectionOpened ) {
1446 sectionOpened = true;
1447 *outStream << "<ragel_def name=\"" << id->pd->fsmName << "\">\n";
1451 void yyerror( char *err )
1453 /* Bison won't give us the location, but in the last call to the scanner we
1454 * saved a pointer to the location variable. Use that instead. */
1455 error(::yylloc->first_line, ::yylloc->first_column) << err << endl;