2 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
33 /* Types of builtin machines. */
71 struct LongestMatchPart;
75 /* Type of augmentation. Describes locations in the machine. */
78 /* Transition actions/priorities. */
84 /* Global error actions. */
88 at_not_start_gbl_error,
89 at_not_final_gbl_error,
92 /* Local error actions. */
96 at_not_start_local_error,
97 at_not_final_local_error,
98 at_middle_local_error,
100 /* To State Action embedding. */
104 at_not_start_to_state,
105 at_not_final_to_state,
108 /* From State Action embedding. */
112 at_not_start_from_state,
113 at_not_final_from_state,
114 at_middle_from_state,
116 /* EOF Action embedding. */
125 /* IMPORTANT: These must follow the same order as the state augs in AugType
126 * since we will be using this to compose AugType. */
143 struct ExplicitMachine;
147 /* Reference to a named state. */
148 typedef Vector<char*> NameRef;
149 typedef Vector<NameRef*> NameRefList;
150 typedef Vector<NameInst*> NameTargList;
152 /* Structure for storing location of epsilon transitons. */
155 EpsilonLink( const InputLoc &loc, NameRef &target )
156 : loc(loc), target(target) { }
164 Label( const InputLoc &loc, char *data )
165 : loc(loc), data(data) { }
171 /* Structrue represents an action assigned to some FactorWithAug node. The
172 * factor with aug will keep an array of these. */
175 ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action )
176 : loc(loc), type(type), localErrKey(localErrKey), action(action) { }
190 void prepareLitString( Token &result, bool &caseInsensitive );
191 void append( const Token &other );
192 void set( char *str, int len );
195 /* Store the value and type of a priority augmentation. */
198 PriorityAug( AugType type, int priorKey, int priorValue ) :
199 type(type), priorKey(priorKey), priorValue(priorValue) { }
207 * A Variable Definition
211 VarDef( char *name, JoinOrLm *joinOrLm )
212 : name(name), joinOrLm(joinOrLm) { }
214 /* Parse tree traversal. */
215 FsmAp *walk( ParseData *pd );
216 void makeNameTree( const InputLoc &loc, ParseData *pd );
217 void resolveNameRefs( ParseData *pd );
227 * Wherever possible the item match will execute on the character. If not
228 * possible the item match will execute on a lookahead character and either
229 * hold the current char (if one away) or backup.
231 * How to handle the problem of backing up over a buffer break?
233 * Don't want to use pending out transitions for embedding item match because
234 * the role of item match action is different: it may sometimes match on the
235 * final transition, or may match on a lookahead character.
237 * Don't want to invent a new operator just for this. So just trail action
238 * after machine, this means we can only use literal actions.
240 * The item action may
242 * What states of the machine will be final. The item actions that wrap around
243 * on the last character will go straight to the start state.
245 * Some transitions will be lookahead transitions, they will hold the current
246 * character. Crossing them with regular transitions must be restricted
247 * because it does not make sense. The transition cannot simultaneously hold
248 * and consume the current character.
250 struct LongestMatchPart
252 LongestMatchPart( Join *join, Action *action,
253 InputLoc &semiLoc, int longestMatchId )
255 join(join), action(action), semiLoc(semiLoc),
256 longestMatchId(longestMatchId), inLmSelect(false) { }
267 Action *actLagBehind;
270 LongestMatch *longestMatch;
272 LongestMatchPart *prev, *next;
275 /* Declare a new type so that ptreetypes.h need not include dlist.h. */
276 struct LmPartList : DList<LongestMatchPart> {};
280 /* Construct with a list of joins */
281 LongestMatch( const InputLoc &loc, LmPartList *longestMatchList ) :
282 loc(loc), longestMatchList(longestMatchList), name(0),
283 lmSwitchHandlesError(false) { }
285 /* Tree traversal. */
286 FsmAp *walk( ParseData *pd );
287 void makeNameTree( ParseData *pd );
288 void resolveNameRefs( ParseData *pd );
289 void runLonestMatch( ParseData *pd, FsmAp *graph );
290 Action *newAction( ParseData *pd, const InputLoc &loc, char *name,
291 InlineList *inlineList );
292 void makeActions( ParseData *pd );
293 void findName( ParseData *pd );
294 void restart( FsmAp *graph, TransAp *trans );
297 LmPartList *longestMatchList;
301 bool lmSwitchHandlesError;
303 LongestMatch *next, *prev;
307 /* List of Expressions. */
308 typedef DList<Expression> ExprList;
317 JoinOrLm( Join *join ) :
318 join(join), type(JoinType) {}
319 JoinOrLm( LongestMatch *longestMatch ) :
320 longestMatch(longestMatch), type(LongestMatchType) {}
322 FsmAp *walk( ParseData *pd );
323 void makeNameTree( ParseData *pd );
324 void resolveNameRefs( ParseData *pd );
327 LongestMatch *longestMatch;
336 /* Construct with the first expression. */
337 Join( Expression *expr );
338 Join( const InputLoc &loc, Expression *expr );
340 /* Tree traversal. */
341 FsmAp *walk( ParseData *pd );
342 FsmAp *walkJoin( ParseData *pd );
343 void makeNameTree( ParseData *pd );
344 void resolveNameRefs( ParseData *pd );
365 /* Construct with an expression on the left and a term on the right. */
366 Expression( Expression *expression, Term *term, Type type ) :
367 expression(expression), term(term),
368 builtin(builtin), type(type), prev(this), next(this) { }
370 /* Construct with only a term. */
371 Expression( Term *term ) :
372 expression(0), term(term), builtin(builtin),
373 type(TermType) , prev(this), next(this) { }
375 /* Construct with a builtin type. */
376 Expression( BuiltinMachine builtin ) :
377 expression(0), term(0), builtin(builtin),
378 type(BuiltinType), prev(this), next(this) { }
382 /* Tree traversal. */
383 FsmAp *walk( ParseData *pd, bool lastInSeq = true );
384 void makeNameTree( ParseData *pd );
385 void resolveNameRefs( ParseData *pd );
388 Expression *expression;
390 BuiltinMachine builtin;
393 Expression *prev, *next;
409 Term( Term *term, FactorWithAug *factorWithAug ) :
410 term(term), factorWithAug(factorWithAug), type(ConcatType) { }
412 Term( Term *term, FactorWithAug *factorWithAug, Type type ) :
413 term(term), factorWithAug(factorWithAug), type(type) { }
415 Term( FactorWithAug *factorWithAug ) :
416 term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { }
420 FsmAp *walk( ParseData *pd, bool lastInSeq = true );
421 void makeNameTree( ParseData *pd );
422 void resolveNameRefs( ParseData *pd );
425 FactorWithAug *factorWithAug;
428 /* Priority descriptor for RightFinish type. */
429 PriorDesc priorDescs[2];
433 /* Third level of precedence. Augmenting nodes with actions and priorities. */
436 FactorWithAug( FactorWithRep *factorWithRep ) :
437 priorDescs(0), factorWithRep(factorWithRep) { }
440 /* Tree traversal. */
441 FsmAp *walk( ParseData *pd );
442 void makeNameTree( ParseData *pd );
443 void resolveNameRefs( ParseData *pd );
445 void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd );
446 void assignPriorities( FsmAp *graph, int *priorOrd );
448 void assignConditions( FsmAp *graph );
450 /* Actions and priorities assigned to the factor node. */
451 Vector<ParserAction> actions;
452 Vector<PriorityAug> priorityAugs;
453 PriorDesc *priorDescs;
454 Vector<Label> labels;
455 Vector<EpsilonLink> epsilonLinks;
456 Vector<ParserAction> conditions;
458 FactorWithRep *factorWithRep;
461 /* Fourth level of precedence. Trailing unary operators. Provide kleen star,
462 * optional and plus. */
477 FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep,
478 int lowerRep, int upperRep, Type type ) :
479 loc(loc), factorWithRep(factorWithRep),
480 factorWithNeg(0), lowerRep(lowerRep),
481 upperRep(upperRep), type(type) { }
483 FactorWithRep( FactorWithNeg *factorWithNeg )
484 : factorWithNeg(factorWithNeg), type(FactorWithNegType) { }
488 /* Tree traversal. */
489 FsmAp *walk( ParseData *pd );
490 void makeNameTree( ParseData *pd );
491 void resolveNameRefs( ParseData *pd );
494 FactorWithRep *factorWithRep;
495 FactorWithNeg *factorWithNeg;
496 int lowerRep, upperRep;
499 /* Priority descriptor for StarStar type. */
500 PriorDesc priorDescs[2];
503 /* Fifth level of precedence. Provides Negation. */
512 FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) :
513 loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { }
515 FactorWithNeg( Factor *factor ) :
516 factorWithNeg(0), factor(factor), type(FactorType) { }
520 /* Tree traversal. */
521 FsmAp *walk( ParseData *pd );
522 void makeNameTree( ParseData *pd );
523 void resolveNameRefs( ParseData *pd );
526 FactorWithNeg *factorWithNeg;
536 /* Language elements a factor node can be. */
547 /* Construct with a literal fsm. */
548 Factor( Literal *literal ) :
549 literal(literal), type(LiteralType) { }
551 /* Construct with a range. */
552 Factor( Range *range ) :
553 range(range), type(RangeType) { }
555 /* Construct with the or part of a regular expression. */
556 Factor( ReItem *reItem ) :
557 reItem(reItem), type(OrExprType) { }
559 /* Construct with a regular expression. */
560 Factor( RegExpr *regExpr ) :
561 regExpr(regExpr), type(RegExprType) { }
563 /* Construct with a reference to a var def. */
564 Factor( const InputLoc &loc, VarDef *varDef ) :
565 loc(loc), varDef(varDef), type(ReferenceType) {}
567 /* Construct with a parenthesized join. */
568 Factor( Join *join ) :
569 join(join), type(ParenType) {}
571 /* Construct with a longest match operator. */
572 Factor( LongestMatch *longestMatch ) :
573 longestMatch(longestMatch), type(LongestMatchType) {}
578 /* Tree traversal. */
579 FsmAp *walk( ParseData *pd );
580 void makeNameTree( ParseData *pd );
581 void resolveNameRefs( ParseData *pd );
590 LongestMatch *longestMatch;
595 /* A range machine. Only ever composed of two literals. */
598 Range( Literal *lowerLit, Literal *upperLit )
599 : lowerLit(lowerLit), upperLit(upperLit) { }
602 FsmAp *walk( ParseData *pd );
603 bool verifyRangeFsm( FsmAp *rangeEnd );
609 /* Some literal machine. Can be a number or literal string. */
612 enum LiteralType { Number, LitString };
614 Literal( const Token &token, LiteralType type )
615 : token(token), type(type) { }
617 FsmAp *walk( ParseData *pd );
623 /* Regular expression. */
626 enum RegExpType { RecurseItem, Empty };
630 type(Empty), caseInsensitive(false) { }
631 RegExpr(RegExpr *regExpr, ReItem *item) :
632 regExpr(regExpr), item(item),
633 type(RecurseItem), caseInsensitive(false) { }
636 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
641 bool caseInsensitive;
644 /* An item in a regular expression. */
647 enum ReItemType { Data, Dot, OrBlock, NegOrBlock };
649 ReItem( const InputLoc &loc, const Token &token )
650 : loc(loc), token(token), star(false), type(Data) { }
651 ReItem( const InputLoc &loc, ReItemType type )
652 : loc(loc), star(false), type(type) { }
653 ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type )
654 : loc(loc), orBlock(orBlock), star(false), type(type) { }
657 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
666 /* An or block item. */
669 enum ReOrBlockType { RecurseItem, Empty };
674 ReOrBlock(ReOrBlock *orBlock, ReOrItem *item)
675 : orBlock(orBlock), item(item), type(RecurseItem) { }
678 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
685 /* An item in an or block. */
688 enum ReOrItemType { Data, Range };
690 ReOrItem( const InputLoc &loc, const Token &token )
691 : loc(loc), token(token), type(Data) {}
692 ReOrItem( const InputLoc &loc, char lower, char upper )
693 : loc(loc), lower(lower), upper(upper), type(Range) { }
695 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
713 Text, Goto, Call, Next, GotoExpr, CallExpr, NextExpr, Ret, PChar,
714 Char, Hold, Curs, Targs, Entry, Exec, LmSwitch, LmSetActId,
715 LmSetTokEnd, LmOnLast, LmOnNext, LmOnLagBehind, LmInitAct,
716 LmInitTokStart, LmSetTokStart, Break
719 InlineItem( const InputLoc &loc, char *data, Type type ) :
720 loc(loc), data(data), nameRef(0), children(0), type(type) { }
722 InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) :
723 loc(loc), data(0), nameRef(nameRef), children(0), type(type) { }
725 InlineItem( const InputLoc &loc, LongestMatch *longestMatch,
726 LongestMatchPart *longestMatchPart, Type type ) : loc(loc), data(0),
727 nameRef(0), children(0), longestMatch(longestMatch),
728 longestMatchPart(longestMatchPart), type(type) { }
730 InlineItem( const InputLoc &loc, NameInst *nameTarg, Type type ) :
731 loc(loc), data(0), nameRef(0), nameTarg(nameTarg), children(0),
734 InlineItem( const InputLoc &loc, Type type ) :
735 loc(loc), data(0), nameRef(0), children(0), type(type) { }
741 InlineList *children;
742 LongestMatch *longestMatch;
743 LongestMatchPart *longestMatchPart;
746 InlineItem *prev, *next;
749 /* Normally this would be atypedef, but that would entail including DList from
750 * ptreetypes, which should be just typedef forwards. */
751 struct InlineList : public DList<InlineItem> { };
755 #endif /* _PARSETREE_H */