2 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
33 /* Types of builtin machines. */
71 struct LongestMatchPart;
75 /* Type of augmentation. Describes locations in the machine. */
78 /* Transition actions/priorities. */
84 /* Global error actions. */
88 at_not_start_gbl_error,
89 at_not_final_gbl_error,
92 /* Local error actions. */
96 at_not_start_local_error,
97 at_not_final_local_error,
98 at_middle_local_error,
100 /* To State Action embedding. */
104 at_not_start_to_state,
105 at_not_final_to_state,
108 /* From State Action embedding. */
112 at_not_start_from_state,
113 at_not_final_from_state,
114 at_middle_from_state,
116 /* EOF Action embedding. */
125 /* IMPORTANT: These must follow the same order as the state augs in AugType
126 * since we will be using this to compose AugType. */
143 struct ExplicitMachine;
147 /* Reference to a named state. */
148 typedef Vector<char*> NameRef;
149 typedef Vector<NameRef*> NameRefList;
150 typedef Vector<NameInst*> NameTargList;
152 /* Structure for storing location of epsilon transitons. */
155 EpsilonLink( const InputLoc &loc, NameRef &target )
156 : loc(loc), target(target) { }
164 Label( const InputLoc &loc, char *data )
165 : loc(loc), data(data) { }
171 /* Structrue represents an action assigned to some FactorWithAug node. The
172 * factor with aug will keep an array of these. */
175 ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action )
176 : loc(loc), type(type), localErrKey(localErrKey), action(action) { }
190 void prepareLitString( Token &result, bool &caseInsensitive );
191 void append( const Token &other );
192 void set( char *str, int len );
195 /* Store the value and type of a priority augmentation. */
198 PriorityAug( AugType type, int priorKey, int priorValue ) :
199 type(type), priorKey(priorKey), priorValue(priorValue) { }
207 * A Variable Definition
211 VarDef( char *name, JoinOrLm *joinOrLm )
212 : name(name), joinOrLm(joinOrLm) { }
214 /* Parse tree traversal. */
215 FsmAp *walk( ParseData *pd );
216 void makeNameTree( const InputLoc &loc, ParseData *pd );
217 void resolveNameRefs( ParseData *pd );
230 * Wherever possible the item match will execute on the character. If not
231 * possible the item match will execute on a lookahead character and either
232 * hold the current char (if one away) or backup.
234 * How to handle the problem of backing up over a buffer break?
236 * Don't want to use pending out transitions for embedding item match because
237 * the role of item match action is different: it may sometimes match on the
238 * final transition, or may match on a lookahead character.
240 * Don't want to invent a new operator just for this. So just trail action
241 * after machine, this means we can only use literal actions.
243 * The item action may
245 * What states of the machine will be final. The item actions that wrap around
246 * on the last character will go straight to the start state.
248 * Some transitions will be lookahead transitions, they will hold the current
249 * character. Crossing them with regular transitions must be restricted
250 * because it does not make sense. The transition cannot simultaneously hold
251 * and consume the current character.
253 struct LongestMatchPart
255 LongestMatchPart( Join *join, Action *action,
256 InputLoc &semiLoc, int longestMatchId )
258 join(join), action(action), semiLoc(semiLoc),
259 longestMatchId(longestMatchId), inLmSelect(false) { }
270 Action *actLagBehind;
273 LongestMatch *longestMatch;
275 LongestMatchPart *prev, *next;
278 /* Declare a new type so that ptreetypes.h need not include dlist.h. */
279 struct LmPartList : DList<LongestMatchPart> {};
283 /* Construct with a list of joins */
284 LongestMatch( const InputLoc &loc, LmPartList *longestMatchList ) :
285 loc(loc), longestMatchList(longestMatchList), name(0),
286 lmSwitchHandlesError(false) { }
288 /* Tree traversal. */
289 FsmAp *walk( ParseData *pd );
290 void makeNameTree( ParseData *pd );
291 void resolveNameRefs( ParseData *pd );
292 void runLonestMatch( ParseData *pd, FsmAp *graph );
293 Action *newAction( ParseData *pd, const InputLoc &loc, char *name,
294 InlineList *inlineList );
295 void makeActions( ParseData *pd );
296 void findName( ParseData *pd );
297 void restart( FsmAp *graph, TransAp *trans );
300 LmPartList *longestMatchList;
304 bool lmSwitchHandlesError;
306 LongestMatch *next, *prev;
310 /* List of Expressions. */
311 typedef DList<Expression> ExprList;
320 JoinOrLm( Join *join ) :
321 join(join), type(JoinType) {}
322 JoinOrLm( LongestMatch *longestMatch ) :
323 longestMatch(longestMatch), type(LongestMatchType) {}
325 FsmAp *walk( ParseData *pd );
326 void makeNameTree( ParseData *pd );
327 void resolveNameRefs( ParseData *pd );
330 LongestMatch *longestMatch;
339 /* Construct with the first expression. */
340 Join( Expression *expr );
341 Join( const InputLoc &loc, Expression *expr );
343 /* Tree traversal. */
344 FsmAp *walk( ParseData *pd );
345 FsmAp *walkJoin( ParseData *pd );
346 void makeNameTree( ParseData *pd );
347 void resolveNameRefs( ParseData *pd );
368 /* Construct with an expression on the left and a term on the right. */
369 Expression( Expression *expression, Term *term, Type type ) :
370 expression(expression), term(term),
371 builtin(builtin), type(type), prev(this), next(this) { }
373 /* Construct with only a term. */
374 Expression( Term *term ) :
375 expression(0), term(term), builtin(builtin),
376 type(TermType) , prev(this), next(this) { }
378 /* Construct with a builtin type. */
379 Expression( BuiltinMachine builtin ) :
380 expression(0), term(0), builtin(builtin),
381 type(BuiltinType), prev(this), next(this) { }
385 /* Tree traversal. */
386 FsmAp *walk( ParseData *pd, bool lastInSeq = true );
387 void makeNameTree( ParseData *pd );
388 void resolveNameRefs( ParseData *pd );
391 Expression *expression;
393 BuiltinMachine builtin;
396 Expression *prev, *next;
412 Term( Term *term, FactorWithAug *factorWithAug ) :
413 term(term), factorWithAug(factorWithAug), type(ConcatType) { }
415 Term( Term *term, FactorWithAug *factorWithAug, Type type ) :
416 term(term), factorWithAug(factorWithAug), type(type) { }
418 Term( FactorWithAug *factorWithAug ) :
419 term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { }
423 FsmAp *walk( ParseData *pd, bool lastInSeq = true );
424 void makeNameTree( ParseData *pd );
425 void resolveNameRefs( ParseData *pd );
428 FactorWithAug *factorWithAug;
431 /* Priority descriptor for RightFinish type. */
432 PriorDesc priorDescs[2];
436 /* Third level of precedence. Augmenting nodes with actions and priorities. */
439 FactorWithAug( FactorWithRep *factorWithRep ) :
440 priorDescs(0), factorWithRep(factorWithRep) { }
443 /* Tree traversal. */
444 FsmAp *walk( ParseData *pd );
445 void makeNameTree( ParseData *pd );
446 void resolveNameRefs( ParseData *pd );
448 void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd );
449 void assignPriorities( FsmAp *graph, int *priorOrd );
451 void assignConditions( FsmAp *graph );
453 /* Actions and priorities assigned to the factor node. */
454 Vector<ParserAction> actions;
455 Vector<PriorityAug> priorityAugs;
456 PriorDesc *priorDescs;
457 Vector<Label> labels;
458 Vector<EpsilonLink> epsilonLinks;
459 Vector<ParserAction> conditions;
461 FactorWithRep *factorWithRep;
464 /* Fourth level of precedence. Trailing unary operators. Provide kleen star,
465 * optional and plus. */
480 FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep,
481 int lowerRep, int upperRep, Type type ) :
482 loc(loc), factorWithRep(factorWithRep),
483 factorWithNeg(0), lowerRep(lowerRep),
484 upperRep(upperRep), type(type) { }
486 FactorWithRep( FactorWithNeg *factorWithNeg )
487 : factorWithNeg(factorWithNeg), type(FactorWithNegType) { }
491 /* Tree traversal. */
492 FsmAp *walk( ParseData *pd );
493 void makeNameTree( ParseData *pd );
494 void resolveNameRefs( ParseData *pd );
497 FactorWithRep *factorWithRep;
498 FactorWithNeg *factorWithNeg;
499 int lowerRep, upperRep;
502 /* Priority descriptor for StarStar type. */
503 PriorDesc priorDescs[2];
506 /* Fifth level of precedence. Provides Negation. */
515 FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) :
516 loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { }
518 FactorWithNeg( Factor *factor ) :
519 factorWithNeg(0), factor(factor), type(FactorType) { }
523 /* Tree traversal. */
524 FsmAp *walk( ParseData *pd );
525 void makeNameTree( ParseData *pd );
526 void resolveNameRefs( ParseData *pd );
529 FactorWithNeg *factorWithNeg;
539 /* Language elements a factor node can be. */
550 /* Construct with a literal fsm. */
551 Factor( Literal *literal ) :
552 literal(literal), type(LiteralType) { }
554 /* Construct with a range. */
555 Factor( Range *range ) :
556 range(range), type(RangeType) { }
558 /* Construct with the or part of a regular expression. */
559 Factor( ReItem *reItem ) :
560 reItem(reItem), type(OrExprType) { }
562 /* Construct with a regular expression. */
563 Factor( RegExpr *regExpr ) :
564 regExpr(regExpr), type(RegExprType) { }
566 /* Construct with a reference to a var def. */
567 Factor( const InputLoc &loc, VarDef *varDef ) :
568 loc(loc), varDef(varDef), type(ReferenceType) {}
570 /* Construct with a parenthesized join. */
571 Factor( Join *join ) :
572 join(join), type(ParenType) {}
574 /* Construct with a longest match operator. */
575 Factor( LongestMatch *longestMatch ) :
576 longestMatch(longestMatch), type(LongestMatchType) {}
581 /* Tree traversal. */
582 FsmAp *walk( ParseData *pd );
583 void makeNameTree( ParseData *pd );
584 void resolveNameRefs( ParseData *pd );
593 LongestMatch *longestMatch;
598 /* A range machine. Only ever composed of two literals. */
601 Range( Literal *lowerLit, Literal *upperLit )
602 : lowerLit(lowerLit), upperLit(upperLit) { }
605 FsmAp *walk( ParseData *pd );
611 /* Some literal machine. Can be a number or literal string. */
614 enum LiteralType { Number, LitString };
616 Literal( const Token &token, LiteralType type )
617 : token(token), type(type) { }
619 FsmAp *walk( ParseData *pd );
625 /* Regular expression. */
628 enum RegExpType { RecurseItem, Empty };
632 type(Empty), caseInsensitive(false) { }
633 RegExpr(RegExpr *regExpr, ReItem *item) :
634 regExpr(regExpr), item(item),
635 type(RecurseItem), caseInsensitive(false) { }
638 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
643 bool caseInsensitive;
646 /* An item in a regular expression. */
649 enum ReItemType { Data, Dot, OrBlock, NegOrBlock };
651 ReItem( const InputLoc &loc, const Token &token )
652 : loc(loc), token(token), star(false), type(Data) { }
653 ReItem( const InputLoc &loc, ReItemType type )
654 : loc(loc), star(false), type(type) { }
655 ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type )
656 : loc(loc), orBlock(orBlock), star(false), type(type) { }
659 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
668 /* An or block item. */
671 enum ReOrBlockType { RecurseItem, Empty };
676 ReOrBlock(ReOrBlock *orBlock, ReOrItem *item)
677 : orBlock(orBlock), item(item), type(RecurseItem) { }
680 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
687 /* An item in an or block. */
690 enum ReOrItemType { Data, Range };
692 ReOrItem( const InputLoc &loc, const Token &token )
693 : loc(loc), token(token), type(Data) {}
694 ReOrItem( const InputLoc &loc, char lower, char upper )
695 : loc(loc), lower(lower), upper(upper), type(Range) { }
697 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
715 Text, Goto, Call, Next, GotoExpr, CallExpr, NextExpr, Ret, PChar,
716 Char, Hold, Curs, Targs, Entry, Exec, LmSwitch, LmSetActId,
717 LmSetTokEnd, LmOnLast, LmOnNext, LmOnLagBehind, LmInitAct,
718 LmInitTokStart, LmSetTokStart, Break
721 InlineItem( const InputLoc &loc, char *data, Type type ) :
722 loc(loc), data(data), nameRef(0), children(0), type(type) { }
724 InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) :
725 loc(loc), data(0), nameRef(nameRef), children(0), type(type) { }
727 InlineItem( const InputLoc &loc, LongestMatch *longestMatch,
728 LongestMatchPart *longestMatchPart, Type type ) : loc(loc), data(0),
729 nameRef(0), children(0), longestMatch(longestMatch),
730 longestMatchPart(longestMatchPart), type(type) { }
732 InlineItem( const InputLoc &loc, NameInst *nameTarg, Type type ) :
733 loc(loc), data(0), nameRef(0), nameTarg(nameTarg), children(0),
736 InlineItem( const InputLoc &loc, Type type ) :
737 loc(loc), data(0), nameRef(0), children(0), type(type) { }
743 InlineList *children;
744 LongestMatch *longestMatch;
745 LongestMatchPart *longestMatchPart;
748 InlineItem *prev, *next;
751 /* Normally this would be atypedef, but that would entail including DList from
752 * ptreetypes, which should be just typedef forwards. */
753 struct InlineList : public DList<InlineItem> { };
757 #endif /* _PARSETREE_H */