2 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
33 /* Types of builtin machines. */
71 struct LongestMatchPart;
75 /* Type of augmentation. Describes locations in the machine. */
78 /* Transition actions/priorities. */
84 /* Global error actions. */
88 at_not_start_gbl_error,
89 at_not_final_gbl_error,
92 /* Local error actions. */
96 at_not_start_local_error,
97 at_not_final_local_error,
98 at_middle_local_error,
100 /* To State Action embedding. */
104 at_not_start_to_state,
105 at_not_final_to_state,
108 /* From State Action embedding. */
112 at_not_start_from_state,
113 at_not_final_from_state,
114 at_middle_from_state,
116 /* EOF Action embedding. */
125 /* IMPORTANT: These must follow the same order as the state augs in AugType
126 * since we will be using this to compose AugType. */
143 struct ExplicitMachine;
147 /* Reference to a named state. */
148 typedef Vector<char*> NameRef;
149 typedef Vector<NameRef*> NameRefList;
150 typedef Vector<NameInst*> NameTargList;
152 /* Structure for storing location of epsilon transitons. */
155 EpsilonLink( const InputLoc &loc, NameRef &target )
156 : loc(loc), target(target) { }
164 Label( const InputLoc &loc, char *data )
165 : loc(loc), data(data) { }
171 /* Structrue represents an action assigned to some FactorWithAug node. The
172 * factor with aug will keep an array of these. */
175 ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action )
176 : loc(loc), type(type), localErrKey(localErrKey), action(action) { }
190 void prepareLitString( Token &result, bool &caseInsensitive );
191 void append( const Token &other );
192 void set( char *str, int len );
195 /* Store the value and type of a priority augmentation. */
198 PriorityAug( AugType type, int priorKey, int priorValue ) :
199 type(type), priorKey(priorKey), priorValue(priorValue) { }
207 * A Variable Definition
211 VarDef( char *name, JoinOrLm *joinOrLm )
212 : name(name), joinOrLm(joinOrLm), isExport(false) { }
214 /* Parse tree traversal. */
215 FsmAp *walk( ParseData *pd );
216 void makeNameTree( const InputLoc &loc, ParseData *pd );
217 void resolveNameRefs( ParseData *pd );
228 * Wherever possible the item match will execute on the character. If not
229 * possible the item match will execute on a lookahead character and either
230 * hold the current char (if one away) or backup.
232 * How to handle the problem of backing up over a buffer break?
234 * Don't want to use pending out transitions for embedding item match because
235 * the role of item match action is different: it may sometimes match on the
236 * final transition, or may match on a lookahead character.
238 * Don't want to invent a new operator just for this. So just trail action
239 * after machine, this means we can only use literal actions.
241 * The item action may
243 * What states of the machine will be final. The item actions that wrap around
244 * on the last character will go straight to the start state.
246 * Some transitions will be lookahead transitions, they will hold the current
247 * character. Crossing them with regular transitions must be restricted
248 * because it does not make sense. The transition cannot simultaneously hold
249 * and consume the current character.
251 struct LongestMatchPart
253 LongestMatchPart( Join *join, Action *action,
254 InputLoc &semiLoc, int longestMatchId )
256 join(join), action(action), semiLoc(semiLoc),
257 longestMatchId(longestMatchId), inLmSelect(false) { }
268 Action *actLagBehind;
271 LongestMatch *longestMatch;
273 LongestMatchPart *prev, *next;
276 /* Declare a new type so that ptreetypes.h need not include dlist.h. */
277 struct LmPartList : DList<LongestMatchPart> {};
281 /* Construct with a list of joins */
282 LongestMatch( const InputLoc &loc, LmPartList *longestMatchList ) :
283 loc(loc), longestMatchList(longestMatchList), name(0),
284 lmSwitchHandlesError(false) { }
286 /* Tree traversal. */
287 FsmAp *walk( ParseData *pd );
288 void makeNameTree( ParseData *pd );
289 void resolveNameRefs( ParseData *pd );
290 void runLonestMatch( ParseData *pd, FsmAp *graph );
291 Action *newAction( ParseData *pd, const InputLoc &loc, char *name,
292 InlineList *inlineList );
293 void makeActions( ParseData *pd );
294 void findName( ParseData *pd );
295 void restart( FsmAp *graph, TransAp *trans );
298 LmPartList *longestMatchList;
302 bool lmSwitchHandlesError;
304 LongestMatch *next, *prev;
308 /* List of Expressions. */
309 typedef DList<Expression> ExprList;
318 JoinOrLm( Join *join ) :
319 join(join), type(JoinType) {}
320 JoinOrLm( LongestMatch *longestMatch ) :
321 longestMatch(longestMatch), type(LongestMatchType) {}
323 FsmAp *walk( ParseData *pd );
324 void makeNameTree( ParseData *pd );
325 void resolveNameRefs( ParseData *pd );
328 LongestMatch *longestMatch;
337 /* Construct with the first expression. */
338 Join( Expression *expr );
339 Join( const InputLoc &loc, Expression *expr );
341 /* Tree traversal. */
342 FsmAp *walk( ParseData *pd );
343 FsmAp *walkJoin( ParseData *pd );
344 void makeNameTree( ParseData *pd );
345 void resolveNameRefs( ParseData *pd );
366 /* Construct with an expression on the left and a term on the right. */
367 Expression( Expression *expression, Term *term, Type type ) :
368 expression(expression), term(term),
369 builtin(builtin), type(type), prev(this), next(this) { }
371 /* Construct with only a term. */
372 Expression( Term *term ) :
373 expression(0), term(term), builtin(builtin),
374 type(TermType) , prev(this), next(this) { }
376 /* Construct with a builtin type. */
377 Expression( BuiltinMachine builtin ) :
378 expression(0), term(0), builtin(builtin),
379 type(BuiltinType), prev(this), next(this) { }
383 /* Tree traversal. */
384 FsmAp *walk( ParseData *pd, bool lastInSeq = true );
385 void makeNameTree( ParseData *pd );
386 void resolveNameRefs( ParseData *pd );
389 Expression *expression;
391 BuiltinMachine builtin;
394 Expression *prev, *next;
410 Term( Term *term, FactorWithAug *factorWithAug ) :
411 term(term), factorWithAug(factorWithAug), type(ConcatType) { }
413 Term( Term *term, FactorWithAug *factorWithAug, Type type ) :
414 term(term), factorWithAug(factorWithAug), type(type) { }
416 Term( FactorWithAug *factorWithAug ) :
417 term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { }
421 FsmAp *walk( ParseData *pd, bool lastInSeq = true );
422 void makeNameTree( ParseData *pd );
423 void resolveNameRefs( ParseData *pd );
426 FactorWithAug *factorWithAug;
429 /* Priority descriptor for RightFinish type. */
430 PriorDesc priorDescs[2];
434 /* Third level of precedence. Augmenting nodes with actions and priorities. */
437 FactorWithAug( FactorWithRep *factorWithRep ) :
438 priorDescs(0), factorWithRep(factorWithRep) { }
441 /* Tree traversal. */
442 FsmAp *walk( ParseData *pd );
443 void makeNameTree( ParseData *pd );
444 void resolveNameRefs( ParseData *pd );
446 void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd );
447 void assignPriorities( FsmAp *graph, int *priorOrd );
449 void assignConditions( FsmAp *graph );
451 /* Actions and priorities assigned to the factor node. */
452 Vector<ParserAction> actions;
453 Vector<PriorityAug> priorityAugs;
454 PriorDesc *priorDescs;
455 Vector<Label> labels;
456 Vector<EpsilonLink> epsilonLinks;
457 Vector<ParserAction> conditions;
459 FactorWithRep *factorWithRep;
462 /* Fourth level of precedence. Trailing unary operators. Provide kleen star,
463 * optional and plus. */
478 FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep,
479 int lowerRep, int upperRep, Type type ) :
480 loc(loc), factorWithRep(factorWithRep),
481 factorWithNeg(0), lowerRep(lowerRep),
482 upperRep(upperRep), type(type) { }
484 FactorWithRep( FactorWithNeg *factorWithNeg )
485 : factorWithNeg(factorWithNeg), type(FactorWithNegType) { }
489 /* Tree traversal. */
490 FsmAp *walk( ParseData *pd );
491 void makeNameTree( ParseData *pd );
492 void resolveNameRefs( ParseData *pd );
495 FactorWithRep *factorWithRep;
496 FactorWithNeg *factorWithNeg;
497 int lowerRep, upperRep;
500 /* Priority descriptor for StarStar type. */
501 PriorDesc priorDescs[2];
504 /* Fifth level of precedence. Provides Negation. */
513 FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) :
514 loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { }
516 FactorWithNeg( Factor *factor ) :
517 factorWithNeg(0), factor(factor), type(FactorType) { }
521 /* Tree traversal. */
522 FsmAp *walk( ParseData *pd );
523 void makeNameTree( ParseData *pd );
524 void resolveNameRefs( ParseData *pd );
527 FactorWithNeg *factorWithNeg;
537 /* Language elements a factor node can be. */
548 /* Construct with a literal fsm. */
549 Factor( Literal *literal ) :
550 literal(literal), type(LiteralType) { }
552 /* Construct with a range. */
553 Factor( Range *range ) :
554 range(range), type(RangeType) { }
556 /* Construct with the or part of a regular expression. */
557 Factor( ReItem *reItem ) :
558 reItem(reItem), type(OrExprType) { }
560 /* Construct with a regular expression. */
561 Factor( RegExpr *regExpr ) :
562 regExpr(regExpr), type(RegExprType) { }
564 /* Construct with a reference to a var def. */
565 Factor( const InputLoc &loc, VarDef *varDef ) :
566 loc(loc), varDef(varDef), type(ReferenceType) {}
568 /* Construct with a parenthesized join. */
569 Factor( Join *join ) :
570 join(join), type(ParenType) {}
572 /* Construct with a longest match operator. */
573 Factor( LongestMatch *longestMatch ) :
574 longestMatch(longestMatch), type(LongestMatchType) {}
579 /* Tree traversal. */
580 FsmAp *walk( ParseData *pd );
581 void makeNameTree( ParseData *pd );
582 void resolveNameRefs( ParseData *pd );
591 LongestMatch *longestMatch;
596 /* A range machine. Only ever composed of two literals. */
599 Range( Literal *lowerLit, Literal *upperLit )
600 : lowerLit(lowerLit), upperLit(upperLit) { }
603 FsmAp *walk( ParseData *pd );
609 /* Some literal machine. Can be a number or literal string. */
612 enum LiteralType { Number, LitString };
614 Literal( const Token &token, LiteralType type )
615 : token(token), type(type) { }
617 FsmAp *walk( ParseData *pd );
623 /* Regular expression. */
626 enum RegExpType { RecurseItem, Empty };
630 type(Empty), caseInsensitive(false) { }
631 RegExpr(RegExpr *regExpr, ReItem *item) :
632 regExpr(regExpr), item(item),
633 type(RecurseItem), caseInsensitive(false) { }
636 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
641 bool caseInsensitive;
644 /* An item in a regular expression. */
647 enum ReItemType { Data, Dot, OrBlock, NegOrBlock };
649 ReItem( const InputLoc &loc, const Token &token )
650 : loc(loc), token(token), star(false), type(Data) { }
651 ReItem( const InputLoc &loc, ReItemType type )
652 : loc(loc), star(false), type(type) { }
653 ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type )
654 : loc(loc), orBlock(orBlock), star(false), type(type) { }
657 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
666 /* An or block item. */
669 enum ReOrBlockType { RecurseItem, Empty };
674 ReOrBlock(ReOrBlock *orBlock, ReOrItem *item)
675 : orBlock(orBlock), item(item), type(RecurseItem) { }
678 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
685 /* An item in an or block. */
688 enum ReOrItemType { Data, Range };
690 ReOrItem( const InputLoc &loc, const Token &token )
691 : loc(loc), token(token), type(Data) {}
692 ReOrItem( const InputLoc &loc, char lower, char upper )
693 : loc(loc), lower(lower), upper(upper), type(Range) { }
695 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
713 Text, Goto, Call, Next, GotoExpr, CallExpr, NextExpr, Ret, PChar,
714 Char, Hold, Curs, Targs, Entry, Exec, LmSwitch, LmSetActId,
715 LmSetTokEnd, LmOnLast, LmOnNext, LmOnLagBehind, LmInitAct,
716 LmInitTokStart, LmSetTokStart, Break
719 InlineItem( const InputLoc &loc, char *data, Type type ) :
720 loc(loc), data(data), nameRef(0), children(0), type(type) { }
722 InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) :
723 loc(loc), data(0), nameRef(nameRef), children(0), type(type) { }
725 InlineItem( const InputLoc &loc, LongestMatch *longestMatch,
726 LongestMatchPart *longestMatchPart, Type type ) : loc(loc), data(0),
727 nameRef(0), children(0), longestMatch(longestMatch),
728 longestMatchPart(longestMatchPart), type(type) { }
730 InlineItem( const InputLoc &loc, NameInst *nameTarg, Type type ) :
731 loc(loc), data(0), nameRef(0), nameTarg(nameTarg), children(0),
734 InlineItem( const InputLoc &loc, Type type ) :
735 loc(loc), data(0), nameRef(0), children(0), type(type) { }
741 InlineList *children;
742 LongestMatch *longestMatch;
743 LongestMatchPart *longestMatchPart;
746 InlineItem *prev, *next;
749 /* Normally this would be atypedef, but that would entail including DList from
750 * ptreetypes, which should be just typedef forwards. */
751 struct InlineList : public DList<InlineItem> { };
755 #endif /* _PARSETREE_H */