2 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
33 /* Types of builtin machines. */
71 struct LongestMatchPart;
75 /* Type of augmentation. Describes locations in the machine. */
78 /* Transition actions/priorities. */
84 /* Global error actions. */
88 at_not_start_gbl_error,
89 at_not_final_gbl_error,
92 /* Local error actions. */
96 at_not_start_local_error,
97 at_not_final_local_error,
98 at_middle_local_error,
100 /* To State Action embedding. */
104 at_not_start_to_state,
105 at_not_final_to_state,
108 /* From State Action embedding. */
112 at_not_start_from_state,
113 at_not_final_from_state,
114 at_middle_from_state,
116 /* EOF Action embedding. */
125 /* IMPORTANT: These must follow the same order as the state augs in AugType
126 * since we will be using this to compose AugType. */
143 struct ExplicitMachine;
147 /* Reference to a named state. */
148 typedef Vector<char*> NameRef;
149 typedef Vector<NameRef*> NameRefList;
150 typedef Vector<NameInst*> NameTargList;
152 /* Structure for storing location of epsilon transitons. */
155 EpsilonLink( const InputLoc &loc, NameRef &target )
156 : loc(loc), target(target) { }
164 Label( const InputLoc &loc, char *data )
165 : loc(loc), data(data) { }
171 /* Structrue represents an action assigned to some FactorWithAug node. The
172 * factor with aug will keep an array of these. */
175 ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action )
176 : loc(loc), type(type), localErrKey(localErrKey), action(action) { }
186 ConditionTest( const InputLoc &loc, AugType type, Action *action, bool sense ) :
187 loc(loc), type(type), action(action), sense(sense) { }
201 void prepareLitString( Token &result, bool &caseInsensitive );
202 void append( const Token &other );
203 void set( const char *str, int len );
206 /* Store the value and type of a priority augmentation. */
209 PriorityAug( AugType type, int priorKey, int priorValue ) :
210 type(type), priorKey(priorKey), priorValue(priorValue) { }
218 * A Variable Definition
222 VarDef( const char *name, JoinOrLm *joinOrLm )
223 : name(name), joinOrLm(joinOrLm), isExport(false) { }
225 /* Parse tree traversal. */
226 FsmAp *walk( ParseData *pd );
227 void makeNameTree( const InputLoc &loc, ParseData *pd );
228 void resolveNameRefs( ParseData *pd );
239 * Wherever possible the item match will execute on the character. If not
240 * possible the item match will execute on a lookahead character and either
241 * hold the current char (if one away) or backup.
243 * How to handle the problem of backing up over a buffer break?
245 * Don't want to use pending out transitions for embedding item match because
246 * the role of item match action is different: it may sometimes match on the
247 * final transition, or may match on a lookahead character.
249 * Don't want to invent a new operator just for this. So just trail action
250 * after machine, this means we can only use literal actions.
252 * The item action may
254 * What states of the machine will be final. The item actions that wrap around
255 * on the last character will go straight to the start state.
257 * Some transitions will be lookahead transitions, they will hold the current
258 * character. Crossing them with regular transitions must be restricted
259 * because it does not make sense. The transition cannot simultaneously hold
260 * and consume the current character.
262 struct LongestMatchPart
264 LongestMatchPart( Join *join, Action *action,
265 InputLoc &semiLoc, int longestMatchId )
267 join(join), action(action), semiLoc(semiLoc),
268 longestMatchId(longestMatchId), inLmSelect(false) { }
279 Action *actLagBehind;
282 LongestMatch *longestMatch;
284 LongestMatchPart *prev, *next;
287 /* Declare a new type so that ptreetypes.h need not include dlist.h. */
288 struct LmPartList : DList<LongestMatchPart> {};
292 /* Construct with a list of joins */
293 LongestMatch( const InputLoc &loc, LmPartList *longestMatchList ) :
294 loc(loc), longestMatchList(longestMatchList), name(0),
295 lmSwitchHandlesError(false) { }
297 /* Tree traversal. */
298 FsmAp *walk( ParseData *pd );
299 void makeNameTree( ParseData *pd );
300 void resolveNameRefs( ParseData *pd );
301 void runLonestMatch( ParseData *pd, FsmAp *graph );
302 Action *newAction( ParseData *pd, const InputLoc &loc, const char *name,
303 InlineList *inlineList );
304 void makeActions( ParseData *pd );
305 void findName( ParseData *pd );
306 void restart( FsmAp *graph, TransAp *trans );
309 LmPartList *longestMatchList;
313 bool lmSwitchHandlesError;
315 LongestMatch *next, *prev;
319 /* List of Expressions. */
320 typedef DList<Expression> ExprList;
329 JoinOrLm( Join *join ) :
330 join(join), type(JoinType) {}
331 JoinOrLm( LongestMatch *longestMatch ) :
332 longestMatch(longestMatch), type(LongestMatchType) {}
334 FsmAp *walk( ParseData *pd );
335 void makeNameTree( ParseData *pd );
336 void resolveNameRefs( ParseData *pd );
339 LongestMatch *longestMatch;
348 /* Construct with the first expression. */
349 Join( Expression *expr );
350 Join( const InputLoc &loc, Expression *expr );
352 /* Tree traversal. */
353 FsmAp *walk( ParseData *pd );
354 FsmAp *walkJoin( ParseData *pd );
355 void makeNameTree( ParseData *pd );
356 void resolveNameRefs( ParseData *pd );
377 /* Construct with an expression on the left and a term on the right. */
378 Expression( Expression *expression, Term *term, Type type ) :
379 expression(expression), term(term),
380 builtin(builtin), type(type), prev(this), next(this) { }
382 /* Construct with only a term. */
383 Expression( Term *term ) :
384 expression(0), term(term), builtin(builtin),
385 type(TermType) , prev(this), next(this) { }
387 /* Construct with a builtin type. */
388 Expression( BuiltinMachine builtin ) :
389 expression(0), term(0), builtin(builtin),
390 type(BuiltinType), prev(this), next(this) { }
394 /* Tree traversal. */
395 FsmAp *walk( ParseData *pd, bool lastInSeq = true );
396 void makeNameTree( ParseData *pd );
397 void resolveNameRefs( ParseData *pd );
400 Expression *expression;
402 BuiltinMachine builtin;
405 Expression *prev, *next;
421 Term( Term *term, FactorWithAug *factorWithAug ) :
422 term(term), factorWithAug(factorWithAug), type(ConcatType) { }
424 Term( Term *term, FactorWithAug *factorWithAug, Type type ) :
425 term(term), factorWithAug(factorWithAug), type(type) { }
427 Term( FactorWithAug *factorWithAug ) :
428 term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { }
432 FsmAp *walk( ParseData *pd, bool lastInSeq = true );
433 void makeNameTree( ParseData *pd );
434 void resolveNameRefs( ParseData *pd );
437 FactorWithAug *factorWithAug;
440 /* Priority descriptor for RightFinish type. */
441 PriorDesc priorDescs[2];
445 /* Third level of precedence. Augmenting nodes with actions and priorities. */
448 FactorWithAug( FactorWithRep *factorWithRep ) :
449 priorDescs(0), factorWithRep(factorWithRep) { }
452 /* Tree traversal. */
453 FsmAp *walk( ParseData *pd );
454 void makeNameTree( ParseData *pd );
455 void resolveNameRefs( ParseData *pd );
457 void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd );
458 void assignPriorities( FsmAp *graph, int *priorOrd );
460 void assignConditions( FsmAp *graph );
462 /* Actions and priorities assigned to the factor node. */
463 Vector<ParserAction> actions;
464 Vector<PriorityAug> priorityAugs;
465 PriorDesc *priorDescs;
466 Vector<Label> labels;
467 Vector<EpsilonLink> epsilonLinks;
468 Vector<ConditionTest> conditions;
470 FactorWithRep *factorWithRep;
473 /* Fourth level of precedence. Trailing unary operators. Provide kleen star,
474 * optional and plus. */
489 FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep,
490 int lowerRep, int upperRep, Type type ) :
491 loc(loc), factorWithRep(factorWithRep),
492 factorWithNeg(0), lowerRep(lowerRep),
493 upperRep(upperRep), type(type) { }
495 FactorWithRep( FactorWithNeg *factorWithNeg )
496 : factorWithNeg(factorWithNeg), type(FactorWithNegType) { }
500 /* Tree traversal. */
501 FsmAp *walk( ParseData *pd );
502 void makeNameTree( ParseData *pd );
503 void resolveNameRefs( ParseData *pd );
506 FactorWithRep *factorWithRep;
507 FactorWithNeg *factorWithNeg;
508 int lowerRep, upperRep;
511 /* Priority descriptor for StarStar type. */
512 PriorDesc priorDescs[2];
515 /* Fifth level of precedence. Provides Negation. */
524 FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) :
525 loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { }
527 FactorWithNeg( Factor *factor ) :
528 factorWithNeg(0), factor(factor), type(FactorType) { }
532 /* Tree traversal. */
533 FsmAp *walk( ParseData *pd );
534 void makeNameTree( ParseData *pd );
535 void resolveNameRefs( ParseData *pd );
538 FactorWithNeg *factorWithNeg;
548 /* Language elements a factor node can be. */
559 /* Construct with a literal fsm. */
560 Factor( Literal *literal ) :
561 literal(literal), type(LiteralType) { }
563 /* Construct with a range. */
564 Factor( Range *range ) :
565 range(range), type(RangeType) { }
567 /* Construct with the or part of a regular expression. */
568 Factor( ReItem *reItem ) :
569 reItem(reItem), type(OrExprType) { }
571 /* Construct with a regular expression. */
572 Factor( RegExpr *regExpr ) :
573 regExpr(regExpr), type(RegExprType) { }
575 /* Construct with a reference to a var def. */
576 Factor( const InputLoc &loc, VarDef *varDef ) :
577 loc(loc), varDef(varDef), type(ReferenceType) {}
579 /* Construct with a parenthesized join. */
580 Factor( Join *join ) :
581 join(join), type(ParenType) {}
583 /* Construct with a longest match operator. */
584 Factor( LongestMatch *longestMatch ) :
585 longestMatch(longestMatch), type(LongestMatchType) {}
590 /* Tree traversal. */
591 FsmAp *walk( ParseData *pd );
592 void makeNameTree( ParseData *pd );
593 void resolveNameRefs( ParseData *pd );
602 LongestMatch *longestMatch;
607 /* A range machine. Only ever composed of two literals. */
610 Range( Literal *lowerLit, Literal *upperLit )
611 : lowerLit(lowerLit), upperLit(upperLit) { }
614 FsmAp *walk( ParseData *pd );
620 /* Some literal machine. Can be a number or literal string. */
623 enum LiteralType { Number, LitString };
625 Literal( const Token &token, LiteralType type )
626 : token(token), type(type) { }
628 FsmAp *walk( ParseData *pd );
634 /* Regular expression. */
637 enum RegExpType { RecurseItem, Empty };
641 type(Empty), caseInsensitive(false) { }
642 RegExpr(RegExpr *regExpr, ReItem *item) :
643 regExpr(regExpr), item(item),
644 type(RecurseItem), caseInsensitive(false) { }
647 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
652 bool caseInsensitive;
655 /* An item in a regular expression. */
658 enum ReItemType { Data, Dot, OrBlock, NegOrBlock };
660 ReItem( const InputLoc &loc, const Token &token )
661 : loc(loc), token(token), star(false), type(Data) { }
662 ReItem( const InputLoc &loc, ReItemType type )
663 : loc(loc), star(false), type(type) { }
664 ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type )
665 : loc(loc), orBlock(orBlock), star(false), type(type) { }
668 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
677 /* An or block item. */
680 enum ReOrBlockType { RecurseItem, Empty };
685 ReOrBlock(ReOrBlock *orBlock, ReOrItem *item)
686 : orBlock(orBlock), item(item), type(RecurseItem) { }
689 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
696 /* An item in an or block. */
699 enum ReOrItemType { Data, Range };
701 ReOrItem( const InputLoc &loc, const Token &token )
702 : loc(loc), token(token), type(Data) {}
703 ReOrItem( const InputLoc &loc, char lower, char upper )
704 : loc(loc), lower(lower), upper(upper), type(Range) { }
706 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
724 Text, Goto, Call, Next, GotoExpr, CallExpr, NextExpr, Ret, PChar,
725 Char, Hold, Curs, Targs, Entry, Exec, LmSwitch, LmSetActId,
726 LmSetTokEnd, LmOnLast, LmOnNext, LmOnLagBehind, LmInitAct,
727 LmInitTokStart, LmSetTokStart, Break
730 InlineItem( const InputLoc &loc, char *data, Type type ) :
731 loc(loc), data(data), nameRef(0), children(0), type(type) { }
733 InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) :
734 loc(loc), data(0), nameRef(nameRef), children(0), type(type) { }
736 InlineItem( const InputLoc &loc, LongestMatch *longestMatch,
737 LongestMatchPart *longestMatchPart, Type type ) : loc(loc), data(0),
738 nameRef(0), children(0), longestMatch(longestMatch),
739 longestMatchPart(longestMatchPart), type(type) { }
741 InlineItem( const InputLoc &loc, NameInst *nameTarg, Type type ) :
742 loc(loc), data(0), nameRef(0), nameTarg(nameTarg), children(0),
745 InlineItem( const InputLoc &loc, Type type ) :
746 loc(loc), data(0), nameRef(0), children(0), type(type) { }
752 InlineList *children;
753 LongestMatch *longestMatch;
754 LongestMatchPart *longestMatchPart;
757 InlineItem *prev, *next;
760 /* Normally this would be atypedef, but that would entail including DList from
761 * ptreetypes, which should be just typedef forwards. */
762 struct InlineList : public DList<InlineItem> { };
766 #endif /* _PARSETREE_H */