2 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
33 /* Types of builtin machines. */
71 struct LongestMatchPart;
75 /* Type of augmentation. Describes locations in the machine. */
78 /* Transition actions/priorities. */
84 /* Global error actions. */
88 at_not_start_gbl_error,
89 at_not_final_gbl_error,
92 /* Local error actions. */
96 at_not_start_local_error,
97 at_not_final_local_error,
98 at_middle_local_error,
100 /* To State Action embedding. */
104 at_not_start_to_state,
105 at_not_final_to_state,
108 /* From State Action embedding. */
112 at_not_start_from_state,
113 at_not_final_from_state,
114 at_middle_from_state,
116 /* EOF Action embedding. */
125 /* IMPORTANT: These must follow the same order as the state augs in AugType
126 * since we will be using this to compose AugType. */
143 struct ExplicitMachine;
147 /* Reference to a named state. */
148 typedef Vector<char*> NameRef;
149 typedef Vector<NameRef*> NameRefList;
150 typedef Vector<NameInst*> NameTargList;
152 /* Structure for storing location of epsilon transitons. */
155 EpsilonLink( const InputLoc &loc, NameRef &target )
156 : loc(loc), target(target) { }
164 Label( const InputLoc &loc, char *data )
165 : loc(loc), data(data) { }
171 /* Structrue represents an action assigned to some FactorWithAug node. The
172 * factor with aug will keep an array of these. */
175 ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action )
176 : loc(loc), type(type), localErrKey(localErrKey), action(action) { }
186 ConditionTest( const InputLoc &loc, AugType type, Action *action, bool sense ) :
187 loc(loc), type(type), action(action), sense(sense) { }
201 void append( const Token &other );
202 void set( const char *str, int len );
205 char *prepareLitString( const InputLoc &loc, char *src, long length,
206 long &resLen, bool &caseInsensitive );
208 /* Store the value and type of a priority augmentation. */
211 PriorityAug( AugType type, int priorKey, int priorValue ) :
212 type(type), priorKey(priorKey), priorValue(priorValue) { }
220 * A Variable Definition
224 VarDef( const char *name, JoinOrLm *joinOrLm )
225 : name(name), joinOrLm(joinOrLm), isExport(false) { }
227 /* Parse tree traversal. */
228 FsmAp *walk( ParseData *pd );
229 void makeNameTree( const InputLoc &loc, ParseData *pd );
230 void resolveNameRefs( ParseData *pd );
241 * Wherever possible the item match will execute on the character. If not
242 * possible the item match will execute on a lookahead character and either
243 * hold the current char (if one away) or backup.
245 * How to handle the problem of backing up over a buffer break?
247 * Don't want to use pending out transitions for embedding item match because
248 * the role of item match action is different: it may sometimes match on the
249 * final transition, or may match on a lookahead character.
251 * Don't want to invent a new operator just for this. So just trail action
252 * after machine, this means we can only use literal actions.
254 * The item action may
256 * What states of the machine will be final. The item actions that wrap around
257 * on the last character will go straight to the start state.
259 * Some transitions will be lookahead transitions, they will hold the current
260 * character. Crossing them with regular transitions must be restricted
261 * because it does not make sense. The transition cannot simultaneously hold
262 * and consume the current character.
264 struct LongestMatchPart
266 LongestMatchPart( Join *join, Action *action,
267 InputLoc &semiLoc, int longestMatchId )
269 join(join), action(action), semiLoc(semiLoc),
270 longestMatchId(longestMatchId), inLmSelect(false) { }
281 Action *actLagBehind;
284 LongestMatch *longestMatch;
286 LongestMatchPart *prev, *next;
289 /* Declare a new type so that ptreetypes.h need not include dlist.h. */
290 struct LmPartList : DList<LongestMatchPart> {};
294 /* Construct with a list of joins */
295 LongestMatch( const InputLoc &loc, LmPartList *longestMatchList ) :
296 loc(loc), longestMatchList(longestMatchList), name(0),
297 lmSwitchHandlesError(false) { }
299 /* Tree traversal. */
300 FsmAp *walk( ParseData *pd );
301 void makeNameTree( ParseData *pd );
302 void resolveNameRefs( ParseData *pd );
303 void transferScannerLeavingActions( FsmAp *graph );
304 void runLongestMatch( ParseData *pd, FsmAp *graph );
305 Action *newAction( ParseData *pd, const InputLoc &loc, const char *name,
306 InlineList *inlineList );
307 void makeActions( ParseData *pd );
308 void findName( ParseData *pd );
309 void restart( FsmAp *graph, TransAp *trans );
312 LmPartList *longestMatchList;
316 bool lmSwitchHandlesError;
318 LongestMatch *next, *prev;
322 /* List of Expressions. */
323 typedef DList<Expression> ExprList;
332 JoinOrLm( Join *join ) :
333 join(join), longestMatch(0), type(JoinType) {}
334 JoinOrLm( LongestMatch *longestMatch ) :
335 join(0), longestMatch(longestMatch), type(LongestMatchType) {}
337 FsmAp *walk( ParseData *pd );
338 void makeNameTree( ParseData *pd );
339 void resolveNameRefs( ParseData *pd );
342 LongestMatch *longestMatch;
351 /* Construct with the first expression. */
352 Join( Expression *expr );
353 Join( const InputLoc &loc, Expression *expr );
355 /* Tree traversal. */
356 FsmAp *walk( ParseData *pd );
357 FsmAp *walkJoin( ParseData *pd );
358 void makeNameTree( ParseData *pd );
359 void resolveNameRefs( ParseData *pd );
380 /* Construct with an expression on the left and a term on the right. */
381 Expression( Expression *expression, Term *term, Type type ) :
382 expression(expression), term(term),
383 builtin(builtin), type(type), prev(this), next(this) { }
385 /* Construct with only a term. */
386 Expression( Term *term ) :
387 expression(0), term(term), builtin(builtin),
388 type(TermType) , prev(this), next(this) { }
390 /* Construct with a builtin type. */
391 Expression( BuiltinMachine builtin ) :
392 expression(0), term(0), builtin(builtin),
393 type(BuiltinType), prev(this), next(this) { }
397 /* Tree traversal. */
398 FsmAp *walk( ParseData *pd, bool lastInSeq = true );
399 void makeNameTree( ParseData *pd );
400 void resolveNameRefs( ParseData *pd );
403 Expression *expression;
405 BuiltinMachine builtin;
408 Expression *prev, *next;
424 Term( Term *term, FactorWithAug *factorWithAug ) :
425 term(term), factorWithAug(factorWithAug), type(ConcatType) { }
427 Term( Term *term, FactorWithAug *factorWithAug, Type type ) :
428 term(term), factorWithAug(factorWithAug), type(type) { }
430 Term( FactorWithAug *factorWithAug ) :
431 term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { }
435 FsmAp *walk( ParseData *pd, bool lastInSeq = true );
436 void makeNameTree( ParseData *pd );
437 void resolveNameRefs( ParseData *pd );
440 FactorWithAug *factorWithAug;
443 /* Priority descriptor for RightFinish type. */
444 PriorDesc priorDescs[2];
448 /* Third level of precedence. Augmenting nodes with actions and priorities. */
451 FactorWithAug( FactorWithRep *factorWithRep ) :
452 priorDescs(0), factorWithRep(factorWithRep) { }
455 /* Tree traversal. */
456 FsmAp *walk( ParseData *pd );
457 void makeNameTree( ParseData *pd );
458 void resolveNameRefs( ParseData *pd );
460 void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd );
461 void assignPriorities( FsmAp *graph, int *priorOrd );
463 void assignConditions( FsmAp *graph );
465 /* Actions and priorities assigned to the factor node. */
466 Vector<ParserAction> actions;
467 Vector<PriorityAug> priorityAugs;
468 PriorDesc *priorDescs;
469 Vector<Label> labels;
470 Vector<EpsilonLink> epsilonLinks;
471 Vector<ConditionTest> conditions;
473 FactorWithRep *factorWithRep;
476 /* Fourth level of precedence. Trailing unary operators. Provide kleen star,
477 * optional and plus. */
492 FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep,
493 int lowerRep, int upperRep, Type type ) :
494 loc(loc), factorWithRep(factorWithRep),
495 factorWithNeg(0), lowerRep(lowerRep),
496 upperRep(upperRep), type(type) { }
498 FactorWithRep( FactorWithNeg *factorWithNeg )
499 : factorWithNeg(factorWithNeg), type(FactorWithNegType) { }
503 /* Tree traversal. */
504 FsmAp *walk( ParseData *pd );
505 void makeNameTree( ParseData *pd );
506 void resolveNameRefs( ParseData *pd );
509 FactorWithRep *factorWithRep;
510 FactorWithNeg *factorWithNeg;
511 int lowerRep, upperRep;
514 /* Priority descriptor for StarStar type. */
515 PriorDesc priorDescs[2];
518 /* Fifth level of precedence. Provides Negation. */
527 FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) :
528 loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { }
530 FactorWithNeg( Factor *factor ) :
531 factorWithNeg(0), factor(factor), type(FactorType) { }
535 /* Tree traversal. */
536 FsmAp *walk( ParseData *pd );
537 void makeNameTree( ParseData *pd );
538 void resolveNameRefs( ParseData *pd );
541 FactorWithNeg *factorWithNeg;
551 /* Language elements a factor node can be. */
562 /* Construct with a literal fsm. */
563 Factor( Literal *literal ) :
564 literal(literal), type(LiteralType) { }
566 /* Construct with a range. */
567 Factor( Range *range ) :
568 range(range), type(RangeType) { }
570 /* Construct with the or part of a regular expression. */
571 Factor( ReItem *reItem ) :
572 reItem(reItem), type(OrExprType) { }
574 /* Construct with a regular expression. */
575 Factor( RegExpr *regExpr ) :
576 regExpr(regExpr), type(RegExprType) { }
578 /* Construct with a reference to a var def. */
579 Factor( const InputLoc &loc, VarDef *varDef ) :
580 loc(loc), varDef(varDef), type(ReferenceType) {}
582 /* Construct with a parenthesized join. */
583 Factor( Join *join ) :
584 join(join), type(ParenType) {}
586 /* Construct with a longest match operator. */
587 Factor( LongestMatch *longestMatch ) :
588 longestMatch(longestMatch), type(LongestMatchType) {}
593 /* Tree traversal. */
594 FsmAp *walk( ParseData *pd );
595 void makeNameTree( ParseData *pd );
596 void resolveNameRefs( ParseData *pd );
605 LongestMatch *longestMatch;
610 /* A range machine. Only ever composed of two literals. */
613 Range( Literal *lowerLit, Literal *upperLit )
614 : lowerLit(lowerLit), upperLit(upperLit) { }
617 FsmAp *walk( ParseData *pd );
623 /* Some literal machine. Can be a number or literal string. */
626 enum LiteralType { Number, LitString };
628 Literal( const Token &token, LiteralType type )
629 : token(token), type(type) { }
631 FsmAp *walk( ParseData *pd );
637 /* Regular expression. */
640 enum RegExpType { RecurseItem, Empty };
644 type(Empty), caseInsensitive(false) { }
645 RegExpr(RegExpr *regExpr, ReItem *item) :
646 regExpr(regExpr), item(item),
647 type(RecurseItem), caseInsensitive(false) { }
650 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
655 bool caseInsensitive;
658 /* An item in a regular expression. */
661 enum ReItemType { Data, Dot, OrBlock, NegOrBlock };
663 ReItem( const InputLoc &loc, const Token &token )
664 : loc(loc), token(token), star(false), type(Data) { }
665 ReItem( const InputLoc &loc, ReItemType type )
666 : loc(loc), star(false), type(type) { }
667 ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type )
668 : loc(loc), orBlock(orBlock), star(false), type(type) { }
671 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
680 /* An or block item. */
683 enum ReOrBlockType { RecurseItem, Empty };
688 ReOrBlock(ReOrBlock *orBlock, ReOrItem *item)
689 : orBlock(orBlock), item(item), type(RecurseItem) { }
692 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
699 /* An item in an or block. */
702 enum ReOrItemType { Data, Range };
704 ReOrItem( const InputLoc &loc, const Token &token )
705 : loc(loc), token(token), type(Data) {}
706 ReOrItem( const InputLoc &loc, char lower, char upper )
707 : loc(loc), lower(lower), upper(upper), type(Range) { }
709 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
727 Text, Goto, Call, Next, GotoExpr, CallExpr, NextExpr, Ret, PChar,
728 Char, Hold, Curs, Targs, Entry, Exec, LmSwitch, LmSetActId,
729 LmSetTokEnd, LmOnLast, LmOnNext, LmOnLagBehind, LmInitAct,
730 LmInitTokStart, LmSetTokStart, Break
733 InlineItem( const InputLoc &loc, char *data, Type type ) :
734 loc(loc), data(data), nameRef(0), children(0), type(type) { }
736 InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) :
737 loc(loc), data(0), nameRef(nameRef), children(0), type(type) { }
739 InlineItem( const InputLoc &loc, LongestMatch *longestMatch,
740 LongestMatchPart *longestMatchPart, Type type ) : loc(loc), data(0),
741 nameRef(0), children(0), longestMatch(longestMatch),
742 longestMatchPart(longestMatchPart), type(type) { }
744 InlineItem( const InputLoc &loc, NameInst *nameTarg, Type type ) :
745 loc(loc), data(0), nameRef(0), nameTarg(nameTarg), children(0),
748 InlineItem( const InputLoc &loc, Type type ) :
749 loc(loc), data(0), nameRef(0), children(0), type(type) { }
755 InlineList *children;
756 LongestMatch *longestMatch;
757 LongestMatchPart *longestMatchPart;
760 InlineItem *prev, *next;
763 /* Normally this would be atypedef, but that would entail including DList from
764 * ptreetypes, which should be just typedef forwards. */
765 struct InlineList : public DList<InlineItem> { };
769 #endif /* _PARSETREE_H */