2 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
33 /* Types of builtin machines. */
71 struct LongestMatchPart;
75 /* Type of augmentation. Describes locations in the machine. */
78 /* Transition actions/priorities. */
84 /* Global error actions. */
88 at_not_start_gbl_error,
89 at_not_final_gbl_error,
92 /* Local error actions. */
96 at_not_start_local_error,
97 at_not_final_local_error,
98 at_middle_local_error,
100 /* To State Action embedding. */
104 at_not_start_to_state,
105 at_not_final_to_state,
108 /* From State Action embedding. */
112 at_not_start_from_state,
113 at_not_final_from_state,
114 at_middle_from_state,
116 /* EOF Action embedding. */
125 /* IMPORTANT: These must follow the same order as the state augs in AugType
126 * since we will be using this to compose AugType. */
143 struct ExplicitMachine;
147 /* Reference to a named state. */
148 typedef Vector<char*> NameRef;
149 typedef Vector<NameRef*> NameRefList;
150 typedef Vector<NameInst*> NameTargList;
152 /* Structure for storing location of epsilon transitons. */
155 EpsilonLink( const InputLoc &loc, NameRef &target )
156 : loc(loc), target(target) { }
164 Label( const InputLoc &loc, char *data )
165 : loc(loc), data(data) { }
171 /* Structrue represents an action assigned to some FactorWithAug node. The
172 * factor with aug will keep an array of these. */
175 ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action )
176 : loc(loc), type(type), localErrKey(localErrKey), action(action) { }
186 ConditionTest( const InputLoc &loc, AugType type, Action *action, bool sense ) :
187 loc(loc), type(type), action(action), sense(sense) { }
201 void prepareLitString( Token &result, bool &caseInsensitive );
202 void append( const Token &other );
203 void set( const char *str, int len );
206 /* Store the value and type of a priority augmentation. */
209 PriorityAug( AugType type, int priorKey, int priorValue ) :
210 type(type), priorKey(priorKey), priorValue(priorValue) { }
218 * A Variable Definition
222 VarDef( const char *name, JoinOrLm *joinOrLm )
223 : name(name), joinOrLm(joinOrLm), isExport(false) { }
225 /* Parse tree traversal. */
226 FsmAp *walk( ParseData *pd );
227 void makeNameTree( const InputLoc &loc, ParseData *pd );
228 void resolveNameRefs( ParseData *pd );
239 * Wherever possible the item match will execute on the character. If not
240 * possible the item match will execute on a lookahead character and either
241 * hold the current char (if one away) or backup.
243 * How to handle the problem of backing up over a buffer break?
245 * Don't want to use pending out transitions for embedding item match because
246 * the role of item match action is different: it may sometimes match on the
247 * final transition, or may match on a lookahead character.
249 * Don't want to invent a new operator just for this. So just trail action
250 * after machine, this means we can only use literal actions.
252 * The item action may
254 * What states of the machine will be final. The item actions that wrap around
255 * on the last character will go straight to the start state.
257 * Some transitions will be lookahead transitions, they will hold the current
258 * character. Crossing them with regular transitions must be restricted
259 * because it does not make sense. The transition cannot simultaneously hold
260 * and consume the current character.
262 struct LongestMatchPart
264 LongestMatchPart( Join *join, Action *action,
265 InputLoc &semiLoc, int longestMatchId )
267 join(join), action(action), semiLoc(semiLoc),
268 longestMatchId(longestMatchId), inLmSelect(false) { }
279 Action *actLagBehind;
282 LongestMatch *longestMatch;
284 LongestMatchPart *prev, *next;
287 /* Declare a new type so that ptreetypes.h need not include dlist.h. */
288 struct LmPartList : DList<LongestMatchPart> {};
292 /* Construct with a list of joins */
293 LongestMatch( const InputLoc &loc, LmPartList *longestMatchList ) :
294 loc(loc), longestMatchList(longestMatchList), name(0),
295 lmSwitchHandlesError(false) { }
297 /* Tree traversal. */
298 FsmAp *walk( ParseData *pd );
299 void makeNameTree( ParseData *pd );
300 void resolveNameRefs( ParseData *pd );
301 void transferScannerLeavingActions( FsmAp *graph );
302 void runLongestMatch( ParseData *pd, FsmAp *graph );
303 Action *newAction( ParseData *pd, const InputLoc &loc, const char *name,
304 InlineList *inlineList );
305 void makeActions( ParseData *pd );
306 void findName( ParseData *pd );
307 void restart( FsmAp *graph, TransAp *trans );
310 LmPartList *longestMatchList;
314 bool lmSwitchHandlesError;
316 LongestMatch *next, *prev;
320 /* List of Expressions. */
321 typedef DList<Expression> ExprList;
330 JoinOrLm( Join *join ) :
331 join(join), type(JoinType) {}
332 JoinOrLm( LongestMatch *longestMatch ) :
333 longestMatch(longestMatch), type(LongestMatchType) {}
335 FsmAp *walk( ParseData *pd );
336 void makeNameTree( ParseData *pd );
337 void resolveNameRefs( ParseData *pd );
340 LongestMatch *longestMatch;
349 /* Construct with the first expression. */
350 Join( Expression *expr );
351 Join( const InputLoc &loc, Expression *expr );
353 /* Tree traversal. */
354 FsmAp *walk( ParseData *pd );
355 FsmAp *walkJoin( ParseData *pd );
356 void makeNameTree( ParseData *pd );
357 void resolveNameRefs( ParseData *pd );
378 /* Construct with an expression on the left and a term on the right. */
379 Expression( Expression *expression, Term *term, Type type ) :
380 expression(expression), term(term),
381 builtin(builtin), type(type), prev(this), next(this) { }
383 /* Construct with only a term. */
384 Expression( Term *term ) :
385 expression(0), term(term), builtin(builtin),
386 type(TermType) , prev(this), next(this) { }
388 /* Construct with a builtin type. */
389 Expression( BuiltinMachine builtin ) :
390 expression(0), term(0), builtin(builtin),
391 type(BuiltinType), prev(this), next(this) { }
395 /* Tree traversal. */
396 FsmAp *walk( ParseData *pd, bool lastInSeq = true );
397 void makeNameTree( ParseData *pd );
398 void resolveNameRefs( ParseData *pd );
401 Expression *expression;
403 BuiltinMachine builtin;
406 Expression *prev, *next;
422 Term( Term *term, FactorWithAug *factorWithAug ) :
423 term(term), factorWithAug(factorWithAug), type(ConcatType) { }
425 Term( Term *term, FactorWithAug *factorWithAug, Type type ) :
426 term(term), factorWithAug(factorWithAug), type(type) { }
428 Term( FactorWithAug *factorWithAug ) :
429 term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { }
433 FsmAp *walk( ParseData *pd, bool lastInSeq = true );
434 void makeNameTree( ParseData *pd );
435 void resolveNameRefs( ParseData *pd );
438 FactorWithAug *factorWithAug;
441 /* Priority descriptor for RightFinish type. */
442 PriorDesc priorDescs[2];
446 /* Third level of precedence. Augmenting nodes with actions and priorities. */
449 FactorWithAug( FactorWithRep *factorWithRep ) :
450 priorDescs(0), factorWithRep(factorWithRep) { }
453 /* Tree traversal. */
454 FsmAp *walk( ParseData *pd );
455 void makeNameTree( ParseData *pd );
456 void resolveNameRefs( ParseData *pd );
458 void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd );
459 void assignPriorities( FsmAp *graph, int *priorOrd );
461 void assignConditions( FsmAp *graph );
463 /* Actions and priorities assigned to the factor node. */
464 Vector<ParserAction> actions;
465 Vector<PriorityAug> priorityAugs;
466 PriorDesc *priorDescs;
467 Vector<Label> labels;
468 Vector<EpsilonLink> epsilonLinks;
469 Vector<ConditionTest> conditions;
471 FactorWithRep *factorWithRep;
474 /* Fourth level of precedence. Trailing unary operators. Provide kleen star,
475 * optional and plus. */
490 FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep,
491 int lowerRep, int upperRep, Type type ) :
492 loc(loc), factorWithRep(factorWithRep),
493 factorWithNeg(0), lowerRep(lowerRep),
494 upperRep(upperRep), type(type) { }
496 FactorWithRep( FactorWithNeg *factorWithNeg )
497 : factorWithNeg(factorWithNeg), type(FactorWithNegType) { }
501 /* Tree traversal. */
502 FsmAp *walk( ParseData *pd );
503 void makeNameTree( ParseData *pd );
504 void resolveNameRefs( ParseData *pd );
507 FactorWithRep *factorWithRep;
508 FactorWithNeg *factorWithNeg;
509 int lowerRep, upperRep;
512 /* Priority descriptor for StarStar type. */
513 PriorDesc priorDescs[2];
516 /* Fifth level of precedence. Provides Negation. */
525 FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) :
526 loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { }
528 FactorWithNeg( Factor *factor ) :
529 factorWithNeg(0), factor(factor), type(FactorType) { }
533 /* Tree traversal. */
534 FsmAp *walk( ParseData *pd );
535 void makeNameTree( ParseData *pd );
536 void resolveNameRefs( ParseData *pd );
539 FactorWithNeg *factorWithNeg;
549 /* Language elements a factor node can be. */
560 /* Construct with a literal fsm. */
561 Factor( Literal *literal ) :
562 literal(literal), type(LiteralType) { }
564 /* Construct with a range. */
565 Factor( Range *range ) :
566 range(range), type(RangeType) { }
568 /* Construct with the or part of a regular expression. */
569 Factor( ReItem *reItem ) :
570 reItem(reItem), type(OrExprType) { }
572 /* Construct with a regular expression. */
573 Factor( RegExpr *regExpr ) :
574 regExpr(regExpr), type(RegExprType) { }
576 /* Construct with a reference to a var def. */
577 Factor( const InputLoc &loc, VarDef *varDef ) :
578 loc(loc), varDef(varDef), type(ReferenceType) {}
580 /* Construct with a parenthesized join. */
581 Factor( Join *join ) :
582 join(join), type(ParenType) {}
584 /* Construct with a longest match operator. */
585 Factor( LongestMatch *longestMatch ) :
586 longestMatch(longestMatch), type(LongestMatchType) {}
591 /* Tree traversal. */
592 FsmAp *walk( ParseData *pd );
593 void makeNameTree( ParseData *pd );
594 void resolveNameRefs( ParseData *pd );
603 LongestMatch *longestMatch;
608 /* A range machine. Only ever composed of two literals. */
611 Range( Literal *lowerLit, Literal *upperLit )
612 : lowerLit(lowerLit), upperLit(upperLit) { }
615 FsmAp *walk( ParseData *pd );
621 /* Some literal machine. Can be a number or literal string. */
624 enum LiteralType { Number, LitString };
626 Literal( const Token &token, LiteralType type )
627 : token(token), type(type) { }
629 FsmAp *walk( ParseData *pd );
635 /* Regular expression. */
638 enum RegExpType { RecurseItem, Empty };
642 type(Empty), caseInsensitive(false) { }
643 RegExpr(RegExpr *regExpr, ReItem *item) :
644 regExpr(regExpr), item(item),
645 type(RecurseItem), caseInsensitive(false) { }
648 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
653 bool caseInsensitive;
656 /* An item in a regular expression. */
659 enum ReItemType { Data, Dot, OrBlock, NegOrBlock };
661 ReItem( const InputLoc &loc, const Token &token )
662 : loc(loc), token(token), star(false), type(Data) { }
663 ReItem( const InputLoc &loc, ReItemType type )
664 : loc(loc), star(false), type(type) { }
665 ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type )
666 : loc(loc), orBlock(orBlock), star(false), type(type) { }
669 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
678 /* An or block item. */
681 enum ReOrBlockType { RecurseItem, Empty };
686 ReOrBlock(ReOrBlock *orBlock, ReOrItem *item)
687 : orBlock(orBlock), item(item), type(RecurseItem) { }
690 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
697 /* An item in an or block. */
700 enum ReOrItemType { Data, Range };
702 ReOrItem( const InputLoc &loc, const Token &token )
703 : loc(loc), token(token), type(Data) {}
704 ReOrItem( const InputLoc &loc, char lower, char upper )
705 : loc(loc), lower(lower), upper(upper), type(Range) { }
707 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
725 Text, Goto, Call, Next, GotoExpr, CallExpr, NextExpr, Ret, PChar,
726 Char, Hold, Curs, Targs, Entry, Exec, LmSwitch, LmSetActId,
727 LmSetTokEnd, LmOnLast, LmOnNext, LmOnLagBehind, LmInitAct,
728 LmInitTokStart, LmSetTokStart, Break
731 InlineItem( const InputLoc &loc, char *data, Type type ) :
732 loc(loc), data(data), nameRef(0), children(0), type(type) { }
734 InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) :
735 loc(loc), data(0), nameRef(nameRef), children(0), type(type) { }
737 InlineItem( const InputLoc &loc, LongestMatch *longestMatch,
738 LongestMatchPart *longestMatchPart, Type type ) : loc(loc), data(0),
739 nameRef(0), children(0), longestMatch(longestMatch),
740 longestMatchPart(longestMatchPart), type(type) { }
742 InlineItem( const InputLoc &loc, NameInst *nameTarg, Type type ) :
743 loc(loc), data(0), nameRef(0), nameTarg(nameTarg), children(0),
746 InlineItem( const InputLoc &loc, Type type ) :
747 loc(loc), data(0), nameRef(0), children(0), type(type) { }
753 InlineList *children;
754 LongestMatch *longestMatch;
755 LongestMatchPart *longestMatchPart;
758 InlineItem *prev, *next;
761 /* Normally this would be atypedef, but that would entail including DList from
762 * ptreetypes, which should be just typedef forwards. */
763 struct InlineList : public DList<InlineItem> { };
767 #endif /* _PARSETREE_H */