2 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
32 /* Types of builtin machines. */
53 /* Location in an input file. */
77 struct LongestMatchPart;
81 /* Type of augmentation. Describes locations in the machine. */
84 /* Transition actions/priorities. */
90 /* Global error actions. */
94 at_not_start_gbl_error,
95 at_not_final_gbl_error,
98 /* Local error actions. */
101 at_final_local_error,
102 at_not_start_local_error,
103 at_not_final_local_error,
104 at_middle_local_error,
106 /* To State Action embedding. */
110 at_not_start_to_state,
111 at_not_final_to_state,
114 /* From State Action embedding. */
118 at_not_start_from_state,
119 at_not_final_from_state,
120 at_middle_from_state,
122 /* EOF Action embedding. */
131 /* IMPORTANT: These must follow the same order as the state augs in AugType
132 * since we will be using this to compose AugType. */
149 struct ExplicitMachine;
153 /* Reference to a named state. */
154 typedef Vector<char*> NameRef;
155 typedef Vector<NameRef*> NameRefList;
156 typedef Vector<NameInst*> NameTargList;
158 /* Structure for storing location of epsilon transitons. */
161 EpsilonLink( const InputLoc &loc, NameRef &target )
162 : loc(loc), target(target) { }
170 Label( const InputLoc &loc, char *data )
171 : loc(loc), data(data) { }
177 /* Structrue represents an action assigned to some FactorWithAug node. The
178 * factor with aug will keep an array of these. */
181 ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action )
182 : loc(loc), type(type), localErrKey(localErrKey), action(action) { }
196 void prepareLitString( Token &result, bool &caseInsensitive );
197 void append( const Token &other );
198 void set( char *str, int len );
201 /* Store the value and type of a priority augmentation. */
204 PriorityAug( AugType type, int priorKey, int priorValue ) :
205 type(type), priorKey(priorKey), priorValue(priorValue) { }
213 * A Variable Definition
217 VarDef( char *name, JoinOrLm *joinOrLm )
218 : name(name), joinOrLm(joinOrLm) { }
220 /* Parse tree traversal. */
221 FsmAp *walk( ParseData *pd );
222 void makeNameTree( const InputLoc &loc, ParseData *pd );
223 void resolveNameRefs( ParseData *pd );
233 * Wherever possible the item match will execute on the character. If not
234 * possible the item match will execute on a lookahead character and either
235 * hold the current char (if one away) or backup.
237 * How to handle the problem of backing up over a buffer break?
239 * Don't want to use pending out transitions for embedding item match because
240 * the role of item match action is different: it may sometimes match on the
241 * final transition, or may match on a lookahead character.
243 * Don't want to invent a new operator just for this. So just trail action
244 * after machine, this means we can only use literal actions.
246 * The item action may
248 * What states of the machine will be final. The item actions that wrap around
249 * on the last character will go straight to the start state.
251 * Some transitions will be lookahead transitions, they will hold the current
252 * character. Crossing them with regular transitions must be restricted
253 * because it does not make sense. The transition cannot simultaneously hold
254 * and consume the current character.
256 struct LongestMatchPart
258 LongestMatchPart( Join *join, Action *action,
259 InputLoc &semiLoc, int longestMatchId )
261 join(join), action(action), semiLoc(semiLoc),
262 longestMatchId(longestMatchId), inLmSelect(false) { }
273 Action *actLagBehind;
276 LongestMatch *longestMatch;
278 LongestMatchPart *prev, *next;
281 /* Declare a new type so that ptreetypes.h need not include dlist.h. */
282 struct LmPartList : DList<LongestMatchPart> {};
286 /* Construct with a list of joins */
287 LongestMatch( const InputLoc &loc, LmPartList *longestMatchList ) :
288 loc(loc), longestMatchList(longestMatchList), name(0),
289 lmSwitchHandlesError(false) { }
291 /* Tree traversal. */
292 FsmAp *walk( ParseData *pd );
293 void makeNameTree( ParseData *pd );
294 void resolveNameRefs( ParseData *pd );
295 void runLonestMatch( ParseData *pd, FsmAp *graph );
296 Action *newAction( ParseData *pd, const InputLoc &loc, char *name,
297 InlineList *inlineList );
298 void makeActions( ParseData *pd );
299 void findName( ParseData *pd );
300 void restart( FsmAp *graph, TransAp *trans );
303 LmPartList *longestMatchList;
307 bool lmSwitchHandlesError;
309 LongestMatch *next, *prev;
313 /* List of Expressions. */
314 typedef DList<Expression> ExprList;
323 JoinOrLm( Join *join ) :
324 join(join), type(JoinType) {}
325 JoinOrLm( LongestMatch *longestMatch ) :
326 longestMatch(longestMatch), type(LongestMatchType) {}
328 FsmAp *walk( ParseData *pd );
329 void makeNameTree( ParseData *pd );
330 void resolveNameRefs( ParseData *pd );
333 LongestMatch *longestMatch;
342 /* Construct with the first expression. */
343 Join( Expression *expr );
344 Join( const InputLoc &loc, Expression *expr );
346 /* Tree traversal. */
347 FsmAp *walk( ParseData *pd );
348 FsmAp *walkJoin( ParseData *pd );
349 void makeNameTree( ParseData *pd );
350 void resolveNameRefs( ParseData *pd );
371 /* Construct with an expression on the left and a term on the right. */
372 Expression( Expression *expression, Term *term, Type type ) :
373 expression(expression), term(term),
374 builtin(builtin), type(type), prev(this), next(this) { }
376 /* Construct with only a term. */
377 Expression( Term *term ) :
378 expression(0), term(term), builtin(builtin),
379 type(TermType) , prev(this), next(this) { }
381 /* Construct with a builtin type. */
382 Expression( BuiltinMachine builtin ) :
383 expression(0), term(0), builtin(builtin),
384 type(BuiltinType), prev(this), next(this) { }
388 /* Tree traversal. */
389 FsmAp *walk( ParseData *pd, bool lastInSeq = true );
390 void makeNameTree( ParseData *pd );
391 void resolveNameRefs( ParseData *pd );
394 Expression *expression;
396 BuiltinMachine builtin;
399 Expression *prev, *next;
415 Term( Term *term, FactorWithAug *factorWithAug ) :
416 term(term), factorWithAug(factorWithAug), type(ConcatType) { }
418 Term( Term *term, FactorWithAug *factorWithAug, Type type ) :
419 term(term), factorWithAug(factorWithAug), type(type) { }
421 Term( FactorWithAug *factorWithAug ) :
422 term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { }
426 FsmAp *walk( ParseData *pd, bool lastInSeq = true );
427 void makeNameTree( ParseData *pd );
428 void resolveNameRefs( ParseData *pd );
431 FactorWithAug *factorWithAug;
434 /* Priority descriptor for RightFinish type. */
435 PriorDesc priorDescs[2];
439 /* Third level of precedence. Augmenting nodes with actions and priorities. */
442 FactorWithAug( FactorWithRep *factorWithRep ) :
443 priorDescs(0), factorWithRep(factorWithRep) { }
446 /* Tree traversal. */
447 FsmAp *walk( ParseData *pd );
448 void makeNameTree( ParseData *pd );
449 void resolveNameRefs( ParseData *pd );
451 void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd );
452 void assignPriorities( FsmAp *graph, int *priorOrd );
454 void assignConditions( FsmAp *graph );
456 /* Actions and priorities assigned to the factor node. */
457 Vector<ParserAction> actions;
458 Vector<PriorityAug> priorityAugs;
459 PriorDesc *priorDescs;
460 Vector<Label> labels;
461 Vector<EpsilonLink> epsilonLinks;
462 Vector<ParserAction> conditions;
464 FactorWithRep *factorWithRep;
467 /* Fourth level of precedence. Trailing unary operators. Provide kleen star,
468 * optional and plus. */
483 FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep,
484 int lowerRep, int upperRep, Type type ) :
485 loc(loc), factorWithRep(factorWithRep),
486 factorWithNeg(0), lowerRep(lowerRep),
487 upperRep(upperRep), type(type) { }
489 FactorWithRep( const InputLoc &loc, FactorWithNeg *factorWithNeg )
490 : loc(loc), factorWithNeg(factorWithNeg), type(FactorWithNegType) { }
494 /* Tree traversal. */
495 FsmAp *walk( ParseData *pd );
496 void makeNameTree( ParseData *pd );
497 void resolveNameRefs( ParseData *pd );
500 FactorWithRep *factorWithRep;
501 FactorWithNeg *factorWithNeg;
502 int lowerRep, upperRep;
505 /* Priority descriptor for StarStar type. */
506 PriorDesc priorDescs[2];
509 /* Fifth level of precedence. Provides Negation. */
518 FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) :
519 loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { }
521 FactorWithNeg( const InputLoc &loc, Factor *factor ) :
522 loc(loc), factorWithNeg(0), factor(factor), type(FactorType) { }
526 /* Tree traversal. */
527 FsmAp *walk( ParseData *pd );
528 void makeNameTree( ParseData *pd );
529 void resolveNameRefs( ParseData *pd );
532 FactorWithNeg *factorWithNeg;
542 /* Language elements a factor node can be. */
553 /* Construct with a literal fsm. */
554 Factor( Literal *literal ) :
555 literal(literal), type(LiteralType) { }
557 /* Construct with a range. */
558 Factor( Range *range ) :
559 range(range), type(RangeType) { }
561 /* Construct with the or part of a regular expression. */
562 Factor( ReItem *reItem ) :
563 reItem(reItem), type(OrExprType) { }
565 /* Construct with a regular expression. */
566 Factor( RegExpr *regExp ) :
567 regExp(regExp), type(RegExprType) { }
569 /* Construct with a reference to a var def. */
570 Factor( const InputLoc &loc, VarDef *varDef ) :
571 loc(loc), varDef(varDef), type(ReferenceType) {}
573 /* Construct with a parenthesized join. */
574 Factor( Join *join ) :
575 join(join), type(ParenType) {}
577 /* Construct with a longest match operator. */
578 Factor( LongestMatch *longestMatch ) :
579 longestMatch(longestMatch), type(LongestMatchType) {}
584 /* Tree traversal. */
585 FsmAp *walk( ParseData *pd );
586 void makeNameTree( ParseData *pd );
587 void resolveNameRefs( ParseData *pd );
596 LongestMatch *longestMatch;
601 /* A range machine. Only ever composed of two literals. */
604 Range( Literal *lowerLit, Literal *upperLit )
605 : lowerLit(lowerLit), upperLit(upperLit) { }
608 FsmAp *walk( ParseData *pd );
609 bool verifyRangeFsm( FsmAp *rangeEnd );
615 /* Some literal machine. Can be a number or literal string. */
618 enum LiteralType { Number, LitString };
620 Literal( const Token &token, LiteralType type )
621 : token(token), type(type) { }
623 FsmAp *walk( ParseData *pd );
629 /* Regular expression. */
632 enum RegExpType { RecurseItem, Empty };
636 type(Empty), caseInsensitive(false) { }
637 RegExpr(RegExpr *regExp, ReItem *item) :
638 regExp(regExp), item(item),
639 type(RecurseItem), caseInsensitive(false) { }
642 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
647 bool caseInsensitive;
650 /* An item in a regular expression. */
653 enum ReItemType { Data, Dot, OrBlock, NegOrBlock };
655 ReItem( const InputLoc &loc, const Token &token )
656 : loc(loc), token(token), star(false), type(Data) { }
657 ReItem( const InputLoc &loc, ReItemType type )
658 : loc(loc), star(false), type(type) { }
659 ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type )
660 : loc(loc), orBlock(orBlock), star(false), type(type) { }
663 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
672 /* An or block item. */
675 enum ReOrBlockType { RecurseItem, Empty };
680 ReOrBlock(ReOrBlock *orBlock, ReOrItem *item)
681 : orBlock(orBlock), item(item), type(RecurseItem) { }
684 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
691 /* An item in an or block. */
694 enum ReOrItemType { Data, Range };
696 ReOrItem( const InputLoc &loc, const Token &token )
697 : loc(loc), token(token), type(Data) {}
698 ReOrItem( const InputLoc &loc, char lower, char upper )
699 : loc(loc), lower(lower), upper(upper), type(Range) { }
701 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
719 Text, Goto, Call, Next, GotoExpr, CallExpr, NextExpr, Ret, PChar,
720 Char, Hold, Curs, Targs, Entry, Exec, LmSwitch, LmSetActId,
721 LmSetTokEnd, LmOnLast, LmOnNext, LmOnLagBehind, LmInitAct,
722 LmInitTokStart, LmSetTokStart, Break
725 InlineItem( const InputLoc &loc, char *data, Type type ) :
726 loc(loc), data(data), nameRef(0), children(0), type(type) { }
728 InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) :
729 loc(loc), data(0), nameRef(nameRef), children(0), type(type) { }
731 InlineItem( const InputLoc &loc, LongestMatch *longestMatch,
732 LongestMatchPart *longestMatchPart, Type type ) : loc(loc), data(0),
733 nameRef(0), children(0), longestMatch(longestMatch),
734 longestMatchPart(longestMatchPart), type(type) { }
736 InlineItem( const InputLoc &loc, NameInst *nameTarg, Type type ) :
737 loc(loc), data(0), nameRef(0), nameTarg(nameTarg), children(0),
740 InlineItem( const InputLoc &loc, Type type ) :
741 loc(loc), data(0), nameRef(0), children(0), type(type) { }
747 InlineList *children;
748 LongestMatch *longestMatch;
749 LongestMatchPart *longestMatchPart;
752 InlineItem *prev, *next;
755 /* Normally this would be atypedef, but that would entail including DList from
756 * ptreetypes, which should be just typedef forwards. */
757 struct InlineList : public DList<InlineItem> { };
761 #endif /* _PARSETREE_H */