2 * Copyright 2001-2006 Adrian Thurston <thurston@complang.org>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
33 /* Types of builtin machines. */
71 struct LongestMatchPart;
76 /* Type of augmentation. Describes locations in the machine. */
79 /* Transition actions/priorities. */
85 /* Global error actions. */
89 at_not_start_gbl_error,
90 at_not_final_gbl_error,
93 /* Local error actions. */
97 at_not_start_local_error,
98 at_not_final_local_error,
99 at_middle_local_error,
101 /* To State Action embedding. */
105 at_not_start_to_state,
106 at_not_final_to_state,
109 /* From State Action embedding. */
113 at_not_start_from_state,
114 at_not_final_from_state,
115 at_middle_from_state,
117 /* EOF Action embedding. */
126 /* IMPORTANT: These must follow the same order as the state augs in AugType
127 * since we will be using this to compose AugType. */
144 struct ExplicitMachine;
148 /* Reference to a named state. */
149 typedef Vector<char*> NameRef;
150 typedef Vector<NameRef*> NameRefList;
151 typedef Vector<NameInst*> NameTargList;
153 /* Structure for storing location of epsilon transitons. */
156 EpsilonLink( const InputLoc &loc, NameRef &target )
157 : loc(loc), target(target) { }
165 Label( const InputLoc &loc, char *data )
166 : loc(loc), data(data) { }
172 /* Structrue represents an action assigned to some FactorWithAug node. The
173 * factor with aug will keep an array of these. */
176 ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action )
177 : loc(loc), type(type), localErrKey(localErrKey), action(action) { }
187 ConditionTest( const InputLoc &loc, AugType type, Action *action, bool sense ) :
188 loc(loc), type(type), action(action), sense(sense) { }
202 void append( const Token &other );
203 void set( const char *str, int len );
206 char *prepareLitString( const InputLoc &loc, const char *src, long length,
207 long &resLen, bool &caseInsensitive );
209 /* Store the value and type of a priority augmentation. */
212 PriorityAug( AugType type, int priorKey, int priorValue ) :
213 type(type), priorKey(priorKey), priorValue(priorValue) { }
221 * A Variable Definition
225 VarDef( const char *name, MachineDef *machineDef )
226 : name(name), machineDef(machineDef), isExport(false) { }
228 /* Parse tree traversal. */
229 FsmAp *walk( ParseData *pd );
230 void makeNameTree( const InputLoc &loc, ParseData *pd );
231 void resolveNameRefs( ParseData *pd );
234 MachineDef *machineDef;
242 * Wherever possible the item match will execute on the character. If not
243 * possible the item match will execute on a lookahead character and either
244 * hold the current char (if one away) or backup.
246 * How to handle the problem of backing up over a buffer break?
248 * Don't want to use pending out transitions for embedding item match because
249 * the role of item match action is different: it may sometimes match on the
250 * final transition, or may match on a lookahead character.
252 * Don't want to invent a new operator just for this. So just trail action
253 * after machine, this means we can only use literal actions.
255 * The item action may
257 * What states of the machine will be final. The item actions that wrap around
258 * on the last character will go straight to the start state.
260 * Some transitions will be lookahead transitions, they will hold the current
261 * character. Crossing them with regular transitions must be restricted
262 * because it does not make sense. The transition cannot simultaneously hold
263 * and consume the current character.
265 struct LongestMatchPart
267 LongestMatchPart( Join *join, Action *action,
268 InputLoc &semiLoc, int longestMatchId )
270 join(join), action(action), semiLoc(semiLoc),
271 longestMatchId(longestMatchId), inLmSelect(false) { }
282 Action *actLagBehind;
285 LongestMatch *longestMatch;
287 LongestMatchPart *prev, *next;
290 /* Declare a new type so that ptreetypes.h need not include dlist.h. */
291 struct LmPartList : DList<LongestMatchPart> {};
295 /* Construct with a list of joins */
296 LongestMatch( const InputLoc &loc, LmPartList *longestMatchList ) :
297 loc(loc), longestMatchList(longestMatchList), name(0),
298 lmSwitchHandlesError(false) { }
300 /* Tree traversal. */
301 FsmAp *walk( ParseData *pd );
302 void makeNameTree( ParseData *pd );
303 void resolveNameRefs( ParseData *pd );
304 void transferScannerLeavingActions( FsmAp *graph );
305 void runLongestMatch( ParseData *pd, FsmAp *graph );
306 Action *newAction( ParseData *pd, const InputLoc &loc, const char *name,
307 InlineList *inlineList );
308 void makeActions( ParseData *pd );
309 void findName( ParseData *pd );
310 void restart( FsmAp *graph, TransAp *trans );
313 LmPartList *longestMatchList;
317 bool lmSwitchHandlesError;
319 LongestMatch *next, *prev;
323 /* List of Expressions. */
324 typedef DList<Expression> ExprList;
334 MachineDef( Join *join )
335 : join(join), longestMatch(0), lengthDef(0), type(JoinType) {}
336 MachineDef( LongestMatch *longestMatch )
337 : join(0), longestMatch(longestMatch), lengthDef(0), type(LongestMatchType) {}
338 MachineDef( LengthDef *lengthDef )
339 : join(0), longestMatch(0), lengthDef(lengthDef), type(LengthDefType) {}
341 FsmAp *walk( ParseData *pd );
342 void makeNameTree( ParseData *pd );
343 void resolveNameRefs( ParseData *pd );
346 LongestMatch *longestMatch;
347 LengthDef *lengthDef;
356 /* Construct with the first expression. */
357 Join( Expression *expr );
358 Join( const InputLoc &loc, Expression *expr );
360 /* Tree traversal. */
361 FsmAp *walk( ParseData *pd );
362 FsmAp *walkJoin( ParseData *pd );
363 void makeNameTree( ParseData *pd );
364 void resolveNameRefs( ParseData *pd );
385 /* Construct with an expression on the left and a term on the right. */
386 Expression( Expression *expression, Term *term, Type type ) :
387 expression(expression), term(term),
388 builtin(builtin), type(type), prev(this), next(this) { }
390 /* Construct with only a term. */
391 Expression( Term *term ) :
392 expression(0), term(term), builtin(builtin),
393 type(TermType) , prev(this), next(this) { }
395 /* Construct with a builtin type. */
396 Expression( BuiltinMachine builtin ) :
397 expression(0), term(0), builtin(builtin),
398 type(BuiltinType), prev(this), next(this) { }
402 /* Tree traversal. */
403 FsmAp *walk( ParseData *pd, bool lastInSeq = true );
404 void makeNameTree( ParseData *pd );
405 void resolveNameRefs( ParseData *pd );
408 Expression *expression;
410 BuiltinMachine builtin;
413 Expression *prev, *next;
429 Term( Term *term, FactorWithAug *factorWithAug ) :
430 term(term), factorWithAug(factorWithAug), type(ConcatType) { }
432 Term( Term *term, FactorWithAug *factorWithAug, Type type ) :
433 term(term), factorWithAug(factorWithAug), type(type) { }
435 Term( FactorWithAug *factorWithAug ) :
436 term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { }
440 FsmAp *walk( ParseData *pd, bool lastInSeq = true );
441 void makeNameTree( ParseData *pd );
442 void resolveNameRefs( ParseData *pd );
445 FactorWithAug *factorWithAug;
448 /* Priority descriptor for RightFinish type. */
449 PriorDesc priorDescs[2];
453 /* Third level of precedence. Augmenting nodes with actions and priorities. */
456 FactorWithAug( FactorWithRep *factorWithRep ) :
457 priorDescs(0), factorWithRep(factorWithRep) { }
460 /* Tree traversal. */
461 FsmAp *walk( ParseData *pd );
462 void makeNameTree( ParseData *pd );
463 void resolveNameRefs( ParseData *pd );
465 void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd );
466 void assignPriorities( FsmAp *graph, int *priorOrd );
468 void assignConditions( FsmAp *graph );
470 /* Actions and priorities assigned to the factor node. */
471 Vector<ParserAction> actions;
472 Vector<PriorityAug> priorityAugs;
473 PriorDesc *priorDescs;
474 Vector<Label> labels;
475 Vector<EpsilonLink> epsilonLinks;
476 Vector<ConditionTest> conditions;
478 FactorWithRep *factorWithRep;
481 /* Fourth level of precedence. Trailing unary operators. Provide kleen star,
482 * optional and plus. */
497 FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep,
498 int lowerRep, int upperRep, Type type ) :
499 loc(loc), factorWithRep(factorWithRep),
500 factorWithNeg(0), lowerRep(lowerRep),
501 upperRep(upperRep), type(type) { }
503 FactorWithRep( FactorWithNeg *factorWithNeg )
504 : factorWithNeg(factorWithNeg), type(FactorWithNegType) { }
508 /* Tree traversal. */
509 FsmAp *walk( ParseData *pd );
510 void makeNameTree( ParseData *pd );
511 void resolveNameRefs( ParseData *pd );
514 FactorWithRep *factorWithRep;
515 FactorWithNeg *factorWithNeg;
516 int lowerRep, upperRep;
519 /* Priority descriptor for StarStar type. */
520 PriorDesc priorDescs[2];
523 /* Fifth level of precedence. Provides Negation. */
532 FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) :
533 loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { }
535 FactorWithNeg( Factor *factor ) :
536 factorWithNeg(0), factor(factor), type(FactorType) { }
540 /* Tree traversal. */
541 FsmAp *walk( ParseData *pd );
542 void makeNameTree( ParseData *pd );
543 void resolveNameRefs( ParseData *pd );
546 FactorWithNeg *factorWithNeg;
556 /* Language elements a factor node can be. */
567 /* Construct with a literal fsm. */
568 Factor( Literal *literal ) :
569 literal(literal), type(LiteralType) { }
571 /* Construct with a range. */
572 Factor( Range *range ) :
573 range(range), type(RangeType) { }
575 /* Construct with the or part of a regular expression. */
576 Factor( ReItem *reItem ) :
577 reItem(reItem), type(OrExprType) { }
579 /* Construct with a regular expression. */
580 Factor( RegExpr *regExpr ) :
581 regExpr(regExpr), type(RegExprType) { }
583 /* Construct with a reference to a var def. */
584 Factor( const InputLoc &loc, VarDef *varDef ) :
585 loc(loc), varDef(varDef), type(ReferenceType) {}
587 /* Construct with a parenthesized join. */
588 Factor( Join *join ) :
589 join(join), type(ParenType) {}
591 /* Construct with a longest match operator. */
592 Factor( LongestMatch *longestMatch ) :
593 longestMatch(longestMatch), type(LongestMatchType) {}
598 /* Tree traversal. */
599 FsmAp *walk( ParseData *pd );
600 void makeNameTree( ParseData *pd );
601 void resolveNameRefs( ParseData *pd );
610 LongestMatch *longestMatch;
615 /* A range machine. Only ever composed of two literals. */
618 Range( Literal *lowerLit, Literal *upperLit )
619 : lowerLit(lowerLit), upperLit(upperLit) { }
622 FsmAp *walk( ParseData *pd );
628 /* Some literal machine. Can be a number or literal string. */
631 enum LiteralType { Number, LitString };
633 Literal( const Token &token, LiteralType type )
634 : token(token), type(type) { }
636 FsmAp *walk( ParseData *pd );
642 /* Regular expression. */
645 enum RegExpType { RecurseItem, Empty };
649 type(Empty), caseInsensitive(false) { }
650 RegExpr(RegExpr *regExpr, ReItem *item) :
651 regExpr(regExpr), item(item),
652 type(RecurseItem), caseInsensitive(false) { }
655 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
660 bool caseInsensitive;
663 /* An item in a regular expression. */
666 enum ReItemType { Data, Dot, OrBlock, NegOrBlock };
668 ReItem( const InputLoc &loc, const Token &token )
669 : loc(loc), token(token), star(false), type(Data) { }
670 ReItem( const InputLoc &loc, ReItemType type )
671 : loc(loc), star(false), type(type) { }
672 ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type )
673 : loc(loc), orBlock(orBlock), star(false), type(type) { }
676 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
685 /* An or block item. */
688 enum ReOrBlockType { RecurseItem, Empty };
693 ReOrBlock(ReOrBlock *orBlock, ReOrItem *item)
694 : orBlock(orBlock), item(item), type(RecurseItem) { }
697 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
704 /* An item in an or block. */
707 enum ReOrItemType { Data, Range };
709 ReOrItem( const InputLoc &loc, const Token &token )
710 : loc(loc), token(token), type(Data) {}
711 ReOrItem( const InputLoc &loc, char lower, char upper )
712 : loc(loc), lower(lower), upper(upper), type(Range) { }
714 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
732 Text, Goto, Call, Next, GotoExpr, CallExpr, NextExpr, Ret, PChar,
733 Char, Hold, Curs, Targs, Entry, Exec, LmSwitch, LmSetActId,
734 LmSetTokEnd, LmOnLast, LmOnNext, LmOnLagBehind, LmInitAct,
735 LmInitTokStart, LmSetTokStart, Break
738 InlineItem( const InputLoc &loc, char *data, Type type ) :
739 loc(loc), data(data), nameRef(0), children(0), type(type) { }
741 InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) :
742 loc(loc), data(0), nameRef(nameRef), children(0), type(type) { }
744 InlineItem( const InputLoc &loc, LongestMatch *longestMatch,
745 LongestMatchPart *longestMatchPart, Type type ) : loc(loc), data(0),
746 nameRef(0), children(0), longestMatch(longestMatch),
747 longestMatchPart(longestMatchPart), type(type) { }
749 InlineItem( const InputLoc &loc, NameInst *nameTarg, Type type ) :
750 loc(loc), data(0), nameRef(0), nameTarg(nameTarg), children(0),
753 InlineItem( const InputLoc &loc, Type type ) :
754 loc(loc), data(0), nameRef(0), children(0), type(type) { }
760 InlineList *children;
761 LongestMatch *longestMatch;
762 LongestMatchPart *longestMatchPart;
765 InlineItem *prev, *next;
768 /* Normally this would be atypedef, but that would entail including DList from
769 * ptreetypes, which should be just typedef forwards. */
770 struct InlineList : public DList<InlineItem> { };