2 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
31 #include "parsetree.h"
34 ostream &operator<<( ostream &out, const NameRef &nameRef );
35 ostream &operator<<( ostream &out, const NameInst &nameInst );
37 /* Convert the literal string which comes in from the scanner into an array of
38 * characters with escapes and options interpreted. Also null terminates the
39 * string. Though this null termination should not be relied on for
40 * interpreting literals in the parser because the string may contain a
41 * literal string with \0 */
42 void Token::prepareLitString( Token &result, bool &caseInsensitive )
44 result.data = new char[this->length+1];
45 caseInsensitive = false;
47 char *src = this->data + 1;
48 char *end = this->data + this->length - 1;
50 while ( *end != '\'' && *end != '\"' ) {
52 caseInsensitive = true;
54 error( this->loc ) << "literal string '" << *end <<
55 "' option not supported" << endl;
60 char *dest = result.data;
62 while ( src != end ) {
65 case '0': dest[len++] = '\0'; break;
66 case 'a': dest[len++] = '\a'; break;
67 case 'b': dest[len++] = '\b'; break;
68 case 't': dest[len++] = '\t'; break;
69 case 'n': dest[len++] = '\n'; break;
70 case 'v': dest[len++] = '\v'; break;
71 case 'f': dest[len++] = '\f'; break;
72 case 'r': dest[len++] = '\r'; break;
74 default: dest[len++] = src[1]; break;
83 result.data[result.length] = 0;
87 FsmAp *VarDef::walk( ParseData *pd )
89 /* We enter into a new name scope. */
90 NameFrame nameFrame = pd->enterNameScope( true, 1 );
92 /* Recurse on the expression. */
93 FsmAp *rtnVal = joinOrLm->walk( pd );
95 /* Do the tranfer of local error actions. */
96 LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name );
97 if ( localErrDictEl != 0 ) {
98 for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ )
99 rtnVal->transferErrorActions( state, localErrDictEl->value );
102 /* If the expression below is a join operation with multiple expressions
103 * then it just had epsilon transisions resolved. If it is a join
104 * with only a single expression then run the epsilon op now. */
105 if ( joinOrLm->type == JoinOrLm::JoinType && joinOrLm->join->exprList.length() == 1 )
108 /* We can now unset entry points that are not longer used. */
109 pd->unsetObsoleteEntries( rtnVal );
111 /* If the name of the variable is referenced then add the entry point to
113 if ( pd->curNameInst->numRefs > 0 )
114 rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState );
116 /* Pop the name scope. */
117 pd->popNameScope( nameFrame );
121 void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd )
123 /* The variable definition enters a new scope. */
124 NameInst *prevNameInst = pd->curNameInst;
125 pd->curNameInst = pd->addNameInst( loc, name, false );
127 if ( joinOrLm->type == JoinOrLm::LongestMatchType )
128 pd->curNameInst->isLongestMatch = true;
131 joinOrLm->makeNameTree( pd );
133 /* The name scope ends, pop the name instantiation. */
134 pd->curNameInst = prevNameInst;
137 void VarDef::resolveNameRefs( ParseData *pd )
139 /* Entering into a new scope. */
140 NameFrame nameFrame = pd->enterNameScope( true, 1 );
143 joinOrLm->resolveNameRefs( pd );
145 /* The name scope ends, pop the name instantiation. */
146 pd->popNameScope( nameFrame );
149 InputLoc LongestMatchPart::getLoc()
151 return action != 0 ? action->loc : semiLoc;
155 * If there are any LMs then all of the following entry points must reset
158 * 1. fentry(StateRef)
159 * 2. ftoto(StateRef), fcall(StateRef), fnext(StateRef)
160 * 3. targt of any transition that has an fcall (the return loc).
161 * 4. start state of all longest match routines.
164 Action *LongestMatch::newAction( ParseData *pd, const InputLoc &loc,
165 char *name, InlineList *inlineList )
167 Action *action = new Action( loc, name, inlineList, pd->nextCondId++ );
168 action->actionRefs.append( pd->curNameInst );
169 pd->actionList.append( action );
170 action->isLmAction = true;
174 void LongestMatch::makeActions( ParseData *pd )
176 /* Make actions that set the action id. */
177 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
178 /* For each part create actions for setting the match type. We need
179 * to do this so that the actions will go into the actionIndex. */
180 InlineList *inlineList = new InlineList;
181 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, InlineItem::LmSetActId ) );
182 char *actName = new char[50];
183 sprintf( actName, "store%i", lmi->longestMatchId );
184 lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList );
187 /* Make actions that execute the user action and restart on the last character. */
188 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
189 /* For each part create actions for setting the match type. We need
190 * to do this so that the actions will go into the actionIndex. */
191 InlineList *inlineList = new InlineList;
192 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
193 InlineItem::LmOnLast ) );
194 char *actName = new char[50];
195 sprintf( actName, "imm%i", lmi->longestMatchId );
196 lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList );
199 /* Make actions that execute the user action and restart on the next
200 * character. These actions will set tokend themselves (it is the current
202 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
203 /* For each part create actions for setting the match type. We need
204 * to do this so that the actions will go into the actionIndex. */
205 InlineList *inlineList = new InlineList;
206 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
207 InlineItem::LmOnNext ) );
208 char *actName = new char[50];
209 sprintf( actName, "lagh%i", lmi->longestMatchId );
210 lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList );
213 /* Make actions that execute the user action and restart at tokend. These
214 * actions execute some time after matching the last char. */
215 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
216 /* For each part create actions for setting the match type. We need
217 * to do this so that the actions will go into the actionIndex. */
218 InlineList *inlineList = new InlineList;
219 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
220 InlineItem::LmOnLagBehind ) );
221 char *actName = new char[50];
222 sprintf( actName, "lag%i", lmi->longestMatchId );
223 lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList );
230 /* Create the error action. */
231 InlineList *il6 = new InlineList;
232 il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) );
233 lmActSelect = newAction( pd, loc, "lagsel", il6 );
236 void LongestMatch::findName( ParseData *pd )
238 NameInst *nameInst = pd->curNameInst;
239 while ( nameInst->name == 0 ) {
240 nameInst = nameInst->parent;
241 /* Since every machine must must have a name, we should always find a
242 * name for the longest match. */
243 assert( nameInst != 0 );
245 name = nameInst->name;
248 void LongestMatch::makeNameTree( ParseData *pd )
250 /* Create an anonymous scope for the longest match. Will be used for
251 * restarting machine after matching a token. */
252 NameInst *prevNameInst = pd->curNameInst;
253 pd->curNameInst = pd->addNameInst( loc, 0, false );
255 /* Recurse into all parts of the longest match operator. */
256 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ )
257 lmi->join->makeNameTree( pd );
259 /* Traverse the name tree upwards to find a name for this lm. */
262 /* Also make the longest match's actions at this point. */
265 /* The name scope ends, pop the name instantiation. */
266 pd->curNameInst = prevNameInst;
269 void LongestMatch::resolveNameRefs( ParseData *pd )
271 /* The longest match gets its own name scope. */
272 NameFrame nameFrame = pd->enterNameScope( true, 1 );
274 /* Take an action reference for each longest match item and recurse. */
275 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
276 /* Record the reference if the item has an action. */
277 if ( lmi->action != 0 )
278 lmi->action->actionRefs.append( pd->localNameScope );
280 /* Recurse down the join. */
281 lmi->join->resolveNameRefs( pd );
284 /* The name scope ends, pop the name instantiation. */
285 pd->popNameScope( nameFrame );
288 void LongestMatch::restart( FsmAp *graph, TransAp *trans )
290 StateAp *fromState = trans->fromState;
291 graph->detachTrans( fromState, trans->toState, trans );
292 graph->attachTrans( fromState, graph->startState, trans );
295 void LongestMatch::runLonestMatch( ParseData *pd, FsmAp *graph )
297 graph->markReachableFromHereStopFinal( graph->startState );
298 for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
299 if ( ms->stateBits & SB_ISMARKED ) {
300 ms->lmItemSet.insert( 0 );
301 ms->stateBits &= ~ SB_ISMARKED;
305 /* Transfer the first item of non-empty lmAction tables to the item sets
306 * of the states that follow. Exclude states that have no transitions out.
307 * This must happen on a separate pass so that on each iteration of the
308 * next pass we have the item set entries from all lmAction tables. */
309 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
310 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
311 if ( trans->lmActionTable.length() > 0 ) {
312 LmActionTableEl *lmAct = trans->lmActionTable.data;
313 StateAp *toState = trans->toState;
316 /* Check if there are transitions out, this may be a very
317 * close approximation? Out transitions going nowhere?
319 if ( toState->outList.length() > 0 ) {
320 /* Fill the item sets. */
321 graph->markReachableFromHereStopFinal( toState );
322 for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
323 if ( ms->stateBits & SB_ISMARKED ) {
324 ms->lmItemSet.insert( lmAct->value );
325 ms->stateBits &= ~ SB_ISMARKED;
333 /* The lmItem sets are now filled, telling us which longest match rules
334 * can succeed in which states. First determine if we need to make sure
335 * act is defaulted to zero. We need to do this if there are any states
336 * with lmItemSet.length() > 1 and NULL is included. That is, that the
337 * switch may get called when in fact nothing has been matched. */
338 int maxItemSetLength = 0;
339 graph->markReachableFromHereStopFinal( graph->startState );
340 for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
341 if ( ms->stateBits & SB_ISMARKED ) {
342 if ( ms->lmItemSet.length() > maxItemSetLength )
343 maxItemSetLength = ms->lmItemSet.length();
344 ms->stateBits &= ~ SB_ISMARKED;
348 /* The actions executed on starting to match a token. */
349 graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart );
350 graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
351 if ( maxItemSetLength > 1 ) {
352 /* The longest match action switch may be called when tokens are
353 * matched, in which case act must be initialized, there must be a
354 * case to handle the error, and the generated machine will require an
356 lmSwitchHandlesError = true;
357 pd->lmRequiresErrorState = true;
358 graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
361 /* The place to store transitions to restart. It maybe possible for the
362 * restarting to affect the searching through the graph that follows. For
363 * now take the safe route and save the list of transitions to restart
364 * until after all searching is done. */
365 Vector<TransAp*> restartTrans;
367 /* Set actions that do immediate token recognition, set the longest match part
368 * id and set the token ending. */
369 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
370 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
371 if ( trans->lmActionTable.length() > 0 ) {
372 LmActionTableEl *lmAct = trans->lmActionTable.data;
373 StateAp *toState = trans->toState;
376 /* Check if there are transitions out, this may be a very
377 * close approximation? Out transitions going nowhere?
379 if ( toState->outList.length() == 0 ) {
380 /* Can execute the immediate action for the longest match
381 * part. Redirect the action to the start state. */
382 trans->actionTable.setAction( lmAct->key,
383 lmAct->value->actOnLast );
384 restartTrans.append( trans );
387 /* Look for non final states that have a non-empty item
388 * set. If these are present then we need to record the
389 * end of the token. Also Find the highest item set
390 * length reachable from here (excluding at transtions to
392 bool nonFinalNonEmptyItemSet = false;
393 maxItemSetLength = 0;
394 graph->markReachableFromHereStopFinal( toState );
395 for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
396 if ( ms->stateBits & SB_ISMARKED ) {
397 if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
398 nonFinalNonEmptyItemSet = true;
399 if ( ms->lmItemSet.length() > maxItemSetLength )
400 maxItemSetLength = ms->lmItemSet.length();
401 ms->stateBits &= ~ SB_ISMARKED;
405 /* If there are reachable states that are not final and
406 * have non empty item sets or that have an item set
407 * length greater than one then we need to set tokend
408 * because the error action that matches the token will
410 if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
411 trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
413 /* Some states may not know which longest match item to
414 * execute, must set it. */
415 if ( maxItemSetLength > 1 ) {
416 /* There are transitions out, another match may come. */
417 trans->actionTable.setAction( lmAct->key,
418 lmAct->value->setActId );
425 /* Now that all graph searching is done it certainly safe set the
426 * restarting. It may be safe above, however this must be verified. */
427 for ( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ )
428 restart( graph, *pt );
430 int lmErrActionOrd = pd->curActionOrd++;
432 /* Embed the error for recognizing a char. */
433 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
434 if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
435 if ( st->isFinState() ) {
436 /* On error execute the onActNext action, which knows that
437 * the last character of the token was one back and restart. */
438 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
439 &st->lmItemSet[0]->actOnNext, 1 );
442 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
443 &st->lmItemSet[0]->actLagBehind, 1 );
446 else if ( st->lmItemSet.length() > 1 ) {
447 /* Need to use the select. Take note of the which items the select
448 * is needed for so only the necessary actions are included. */
449 for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
451 (*plmi)->inLmSelect = true;
453 /* On error, execute the action select and go to the start state. */
454 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
459 /* Finally, the start state should be made final. */
460 graph->setFinState( graph->startState );
463 FsmAp *LongestMatch::walk( ParseData *pd )
465 /* The longest match has it's own name scope. */
466 NameFrame nameFrame = pd->enterNameScope( true, 1 );
468 /* Make each part of the longest match. */
469 FsmAp **parts = new FsmAp*[longestMatchList->length()];
470 LmPartList::Iter lmi = *longestMatchList;
471 for ( int i = 0; lmi.lte(); lmi++, i++ ) {
472 /* Create the machine and embed the setting of the longest match id. */
473 parts[i] = lmi->join->walk( pd );
474 parts[i]->longMatchAction( pd->curActionOrd++, lmi );
477 /* Union machines one and up with machine zero. The grammar dictates that
478 * there will always be at least one part. */
479 FsmAp *rtnVal = parts[0];
480 for ( int i = 1; i < longestMatchList->length(); i++ ) {
481 rtnVal->unionOp( parts[i] );
482 afterOpMinimize( rtnVal );
485 runLonestMatch( pd, rtnVal );
487 /* Pop the name scope. */
488 pd->popNameScope( nameFrame );
494 FsmAp *JoinOrLm::walk( ParseData *pd )
499 rtnVal = join->walk( pd );
501 case LongestMatchType:
502 rtnVal = longestMatch->walk( pd );
508 void JoinOrLm::makeNameTree( ParseData *pd )
512 join->makeNameTree( pd );
514 case LongestMatchType:
515 longestMatch->makeNameTree( pd );
520 void JoinOrLm::resolveNameRefs( ParseData *pd )
524 join->resolveNameRefs( pd );
526 case LongestMatchType:
527 longestMatch->resolveNameRefs( pd );
533 /* Construct with a location and the first expression. */
534 Join::Join( const InputLoc &loc, Expression *expr )
538 exprList.append( expr );
541 /* Construct with a location and the first expression. */
542 Join::Join( Expression *expr )
546 exprList.append( expr );
549 /* Walk an expression node. */
550 FsmAp *Join::walk( ParseData *pd )
552 if ( exprList.length() > 1 )
553 return walkJoin( pd );
555 return exprList.head->walk( pd );
558 /* There is a list of expressions to join. */
559 FsmAp *Join::walkJoin( ParseData *pd )
561 /* We enter into a new name scope. */
562 NameFrame nameFrame = pd->enterNameScope( true, 1 );
564 /* Evaluate the machines. */
565 FsmAp **fsms = new FsmAp*[exprList.length()];
566 ExprList::Iter expr = exprList;
567 for ( int e = 0; e < exprList.length(); e++, expr++ )
568 fsms[e] = expr->walk( pd );
570 /* Get the start and final names. Final is
571 * guaranteed to exist, start is not. */
572 NameInst *startName = pd->curNameInst->start;
573 NameInst *finalName = pd->curNameInst->final;
576 if ( startName != 0 ) {
577 /* Take note that there was an implicit link to the start machine. */
578 pd->localNameScope->referencedNames.append( startName );
579 startId = startName->id;
582 /* A final id of -1 indicates there is no epsilon that references the
583 * final state, therefor do not create one or set an entry point to it. */
585 if ( finalName->numRefs > 0 )
586 finalId = finalName->id;
588 /* Join machines 1 and up onto machine 0. */
589 FsmAp *retFsm = fsms[0];
590 retFsm->joinOp( startId, finalId, fsms+1, exprList.length()-1 );
592 /* We can now unset entry points that are not longer used. */
593 pd->unsetObsoleteEntries( retFsm );
595 /* Pop the name scope. */
596 pd->popNameScope( nameFrame );
602 void Join::makeNameTree( ParseData *pd )
604 if ( exprList.length() > 1 ) {
605 /* Create the new anonymous scope. */
606 NameInst *prevNameInst = pd->curNameInst;
607 pd->curNameInst = pd->addNameInst( loc, 0, false );
609 /* Join scopes need an implicit "final" target. */
610 pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final",
611 pd->nextNameId++, false );
613 /* Recurse into all expressions in the list. */
614 for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
615 expr->makeNameTree( pd );
617 /* The name scope ends, pop the name instantiation. */
618 pd->curNameInst = prevNameInst;
621 /* Recurse into the single expression. */
622 exprList.head->makeNameTree( pd );
627 void Join::resolveNameRefs( ParseData *pd )
629 /* Branch on whether or not there is to be a join. */
630 if ( exprList.length() > 1 ) {
631 /* The variable definition enters a new scope. */
632 NameFrame nameFrame = pd->enterNameScope( true, 1 );
634 /* The join scope must contain a start label. */
635 NameSet resolved = pd->resolvePart( pd->localNameScope, "start", true );
636 if ( resolved.length() > 0 ) {
637 /* Take the first. */
638 pd->curNameInst->start = resolved[0];
639 if ( resolved.length() > 1 ) {
640 /* Complain about the multiple references. */
641 error(loc) << "multiple start labels" << endl;
642 errorStateLabels( resolved );
646 /* Make sure there is a start label. */
647 if ( pd->curNameInst->start != 0 ) {
648 /* There is an implicit reference to start name. */
649 pd->curNameInst->start->numRefs += 1;
652 /* No start label. Complain and recover by adding a label to the
653 * adding one. Recover ignoring the problem. */
654 error(loc) << "no start label" << endl;
657 /* Recurse into all expressions in the list. */
658 for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
659 expr->resolveNameRefs( pd );
661 /* The name scope ends, pop the name instantiation. */
662 pd->popNameScope( nameFrame );
665 /* Recurse into the single expression. */
666 exprList.head->resolveNameRefs( pd );
670 /* Clean up after an expression node. */
671 Expression::~Expression()
674 case OrType: case IntersectType: case SubtractType:
675 case StrongSubtractType:
687 /* Evaluate a single expression node. */
688 FsmAp *Expression::walk( ParseData *pd, bool lastInSeq )
693 /* Evaluate the expression. */
694 rtnVal = expression->walk( pd, false );
695 /* Evaluate the term. */
696 FsmAp *rhs = term->walk( pd );
698 rtnVal->unionOp( rhs );
699 afterOpMinimize( rtnVal, lastInSeq );
702 case IntersectType: {
703 /* Evaluate the expression. */
704 rtnVal = expression->walk( pd );
705 /* Evaluate the term. */
706 FsmAp *rhs = term->walk( pd );
707 /* Perform intersection. */
708 rtnVal->intersectOp( rhs );
709 afterOpMinimize( rtnVal, lastInSeq );
713 /* Evaluate the expression. */
714 rtnVal = expression->walk( pd );
715 /* Evaluate the term. */
716 FsmAp *rhs = term->walk( pd );
717 /* Perform subtraction. */
718 rtnVal->subtractOp( rhs );
719 afterOpMinimize( rtnVal, lastInSeq );
722 case StrongSubtractType: {
723 /* Evaluate the expression. */
724 rtnVal = expression->walk( pd );
726 /* Evaluate the term and pad it with any* machines. */
727 FsmAp *rhs = dotStarFsm( pd );
728 FsmAp *termFsm = term->walk( pd );
729 FsmAp *trailAnyStar = dotStarFsm( pd );
730 rhs->concatOp( termFsm );
731 rhs->concatOp( trailAnyStar );
733 /* Perform subtraction. */
734 rtnVal->subtractOp( rhs );
735 afterOpMinimize( rtnVal, lastInSeq );
739 /* Return result of the term. */
740 rtnVal = term->walk( pd );
744 /* Duplicate the builtin. */
745 rtnVal = makeBuiltin( builtin, pd );
753 void Expression::makeNameTree( ParseData *pd )
759 case StrongSubtractType:
760 expression->makeNameTree( pd );
761 term->makeNameTree( pd );
764 term->makeNameTree( pd );
771 void Expression::resolveNameRefs( ParseData *pd )
777 case StrongSubtractType:
778 expression->resolveNameRefs( pd );
779 term->resolveNameRefs( pd );
782 term->resolveNameRefs( pd );
789 /* Clean up after a term node. */
795 case RightFinishType:
798 delete factorWithAug;
800 case FactorWithAugType:
801 delete factorWithAug;
806 /* Evaluate a term node. */
807 FsmAp *Term::walk( ParseData *pd, bool lastInSeq )
812 /* Evaluate the Term. */
813 rtnVal = term->walk( pd, false );
814 /* Evaluate the FactorWithRep. */
815 FsmAp *rhs = factorWithAug->walk( pd );
816 /* Perform concatenation. */
817 rtnVal->concatOp( rhs );
818 afterOpMinimize( rtnVal, lastInSeq );
821 case RightStartType: {
822 /* Evaluate the Term. */
823 rtnVal = term->walk( pd );
825 /* Evaluate the FactorWithRep. */
826 FsmAp *rhs = factorWithAug->walk( pd );
828 /* Set up the priority descriptors. The left machine gets the
829 * lower priority where as the right get the higher start priority. */
830 priorDescs[0].key = pd->nextPriorKey++;
831 priorDescs[0].priority = 0;
832 rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
834 /* The start transitions right machine get the higher priority.
835 * Use the same unique key. */
836 priorDescs[1].key = priorDescs[0].key;
837 priorDescs[1].priority = 1;
838 rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
840 /* Perform concatenation. */
841 rtnVal->concatOp( rhs );
842 afterOpMinimize( rtnVal, lastInSeq );
845 case RightFinishType: {
846 /* Evaluate the Term. */
847 rtnVal = term->walk( pd );
849 /* Evaluate the FactorWithRep. */
850 FsmAp *rhs = factorWithAug->walk( pd );
852 /* Set up the priority descriptors. The left machine gets the
853 * lower priority where as the finishing transitions to the right
854 * get the higher priority. */
855 priorDescs[0].key = pd->nextPriorKey++;
856 priorDescs[0].priority = 0;
857 rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
859 /* The finishing transitions of the right machine get the higher
860 * priority. Use the same unique key. */
861 priorDescs[1].key = priorDescs[0].key;
862 priorDescs[1].priority = 1;
863 rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
865 /* Perform concatenation. */
866 rtnVal->concatOp( rhs );
867 afterOpMinimize( rtnVal, lastInSeq );
871 /* Evaluate the Term. */
872 rtnVal = term->walk( pd );
874 /* Evaluate the FactorWithRep. */
875 FsmAp *rhs = factorWithAug->walk( pd );
877 /* Set up the priority descriptors. The left machine gets the
878 * higher priority. */
879 priorDescs[0].key = pd->nextPriorKey++;
880 priorDescs[0].priority = 1;
881 rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
883 /* The right machine gets the lower priority. Since
884 * startTransPrior might unnecessarily increase the number of
885 * states during the state machine construction process (due to
886 * isolation), we use allTransPrior instead, which has the same
888 priorDescs[1].key = priorDescs[0].key;
889 priorDescs[1].priority = 0;
890 rhs->allTransPrior( pd->curPriorOrd++, &priorDescs[1] );
892 /* Perform concatenation. */
893 rtnVal->concatOp( rhs );
894 afterOpMinimize( rtnVal, lastInSeq );
897 case FactorWithAugType: {
898 rtnVal = factorWithAug->walk( pd );
905 void Term::makeNameTree( ParseData *pd )
910 case RightFinishType:
912 term->makeNameTree( pd );
913 factorWithAug->makeNameTree( pd );
915 case FactorWithAugType:
916 factorWithAug->makeNameTree( pd );
921 void Term::resolveNameRefs( ParseData *pd )
926 case RightFinishType:
928 term->resolveNameRefs( pd );
929 factorWithAug->resolveNameRefs( pd );
931 case FactorWithAugType:
932 factorWithAug->resolveNameRefs( pd );
937 /* Clean up after a factor with augmentation node. */
938 FactorWithAug::~FactorWithAug()
940 delete factorWithRep;
942 /* Walk the vector of parser actions, deleting function names. */
944 /* Clean up priority descriptors. */
945 if ( priorDescs != 0 )
949 void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd )
951 /* Assign actions. */
952 for ( int i = 0; i < actions.length(); i++ ) {
953 switch ( actions[i].type ) {
954 /* Transition actions. */
956 graph->startFsmAction( actionOrd[i], actions[i].action );
957 afterOpMinimize( graph );
960 graph->allTransAction( actionOrd[i], actions[i].action );
963 graph->finishFsmAction( actionOrd[i], actions[i].action );
966 graph->leaveFsmAction( actionOrd[i], actions[i].action );
969 /* Global error actions. */
970 case at_start_gbl_error:
971 graph->startErrorAction( actionOrd[i], actions[i].action, 0 );
972 afterOpMinimize( graph );
974 case at_all_gbl_error:
975 graph->allErrorAction( actionOrd[i], actions[i].action, 0 );
977 case at_final_gbl_error:
978 graph->finalErrorAction( actionOrd[i], actions[i].action, 0 );
980 case at_not_start_gbl_error:
981 graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 );
983 case at_not_final_gbl_error:
984 graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 );
986 case at_middle_gbl_error:
987 graph->middleErrorAction( actionOrd[i], actions[i].action, 0 );
990 /* Local error actions. */
991 case at_start_local_error:
992 graph->startErrorAction( actionOrd[i], actions[i].action,
993 actions[i].localErrKey );
994 afterOpMinimize( graph );
996 case at_all_local_error:
997 graph->allErrorAction( actionOrd[i], actions[i].action,
998 actions[i].localErrKey );
1000 case at_final_local_error:
1001 graph->finalErrorAction( actionOrd[i], actions[i].action,
1002 actions[i].localErrKey );
1004 case at_not_start_local_error:
1005 graph->notStartErrorAction( actionOrd[i], actions[i].action,
1006 actions[i].localErrKey );
1008 case at_not_final_local_error:
1009 graph->notFinalErrorAction( actionOrd[i], actions[i].action,
1010 actions[i].localErrKey );
1012 case at_middle_local_error:
1013 graph->middleErrorAction( actionOrd[i], actions[i].action,
1014 actions[i].localErrKey );
1019 graph->startEOFAction( actionOrd[i], actions[i].action );
1020 afterOpMinimize( graph );
1023 graph->allEOFAction( actionOrd[i], actions[i].action );
1026 graph->finalEOFAction( actionOrd[i], actions[i].action );
1028 case at_not_start_eof:
1029 graph->notStartEOFAction( actionOrd[i], actions[i].action );
1031 case at_not_final_eof:
1032 graph->notFinalEOFAction( actionOrd[i], actions[i].action );
1035 graph->middleEOFAction( actionOrd[i], actions[i].action );
1038 /* To State Actions. */
1039 case at_start_to_state:
1040 graph->startToStateAction( actionOrd[i], actions[i].action );
1041 afterOpMinimize( graph );
1043 case at_all_to_state:
1044 graph->allToStateAction( actionOrd[i], actions[i].action );
1046 case at_final_to_state:
1047 graph->finalToStateAction( actionOrd[i], actions[i].action );
1049 case at_not_start_to_state:
1050 graph->notStartToStateAction( actionOrd[i], actions[i].action );
1052 case at_not_final_to_state:
1053 graph->notFinalToStateAction( actionOrd[i], actions[i].action );
1055 case at_middle_to_state:
1056 graph->middleToStateAction( actionOrd[i], actions[i].action );
1059 /* From State Actions. */
1060 case at_start_from_state:
1061 graph->startFromStateAction( actionOrd[i], actions[i].action );
1062 afterOpMinimize( graph );
1064 case at_all_from_state:
1065 graph->allFromStateAction( actionOrd[i], actions[i].action );
1067 case at_final_from_state:
1068 graph->finalFromStateAction( actionOrd[i], actions[i].action );
1070 case at_not_start_from_state:
1071 graph->notStartFromStateAction( actionOrd[i], actions[i].action );
1073 case at_not_final_from_state:
1074 graph->notFinalFromStateAction( actionOrd[i], actions[i].action );
1076 case at_middle_from_state:
1077 graph->middleFromStateAction( actionOrd[i], actions[i].action );
1080 /* Remaining cases, prevented by the parser. */
1088 void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd )
1090 /* Assign priorities. */
1091 for ( int i = 0; i < priorityAugs.length(); i++ ) {
1092 switch ( priorityAugs[i].type ) {
1094 graph->startFsmPrior( priorOrd[i], &priorDescs[i]);
1095 /* Start fsm priorities are a special case that may require
1096 * minimization afterwards. */
1097 afterOpMinimize( graph );
1100 graph->allTransPrior( priorOrd[i], &priorDescs[i] );
1103 graph->finishFsmPrior( priorOrd[i], &priorDescs[i] );
1106 graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] );
1110 /* Parser Prevents this case. */
1116 void FactorWithAug::assignConditions( FsmAp *graph )
1118 for ( int i = 0; i < conditions.length(); i++ ) {
1119 switch ( conditions[i].type ) {
1120 /* Transition actions. */
1122 graph->startFsmCondition( conditions[i].action, conditions[i].sense );
1123 afterOpMinimize( graph );
1126 graph->allTransCondition( conditions[i].action, conditions[i].sense );
1129 graph->leaveFsmCondition( conditions[i].action, conditions[i].sense );
1138 /* Evaluate a factor with augmentation node. */
1139 FsmAp *FactorWithAug::walk( ParseData *pd )
1141 /* Enter into the scopes created for the labels. */
1142 NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1144 /* Make the array of function orderings. */
1146 if ( actions.length() > 0 )
1147 actionOrd = new int[actions.length()];
1149 /* First walk the list of actions, assigning order to all starting
1151 for ( int i = 0; i < actions.length(); i++ ) {
1152 if ( actions[i].type == at_start ||
1153 actions[i].type == at_start_gbl_error ||
1154 actions[i].type == at_start_local_error ||
1155 actions[i].type == at_start_to_state ||
1156 actions[i].type == at_start_from_state ||
1157 actions[i].type == at_start_eof )
1158 actionOrd[i] = pd->curActionOrd++;
1161 /* Evaluate the factor with repetition. */
1162 FsmAp *rtnVal = factorWithRep->walk( pd );
1164 /* Compute the remaining action orderings. */
1165 for ( int i = 0; i < actions.length(); i++ ) {
1166 if ( actions[i].type != at_start &&
1167 actions[i].type != at_start_gbl_error &&
1168 actions[i].type != at_start_local_error &&
1169 actions[i].type != at_start_to_state &&
1170 actions[i].type != at_start_from_state &&
1171 actions[i].type != at_start_eof )
1172 actionOrd[i] = pd->curActionOrd++;
1175 /* Embed conditions. */
1176 assignConditions( rtnVal );
1178 /* Embed actions. */
1179 assignActions( pd, rtnVal , actionOrd );
1181 /* Make the array of priority orderings. Orderings are local to this walk
1182 * of the factor with augmentation. */
1184 if ( priorityAugs.length() > 0 )
1185 priorOrd = new int[priorityAugs.length()];
1187 /* Walk all priorities, assigning the priority ordering. */
1188 for ( int i = 0; i < priorityAugs.length(); i++ )
1189 priorOrd[i] = pd->curPriorOrd++;
1191 /* If the priority descriptors have not been made, make them now. Make
1192 * priority descriptors for each priority asignment that will be passed to
1193 * the fsm. Used to keep track of the key, value and used bit. */
1194 if ( priorDescs == 0 && priorityAugs.length() > 0 ) {
1195 priorDescs = new PriorDesc[priorityAugs.length()];
1196 for ( int i = 0; i < priorityAugs.length(); i++ ) {
1197 /* Init the prior descriptor for the priority setting. */
1198 priorDescs[i].key = priorityAugs[i].priorKey;
1199 priorDescs[i].priority = priorityAugs[i].priorValue;
1203 /* Assign priorities into the machine. */
1204 assignPriorities( rtnVal, priorOrd );
1206 /* Assign epsilon transitions. */
1207 for ( int e = 0; e < epsilonLinks.length(); e++ ) {
1208 /* Get the name, which may not exist. If it doesn't then silently
1209 * ignore it because an error has already been reported. */
1210 NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++];
1211 if ( epTarg != 0 ) {
1212 /* Make the epsilon transitions. */
1213 rtnVal->epsilonTrans( epTarg->id );
1215 /* Note that we have made a link to the name. */
1216 pd->localNameScope->referencedNames.append( epTarg );
1220 /* Set entry points for labels. */
1221 if ( labels.length() > 0 ) {
1222 /* Pop the names. */
1223 pd->resetNameScope( nameFrame );
1225 /* Make labels that are referenced into entry points. */
1226 for ( int i = 0; i < labels.length(); i++ ) {
1227 pd->enterNameScope( false, 1 );
1229 /* Will always be found. */
1230 NameInst *name = pd->curNameInst;
1232 /* If the name is referenced then set the entry point. */
1233 if ( name->numRefs > 0 )
1234 rtnVal->setEntry( name->id, rtnVal->startState );
1237 pd->popNameScope( nameFrame );
1240 if ( priorOrd != 0 )
1242 if ( actionOrd != 0 )
1247 void FactorWithAug::makeNameTree( ParseData *pd )
1249 /* Add the labels to the tree of instantiated names. Each label
1250 * makes a new scope. */
1251 NameInst *prevNameInst = pd->curNameInst;
1252 for ( int i = 0; i < labels.length(); i++ )
1253 pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data, true );
1255 /* Recurse, then pop the names. */
1256 factorWithRep->makeNameTree( pd );
1257 pd->curNameInst = prevNameInst;
1261 void FactorWithAug::resolveNameRefs( ParseData *pd )
1263 /* Enter into the name scope created by any labels. */
1264 NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1266 /* Note action references. */
1267 for ( int i = 0; i < actions.length(); i++ )
1268 actions[i].action->actionRefs.append( pd->localNameScope );
1270 /* Recurse first. IMPORTANT: we must do the exact same traversal as when
1271 * the tree is constructed. */
1272 factorWithRep->resolveNameRefs( pd );
1274 /* Resolve epsilon transitions. */
1275 for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) {
1277 EpsilonLink &link = epsilonLinks[ep];
1278 NameInst *resolvedName = 0;
1280 if ( link.target.length() == 1 && strcmp( link.target.data[0], "final" ) == 0 ) {
1281 /* Epsilon drawn to an implicit final state. An implicit final is
1282 * only available in join operations. */
1283 resolvedName = pd->localNameScope->final;
1286 /* Do an search for the name. */
1288 pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 );
1289 if ( resolved.length() > 0 ) {
1290 /* Take the first one. */
1291 resolvedName = resolved[0];
1292 if ( resolved.length() > 1 ) {
1293 /* Complain about the multiple references. */
1294 error(link.loc) << "state reference " << link.target <<
1295 " resolves to multiple entry points" << endl;
1296 errorStateLabels( resolved );
1301 /* This is tricky, we stuff resolved epsilon transitions into one long
1302 * vector in the parse data structure. Since the name resolution and
1303 * graph generation both do identical walks of the parse tree we
1304 * should always find the link resolutions in the right place. */
1305 pd->epsilonResolvedLinks.append( resolvedName );
1307 if ( resolvedName != 0 ) {
1308 /* Found the name, bump of the reference count on it. */
1309 resolvedName->numRefs += 1;
1312 /* Complain, no recovery action, the epsilon op will ignore any
1313 * epsilon transitions whose names did not resolve. */
1314 error(link.loc) << "could not resolve label " << link.target << endl;
1318 if ( labels.length() > 0 )
1319 pd->popNameScope( nameFrame );
1323 /* Clean up after a factor with repetition node. */
1324 FactorWithRep::~FactorWithRep()
1327 case StarType: case StarStarType: case OptionalType: case PlusType:
1328 case ExactType: case MaxType: case MinType: case RangeType:
1329 delete factorWithRep;
1331 case FactorWithNegType:
1332 delete factorWithNeg;
1337 /* Evaluate a factor with repetition node. */
1338 FsmAp *FactorWithRep::walk( ParseData *pd )
1344 /* Evaluate the FactorWithRep. */
1345 retFsm = factorWithRep->walk( pd );
1346 if ( retFsm->startState->isFinState() ) {
1347 warning(loc) << "applying kleene star to a machine that "
1348 "accepts zero length word" << endl;
1351 /* Shift over the start action orders then do the kleene star. */
1352 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1354 afterOpMinimize( retFsm );
1357 case StarStarType: {
1358 /* Evaluate the FactorWithRep. */
1359 retFsm = factorWithRep->walk( pd );
1360 if ( retFsm->startState->isFinState() ) {
1361 warning(loc) << "applying kleene star to a machine that "
1362 "accepts zero length word" << endl;
1365 /* Set up the prior descs. All gets priority one, whereas leaving gets
1366 * priority zero. Make a unique key so that these priorities don't
1367 * interfere with any priorities set by the user. */
1368 priorDescs[0].key = pd->nextPriorKey++;
1369 priorDescs[0].priority = 1;
1370 retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
1372 /* Leaveing gets priority 0. Use same unique key. */
1373 priorDescs[1].key = priorDescs[0].key;
1374 priorDescs[1].priority = 0;
1375 retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
1377 /* Shift over the start action orders then do the kleene star. */
1378 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1380 afterOpMinimize( retFsm );
1383 case OptionalType: {
1384 /* Make the null fsm. */
1385 FsmAp *nu = new FsmAp();
1388 /* Evaluate the FactorWithRep. */
1389 retFsm = factorWithRep->walk( pd );
1391 /* Perform the question operator. */
1392 retFsm->unionOp( nu );
1393 afterOpMinimize( retFsm );
1397 /* Evaluate the FactorWithRep. */
1398 retFsm = factorWithRep->walk( pd );
1399 if ( retFsm->startState->isFinState() ) {
1400 warning(loc) << "applying plus operator to a machine that "
1401 "accpets zero length word" << endl;
1404 /* Need a duplicated for the star end. */
1405 FsmAp *dup = new FsmAp( *retFsm );
1407 /* The start func orders need to be shifted before doing the star. */
1408 pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd );
1410 /* Star the duplicate. */
1412 afterOpMinimize( dup );
1414 retFsm->concatOp( dup );
1415 afterOpMinimize( retFsm );
1419 /* Get an int from the repetition amount. */
1420 if ( lowerRep == 0 ) {
1421 /* No copies. Don't need to evaluate the factorWithRep.
1422 * This Defeats the purpose so give a warning. */
1423 warning(loc) << "exactly zero repetitions results "
1424 "in the null machine" << endl;
1426 retFsm = new FsmAp();
1427 retFsm->lambdaFsm();
1430 /* Evaluate the first FactorWithRep. */
1431 retFsm = factorWithRep->walk( pd );
1432 if ( retFsm->startState->isFinState() ) {
1433 warning(loc) << "applying repetition to a machine that "
1434 "accepts zero length word" << endl;
1437 /* The start func orders need to be shifted before doing the
1439 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1441 /* Do the repetition on the machine. Already guarded against n == 0 */
1442 retFsm->repeatOp( lowerRep );
1443 afterOpMinimize( retFsm );
1448 /* Get an int from the repetition amount. */
1449 if ( upperRep == 0 ) {
1450 /* No copies. Don't need to evaluate the factorWithRep.
1451 * This Defeats the purpose so give a warning. */
1452 warning(loc) << "max zero repetitions results "
1453 "in the null machine" << endl;
1455 retFsm = new FsmAp();
1456 retFsm->lambdaFsm();
1459 /* Evaluate the first FactorWithRep. */
1460 retFsm = factorWithRep->walk( pd );
1461 if ( retFsm->startState->isFinState() ) {
1462 warning(loc) << "applying max repetition to a machine that "
1463 "accepts zero length word" << endl;
1466 /* The start func orders need to be shifted before doing the
1468 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1470 /* Do the repetition on the machine. Already guarded against n == 0 */
1471 retFsm->optionalRepeatOp( upperRep );
1472 afterOpMinimize( retFsm );
1477 /* Evaluate the repeated machine. */
1478 retFsm = factorWithRep->walk( pd );
1479 if ( retFsm->startState->isFinState() ) {
1480 warning(loc) << "applying min repetition to a machine that "
1481 "accepts zero length word" << endl;
1484 /* The start func orders need to be shifted before doing the repetition
1485 * and the kleene star. */
1486 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1488 if ( lowerRep == 0 ) {
1489 /* Acts just like a star op on the machine to return. */
1491 afterOpMinimize( retFsm );
1494 /* Take a duplicate for the plus. */
1495 FsmAp *dup = new FsmAp( *retFsm );
1497 /* Do repetition on the first half. */
1498 retFsm->repeatOp( lowerRep );
1499 afterOpMinimize( retFsm );
1501 /* Star the duplicate. */
1503 afterOpMinimize( dup );
1505 /* Tak on the kleene star. */
1506 retFsm->concatOp( dup );
1507 afterOpMinimize( retFsm );
1512 /* Check for bogus range. */
1513 if ( upperRep - lowerRep < 0 ) {
1514 error(loc) << "invalid range repetition" << endl;
1516 /* Return null machine as recovery. */
1517 retFsm = new FsmAp();
1518 retFsm->lambdaFsm();
1520 else if ( lowerRep == 0 && upperRep == 0 ) {
1521 /* No copies. Don't need to evaluate the factorWithRep. This
1522 * defeats the purpose so give a warning. */
1523 warning(loc) << "zero to zero repetitions results "
1524 "in the null machine" << endl;
1526 retFsm = new FsmAp();
1527 retFsm->lambdaFsm();
1530 /* Now need to evaluate the repeated machine. */
1531 retFsm = factorWithRep->walk( pd );
1532 if ( retFsm->startState->isFinState() ) {
1533 warning(loc) << "applying range repetition to a machine that "
1534 "accepts zero length word" << endl;
1537 /* The start func orders need to be shifted before doing both kinds
1539 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1541 if ( lowerRep == 0 ) {
1542 /* Just doing max repetition. Already guarded against n == 0. */
1543 retFsm->optionalRepeatOp( upperRep );
1544 afterOpMinimize( retFsm );
1546 else if ( lowerRep == upperRep ) {
1547 /* Just doing exact repetition. Already guarded against n == 0. */
1548 retFsm->repeatOp( lowerRep );
1549 afterOpMinimize( retFsm );
1552 /* This is the case that 0 < lowerRep < upperRep. Take a
1553 * duplicate for the optional repeat. */
1554 FsmAp *dup = new FsmAp( *retFsm );
1556 /* Do repetition on the first half. */
1557 retFsm->repeatOp( lowerRep );
1558 afterOpMinimize( retFsm );
1560 /* Do optional repetition on the second half. */
1561 dup->optionalRepeatOp( upperRep - lowerRep );
1562 afterOpMinimize( dup );
1564 /* Tak on the duplicate machine. */
1565 retFsm->concatOp( dup );
1566 afterOpMinimize( retFsm );
1571 case FactorWithNegType: {
1572 /* Evaluate the Factor. Pass it up. */
1573 retFsm = factorWithNeg->walk( pd );
1579 void FactorWithRep::makeNameTree( ParseData *pd )
1590 factorWithRep->makeNameTree( pd );
1592 case FactorWithNegType:
1593 factorWithNeg->makeNameTree( pd );
1598 void FactorWithRep::resolveNameRefs( ParseData *pd )
1609 factorWithRep->resolveNameRefs( pd );
1611 case FactorWithNegType:
1612 factorWithNeg->resolveNameRefs( pd );
1617 /* Clean up after a factor with negation node. */
1618 FactorWithNeg::~FactorWithNeg()
1622 case CharNegateType:
1623 delete factorWithNeg;
1631 /* Evaluate a factor with negation node. */
1632 FsmAp *FactorWithNeg::walk( ParseData *pd )
1638 /* Evaluate the factorWithNeg. */
1639 FsmAp *toNegate = factorWithNeg->walk( pd );
1641 /* Negation is subtract from dot-star. */
1642 retFsm = dotStarFsm( pd );
1643 retFsm->subtractOp( toNegate );
1644 afterOpMinimize( retFsm );
1647 case CharNegateType: {
1648 /* Evaluate the factorWithNeg. */
1649 FsmAp *toNegate = factorWithNeg->walk( pd );
1651 /* CharNegation is subtract from dot. */
1652 retFsm = dotFsm( pd );
1653 retFsm->subtractOp( toNegate );
1654 afterOpMinimize( retFsm );
1658 /* Evaluate the Factor. Pass it up. */
1659 retFsm = factor->walk( pd );
1665 void FactorWithNeg::makeNameTree( ParseData *pd )
1669 case CharNegateType:
1670 factorWithNeg->makeNameTree( pd );
1673 factor->makeNameTree( pd );
1678 void FactorWithNeg::resolveNameRefs( ParseData *pd )
1682 case CharNegateType:
1683 factorWithNeg->resolveNameRefs( pd );
1686 factor->resolveNameRefs( pd );
1691 /* Clean up after a factor node. */
1712 case LongestMatchType:
1713 delete longestMatch;
1718 /* Evaluate a factor node. */
1719 FsmAp *Factor::walk( ParseData *pd )
1724 rtnVal = literal->walk( pd );
1727 rtnVal = range->walk( pd );
1730 rtnVal = reItem->walk( pd, 0 );
1733 rtnVal = regExpr->walk( pd, 0 );
1736 rtnVal = varDef->walk( pd );
1739 rtnVal = join->walk( pd );
1741 case LongestMatchType:
1742 rtnVal = longestMatch->walk( pd );
1749 void Factor::makeNameTree( ParseData *pd )
1758 varDef->makeNameTree( loc, pd );
1761 join->makeNameTree( pd );
1763 case LongestMatchType:
1764 longestMatch->makeNameTree( pd );
1769 void Factor::resolveNameRefs( ParseData *pd )
1778 varDef->resolveNameRefs( pd );
1781 join->resolveNameRefs( pd );
1783 case LongestMatchType:
1784 longestMatch->resolveNameRefs( pd );
1789 /* Clean up a range object. Must delete the two literals. */
1796 /* Evaluate a range. Gets the lower an upper key and makes an fsm range. */
1797 FsmAp *Range::walk( ParseData *pd )
1799 /* Construct and verify the suitability of the lower end of the range. */
1800 FsmAp *lowerFsm = lowerLit->walk( pd );
1801 if ( !lowerFsm->checkSingleCharMachine() ) {
1802 error(lowerLit->token.loc) <<
1803 "bad range lower end, must be a single character" << endl;
1806 /* Construct and verify the upper end. */
1807 FsmAp *upperFsm = upperLit->walk( pd );
1808 if ( !upperFsm->checkSingleCharMachine() ) {
1809 error(upperLit->token.loc) <<
1810 "bad range upper end, must be a single character" << endl;
1813 /* Grab the keys from the machines, then delete them. */
1814 Key lowKey = lowerFsm->startState->outList.head->lowKey;
1815 Key highKey = upperFsm->startState->outList.head->lowKey;
1819 /* Validate the range. */
1820 if ( lowKey > highKey ) {
1821 /* Recover by setting upper to lower; */
1822 error(lowerLit->token.loc) << "lower end of range is greater then upper end" << endl;
1826 /* Return the range now that it is validated. */
1827 FsmAp *retFsm = new FsmAp();
1828 retFsm->rangeFsm( lowKey, highKey );
1832 /* Evaluate a literal object. */
1833 FsmAp *Literal::walk( ParseData *pd )
1835 /* FsmAp to return, is the alphabet signed. */
1840 /* Make the fsm key in int format. */
1841 Key fsmKey = makeFsmKeyNum( token.data, token.loc, pd );
1842 /* Make the new machine. */
1843 rtnVal = new FsmAp();
1844 rtnVal->concatFsm( fsmKey );
1848 /* Make the array of keys in int format. */
1850 bool caseInsensitive;
1851 token.prepareLitString( interp, caseInsensitive );
1852 Key *arr = new Key[interp.length];
1853 makeFsmKeyArray( arr, interp.data, interp.length, pd );
1855 /* Make the new machine. */
1856 rtnVal = new FsmAp();
1857 if ( caseInsensitive )
1858 rtnVal->concatFsmCI( arr, interp.length );
1860 rtnVal->concatFsm( arr, interp.length );
1861 delete[] interp.data;
1868 /* Clean up after a regular expression object. */
1881 /* Evaluate a regular expression object. */
1882 FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex )
1884 /* This is the root regex, pass down a pointer to this. */
1885 if ( rootRegex == 0 )
1891 /* Walk both items. */
1892 rtnVal = regExpr->walk( pd, rootRegex );
1893 FsmAp *fsm2 = item->walk( pd, rootRegex );
1894 rtnVal->concatOp( fsm2 );
1898 rtnVal = new FsmAp();
1899 rtnVal->lambdaFsm();
1906 /* Clean up after an item in a regular expression. */
1920 /* Evaluate a regular expression object. */
1921 FsmAp *ReItem::walk( ParseData *pd, RegExpr *rootRegex )
1923 /* The fsm to return, is the alphabet signed? */
1928 /* Move the data into an integer array and make a concat fsm. */
1929 Key *arr = new Key[token.length];
1930 makeFsmKeyArray( arr, token.data, token.length, pd );
1932 /* Make the concat fsm. */
1933 rtnVal = new FsmAp();
1934 if ( rootRegex != 0 && rootRegex->caseInsensitive )
1935 rtnVal->concatFsmCI( arr, token.length );
1937 rtnVal->concatFsm( arr, token.length );
1942 /* Make the dot fsm. */
1943 rtnVal = dotFsm( pd );
1947 /* Get the or block and minmize it. */
1948 rtnVal = orBlock->walk( pd, rootRegex );
1949 if ( rtnVal == 0 ) {
1950 rtnVal = new FsmAp();
1951 rtnVal->lambdaFsm();
1953 rtnVal->minimizePartition2();
1957 /* Get the or block and minimize it. */
1958 FsmAp *fsm = orBlock->walk( pd, rootRegex );
1959 fsm->minimizePartition2();
1961 /* Make a dot fsm and subtract from it. */
1962 rtnVal = dotFsm( pd );
1963 rtnVal->subtractOp( fsm );
1964 rtnVal->minimizePartition2();
1969 /* If the item is followed by a star, then apply the star op. */
1971 if ( rtnVal->startState->isFinState() ) {
1972 warning(loc) << "applying kleene star to a machine that "
1973 "accpets zero length word" << endl;
1977 rtnVal->minimizePartition2();
1982 /* Clean up after an or block of a regular expression. */
1983 ReOrBlock::~ReOrBlock()
1996 /* Evaluate an or block of a regular expression. */
1997 FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex )
2002 /* Evaluate the two fsm. */
2003 FsmAp *fsm1 = orBlock->walk( pd, rootRegex );
2004 FsmAp *fsm2 = item->walk( pd, rootRegex );
2008 fsm1->unionOp( fsm2 );
2021 /* Evaluate an or block item of a regular expression. */
2022 FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex )
2024 /* The return value, is the alphabet signed? */
2028 /* Make the or machine. */
2029 rtnVal = new FsmAp();
2031 /* Put the or data into an array of ints. Note that we find unique
2032 * keys. Duplicates are silently ignored. The alternative would be to
2033 * issue warning or an error but since we can't with [a0-9a] or 'a' |
2034 * 'a' don't bother here. */
2036 makeFsmUniqueKeyArray( keySet, token.data, token.length,
2037 rootRegex != 0 ? rootRegex->caseInsensitive : false, pd );
2039 /* Run the or operator. */
2040 rtnVal->orFsm( keySet.data, keySet.length() );
2044 /* Make the upper and lower keys. */
2045 Key lowKey = makeFsmKeyChar( lower, pd );
2046 Key highKey = makeFsmKeyChar( upper, pd );
2048 /* Validate the range. */
2049 if ( lowKey > highKey ) {
2050 /* Recover by setting upper to lower; */
2051 error(loc) << "lower end of range is greater then upper end" << endl;
2055 /* Make the range machine. */
2056 rtnVal = new FsmAp();
2057 rtnVal->rangeFsm( lowKey, highKey );
2059 if ( rootRegex != 0 && rootRegex->caseInsensitive ) {
2060 if ( lowKey <= 'Z' && 'A' <= highKey ) {
2061 Key otherLow = lowKey < 'A' ? Key('A') : lowKey;
2062 Key otherHigh = 'Z' < highKey ? Key('Z') : highKey;
2064 otherLow = 'a' + ( otherLow - 'A' );
2065 otherHigh = 'a' + ( otherHigh - 'A' );
2067 FsmAp *otherRange = new FsmAp();
2068 otherRange->rangeFsm( otherLow, otherHigh );
2069 rtnVal->unionOp( otherRange );
2070 rtnVal->minimizePartition2();
2072 else if ( lowKey <= 'z' && 'a' <= highKey ) {
2073 Key otherLow = lowKey < 'a' ? Key('a') : lowKey;
2074 Key otherHigh = 'z' < highKey ? Key('z') : highKey;
2076 otherLow = 'A' + ( otherLow - 'a' );
2077 otherHigh = 'A' + ( otherHigh - 'a' );
2079 FsmAp *otherRange = new FsmAp();
2080 otherRange->rangeFsm( otherLow, otherHigh );
2081 rtnVal->unionOp( otherRange );
2082 rtnVal->minimizePartition2();