2 * Copyright 2001-2006 Adrian Thurston <thurston@complang.org>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
31 #include "parsetree.h"
34 ostream &operator<<( ostream &out, const NameRef &nameRef );
35 ostream &operator<<( ostream &out, const NameInst &nameInst );
37 /* Convert the literal string which comes in from the scanner into an array of
38 * characters with escapes and options interpreted. Also null terminates the
39 * string. Though this null termination should not be relied on for
40 * interpreting literals in the parser because the string may contain \0 */
41 char *prepareLitString( const InputLoc &loc, const char *data, long length,
42 long &resLen, bool &caseInsensitive )
44 char *resData = new char[length+1];
45 caseInsensitive = false;
47 const char *src = data + 1;
48 const char *end = data + length - 1;
50 while ( *end != '\'' && *end != '\"' ) {
52 caseInsensitive = true;
54 error( loc ) << "literal string '" << *end <<
55 "' option not supported" << endl;
62 while ( src != end ) {
65 case '0': dest[len++] = '\0'; break;
66 case 'a': dest[len++] = '\a'; break;
67 case 'b': dest[len++] = '\b'; break;
68 case 't': dest[len++] = '\t'; break;
69 case 'n': dest[len++] = '\n'; break;
70 case 'v': dest[len++] = '\v'; break;
71 case 'f': dest[len++] = '\f'; break;
72 case 'r': dest[len++] = '\r'; break;
74 default: dest[len++] = src[1]; break;
88 FsmAp *VarDef::walk( ParseData *pd )
90 /* We enter into a new name scope. */
91 NameFrame nameFrame = pd->enterNameScope( true, 1 );
93 /* Recurse on the expression. */
94 FsmAp *rtnVal = machineDef->walk( pd );
96 /* Do the tranfer of local error actions. */
97 LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name );
98 if ( localErrDictEl != 0 ) {
99 for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ )
100 rtnVal->transferErrorActions( state, localErrDictEl->value );
103 /* If the expression below is a join operation with multiple expressions
104 * then it just had epsilon transisions resolved. If it is a join
105 * with only a single expression then run the epsilon op now. */
106 if ( machineDef->type == MachineDef::JoinType && machineDef->join->exprList.length() == 1 )
109 /* We can now unset entry points that are not longer used. */
110 pd->unsetObsoleteEntries( rtnVal );
112 /* If the name of the variable is referenced then add the entry point to
114 if ( pd->curNameInst->numRefs > 0 )
115 rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState );
117 /* Pop the name scope. */
118 pd->popNameScope( nameFrame );
122 void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd )
124 /* The variable definition enters a new scope. */
125 NameInst *prevNameInst = pd->curNameInst;
126 pd->curNameInst = pd->addNameInst( loc, name, false );
128 if ( machineDef->type == MachineDef::LongestMatchType )
129 pd->curNameInst->isLongestMatch = true;
132 machineDef->makeNameTree( pd );
134 /* The name scope ends, pop the name instantiation. */
135 pd->curNameInst = prevNameInst;
138 void VarDef::resolveNameRefs( ParseData *pd )
140 /* Entering into a new scope. */
141 NameFrame nameFrame = pd->enterNameScope( true, 1 );
144 machineDef->resolveNameRefs( pd );
146 /* The name scope ends, pop the name instantiation. */
147 pd->popNameScope( nameFrame );
150 InputLoc LongestMatchPart::getLoc()
152 return action != 0 ? action->loc : semiLoc;
156 * If there are any LMs then all of the following entry points must reset
159 * 1. fentry(StateRef)
160 * 2. ftoto(StateRef), fcall(StateRef), fnext(StateRef)
161 * 3. targt of any transition that has an fcall (the return loc).
162 * 4. start state of all longest match routines.
165 Action *LongestMatch::newAction( ParseData *pd, const InputLoc &loc,
166 const char *name, InlineList *inlineList )
168 Action *action = new Action( loc, name, inlineList, pd->nextCondId++ );
169 action->actionRefs.append( pd->curNameInst );
170 pd->actionList.append( action );
171 action->isLmAction = true;
175 void LongestMatch::makeActions( ParseData *pd )
177 /* Make actions that set the action id. */
178 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
179 /* For each part create actions for setting the match type. We need
180 * to do this so that the actions will go into the actionIndex. */
181 InlineList *inlineList = new InlineList;
182 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
183 InlineItem::LmSetActId ) );
184 char *actName = new char[50];
185 sprintf( actName, "store%i", lmi->longestMatchId );
186 lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList );
189 /* Make actions that execute the user action and restart on the last
191 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
192 /* For each part create actions for setting the match type. We need
193 * to do this so that the actions will go into the actionIndex. */
194 InlineList *inlineList = new InlineList;
195 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
196 InlineItem::LmOnLast ) );
197 char *actName = new char[50];
198 sprintf( actName, "last%i", lmi->longestMatchId );
199 lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList );
202 /* Make actions that execute the user action and restart on the next
203 * character. These actions will set tokend themselves (it is the current
205 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
206 /* For each part create actions for setting the match type. We need
207 * to do this so that the actions will go into the actionIndex. */
208 InlineList *inlineList = new InlineList;
209 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
210 InlineItem::LmOnNext ) );
211 char *actName = new char[50];
212 sprintf( actName, "next%i", lmi->longestMatchId );
213 lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList );
216 /* Make actions that execute the user action and restart at tokend. These
217 * actions execute some time after matching the last char. */
218 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
219 /* For each part create actions for setting the match type. We need
220 * to do this so that the actions will go into the actionIndex. */
221 InlineList *inlineList = new InlineList;
222 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
223 InlineItem::LmOnLagBehind ) );
224 char *actName = new char[50];
225 sprintf( actName, "lag%i", lmi->longestMatchId );
226 lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList );
232 loc.fileName = "NONE";
234 /* Create the error action. */
235 InlineList *il6 = new InlineList;
236 il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) );
237 lmActSelect = newAction( pd, loc, "switch", il6 );
240 void LongestMatch::findName( ParseData *pd )
242 NameInst *nameInst = pd->curNameInst;
243 while ( nameInst->name == 0 ) {
244 nameInst = nameInst->parent;
245 /* Since every machine must must have a name, we should always find a
246 * name for the longest match. */
247 assert( nameInst != 0 );
249 name = nameInst->name;
252 void LongestMatch::makeNameTree( ParseData *pd )
254 /* Create an anonymous scope for the longest match. Will be used for
255 * restarting machine after matching a token. */
256 NameInst *prevNameInst = pd->curNameInst;
257 pd->curNameInst = pd->addNameInst( loc, 0, false );
259 /* Recurse into all parts of the longest match operator. */
260 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ )
261 lmi->join->makeNameTree( pd );
263 /* Traverse the name tree upwards to find a name for this lm. */
266 /* Also make the longest match's actions at this point. */
269 /* The name scope ends, pop the name instantiation. */
270 pd->curNameInst = prevNameInst;
273 void LongestMatch::resolveNameRefs( ParseData *pd )
275 /* The longest match gets its own name scope. */
276 NameFrame nameFrame = pd->enterNameScope( true, 1 );
278 /* Take an action reference for each longest match item and recurse. */
279 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
280 /* Record the reference if the item has an action. */
281 if ( lmi->action != 0 )
282 lmi->action->actionRefs.append( pd->localNameScope );
284 /* Recurse down the join. */
285 lmi->join->resolveNameRefs( pd );
288 /* The name scope ends, pop the name instantiation. */
289 pd->popNameScope( nameFrame );
292 void LongestMatch::restart( FsmAp *graph, TransAp *trans )
294 StateAp *fromState = trans->fromState;
295 graph->detachTrans( fromState, trans->toState, trans );
296 graph->attachTrans( fromState, graph->startState, trans );
299 void LongestMatch::runLongestMatch( ParseData *pd, FsmAp *graph )
301 graph->markReachableFromHereStopFinal( graph->startState );
302 for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
303 if ( ms->stateBits & STB_ISMARKED ) {
304 ms->lmItemSet.insert( 0 );
305 ms->stateBits &= ~ STB_ISMARKED;
309 /* Transfer the first item of non-empty lmAction tables to the item sets
310 * of the states that follow. Exclude states that have no transitions out.
311 * This must happen on a separate pass so that on each iteration of the
312 * next pass we have the item set entries from all lmAction tables. */
313 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
314 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
315 if ( trans->lmActionTable.length() > 0 ) {
316 LmActionTableEl *lmAct = trans->lmActionTable.data;
317 StateAp *toState = trans->toState;
320 /* Can only optimize this if there are no transitions out.
321 * Note there can be out transitions going nowhere with
322 * actions and they too must inhibit this optimization. */
323 if ( toState->outList.length() > 0 ) {
324 /* Fill the item sets. */
325 graph->markReachableFromHereStopFinal( toState );
326 for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
327 if ( ms->stateBits & STB_ISMARKED ) {
328 ms->lmItemSet.insert( lmAct->value );
329 ms->stateBits &= ~ STB_ISMARKED;
337 /* The lmItem sets are now filled, telling us which longest match rules
338 * can succeed in which states. First determine if we need to make sure
339 * act is defaulted to zero. We need to do this if there are any states
340 * with lmItemSet.length() > 1 and NULL is included. That is, that the
341 * switch may get called when in fact nothing has been matched. */
342 int maxItemSetLength = 0;
343 graph->markReachableFromHereStopFinal( graph->startState );
344 for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
345 if ( ms->stateBits & STB_ISMARKED ) {
346 if ( ms->lmItemSet.length() > maxItemSetLength )
347 maxItemSetLength = ms->lmItemSet.length();
348 ms->stateBits &= ~ STB_ISMARKED;
352 /* The actions executed on starting to match a token. */
353 graph->isolateStartState();
354 graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart );
355 graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
356 if ( maxItemSetLength > 1 ) {
357 /* The longest match action switch may be called when tokens are
358 * matched, in which case act must be initialized, there must be a
359 * case to handle the error, and the generated machine will require an
361 lmSwitchHandlesError = true;
362 pd->lmRequiresErrorState = true;
363 graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
366 /* The place to store transitions to restart. It maybe possible for the
367 * restarting to affect the searching through the graph that follows. For
368 * now take the safe route and save the list of transitions to restart
369 * until after all searching is done. */
370 Vector<TransAp*> restartTrans;
372 /* Set actions that do immediate token recognition, set the longest match part
373 * id and set the token ending. */
374 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
375 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
376 if ( trans->lmActionTable.length() > 0 ) {
377 LmActionTableEl *lmAct = trans->lmActionTable.data;
378 StateAp *toState = trans->toState;
381 /* Can only optimize this if there are no transitions out.
382 * Note there can be out transitions going nowhere with
383 * actions and they too must inhibit this optimization. */
384 if ( toState->outList.length() == 0 ) {
385 /* Can execute the immediate action for the longest match
386 * part. Redirect the action to the start state.
388 * NOTE: When we need to inhibit on_last due to leaving
389 * actions the above test suffices. If the state has out
390 * actions then it will fail because the out action will
391 * have been transferred to an error transition, which
392 * makes the outlist non-empty. */
393 trans->actionTable.setAction( lmAct->key,
394 lmAct->value->actOnLast );
395 restartTrans.append( trans );
398 /* Look for non final states that have a non-empty item
399 * set. If these are present then we need to record the
400 * end of the token. Also Find the highest item set
401 * length reachable from here (excluding at transtions to
403 bool nonFinalNonEmptyItemSet = false;
404 maxItemSetLength = 0;
405 graph->markReachableFromHereStopFinal( toState );
406 for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
407 if ( ms->stateBits & STB_ISMARKED ) {
408 if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
409 nonFinalNonEmptyItemSet = true;
410 if ( ms->lmItemSet.length() > maxItemSetLength )
411 maxItemSetLength = ms->lmItemSet.length();
412 ms->stateBits &= ~ STB_ISMARKED;
416 /* If there are reachable states that are not final and
417 * have non empty item sets or that have an item set
418 * length greater than one then we need to set tokend
419 * because the error action that matches the token will
421 if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
422 trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
424 /* Some states may not know which longest match item to
425 * execute, must set it. */
426 if ( maxItemSetLength > 1 ) {
427 /* There are transitions out, another match may come. */
428 trans->actionTable.setAction( lmAct->key,
429 lmAct->value->setActId );
436 /* Now that all graph searching is done it certainly safe set the
437 * restarting. It may be safe above, however this must be verified. */
438 for ( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ )
439 restart( graph, *pt );
441 int lmErrActionOrd = pd->curActionOrd++;
443 /* Embed the error for recognizing a char. */
444 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
445 if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
446 if ( st->isFinState() ) {
447 /* On error execute the onActNext action, which knows that
448 * the last character of the token was one back and restart. */
449 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
450 &st->lmItemSet[0]->actOnNext, 1 );
451 st->eofActionTable.setAction( lmErrActionOrd,
452 st->lmItemSet[0]->actOnNext );
453 st->eofTarget = graph->startState;
456 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
457 &st->lmItemSet[0]->actLagBehind, 1 );
458 st->eofActionTable.setAction( lmErrActionOrd,
459 st->lmItemSet[0]->actLagBehind );
460 st->eofTarget = graph->startState;
463 else if ( st->lmItemSet.length() > 1 ) {
464 /* Need to use the select. Take note of which items the select
465 * is needed for so only the necessary actions are included. */
466 for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
468 (*plmi)->inLmSelect = true;
470 /* On error, execute the action select and go to the start state. */
471 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
473 st->eofActionTable.setAction( lmErrActionOrd, lmActSelect );
474 st->eofTarget = graph->startState;
478 /* Finally, the start state should be made final. */
479 graph->setFinState( graph->startState );
482 void LongestMatch::transferScannerLeavingActions( FsmAp *graph )
484 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
485 if ( st->outActionTable.length() > 0 )
486 graph->setErrorActions( st, st->outActionTable );
490 FsmAp *LongestMatch::walk( ParseData *pd )
492 /* The longest match has it's own name scope. */
493 NameFrame nameFrame = pd->enterNameScope( true, 1 );
495 /* Make each part of the longest match. */
496 FsmAp **parts = new FsmAp*[longestMatchList->length()];
497 LmPartList::Iter lmi = *longestMatchList;
498 for ( int i = 0; lmi.lte(); lmi++, i++ ) {
499 /* Create the machine and embed the setting of the longest match id. */
500 parts[i] = lmi->join->walk( pd );
501 parts[i]->longMatchAction( pd->curActionOrd++, lmi );
504 /* Before we union the patterns we need to deal with leaving actions. They
505 * are transfered to error transitions out of the final states (like local
506 * error actions) and to eof actions. In the scanner we need to forbid
507 * on_last for any final state that has an leaving action. */
508 for ( int i = 0; i < longestMatchList->length(); i++ )
509 transferScannerLeavingActions( parts[i] );
511 /* Union machines one and up with machine zero. The grammar dictates that
512 * there will always be at least one part. */
513 FsmAp *rtnVal = parts[0];
514 for ( int i = 1; i < longestMatchList->length(); i++ ) {
515 rtnVal->unionOp( parts[i] );
516 afterOpMinimize( rtnVal );
519 runLongestMatch( pd, rtnVal );
521 /* Pop the name scope. */
522 pd->popNameScope( nameFrame );
528 FsmAp *MachineDef::walk( ParseData *pd )
533 rtnVal = join->walk( pd );
535 case LongestMatchType:
536 rtnVal = longestMatch->walk( pd );
539 condData->lastCondKey.increment();
540 rtnVal = new FsmAp();
541 rtnVal->concatFsm( condData->lastCondKey );
547 void MachineDef::makeNameTree( ParseData *pd )
551 join->makeNameTree( pd );
553 case LongestMatchType:
554 longestMatch->makeNameTree( pd );
561 void MachineDef::resolveNameRefs( ParseData *pd )
565 join->resolveNameRefs( pd );
567 case LongestMatchType:
568 longestMatch->resolveNameRefs( pd );
576 /* Construct with a location and the first expression. */
577 Join::Join( const InputLoc &loc, Expression *expr )
581 exprList.append( expr );
584 /* Construct with a location and the first expression. */
585 Join::Join( Expression *expr )
589 exprList.append( expr );
592 /* Walk an expression node. */
593 FsmAp *Join::walk( ParseData *pd )
595 if ( exprList.length() > 1 )
596 return walkJoin( pd );
598 return exprList.head->walk( pd );
601 /* There is a list of expressions to join. */
602 FsmAp *Join::walkJoin( ParseData *pd )
604 /* We enter into a new name scope. */
605 NameFrame nameFrame = pd->enterNameScope( true, 1 );
607 /* Evaluate the machines. */
608 FsmAp **fsms = new FsmAp*[exprList.length()];
609 ExprList::Iter expr = exprList;
610 for ( int e = 0; e < exprList.length(); e++, expr++ )
611 fsms[e] = expr->walk( pd );
613 /* Get the start and final names. Final is
614 * guaranteed to exist, start is not. */
615 NameInst *startName = pd->curNameInst->start;
616 NameInst *finalName = pd->curNameInst->final;
619 if ( startName != 0 ) {
620 /* Take note that there was an implicit link to the start machine. */
621 pd->localNameScope->referencedNames.append( startName );
622 startId = startName->id;
625 /* A final id of -1 indicates there is no epsilon that references the
626 * final state, therefor do not create one or set an entry point to it. */
628 if ( finalName->numRefs > 0 )
629 finalId = finalName->id;
631 /* Join machines 1 and up onto machine 0. */
632 FsmAp *retFsm = fsms[0];
633 retFsm->joinOp( startId, finalId, fsms+1, exprList.length()-1 );
635 /* We can now unset entry points that are not longer used. */
636 pd->unsetObsoleteEntries( retFsm );
638 /* Pop the name scope. */
639 pd->popNameScope( nameFrame );
645 void Join::makeNameTree( ParseData *pd )
647 if ( exprList.length() > 1 ) {
648 /* Create the new anonymous scope. */
649 NameInst *prevNameInst = pd->curNameInst;
650 pd->curNameInst = pd->addNameInst( loc, 0, false );
652 /* Join scopes need an implicit "final" target. */
653 pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final",
654 pd->nextNameId++, false );
656 /* Recurse into all expressions in the list. */
657 for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
658 expr->makeNameTree( pd );
660 /* The name scope ends, pop the name instantiation. */
661 pd->curNameInst = prevNameInst;
664 /* Recurse into the single expression. */
665 exprList.head->makeNameTree( pd );
670 void Join::resolveNameRefs( ParseData *pd )
672 /* Branch on whether or not there is to be a join. */
673 if ( exprList.length() > 1 ) {
674 /* The variable definition enters a new scope. */
675 NameFrame nameFrame = pd->enterNameScope( true, 1 );
677 /* The join scope must contain a start label. */
678 NameSet resolved = pd->resolvePart( pd->localNameScope, "start", true );
679 if ( resolved.length() > 0 ) {
680 /* Take the first. */
681 pd->curNameInst->start = resolved[0];
682 if ( resolved.length() > 1 ) {
683 /* Complain about the multiple references. */
684 error(loc) << "join operation has multiple start labels" << endl;
685 errorStateLabels( resolved );
689 /* Make sure there is a start label. */
690 if ( pd->curNameInst->start != 0 ) {
691 /* There is an implicit reference to start name. */
692 pd->curNameInst->start->numRefs += 1;
695 /* No start label. */
696 error(loc) << "join operation has no start label" << endl;
699 /* Recurse into all expressions in the list. */
700 for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
701 expr->resolveNameRefs( pd );
703 /* The name scope ends, pop the name instantiation. */
704 pd->popNameScope( nameFrame );
707 /* Recurse into the single expression. */
708 exprList.head->resolveNameRefs( pd );
712 /* Clean up after an expression node. */
713 Expression::~Expression()
716 case OrType: case IntersectType: case SubtractType:
717 case StrongSubtractType:
729 /* Evaluate a single expression node. */
730 FsmAp *Expression::walk( ParseData *pd, bool lastInSeq )
735 /* Evaluate the expression. */
736 rtnVal = expression->walk( pd, false );
737 /* Evaluate the term. */
738 FsmAp *rhs = term->walk( pd );
740 rtnVal->unionOp( rhs );
741 afterOpMinimize( rtnVal, lastInSeq );
744 case IntersectType: {
745 /* Evaluate the expression. */
746 rtnVal = expression->walk( pd );
747 /* Evaluate the term. */
748 FsmAp *rhs = term->walk( pd );
749 /* Perform intersection. */
750 rtnVal->intersectOp( rhs );
751 afterOpMinimize( rtnVal, lastInSeq );
755 /* Evaluate the expression. */
756 rtnVal = expression->walk( pd );
757 /* Evaluate the term. */
758 FsmAp *rhs = term->walk( pd );
759 /* Perform subtraction. */
760 rtnVal->subtractOp( rhs );
761 afterOpMinimize( rtnVal, lastInSeq );
764 case StrongSubtractType: {
765 /* Evaluate the expression. */
766 rtnVal = expression->walk( pd );
768 /* Evaluate the term and pad it with any* machines. */
769 FsmAp *rhs = dotStarFsm( pd );
770 FsmAp *termFsm = term->walk( pd );
771 FsmAp *trailAnyStar = dotStarFsm( pd );
772 rhs->concatOp( termFsm );
773 rhs->concatOp( trailAnyStar );
775 /* Perform subtraction. */
776 rtnVal->subtractOp( rhs );
777 afterOpMinimize( rtnVal, lastInSeq );
781 /* Return result of the term. */
782 rtnVal = term->walk( pd );
786 /* Duplicate the builtin. */
787 rtnVal = makeBuiltin( builtin, pd );
795 void Expression::makeNameTree( ParseData *pd )
801 case StrongSubtractType:
802 expression->makeNameTree( pd );
803 term->makeNameTree( pd );
806 term->makeNameTree( pd );
813 void Expression::resolveNameRefs( ParseData *pd )
819 case StrongSubtractType:
820 expression->resolveNameRefs( pd );
821 term->resolveNameRefs( pd );
824 term->resolveNameRefs( pd );
831 /* Clean up after a term node. */
837 case RightFinishType:
840 delete factorWithAug;
842 case FactorWithAugType:
843 delete factorWithAug;
848 /* Evaluate a term node. */
849 FsmAp *Term::walk( ParseData *pd, bool lastInSeq )
854 /* Evaluate the Term. */
855 rtnVal = term->walk( pd, false );
856 /* Evaluate the FactorWithRep. */
857 FsmAp *rhs = factorWithAug->walk( pd );
858 /* Perform concatenation. */
859 rtnVal->concatOp( rhs );
860 afterOpMinimize( rtnVal, lastInSeq );
863 case RightStartType: {
864 /* Evaluate the Term. */
865 rtnVal = term->walk( pd );
867 /* Evaluate the FactorWithRep. */
868 FsmAp *rhs = factorWithAug->walk( pd );
870 /* Set up the priority descriptors. The left machine gets the
871 * lower priority where as the right get the higher start priority. */
872 priorDescs[0].key = pd->nextPriorKey++;
873 priorDescs[0].priority = 0;
874 rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
876 /* The start transitions of the right machine gets the higher
877 * priority. Use the same unique key. */
878 priorDescs[1].key = priorDescs[0].key;
879 priorDescs[1].priority = 1;
880 rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
882 /* Perform concatenation. */
883 rtnVal->concatOp( rhs );
884 afterOpMinimize( rtnVal, lastInSeq );
887 case RightFinishType: {
888 /* Evaluate the Term. */
889 rtnVal = term->walk( pd );
891 /* Evaluate the FactorWithRep. */
892 FsmAp *rhs = factorWithAug->walk( pd );
894 /* Set up the priority descriptors. The left machine gets the
895 * lower priority where as the finishing transitions to the right
896 * get the higher priority. */
897 priorDescs[0].key = pd->nextPriorKey++;
898 priorDescs[0].priority = 0;
899 rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
901 /* The finishing transitions of the right machine get the higher
902 * priority. Use the same unique key. */
903 priorDescs[1].key = priorDescs[0].key;
904 priorDescs[1].priority = 1;
905 rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
907 /* If the right machine's start state is final we need to guard
908 * against the left machine persisting by moving through the empty
910 if ( rhs->startState->isFinState() ) {
911 rhs->startState->outPriorTable.setPrior(
912 pd->curPriorOrd++, &priorDescs[1] );
915 /* Perform concatenation. */
916 rtnVal->concatOp( rhs );
917 afterOpMinimize( rtnVal, lastInSeq );
921 /* Evaluate the Term. */
922 rtnVal = term->walk( pd );
924 /* Evaluate the FactorWithRep. */
925 FsmAp *rhs = factorWithAug->walk( pd );
927 /* Set up the priority descriptors. The left machine gets the
928 * higher priority. */
929 priorDescs[0].key = pd->nextPriorKey++;
930 priorDescs[0].priority = 1;
931 rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
933 /* The right machine gets the lower priority. We cannot use
934 * allTransPrior here in case the start state of the right machine
935 * is final. It would allow the right machine thread to run along
936 * with the left if just passing through the start state. Using
937 * startFsmPrior prevents this. */
938 priorDescs[1].key = priorDescs[0].key;
939 priorDescs[1].priority = 0;
940 rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
942 /* Perform concatenation. */
943 rtnVal->concatOp( rhs );
944 afterOpMinimize( rtnVal, lastInSeq );
947 case FactorWithAugType: {
948 rtnVal = factorWithAug->walk( pd );
955 void Term::makeNameTree( ParseData *pd )
960 case RightFinishType:
962 term->makeNameTree( pd );
963 factorWithAug->makeNameTree( pd );
965 case FactorWithAugType:
966 factorWithAug->makeNameTree( pd );
971 void Term::resolveNameRefs( ParseData *pd )
976 case RightFinishType:
978 term->resolveNameRefs( pd );
979 factorWithAug->resolveNameRefs( pd );
981 case FactorWithAugType:
982 factorWithAug->resolveNameRefs( pd );
987 /* Clean up after a factor with augmentation node. */
988 FactorWithAug::~FactorWithAug()
990 delete factorWithRep;
992 /* Walk the vector of parser actions, deleting function names. */
994 /* Clean up priority descriptors. */
995 if ( priorDescs != 0 )
999 void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd )
1001 /* Assign actions. */
1002 for ( int i = 0; i < actions.length(); i++ ) {
1003 switch ( actions[i].type ) {
1004 /* Transition actions. */
1006 graph->startFsmAction( actionOrd[i], actions[i].action );
1007 afterOpMinimize( graph );
1010 graph->allTransAction( actionOrd[i], actions[i].action );
1013 graph->finishFsmAction( actionOrd[i], actions[i].action );
1016 graph->leaveFsmAction( actionOrd[i], actions[i].action );
1019 /* Global error actions. */
1020 case at_start_gbl_error:
1021 graph->startErrorAction( actionOrd[i], actions[i].action, 0 );
1022 afterOpMinimize( graph );
1024 case at_all_gbl_error:
1025 graph->allErrorAction( actionOrd[i], actions[i].action, 0 );
1027 case at_final_gbl_error:
1028 graph->finalErrorAction( actionOrd[i], actions[i].action, 0 );
1030 case at_not_start_gbl_error:
1031 graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 );
1033 case at_not_final_gbl_error:
1034 graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 );
1036 case at_middle_gbl_error:
1037 graph->middleErrorAction( actionOrd[i], actions[i].action, 0 );
1040 /* Local error actions. */
1041 case at_start_local_error:
1042 graph->startErrorAction( actionOrd[i], actions[i].action,
1043 actions[i].localErrKey );
1044 afterOpMinimize( graph );
1046 case at_all_local_error:
1047 graph->allErrorAction( actionOrd[i], actions[i].action,
1048 actions[i].localErrKey );
1050 case at_final_local_error:
1051 graph->finalErrorAction( actionOrd[i], actions[i].action,
1052 actions[i].localErrKey );
1054 case at_not_start_local_error:
1055 graph->notStartErrorAction( actionOrd[i], actions[i].action,
1056 actions[i].localErrKey );
1058 case at_not_final_local_error:
1059 graph->notFinalErrorAction( actionOrd[i], actions[i].action,
1060 actions[i].localErrKey );
1062 case at_middle_local_error:
1063 graph->middleErrorAction( actionOrd[i], actions[i].action,
1064 actions[i].localErrKey );
1069 graph->startEOFAction( actionOrd[i], actions[i].action );
1070 afterOpMinimize( graph );
1073 graph->allEOFAction( actionOrd[i], actions[i].action );
1076 graph->finalEOFAction( actionOrd[i], actions[i].action );
1078 case at_not_start_eof:
1079 graph->notStartEOFAction( actionOrd[i], actions[i].action );
1081 case at_not_final_eof:
1082 graph->notFinalEOFAction( actionOrd[i], actions[i].action );
1085 graph->middleEOFAction( actionOrd[i], actions[i].action );
1088 /* To State Actions. */
1089 case at_start_to_state:
1090 graph->startToStateAction( actionOrd[i], actions[i].action );
1091 afterOpMinimize( graph );
1093 case at_all_to_state:
1094 graph->allToStateAction( actionOrd[i], actions[i].action );
1096 case at_final_to_state:
1097 graph->finalToStateAction( actionOrd[i], actions[i].action );
1099 case at_not_start_to_state:
1100 graph->notStartToStateAction( actionOrd[i], actions[i].action );
1102 case at_not_final_to_state:
1103 graph->notFinalToStateAction( actionOrd[i], actions[i].action );
1105 case at_middle_to_state:
1106 graph->middleToStateAction( actionOrd[i], actions[i].action );
1109 /* From State Actions. */
1110 case at_start_from_state:
1111 graph->startFromStateAction( actionOrd[i], actions[i].action );
1112 afterOpMinimize( graph );
1114 case at_all_from_state:
1115 graph->allFromStateAction( actionOrd[i], actions[i].action );
1117 case at_final_from_state:
1118 graph->finalFromStateAction( actionOrd[i], actions[i].action );
1120 case at_not_start_from_state:
1121 graph->notStartFromStateAction( actionOrd[i], actions[i].action );
1123 case at_not_final_from_state:
1124 graph->notFinalFromStateAction( actionOrd[i], actions[i].action );
1126 case at_middle_from_state:
1127 graph->middleFromStateAction( actionOrd[i], actions[i].action );
1130 /* Remaining cases, prevented by the parser. */
1138 void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd )
1140 /* Assign priorities. */
1141 for ( int i = 0; i < priorityAugs.length(); i++ ) {
1142 switch ( priorityAugs[i].type ) {
1144 graph->startFsmPrior( priorOrd[i], &priorDescs[i]);
1145 /* Start fsm priorities are a special case that may require
1146 * minimization afterwards. */
1147 afterOpMinimize( graph );
1150 graph->allTransPrior( priorOrd[i], &priorDescs[i] );
1153 graph->finishFsmPrior( priorOrd[i], &priorDescs[i] );
1156 graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] );
1160 /* Parser Prevents this case. */
1166 void FactorWithAug::assignConditions( FsmAp *graph )
1168 for ( int i = 0; i < conditions.length(); i++ ) {
1169 switch ( conditions[i].type ) {
1170 /* Transition actions. */
1172 graph->startFsmCondition( conditions[i].action, conditions[i].sense );
1173 afterOpMinimize( graph );
1176 graph->allTransCondition( conditions[i].action, conditions[i].sense );
1179 graph->leaveFsmCondition( conditions[i].action, conditions[i].sense );
1188 /* Evaluate a factor with augmentation node. */
1189 FsmAp *FactorWithAug::walk( ParseData *pd )
1191 /* Enter into the scopes created for the labels. */
1192 NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1194 /* Make the array of function orderings. */
1196 if ( actions.length() > 0 )
1197 actionOrd = new int[actions.length()];
1199 /* First walk the list of actions, assigning order to all starting
1201 for ( int i = 0; i < actions.length(); i++ ) {
1202 if ( actions[i].type == at_start ||
1203 actions[i].type == at_start_gbl_error ||
1204 actions[i].type == at_start_local_error ||
1205 actions[i].type == at_start_to_state ||
1206 actions[i].type == at_start_from_state ||
1207 actions[i].type == at_start_eof )
1208 actionOrd[i] = pd->curActionOrd++;
1211 /* Evaluate the factor with repetition. */
1212 FsmAp *rtnVal = factorWithRep->walk( pd );
1214 /* Compute the remaining action orderings. */
1215 for ( int i = 0; i < actions.length(); i++ ) {
1216 if ( actions[i].type != at_start &&
1217 actions[i].type != at_start_gbl_error &&
1218 actions[i].type != at_start_local_error &&
1219 actions[i].type != at_start_to_state &&
1220 actions[i].type != at_start_from_state &&
1221 actions[i].type != at_start_eof )
1222 actionOrd[i] = pd->curActionOrd++;
1225 /* Embed conditions. */
1226 assignConditions( rtnVal );
1228 /* Embed actions. */
1229 assignActions( pd, rtnVal , actionOrd );
1231 /* Make the array of priority orderings. Orderings are local to this walk
1232 * of the factor with augmentation. */
1234 if ( priorityAugs.length() > 0 )
1235 priorOrd = new int[priorityAugs.length()];
1237 /* Walk all priorities, assigning the priority ordering. */
1238 for ( int i = 0; i < priorityAugs.length(); i++ )
1239 priorOrd[i] = pd->curPriorOrd++;
1241 /* If the priority descriptors have not been made, make them now. Make
1242 * priority descriptors for each priority asignment that will be passed to
1243 * the fsm. Used to keep track of the key, value and used bit. */
1244 if ( priorDescs == 0 && priorityAugs.length() > 0 ) {
1245 priorDescs = new PriorDesc[priorityAugs.length()];
1246 for ( int i = 0; i < priorityAugs.length(); i++ ) {
1247 /* Init the prior descriptor for the priority setting. */
1248 priorDescs[i].key = priorityAugs[i].priorKey;
1249 priorDescs[i].priority = priorityAugs[i].priorValue;
1253 /* Assign priorities into the machine. */
1254 assignPriorities( rtnVal, priorOrd );
1256 /* Assign epsilon transitions. */
1257 for ( int e = 0; e < epsilonLinks.length(); e++ ) {
1258 /* Get the name, which may not exist. If it doesn't then silently
1259 * ignore it because an error has already been reported. */
1260 NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++];
1261 if ( epTarg != 0 ) {
1262 /* Make the epsilon transitions. */
1263 rtnVal->epsilonTrans( epTarg->id );
1265 /* Note that we have made a link to the name. */
1266 pd->localNameScope->referencedNames.append( epTarg );
1270 /* Set entry points for labels. */
1271 if ( labels.length() > 0 ) {
1272 /* Pop the names. */
1273 pd->resetNameScope( nameFrame );
1275 /* Make labels that are referenced into entry points. */
1276 for ( int i = 0; i < labels.length(); i++ ) {
1277 pd->enterNameScope( false, 1 );
1279 /* Will always be found. */
1280 NameInst *name = pd->curNameInst;
1282 /* If the name is referenced then set the entry point. */
1283 if ( name->numRefs > 0 )
1284 rtnVal->setEntry( name->id, rtnVal->startState );
1287 pd->popNameScope( nameFrame );
1290 if ( priorOrd != 0 )
1292 if ( actionOrd != 0 )
1297 void FactorWithAug::makeNameTree( ParseData *pd )
1299 /* Add the labels to the tree of instantiated names. Each label
1300 * makes a new scope. */
1301 NameInst *prevNameInst = pd->curNameInst;
1302 for ( int i = 0; i < labels.length(); i++ )
1303 pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data, true );
1305 /* Recurse, then pop the names. */
1306 factorWithRep->makeNameTree( pd );
1307 pd->curNameInst = prevNameInst;
1311 void FactorWithAug::resolveNameRefs( ParseData *pd )
1313 /* Enter into the name scope created by any labels. */
1314 NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1316 /* Note action references. */
1317 for ( int i = 0; i < actions.length(); i++ )
1318 actions[i].action->actionRefs.append( pd->localNameScope );
1320 /* Recurse first. IMPORTANT: we must do the exact same traversal as when
1321 * the tree is constructed. */
1322 factorWithRep->resolveNameRefs( pd );
1324 /* Resolve epsilon transitions. */
1325 for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) {
1327 EpsilonLink &link = epsilonLinks[ep];
1328 NameInst *resolvedName = 0;
1330 if ( link.target.length() == 1 && strcmp( link.target.data[0], "final" ) == 0 ) {
1331 /* Epsilon drawn to an implicit final state. An implicit final is
1332 * only available in join operations. */
1333 resolvedName = pd->localNameScope->final;
1336 /* Do an search for the name. */
1338 pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 );
1339 if ( resolved.length() > 0 ) {
1340 /* Take the first one. */
1341 resolvedName = resolved[0];
1342 if ( resolved.length() > 1 ) {
1343 /* Complain about the multiple references. */
1344 error(link.loc) << "state reference " << link.target <<
1345 " resolves to multiple entry points" << endl;
1346 errorStateLabels( resolved );
1351 /* This is tricky, we stuff resolved epsilon transitions into one long
1352 * vector in the parse data structure. Since the name resolution and
1353 * graph generation both do identical walks of the parse tree we
1354 * should always find the link resolutions in the right place. */
1355 pd->epsilonResolvedLinks.append( resolvedName );
1357 if ( resolvedName != 0 ) {
1358 /* Found the name, bump of the reference count on it. */
1359 resolvedName->numRefs += 1;
1362 /* Complain, no recovery action, the epsilon op will ignore any
1363 * epsilon transitions whose names did not resolve. */
1364 error(link.loc) << "could not resolve label " << link.target << endl;
1368 if ( labels.length() > 0 )
1369 pd->popNameScope( nameFrame );
1373 /* Clean up after a factor with repetition node. */
1374 FactorWithRep::~FactorWithRep()
1377 case StarType: case StarStarType: case OptionalType: case PlusType:
1378 case ExactType: case MaxType: case MinType: case RangeType:
1379 delete factorWithRep;
1381 case FactorWithNegType:
1382 delete factorWithNeg;
1387 /* Evaluate a factor with repetition node. */
1388 FsmAp *FactorWithRep::walk( ParseData *pd )
1394 /* Evaluate the FactorWithRep. */
1395 retFsm = factorWithRep->walk( pd );
1396 if ( retFsm->startState->isFinState() ) {
1397 warning(loc) << "applying kleene star to a machine that "
1398 "accepts zero length word" << endl;
1399 retFsm->unsetFinState( retFsm->startState );
1402 /* Shift over the start action orders then do the kleene star. */
1403 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1405 afterOpMinimize( retFsm );
1408 case StarStarType: {
1409 /* Evaluate the FactorWithRep. */
1410 retFsm = factorWithRep->walk( pd );
1411 if ( retFsm->startState->isFinState() ) {
1412 warning(loc) << "applying kleene star to a machine that "
1413 "accepts zero length word" << endl;
1416 /* Set up the prior descs. All gets priority one, whereas leaving gets
1417 * priority zero. Make a unique key so that these priorities don't
1418 * interfere with any priorities set by the user. */
1419 priorDescs[0].key = pd->nextPriorKey++;
1420 priorDescs[0].priority = 1;
1421 retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
1423 /* Leaveing gets priority 0. Use same unique key. */
1424 priorDescs[1].key = priorDescs[0].key;
1425 priorDescs[1].priority = 0;
1426 retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
1428 /* Shift over the start action orders then do the kleene star. */
1429 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1431 afterOpMinimize( retFsm );
1434 case OptionalType: {
1435 /* Make the null fsm. */
1436 FsmAp *nu = new FsmAp();
1439 /* Evaluate the FactorWithRep. */
1440 retFsm = factorWithRep->walk( pd );
1442 /* Perform the question operator. */
1443 retFsm->unionOp( nu );
1444 afterOpMinimize( retFsm );
1448 /* Evaluate the FactorWithRep. */
1449 retFsm = factorWithRep->walk( pd );
1450 if ( retFsm->startState->isFinState() ) {
1451 warning(loc) << "applying plus operator to a machine that "
1452 "accepts zero length word" << endl;
1455 /* Need a duplicated for the star end. */
1456 FsmAp *dup = new FsmAp( *retFsm );
1458 /* The start func orders need to be shifted before doing the star. */
1459 pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd );
1461 /* Star the duplicate. */
1463 afterOpMinimize( dup );
1465 retFsm->concatOp( dup );
1466 afterOpMinimize( retFsm );
1470 /* Get an int from the repetition amount. */
1471 if ( lowerRep == 0 ) {
1472 /* No copies. Don't need to evaluate the factorWithRep.
1473 * This Defeats the purpose so give a warning. */
1474 warning(loc) << "exactly zero repetitions results "
1475 "in the null machine" << endl;
1477 retFsm = new FsmAp();
1478 retFsm->lambdaFsm();
1481 /* Evaluate the first FactorWithRep. */
1482 retFsm = factorWithRep->walk( pd );
1483 if ( retFsm->startState->isFinState() ) {
1484 warning(loc) << "applying repetition to a machine that "
1485 "accepts zero length word" << endl;
1488 /* The start func orders need to be shifted before doing the
1490 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1492 /* Do the repetition on the machine. Already guarded against n == 0 */
1493 retFsm->repeatOp( lowerRep );
1494 afterOpMinimize( retFsm );
1499 /* Get an int from the repetition amount. */
1500 if ( upperRep == 0 ) {
1501 /* No copies. Don't need to evaluate the factorWithRep.
1502 * This Defeats the purpose so give a warning. */
1503 warning(loc) << "max zero repetitions results "
1504 "in the null machine" << endl;
1506 retFsm = new FsmAp();
1507 retFsm->lambdaFsm();
1510 /* Evaluate the first FactorWithRep. */
1511 retFsm = factorWithRep->walk( pd );
1512 if ( retFsm->startState->isFinState() ) {
1513 warning(loc) << "applying max repetition to a machine that "
1514 "accepts zero length word" << endl;
1517 /* The start func orders need to be shifted before doing the
1519 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1521 /* Do the repetition on the machine. Already guarded against n == 0 */
1522 retFsm->optionalRepeatOp( upperRep );
1523 afterOpMinimize( retFsm );
1528 /* Evaluate the repeated machine. */
1529 retFsm = factorWithRep->walk( pd );
1530 if ( retFsm->startState->isFinState() ) {
1531 warning(loc) << "applying min repetition to a machine that "
1532 "accepts zero length word" << endl;
1535 /* The start func orders need to be shifted before doing the repetition
1536 * and the kleene star. */
1537 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1539 if ( lowerRep == 0 ) {
1540 /* Acts just like a star op on the machine to return. */
1542 afterOpMinimize( retFsm );
1545 /* Take a duplicate for the plus. */
1546 FsmAp *dup = new FsmAp( *retFsm );
1548 /* Do repetition on the first half. */
1549 retFsm->repeatOp( lowerRep );
1550 afterOpMinimize( retFsm );
1552 /* Star the duplicate. */
1554 afterOpMinimize( dup );
1556 /* Tak on the kleene star. */
1557 retFsm->concatOp( dup );
1558 afterOpMinimize( retFsm );
1563 /* Check for bogus range. */
1564 if ( upperRep - lowerRep < 0 ) {
1565 error(loc) << "invalid range repetition" << endl;
1567 /* Return null machine as recovery. */
1568 retFsm = new FsmAp();
1569 retFsm->lambdaFsm();
1571 else if ( lowerRep == 0 && upperRep == 0 ) {
1572 /* No copies. Don't need to evaluate the factorWithRep. This
1573 * defeats the purpose so give a warning. */
1574 warning(loc) << "zero to zero repetitions results "
1575 "in the null machine" << endl;
1577 retFsm = new FsmAp();
1578 retFsm->lambdaFsm();
1581 /* Now need to evaluate the repeated machine. */
1582 retFsm = factorWithRep->walk( pd );
1583 if ( retFsm->startState->isFinState() ) {
1584 warning(loc) << "applying range repetition to a machine that "
1585 "accepts zero length word" << endl;
1588 /* The start func orders need to be shifted before doing both kinds
1590 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1592 if ( lowerRep == 0 ) {
1593 /* Just doing max repetition. Already guarded against n == 0. */
1594 retFsm->optionalRepeatOp( upperRep );
1595 afterOpMinimize( retFsm );
1597 else if ( lowerRep == upperRep ) {
1598 /* Just doing exact repetition. Already guarded against n == 0. */
1599 retFsm->repeatOp( lowerRep );
1600 afterOpMinimize( retFsm );
1603 /* This is the case that 0 < lowerRep < upperRep. Take a
1604 * duplicate for the optional repeat. */
1605 FsmAp *dup = new FsmAp( *retFsm );
1607 /* Do repetition on the first half. */
1608 retFsm->repeatOp( lowerRep );
1609 afterOpMinimize( retFsm );
1611 /* Do optional repetition on the second half. */
1612 dup->optionalRepeatOp( upperRep - lowerRep );
1613 afterOpMinimize( dup );
1615 /* Tak on the duplicate machine. */
1616 retFsm->concatOp( dup );
1617 afterOpMinimize( retFsm );
1622 case FactorWithNegType: {
1623 /* Evaluate the Factor. Pass it up. */
1624 retFsm = factorWithNeg->walk( pd );
1630 void FactorWithRep::makeNameTree( ParseData *pd )
1641 factorWithRep->makeNameTree( pd );
1643 case FactorWithNegType:
1644 factorWithNeg->makeNameTree( pd );
1649 void FactorWithRep::resolveNameRefs( ParseData *pd )
1660 factorWithRep->resolveNameRefs( pd );
1662 case FactorWithNegType:
1663 factorWithNeg->resolveNameRefs( pd );
1668 /* Clean up after a factor with negation node. */
1669 FactorWithNeg::~FactorWithNeg()
1673 case CharNegateType:
1674 delete factorWithNeg;
1682 /* Evaluate a factor with negation node. */
1683 FsmAp *FactorWithNeg::walk( ParseData *pd )
1689 /* Evaluate the factorWithNeg. */
1690 FsmAp *toNegate = factorWithNeg->walk( pd );
1692 /* Negation is subtract from dot-star. */
1693 retFsm = dotStarFsm( pd );
1694 retFsm->subtractOp( toNegate );
1695 afterOpMinimize( retFsm );
1698 case CharNegateType: {
1699 /* Evaluate the factorWithNeg. */
1700 FsmAp *toNegate = factorWithNeg->walk( pd );
1702 /* CharNegation is subtract from dot. */
1703 retFsm = dotFsm( pd );
1704 retFsm->subtractOp( toNegate );
1705 afterOpMinimize( retFsm );
1709 /* Evaluate the Factor. Pass it up. */
1710 retFsm = factor->walk( pd );
1716 void FactorWithNeg::makeNameTree( ParseData *pd )
1720 case CharNegateType:
1721 factorWithNeg->makeNameTree( pd );
1724 factor->makeNameTree( pd );
1729 void FactorWithNeg::resolveNameRefs( ParseData *pd )
1733 case CharNegateType:
1734 factorWithNeg->resolveNameRefs( pd );
1737 factor->resolveNameRefs( pd );
1742 /* Clean up after a factor node. */
1763 case LongestMatchType:
1764 delete longestMatch;
1769 /* Evaluate a factor node. */
1770 FsmAp *Factor::walk( ParseData *pd )
1775 rtnVal = literal->walk( pd );
1778 rtnVal = range->walk( pd );
1781 rtnVal = reItem->walk( pd, 0 );
1784 rtnVal = regExpr->walk( pd, 0 );
1787 rtnVal = varDef->walk( pd );
1790 rtnVal = join->walk( pd );
1792 case LongestMatchType:
1793 rtnVal = longestMatch->walk( pd );
1800 void Factor::makeNameTree( ParseData *pd )
1809 varDef->makeNameTree( loc, pd );
1812 join->makeNameTree( pd );
1814 case LongestMatchType:
1815 longestMatch->makeNameTree( pd );
1820 void Factor::resolveNameRefs( ParseData *pd )
1829 varDef->resolveNameRefs( pd );
1832 join->resolveNameRefs( pd );
1834 case LongestMatchType:
1835 longestMatch->resolveNameRefs( pd );
1840 /* Clean up a range object. Must delete the two literals. */
1847 /* Evaluate a range. Gets the lower an upper key and makes an fsm range. */
1848 FsmAp *Range::walk( ParseData *pd )
1850 /* Construct and verify the suitability of the lower end of the range. */
1851 FsmAp *lowerFsm = lowerLit->walk( pd );
1852 if ( !lowerFsm->checkSingleCharMachine() ) {
1853 error(lowerLit->token.loc) <<
1854 "bad range lower end, must be a single character" << endl;
1857 /* Construct and verify the upper end. */
1858 FsmAp *upperFsm = upperLit->walk( pd );
1859 if ( !upperFsm->checkSingleCharMachine() ) {
1860 error(upperLit->token.loc) <<
1861 "bad range upper end, must be a single character" << endl;
1864 /* Grab the keys from the machines, then delete them. */
1865 Key lowKey = lowerFsm->startState->outList.head->lowKey;
1866 Key highKey = upperFsm->startState->outList.head->lowKey;
1870 /* Validate the range. */
1871 if ( lowKey > highKey ) {
1872 /* Recover by setting upper to lower; */
1873 error(lowerLit->token.loc) << "lower end of range is greater then upper end" << endl;
1877 /* Return the range now that it is validated. */
1878 FsmAp *retFsm = new FsmAp();
1879 retFsm->rangeFsm( lowKey, highKey );
1883 /* Evaluate a literal object. */
1884 FsmAp *Literal::walk( ParseData *pd )
1886 /* FsmAp to return, is the alphabet signed. */
1891 /* Make the fsm key in int format. */
1892 Key fsmKey = makeFsmKeyNum( token.data, token.loc, pd );
1893 /* Make the new machine. */
1894 rtnVal = new FsmAp();
1895 rtnVal->concatFsm( fsmKey );
1899 /* Make the array of keys in int format. */
1901 bool caseInsensitive;
1902 char *data = prepareLitString( token.loc, token.data, token.length,
1903 length, caseInsensitive );
1904 Key *arr = new Key[length];
1905 makeFsmKeyArray( arr, data, length, pd );
1907 /* Make the new machine. */
1908 rtnVal = new FsmAp();
1909 if ( caseInsensitive )
1910 rtnVal->concatFsmCI( arr, length );
1912 rtnVal->concatFsm( arr, length );
1920 /* Clean up after a regular expression object. */
1933 /* Evaluate a regular expression object. */
1934 FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex )
1936 /* This is the root regex, pass down a pointer to this. */
1937 if ( rootRegex == 0 )
1943 /* Walk both items. */
1944 rtnVal = regExpr->walk( pd, rootRegex );
1945 FsmAp *fsm2 = item->walk( pd, rootRegex );
1946 rtnVal->concatOp( fsm2 );
1950 rtnVal = new FsmAp();
1951 rtnVal->lambdaFsm();
1958 /* Clean up after an item in a regular expression. */
1972 /* Evaluate a regular expression object. */
1973 FsmAp *ReItem::walk( ParseData *pd, RegExpr *rootRegex )
1975 /* The fsm to return, is the alphabet signed? */
1980 /* Move the data into an integer array and make a concat fsm. */
1981 Key *arr = new Key[token.length];
1982 makeFsmKeyArray( arr, token.data, token.length, pd );
1984 /* Make the concat fsm. */
1985 rtnVal = new FsmAp();
1986 if ( rootRegex != 0 && rootRegex->caseInsensitive )
1987 rtnVal->concatFsmCI( arr, token.length );
1989 rtnVal->concatFsm( arr, token.length );
1994 /* Make the dot fsm. */
1995 rtnVal = dotFsm( pd );
1999 /* Get the or block and minmize it. */
2000 rtnVal = orBlock->walk( pd, rootRegex );
2001 if ( rtnVal == 0 ) {
2002 rtnVal = new FsmAp();
2003 rtnVal->lambdaFsm();
2005 rtnVal->minimizePartition2();
2009 /* Get the or block and minimize it. */
2010 FsmAp *fsm = orBlock->walk( pd, rootRegex );
2011 fsm->minimizePartition2();
2013 /* Make a dot fsm and subtract from it. */
2014 rtnVal = dotFsm( pd );
2015 rtnVal->subtractOp( fsm );
2016 rtnVal->minimizePartition2();
2021 /* If the item is followed by a star, then apply the star op. */
2023 if ( rtnVal->startState->isFinState() ) {
2024 warning(loc) << "applying kleene star to a machine that "
2025 "accepts zero length word" << endl;
2029 rtnVal->minimizePartition2();
2034 /* Clean up after an or block of a regular expression. */
2035 ReOrBlock::~ReOrBlock()
2048 /* Evaluate an or block of a regular expression. */
2049 FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex )
2054 /* Evaluate the two fsm. */
2055 FsmAp *fsm1 = orBlock->walk( pd, rootRegex );
2056 FsmAp *fsm2 = item->walk( pd, rootRegex );
2060 fsm1->unionOp( fsm2 );
2073 /* Evaluate an or block item of a regular expression. */
2074 FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex )
2076 /* The return value, is the alphabet signed? */
2080 /* Make the or machine. */
2081 rtnVal = new FsmAp();
2083 /* Put the or data into an array of ints. Note that we find unique
2084 * keys. Duplicates are silently ignored. The alternative would be to
2085 * issue warning or an error but since we can't with [a0-9a] or 'a' |
2086 * 'a' don't bother here. */
2088 makeFsmUniqueKeyArray( keySet, token.data, token.length,
2089 rootRegex != 0 ? rootRegex->caseInsensitive : false, pd );
2091 /* Run the or operator. */
2092 rtnVal->orFsm( keySet.data, keySet.length() );
2096 /* Make the upper and lower keys. */
2097 Key lowKey = makeFsmKeyChar( lower, pd );
2098 Key highKey = makeFsmKeyChar( upper, pd );
2100 /* Validate the range. */
2101 if ( lowKey > highKey ) {
2102 /* Recover by setting upper to lower; */
2103 error(loc) << "lower end of range is greater then upper end" << endl;
2107 /* Make the range machine. */
2108 rtnVal = new FsmAp();
2109 rtnVal->rangeFsm( lowKey, highKey );
2111 if ( rootRegex != 0 && rootRegex->caseInsensitive ) {
2112 if ( lowKey <= 'Z' && 'A' <= highKey ) {
2113 Key otherLow = lowKey < 'A' ? Key('A') : lowKey;
2114 Key otherHigh = 'Z' < highKey ? Key('Z') : highKey;
2116 otherLow = 'a' + ( otherLow - 'A' );
2117 otherHigh = 'a' + ( otherHigh - 'A' );
2119 FsmAp *otherRange = new FsmAp();
2120 otherRange->rangeFsm( otherLow, otherHigh );
2121 rtnVal->unionOp( otherRange );
2122 rtnVal->minimizePartition2();
2124 else if ( lowKey <= 'z' && 'a' <= highKey ) {
2125 Key otherLow = lowKey < 'a' ? Key('a') : lowKey;
2126 Key otherHigh = 'z' < highKey ? Key('z') : highKey;
2128 otherLow = 'A' + ( otherLow - 'a' );
2129 otherHigh = 'A' + ( otherHigh - 'a' );
2131 FsmAp *otherRange = new FsmAp();
2132 otherRange->rangeFsm( otherLow, otherHigh );
2133 rtnVal->unionOp( otherRange );
2134 rtnVal->minimizePartition2();