2 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
31 #include "parsetree.h"
34 ostream &operator<<( ostream &out, const NameRef &nameRef );
35 ostream &operator<<( ostream &out, const NameInst &nameInst );
37 /* Convert the literal string which comes in from the scanner into an array of
38 * characters with escapes and options interpreted. Also null terminates the
39 * string. Though this null termination should not be relied on for
40 * interpreting literals in the parser because the string may contain a
41 * literal string with \0 */
42 void Token::prepareLitString( Token &result, bool &caseInsensitive )
44 result.data = new char[this->length+1];
45 caseInsensitive = false;
47 char *src = this->data + 1;
48 char *end = this->data + this->length - 1;
50 while ( *end != '\'' && *end != '\"' ) {
52 caseInsensitive = true;
54 error( this->loc ) << "literal string '" << *end <<
55 "' option not supported" << endl;
60 char *dest = result.data;
62 while ( src != end ) {
65 case '0': dest[len++] = '\0'; break;
66 case 'a': dest[len++] = '\a'; break;
67 case 'b': dest[len++] = '\b'; break;
68 case 't': dest[len++] = '\t'; break;
69 case 'n': dest[len++] = '\n'; break;
70 case 'v': dest[len++] = '\v'; break;
71 case 'f': dest[len++] = '\f'; break;
72 case 'r': dest[len++] = '\r'; break;
74 default: dest[len++] = src[1]; break;
83 result.data[result.length] = 0;
87 FsmAp *VarDef::walk( ParseData *pd )
89 /* We enter into a new name scope. */
90 NameFrame nameFrame = pd->enterNameScope( true, 1 );
92 /* Recurse on the expression. */
93 FsmAp *rtnVal = joinOrLm->walk( pd );
95 /* Do the tranfer of local error actions. */
96 LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name );
97 if ( localErrDictEl != 0 ) {
98 for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ )
99 rtnVal->transferErrorActions( state, localErrDictEl->value );
102 /* If the expression below is a join operation with multiple expressions
103 * then it just had epsilon transisions resolved. If it is a join
104 * with only a single expression then run the epsilon op now. */
105 if ( joinOrLm->type == JoinOrLm::JoinType && joinOrLm->join->exprList.length() == 1 )
108 /* We can now unset entry points that are not longer used. */
109 pd->unsetObsoleteEntries( rtnVal );
111 /* If the name of the variable is referenced then add the entry point to
113 if ( pd->curNameInst->numRefs > 0 )
114 rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState );
116 /* Pop the name scope. */
117 pd->popNameScope( nameFrame );
121 void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd )
123 /* The variable definition enters a new scope. */
124 NameInst *prevNameInst = pd->curNameInst;
125 pd->curNameInst = pd->addNameInst( loc, name, false );
127 if ( joinOrLm->type == JoinOrLm::LongestMatchType )
128 pd->curNameInst->isLongestMatch = true;
131 joinOrLm->makeNameTree( pd );
133 /* The name scope ends, pop the name instantiation. */
134 pd->curNameInst = prevNameInst;
137 void VarDef::resolveNameRefs( ParseData *pd )
139 /* Entering into a new scope. */
140 NameFrame nameFrame = pd->enterNameScope( true, 1 );
143 joinOrLm->resolveNameRefs( pd );
145 /* The name scope ends, pop the name instantiation. */
146 pd->popNameScope( nameFrame );
149 InputLoc LongestMatchPart::getLoc()
151 return action != 0 ? action->loc : semiLoc;
155 * If there are any LMs then all of the following entry points must reset
158 * 1. fentry(StateRef)
159 * 2. ftoto(StateRef), fcall(StateRef), fnext(StateRef)
160 * 3. targt of any transition that has an fcall (the return loc).
161 * 4. start state of all longest match routines.
164 Action *LongestMatch::newAction( ParseData *pd, const InputLoc &loc,
165 const char *name, InlineList *inlineList )
167 Action *action = new Action( loc, name, inlineList, pd->nextCondId++ );
168 action->actionRefs.append( pd->curNameInst );
169 pd->actionList.append( action );
170 action->isLmAction = true;
174 void LongestMatch::makeActions( ParseData *pd )
176 /* Make actions that set the action id. */
177 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
178 /* For each part create actions for setting the match type. We need
179 * to do this so that the actions will go into the actionIndex. */
180 InlineList *inlineList = new InlineList;
181 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
182 InlineItem::LmSetActId ) );
183 char *actName = new char[50];
184 sprintf( actName, "store%i", lmi->longestMatchId );
185 lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList );
188 /* Make actions that execute the user action and restart on the last
190 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
191 /* For each part create actions for setting the match type. We need
192 * to do this so that the actions will go into the actionIndex. */
193 InlineList *inlineList = new InlineList;
194 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
195 InlineItem::LmOnLast ) );
196 char *actName = new char[50];
197 sprintf( actName, "last%i", lmi->longestMatchId );
198 lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList );
201 /* Make actions that execute the user action and restart on the next
202 * character. These actions will set tokend themselves (it is the current
204 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
205 /* For each part create actions for setting the match type. We need
206 * to do this so that the actions will go into the actionIndex. */
207 InlineList *inlineList = new InlineList;
208 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
209 InlineItem::LmOnNext ) );
210 char *actName = new char[50];
211 sprintf( actName, "next%i", lmi->longestMatchId );
212 lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList );
215 /* Make actions that execute the user action and restart at tokend. These
216 * actions execute some time after matching the last char. */
217 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
218 /* For each part create actions for setting the match type. We need
219 * to do this so that the actions will go into the actionIndex. */
220 InlineList *inlineList = new InlineList;
221 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
222 InlineItem::LmOnLagBehind ) );
223 char *actName = new char[50];
224 sprintf( actName, "lag%i", lmi->longestMatchId );
225 lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList );
232 /* Create the error action. */
233 InlineList *il6 = new InlineList;
234 il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) );
235 lmActSelect = newAction( pd, loc, "switch", il6 );
238 void LongestMatch::findName( ParseData *pd )
240 NameInst *nameInst = pd->curNameInst;
241 while ( nameInst->name == 0 ) {
242 nameInst = nameInst->parent;
243 /* Since every machine must must have a name, we should always find a
244 * name for the longest match. */
245 assert( nameInst != 0 );
247 name = nameInst->name;
250 void LongestMatch::makeNameTree( ParseData *pd )
252 /* Create an anonymous scope for the longest match. Will be used for
253 * restarting machine after matching a token. */
254 NameInst *prevNameInst = pd->curNameInst;
255 pd->curNameInst = pd->addNameInst( loc, 0, false );
257 /* Recurse into all parts of the longest match operator. */
258 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ )
259 lmi->join->makeNameTree( pd );
261 /* Traverse the name tree upwards to find a name for this lm. */
264 /* Also make the longest match's actions at this point. */
267 /* The name scope ends, pop the name instantiation. */
268 pd->curNameInst = prevNameInst;
271 void LongestMatch::resolveNameRefs( ParseData *pd )
273 /* The longest match gets its own name scope. */
274 NameFrame nameFrame = pd->enterNameScope( true, 1 );
276 /* Take an action reference for each longest match item and recurse. */
277 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
278 /* Record the reference if the item has an action. */
279 if ( lmi->action != 0 )
280 lmi->action->actionRefs.append( pd->localNameScope );
282 /* Recurse down the join. */
283 lmi->join->resolveNameRefs( pd );
286 /* The name scope ends, pop the name instantiation. */
287 pd->popNameScope( nameFrame );
290 void LongestMatch::restart( FsmAp *graph, TransAp *trans )
292 StateAp *fromState = trans->fromState;
293 graph->detachTrans( fromState, trans->toState, trans );
294 graph->attachTrans( fromState, graph->startState, trans );
297 void LongestMatch::runLongestMatch( ParseData *pd, FsmAp *graph )
299 graph->markReachableFromHereStopFinal( graph->startState );
300 for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
301 if ( ms->stateBits & STB_ISMARKED ) {
302 ms->lmItemSet.insert( 0 );
303 ms->stateBits &= ~ STB_ISMARKED;
307 /* Transfer the first item of non-empty lmAction tables to the item sets
308 * of the states that follow. Exclude states that have no transitions out.
309 * This must happen on a separate pass so that on each iteration of the
310 * next pass we have the item set entries from all lmAction tables. */
311 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
312 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
313 if ( trans->lmActionTable.length() > 0 ) {
314 LmActionTableEl *lmAct = trans->lmActionTable.data;
315 StateAp *toState = trans->toState;
318 /* Can only optimize this if there are no transitions out.
319 * Note there can be out transitions going nowhere with
320 * actions and they too must inhibit this optimization. */
321 if ( toState->outList.length() > 0 ) {
322 /* Fill the item sets. */
323 graph->markReachableFromHereStopFinal( toState );
324 for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
325 if ( ms->stateBits & STB_ISMARKED ) {
326 ms->lmItemSet.insert( lmAct->value );
327 ms->stateBits &= ~ STB_ISMARKED;
335 /* The lmItem sets are now filled, telling us which longest match rules
336 * can succeed in which states. First determine if we need to make sure
337 * act is defaulted to zero. We need to do this if there are any states
338 * with lmItemSet.length() > 1 and NULL is included. That is, that the
339 * switch may get called when in fact nothing has been matched. */
340 int maxItemSetLength = 0;
341 graph->markReachableFromHereStopFinal( graph->startState );
342 for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
343 if ( ms->stateBits & STB_ISMARKED ) {
344 if ( ms->lmItemSet.length() > maxItemSetLength )
345 maxItemSetLength = ms->lmItemSet.length();
346 ms->stateBits &= ~ STB_ISMARKED;
350 /* The actions executed on starting to match a token. */
351 graph->isolateStartState();
352 graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart );
353 graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
354 if ( maxItemSetLength > 1 ) {
355 /* The longest match action switch may be called when tokens are
356 * matched, in which case act must be initialized, there must be a
357 * case to handle the error, and the generated machine will require an
359 lmSwitchHandlesError = true;
360 pd->lmRequiresErrorState = true;
361 graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
364 /* The place to store transitions to restart. It maybe possible for the
365 * restarting to affect the searching through the graph that follows. For
366 * now take the safe route and save the list of transitions to restart
367 * until after all searching is done. */
368 Vector<TransAp*> restartTrans;
370 /* Set actions that do immediate token recognition, set the longest match part
371 * id and set the token ending. */
372 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
373 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
374 if ( trans->lmActionTable.length() > 0 ) {
375 LmActionTableEl *lmAct = trans->lmActionTable.data;
376 StateAp *toState = trans->toState;
379 /* Can only optimize this if there are no transitions out.
380 * Note there can be out transitions going nowhere with
381 * actions and they too must inhibit this optimization. */
382 if ( toState->outList.length() == 0 ) {
383 /* Can execute the immediate action for the longest match
384 * part. Redirect the action to the start state.
386 * NOTE: When we need to inhibit on_last due to leaving
387 * actions the above test suffices. If the state has out
388 * actions then it will fail because the out action will
389 * have been transferred to an error transition, which
390 * makes the outlist non-empty. */
391 trans->actionTable.setAction( lmAct->key,
392 lmAct->value->actOnLast );
393 restartTrans.append( trans );
396 /* Look for non final states that have a non-empty item
397 * set. If these are present then we need to record the
398 * end of the token. Also Find the highest item set
399 * length reachable from here (excluding at transtions to
401 bool nonFinalNonEmptyItemSet = false;
402 maxItemSetLength = 0;
403 graph->markReachableFromHereStopFinal( toState );
404 for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
405 if ( ms->stateBits & STB_ISMARKED ) {
406 if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
407 nonFinalNonEmptyItemSet = true;
408 if ( ms->lmItemSet.length() > maxItemSetLength )
409 maxItemSetLength = ms->lmItemSet.length();
410 ms->stateBits &= ~ STB_ISMARKED;
414 /* If there are reachable states that are not final and
415 * have non empty item sets or that have an item set
416 * length greater than one then we need to set tokend
417 * because the error action that matches the token will
419 if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
420 trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
422 /* Some states may not know which longest match item to
423 * execute, must set it. */
424 if ( maxItemSetLength > 1 ) {
425 /* There are transitions out, another match may come. */
426 trans->actionTable.setAction( lmAct->key,
427 lmAct->value->setActId );
434 /* Now that all graph searching is done it certainly safe set the
435 * restarting. It may be safe above, however this must be verified. */
436 for ( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ )
437 restart( graph, *pt );
439 int lmErrActionOrd = pd->curActionOrd++;
441 /* Embed the error for recognizing a char. */
442 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
443 if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
444 if ( st->isFinState() ) {
445 /* On error execute the onActNext action, which knows that
446 * the last character of the token was one back and restart. */
447 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
448 &st->lmItemSet[0]->actOnNext, 1 );
449 st->eofActionTable.setAction( lmErrActionOrd,
450 st->lmItemSet[0]->actOnNext );
451 st->eofTarget = graph->startState;
454 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
455 &st->lmItemSet[0]->actLagBehind, 1 );
456 st->eofActionTable.setAction( lmErrActionOrd,
457 st->lmItemSet[0]->actLagBehind );
458 st->eofTarget = graph->startState;
461 else if ( st->lmItemSet.length() > 1 ) {
462 /* Need to use the select. Take note of which items the select
463 * is needed for so only the necessary actions are included. */
464 for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
466 (*plmi)->inLmSelect = true;
468 /* On error, execute the action select and go to the start state. */
469 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
471 st->eofActionTable.setAction( lmErrActionOrd, lmActSelect );
472 st->eofTarget = graph->startState;
476 /* Finally, the start state should be made final. */
477 graph->setFinState( graph->startState );
480 void LongestMatch::transferScannerLeavingActions( FsmAp *graph )
482 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
483 if ( st->outActionTable.length() > 0 )
484 graph->setErrorActions( st, st->outActionTable );
488 FsmAp *LongestMatch::walk( ParseData *pd )
490 /* The longest match has it's own name scope. */
491 NameFrame nameFrame = pd->enterNameScope( true, 1 );
493 /* Make each part of the longest match. */
494 FsmAp **parts = new FsmAp*[longestMatchList->length()];
495 LmPartList::Iter lmi = *longestMatchList;
496 for ( int i = 0; lmi.lte(); lmi++, i++ ) {
497 /* Create the machine and embed the setting of the longest match id. */
498 parts[i] = lmi->join->walk( pd );
499 parts[i]->longMatchAction( pd->curActionOrd++, lmi );
502 /* Before we union the patterns we need to deal with leaving actions. They
503 * are transfered to error transitions out of the final states (like local
504 * error actions) and to eof actions. In the scanner we need to forbid
505 * on_last for any final state that has an leaving action. */
506 for ( int i = 0; i < longestMatchList->length(); i++ )
507 transferScannerLeavingActions( parts[i] );
509 /* Union machines one and up with machine zero. The grammar dictates that
510 * there will always be at least one part. */
511 FsmAp *rtnVal = parts[0];
512 for ( int i = 1; i < longestMatchList->length(); i++ ) {
513 rtnVal->unionOp( parts[i] );
514 afterOpMinimize( rtnVal );
517 runLongestMatch( pd, rtnVal );
519 /* Pop the name scope. */
520 pd->popNameScope( nameFrame );
526 FsmAp *JoinOrLm::walk( ParseData *pd )
531 rtnVal = join->walk( pd );
533 case LongestMatchType:
534 rtnVal = longestMatch->walk( pd );
540 void JoinOrLm::makeNameTree( ParseData *pd )
544 join->makeNameTree( pd );
546 case LongestMatchType:
547 longestMatch->makeNameTree( pd );
552 void JoinOrLm::resolveNameRefs( ParseData *pd )
556 join->resolveNameRefs( pd );
558 case LongestMatchType:
559 longestMatch->resolveNameRefs( pd );
565 /* Construct with a location and the first expression. */
566 Join::Join( const InputLoc &loc, Expression *expr )
570 exprList.append( expr );
573 /* Construct with a location and the first expression. */
574 Join::Join( Expression *expr )
578 exprList.append( expr );
581 /* Walk an expression node. */
582 FsmAp *Join::walk( ParseData *pd )
584 if ( exprList.length() > 1 )
585 return walkJoin( pd );
587 return exprList.head->walk( pd );
590 /* There is a list of expressions to join. */
591 FsmAp *Join::walkJoin( ParseData *pd )
593 /* We enter into a new name scope. */
594 NameFrame nameFrame = pd->enterNameScope( true, 1 );
596 /* Evaluate the machines. */
597 FsmAp **fsms = new FsmAp*[exprList.length()];
598 ExprList::Iter expr = exprList;
599 for ( int e = 0; e < exprList.length(); e++, expr++ )
600 fsms[e] = expr->walk( pd );
602 /* Get the start and final names. Final is
603 * guaranteed to exist, start is not. */
604 NameInst *startName = pd->curNameInst->start;
605 NameInst *finalName = pd->curNameInst->final;
608 if ( startName != 0 ) {
609 /* Take note that there was an implicit link to the start machine. */
610 pd->localNameScope->referencedNames.append( startName );
611 startId = startName->id;
614 /* A final id of -1 indicates there is no epsilon that references the
615 * final state, therefor do not create one or set an entry point to it. */
617 if ( finalName->numRefs > 0 )
618 finalId = finalName->id;
620 /* Join machines 1 and up onto machine 0. */
621 FsmAp *retFsm = fsms[0];
622 retFsm->joinOp( startId, finalId, fsms+1, exprList.length()-1 );
624 /* We can now unset entry points that are not longer used. */
625 pd->unsetObsoleteEntries( retFsm );
627 /* Pop the name scope. */
628 pd->popNameScope( nameFrame );
634 void Join::makeNameTree( ParseData *pd )
636 if ( exprList.length() > 1 ) {
637 /* Create the new anonymous scope. */
638 NameInst *prevNameInst = pd->curNameInst;
639 pd->curNameInst = pd->addNameInst( loc, 0, false );
641 /* Join scopes need an implicit "final" target. */
642 pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final",
643 pd->nextNameId++, false );
645 /* Recurse into all expressions in the list. */
646 for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
647 expr->makeNameTree( pd );
649 /* The name scope ends, pop the name instantiation. */
650 pd->curNameInst = prevNameInst;
653 /* Recurse into the single expression. */
654 exprList.head->makeNameTree( pd );
659 void Join::resolveNameRefs( ParseData *pd )
661 /* Branch on whether or not there is to be a join. */
662 if ( exprList.length() > 1 ) {
663 /* The variable definition enters a new scope. */
664 NameFrame nameFrame = pd->enterNameScope( true, 1 );
666 /* The join scope must contain a start label. */
667 NameSet resolved = pd->resolvePart( pd->localNameScope, "start", true );
668 if ( resolved.length() > 0 ) {
669 /* Take the first. */
670 pd->curNameInst->start = resolved[0];
671 if ( resolved.length() > 1 ) {
672 /* Complain about the multiple references. */
673 error(loc) << "multiple start labels" << endl;
674 errorStateLabels( resolved );
678 /* Make sure there is a start label. */
679 if ( pd->curNameInst->start != 0 ) {
680 /* There is an implicit reference to start name. */
681 pd->curNameInst->start->numRefs += 1;
684 /* No start label. Complain and recover by adding a label to the
685 * adding one. Recover ignoring the problem. */
686 error(loc) << "no start label" << endl;
689 /* Recurse into all expressions in the list. */
690 for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
691 expr->resolveNameRefs( pd );
693 /* The name scope ends, pop the name instantiation. */
694 pd->popNameScope( nameFrame );
697 /* Recurse into the single expression. */
698 exprList.head->resolveNameRefs( pd );
702 /* Clean up after an expression node. */
703 Expression::~Expression()
706 case OrType: case IntersectType: case SubtractType:
707 case StrongSubtractType:
719 /* Evaluate a single expression node. */
720 FsmAp *Expression::walk( ParseData *pd, bool lastInSeq )
725 /* Evaluate the expression. */
726 rtnVal = expression->walk( pd, false );
727 /* Evaluate the term. */
728 FsmAp *rhs = term->walk( pd );
730 rtnVal->unionOp( rhs );
731 afterOpMinimize( rtnVal, lastInSeq );
734 case IntersectType: {
735 /* Evaluate the expression. */
736 rtnVal = expression->walk( pd );
737 /* Evaluate the term. */
738 FsmAp *rhs = term->walk( pd );
739 /* Perform intersection. */
740 rtnVal->intersectOp( rhs );
741 afterOpMinimize( rtnVal, lastInSeq );
745 /* Evaluate the expression. */
746 rtnVal = expression->walk( pd );
747 /* Evaluate the term. */
748 FsmAp *rhs = term->walk( pd );
749 /* Perform subtraction. */
750 rtnVal->subtractOp( rhs );
751 afterOpMinimize( rtnVal, lastInSeq );
754 case StrongSubtractType: {
755 /* Evaluate the expression. */
756 rtnVal = expression->walk( pd );
758 /* Evaluate the term and pad it with any* machines. */
759 FsmAp *rhs = dotStarFsm( pd );
760 FsmAp *termFsm = term->walk( pd );
761 FsmAp *trailAnyStar = dotStarFsm( pd );
762 rhs->concatOp( termFsm );
763 rhs->concatOp( trailAnyStar );
765 /* Perform subtraction. */
766 rtnVal->subtractOp( rhs );
767 afterOpMinimize( rtnVal, lastInSeq );
771 /* Return result of the term. */
772 rtnVal = term->walk( pd );
776 /* Duplicate the builtin. */
777 rtnVal = makeBuiltin( builtin, pd );
785 void Expression::makeNameTree( ParseData *pd )
791 case StrongSubtractType:
792 expression->makeNameTree( pd );
793 term->makeNameTree( pd );
796 term->makeNameTree( pd );
803 void Expression::resolveNameRefs( ParseData *pd )
809 case StrongSubtractType:
810 expression->resolveNameRefs( pd );
811 term->resolveNameRefs( pd );
814 term->resolveNameRefs( pd );
821 /* Clean up after a term node. */
827 case RightFinishType:
830 delete factorWithAug;
832 case FactorWithAugType:
833 delete factorWithAug;
838 /* Evaluate a term node. */
839 FsmAp *Term::walk( ParseData *pd, bool lastInSeq )
844 /* Evaluate the Term. */
845 rtnVal = term->walk( pd, false );
846 /* Evaluate the FactorWithRep. */
847 FsmAp *rhs = factorWithAug->walk( pd );
848 /* Perform concatenation. */
849 rtnVal->concatOp( rhs );
850 afterOpMinimize( rtnVal, lastInSeq );
853 case RightStartType: {
854 /* Evaluate the Term. */
855 rtnVal = term->walk( pd );
857 /* Evaluate the FactorWithRep. */
858 FsmAp *rhs = factorWithAug->walk( pd );
860 /* Set up the priority descriptors. The left machine gets the
861 * lower priority where as the right get the higher start priority. */
862 priorDescs[0].key = pd->nextPriorKey++;
863 priorDescs[0].priority = 0;
864 rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
866 /* The start transitions of the right machine gets the higher
867 * priority. Use the same unique key. */
868 priorDescs[1].key = priorDescs[0].key;
869 priorDescs[1].priority = 1;
870 rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
872 /* Perform concatenation. */
873 rtnVal->concatOp( rhs );
874 afterOpMinimize( rtnVal, lastInSeq );
877 case RightFinishType: {
878 /* Evaluate the Term. */
879 rtnVal = term->walk( pd );
881 /* Evaluate the FactorWithRep. */
882 FsmAp *rhs = factorWithAug->walk( pd );
884 /* Set up the priority descriptors. The left machine gets the
885 * lower priority where as the finishing transitions to the right
886 * get the higher priority. */
887 priorDescs[0].key = pd->nextPriorKey++;
888 priorDescs[0].priority = 0;
889 rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
891 /* The finishing transitions of the right machine get the higher
892 * priority. Use the same unique key. */
893 priorDescs[1].key = priorDescs[0].key;
894 priorDescs[1].priority = 1;
895 rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
897 /* If the right machine's start state is final we need to guard
898 * against the left machine persisting by moving through the empty
900 if ( rhs->startState->isFinState() ) {
901 rhs->startState->outPriorTable.setPrior(
902 pd->curPriorOrd++, &priorDescs[1] );
905 /* Perform concatenation. */
906 rtnVal->concatOp( rhs );
907 afterOpMinimize( rtnVal, lastInSeq );
911 /* Evaluate the Term. */
912 rtnVal = term->walk( pd );
914 /* Evaluate the FactorWithRep. */
915 FsmAp *rhs = factorWithAug->walk( pd );
917 /* Set up the priority descriptors. The left machine gets the
918 * higher priority. */
919 priorDescs[0].key = pd->nextPriorKey++;
920 priorDescs[0].priority = 1;
921 rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
923 /* The right machine gets the lower priority. We cannot use
924 * allTransPrior here in case the start state of the right machine
925 * is final. It would allow the right machine thread to run along
926 * with the left if just passing through the start state. Using
927 * startFsmPrior prevents this. */
928 priorDescs[1].key = priorDescs[0].key;
929 priorDescs[1].priority = 0;
930 rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
932 /* Perform concatenation. */
933 rtnVal->concatOp( rhs );
934 afterOpMinimize( rtnVal, lastInSeq );
937 case FactorWithAugType: {
938 rtnVal = factorWithAug->walk( pd );
945 void Term::makeNameTree( ParseData *pd )
950 case RightFinishType:
952 term->makeNameTree( pd );
953 factorWithAug->makeNameTree( pd );
955 case FactorWithAugType:
956 factorWithAug->makeNameTree( pd );
961 void Term::resolveNameRefs( ParseData *pd )
966 case RightFinishType:
968 term->resolveNameRefs( pd );
969 factorWithAug->resolveNameRefs( pd );
971 case FactorWithAugType:
972 factorWithAug->resolveNameRefs( pd );
977 /* Clean up after a factor with augmentation node. */
978 FactorWithAug::~FactorWithAug()
980 delete factorWithRep;
982 /* Walk the vector of parser actions, deleting function names. */
984 /* Clean up priority descriptors. */
985 if ( priorDescs != 0 )
989 void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd )
991 /* Assign actions. */
992 for ( int i = 0; i < actions.length(); i++ ) {
993 switch ( actions[i].type ) {
994 /* Transition actions. */
996 graph->startFsmAction( actionOrd[i], actions[i].action );
997 afterOpMinimize( graph );
1000 graph->allTransAction( actionOrd[i], actions[i].action );
1003 graph->finishFsmAction( actionOrd[i], actions[i].action );
1006 graph->leaveFsmAction( actionOrd[i], actions[i].action );
1009 /* Global error actions. */
1010 case at_start_gbl_error:
1011 graph->startErrorAction( actionOrd[i], actions[i].action, 0 );
1012 afterOpMinimize( graph );
1014 case at_all_gbl_error:
1015 graph->allErrorAction( actionOrd[i], actions[i].action, 0 );
1017 case at_final_gbl_error:
1018 graph->finalErrorAction( actionOrd[i], actions[i].action, 0 );
1020 case at_not_start_gbl_error:
1021 graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 );
1023 case at_not_final_gbl_error:
1024 graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 );
1026 case at_middle_gbl_error:
1027 graph->middleErrorAction( actionOrd[i], actions[i].action, 0 );
1030 /* Local error actions. */
1031 case at_start_local_error:
1032 graph->startErrorAction( actionOrd[i], actions[i].action,
1033 actions[i].localErrKey );
1034 afterOpMinimize( graph );
1036 case at_all_local_error:
1037 graph->allErrorAction( actionOrd[i], actions[i].action,
1038 actions[i].localErrKey );
1040 case at_final_local_error:
1041 graph->finalErrorAction( actionOrd[i], actions[i].action,
1042 actions[i].localErrKey );
1044 case at_not_start_local_error:
1045 graph->notStartErrorAction( actionOrd[i], actions[i].action,
1046 actions[i].localErrKey );
1048 case at_not_final_local_error:
1049 graph->notFinalErrorAction( actionOrd[i], actions[i].action,
1050 actions[i].localErrKey );
1052 case at_middle_local_error:
1053 graph->middleErrorAction( actionOrd[i], actions[i].action,
1054 actions[i].localErrKey );
1059 graph->startEOFAction( actionOrd[i], actions[i].action );
1060 afterOpMinimize( graph );
1063 graph->allEOFAction( actionOrd[i], actions[i].action );
1066 graph->finalEOFAction( actionOrd[i], actions[i].action );
1068 case at_not_start_eof:
1069 graph->notStartEOFAction( actionOrd[i], actions[i].action );
1071 case at_not_final_eof:
1072 graph->notFinalEOFAction( actionOrd[i], actions[i].action );
1075 graph->middleEOFAction( actionOrd[i], actions[i].action );
1078 /* To State Actions. */
1079 case at_start_to_state:
1080 graph->startToStateAction( actionOrd[i], actions[i].action );
1081 afterOpMinimize( graph );
1083 case at_all_to_state:
1084 graph->allToStateAction( actionOrd[i], actions[i].action );
1086 case at_final_to_state:
1087 graph->finalToStateAction( actionOrd[i], actions[i].action );
1089 case at_not_start_to_state:
1090 graph->notStartToStateAction( actionOrd[i], actions[i].action );
1092 case at_not_final_to_state:
1093 graph->notFinalToStateAction( actionOrd[i], actions[i].action );
1095 case at_middle_to_state:
1096 graph->middleToStateAction( actionOrd[i], actions[i].action );
1099 /* From State Actions. */
1100 case at_start_from_state:
1101 graph->startFromStateAction( actionOrd[i], actions[i].action );
1102 afterOpMinimize( graph );
1104 case at_all_from_state:
1105 graph->allFromStateAction( actionOrd[i], actions[i].action );
1107 case at_final_from_state:
1108 graph->finalFromStateAction( actionOrd[i], actions[i].action );
1110 case at_not_start_from_state:
1111 graph->notStartFromStateAction( actionOrd[i], actions[i].action );
1113 case at_not_final_from_state:
1114 graph->notFinalFromStateAction( actionOrd[i], actions[i].action );
1116 case at_middle_from_state:
1117 graph->middleFromStateAction( actionOrd[i], actions[i].action );
1120 /* Remaining cases, prevented by the parser. */
1128 void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd )
1130 /* Assign priorities. */
1131 for ( int i = 0; i < priorityAugs.length(); i++ ) {
1132 switch ( priorityAugs[i].type ) {
1134 graph->startFsmPrior( priorOrd[i], &priorDescs[i]);
1135 /* Start fsm priorities are a special case that may require
1136 * minimization afterwards. */
1137 afterOpMinimize( graph );
1140 graph->allTransPrior( priorOrd[i], &priorDescs[i] );
1143 graph->finishFsmPrior( priorOrd[i], &priorDescs[i] );
1146 graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] );
1150 /* Parser Prevents this case. */
1156 void FactorWithAug::assignConditions( FsmAp *graph )
1158 for ( int i = 0; i < conditions.length(); i++ ) {
1159 switch ( conditions[i].type ) {
1160 /* Transition actions. */
1162 graph->startFsmCondition( conditions[i].action, conditions[i].sense );
1163 afterOpMinimize( graph );
1166 graph->allTransCondition( conditions[i].action, conditions[i].sense );
1169 graph->leaveFsmCondition( conditions[i].action, conditions[i].sense );
1178 /* Evaluate a factor with augmentation node. */
1179 FsmAp *FactorWithAug::walk( ParseData *pd )
1181 /* Enter into the scopes created for the labels. */
1182 NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1184 /* Make the array of function orderings. */
1186 if ( actions.length() > 0 )
1187 actionOrd = new int[actions.length()];
1189 /* First walk the list of actions, assigning order to all starting
1191 for ( int i = 0; i < actions.length(); i++ ) {
1192 if ( actions[i].type == at_start ||
1193 actions[i].type == at_start_gbl_error ||
1194 actions[i].type == at_start_local_error ||
1195 actions[i].type == at_start_to_state ||
1196 actions[i].type == at_start_from_state ||
1197 actions[i].type == at_start_eof )
1198 actionOrd[i] = pd->curActionOrd++;
1201 /* Evaluate the factor with repetition. */
1202 FsmAp *rtnVal = factorWithRep->walk( pd );
1204 /* Compute the remaining action orderings. */
1205 for ( int i = 0; i < actions.length(); i++ ) {
1206 if ( actions[i].type != at_start &&
1207 actions[i].type != at_start_gbl_error &&
1208 actions[i].type != at_start_local_error &&
1209 actions[i].type != at_start_to_state &&
1210 actions[i].type != at_start_from_state &&
1211 actions[i].type != at_start_eof )
1212 actionOrd[i] = pd->curActionOrd++;
1215 /* Embed conditions. */
1216 assignConditions( rtnVal );
1218 /* Embed actions. */
1219 assignActions( pd, rtnVal , actionOrd );
1221 /* Make the array of priority orderings. Orderings are local to this walk
1222 * of the factor with augmentation. */
1224 if ( priorityAugs.length() > 0 )
1225 priorOrd = new int[priorityAugs.length()];
1227 /* Walk all priorities, assigning the priority ordering. */
1228 for ( int i = 0; i < priorityAugs.length(); i++ )
1229 priorOrd[i] = pd->curPriorOrd++;
1231 /* If the priority descriptors have not been made, make them now. Make
1232 * priority descriptors for each priority asignment that will be passed to
1233 * the fsm. Used to keep track of the key, value and used bit. */
1234 if ( priorDescs == 0 && priorityAugs.length() > 0 ) {
1235 priorDescs = new PriorDesc[priorityAugs.length()];
1236 for ( int i = 0; i < priorityAugs.length(); i++ ) {
1237 /* Init the prior descriptor for the priority setting. */
1238 priorDescs[i].key = priorityAugs[i].priorKey;
1239 priorDescs[i].priority = priorityAugs[i].priorValue;
1243 /* Assign priorities into the machine. */
1244 assignPriorities( rtnVal, priorOrd );
1246 /* Assign epsilon transitions. */
1247 for ( int e = 0; e < epsilonLinks.length(); e++ ) {
1248 /* Get the name, which may not exist. If it doesn't then silently
1249 * ignore it because an error has already been reported. */
1250 NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++];
1251 if ( epTarg != 0 ) {
1252 /* Make the epsilon transitions. */
1253 rtnVal->epsilonTrans( epTarg->id );
1255 /* Note that we have made a link to the name. */
1256 pd->localNameScope->referencedNames.append( epTarg );
1260 /* Set entry points for labels. */
1261 if ( labels.length() > 0 ) {
1262 /* Pop the names. */
1263 pd->resetNameScope( nameFrame );
1265 /* Make labels that are referenced into entry points. */
1266 for ( int i = 0; i < labels.length(); i++ ) {
1267 pd->enterNameScope( false, 1 );
1269 /* Will always be found. */
1270 NameInst *name = pd->curNameInst;
1272 /* If the name is referenced then set the entry point. */
1273 if ( name->numRefs > 0 )
1274 rtnVal->setEntry( name->id, rtnVal->startState );
1277 pd->popNameScope( nameFrame );
1280 if ( priorOrd != 0 )
1282 if ( actionOrd != 0 )
1287 void FactorWithAug::makeNameTree( ParseData *pd )
1289 /* Add the labels to the tree of instantiated names. Each label
1290 * makes a new scope. */
1291 NameInst *prevNameInst = pd->curNameInst;
1292 for ( int i = 0; i < labels.length(); i++ )
1293 pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data, true );
1295 /* Recurse, then pop the names. */
1296 factorWithRep->makeNameTree( pd );
1297 pd->curNameInst = prevNameInst;
1301 void FactorWithAug::resolveNameRefs( ParseData *pd )
1303 /* Enter into the name scope created by any labels. */
1304 NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1306 /* Note action references. */
1307 for ( int i = 0; i < actions.length(); i++ )
1308 actions[i].action->actionRefs.append( pd->localNameScope );
1310 /* Recurse first. IMPORTANT: we must do the exact same traversal as when
1311 * the tree is constructed. */
1312 factorWithRep->resolveNameRefs( pd );
1314 /* Resolve epsilon transitions. */
1315 for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) {
1317 EpsilonLink &link = epsilonLinks[ep];
1318 NameInst *resolvedName = 0;
1320 if ( link.target.length() == 1 && strcmp( link.target.data[0], "final" ) == 0 ) {
1321 /* Epsilon drawn to an implicit final state. An implicit final is
1322 * only available in join operations. */
1323 resolvedName = pd->localNameScope->final;
1326 /* Do an search for the name. */
1328 pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 );
1329 if ( resolved.length() > 0 ) {
1330 /* Take the first one. */
1331 resolvedName = resolved[0];
1332 if ( resolved.length() > 1 ) {
1333 /* Complain about the multiple references. */
1334 error(link.loc) << "state reference " << link.target <<
1335 " resolves to multiple entry points" << endl;
1336 errorStateLabels( resolved );
1341 /* This is tricky, we stuff resolved epsilon transitions into one long
1342 * vector in the parse data structure. Since the name resolution and
1343 * graph generation both do identical walks of the parse tree we
1344 * should always find the link resolutions in the right place. */
1345 pd->epsilonResolvedLinks.append( resolvedName );
1347 if ( resolvedName != 0 ) {
1348 /* Found the name, bump of the reference count on it. */
1349 resolvedName->numRefs += 1;
1352 /* Complain, no recovery action, the epsilon op will ignore any
1353 * epsilon transitions whose names did not resolve. */
1354 error(link.loc) << "could not resolve label " << link.target << endl;
1358 if ( labels.length() > 0 )
1359 pd->popNameScope( nameFrame );
1363 /* Clean up after a factor with repetition node. */
1364 FactorWithRep::~FactorWithRep()
1367 case StarType: case StarStarType: case OptionalType: case PlusType:
1368 case ExactType: case MaxType: case MinType: case RangeType:
1369 delete factorWithRep;
1371 case FactorWithNegType:
1372 delete factorWithNeg;
1377 /* Evaluate a factor with repetition node. */
1378 FsmAp *FactorWithRep::walk( ParseData *pd )
1384 /* Evaluate the FactorWithRep. */
1385 retFsm = factorWithRep->walk( pd );
1386 if ( retFsm->startState->isFinState() ) {
1387 warning(loc) << "applying kleene star to a machine that "
1388 "accepts zero length word" << endl;
1389 retFsm->unsetFinState( retFsm->startState );
1392 /* Shift over the start action orders then do the kleene star. */
1393 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1395 afterOpMinimize( retFsm );
1398 case StarStarType: {
1399 /* Evaluate the FactorWithRep. */
1400 retFsm = factorWithRep->walk( pd );
1401 if ( retFsm->startState->isFinState() ) {
1402 warning(loc) << "applying kleene star to a machine that "
1403 "accepts zero length word" << endl;
1406 /* Set up the prior descs. All gets priority one, whereas leaving gets
1407 * priority zero. Make a unique key so that these priorities don't
1408 * interfere with any priorities set by the user. */
1409 priorDescs[0].key = pd->nextPriorKey++;
1410 priorDescs[0].priority = 1;
1411 retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
1413 /* Leaveing gets priority 0. Use same unique key. */
1414 priorDescs[1].key = priorDescs[0].key;
1415 priorDescs[1].priority = 0;
1416 retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
1418 /* Shift over the start action orders then do the kleene star. */
1419 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1421 afterOpMinimize( retFsm );
1424 case OptionalType: {
1425 /* Make the null fsm. */
1426 FsmAp *nu = new FsmAp();
1429 /* Evaluate the FactorWithRep. */
1430 retFsm = factorWithRep->walk( pd );
1432 /* Perform the question operator. */
1433 retFsm->unionOp( nu );
1434 afterOpMinimize( retFsm );
1438 /* Evaluate the FactorWithRep. */
1439 retFsm = factorWithRep->walk( pd );
1440 if ( retFsm->startState->isFinState() ) {
1441 warning(loc) << "applying plus operator to a machine that "
1442 "accpets zero length word" << endl;
1445 /* Need a duplicated for the star end. */
1446 FsmAp *dup = new FsmAp( *retFsm );
1448 /* The start func orders need to be shifted before doing the star. */
1449 pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd );
1451 /* Star the duplicate. */
1453 afterOpMinimize( dup );
1455 retFsm->concatOp( dup );
1456 afterOpMinimize( retFsm );
1460 /* Get an int from the repetition amount. */
1461 if ( lowerRep == 0 ) {
1462 /* No copies. Don't need to evaluate the factorWithRep.
1463 * This Defeats the purpose so give a warning. */
1464 warning(loc) << "exactly zero repetitions results "
1465 "in the null machine" << endl;
1467 retFsm = new FsmAp();
1468 retFsm->lambdaFsm();
1471 /* Evaluate the first FactorWithRep. */
1472 retFsm = factorWithRep->walk( pd );
1473 if ( retFsm->startState->isFinState() ) {
1474 warning(loc) << "applying repetition to a machine that "
1475 "accepts zero length word" << endl;
1478 /* The start func orders need to be shifted before doing the
1480 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1482 /* Do the repetition on the machine. Already guarded against n == 0 */
1483 retFsm->repeatOp( lowerRep );
1484 afterOpMinimize( retFsm );
1489 /* Get an int from the repetition amount. */
1490 if ( upperRep == 0 ) {
1491 /* No copies. Don't need to evaluate the factorWithRep.
1492 * This Defeats the purpose so give a warning. */
1493 warning(loc) << "max zero repetitions results "
1494 "in the null machine" << endl;
1496 retFsm = new FsmAp();
1497 retFsm->lambdaFsm();
1500 /* Evaluate the first FactorWithRep. */
1501 retFsm = factorWithRep->walk( pd );
1502 if ( retFsm->startState->isFinState() ) {
1503 warning(loc) << "applying max repetition to a machine that "
1504 "accepts zero length word" << endl;
1507 /* The start func orders need to be shifted before doing the
1509 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1511 /* Do the repetition on the machine. Already guarded against n == 0 */
1512 retFsm->optionalRepeatOp( upperRep );
1513 afterOpMinimize( retFsm );
1518 /* Evaluate the repeated machine. */
1519 retFsm = factorWithRep->walk( pd );
1520 if ( retFsm->startState->isFinState() ) {
1521 warning(loc) << "applying min repetition to a machine that "
1522 "accepts zero length word" << endl;
1525 /* The start func orders need to be shifted before doing the repetition
1526 * and the kleene star. */
1527 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1529 if ( lowerRep == 0 ) {
1530 /* Acts just like a star op on the machine to return. */
1532 afterOpMinimize( retFsm );
1535 /* Take a duplicate for the plus. */
1536 FsmAp *dup = new FsmAp( *retFsm );
1538 /* Do repetition on the first half. */
1539 retFsm->repeatOp( lowerRep );
1540 afterOpMinimize( retFsm );
1542 /* Star the duplicate. */
1544 afterOpMinimize( dup );
1546 /* Tak on the kleene star. */
1547 retFsm->concatOp( dup );
1548 afterOpMinimize( retFsm );
1553 /* Check for bogus range. */
1554 if ( upperRep - lowerRep < 0 ) {
1555 error(loc) << "invalid range repetition" << endl;
1557 /* Return null machine as recovery. */
1558 retFsm = new FsmAp();
1559 retFsm->lambdaFsm();
1561 else if ( lowerRep == 0 && upperRep == 0 ) {
1562 /* No copies. Don't need to evaluate the factorWithRep. This
1563 * defeats the purpose so give a warning. */
1564 warning(loc) << "zero to zero repetitions results "
1565 "in the null machine" << endl;
1567 retFsm = new FsmAp();
1568 retFsm->lambdaFsm();
1571 /* Now need to evaluate the repeated machine. */
1572 retFsm = factorWithRep->walk( pd );
1573 if ( retFsm->startState->isFinState() ) {
1574 warning(loc) << "applying range repetition to a machine that "
1575 "accepts zero length word" << endl;
1578 /* The start func orders need to be shifted before doing both kinds
1580 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1582 if ( lowerRep == 0 ) {
1583 /* Just doing max repetition. Already guarded against n == 0. */
1584 retFsm->optionalRepeatOp( upperRep );
1585 afterOpMinimize( retFsm );
1587 else if ( lowerRep == upperRep ) {
1588 /* Just doing exact repetition. Already guarded against n == 0. */
1589 retFsm->repeatOp( lowerRep );
1590 afterOpMinimize( retFsm );
1593 /* This is the case that 0 < lowerRep < upperRep. Take a
1594 * duplicate for the optional repeat. */
1595 FsmAp *dup = new FsmAp( *retFsm );
1597 /* Do repetition on the first half. */
1598 retFsm->repeatOp( lowerRep );
1599 afterOpMinimize( retFsm );
1601 /* Do optional repetition on the second half. */
1602 dup->optionalRepeatOp( upperRep - lowerRep );
1603 afterOpMinimize( dup );
1605 /* Tak on the duplicate machine. */
1606 retFsm->concatOp( dup );
1607 afterOpMinimize( retFsm );
1612 case FactorWithNegType: {
1613 /* Evaluate the Factor. Pass it up. */
1614 retFsm = factorWithNeg->walk( pd );
1620 void FactorWithRep::makeNameTree( ParseData *pd )
1631 factorWithRep->makeNameTree( pd );
1633 case FactorWithNegType:
1634 factorWithNeg->makeNameTree( pd );
1639 void FactorWithRep::resolveNameRefs( ParseData *pd )
1650 factorWithRep->resolveNameRefs( pd );
1652 case FactorWithNegType:
1653 factorWithNeg->resolveNameRefs( pd );
1658 /* Clean up after a factor with negation node. */
1659 FactorWithNeg::~FactorWithNeg()
1663 case CharNegateType:
1664 delete factorWithNeg;
1672 /* Evaluate a factor with negation node. */
1673 FsmAp *FactorWithNeg::walk( ParseData *pd )
1679 /* Evaluate the factorWithNeg. */
1680 FsmAp *toNegate = factorWithNeg->walk( pd );
1682 /* Negation is subtract from dot-star. */
1683 retFsm = dotStarFsm( pd );
1684 retFsm->subtractOp( toNegate );
1685 afterOpMinimize( retFsm );
1688 case CharNegateType: {
1689 /* Evaluate the factorWithNeg. */
1690 FsmAp *toNegate = factorWithNeg->walk( pd );
1692 /* CharNegation is subtract from dot. */
1693 retFsm = dotFsm( pd );
1694 retFsm->subtractOp( toNegate );
1695 afterOpMinimize( retFsm );
1699 /* Evaluate the Factor. Pass it up. */
1700 retFsm = factor->walk( pd );
1706 void FactorWithNeg::makeNameTree( ParseData *pd )
1710 case CharNegateType:
1711 factorWithNeg->makeNameTree( pd );
1714 factor->makeNameTree( pd );
1719 void FactorWithNeg::resolveNameRefs( ParseData *pd )
1723 case CharNegateType:
1724 factorWithNeg->resolveNameRefs( pd );
1727 factor->resolveNameRefs( pd );
1732 /* Clean up after a factor node. */
1753 case LongestMatchType:
1754 delete longestMatch;
1759 /* Evaluate a factor node. */
1760 FsmAp *Factor::walk( ParseData *pd )
1765 rtnVal = literal->walk( pd );
1768 rtnVal = range->walk( pd );
1771 rtnVal = reItem->walk( pd, 0 );
1774 rtnVal = regExpr->walk( pd, 0 );
1777 rtnVal = varDef->walk( pd );
1780 rtnVal = join->walk( pd );
1782 case LongestMatchType:
1783 rtnVal = longestMatch->walk( pd );
1790 void Factor::makeNameTree( ParseData *pd )
1799 varDef->makeNameTree( loc, pd );
1802 join->makeNameTree( pd );
1804 case LongestMatchType:
1805 longestMatch->makeNameTree( pd );
1810 void Factor::resolveNameRefs( ParseData *pd )
1819 varDef->resolveNameRefs( pd );
1822 join->resolveNameRefs( pd );
1824 case LongestMatchType:
1825 longestMatch->resolveNameRefs( pd );
1830 /* Clean up a range object. Must delete the two literals. */
1837 /* Evaluate a range. Gets the lower an upper key and makes an fsm range. */
1838 FsmAp *Range::walk( ParseData *pd )
1840 /* Construct and verify the suitability of the lower end of the range. */
1841 FsmAp *lowerFsm = lowerLit->walk( pd );
1842 if ( !lowerFsm->checkSingleCharMachine() ) {
1843 error(lowerLit->token.loc) <<
1844 "bad range lower end, must be a single character" << endl;
1847 /* Construct and verify the upper end. */
1848 FsmAp *upperFsm = upperLit->walk( pd );
1849 if ( !upperFsm->checkSingleCharMachine() ) {
1850 error(upperLit->token.loc) <<
1851 "bad range upper end, must be a single character" << endl;
1854 /* Grab the keys from the machines, then delete them. */
1855 Key lowKey = lowerFsm->startState->outList.head->lowKey;
1856 Key highKey = upperFsm->startState->outList.head->lowKey;
1860 /* Validate the range. */
1861 if ( lowKey > highKey ) {
1862 /* Recover by setting upper to lower; */
1863 error(lowerLit->token.loc) << "lower end of range is greater then upper end" << endl;
1867 /* Return the range now that it is validated. */
1868 FsmAp *retFsm = new FsmAp();
1869 retFsm->rangeFsm( lowKey, highKey );
1873 /* Evaluate a literal object. */
1874 FsmAp *Literal::walk( ParseData *pd )
1876 /* FsmAp to return, is the alphabet signed. */
1881 /* Make the fsm key in int format. */
1882 Key fsmKey = makeFsmKeyNum( token.data, token.loc, pd );
1883 /* Make the new machine. */
1884 rtnVal = new FsmAp();
1885 rtnVal->concatFsm( fsmKey );
1889 /* Make the array of keys in int format. */
1891 bool caseInsensitive;
1892 token.prepareLitString( interp, caseInsensitive );
1893 Key *arr = new Key[interp.length];
1894 makeFsmKeyArray( arr, interp.data, interp.length, pd );
1896 /* Make the new machine. */
1897 rtnVal = new FsmAp();
1898 if ( caseInsensitive )
1899 rtnVal->concatFsmCI( arr, interp.length );
1901 rtnVal->concatFsm( arr, interp.length );
1902 delete[] interp.data;
1909 /* Clean up after a regular expression object. */
1922 /* Evaluate a regular expression object. */
1923 FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex )
1925 /* This is the root regex, pass down a pointer to this. */
1926 if ( rootRegex == 0 )
1932 /* Walk both items. */
1933 rtnVal = regExpr->walk( pd, rootRegex );
1934 FsmAp *fsm2 = item->walk( pd, rootRegex );
1935 rtnVal->concatOp( fsm2 );
1939 rtnVal = new FsmAp();
1940 rtnVal->lambdaFsm();
1947 /* Clean up after an item in a regular expression. */
1961 /* Evaluate a regular expression object. */
1962 FsmAp *ReItem::walk( ParseData *pd, RegExpr *rootRegex )
1964 /* The fsm to return, is the alphabet signed? */
1969 /* Move the data into an integer array and make a concat fsm. */
1970 Key *arr = new Key[token.length];
1971 makeFsmKeyArray( arr, token.data, token.length, pd );
1973 /* Make the concat fsm. */
1974 rtnVal = new FsmAp();
1975 if ( rootRegex != 0 && rootRegex->caseInsensitive )
1976 rtnVal->concatFsmCI( arr, token.length );
1978 rtnVal->concatFsm( arr, token.length );
1983 /* Make the dot fsm. */
1984 rtnVal = dotFsm( pd );
1988 /* Get the or block and minmize it. */
1989 rtnVal = orBlock->walk( pd, rootRegex );
1990 if ( rtnVal == 0 ) {
1991 rtnVal = new FsmAp();
1992 rtnVal->lambdaFsm();
1994 rtnVal->minimizePartition2();
1998 /* Get the or block and minimize it. */
1999 FsmAp *fsm = orBlock->walk( pd, rootRegex );
2000 fsm->minimizePartition2();
2002 /* Make a dot fsm and subtract from it. */
2003 rtnVal = dotFsm( pd );
2004 rtnVal->subtractOp( fsm );
2005 rtnVal->minimizePartition2();
2010 /* If the item is followed by a star, then apply the star op. */
2012 if ( rtnVal->startState->isFinState() ) {
2013 warning(loc) << "applying kleene star to a machine that "
2014 "accpets zero length word" << endl;
2018 rtnVal->minimizePartition2();
2023 /* Clean up after an or block of a regular expression. */
2024 ReOrBlock::~ReOrBlock()
2037 /* Evaluate an or block of a regular expression. */
2038 FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex )
2043 /* Evaluate the two fsm. */
2044 FsmAp *fsm1 = orBlock->walk( pd, rootRegex );
2045 FsmAp *fsm2 = item->walk( pd, rootRegex );
2049 fsm1->unionOp( fsm2 );
2062 /* Evaluate an or block item of a regular expression. */
2063 FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex )
2065 /* The return value, is the alphabet signed? */
2069 /* Make the or machine. */
2070 rtnVal = new FsmAp();
2072 /* Put the or data into an array of ints. Note that we find unique
2073 * keys. Duplicates are silently ignored. The alternative would be to
2074 * issue warning or an error but since we can't with [a0-9a] or 'a' |
2075 * 'a' don't bother here. */
2077 makeFsmUniqueKeyArray( keySet, token.data, token.length,
2078 rootRegex != 0 ? rootRegex->caseInsensitive : false, pd );
2080 /* Run the or operator. */
2081 rtnVal->orFsm( keySet.data, keySet.length() );
2085 /* Make the upper and lower keys. */
2086 Key lowKey = makeFsmKeyChar( lower, pd );
2087 Key highKey = makeFsmKeyChar( upper, pd );
2089 /* Validate the range. */
2090 if ( lowKey > highKey ) {
2091 /* Recover by setting upper to lower; */
2092 error(loc) << "lower end of range is greater then upper end" << endl;
2096 /* Make the range machine. */
2097 rtnVal = new FsmAp();
2098 rtnVal->rangeFsm( lowKey, highKey );
2100 if ( rootRegex != 0 && rootRegex->caseInsensitive ) {
2101 if ( lowKey <= 'Z' && 'A' <= highKey ) {
2102 Key otherLow = lowKey < 'A' ? Key('A') : lowKey;
2103 Key otherHigh = 'Z' < highKey ? Key('Z') : highKey;
2105 otherLow = 'a' + ( otherLow - 'A' );
2106 otherHigh = 'a' + ( otherHigh - 'A' );
2108 FsmAp *otherRange = new FsmAp();
2109 otherRange->rangeFsm( otherLow, otherHigh );
2110 rtnVal->unionOp( otherRange );
2111 rtnVal->minimizePartition2();
2113 else if ( lowKey <= 'z' && 'a' <= highKey ) {
2114 Key otherLow = lowKey < 'a' ? Key('a') : lowKey;
2115 Key otherHigh = 'z' < highKey ? Key('z') : highKey;
2117 otherLow = 'A' + ( otherLow - 'a' );
2118 otherHigh = 'A' + ( otherHigh - 'a' );
2120 FsmAp *otherRange = new FsmAp();
2121 otherRange->rangeFsm( otherLow, otherHigh );
2122 rtnVal->unionOp( otherRange );
2123 rtnVal->minimizePartition2();