Added a flag -d for turning off the removal of duplicate actions from actions

[external/ragel.git] / ragel / parsetree.cpp
diff --git a/ragel/parsetree.cpp b/ragel/parsetree.cpp

index 9e8a65e..048d171 100644 (file)
--- a/ragel/parsetree.cpp
+++ b/ragel/parsetree.cpp
@@ -162,7 +162,7 @@ InputLoc LongestMatchPart::getLoc()
   */
  
  Action *LongestMatch::newAction( ParseData *pd, const InputLoc &loc, 
-               char *name, InlineList *inlineList )
+               const char *name, InlineList *inlineList )
  {
         Action *action = new Action( loc, name, inlineList, pd->nextCondId++ );
         action->actionRefs.append( pd->curNameInst );
@@ -178,13 +178,15 @@ void LongestMatch::makeActions( ParseData *pd )
                 /* For each part create actions for setting the match type.  We need
                  * to do this so that the actions will go into the actionIndex. */
                 InlineList *inlineList = new InlineList;
-               inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, InlineItem::LmSetActId ) );
+               inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, 
+                               InlineItem::LmSetActId ) );
                 char *actName = new char[50];
                 sprintf( actName, "store%i", lmi->longestMatchId );
                 lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList );
         }
  
-       /* Make actions that execute the user action and restart on the last character. */
+       /* Make actions that execute the user action and restart on the last
+        * character. */
         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
                 /* For each part create actions for setting the match type.  We need
                  * to do this so that the actions will go into the actionIndex. */
@@ -292,13 +294,13 @@ void LongestMatch::restart( FsmAp *graph, TransAp *trans )
         graph->attachTrans( fromState, graph->startState, trans );
  }
  
-void LongestMatch::runLonestMatch( ParseData *pd, FsmAp *graph )
+void LongestMatch::runLongestMatch( ParseData *pd, FsmAp *graph )
  {
         graph->markReachableFromHereStopFinal( graph->startState );
         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
-               if ( ms->stateBits & SB_ISMARKED ) {
+               if ( ms->stateBits & STB_ISMARKED ) {
                         ms->lmItemSet.insert( 0 );
-                       ms->stateBits &= ~ SB_ISMARKED;
+                       ms->stateBits &= ~ STB_ISMARKED;
                 }
         }
  
@@ -313,16 +315,16 @@ void LongestMatch::runLonestMatch( ParseData *pd, FsmAp *graph )
                                 StateAp *toState = trans->toState;
                                 assert( toState );
  
-                               /* Check if there are transitions out, this may be a very
-                                * close approximation? Out transitions going nowhere?
-                                * FIXME: Check. */
+                               /* Can only optimize this if there are no transitions out.
+                                * Note there can be out transitions going nowhere with
+                                * actions and they too must inhibit this optimization. */
                                 if ( toState->outList.length() > 0 ) {
                                         /* Fill the item sets. */
                                         graph->markReachableFromHereStopFinal( toState );
                                         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
-                                               if ( ms->stateBits & SB_ISMARKED ) {
+                                               if ( ms->stateBits & STB_ISMARKED ) {
                                                         ms->lmItemSet.insert( lmAct->value );
-                                                       ms->stateBits &= ~ SB_ISMARKED;
+                                                       ms->stateBits &= ~ STB_ISMARKED;
                                                 }
                                         }
                                 }
@@ -338,10 +340,10 @@ void LongestMatch::runLonestMatch( ParseData *pd, FsmAp *graph )
         int maxItemSetLength = 0;
         graph->markReachableFromHereStopFinal( graph->startState );
         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
-               if ( ms->stateBits & SB_ISMARKED ) {
+               if ( ms->stateBits & STB_ISMARKED ) {
                         if ( ms->lmItemSet.length() > maxItemSetLength )
                                 maxItemSetLength = ms->lmItemSet.length();
-                       ms->stateBits &= ~ SB_ISMARKED;
+                       ms->stateBits &= ~ STB_ISMARKED;
                 }
         }
  
@@ -374,12 +376,18 @@ void LongestMatch::runLonestMatch( ParseData *pd, FsmAp *graph )
                                 StateAp *toState = trans->toState;
                                 assert( toState );
  
-                               /* Check if there are transitions out, this may be a very
-                                * close approximation? Out transitions going nowhere?
-                                * FIXME: Check. */
+                               /* Can only optimize this if there are no transitions out.
+                                * Note there can be out transitions going nowhere with
+                                * actions and they too must inhibit this optimization. */
                                 if ( toState->outList.length() == 0 ) {
                                         /* Can execute the immediate action for the longest match
-                                        * part. Redirect the action to the start state. */
+                                        * part. Redirect the action to the start state.
+                                        *
+                                        * NOTE: When we need to inhibit on_last due to leaving
+                                        * actions the above test suffices. If the state has out
+                                        * actions then it will fail because the out action will
+                                        * have been transferred to an error transition, which
+                                        * makes the outlist non-empty. */
                                         trans->actionTable.setAction( lmAct->key, 
                                                         lmAct->value->actOnLast );
                                         restartTrans.append( trans );
@@ -394,12 +402,12 @@ void LongestMatch::runLonestMatch( ParseData *pd, FsmAp *graph )
                                         maxItemSetLength = 0;
                                         graph->markReachableFromHereStopFinal( toState );
                                         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
-                                               if ( ms->stateBits & SB_ISMARKED ) {
+                                               if ( ms->stateBits & STB_ISMARKED ) {
                                                         if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
                                                                 nonFinalNonEmptyItemSet = true;
                                                         if ( ms->lmItemSet.length() > maxItemSetLength )
                                                                 maxItemSetLength = ms->lmItemSet.length();
-                                                       ms->stateBits &= ~ SB_ISMARKED;
+                                                       ms->stateBits &= ~ STB_ISMARKED;
                                                 }
                                         }
  
@@ -438,14 +446,20 @@ void LongestMatch::runLonestMatch( ParseData *pd, FsmAp *graph )
                                  * the last character of the token was one back and restart. */
                                 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, 
                                                 &st->lmItemSet[0]->actOnNext, 1 );
+                               st->eofActionTable.setAction( lmErrActionOrd, 
+                                               st->lmItemSet[0]->actOnNext );
+                               st->eofTarget = graph->startState;
                         }
                         else {
                                 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, 
                                                 &st->lmItemSet[0]->actLagBehind, 1 );
+                               st->eofActionTable.setAction( lmErrActionOrd, 
+                                               st->lmItemSet[0]->actLagBehind );
+                               st->eofTarget = graph->startState;
                         }
                 }
                 else if ( st->lmItemSet.length() > 1 ) {
-                       /* Need to use the select. Take note of the which items the select
+                       /* Need to use the select. Take note of which items the select
                          * is needed for so only the necessary actions are included. */
                         for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
                                 if ( *plmi != 0 )
@@ -454,6 +468,8 @@ void LongestMatch::runLonestMatch( ParseData *pd, FsmAp *graph )
                         /* On error, execute the action select and go to the start state. */
                         graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, 
                                         &lmActSelect, 1 );
+                       st->eofActionTable.setAction( lmErrActionOrd, lmActSelect );
+                       st->eofTarget = graph->startState;
                 }
         }
         
@@ -461,6 +477,14 @@ void LongestMatch::runLonestMatch( ParseData *pd, FsmAp *graph )
         graph->setFinState( graph->startState );
  }
  
+void LongestMatch::transferScannerLeavingActions( FsmAp *graph )
+{
+       for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+               if ( st->outActionTable.length() > 0 )
+                       graph->setErrorActions( st, st->outActionTable );
+       }
+}
+
  FsmAp *LongestMatch::walk( ParseData *pd )
  {
         /* The longest match has it's own name scope. */
@@ -475,6 +499,13 @@ FsmAp *LongestMatch::walk( ParseData *pd )
                 parts[i]->longMatchAction( pd->curActionOrd++, lmi );
         }
  
+       /* Before we union the patterns we need to deal with leaving actions. They
+        * are transfered to error transitions out of the final states (like local
+        * error actions) and to eof actions. In the scanner we need to forbid
+        * on_last for any final state that has an leaving action. */
+       for ( int i = 0; i < longestMatchList->length(); i++ )
+               transferScannerLeavingActions( parts[i] );
+
         /* Union machines one and up with machine zero. The grammar dictates that
          * there will always be at least one part. */
         FsmAp *rtnVal = parts[0];
@@ -483,7 +514,7 @@ FsmAp *LongestMatch::walk( ParseData *pd )
                 afterOpMinimize( rtnVal );
         }
  
-       runLonestMatch( pd, rtnVal );
+       runLongestMatch( pd, rtnVal );
  
         /* Pop the name scope. */
         pd->popNameScope( nameFrame );
@@ -832,8 +863,8 @@ FsmAp *Term::walk( ParseData *pd, bool lastInSeq )
                         priorDescs[0].priority = 0;
                         rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
  
-                       /* The start transitions right machine get the higher priority.
-                        * Use the same unique key. */
+                       /* The start transitions of the right machine gets the higher
+                        * priority. Use the same unique key. */
                         priorDescs[1].key = priorDescs[0].key;
                         priorDescs[1].priority = 1;
                         rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
@@ -863,6 +894,14 @@ FsmAp *Term::walk( ParseData *pd, bool lastInSeq )
                         priorDescs[1].priority = 1;
                         rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
  
+                       /* If the right machine's start state is final we need to guard
+                        * against the left machine persisting by moving through the empty
+                        * string. */
+                       if ( rhs->startState->isFinState() ) {
+                               rhs->startState->outPriorTable.setPrior( 
+                                               pd->curPriorOrd++, &priorDescs[1] );
+                       }
+
                         /* Perform concatenation. */
                         rtnVal->concatOp( rhs );
                         afterOpMinimize( rtnVal, lastInSeq );
@@ -881,14 +920,14 @@ FsmAp *Term::walk( ParseData *pd, bool lastInSeq )
                         priorDescs[0].priority = 1;
                         rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
  
-                       /* The right machine gets the lower priority.  Since
-                        * startTransPrior might unnecessarily increase the number of
-                        * states during the state machine construction process (due to
-                        * isolation), we use allTransPrior instead, which has the same
-                        * effect. */
+                       /* The right machine gets the lower priority. We cannot use
+                        * allTransPrior here in case the start state of the right machine
+                        * is final. It would allow the right machine thread to run along
+                        * with the left if just passing through the start state. Using
+                        * startFsmPrior prevents this. */
                         priorDescs[1].key = priorDescs[0].key;
                         priorDescs[1].priority = 0;
-                       rhs->allTransPrior( pd->curPriorOrd++, &priorDescs[1] );
+                       rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
  
                         /* Perform concatenation. */
                         rtnVal->concatOp( rhs );