EOF actions and targets for processing tokens are now set in scanner final
author thurston <thurston@052ea7fc-9027-0410-9066-f65837a77df0>
Fri, 28 Sep 2007 03:56:01 +0000 (03:56 +0000)
committer thurston <thurston@052ea7fc-9027-0410-9066-f65837a77df0>
Fri, 28 Sep 2007 03:56:01 +0000 (03:56 +0000)
states. EOF transitions (present when EOF targets are present) are now
passed to the backend and stored. Appropriate code not yet generated.

git-svn-id: http://svn.complang.org/ragel/trunk@291 052ea7fc-9027-0410-9066-f65837a77df0

ragel/fsmstate.cpp
ragel/parsetree.cpp
ragel/xmlcodegen.cpp
ragel/xmlcodegen.h
redfsm/gendata.cpp
redfsm/gendata.h
redfsm/redfsm.h
redfsm/xmlparse.kh
redfsm/xmlparse.kl
redfsm/xmlscan.rl
redfsm/xmltags.gperf

index 1250329..82675ba 100644 (file)
@@ -170,7 +170,7 @@ StateAp::~StateAp()
 }
 
 /* Compare two states using pointers to the states. With the approximate
- * compare the idea is that if the compare finds them the same, they can
+ * compare, the idea is that if the compare finds them the same, they can
  * immediately be merged. */
 int ApproxCompare::compare( const StateAp *state1, const StateAp *state2 )
 {
@@ -223,14 +223,17 @@ int ApproxCompare::compare( const StateAp *state1, const StateAp *state2 )
                }
        }
 
-       /* Not yet supporting minimization of states with EOF targets. */
-       assert( state1->eofTarget == 0 && state2->eofTarget == 0 );
+       /* Check EOF targets. */
+       if ( state1->eofTarget < state2->eofTarget )
+               return -1;
+       else if ( state1->eofTarget > state2->eofTarget )
+               return 1;
 
        /* Got through the entire state comparison, deem them equal. */
        return 0;
 }
 
-/* Compare class for the sort that does the intial partition of compaction. */
+/* Compare class used in the initial partition. */
 int InitPartitionCompare::compare( const StateAp *state1 , const StateAp *state2 )
 {
        int compareRes;
@@ -306,9 +309,6 @@ int InitPartitionCompare::compare( const StateAp *state1 , const StateAp *state2
                }
        }
 
-       /* Not yet supporting minimization of states with EOF targets. */
-       assert( state1->eofTarget == 0 && state2->eofTarget == 0 );
-
        return 0;
 }
 
@@ -347,8 +347,18 @@ int PartitionCompare::compare( const StateAp *state1, const StateAp *state2 )
                }
        }
 
-       /* Not yet supporting minimization of states with EOF targets. */
-       assert( state1->eofTarget == 0 && state2->eofTarget == 0 );
+       /* Test eof targets. */
+       if ( state1->eofTarget == 0 && state2->eofTarget != 0 )
+               return -1;
+       else if ( state1->eofTarget != 0 && state2->eofTarget == 0 )
+               return 1;
+       else if ( state1->eofTarget != 0 ) {
+               /* Both eof targets are set. */
+               compareRes = CmpOrd< MinPartition* >::compare( 
+                       state1->eofTarget->alg.partition, state2->eofTarget->alg.partition );
+               if ( compareRes != 0 )
+                       return compareRes;
+       }
 
        return 0;
 }
index 9e8a65e..f809f3c 100644 (file)
@@ -438,10 +438,16 @@ void LongestMatch::runLonestMatch( ParseData *pd, FsmAp *graph )
                                 * the last character of the token was one back and restart. */
                                graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, 
                                                &st->lmItemSet[0]->actOnNext, 1 );
+                               st->eofActionTable.setAction( lmErrActionOrd, 
+                                               st->lmItemSet[0]->actOnNext );
+                               st->eofTarget = graph->startState;
                        }
                        else {
                                graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, 
                                                &st->lmItemSet[0]->actLagBehind, 1 );
+                               st->eofActionTable.setAction( lmErrActionOrd, 
+                                               st->lmItemSet[0]->actLagBehind );
+                               st->eofTarget = graph->startState;
                        }
                }
                else if ( st->lmItemSet.length() > 1 ) {
@@ -454,6 +460,8 @@ void LongestMatch::runLonestMatch( ParseData *pd, FsmAp *graph )
                        /* On error, execute the action select and go to the start state. */
                        graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, 
                                        &lmActSelect, 1 );
+                       st->eofActionTable.setAction( lmErrActionOrd, lmActSelect );
+                       st->eofTarget = graph->startState;
                }
        }
        
index 3645c38..046c2df 100644 (file)
@@ -175,6 +175,26 @@ void XMLCodeGen::writeTransList( StateAp *state )
        out << "      </trans_list>\n";
 }
 
+void XMLCodeGen::writeEofTrans( StateAp *state )
+{
+       RedActionTable *eofActions = 0;
+       if ( state->eofActionTable.length() > 0 )
+               eofActions = actionTableMap.find( state->eofActionTable );
+       
+       /* The <eof_t> is used when there is an eof target, otherwise the eof
+        * action goes into state actions. */
+       if ( state->eofTarget != 0 ) {
+               out << "      <eof_t>" << state->eofTarget->alg.stateNum;
+
+               if ( eofActions != 0 )
+                       out << " " << eofActions->id;
+               else
+                       out << " x"; 
+
+               out << "</eof_t>" << endl;
+       }
+}
+
 void XMLCodeGen::writeText( InlineItem *item )
 {
        if ( item->prev == 0 || item->prev->type != InlineItem::Text )
@@ -438,8 +458,10 @@ void XMLCodeGen::writeStateActions( StateAp *state )
        if ( state->fromStateActionTable.length() > 0 )
                fromStateActions = actionTableMap.find( state->fromStateActionTable );
 
+       /* EOF actions go out here only if the state has no eof target. If it has
+        * an eof target then an eof transition will be used instead. */
        RedActionTable *eofActions = 0;
-       if ( state->eofActionTable.length() > 0 )
+       if ( state->eofTarget == 0 && state->eofActionTable.length() > 0 )
                eofActions = actionTableMap.find( state->eofActionTable );
        
        if ( toStateActions != 0 || fromStateActions != 0 || eofActions != 0 ) {
@@ -457,7 +479,9 @@ void XMLCodeGen::writeStateActions( StateAp *state )
                if ( eofActions != 0 )
                        out << " " << eofActions->id;
                else
-                       out << " x"; out << "</state_actions>\n";
+                       out << " x";
+
+               out << "</state_actions>\n";
        }
 }
 
@@ -489,6 +513,7 @@ void XMLCodeGen::writeStateList()
                out << ">\n";
 
                writeStateActions( st );
+               writeEofTrans( st );
                writeStateConditions( st );
                writeTransList( st );
 
index f366029..b82a8da 100644 (file)
@@ -112,6 +112,7 @@ private:
        void reduceTrans( TransAp *trans );
        void reduceActionTables();
        void writeTransList( StateAp *state );
+       void writeEofTrans( StateAp *state );
        void writeTrans( Key lowKey, Key highKey, TransAp *defTrans );
        void writeAction( Action *action );
        void writeLmSwitch( InlineItem *item );
index d503261..24dbd07 100644 (file)
@@ -235,7 +235,7 @@ void CodeGenData::setFinal( int snum )
 
 
 void CodeGenData::setStateActions( int snum, long toStateAction, 
-                       long fromStateAction, long eofAction )
+               long fromStateAction, long eofAction )
 {
        RedStateAp *curState = allStates + snum;
        if ( toStateAction >= 0 )
@@ -246,6 +246,14 @@ void CodeGenData::setStateActions( int snum, long toStateAction,
                curState->eofAction = allActionTables + eofAction;
 }
 
+/* Record the EOF transition of state snum: it targets state eofTarget and
+ * carries the action table with index actId. A negative actId means the
+ * transition has no action, the same convention setStateActions applies to
+ * its ids. In that case the action pointer is left null instead of pointing
+ * before the start of allActionTables, so checks of the form
+ * eofTrans->action != 0 behave correctly.
+ * NOTE(review): presumably the "x" placeholder in <eof_t> is read back as a
+ * negative id -- confirm against readOffsetPtr. */
+void CodeGenData::setEofTrans( int snum, long eofTarget, long actId )
+{
+       RedStateAp *curState = allStates + snum;
+       RedStateAp *targState = allStates + eofTarget;
+
+       RedAction *eofAct = 0;
+       if ( actId >= 0 )
+               eofAct = allActionTables + actId;
+
+       curState->eofTrans = redFsm->allocateTrans( targState, eofAct );
+}
+
 void CodeGenData::resolveTargetStates( InlineList *inlineList )
 {
        for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
@@ -398,6 +406,13 @@ void CodeGenData::findFinalActionRefs()
                                item->value->numTransRefs += 1;
                }
 
+               /* Reference count eof transitions. */
+               if ( st->eofTrans != 0 && st->eofTrans->action != 0 ) {
+                       st->eofTrans->action->numTransRefs += 1;
+                       for ( ActionTable::Iter item = st->eofTrans->action->key; item.lte(); item++ )
+                               item->value->numTransRefs += 1;
+               }
+
                /* Reference count to state actions. */
                if ( st->toStateAction != 0 ) {
                        st->toStateAction->numToStateRefs += 1;
index 4e8f527..b61c207 100644 (file)
@@ -145,6 +145,7 @@ struct CodeGenData
        void finishTransList( int snum );
        void setStateActions( int snum, long toStateAction, 
                        long fromStateAction, long eofAction );
+       void setEofTrans( int snum, long targ, long eofAction );
        void setForcedErrorState()
                { redFsm->forcedErrorState = true; }
 
index 64d73e7..b4af46b 100644 (file)
@@ -338,6 +338,7 @@ struct RedStateAp
                toStateAction(0), 
                fromStateAction(0), 
                eofAction(0), 
+               eofTrans(0), 
                id(0), 
                bAnyRegCurStateRef(false),
                partitionBoundary(false),
@@ -368,6 +369,7 @@ struct RedStateAp
        RedAction *toStateAction;
        RedAction *fromStateAction;
        RedAction *eofAction;
+       RedTransAp *eofTrans;
        int id;
        StateCondList stateCondList;
        StateCondVect stateCondVect;
index e220fc0..41d8ac3 100644 (file)
@@ -117,7 +117,7 @@ struct Parser
 
                token TAG_p_expr, TAG_pe_expr, TAG_eof_expr, TAG_cs_expr, TAG_top_expr,
                        TAG_stack_expr, TAG_act_expr, TAG_tokstart_expr, TAG_tokend_expr,
-                       TAG_data_expr, TAG_prepush, TAG_postpop;
+                       TAG_data_expr, TAG_prepush, TAG_postpop, TAG_eof_t;
        }%%
 
        %% write instance_data;
index ca0bc4d..4e12b58 100644 (file)
@@ -395,6 +395,7 @@ state_item_list: state_item_list state_item;
 state_item_list: ;
 
 state_item: tag_state_actions;
+state_item: tag_eof_t;
 state_item: tag_state_cond_list;
 state_item: tag_trans_list;
 
@@ -410,6 +411,15 @@ tag_state_actions: TAG_state_actions '/' TAG_state_actions
                                fromStateAction, eofAction );
        };
 
+tag_eof_t: TAG_eof_t '/' TAG_eof_t
+       final {
+               char *et = $3->tag->content; /* text content attached to the eof_t tag */
+               long targ = readOffsetPtr( et, &et ); /* first value read; passed on as the eof target state */
+               long eofAction = readOffsetPtr( et, &et ); /* second value read; passed on as the action table id */
+
+               cgd->setEofTrans( curState, targ, eofAction ); /* store the eof transition for curState */
+       };
+
 tag_state_cond_list: tag_state_cond_list_head state_cond_list '/' TAG_cond_list;
 
 tag_state_cond_list_head: TAG_cond_list
index 474a49c..0976ed4 100644 (file)
@@ -315,7 +315,7 @@ int xml_parse( std::istream &input, char *fileName,
                                        case TAG_set_act: case TAG_start_state:
                                        case TAG_error_state: case TAG_state_actions: 
                                        case TAG_action_table: case TAG_cond_space: 
-                                       case TAG_c: case TAG_ex:
+                                       case TAG_c: case TAG_ex: case TAG_eof_t:
                                                tag->content = new char[scanner.buffer.length+1];
                                                memcpy( tag->content, scanner.buffer.data,
                                                                scanner.buffer.length );
index cc175e7..5b07e8a 100644 (file)
@@ -91,3 +91,4 @@ tokend_expr, TAG_tokend_expr
 data_expr, TAG_data_expr
 prepush, TAG_prepush
 postpop, TAG_postpop
+eof_t, TAG_eof_t