2 * Copyright 2001, 2002, 2006 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 #include "mergesort.h"
27 #include "parsedata.h"
32 /* Make a new state. The new state will be put on the graph's
33 * list of state. The new state can be created final or non final. */
34 StateAp *FsmAp::addState()
36 /* Make the new state to return. */
37 StateAp *state = new StateAp();
39 if ( misfitAccounting ) {
40 /* Create the new state on the misfit list. All states are created
41 * with no foreign in transitions. */
42 misfitList.append( state );
45 /* Create the new state. */
46 stateList.append( state );
52 /* Construct an FSM that is the concatenation of an array of characters. A new
53 * machine will be made that has len+1 states with one transition between each
54 * state for each integer in str. IsSigned determines if the integers are to
55 * be considered as signed or unsigned ints. */
56 void FsmAp::concatFsm( Key *str, int len )
58 /* Make the first state and set it as the start state. */
59 StateAp *last = addState();
60 setStartState( last );
62 /* Attach subsequent states. */
63 for ( int i = 0; i < len; i++ ) {
64 StateAp *newState = addState();
65 attachNewTrans( last, newState, str[i], str[i] );
69 /* Make the last state the final state. */
73 /* Case insensitive version of concatFsm. */
74 void FsmAp::concatFsmCI( Key *str, int len )
76 /* Make the first state and set it as the start state. */
77 StateAp *last = addState();
78 setStartState( last );
80 /* Attach subsequent states. */
81 for ( int i = 0; i < len; i++ ) {
82 StateAp *newState = addState();
85 if ( str[i].isLower() )
86 keySet.insert( str[i].toUpper() );
87 if ( str[i].isUpper() )
88 keySet.insert( str[i].toLower() );
89 keySet.insert( str[i] );
91 for ( int i = 0; i < keySet.length(); i++ )
92 attachNewTrans( last, newState, keySet[i], keySet[i] );
97 /* Make the last state the final state. */
101 /* Construct a machine that matches one character. A new machine will be made
102 * that has two states with a single transition between the states. IsSigned
103 * determines if the integers are to be considered as signed or unsigned ints. */
104 void FsmAp::concatFsm( Key chr )
106 /* Two states first start, second final. */
107 setStartState( addState() );
109 StateAp *end = addState();
112 /* Attach on the character. */
113 attachNewTrans( startState, end, chr, chr );
116 /* Construct a machine that matches any character in set. A new machine will
117 * be made that has two states and len transitions between the them. The set
118 * should be ordered correctly accroding to KeyOps and should not contain
120 void FsmAp::orFsm( Key *set, int len )
122 /* Two states first start, second final. */
123 setStartState( addState() );
125 StateAp *end = addState();
128 for ( int i = 1; i < len; i++ )
129 assert( set[i-1] < set[i] );
131 /* Attach on all the integers in the given string of ints. */
132 for ( int i = 0; i < len; i++ )
133 attachNewTrans( startState, end, set[i], set[i] );
136 /* Construct a machine that matches a range of characters. A new machine will
137 * be made with two states and a range transition between them. The range will
138 * match any characters from low to high inclusive. Low should be less than or
139 * equal to high otherwise undefined behaviour results. IsSigned determines
140 * if the integers are to be considered as signed or unsigned ints. */
141 void FsmAp::rangeFsm( Key low, Key high )
143 /* Two states first start, second final. */
144 setStartState( addState() );
146 StateAp *end = addState();
149 /* Attach using the range of characters. */
150 attachNewTrans( startState, end, low, high );
153 /* Construct a machine that a repeated range of characters. */
154 void FsmAp::rangeStarFsm( Key low, Key high)
156 /* One state which is final and is the start state. */
157 setStartState( addState() );
158 setFinState( startState );
160 /* Attach start to start using range of characters. */
161 attachNewTrans( startState, startState, low, high );
164 /* Construct a machine that matches the empty string. A new machine will be
165 * made with only one state. The new state will be both a start and final
166 * state. IsSigned determines if the machine has a signed or unsigned
167 * alphabet. Fsm operations must be done on machines with the same alphabet
169 void FsmAp::lambdaFsm( )
171 /* Give it one state with no transitions making it
172 * the start state and final state. */
173 setStartState( addState() );
174 setFinState( startState );
177 /* Construct a machine that matches nothing at all. A new machine will be
178 * made with only one state. It will not be final. */
179 void FsmAp::emptyFsm( )
181 /* Give it one state with no transitions making it
182 * the start state and final state. */
183 setStartState( addState() );
186 void FsmAp::transferOutData( StateAp *destState, StateAp *srcState )
188 for ( TransList::Iter trans = destState->outList; trans.lte(); trans++ ) {
189 if ( trans->toState != 0 ) {
190 /* Get the actions data from the outActionTable. */
191 trans->actionTable.setActions( srcState->outActionTable );
193 /* Get the priorities from the outPriorTable. */
194 trans->priorTable.setPriors( srcState->outPriorTable );
199 /* Kleene star operator. Makes this machine the kleene star of itself. Any
200 * transitions made going out of the machine and back into itself will be
201 * notified that they are leaving transitions by having the leavingFromState
202 * callback invoked. */
203 void FsmAp::starOp( )
205 /* For the merging process. */
208 /* Turn on misfit accounting to possibly catch the old start state. */
209 setMisfitAccounting( true );
211 /* Create the new new start state. It will be set final after the merging
212 * of the final states with the start state is complete. */
213 StateAp *prevStartState = startState;
215 setStartState( addState() );
217 /* Merge the new start state with the old one to isolate it. */
218 mergeStates( md, startState, prevStartState );
220 /* Merge the start state into all final states. Except the start state on
221 * the first pass. If the start state is set final we will be doubling up
222 * its transitions, which will get transfered to any final states that
223 * follow it in the final state set. This will be determined by the order
224 * of items in the final state set. To prevent this we just merge with the
225 * start on a second pass. */
226 for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) {
227 if ( *st != startState )
228 mergeStatesLeaving( md, *st, startState );
231 /* Now it is safe to merge the start state with itself (provided it
233 if ( startState->isFinState() )
234 mergeStatesLeaving( md, startState, startState );
236 /* Now ensure the new start state is a final state. */
237 setFinState( startState );
239 /* Fill in any states that were newed up as combinations of others. */
242 /* Remove the misfits and turn off misfit accounting. */
244 setMisfitAccounting( false );
247 void FsmAp::repeatOp( int times )
249 /* Must be 1 and up. 0 produces null machine and requires deleting this. */
252 /* A repeat of one does absolutely nothing. */
256 /* Make a machine to make copies from. */
257 FsmAp *copyFrom = new FsmAp( *this );
259 /* Concatentate duplicates onto the end up until before the last. */
260 for ( int i = 1; i < times-1; i++ ) {
261 FsmAp *dup = new FsmAp( *copyFrom );
262 doConcat( dup, 0, false );
265 /* Now use the copyFrom on the end. */
266 doConcat( copyFrom, 0, false );
269 void FsmAp::optionalRepeatOp( int times )
271 /* Must be 1 and up. 0 produces null machine and requires deleting this. */
274 /* A repeat of one optional merely allows zero string. */
276 setFinState( startState );
280 /* Make a machine to make copies from. */
281 FsmAp *copyFrom = new FsmAp( *this );
283 /* The state set used in the from end of the concatentation. Starts with
284 * the initial final state set, then after each concatenation, gets set to
285 * the the final states that come from the the duplicate. */
286 StateSet lastFinSet( finStateSet );
288 /* Set the initial state to zero to allow zero copies. */
289 setFinState( startState );
291 /* Concatentate duplicates onto the end up until before the last. */
292 for ( int i = 1; i < times-1; i++ ) {
293 /* Make a duplicate for concating and set the fin bits to graph 2 so we
294 * can pick out it's final states after the optional style concat. */
295 FsmAp *dup = new FsmAp( *copyFrom );
296 dup->setFinBits( SB_GRAPH2 );
297 doConcat( dup, &lastFinSet, true );
299 /* Clear the last final state set and make the new one by taking only
300 * the final states that come from graph 2.*/
302 for ( int i = 0; i < finStateSet.length(); i++ ) {
303 /* If the state came from graph 2, add it to the last set and clear
305 StateAp *fs = finStateSet[i];
306 if ( fs->stateBits & SB_GRAPH2 ) {
307 lastFinSet.insert( fs );
308 fs->stateBits &= ~SB_GRAPH2;
313 /* Now use the copyFrom on the end, no bits set, no bits to clear. */
314 doConcat( copyFrom, &lastFinSet, true );
318 /* Fsm concatentation worker. Supports treating the concatentation as optional,
319 * which essentially leaves the final states of machine one as final. */
320 void FsmAp::doConcat( FsmAp *other, StateSet *fromStates, bool optional )
322 /* For the merging process. */
323 StateSet finStateSetCopy, startStateSet;
326 /* Turn on misfit accounting for both graphs. */
327 setMisfitAccounting( true );
328 other->setMisfitAccounting( true );
330 /* Get the other's start state. */
331 StateAp *otherStartState = other->startState;
333 /* Unset other's start state before bringing in the entry points. */
334 other->unsetStartState();
336 /* Bring in the rest of other's entry points. */
337 copyInEntryPoints( other );
338 other->entryPoints.empty();
340 /* Bring in other's states into our state lists. */
341 stateList.append( other->stateList );
342 misfitList.append( other->misfitList );
344 /* If from states is not set, then get a copy of our final state set before
345 * we clobber it and use it instead. */
346 if ( fromStates == 0 ) {
347 finStateSetCopy = finStateSet;
348 fromStates = &finStateSetCopy;
351 /* Unset all of our final states and get the final states from other. */
354 finStateSet.insert( other->finStateSet );
356 /* Since other's lists are empty, we can delete the fsm without
357 * affecting any states. */
360 /* Merge our former final states with the start state of other. */
361 for ( int i = 0; i < fromStates->length(); i++ ) {
362 StateAp *state = fromStates->data[i];
364 /* Merge the former final state with other's start state. */
365 mergeStatesLeaving( md, state, otherStartState );
367 /* If the former final state was not reset final then we must clear
368 * the state's out trans data. If it got reset final then it gets to
369 * keep its out trans data. This must be done before fillInStates gets
370 * called to prevent the data from being sourced. */
371 if ( ! state->isFinState() )
372 clearOutData( state );
375 /* Fill in any new states made from merging. */
378 /* Remove the misfits and turn off misfit accounting. */
380 setMisfitAccounting( false );
383 /* Concatenates other to the end of this machine. Other is deleted. Any
384 * transitions made leaving this machine and entering into other are notified
385 * that they are leaving transitions by having the leavingFromState callback
387 void FsmAp::concatOp( FsmAp *other )
389 /* Assert same signedness and return graph concatenation op. */
390 doConcat( other, 0, false );
394 void FsmAp::doOr( FsmAp *other )
396 /* For the merging process. */
399 /* Build a state set consisting of both start states */
400 StateSet startStateSet;
401 startStateSet.insert( startState );
402 startStateSet.insert( other->startState );
404 /* Both of the original start states loose their start state status. */
406 other->unsetStartState();
408 /* Bring in the rest of other's entry points. */
409 copyInEntryPoints( other );
410 other->entryPoints.empty();
412 /* Merge the lists. This will move all the states from other
413 * into this. No states will be deleted. */
414 stateList.append( other->stateList );
415 misfitList.append( other->misfitList );
417 /* Move the final set data from other into this. */
418 finStateSet.insert(other->finStateSet);
419 other->finStateSet.empty();
421 /* Since other's list is empty, we can delete the fsm without
422 * affecting any states. */
425 /* Create a new start state. */
426 setStartState( addState() );
428 /* Merge the start states. */
429 mergeStates( md, startState, startStateSet.data, startStateSet.length() );
431 /* Fill in any new states made from merging. */
435 /* Unions other with this machine. Other is deleted. */
436 void FsmAp::unionOp( FsmAp *other )
438 /* Turn on misfit accounting for both graphs. */
439 setMisfitAccounting( true );
440 other->setMisfitAccounting( true );
442 /* Call Worker routine. */
445 /* Remove the misfits and turn off misfit accounting. */
447 setMisfitAccounting( false );
450 /* Intersects other with this machine. Other is deleted. */
451 void FsmAp::intersectOp( FsmAp *other )
453 /* Turn on misfit accounting for both graphs. */
454 setMisfitAccounting( true );
455 other->setMisfitAccounting( true );
457 /* Set the fin bits on this and other to want each other. */
458 setFinBits( SB_GRAPH1 );
459 other->setFinBits( SB_GRAPH2 );
461 /* Call worker Or routine. */
464 /* Unset any final states that are no longer to
465 * be final due to final bits. */
466 unsetIncompleteFinals();
468 /* Remove the misfits and turn off misfit accounting. */
470 setMisfitAccounting( false );
472 /* Remove states that have no path to a final state. */
473 removeDeadEndStates();
476 /* Set subtracts other machine from this machine. Other is deleted. */
477 void FsmAp::subtractOp( FsmAp *other )
479 /* Turn on misfit accounting for both graphs. */
480 setMisfitAccounting( true );
481 other->setMisfitAccounting( true );
483 /* Set the fin bits of other to be killers. */
484 other->setFinBits( SB_GRAPH1 );
486 /* Call worker Or routine. */
489 /* Unset any final states that are no longer to
490 * be final due to final bits. */
493 /* Remove the misfits and turn off misfit accounting. */
495 setMisfitAccounting( false );
497 /* Remove states that have no path to a final state. */
498 removeDeadEndStates();
501 bool FsmAp::inEptVect( EptVect *eptVect, StateAp *state )
503 if ( eptVect != 0 ) {
504 /* Vect is there, walk it looking for state. */
505 for ( int i = 0; i < eptVect->length(); i++ ) {
506 if ( eptVect->data[i].targ == state )
513 /* Fill epsilon vectors in a root state from a given starting point. Epmploys
514 * a depth first search through the graph of epsilon transitions. */
515 void FsmAp::epsilonFillEptVectFrom( StateAp *root, StateAp *from, bool parentLeaving )
517 /* Walk the epsilon transitions out of the state. */
518 for ( EpsilonTrans::Iter ep = from->epsilonTrans; ep.lte(); ep++ ) {
519 /* Find the entry point, if the it does not resove, ignore it. */
520 EntryMapEl *enLow, *enHigh;
521 if ( entryPoints.findMulti( *ep, enLow, enHigh ) ) {
522 /* Loop the targets. */
523 for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) {
524 /* Do not add the root or states already in eptVect. */
525 StateAp *targ = en->value;
526 if ( targ != from && !inEptVect(root->eptVect, targ) ) {
527 /* Maybe need to create the eptVect. */
528 if ( root->eptVect == 0 )
529 root->eptVect = new EptVect();
531 /* If moving to a different graph or if any parent is
532 * leaving then we are leaving. */
533 bool leaving = parentLeaving ||
534 root->owningGraph != targ->owningGraph;
536 /* All ok, add the target epsilon and recurse. */
537 root->eptVect->append( EptVectEl(targ, leaving) );
538 epsilonFillEptVectFrom( root, targ, leaving );
545 void FsmAp::shadowReadWriteStates( MergeData &md )
547 /* Init isolatedShadow algorithm data. */
548 for ( StateList::Iter st = stateList; st.lte(); st++ )
549 st->isolatedShadow = 0;
551 /* Any states that may be both read from and written to must
553 for ( StateList::Iter st = stateList; st.lte(); st++ ) {
554 /* Find such states by looping through stateVect lists, which give us
555 * the states that will be read from. May cause us to visit the states
556 * that we are interested in more than once. */
557 if ( st->eptVect != 0 ) {
558 /* For all states that will be read from. */
559 for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) {
560 /* Check for read and write to the same state. */
561 StateAp *targ = ept->targ;
562 if ( targ->eptVect != 0 ) {
563 /* State is to be written to, if the shadow is not already
564 * there, create it. */
565 if ( targ->isolatedShadow == 0 ) {
566 StateAp *shadow = addState();
567 mergeStates( md, shadow, targ );
568 targ->isolatedShadow = shadow;
571 /* Write shadow into the state vector so that it is the
572 * state that the epsilon transition will read from. */
573 ept->targ = targ->isolatedShadow;
580 void FsmAp::resolveEpsilonTrans( MergeData &md )
582 /* Walk the state list and invoke recursive worker on each state. */
583 for ( StateList::Iter st = stateList; st.lte(); st++ )
584 epsilonFillEptVectFrom( st, st, false );
586 /* Prevent reading from and writing to of the same state. */
587 shadowReadWriteStates( md );
589 /* For all states that have epsilon transitions out, draw the transitions,
590 * clear the epsilon transitions. */
591 for ( StateList::Iter st = stateList; st.lte(); st++ ) {
592 /* If there is a state vector, then create the pre-merge state. */
593 if ( st->eptVect != 0 ) {
594 /* Merge all the epsilon targets into the state. */
595 for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) {
597 mergeStatesLeaving( md, st, ept->targ );
599 mergeStates( md, st, ept->targ );
602 /* Clean up the target list. */
607 /* Clear the epsilon transitions vector. */
608 st->epsilonTrans.empty();
612 void FsmAp::epsilonOp()
614 /* For merging process. */
617 setMisfitAccounting( true );
619 for ( StateList::Iter st = stateList; st.lte(); st++ )
622 /* Perform merges. */
623 resolveEpsilonTrans( md );
625 /* Epsilons can caused merges which leave behind unreachable states. */
628 /* Remove the misfits and turn off misfit accounting. */
630 setMisfitAccounting( false );
633 /* Make a new maching by joining together a bunch of machines without making
634 * any transitions between them. A negative finalId results in there being no
636 void FsmAp::joinOp( int startId, int finalId, FsmAp **others, int numOthers )
638 /* For the merging process. */
641 /* Set the owning machines. Start at one. Zero is reserved for the start
642 * and final states. */
643 for ( StateList::Iter st = stateList; st.lte(); st++ )
645 for ( int m = 0; m < numOthers; m++ ) {
646 for ( StateList::Iter st = others[m]->stateList; st.lte(); st++ )
647 st->owningGraph = 2+m;
650 /* All machines loose start state status. */
652 for ( int m = 0; m < numOthers; m++ )
653 others[m]->unsetStartState();
655 /* Bring the other machines into this. */
656 for ( int m = 0; m < numOthers; m++ ) {
657 /* Bring in the rest of other's entry points. */
658 copyInEntryPoints( others[m] );
659 others[m]->entryPoints.empty();
661 /* Merge the lists. This will move all the states from other into
662 * this. No states will be deleted. */
663 stateList.append( others[m]->stateList );
664 assert( others[m]->misfitList.length() == 0 );
666 /* Move the final set data from other into this. */
667 finStateSet.insert( others[m]->finStateSet );
668 others[m]->finStateSet.empty();
670 /* Since other's list is empty, we can delete the fsm without
671 * affecting any states. */
675 /* Look up the start entry point. */
676 EntryMapEl *enLow = 0, *enHigh = 0;
677 bool findRes = entryPoints.findMulti( startId, enLow, enHigh );
679 /* No start state. Set a default one and proceed with the join. Note
680 * that the result of the join will be a very uninteresting machine. */
681 setStartState( addState() );
684 /* There is at least one start state, create a state that will become
685 * the new start state. */
686 StateAp *newStart = addState();
687 setStartState( newStart );
689 /* The start state is in an owning machine class all it's own. */
690 newStart->owningGraph = 0;
692 /* Create the set of states to merge from. */
694 for ( EntryMapEl *en = enLow; en <= enHigh; en++ )
695 stateSet.insert( en->value );
697 /* Merge in the set of start states into the new start state. */
698 mergeStates( md, newStart, stateSet.data, stateSet.length() );
701 /* Take a copy of the final state set, before unsetting them all. This
702 * will allow us to call clearOutData on the states that don't get
703 * final state status back back. */
704 StateSet finStateSetCopy = finStateSet;
706 /* Now all final states are unset. */
709 if ( finalId >= 0 ) {
710 /* Create the implicit final state. */
711 StateAp *finState = addState();
712 setFinState( finState );
714 /* Assign an entry into the final state on the final state entry id. Note
715 * that there may already be an entry on this id. That's ok. Also set the
716 * final state owning machine id. It's in a class all it's own. */
717 setEntry( finalId, finState );
718 finState->owningGraph = 0;
721 /* Hand over to workers for resolving epsilon trans. This will merge states
722 * with the targets of their epsilon transitions. */
723 resolveEpsilonTrans( md );
725 /* Invoke the relinquish final callback on any states that did not get
726 * final state status back. */
727 for ( StateSet::Iter st = finStateSetCopy; st.lte(); st++ ) {
728 if ( !((*st)->stateBits & SB_ISFINAL) )
732 /* Fill in any new states made from merging. */
735 /* Joining can be messy. Instead of having misfit accounting on (which is
736 * tricky here) do a full cleaning. */
737 removeUnreachableStates();
740 void FsmAp::globOp( FsmAp **others, int numOthers )
742 /* All other machines loose start states status. */
743 for ( int m = 0; m < numOthers; m++ )
744 others[m]->unsetStartState();
746 /* Bring the other machines into this. */
747 for ( int m = 0; m < numOthers; m++ ) {
748 /* Bring in the rest of other's entry points. */
749 copyInEntryPoints( others[m] );
750 others[m]->entryPoints.empty();
752 /* Merge the lists. This will move all the states from other into
753 * this. No states will be deleted. */
754 stateList.append( others[m]->stateList );
755 assert( others[m]->misfitList.length() == 0 );
757 /* Move the final set data from other into this. */
758 finStateSet.insert( others[m]->finStateSet );
759 others[m]->finStateSet.empty();
761 /* Since other's list is empty, we can delete the fsm without
762 * affecting any states. */
767 void FsmAp::deterministicEntry()
769 /* For the merging process. */
772 /* States may loose their entry points, turn on misfit accounting. */
773 setMisfitAccounting( true );
775 /* Get a copy of the entry map then clear all the entry points. As we
776 * iterate the old entry map finding duplicates we will add the entry
777 * points for the new states that we create. */
778 EntryMap prevEntry = entryPoints;
779 unsetAllEntryPoints();
781 for ( int enId = 0; enId < prevEntry.length(); ) {
782 /* Count the number of states on this entry key. */
784 while ( highId < prevEntry.length() && prevEntry[enId].key == prevEntry[highId].key )
787 int numIds = highId - enId;
789 /* Only a single entry point, just set the entry. */
790 setEntry( prevEntry[enId].key, prevEntry[enId].value );
793 /* Multiple entry points, need to create a new state and merge in
794 * all the targets of entry points. */
795 StateAp *newEntry = addState();
796 for ( int en = enId; en < highId; en++ )
797 mergeStates( md, newEntry, prevEntry[en].value );
799 /* Add the new state as the single entry point. */
800 setEntry( prevEntry[enId].key, newEntry );
806 /* The old start state may be unreachable. Remove the misfits and turn off
807 * misfit accounting. */
809 setMisfitAccounting( false );
812 /* Unset any final states that are no longer to be final due to final bits. */
813 void FsmAp::unsetKilledFinals()
815 /* Duplicate the final state set before we begin modifying it. */
816 StateSet fin( finStateSet );
818 for ( int s = 0; s < fin.length(); s++ ) {
819 /* Check for killing bit. */
820 StateAp *state = fin.data[s];
821 if ( state->stateBits & SB_GRAPH1 ) {
822 /* One final state is a killer, set to non-final. */
823 unsetFinState( state );
826 /* Clear all killing bits. Non final states should never have had those
827 * state bits set in the first place. */
828 state->stateBits &= ~SB_GRAPH1;
832 /* Unset any final states that are no longer to be final due to final bits. */
833 void FsmAp::unsetIncompleteFinals()
835 /* Duplicate the final state set before we begin modifying it. */
836 StateSet fin( finStateSet );
838 for ( int s = 0; s < fin.length(); s++ ) {
839 /* Check for one set but not the other. */
840 StateAp *state = fin.data[s];
841 if ( state->stateBits & SB_BOTH &&
842 (state->stateBits & SB_BOTH) != SB_BOTH )
844 /* One state wants the other but it is not there. */
845 unsetFinState( state );
848 /* Clear wanting bits. Non final states should never have had those
849 * state bits set in the first place. */
850 state->stateBits &= ~SB_BOTH;
854 /* Ensure that the start state is free of entry points (aside from the fact
855 * that it is the start state). If the start state has entry points then Make a
856 * new start state by merging with the old one. Useful before modifying start
857 * transitions. If the existing start state has any entry points other than the
858 * start state entry then modifying its transitions changes more than the start
859 * transitions. So isolate the start state by separating it out such that it
860 * only has start stateness as it's entry point. */
861 void FsmAp::isolateStartState( )
863 /* For the merging process. */
866 /* Bail out if the start state is already isolated. */
867 if ( isStartStateIsolated() )
870 /* Turn on misfit accounting to possibly catch the old start state. */
871 setMisfitAccounting( true );
873 /* This will be the new start state. The existing start
874 * state is merged with it. */
875 StateAp *prevStartState = startState;
877 setStartState( addState() );
879 /* Merge the new start state with the old one to isolate it. */
880 mergeStates( md, startState, prevStartState );
882 /* Stfil and stateDict will be empty because the merging of the old start
883 * state into the new one will not have any conflicting transitions. */
884 assert( md.stateDict.treeSize == 0 );
885 assert( md.stfillHead == 0 );
887 /* The old start state may be unreachable. Remove the misfits and turn off
888 * misfit accounting. */
890 setMisfitAccounting( false );
894 void logCondSpace( CondSpace *condSpace )
896 if ( condSpace == 0 )
899 for ( CondSet::Iter csi = condSpace->condSet.last(); csi.gtb(); csi-- ) {
902 (*csi)->actionName( cerr );
907 void logNewExpansion( Expansion *exp )
909 cerr << "created expansion:" << endl;
910 cerr << " range: " << exp->lowKey.getVal() << " .. " <<
911 exp->highKey.getVal() << endl;
913 cerr << " fromCondSpace: ";
914 logCondSpace( exp->fromCondSpace );
916 cerr << " fromVals: " << exp->fromVals << endl;
918 cerr << " toCondSpace: ";
919 logCondSpace( exp->toCondSpace );
921 cerr << " toValsList: ";
922 for ( LongVect::Iter to = exp->toValsList; to.lte(); to++ )
929 void FsmAp::findTransExpansions( ExpansionList &expansionList,
930 StateAp *destState, StateAp *srcState )
932 PairIter<TransAp, StateCond> transCond( destState->outList.head,
933 srcState->stateCondList.head );
934 for ( ; !transCond.end(); transCond++ ) {
935 if ( transCond.userState == RangeOverlap ) {
936 Expansion *expansion = new Expansion( transCond.s1Tel.lowKey,
937 transCond.s1Tel.highKey );
938 expansion->fromTrans = new TransAp(*transCond.s1Tel.trans);
939 expansion->fromTrans->fromState = 0;
940 expansion->fromTrans->toState = transCond.s1Tel.trans->toState;
941 expansion->fromCondSpace = 0;
942 expansion->fromVals = 0;
943 CondSpace *srcCS = transCond.s2Tel.trans->condSpace;
944 expansion->toCondSpace = srcCS;
946 long numTargVals = (1 << srcCS->condSet.length());
947 for ( long targVals = 0; targVals < numTargVals; targVals++ )
948 expansion->toValsList.append( targVals );
951 logNewExpansion( expansion );
953 expansionList.append( expansion );
958 void FsmAp::findCondExpInTrans( ExpansionList &expansionList, StateAp *state,
959 Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace,
960 long fromVals, LongVect &toValsList )
963 searchTrans.lowKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() +
964 (lowKey - keyOps->minKey);
965 searchTrans.highKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() +
966 (highKey - keyOps->minKey);
967 searchTrans.prev = searchTrans.next = 0;
969 PairIter<TransAp> pairIter( state->outList.head, &searchTrans );
970 for ( ; !pairIter.end(); pairIter++ ) {
971 if ( pairIter.userState == RangeOverlap ) {
972 Expansion *expansion = new Expansion( lowKey, highKey );
973 expansion->fromTrans = new TransAp(*pairIter.s1Tel.trans);
974 expansion->fromTrans->fromState = 0;
975 expansion->fromTrans->toState = pairIter.s1Tel.trans->toState;
976 expansion->fromCondSpace = fromCondSpace;
977 expansion->fromVals = fromVals;
978 expansion->toCondSpace = toCondSpace;
979 expansion->toValsList = toValsList;
981 expansionList.append( expansion );
983 logNewExpansion( expansion );
989 void FsmAp::findCondExpansions( ExpansionList &expansionList,
990 StateAp *destState, StateAp *srcState )
992 PairIter<StateCond, StateCond> condCond( destState->stateCondList.head,
993 srcState->stateCondList.head );
994 for ( ; !condCond.end(); condCond++ ) {
995 if ( condCond.userState == RangeOverlap ) {
996 /* Loop over all existing condVals . */
997 CondSet &destCS = condCond.s1Tel.trans->condSpace->condSet;
998 long destLen = destCS.length();
1000 /* Find the items in src cond set that are not in dest
1001 * cond set. These are the items that we must expand. */
1002 CondSet srcOnlyCS = condCond.s2Tel.trans->condSpace->condSet;
1003 for ( CondSet::Iter dcsi = destCS; dcsi.lte(); dcsi++ )
1004 srcOnlyCS.remove( *dcsi );
1005 long srcOnlyLen = srcOnlyCS.length();
1007 if ( srcOnlyCS.length() > 0 ) {
1009 cerr << "there are " << srcOnlyCS.length() << " item(s) that are "
1010 "only in the srcCS" << endl;
1013 CondSet mergedCS = destCS;
1014 mergedCS.insert( condCond.s2Tel.trans->condSpace->condSet );
1016 CondSpace *fromCondSpace = addCondSpace( destCS );
1017 CondSpace *toCondSpace = addCondSpace( mergedCS );
1019 /* Loop all values in the dest space. */
1020 for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) {
1022 for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) {
1023 if ( destVals & (1 << csi.pos()) ) {
1024 Action **cim = mergedCS.find( *csi );
1025 long bitPos = (cim - mergedCS.data);
1026 basicVals |= 1 << bitPos;
1030 /* Loop all new values. */
1031 LongVect expandToVals;
1032 for ( long soVals = 0; soVals < (1 << srcOnlyLen); soVals++ ) {
1033 long targVals = basicVals;
1034 for ( CondSet::Iter csi = srcOnlyCS; csi.lte(); csi++ ) {
1035 if ( soVals & (1 << csi.pos()) ) {
1036 Action **cim = mergedCS.find( *csi );
1037 long bitPos = (cim - mergedCS.data);
1038 targVals |= 1 << bitPos;
1041 expandToVals.append( targVals );
1044 findCondExpInTrans( expansionList, destState,
1045 condCond.s1Tel.lowKey, condCond.s1Tel.highKey,
1046 fromCondSpace, toCondSpace, destVals, expandToVals );
1053 void FsmAp::doExpand( MergeData &md, StateAp *destState, ExpansionList &expList1 )
1055 for ( ExpansionList::Iter exp = expList1; exp.lte(); exp++ ) {
1056 for ( LongVect::Iter to = exp->toValsList; to.lte(); to++ ) {
1057 long targVals = *to;
1059 /* We will use the copy of the transition that was made when the
1060 * expansion was created. It will get used multiple times. Each
1061 * time we must set up the keys, everything else is constant and
1062 * and already prepared. */
1063 TransAp *srcTrans = exp->fromTrans;
1065 srcTrans->lowKey = exp->toCondSpace->baseKey +
1066 targVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey);
1067 srcTrans->highKey = exp->toCondSpace->baseKey +
1068 targVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey);
1071 srcList.append( srcTrans );
1072 outTransCopy( md, destState, srcList.head );
1079 void FsmAp::doRemove( MergeData &md, StateAp *destState, ExpansionList &expList1 )
1081 for ( ExpansionList::Iter exp = expList1; exp.lte(); exp++ ) {
1083 if ( exp->fromCondSpace == 0 ) {
1084 removal.lowKey = exp->lowKey;
1085 removal.highKey = exp->highKey;
1088 removal.lowKey = exp->fromCondSpace->baseKey +
1089 exp->fromVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey);
1090 removal.highKey = exp->fromCondSpace->baseKey +
1091 exp->fromVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey);
1096 PairIter<TransAp, Removal> pairIter( destState->outList.head, &removal );
1097 for ( ; !pairIter.end(); pairIter++ ) {
1098 switch ( pairIter.userState ) {
1100 TransAp *destTrans = pairIter.s1Tel.trans;
1101 destTrans->lowKey = pairIter.s1Tel.lowKey;
1102 destTrans->highKey = pairIter.s1Tel.highKey;
1103 destList.append( destTrans );
1108 case RangeOverlap: {
1109 TransAp *trans = pairIter.s1Tel.trans;
1110 detachTrans( trans->fromState, trans->toState, trans );
1115 pairIter.s1Tel.trans = dupTrans( destState,
1116 pairIter.s1Tel.trans );
1123 destState->outList.transfer( destList );
1127 void FsmAp::mergeStateConds( StateAp *destState, StateAp *srcState )
1129 StateCondList destList;
1130 PairIter<StateCond> pairIter( destState->stateCondList.head,
1131 srcState->stateCondList.head );
1132 for ( ; !pairIter.end(); pairIter++ ) {
1133 switch ( pairIter.userState ) {
1135 StateCond *destCond = pairIter.s1Tel.trans;
1136 destCond->lowKey = pairIter.s1Tel.lowKey;
1137 destCond->highKey = pairIter.s1Tel.highKey;
1138 destList.append( destCond );
1142 StateCond *newCond = new StateCond( *pairIter.s2Tel.trans );
1143 newCond->lowKey = pairIter.s2Tel.lowKey;
1144 newCond->highKey = pairIter.s2Tel.highKey;
1145 destList.append( newCond );
1148 case RangeOverlap: {
1149 StateCond *destCond = pairIter.s1Tel.trans;
1150 StateCond *srcCond = pairIter.s2Tel.trans;
1151 CondSet mergedCondSet;
1152 mergedCondSet.insert( destCond->condSpace->condSet );
1153 mergedCondSet.insert( srcCond->condSpace->condSet );
1154 destCond->condSpace = addCondSpace( mergedCondSet );
1156 destCond->lowKey = pairIter.s1Tel.lowKey;
1157 destCond->highKey = pairIter.s1Tel.highKey;
1158 destList.append( destCond );
1162 pairIter.s1Tel.trans = new StateCond( *pairIter.s1Tel.trans );
1169 destState->stateCondList.transfer( destList );
1172 /* A state merge which represents the drawing in of leaving transitions. If
1173 * there is any out data then we duplicate the souce state, transfer the out
1174 * data, then merge in the state. The new state will be reaped because it will
1175 * not be given any in transitions. */
1176 void FsmAp::mergeStatesLeaving( MergeData &md, StateAp *destState, StateAp *srcState )
1178 if ( !hasOutData( destState ) )
1179 mergeStates( md, destState, srcState );
1181 StateAp *ssMutable = addState();
1182 mergeStates( md, ssMutable, srcState );
1183 transferOutData( ssMutable, destState );
1185 for ( ActionSet::Iter cond = destState->outCondSet; cond.lte(); cond++ )
1186 embedCondition( md, ssMutable, *cond );
1188 mergeStates( md, destState, ssMutable );
1192 void FsmAp::mergeStates( MergeData &md, StateAp *destState,
1193 StateAp **srcStates, int numSrc )
1195 for ( int s = 0; s < numSrc; s++ )
1196 mergeStates( md, destState, srcStates[s] );
1199 void FsmAp::mergeStates( MergeData &md, StateAp *destState, StateAp *srcState )
1201 ExpansionList expList1;
1202 ExpansionList expList2;
1204 findTransExpansions( expList1, destState, srcState );
1205 findCondExpansions( expList1, destState, srcState );
1206 findTransExpansions( expList2, srcState, destState );
1207 findCondExpansions( expList2, srcState, destState );
1209 mergeStateConds( destState, srcState );
1211 outTransCopy( md, destState, srcState->outList.head );
1213 doExpand( md, destState, expList1 );
1214 doExpand( md, destState, expList2 );
1216 doRemove( md, destState, expList1 );
1217 doRemove( md, destState, expList2 );
1222 /* Get its bits and final state status. */
1223 destState->stateBits |= ( srcState->stateBits & ~SB_ISFINAL );
1224 if ( srcState->isFinState() )
1225 setFinState( destState );
1227 /* Draw in any properties of srcState into destState. */
1228 if ( srcState == destState ) {
1229 /* Duplicate the list to protect against write to source. The
1230 * priorities sets are not copied in because that would have no
1232 destState->epsilonTrans.append( EpsilonTrans( srcState->epsilonTrans ) );
1234 /* Get all actions, duplicating to protect against write to source. */
1235 destState->toStateActionTable.setActions(
1236 ActionTable( srcState->toStateActionTable ) );
1237 destState->fromStateActionTable.setActions(
1238 ActionTable( srcState->fromStateActionTable ) );
1239 destState->outActionTable.setActions( ActionTable( srcState->outActionTable ) );
1240 destState->outCondSet.insert( ActionSet( srcState->outCondSet ) );
1241 destState->errActionTable.setActions( ErrActionTable( srcState->errActionTable ) );
1242 destState->eofActionTable.setActions( ActionTable( srcState->eofActionTable ) );
1245 /* Get the epsilons, out priorities. */
1246 destState->epsilonTrans.append( srcState->epsilonTrans );
1247 destState->outPriorTable.setPriors( srcState->outPriorTable );
1249 /* Get all actions. */
1250 destState->toStateActionTable.setActions( srcState->toStateActionTable );
1251 destState->fromStateActionTable.setActions( srcState->fromStateActionTable );
1252 destState->outActionTable.setActions( srcState->outActionTable );
1253 destState->outCondSet.insert( srcState->outCondSet );
1254 destState->errActionTable.setActions( srcState->errActionTable );
1255 destState->eofActionTable.setActions( srcState->eofActionTable );
1259 void FsmAp::fillInStates( MergeData &md )
1261 /* Merge any states that are awaiting merging. This will likey cause
1262 * other states to be added to the stfil list. */
1263 StateAp *state = md.stfillHead;
1264 while ( state != 0 ) {
1265 StateSet *stateSet = &state->stateDictEl->stateSet;
1266 mergeStates( md, state, stateSet->data, stateSet->length() );
1267 state = state->alg.next;
1270 /* Delete the state sets of all states that are on the fill list. */
1271 state = md.stfillHead;
1272 while ( state != 0 ) {
1273 /* Delete and reset the state set. */
1274 delete state->stateDictEl;
1275 state->stateDictEl = 0;
1277 /* Next state in the stfill list. */
1278 state = state->alg.next;
1281 /* StateDict will still have its ptrs/size set but all of it's element
1282 * will be deleted so we don't need to clean it up. */
1285 void FsmAp::findEmbedExpansions( ExpansionList &expansionList,
1286 StateAp *destState, Action *condAction )
1288 StateCondList destList;
1289 PairIter<TransAp, StateCond> transCond( destState->outList.head,
1290 destState->stateCondList.head );
1291 for ( ; !transCond.end(); transCond++ ) {
1292 switch ( transCond.userState ) {
1294 if ( transCond.s1Tel.lowKey <= keyOps->maxKey ) {
1295 assert( transCond.s1Tel.highKey <= keyOps->maxKey );
1297 /* Make a new state cond. */
1298 StateCond *newStateCond = new StateCond( transCond.s1Tel.lowKey,
1299 transCond.s1Tel.highKey );
1300 newStateCond->condSpace = addCondSpace( CondSet( condAction ) );
1301 destList.append( newStateCond );
1303 /* Create the expansion. */
1304 Expansion *expansion = new Expansion( transCond.s1Tel.lowKey,
1305 transCond.s1Tel.highKey );
1306 expansion->fromTrans = new TransAp(*transCond.s1Tel.trans);
1307 expansion->fromTrans->fromState = 0;
1308 expansion->fromTrans->toState = transCond.s1Tel.trans->toState;
1309 expansion->fromCondSpace = 0;
1310 expansion->fromVals = 0;
1311 expansion->toCondSpace = newStateCond->condSpace;
1312 expansion->toValsList.append( 1 );
1314 logNewExpansion( expansion );
1316 expansionList.append( expansion );
1321 /* Enhance state cond and find the expansion. */
1322 StateCond *stateCond = transCond.s2Tel.trans;
1323 stateCond->lowKey = transCond.s2Tel.lowKey;
1324 stateCond->highKey = transCond.s2Tel.highKey;
1326 CondSet &destCS = stateCond->condSpace->condSet;
1327 long destLen = destCS.length();
1328 CondSpace *fromCondSpace = stateCond->condSpace;
1330 CondSet mergedCS = destCS;
1331 mergedCS.insert( condAction );
1332 CondSpace *toCondSpace = addCondSpace( mergedCS );
1333 stateCond->condSpace = toCondSpace;
1334 destList.append( stateCond );
1336 /* Loop all values in the dest space. */
1337 for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) {
1339 for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) {
1340 if ( destVals & (1 << csi.pos()) ) {
1341 Action **cim = mergedCS.find( *csi );
1342 long bitPos = (cim - mergedCS.data);
1343 basicVals |= 1 << bitPos;
1347 long targVals = basicVals;
1348 Action **cim = mergedCS.find( condAction );
1349 long bitPos = (cim - mergedCS.data);
1350 targVals |= 1 << bitPos;
1352 LongVect expandToVals( targVals );
1353 findCondExpInTrans( expansionList, destState,
1354 transCond.s2Tel.lowKey, transCond.s2Tel.highKey,
1355 fromCondSpace, toCondSpace, destVals, expandToVals );
1369 destState->stateCondList.transfer( destList );
1372 void FsmAp::embedCondition( StateAp *state, Action *condAction )
1375 ExpansionList expList;
1377 /* Turn on misfit accounting to possibly catch the old start state. */
1378 setMisfitAccounting( true );
1381 embedCondition( md, state, condAction );
1383 /* Fill in any states that were newed up as combinations of others. */
1386 /* Remove the misfits and turn off misfit accounting. */
1388 setMisfitAccounting( false );
1391 void FsmAp::embedCondition( MergeData &md, StateAp *state, Action *condAction )
1393 ExpansionList expList;
1395 findEmbedExpansions( expList, state, condAction );
1396 doExpand( md, state, expList );
1397 doRemove( md, state, expList );
1401 /* Check if a machine defines a single character. This is useful in validating
1402 * ranges and machines to export. */
1403 bool FsmAp::checkSingleCharMachine()
1405 /* Must have two states. */
1406 if ( stateList.length() != 2 )
1408 /* The start state cannot be final. */
1409 if ( startState->isFinState() )
1411 /* There should be only one final state. */
1412 if ( finStateSet.length() != 1 )
1414 /* The final state cannot have any transitions out. */
1415 if ( finStateSet[0]->outList.length() != 0 )
1417 /* The start state should have only one transition out. */
1418 if ( startState->outList.length() != 1 )
1420 /* The singe transition out of the start state should not be a range. */
1421 TransAp *startTrans = startState->outList.head;
1422 if ( startTrans->lowKey != startTrans->highKey )