2 * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
35 #include "sbsttable.h"
41 /* Flags that control merging. */
42 #define SB_GRAPH1 0x01
43 #define SB_GRAPH2 0x02
45 #define SB_ISFINAL 0x04
46 #define SB_ISMARKED 0x08
47 #define SB_ONLIST 0x10
53 struct LongestMatchPart;
55 /* State list element for unambiguous access to list element. */
61 /* This is the marked index for a state pair. Used in minimization. It keeps
62 * track of whether or not the state pair is marked. */
65 MarkIndex(int states);
68 void markPair(int state1, int state2);
69 bool isPairMarked(int state1, int state2);
76 extern KeyOps *keyOps;
78 /* Transistion Action Element. */
79 typedef SBstMapEl< int, Action* > ActionTableEl;
81 /* Transition Action Table. */
83 : public SBstMap< int, Action*, CmpOrd<int> >
85 void setAction( int ordering, Action *action );
86 void setActions( int *orderings, Action **actions, int nActs );
87 void setActions( const ActionTable &other );
89 bool hasAction( Action *action );
92 typedef SBstSet< Action*, CmpOrd<Action*> > ActionSet;
93 typedef CmpSTable< Action*, CmpOrd<Action*> > CmpActionSet;
95 /* Transistion Action Element. */
96 typedef SBstMapEl< int, LongestMatchPart* > LmActionTableEl;
98 /* Transition Action Table. */
100 : public SBstMap< int, LongestMatchPart*, CmpOrd<int> >
102 void setAction( int ordering, LongestMatchPart *action );
103 void setActions( const LmActionTable &other );
106 /* Compare of a whole action table element (key & value). */
107 struct CmpActionTableEl
109 static int compare( const ActionTableEl &action1,
110 const ActionTableEl &action2 )
112 if ( action1.key < action2.key )
114 else if ( action1.key > action2.key )
116 else if ( action1.value < action2.value )
118 else if ( action1.value > action2.value )
124 /* Compare for ActionTable. */
125 typedef CmpSTable< ActionTableEl, CmpActionTableEl > CmpActionTable;
127 /* Compare of a whole lm action table element (key & value). */
128 struct CmpLmActionTableEl
130 static int compare( const LmActionTableEl &lmAction1,
131 const LmActionTableEl &lmAction2 )
133 if ( lmAction1.key < lmAction2.key )
135 else if ( lmAction1.key > lmAction2.key )
137 else if ( lmAction1.value < lmAction2.value )
139 else if ( lmAction1.value > lmAction2.value )
145 /* Compare for ActionTable. */
146 typedef CmpSTable< LmActionTableEl, CmpLmActionTableEl > CmpLmActionTable;
148 /* Action table element for error action tables. Adds the encoding of transfer
150 struct ErrActionTableEl
152 ErrActionTableEl( Action *action, int ordering, int transferPoint )
153 : ordering(ordering), action(action), transferPoint(transferPoint) { }
155 /* Ordering and id of the action embedding. */
159 /* Id of point of transfere from Error action table to transtions and
163 int getKey() const { return ordering; }
166 struct ErrActionTable
167 : public SBstTable< ErrActionTableEl, int, CmpOrd<int> >
169 void setAction( int ordering, Action *action, int transferPoint );
170 void setActions( const ErrActionTable &other );
173 /* Compare of an error action table element (key & value). */
174 struct CmpErrActionTableEl
176 static int compare( const ErrActionTableEl &action1,
177 const ErrActionTableEl &action2 )
179 if ( action1.ordering < action2.ordering )
181 else if ( action1.ordering > action2.ordering )
183 else if ( action1.action < action2.action )
185 else if ( action1.action > action2.action )
187 else if ( action1.transferPoint < action2.transferPoint )
189 else if ( action1.transferPoint > action2.transferPoint )
195 /* Compare for ErrActionTable. */
196 typedef CmpSTable< ErrActionTableEl, CmpErrActionTableEl > CmpErrActionTable;
199 /* Descibe a priority, shared among PriorEls.
200 * Has key and whether or not used. */
207 /* Element in the arrays of priorities for transitions and arrays. Ordering is
208 * unique among instantiations of machines, desc is shared. */
211 PriorEl( int ordering, PriorDesc *desc )
212 : ordering(ordering), desc(desc) { }
218 /* Compare priority elements, which are ordered by the priority descriptor
222 static inline int compare( const PriorEl &pel1, const PriorEl &pel2 )
224 if ( pel1.desc->key < pel2.desc->key )
226 else if ( pel1.desc->key > pel2.desc->key )
234 /* Priority Table. */
236 : public SBstSet< PriorEl, PriorElCmp >
238 void setPrior( int ordering, PriorDesc *desc );
239 void setPriors( const PriorTable &other );
242 /* Compare of prior table elements for distinguising state data. */
245 static inline int compare( const PriorEl &pel1, const PriorEl &pel2 )
247 if ( pel1.desc < pel2.desc )
249 else if ( pel1.desc > pel2.desc )
251 else if ( pel1.ordering < pel2.ordering )
253 else if ( pel1.ordering > pel2.ordering )
259 /* Compare of PriorTable distinguising state data. Using a compare of the
260 * pointers is a little more strict than it needs be. It requires that
261 * prioritiy tables have the exact same set of priority assignment operators
262 * (from the input lang) to be considered equal.
264 * Really only key-value pairs need be tested and ordering be merged. However
265 * this would require that in the fuseing of states, priority descriptors be
266 * chosen for the new fused state based on priority. Since the out transition
267 * lists and ranges aren't necessarily going to line up, this is more work for
268 * little gain. Final compression resets all priorities first, so this would
269 * only be useful for compression at every operator, which is only an
270 * undocumented test feature.
272 typedef CmpSTable<PriorEl, CmpPriorEl> CmpPriorTable;
274 /* Plain action list that imposes no ordering. */
275 typedef Vector<int> TransFuncList;
277 /* Comparison for TransFuncList. */
278 typedef CmpTable< int, CmpOrd<int> > TransFuncListCompare;
280 /* Transition class that implements actions and priorities. */
283 TransAp() : fromState(0), toState(0) {}
284 TransAp( const TransAp &other ) :
285 lowKey(other.lowKey),
286 highKey(other.highKey),
287 fromState(0), toState(0),
288 actionTable(other.actionTable),
289 priorTable(other.priorTable)
291 assert( lmActionTable.length() == 0 && other.lmActionTable.length() == 0 );
298 /* Pointers for outlist. */
299 TransAp *prev, *next;
301 /* Pointers for in-list. */
302 TransAp *ilprev, *ilnext;
304 /* The function table and priority for the transition. */
305 ActionTable actionTable;
306 PriorTable priorTable;
308 LmActionTable lmActionTable;
311 /* In transition list. Like DList except only has head pointers, which is all
312 * that is required. Insertion and deletion is handled by the graph. This
313 * class provides the iterator of a single list. */
316 TransInList() : head(0) { }
322 /* Default construct. */
325 /* Construct, assign from a list. */
326 Iter( const TransInList &il ) : ptr(il.head) { }
327 Iter &operator=( const TransInList &dl ) { ptr = dl.head; return *this; }
330 bool lte() const { return ptr != 0; }
331 bool end() const { return ptr == 0; }
333 /* At the first, last element. */
334 bool first() const { return ptr && ptr->ilprev == 0; }
335 bool last() const { return ptr && ptr->ilnext == 0; }
337 /* Cast, dereference, arrow ops. */
338 operator TransAp*() const { return ptr; }
339 TransAp &operator *() const { return *ptr; }
340 TransAp *operator->() const { return ptr; }
342 /* Increment, decrement. */
343 inline void operator++(int) { ptr = ptr->ilnext; }
344 inline void operator--(int) { ptr = ptr->ilprev; }
346 /* The iterator is simply a pointer. */
351 typedef DList<TransAp> TransList;
353 /* Set of states, list of states. */
354 typedef BstSet<StateAp*> StateSet;
355 typedef DList<StateAp> StateList;
357 /* A element in a state dict. */
360 public AvlTreeEl<StateDictEl>
362 StateDictEl(const StateSet &stateSet)
363 : stateSet(stateSet) { }
365 const StateSet &getKey() { return stateSet; }
370 /* Dictionary mapping a set of states to a target state. */
371 typedef AvlTree< StateDictEl, StateSet, CmpTable<StateAp*> > StateDict;
373 /* Data needed for a merge operation. */
377 : stfillHead(0), stfillTail(0) { }
384 void fillListAppend( StateAp *state );
391 TransEl( Key lowKey, Key highKey )
392 : lowKey(lowKey), highKey(highKey) { }
393 TransEl( Key lowKey, Key highKey, TransAp *value )
394 : lowKey(lowKey), highKey(highKey), value(value) { }
402 static int compare( const Key key1, const Key key2 )
406 else if ( key1 > key2 )
413 /* Vector based set of key items. */
414 typedef BstSet<Key, CmpKey> KeySet;
418 MinPartition() : active(false) { }
423 MinPartition *prev, *next;
426 /* Epsilon transition stored in a state. Specifies the target */
427 typedef Vector<int> EpsilonTrans;
429 /* List of states that are to be drawn into this. */
432 EptVectEl( StateAp *targ, bool leaving )
433 : targ(targ), leaving(leaving) { }
438 typedef Vector<EptVectEl> EptVect;
440 /* Set of entry ids that go into this state. */
441 typedef BstSet<int> EntryIdSet;
443 /* Set of longest match items that may be active in a given state. */
444 typedef BstSet<LongestMatchPart*> LmItemSet;
447 typedef BstSet< Action*, CmpOrd<Action*> > CondSet;
448 typedef CmpTable< Action*, CmpOrd<Action*> > CmpCondSet;
451 : public AvlTreeEl<CondSpace>
453 CondSpace( const CondSet &condSet )
454 : condSet(condSet) {}
456 const CondSet &getKey() { return condSet; }
463 typedef Vector<CondSpace*> CondSpaceVect;
465 typedef AvlTree<CondSpace, CondSet, CmpCondSet> CondSpaceMap;
469 StateCond( Key lowKey, Key highKey ) :
470 lowKey(lowKey), highKey(highKey) {}
474 CondSpace *condSpace;
476 StateCond *prev, *next;
479 typedef DList<StateCond> StateCondList;
480 typedef Vector<long> LongVect;
484 Expansion( Key lowKey, Key highKey ) :
485 lowKey(lowKey), highKey(highKey),
486 fromTrans(0), fromCondSpace(0),
491 if ( fromTrans != 0 )
499 CondSpace *fromCondSpace;
502 CondSpace *toCondSpace;
505 Expansion *prev, *next;
508 typedef DList<Expansion> ExpansionList;
520 CondData() : nextCondKey(0) {}
522 /* Condition info. */
525 CondSpaceMap condSpaceMap;
528 extern CondData *condData;
530 /* State class that implements actions and priorities. */
534 StateAp(const StateAp &other);
537 /* Is the state final? */
538 bool isFinState() { return stateBits & SB_ISFINAL; }
540 /* Out transition list and the pointer for the default out trans. */
543 /* In transition Lists. */
546 /* Entry points into the state. */
549 /* Epsilon transitions. */
550 EpsilonTrans epsilonTrans;
552 /* Condition info. */
553 StateCondList stateCondList;
555 /* Number of in transitions from states other than ourselves. */
558 /* Temporary data for various algorithms. */
560 /* When duplicating the fsm we need to map each
561 * state to the new state representing it. */
564 /* When minimizing machines by partitioning, this maps to the group
565 * the state is in. */
566 MinPartition *partition;
568 /* When merging states (state machine operations) this next pointer is
569 * used for the list of states that need to be filled in. */
572 /* Identification for printing and stable minimization. */
577 /* Data used in epsilon operation, maybe fit into alg? */
578 StateAp *isolatedShadow;
581 /* A pointer to a dict element that contains the set of states this state
582 * represents. This cannot go into alg, because alg.next is used during
583 * the merging process. */
584 StateDictEl *stateDictEl;
586 /* When drawing epsilon transitions, holds the list of states to merge
590 /* Bits controlling the behaviour of the state during collapsing to dfa. */
593 /* State list elements. */
594 StateAp *next, *prev;
597 * Priority and Action data.
600 /* Out priorities transfered to out transitions. */
601 PriorTable outPriorTable;
603 /* The following two action tables are distinguished by the fact that when
604 * toState actions are executed immediatly after transition actions of
605 * incoming transitions and the current character will be the same as the
606 * one available then. The fromState actions are executed immediately
607 * before the transition actions of outgoing transitions and the current
608 * character is same as the one available then. */
610 /* Actions to execute upon entering into a state. */
611 ActionTable toStateActionTable;
613 /* Actions to execute when going from the state to the transition. */
614 ActionTable fromStateActionTable;
616 /* Actions to add to any future transitions that leave via this state. */
617 ActionTable outActionTable;
619 /* Conditions to add to any future transiions that leave via this sttate. */
620 ActionSet outCondSet;
622 /* Error action tables. */
623 ErrActionTable errActionTable;
625 /* Actions to execute on eof. */
626 ActionTable eofActionTable;
628 /* Set of longest match items that may be active in this state. */
632 template <class ListItem> struct NextTrans
643 lowKey = trans->lowKey;
644 highKey = trans->highKey;
648 void set( ListItem *t ) {
660 /* Encodes the different states that are meaningful to the of the iterator. */
661 enum PairIterUserState
663 RangeInS1, RangeInS2,
668 template <class ListItem1, class ListItem2 = ListItem1> struct PairIter
670 /* Encodes the different states that an fsm iterator can be in. */
673 ConsumeS1Range, ConsumeS2Range,
674 OnlyInS1Range, OnlyInS2Range,
675 S1SticksOut, S1SticksOutBreak,
676 S2SticksOut, S2SticksOutBreak,
677 S1DragsBehind, S1DragsBehindBreak,
678 S2DragsBehind, S2DragsBehindBreak,
682 PairIter( ListItem1 *list1, ListItem2 *list2 );
684 /* Query iterator. */
685 bool lte() { return itState != End; }
686 bool end() { return itState == End; }
687 void operator++(int) { findNext(); }
688 void operator++() { findNext(); }
690 /* Iterator state. */
694 PairIterUserState userState;
696 NextTrans<ListItem1> s1Tel;
697 NextTrans<ListItem2> s2Tel;
698 Key bottomLow, bottomHigh;
699 ListItem1 *bottomTrans1;
700 ListItem2 *bottomTrans2;
706 /* Init the iterator by advancing to the first item. */
707 template <class ListItem1, class ListItem2> PairIter<ListItem1, ListItem2>::PairIter(
708 ListItem1 *list1, ListItem2 *list2 )
717 /* Return and re-entry for the co-routine iterators. This should ALWAYS be
718 * used inside of a block. */
719 #define CO_RETURN(label) \
722 entry##label: backIn = true
724 /* Return and re-entry for the co-routine iterators. This should ALWAYS be
725 * used inside of a block. */
726 #define CO_RETURN2(label, uState) \
728 userState = uState; \
730 entry##label: backIn = true
732 /* Advance to the next transition. When returns, trans points to the next
733 * transition, unless there are no more, in which case end() returns true. */
734 template <class ListItem1, class ListItem2> void PairIter<ListItem1, ListItem2>::findNext()
736 /* This variable is used in dummy statements that follow the entry
737 * goto labels. The compiler needs some statement to follow the label. */
740 /* Jump into the iterator routine base on the iterator state. */
742 case Begin: goto entryBegin;
743 case ConsumeS1Range: goto entryConsumeS1Range;
744 case ConsumeS2Range: goto entryConsumeS2Range;
745 case OnlyInS1Range: goto entryOnlyInS1Range;
746 case OnlyInS2Range: goto entryOnlyInS2Range;
747 case S1SticksOut: goto entryS1SticksOut;
748 case S1SticksOutBreak: goto entryS1SticksOutBreak;
749 case S2SticksOut: goto entryS2SticksOut;
750 case S2SticksOutBreak: goto entryS2SticksOutBreak;
751 case S1DragsBehind: goto entryS1DragsBehind;
752 case S1DragsBehindBreak: goto entryS1DragsBehindBreak;
753 case S2DragsBehind: goto entryS2DragsBehind;
754 case S2DragsBehindBreak: goto entryS2DragsBehindBreak;
755 case ExactOverlap: goto entryExactOverlap;
756 case End: goto entryEnd;
760 /* Set up the next structs at the head of the transition lists. */
764 /* Concurrently scan both out ranges. */
766 if ( s1Tel.trans == 0 ) {
767 /* We are at the end of state1's ranges. Process the rest of
768 * state2's ranges. */
769 while ( s2Tel.trans != 0 ) {
770 /* Range is only in s2. */
771 CO_RETURN2( ConsumeS2Range, RangeInS2 );
776 else if ( s2Tel.trans == 0 ) {
777 /* We are at the end of state2's ranges. Process the rest of
778 * state1's ranges. */
779 while ( s1Tel.trans != 0 ) {
780 /* Range is only in s1. */
781 CO_RETURN2( ConsumeS1Range, RangeInS1 );
786 /* Both state1's and state2's transition elements are good.
787 * The signiture of no overlap is a back key being in front of a
789 else if ( s1Tel.highKey < s2Tel.lowKey ) {
790 /* A range exists in state1 that does not overlap with state2. */
791 CO_RETURN2( OnlyInS1Range, RangeInS1 );
794 else if ( s2Tel.highKey < s1Tel.lowKey ) {
795 /* A range exists in state2 that does not overlap with state1. */
796 CO_RETURN2( OnlyInS2Range, RangeInS2 );
799 /* There is overlap, must mix the ranges in some way. */
800 else if ( s1Tel.lowKey < s2Tel.lowKey ) {
801 /* Range from state1 sticks out front. Must break it into
802 * non-overlaping and overlaping segments. */
803 bottomLow = s2Tel.lowKey;
804 bottomHigh = s1Tel.highKey;
805 s1Tel.highKey = s2Tel.lowKey;
806 s1Tel.highKey.decrement();
807 bottomTrans1 = s1Tel.trans;
809 /* Notify the caller that we are breaking s1. This gives them a
810 * chance to duplicate s1Tel[0,1].value. */
811 CO_RETURN2( S1SticksOutBreak, BreakS1 );
813 /* Broken off range is only in s1. */
814 CO_RETURN2( S1SticksOut, RangeInS1 );
816 /* Advance over the part sticking out front. */
817 s1Tel.lowKey = bottomLow;
818 s1Tel.highKey = bottomHigh;
819 s1Tel.trans = bottomTrans1;
821 else if ( s2Tel.lowKey < s1Tel.lowKey ) {
822 /* Range from state2 sticks out front. Must break it into
823 * non-overlaping and overlaping segments. */
824 bottomLow = s1Tel.lowKey;
825 bottomHigh = s2Tel.highKey;
826 s2Tel.highKey = s1Tel.lowKey;
827 s2Tel.highKey.decrement();
828 bottomTrans2 = s2Tel.trans;
830 /* Notify the caller that we are breaking s2. This gives them a
831 * chance to duplicate s2Tel[0,1].value. */
832 CO_RETURN2( S2SticksOutBreak, BreakS2 );
834 /* Broken off range is only in s2. */
835 CO_RETURN2( S2SticksOut, RangeInS2 );
837 /* Advance over the part sticking out front. */
838 s2Tel.lowKey = bottomLow;
839 s2Tel.highKey = bottomHigh;
840 s2Tel.trans = bottomTrans2;
842 /* Low ends are even. Are the high ends even? */
843 else if ( s1Tel.highKey < s2Tel.highKey ) {
844 /* Range from state2 goes longer than the range from state1. We
845 * must break the range from state2 into an evenly overlaping
847 bottomLow = s1Tel.highKey;
848 bottomLow.increment();
849 bottomHigh = s2Tel.highKey;
850 s2Tel.highKey = s1Tel.highKey;
851 bottomTrans2 = s2Tel.trans;
853 /* Notify the caller that we are breaking s2. This gives them a
854 * chance to duplicate s2Tel[0,1].value. */
855 CO_RETURN2( S2DragsBehindBreak, BreakS2 );
857 /* Breaking s2 produces exact overlap. */
858 CO_RETURN2( S2DragsBehind, RangeOverlap );
860 /* Advance over the front we just broke off of range 2. */
861 s2Tel.lowKey = bottomLow;
862 s2Tel.highKey = bottomHigh;
863 s2Tel.trans = bottomTrans2;
865 /* Advance over the entire s1Tel. We have consumed it. */
868 else if ( s2Tel.highKey < s1Tel.highKey ) {
869 /* Range from state1 goes longer than the range from state2. We
870 * must break the range from state1 into an evenly overlaping
872 bottomLow = s2Tel.highKey;
873 bottomLow.increment();
874 bottomHigh = s1Tel.highKey;
875 s1Tel.highKey = s2Tel.highKey;
876 bottomTrans1 = s1Tel.trans;
878 /* Notify the caller that we are breaking s1. This gives them a
879 * chance to duplicate s2Tel[0,1].value. */
880 CO_RETURN2( S1DragsBehindBreak, BreakS1 );
882 /* Breaking s1 produces exact overlap. */
883 CO_RETURN2( S1DragsBehind, RangeOverlap );
885 /* Advance over the front we just broke off of range 1. */
886 s1Tel.lowKey = bottomLow;
887 s1Tel.highKey = bottomHigh;
888 s1Tel.trans = bottomTrans1;
890 /* Advance over the entire s2Tel. We have consumed it. */
894 /* There is an exact overlap. */
895 CO_RETURN2( ExactOverlap, RangeOverlap );
902 /* Done, go into end state. */
907 /* Compare lists of epsilon transitions. Entries are name ids of targets. */
908 typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans;
910 /* Compare class for the Approximate minimization. */
915 int compare( const StateAp *pState1, const StateAp *pState2 );
918 /* Compare class for the initial partitioning of a partition minimization. */
919 class InitPartitionCompare
922 InitPartitionCompare() { }
923 int compare( const StateAp *pState1, const StateAp *pState2 );
926 /* Compare class for the regular partitioning of a partition minimization. */
927 class PartitionCompare
930 PartitionCompare() { }
931 int compare( const StateAp *pState1, const StateAp *pState2 );
934 /* Compare class for a minimization that marks pairs. Provides the shouldMark
940 bool shouldMark( MarkIndex &markIndex, const StateAp *pState1,
941 const StateAp *pState2 );
944 /* List of partitions. */
945 typedef DList< MinPartition > PartitionList;
947 /* List of transtions out of a state. */
948 typedef Vector<TransEl> TransListVect;
950 /* Entry point map used for keeping track of entry points in a machine. */
951 typedef BstSet< int > EntryIdSet;
952 typedef BstMapEl< int, StateAp* > EntryMapEl;
953 typedef BstMap< int, StateAp* > EntryMap;
954 typedef Vector<EntryMapEl> EntryMapBase;
956 /* Graph class that implements actions and priorities. */
959 /* Constructors/Destructors. */
961 FsmAp( const FsmAp &graph );
964 /* The list of states. */
966 StateList misfitList;
968 /* The map of entry points. */
969 EntryMap entryPoints;
971 /* The start state. */
974 /* The set of final states. */
975 StateSet finStateSet;
977 /* Misfit Accounting. Are misfits put on a separate list. */
978 bool misfitAccounting;
981 * Transition actions and priorities.
984 /* Set priorities on transtions. */
985 void startFsmPrior( int ordering, PriorDesc *prior );
986 void allTransPrior( int ordering, PriorDesc *prior );
987 void finishFsmPrior( int ordering, PriorDesc *prior );
988 void leaveFsmPrior( int ordering, PriorDesc *prior );
990 /* Action setting support. */
991 void transferErrorActions( StateAp *state, int transferPoint );
992 void setErrorAction( StateAp *state, int ordering, Action *action );
994 /* Fill all spaces in a transition list with an error transition. */
995 void fillGaps( StateAp *state );
997 /* Similar to setErrorAction, instead gives a state to go to on error. */
998 void setErrorTarget( StateAp *state, StateAp *target, int *orderings,
999 Action **actions, int nActs );
1001 /* Set actions to execute. */
1002 void startFsmAction( int ordering, Action *action );
1003 void allTransAction( int ordering, Action *action );
1004 void finishFsmAction( int ordering, Action *action );
1005 void leaveFsmAction( int ordering, Action *action );
1006 void longMatchAction( int ordering, LongestMatchPart *lmPart );
1008 /* Set conditions. */
1009 CondSpace *addCondSpace( const CondSet &condSet );
1011 void findEmbedExpansions( ExpansionList &expansionList,
1012 StateAp *destState, Action *condAction );
1013 void embedCondition( MergeData &md, StateAp *state, Action *condAction );
1014 void embedCondition( StateAp *state, Action *condAction );
1016 void startFsmCondition( Action *condAction );
1017 void allTransCondition( Action *condAction );
1018 void leaveFsmCondition( Action *condAction );
1020 /* Set error actions to execute. */
1021 void startErrorAction( int ordering, Action *action, int transferPoint );
1022 void allErrorAction( int ordering, Action *action, int transferPoint );
1023 void finalErrorAction( int ordering, Action *action, int transferPoint );
1024 void notStartErrorAction( int ordering, Action *action, int transferPoint );
1025 void notFinalErrorAction( int ordering, Action *action, int transferPoint );
1026 void middleErrorAction( int ordering, Action *action, int transferPoint );
1028 /* Set EOF actions. */
1029 void startEOFAction( int ordering, Action *action );
1030 void allEOFAction( int ordering, Action *action );
1031 void finalEOFAction( int ordering, Action *action );
1032 void notStartEOFAction( int ordering, Action *action );
1033 void notFinalEOFAction( int ordering, Action *action );
1034 void middleEOFAction( int ordering, Action *action );
1036 /* Set To State actions. */
1037 void startToStateAction( int ordering, Action *action );
1038 void allToStateAction( int ordering, Action *action );
1039 void finalToStateAction( int ordering, Action *action );
1040 void notStartToStateAction( int ordering, Action *action );
1041 void notFinalToStateAction( int ordering, Action *action );
1042 void middleToStateAction( int ordering, Action *action );
1044 /* Set From State actions. */
1045 void startFromStateAction( int ordering, Action *action );
1046 void allFromStateAction( int ordering, Action *action );
1047 void finalFromStateAction( int ordering, Action *action );
1048 void notStartFromStateAction( int ordering, Action *action );
1049 void notFinalFromStateAction( int ordering, Action *action );
1050 void middleFromStateAction( int ordering, Action *action );
1052 /* Shift the action ordering of the start transitions to start at
1053 * fromOrder and increase in units of 1. Useful before kleene star
1055 int shiftStartActionOrder( int fromOrder );
1057 /* Clear all priorities from the fsm to so they won't affcet minimization
1058 * of the final fsm. */
1059 void clearAllPriorities();
1061 /* Zero out all the function keys. */
1062 void nullActionKeys();
1064 /* Walk the list of states and verify state properties. */
1065 void verifyStates();
1067 /* Misfit Accounting. Are misfits put on a separate list. */
1068 void setMisfitAccounting( bool val )
1069 { misfitAccounting = val; }
1071 /* Set and Unset a state as final. */
1072 void setFinState( StateAp *state );
1073 void unsetFinState( StateAp *state );
1075 void setStartState( StateAp *state );
1076 void unsetStartState( );
1078 /* Set and unset a state as an entry point. */
1079 void setEntry( int id, StateAp *state );
1080 void changeEntry( int id, StateAp *to, StateAp *from );
1081 void unsetEntry( int id, StateAp *state );
1082 void unsetEntry( int id );
1083 void unsetAllEntryPoints();
1085 /* Epsilon transitions. */
1086 void epsilonTrans( int id );
1087 void shadowReadWriteStates( MergeData &md );
1090 * Basic attaching and detaching.
1093 /* Common to attaching/detaching list and default. */
1094 void attachToInList( StateAp *from, StateAp *to, TransAp *&head, TransAp *trans );
1095 void detachFromInList( StateAp *from, StateAp *to, TransAp *&head, TransAp *trans );
1097 /* Attach with a new transition. */
1098 TransAp *attachNewTrans( StateAp *from, StateAp *to,
1099 Key onChar1, Key onChar2 );
1101 /* Attach with an existing transition that already in an out list. */
1102 void attachTrans( StateAp *from, StateAp *to, TransAp *trans );
1104 /* Redirect a transition away from error and towards some state. */
1105 void redirectErrorTrans( StateAp *from, StateAp *to, TransAp *trans );
1107 /* Detach a transition from a target state. */
1108 void detachTrans( StateAp *from, StateAp *to, TransAp *trans );
1110 /* Detach a state from the graph. */
1111 void detachState( StateAp *state );
1114 * NFA to DFA conversion routines.
1117 /* Duplicate a transition that will dropin to a free spot. */
1118 TransAp *dupTrans( StateAp *from, TransAp *srcTrans );
1120 /* In crossing, two transitions both go to real states. */
1121 TransAp *fsmAttachStates( MergeData &md, StateAp *from,
1122 TransAp *destTrans, TransAp *srcTrans );
1124 /* Two transitions are to be crossed, handle the possibility of either
1125 * going to the error state. */
1126 TransAp *mergeTrans( MergeData &md, StateAp *from,
1127 TransAp *destTrans, TransAp *srcTrans );
1129 /* Compare deterimne relative priorities of two transition tables. */
1130 int comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 );
1132 /* Cross a src transition with one that is already occupying a spot. */
1133 TransAp *crossTransitions( MergeData &md, StateAp *from,
1134 TransAp *destTrans, TransAp *srcTrans );
1136 void outTransCopy( MergeData &md, StateAp *dest, TransAp *srcList );
1138 void doRemove( MergeData &md, StateAp *destState, ExpansionList &expList1 );
1139 void doExpand( MergeData &md, StateAp *destState, ExpansionList &expList1 );
1140 void findCondExpInTrans( ExpansionList &expansionList, StateAp *state,
1141 Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace,
1142 long destVals, LongVect &toValsList );
1143 void findTransExpansions( ExpansionList &expansionList,
1144 StateAp *destState, StateAp *srcState );
1145 void findCondExpansions( ExpansionList &expansionList,
1146 StateAp *destState, StateAp *srcState );
1147 void mergeStateConds( StateAp *destState, StateAp *srcState );
1149 /* Merge a set of states into newState. */
1150 void mergeStates( MergeData &md, StateAp *destState,
1151 StateAp **srcStates, int numSrc );
1152 void mergeStatesLeaving( MergeData &md, StateAp *destState, StateAp *srcState );
1153 void mergeStates( MergeData &md, StateAp *destState, StateAp *srcState );
1155 /* Make all states that are combinations of other states and that
1156 * have not yet had their out transitions filled in. This will
1157 * empty out stateDict and stFil. */
1158 void fillInStates( MergeData &md );
1161 * Transition Comparison.
1164 /* Compare transition data. Either of the pointers may be null. */
1165 static inline int compareDataPtr( TransAp *trans1, TransAp *trans2 );
1167 /* Compare target state and transition data. Either pointer may be null. */
1168 static inline int compareFullPtr( TransAp *trans1, TransAp *trans2 );
1170 /* Compare target partitions. Either pointer may be null. */
1171 static inline int comparePartPtr( TransAp *trans1, TransAp *trans2 );
1173 /* Check marked status of target states. Either pointer may be null. */
1174 static inline bool shouldMarkPtr( MarkIndex &markIndex,
1175 TransAp *trans1, TransAp *trans2 );
1181 /* Compare priority and function table of transitions. */
1182 static int compareTransData( TransAp *trans1, TransAp *trans2 );
1184 /* Add in the properties of srcTrans into this. */
1185 void addInTrans( TransAp *destTrans, TransAp *srcTrans );
1187 /* Compare states on data stored in the states. */
1188 static int compareStateData( const StateAp *state1, const StateAp *state2 );
1190 /* Out transition data. */
1191 void clearOutData( StateAp *state );
1192 bool hasOutData( StateAp *state );
1193 void transferOutData( StateAp *destState, StateAp *srcState );
1199 /* New up a state and add it to the graph. */
1200 StateAp *addState();
1203 * Building basic machines
1206 void concatFsm( Key c );
1207 void concatFsm( Key *str, int len );
1208 void concatFsmCI( Key *str, int len );
1209 void orFsm( Key *set, int len );
1210 void rangeFsm( Key low, Key high );
1211 void rangeStarFsm( Key low, Key high );
1220 void repeatOp( int times );
1221 void optionalRepeatOp( int times );
1222 void concatOp( FsmAp *other );
1223 void unionOp( FsmAp *other );
1224 void intersectOp( FsmAp *other );
1225 void subtractOp( FsmAp *other );
1227 void joinOp( int startId, int finalId, FsmAp **others, int numOthers );
1228 void globOp( FsmAp **others, int numOthers );
1229 void deterministicEntry();
1235 /* Determine if there are any entry points into a start state other than
1236 * the start state. */
1237 bool isStartStateIsolated();
1239 /* Make a new start state that has no entry points. Will not change the
1240 * identity of the fsm. */
1241 void isolateStartState();
1243 /* Workers for resolving epsilon transitions. */
1244 bool inEptVect( EptVect *eptVect, StateAp *targ );
1245 void epsilonFillEptVectFrom( StateAp *root, StateAp *from, bool parentLeaving );
1246 void resolveEpsilonTrans( MergeData &md );
1248 /* Workers for concatenation and union. */
1249 void doConcat( FsmAp *other, StateSet *fromStates, bool optional );
1250 void doOr( FsmAp *other );
1256 /* Unset any final states that are no longer to be final
1257 * due to final bits. */
1258 void unsetIncompleteFinals();
1259 void unsetKilledFinals();
1261 /* Bring in other's entry points. Assumes others states are going to be
1262 * copied into this machine. */
1263 void copyInEntryPoints( FsmAp *other );
1265 /* Ordering states. */
1266 void depthFirstOrdering( StateAp *state );
1267 void depthFirstOrdering();
1268 void sortStatesByFinal();
1270 /* Set sqequential state numbers starting at 0. */
1271 void setStateNumbers( int base );
1273 /* Unset all final states. */
1274 void unsetAllFinStates();
1276 /* Set the bits of final states and clear the bits of non final states. */
1277 void setFinBits( int finStateBits );
1280 * Self-consistency checks.
1283 /* Run a sanity check on the machine. */
1284 void verifyIntegrity();
1286 /* Verify that there are no unreachable states, or dead end states. */
1287 void verifyReachability();
1288 void verifyNoDeadEndStates();
1294 /* Mark all states reachable from state. */
1295 void markReachableFromHereReverse( StateAp *state );
1297 /* Mark all states reachable from state. */
1298 void markReachableFromHere( StateAp *state );
1299 void markReachableFromHereStopFinal( StateAp *state );
1301 /* Removes states that cannot be reached by any path in the fsm and are
1302 * thus wasted silicon. */
1303 void removeDeadEndStates();
1305 /* Removes states that cannot be reached by any path in the fsm and are
1306 * thus wasted silicon. */
1307 void removeUnreachableStates();
1309 /* Remove error actions from states on which the error transition will
1310 * never be taken. */
1311 bool outListCovers( StateAp *state );
1312 bool anyErrorRange( StateAp *state );
1314 /* Remove states that are on the misfit list. */
1315 void removeMisfits();
1321 /* Minimization by partitioning. */
1322 void minimizePartition1();
1323 void minimizePartition2();
1325 /* Minimize the final state Machine. The result is the minimal fsm. Slow
1326 * but stable, correct minimization. Uses n^2 space (lookout) and average
1327 * n^2 time. Worst case n^3 time, but a that is a very rare case. */
1328 void minimizeStable();
1330 /* Minimize the final state machine. Does not find the minimal fsm, but a
1331 * pretty good approximation. Does not use any extra space. Average n^2
1332 * time. Worst case n^3 time, but a that is a very rare case. */
1333 void minimizeApproximate();
1335 /* This is the worker for the minimize approximate solution. It merges
1336 * states that have identical out transitions. */
1337 bool minimizeRound( );
1339 /* Given an intial partioning of states, split partitions that have out trans
1340 * to differing partitions. */
1341 int partitionRound( StateAp **statePtrs, MinPartition *parts, int numParts );
1343 /* Split partitions that have a transition to a previously split partition, until
1344 * there are no more partitions to split. */
1345 int splitCandidates( StateAp **statePtrs, MinPartition *parts, int numParts );
1347 /* Fuse together states in the same partition. */
1348 void fusePartitions( MinPartition *parts, int numParts );
1350 /* Mark pairs where out final stateness differs, out trans data differs,
1351 * trans pairs go to a marked pair or trans data differs. Should get
1353 void initialMarkRound( MarkIndex &markIndex );
1355 /* One marking round on all state pairs. Considers if trans pairs go
1356 * to a marked state only. Returns whether or not a pair was marked. */
1357 bool markRound( MarkIndex &markIndex );
1359 /* Move the in trans into src into dest. */
1360 void inTransMove(StateAp *dest, StateAp *src);
1362 /* Make state src and dest the same state. */
1363 void fuseEquivStates(StateAp *dest, StateAp *src);
1365 /* Find any states that didn't get marked by the marking algorithm and
1366 * merge them into the primary states of their equivalence class. */
1367 void fuseUnmarkedPairs( MarkIndex &markIndex );
1369 /* Merge neighboring transitions go to the same state and have the same
1370 * transitions data. */
1371 void compressTransitions();
1375 #endif /* _FSMGRAPH_H */