2 * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
37 #include "sbsttable.h"
44 /* Flags that control merging. */
45 #define SB_GRAPH1 0x01
46 #define SB_GRAPH2 0x02
48 #define SB_ISFINAL 0x04
49 #define SB_ISMARKED 0x08
50 #define SB_ONLIST 0x10
58 struct LongestMatchPart;
60 /* State list element for unambiguous access to list element. */
66 /* This is the marked index for a state pair. Used in minimization. It keeps
67 * track of whether or not the state pair is marked. */
70 MarkIndex(int states);
73 void markPair(int state1, int state2);
74 bool isPairMarked(int state1, int state2);
81 extern KeyOps *keyOps;
83 /* Transistion Action Element. */
84 typedef SBstMapEl< int, Action* > ActionTableEl;
86 /* Nodes in the tree that use this action. */
89 typedef Vector<NameInst*> ActionRefs;
91 /* Element in list of actions. Contains the string for the code to exectute. */
94 public DListEl<Action>,
95 public AvlTreeEl<Action>
99 Action( const InputLoc &loc, char *name, InlineList *inlineList, int condId )
103 inlineList(inlineList),
116 /* Key for action dictionary. */
117 char *getKey() const { return name; }
119 /* Data collected during parse. */
122 InlineList *inlineList;
125 void actionName( ostream &out )
130 out << loc.line << ":" << loc.col;
133 /* Places in the input text that reference the action. */
134 ActionRefs actionRefs;
136 /* Number of references in the final machine. */
138 { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; }
141 int numFromStateRefs;
152 static inline int compare( const Action *cond1, const Action *cond2 )
154 if ( cond1->condId < cond2->condId )
156 else if ( cond1->condId > cond2->condId )
162 /* A list of actions. */
163 typedef DList<Action> ActionList;
164 typedef AvlTree<Action, char *, CmpStr> ActionDict;
166 /* Structure for reverse action mapping. */
167 struct RevActionMapEl
174 /* Transition Action Table. */
176 : public SBstMap< int, Action*, CmpOrd<int> >
178 void setAction( int ordering, Action *action );
179 void setActions( int *orderings, Action **actions, int nActs );
180 void setActions( const ActionTable &other );
182 bool hasAction( Action *action );
185 typedef SBstSet< Action*, CmpOrd<Action*> > ActionSet;
186 typedef CmpSTable< Action*, CmpOrd<Action*> > CmpActionSet;
188 /* Transistion Action Element. */
189 typedef SBstMapEl< int, LongestMatchPart* > LmActionTableEl;
191 /* Transition Action Table. */
193 : public SBstMap< int, LongestMatchPart*, CmpOrd<int> >
195 void setAction( int ordering, LongestMatchPart *action );
196 void setActions( const LmActionTable &other );
199 /* Compare of a whole action table element (key & value). */
200 struct CmpActionTableEl
202 static int compare( const ActionTableEl &action1,
203 const ActionTableEl &action2 )
205 if ( action1.key < action2.key )
207 else if ( action1.key > action2.key )
209 else if ( action1.value < action2.value )
211 else if ( action1.value > action2.value )
217 /* Compare for ActionTable. */
218 typedef CmpSTable< ActionTableEl, CmpActionTableEl > CmpActionTable;
220 /* Compare of a whole lm action table element (key & value). */
221 struct CmpLmActionTableEl
223 static int compare( const LmActionTableEl &lmAction1,
224 const LmActionTableEl &lmAction2 )
226 if ( lmAction1.key < lmAction2.key )
228 else if ( lmAction1.key > lmAction2.key )
230 else if ( lmAction1.value < lmAction2.value )
232 else if ( lmAction1.value > lmAction2.value )
238 /* Compare for ActionTable. */
239 typedef CmpSTable< LmActionTableEl, CmpLmActionTableEl > CmpLmActionTable;
241 /* Action table element for error action tables. Adds the encoding of transfer
243 struct ErrActionTableEl
245 ErrActionTableEl( Action *action, int ordering, int transferPoint )
246 : ordering(ordering), action(action), transferPoint(transferPoint) { }
248 /* Ordering and id of the action embedding. */
252 /* Id of point of transfere from Error action table to transtions and
256 int getKey() const { return ordering; }
259 struct ErrActionTable
260 : public SBstTable< ErrActionTableEl, int, CmpOrd<int> >
262 void setAction( int ordering, Action *action, int transferPoint );
263 void setActions( const ErrActionTable &other );
266 /* Compare of an error action table element (key & value). */
267 struct CmpErrActionTableEl
269 static int compare( const ErrActionTableEl &action1,
270 const ErrActionTableEl &action2 )
272 if ( action1.ordering < action2.ordering )
274 else if ( action1.ordering > action2.ordering )
276 else if ( action1.action < action2.action )
278 else if ( action1.action > action2.action )
280 else if ( action1.transferPoint < action2.transferPoint )
282 else if ( action1.transferPoint > action2.transferPoint )
288 /* Compare for ErrActionTable. */
289 typedef CmpSTable< ErrActionTableEl, CmpErrActionTableEl > CmpErrActionTable;
292 /* Descibe a priority, shared among PriorEls.
293 * Has key and whether or not used. */
300 /* Element in the arrays of priorities for transitions and arrays. Ordering is
301 * unique among instantiations of machines, desc is shared. */
304 PriorEl( int ordering, PriorDesc *desc )
305 : ordering(ordering), desc(desc) { }
311 /* Compare priority elements, which are ordered by the priority descriptor
315 static inline int compare( const PriorEl &pel1, const PriorEl &pel2 )
317 if ( pel1.desc->key < pel2.desc->key )
319 else if ( pel1.desc->key > pel2.desc->key )
327 /* Priority Table. */
329 : public SBstSet< PriorEl, PriorElCmp >
331 void setPrior( int ordering, PriorDesc *desc );
332 void setPriors( const PriorTable &other );
335 /* Compare of prior table elements for distinguising state data. */
338 static inline int compare( const PriorEl &pel1, const PriorEl &pel2 )
340 if ( pel1.desc < pel2.desc )
342 else if ( pel1.desc > pel2.desc )
344 else if ( pel1.ordering < pel2.ordering )
346 else if ( pel1.ordering > pel2.ordering )
352 /* Compare of PriorTable distinguising state data. Using a compare of the
353 * pointers is a little more strict than it needs be. It requires that
354 * prioritiy tables have the exact same set of priority assignment operators
355 * (from the input lang) to be considered equal.
357 * Really only key-value pairs need be tested and ordering be merged. However
358 * this would require that in the fuseing of states, priority descriptors be
359 * chosen for the new fused state based on priority. Since the out transition
360 * lists and ranges aren't necessarily going to line up, this is more work for
361 * little gain. Final compression resets all priorities first, so this would
362 * only be useful for compression at every operator, which is only an
363 * undocumented test feature.
365 typedef CmpSTable<PriorEl, CmpPriorEl> CmpPriorTable;
367 /* Plain action list that imposes no ordering. */
368 typedef Vector<int> TransFuncList;
370 /* Comparison for TransFuncList. */
371 typedef CmpTable< int, CmpOrd<int> > TransFuncListCompare;
373 /* Transition class that implements actions and priorities. */
376 TransAp() : fromState(0), toState(0) {}
377 TransAp( const TransAp &other ) :
378 lowKey(other.lowKey),
379 highKey(other.highKey),
380 fromState(0), toState(0),
381 actionTable(other.actionTable),
382 priorTable(other.priorTable)
384 assert( lmActionTable.length() == 0 && other.lmActionTable.length() == 0 );
391 /* Pointers for outlist. */
392 TransAp *prev, *next;
394 /* Pointers for in-list. */
395 TransAp *ilprev, *ilnext;
397 /* The function table and priority for the transition. */
398 ActionTable actionTable;
399 PriorTable priorTable;
401 LmActionTable lmActionTable;
404 /* In transition list. Like DList except only has head pointers, which is all
405 * that is required. Insertion and deletion is handled by the graph. This
406 * class provides the iterator of a single list. */
409 TransInList() : head(0) { }
415 /* Default construct. */
418 /* Construct, assign from a list. */
419 Iter( const TransInList &il ) : ptr(il.head) { }
420 Iter &operator=( const TransInList &dl ) { ptr = dl.head; return *this; }
423 bool lte() const { return ptr != 0; }
424 bool end() const { return ptr == 0; }
426 /* At the first, last element. */
427 bool first() const { return ptr && ptr->ilprev == 0; }
428 bool last() const { return ptr && ptr->ilnext == 0; }
430 /* Cast, dereference, arrow ops. */
431 operator TransAp*() const { return ptr; }
432 TransAp &operator *() const { return *ptr; }
433 TransAp *operator->() const { return ptr; }
435 /* Increment, decrement. */
436 inline void operator++(int) { ptr = ptr->ilnext; }
437 inline void operator--(int) { ptr = ptr->ilprev; }
439 /* The iterator is simply a pointer. */
444 typedef DList<TransAp> TransList;
446 /* Set of states, list of states. */
447 typedef BstSet<StateAp*> StateSet;
448 typedef DList<StateAp> StateList;
450 /* A element in a state dict. */
453 public AvlTreeEl<StateDictEl>
455 StateDictEl(const StateSet &stateSet)
456 : stateSet(stateSet) { }
458 const StateSet &getKey() { return stateSet; }
463 /* Dictionary mapping a set of states to a target state. */
464 typedef AvlTree< StateDictEl, StateSet, CmpTable<StateAp*> > StateDict;
466 /* Data needed for a merge operation. */
470 : stfillHead(0), stfillTail(0) { }
477 void fillListAppend( StateAp *state );
484 TransEl( Key lowKey, Key highKey )
485 : lowKey(lowKey), highKey(highKey) { }
486 TransEl( Key lowKey, Key highKey, TransAp *value )
487 : lowKey(lowKey), highKey(highKey), value(value) { }
495 static int compare( const Key key1, const Key key2 )
499 else if ( key1 > key2 )
506 /* Vector based set of key items. */
507 typedef BstSet<Key, CmpKey> KeySet;
511 MinPartition() : active(false) { }
516 MinPartition *prev, *next;
519 /* Epsilon transition stored in a state. Specifies the target */
520 typedef Vector<int> EpsilonTrans;
522 /* List of states that are to be drawn into this. */
525 EptVectEl( StateAp *targ, bool leaving )
526 : targ(targ), leaving(leaving) { }
531 typedef Vector<EptVectEl> EptVect;
533 /* Set of entry ids that go into this state. */
534 typedef BstSet<int> EntryIdSet;
536 /* Set of longest match items that may be active in a given state. */
537 typedef BstSet<LongestMatchPart*> LmItemSet;
539 /* A Conditions which is to be
540 * transfered on pending out transitions. */
543 OutCond( Action *action, bool sense )
544 : action(action), sense(sense) {}
552 static int compare( const OutCond &outCond1, const OutCond &outCond2 )
554 if ( outCond1.action < outCond2.action )
556 else if ( outCond1.action > outCond2.action )
558 else if ( outCond1.sense < outCond2.sense )
560 else if ( outCond1.sense > outCond2.sense )
566 /* Set of conditions to be transfered to on pending out transitions. */
567 typedef SBstSet< OutCond, CmpOutCond > OutCondSet;
568 typedef CmpSTable< OutCond, CmpOutCond > CmpOutCondSet;
571 typedef BstSet< Action*, CmpCondId > CondSet;
572 typedef CmpTable< Action*, CmpCondId > CmpCondSet;
575 : public AvlTreeEl<CondSpace>
577 CondSpace( const CondSet &condSet )
578 : condSet(condSet) {}
580 const CondSet &getKey() { return condSet; }
587 typedef Vector<CondSpace*> CondSpaceVect;
589 typedef AvlTree<CondSpace, CondSet, CmpCondSet> CondSpaceMap;
593 StateCond( Key lowKey, Key highKey ) :
594 lowKey(lowKey), highKey(highKey) {}
598 CondSpace *condSpace;
600 StateCond *prev, *next;
603 typedef DList<StateCond> StateCondList;
604 typedef Vector<long> LongVect;
608 Expansion( Key lowKey, Key highKey ) :
609 lowKey(lowKey), highKey(highKey),
610 fromTrans(0), fromCondSpace(0),
615 if ( fromTrans != 0 )
623 CondSpace *fromCondSpace;
626 CondSpace *toCondSpace;
629 Expansion *prev, *next;
632 typedef DList<Expansion> ExpansionList;
644 CondData() : nextCondKey(0) {}
646 /* Condition info. */
649 CondSpaceMap condSpaceMap;
652 extern CondData *condData;
654 /* State class that implements actions and priorities. */
658 StateAp(const StateAp &other);
661 /* Is the state final? */
662 bool isFinState() { return stateBits & SB_ISFINAL; }
664 /* Out transition list and the pointer for the default out trans. */
667 /* In transition Lists. */
670 /* Entry points into the state. */
673 /* Epsilon transitions. */
674 EpsilonTrans epsilonTrans;
676 /* Condition info. */
677 StateCondList stateCondList;
679 /* Number of in transitions from states other than ourselves. */
682 /* Temporary data for various algorithms. */
684 /* When duplicating the fsm we need to map each
685 * state to the new state representing it. */
688 /* When minimizing machines by partitioning, this maps to the group
689 * the state is in. */
690 MinPartition *partition;
692 /* When merging states (state machine operations) this next pointer is
693 * used for the list of states that need to be filled in. */
696 /* Identification for printing and stable minimization. */
701 /* Data used in epsilon operation, maybe fit into alg? */
702 StateAp *isolatedShadow;
705 /* A pointer to a dict element that contains the set of states this state
706 * represents. This cannot go into alg, because alg.next is used during
707 * the merging process. */
708 StateDictEl *stateDictEl;
710 /* When drawing epsilon transitions, holds the list of states to merge
714 /* Bits controlling the behaviour of the state during collapsing to dfa. */
717 /* State list elements. */
718 StateAp *next, *prev;
721 * Priority and Action data.
724 /* Out priorities transfered to out transitions. */
725 PriorTable outPriorTable;
727 /* The following two action tables are distinguished by the fact that when
728 * toState actions are executed immediatly after transition actions of
729 * incoming transitions and the current character will be the same as the
730 * one available then. The fromState actions are executed immediately
731 * before the transition actions of outgoing transitions and the current
732 * character is same as the one available then. */
734 /* Actions to execute upon entering into a state. */
735 ActionTable toStateActionTable;
737 /* Actions to execute when going from the state to the transition. */
738 ActionTable fromStateActionTable;
740 /* Actions to add to any future transitions that leave via this state. */
741 ActionTable outActionTable;
743 /* Conditions to add to any future transiions that leave via this sttate. */
744 OutCondSet outCondSet;
746 /* Error action tables. */
747 ErrActionTable errActionTable;
749 /* Actions to execute on eof. */
750 ActionTable eofActionTable;
752 /* Set of longest match items that may be active in this state. */
756 template <class ListItem> struct NextTrans
767 lowKey = trans->lowKey;
768 highKey = trans->highKey;
772 void set( ListItem *t ) {
784 /* Encodes the different states that are meaningful to the of the iterator. */
785 enum PairIterUserState
787 RangeInS1, RangeInS2,
792 template <class ListItem1, class ListItem2 = ListItem1> struct PairIter
794 /* Encodes the different states that an fsm iterator can be in. */
797 ConsumeS1Range, ConsumeS2Range,
798 OnlyInS1Range, OnlyInS2Range,
799 S1SticksOut, S1SticksOutBreak,
800 S2SticksOut, S2SticksOutBreak,
801 S1DragsBehind, S1DragsBehindBreak,
802 S2DragsBehind, S2DragsBehindBreak,
806 PairIter( ListItem1 *list1, ListItem2 *list2 );
808 /* Query iterator. */
809 bool lte() { return itState != End; }
810 bool end() { return itState == End; }
811 void operator++(int) { findNext(); }
812 void operator++() { findNext(); }
814 /* Iterator state. */
818 PairIterUserState userState;
820 NextTrans<ListItem1> s1Tel;
821 NextTrans<ListItem2> s2Tel;
822 Key bottomLow, bottomHigh;
823 ListItem1 *bottomTrans1;
824 ListItem2 *bottomTrans2;
830 /* Init the iterator by advancing to the first item. */
831 template <class ListItem1, class ListItem2> PairIter<ListItem1, ListItem2>::PairIter(
832 ListItem1 *list1, ListItem2 *list2 )
841 /* Return and re-entry for the co-routine iterators. This should ALWAYS be
842 * used inside of a block. */
843 #define CO_RETURN(label) \
846 entry##label: backIn = true
848 /* Return and re-entry for the co-routine iterators. This should ALWAYS be
849 * used inside of a block. */
850 #define CO_RETURN2(label, uState) \
852 userState = uState; \
854 entry##label: backIn = true
856 /* Advance to the next transition. When returns, trans points to the next
857 * transition, unless there are no more, in which case end() returns true. */
858 template <class ListItem1, class ListItem2> void PairIter<ListItem1, ListItem2>::findNext()
860 /* This variable is used in dummy statements that follow the entry
861 * goto labels. The compiler needs some statement to follow the label. */
864 /* Jump into the iterator routine base on the iterator state. */
866 case Begin: goto entryBegin;
867 case ConsumeS1Range: goto entryConsumeS1Range;
868 case ConsumeS2Range: goto entryConsumeS2Range;
869 case OnlyInS1Range: goto entryOnlyInS1Range;
870 case OnlyInS2Range: goto entryOnlyInS2Range;
871 case S1SticksOut: goto entryS1SticksOut;
872 case S1SticksOutBreak: goto entryS1SticksOutBreak;
873 case S2SticksOut: goto entryS2SticksOut;
874 case S2SticksOutBreak: goto entryS2SticksOutBreak;
875 case S1DragsBehind: goto entryS1DragsBehind;
876 case S1DragsBehindBreak: goto entryS1DragsBehindBreak;
877 case S2DragsBehind: goto entryS2DragsBehind;
878 case S2DragsBehindBreak: goto entryS2DragsBehindBreak;
879 case ExactOverlap: goto entryExactOverlap;
880 case End: goto entryEnd;
884 /* Set up the next structs at the head of the transition lists. */
888 /* Concurrently scan both out ranges. */
890 if ( s1Tel.trans == 0 ) {
891 /* We are at the end of state1's ranges. Process the rest of
892 * state2's ranges. */
893 while ( s2Tel.trans != 0 ) {
894 /* Range is only in s2. */
895 CO_RETURN2( ConsumeS2Range, RangeInS2 );
900 else if ( s2Tel.trans == 0 ) {
901 /* We are at the end of state2's ranges. Process the rest of
902 * state1's ranges. */
903 while ( s1Tel.trans != 0 ) {
904 /* Range is only in s1. */
905 CO_RETURN2( ConsumeS1Range, RangeInS1 );
910 /* Both state1's and state2's transition elements are good.
911 * The signiture of no overlap is a back key being in front of a
913 else if ( s1Tel.highKey < s2Tel.lowKey ) {
914 /* A range exists in state1 that does not overlap with state2. */
915 CO_RETURN2( OnlyInS1Range, RangeInS1 );
918 else if ( s2Tel.highKey < s1Tel.lowKey ) {
919 /* A range exists in state2 that does not overlap with state1. */
920 CO_RETURN2( OnlyInS2Range, RangeInS2 );
923 /* There is overlap, must mix the ranges in some way. */
924 else if ( s1Tel.lowKey < s2Tel.lowKey ) {
925 /* Range from state1 sticks out front. Must break it into
926 * non-overlaping and overlaping segments. */
927 bottomLow = s2Tel.lowKey;
928 bottomHigh = s1Tel.highKey;
929 s1Tel.highKey = s2Tel.lowKey;
930 s1Tel.highKey.decrement();
931 bottomTrans1 = s1Tel.trans;
933 /* Notify the caller that we are breaking s1. This gives them a
934 * chance to duplicate s1Tel[0,1].value. */
935 CO_RETURN2( S1SticksOutBreak, BreakS1 );
937 /* Broken off range is only in s1. */
938 CO_RETURN2( S1SticksOut, RangeInS1 );
940 /* Advance over the part sticking out front. */
941 s1Tel.lowKey = bottomLow;
942 s1Tel.highKey = bottomHigh;
943 s1Tel.trans = bottomTrans1;
945 else if ( s2Tel.lowKey < s1Tel.lowKey ) {
946 /* Range from state2 sticks out front. Must break it into
947 * non-overlaping and overlaping segments. */
948 bottomLow = s1Tel.lowKey;
949 bottomHigh = s2Tel.highKey;
950 s2Tel.highKey = s1Tel.lowKey;
951 s2Tel.highKey.decrement();
952 bottomTrans2 = s2Tel.trans;
954 /* Notify the caller that we are breaking s2. This gives them a
955 * chance to duplicate s2Tel[0,1].value. */
956 CO_RETURN2( S2SticksOutBreak, BreakS2 );
958 /* Broken off range is only in s2. */
959 CO_RETURN2( S2SticksOut, RangeInS2 );
961 /* Advance over the part sticking out front. */
962 s2Tel.lowKey = bottomLow;
963 s2Tel.highKey = bottomHigh;
964 s2Tel.trans = bottomTrans2;
966 /* Low ends are even. Are the high ends even? */
967 else if ( s1Tel.highKey < s2Tel.highKey ) {
968 /* Range from state2 goes longer than the range from state1. We
969 * must break the range from state2 into an evenly overlaping
971 bottomLow = s1Tel.highKey;
972 bottomLow.increment();
973 bottomHigh = s2Tel.highKey;
974 s2Tel.highKey = s1Tel.highKey;
975 bottomTrans2 = s2Tel.trans;
977 /* Notify the caller that we are breaking s2. This gives them a
978 * chance to duplicate s2Tel[0,1].value. */
979 CO_RETURN2( S2DragsBehindBreak, BreakS2 );
981 /* Breaking s2 produces exact overlap. */
982 CO_RETURN2( S2DragsBehind, RangeOverlap );
984 /* Advance over the front we just broke off of range 2. */
985 s2Tel.lowKey = bottomLow;
986 s2Tel.highKey = bottomHigh;
987 s2Tel.trans = bottomTrans2;
989 /* Advance over the entire s1Tel. We have consumed it. */
992 else if ( s2Tel.highKey < s1Tel.highKey ) {
993 /* Range from state1 goes longer than the range from state2. We
994 * must break the range from state1 into an evenly overlaping
996 bottomLow = s2Tel.highKey;
997 bottomLow.increment();
998 bottomHigh = s1Tel.highKey;
999 s1Tel.highKey = s2Tel.highKey;
1000 bottomTrans1 = s1Tel.trans;
1002 /* Notify the caller that we are breaking s1. This gives them a
1003 * chance to duplicate s2Tel[0,1].value. */
1004 CO_RETURN2( S1DragsBehindBreak, BreakS1 );
1006 /* Breaking s1 produces exact overlap. */
1007 CO_RETURN2( S1DragsBehind, RangeOverlap );
1009 /* Advance over the front we just broke off of range 1. */
1010 s1Tel.lowKey = bottomLow;
1011 s1Tel.highKey = bottomHigh;
1012 s1Tel.trans = bottomTrans1;
1014 /* Advance over the entire s2Tel. We have consumed it. */
1018 /* There is an exact overlap. */
1019 CO_RETURN2( ExactOverlap, RangeOverlap );
1026 /* Done, go into end state. */
1031 /* Compare lists of epsilon transitions. Entries are name ids of targets. */
1032 typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans;
1034 /* Compare class for the Approximate minimization. */
1039 int compare( const StateAp *pState1, const StateAp *pState2 );
1042 /* Compare class for the initial partitioning of a partition minimization. */
1043 class InitPartitionCompare
1046 InitPartitionCompare() { }
1047 int compare( const StateAp *pState1, const StateAp *pState2 );
1050 /* Compare class for the regular partitioning of a partition minimization. */
1051 class PartitionCompare
1054 PartitionCompare() { }
1055 int compare( const StateAp *pState1, const StateAp *pState2 );
1058 /* Compare class for a minimization that marks pairs. Provides the shouldMark
1064 bool shouldMark( MarkIndex &markIndex, const StateAp *pState1,
1065 const StateAp *pState2 );
1068 /* List of partitions. */
1069 typedef DList< MinPartition > PartitionList;
1071 /* List of transtions out of a state. */
1072 typedef Vector<TransEl> TransListVect;
1074 /* Entry point map used for keeping track of entry points in a machine. */
1075 typedef BstSet< int > EntryIdSet;
1076 typedef BstMapEl< int, StateAp* > EntryMapEl;
1077 typedef BstMap< int, StateAp* > EntryMap;
1078 typedef Vector<EntryMapEl> EntryMapBase;
1080 /* Graph class that implements actions and priorities. */
1083 /* Constructors/Destructors. */
1085 FsmAp( const FsmAp &graph );
1088 /* The list of states. */
1089 StateList stateList;
1090 StateList misfitList;
1092 /* The map of entry points. */
1093 EntryMap entryPoints;
1095 /* The start state. */
1096 StateAp *startState;
1098 /* Error state, possibly created only when the final machine has been
1099 * created and the XML machine is about to be written. No transitions
1100 * point to this state. */
1103 /* The set of final states. */
1104 StateSet finStateSet;
1106 /* Misfit Accounting. Are misfits put on a separate list. */
1107 bool misfitAccounting;
1110 * Transition actions and priorities.
1113 /* Set priorities on transtions. */
1114 void startFsmPrior( int ordering, PriorDesc *prior );
1115 void allTransPrior( int ordering, PriorDesc *prior );
1116 void finishFsmPrior( int ordering, PriorDesc *prior );
1117 void leaveFsmPrior( int ordering, PriorDesc *prior );
1119 /* Action setting support. */
1120 void transferErrorActions( StateAp *state, int transferPoint );
1121 void setErrorAction( StateAp *state, int ordering, Action *action );
1123 /* Fill all spaces in a transition list with an error transition. */
1124 void fillGaps( StateAp *state );
1126 /* Similar to setErrorAction, instead gives a state to go to on error. */
1127 void setErrorTarget( StateAp *state, StateAp *target, int *orderings,
1128 Action **actions, int nActs );
1130 /* Set actions to execute. */
1131 void startFsmAction( int ordering, Action *action );
1132 void allTransAction( int ordering, Action *action );
1133 void finishFsmAction( int ordering, Action *action );
1134 void leaveFsmAction( int ordering, Action *action );
1135 void longMatchAction( int ordering, LongestMatchPart *lmPart );
1137 /* Set conditions. */
1138 CondSpace *addCondSpace( const CondSet &condSet );
1140 void findEmbedExpansions( ExpansionList &expansionList,
1141 StateAp *destState, Action *condAction, bool sense );
1142 void embedCondition( MergeData &md, StateAp *state, Action *condAction, bool sense );
1143 void embedCondition( StateAp *state, Action *condAction, bool sense );
1145 void startFsmCondition( Action *condAction, bool sense );
1146 void allTransCondition( Action *condAction, bool sense );
1147 void leaveFsmCondition( Action *condAction, bool sense );
1149 /* Set error actions to execute. */
1150 void startErrorAction( int ordering, Action *action, int transferPoint );
1151 void allErrorAction( int ordering, Action *action, int transferPoint );
1152 void finalErrorAction( int ordering, Action *action, int transferPoint );
1153 void notStartErrorAction( int ordering, Action *action, int transferPoint );
1154 void notFinalErrorAction( int ordering, Action *action, int transferPoint );
1155 void middleErrorAction( int ordering, Action *action, int transferPoint );
1157 /* Set EOF actions. */
1158 void startEOFAction( int ordering, Action *action );
1159 void allEOFAction( int ordering, Action *action );
1160 void finalEOFAction( int ordering, Action *action );
1161 void notStartEOFAction( int ordering, Action *action );
1162 void notFinalEOFAction( int ordering, Action *action );
1163 void middleEOFAction( int ordering, Action *action );
1165 /* Set To State actions. */
1166 void startToStateAction( int ordering, Action *action );
1167 void allToStateAction( int ordering, Action *action );
1168 void finalToStateAction( int ordering, Action *action );
1169 void notStartToStateAction( int ordering, Action *action );
1170 void notFinalToStateAction( int ordering, Action *action );
1171 void middleToStateAction( int ordering, Action *action );
1173 /* Set From State actions. */
1174 void startFromStateAction( int ordering, Action *action );
1175 void allFromStateAction( int ordering, Action *action );
1176 void finalFromStateAction( int ordering, Action *action );
1177 void notStartFromStateAction( int ordering, Action *action );
1178 void notFinalFromStateAction( int ordering, Action *action );
1179 void middleFromStateAction( int ordering, Action *action );
1181 /* Shift the action ordering of the start transitions to start at
1182 * fromOrder and increase in units of 1. Useful before kleene star
1184 int shiftStartActionOrder( int fromOrder );
1186 /* Clear all priorities from the fsm to so they won't affcet minimization
1187 * of the final fsm. */
1188 void clearAllPriorities();
1190 /* Zero out all the function keys. */
1191 void nullActionKeys();
1193 /* Walk the list of states and verify state properties. */
1194 void verifyStates();
1196 /* Misfit Accounting. Are misfits put on a separate list. */
1197 void setMisfitAccounting( bool val )
1198 { misfitAccounting = val; }
1200 /* Set and Unset a state as final. */
1201 void setFinState( StateAp *state );
1202 void unsetFinState( StateAp *state );
1204 void setStartState( StateAp *state );
1205 void unsetStartState( );
1207 /* Set and unset a state as an entry point. */
1208 void setEntry( int id, StateAp *state );
1209 void changeEntry( int id, StateAp *to, StateAp *from );
1210 void unsetEntry( int id, StateAp *state );
1211 void unsetEntry( int id );
1212 void unsetAllEntryPoints();
1214 /* Epsilon transitions. */
1215 void epsilonTrans( int id );
1216 void shadowReadWriteStates( MergeData &md );
1219 * Basic attaching and detaching.
1222 /* Common to attaching/detaching list and default. */
1223 void attachToInList( StateAp *from, StateAp *to, TransAp *&head, TransAp *trans );
1224 void detachFromInList( StateAp *from, StateAp *to, TransAp *&head, TransAp *trans );
1226 /* Attach with a new transition. */
1227 TransAp *attachNewTrans( StateAp *from, StateAp *to,
1228 Key onChar1, Key onChar2 );
1230 /* Attach with an existing transition that already in an out list. */
1231 void attachTrans( StateAp *from, StateAp *to, TransAp *trans );
1233 /* Redirect a transition away from error and towards some state. */
1234 void redirectErrorTrans( StateAp *from, StateAp *to, TransAp *trans );
1236 /* Detach a transition from a target state. */
1237 void detachTrans( StateAp *from, StateAp *to, TransAp *trans );
1239 /* Detach a state from the graph. */
1240 void detachState( StateAp *state );
1243 * NFA to DFA conversion routines.
1246 /* Duplicate a transition that will dropin to a free spot. */
1247 TransAp *dupTrans( StateAp *from, TransAp *srcTrans );
1249 /* In crossing, two transitions both go to real states. */
1250 TransAp *fsmAttachStates( MergeData &md, StateAp *from,
1251 TransAp *destTrans, TransAp *srcTrans );
1253 /* Two transitions are to be crossed, handle the possibility of either
1254 * going to the error state. */
1255 TransAp *mergeTrans( MergeData &md, StateAp *from,
1256 TransAp *destTrans, TransAp *srcTrans );
1258 /* Compare deterimne relative priorities of two transition tables. */
1259 int comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 );
1261 /* Cross a src transition with one that is already occupying a spot. */
1262 TransAp *crossTransitions( MergeData &md, StateAp *from,
1263 TransAp *destTrans, TransAp *srcTrans );
1265 void outTransCopy( MergeData &md, StateAp *dest, TransAp *srcList );
1267 void doRemove( MergeData &md, StateAp *destState, ExpansionList &expList1 );
1268 void doExpand( MergeData &md, StateAp *destState, ExpansionList &expList1 );
1269 void findCondExpInTrans( ExpansionList &expansionList, StateAp *state,
1270 Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace,
1271 long destVals, LongVect &toValsList );
1272 void findTransExpansions( ExpansionList &expansionList,
1273 StateAp *destState, StateAp *srcState );
1274 void findCondExpansions( ExpansionList &expansionList,
1275 StateAp *destState, StateAp *srcState );
1276 void mergeStateConds( StateAp *destState, StateAp *srcState );
1278 /* Merge a set of states into newState. */
1279 void mergeStates( MergeData &md, StateAp *destState,
1280 StateAp **srcStates, int numSrc );
1281 void mergeStatesLeaving( MergeData &md, StateAp *destState, StateAp *srcState );
1282 void mergeStates( MergeData &md, StateAp *destState, StateAp *srcState );
1284 /* Make all states that are combinations of other states and that
1285 * have not yet had their out transitions filled in. This will
1286 * empty out stateDict and stFil. */
1287 void fillInStates( MergeData &md );
1290 * Transition Comparison.
1293 /* Compare transition data. Either of the pointers may be null. */
1294 static inline int compareDataPtr( TransAp *trans1, TransAp *trans2 );
1296 /* Compare target state and transition data. Either pointer may be null. */
1297 static inline int compareFullPtr( TransAp *trans1, TransAp *trans2 );
1299 /* Compare target partitions. Either pointer may be null. */
1300 static inline int comparePartPtr( TransAp *trans1, TransAp *trans2 );
1302 /* Check marked status of target states. Either pointer may be null. */
1303 static inline bool shouldMarkPtr( MarkIndex &markIndex,
1304 TransAp *trans1, TransAp *trans2 );
1310 /* Compare priority and function table of transitions. */
1311 static int compareTransData( TransAp *trans1, TransAp *trans2 );
1313 /* Add in the properties of srcTrans into this. */
1314 void addInTrans( TransAp *destTrans, TransAp *srcTrans );
1316 /* Compare states on data stored in the states. */
1317 static int compareStateData( const StateAp *state1, const StateAp *state2 );
1319 /* Out transition data. */
1320 void clearOutData( StateAp *state );
1321 bool hasOutData( StateAp *state );
1322 void transferOutData( StateAp *destState, StateAp *srcState );
1328 /* New up a state and add it to the graph. */
1329 StateAp *addState();
1332 * Building basic machines
1335 void concatFsm( Key c );
1336 void concatFsm( Key *str, int len );
1337 void concatFsmCI( Key *str, int len );
1338 void orFsm( Key *set, int len );
1339 void rangeFsm( Key low, Key high );
1340 void rangeStarFsm( Key low, Key high );
1349 void repeatOp( int times );
1350 void optionalRepeatOp( int times );
1351 void concatOp( FsmAp *other );
1352 void unionOp( FsmAp *other );
1353 void intersectOp( FsmAp *other );
1354 void subtractOp( FsmAp *other );
1356 void joinOp( int startId, int finalId, FsmAp **others, int numOthers );
1357 void globOp( FsmAp **others, int numOthers );
1358 void deterministicEntry();
1364 /* Determine if there are any entry points into a start state other than
1365 * the start state. */
1366 bool isStartStateIsolated();
1368 /* Make a new start state that has no entry points. Will not change the
1369 * identity of the fsm. */
1370 void isolateStartState();
1372 /* Workers for resolving epsilon transitions. */
1373 bool inEptVect( EptVect *eptVect, StateAp *targ );
1374 void epsilonFillEptVectFrom( StateAp *root, StateAp *from, bool parentLeaving );
1375 void resolveEpsilonTrans( MergeData &md );
1377 /* Workers for concatenation and union. */
1378 void doConcat( FsmAp *other, StateSet *fromStates, bool optional );
1379 void doOr( FsmAp *other );
1385 /* Unset any final states that are no longer to be final
1386 * due to final bits. */
1387 void unsetIncompleteFinals();
1388 void unsetKilledFinals();
1390 /* Bring in other's entry points. Assumes others states are going to be
1391 * copied into this machine. */
1392 void copyInEntryPoints( FsmAp *other );
1394 /* Ordering states. */
1395 void depthFirstOrdering( StateAp *state );
1396 void depthFirstOrdering();
1397 void sortStatesByFinal();
1399 /* Set sqequential state numbers starting at 0. */
1400 void setStateNumbers( int base );
1402 /* Unset all final states. */
1403 void unsetAllFinStates();
1405 /* Set the bits of final states and clear the bits of non final states. */
1406 void setFinBits( int finStateBits );
1409 * Self-consistency checks.
1412 /* Run a sanity check on the machine. */
1413 void verifyIntegrity();
1415 /* Verify that there are no unreachable states, or dead end states. */
1416 void verifyReachability();
1417 void verifyNoDeadEndStates();
1423 /* Mark all states reachable from state. */
1424 void markReachableFromHereReverse( StateAp *state );
1426 /* Mark all states reachable from state. */
1427 void markReachableFromHere( StateAp *state );
1428 void markReachableFromHereStopFinal( StateAp *state );
1430 /* Removes states that cannot be reached by any path in the fsm and are
1431 * thus wasted silicon. */
1432 void removeDeadEndStates();
1434 /* Removes states that cannot be reached by any path in the fsm and are
1435 * thus wasted silicon. */
1436 void removeUnreachableStates();
1438 /* Remove error actions from states on which the error transition will
1439 * never be taken. */
1440 bool outListCovers( StateAp *state );
1441 bool anyErrorRange( StateAp *state );
1443 /* Remove states that are on the misfit list. */
1444 void removeMisfits();
1450 /* Minimization by partitioning. */
1451 void minimizePartition1();
1452 void minimizePartition2();
1454 /* Minimize the final state Machine. The result is the minimal fsm. Slow
1455 * but stable, correct minimization. Uses n^2 space (lookout) and average
1456 * n^2 time. Worst case n^3 time, but a that is a very rare case. */
1457 void minimizeStable();
1459 /* Minimize the final state machine. Does not find the minimal fsm, but a
1460 * pretty good approximation. Does not use any extra space. Average n^2
1461 * time. Worst case n^3 time, but a that is a very rare case. */
1462 void minimizeApproximate();
1464 /* This is the worker for the minimize approximate solution. It merges
1465 * states that have identical out transitions. */
1466 bool minimizeRound( );
1468 /* Given an intial partioning of states, split partitions that have out trans
1469 * to differing partitions. */
1470 int partitionRound( StateAp **statePtrs, MinPartition *parts, int numParts );
1472 /* Split partitions that have a transition to a previously split partition, until
1473 * there are no more partitions to split. */
1474 int splitCandidates( StateAp **statePtrs, MinPartition *parts, int numParts );
1476 /* Fuse together states in the same partition. */
1477 void fusePartitions( MinPartition *parts, int numParts );
1479 /* Mark pairs where out final stateness differs, out trans data differs,
1480 * trans pairs go to a marked pair or trans data differs. Should get
1482 void initialMarkRound( MarkIndex &markIndex );
1484 /* One marking round on all state pairs. Considers if trans pairs go
1485 * to a marked state only. Returns whether or not a pair was marked. */
1486 bool markRound( MarkIndex &markIndex );
1488 /* Move the in trans into src into dest. */
1489 void inTransMove(StateAp *dest, StateAp *src);
1491 /* Make state src and dest the same state. */
1492 void fuseEquivStates(StateAp *dest, StateAp *src);
1494 /* Find any states that didn't get marked by the marking algorithm and
1495 * merge them into the primary states of their equivalence class. */
1496 void fuseUnmarkedPairs( MarkIndex &markIndex );
1498 /* Merge neighboring transitions go to the same state and have the same
1499 * transitions data. */
1500 void compressTransitions();
1502 /* Returns true if there is a transtion (either explicit or by a gap) to
1503 * the error state. */
1504 bool checkErrTrans( StateAp *state, TransAp *trans );
1505 bool checkErrTransFinish( StateAp *state );
1506 bool hasErrorTrans();
1508 /* Check if a machine defines a single character. This is useful in
1509 * validating ranges and machines to export. */
1510 bool checkSingleCharMachine( );
1514 #endif /* _FSMGRAPH_H */