2 * Copyright 2001-2008 Adrian Thurston <thurston@complang.org>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 #include "parsedata.h"
31 #include "parsetree.h"
32 #include "mergesort.h"
33 #include "xmlcodegen.h"
36 #include "inputdata.h"
40 char mainMachine[] = "main";
42 void Token::set( const char *str, int len )
45 data = new char[len+1];
46 memcpy( data, str, len );
50 void Token::append( const Token &other )
52 int newLength = length + other.length;
53 char *newString = new char[newLength+1];
54 memcpy( newString, data, length );
55 memcpy( newString + length, other.data, other.length );
56 newString[newLength] = 0;
61 /* Perform minimization after an operation according
62 * to the command line args. */
63 void afterOpMinimize( FsmAp *fsm, bool lastInSeq )
65 /* Switch on the prefered minimization algorithm. */
66 if ( minimizeOpt == MinimizeEveryOp || ( minimizeOpt == MinimizeMostOps && lastInSeq ) ) {
67 /* First clean up the graph. FsmAp operations may leave these
68 * lying around. There should be no dead end states. The subtract
69 * intersection operators are the only places where they may be
70 * created and those operators clean them up. */
71 fsm->removeUnreachableStates();
73 switch ( minimizeLevel ) {
75 fsm->minimizeApproximate();
77 case MinimizePartition1:
78 fsm->minimizePartition1();
80 case MinimizePartition2:
81 fsm->minimizePartition2();
84 fsm->minimizeStable();
90 /* Count the transitions in the fsm by walking the state list. */
91 int countTransitions( FsmAp *fsm )
94 StateAp *state = fsm->stateList.head;
95 while ( state != 0 ) {
96 numTrans += state->outList.length();
102 Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd )
104 /* Reset errno so we can check for overflow or underflow. In the event of
105 * an error, sets the return val to the upper or lower bound being tested
108 unsigned int size = keyOps->alphType->size;
109 bool unusedBits = size < sizeof(unsigned long);
111 unsigned long ul = strtoul( str, 0, 16 );
113 if ( errno == ERANGE || ( unusedBits && ul >> (size * 8) ) ) {
114 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
115 ul = 1 << (size * 8);
118 if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) )
119 ul |= ( 0xffffffff >> (size*8) ) << (size*8);
121 return Key( (long)ul );
124 Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd )
126 /* Convert the number to a decimal. First reset errno so we can check
127 * for overflow or underflow. */
129 long long minVal = keyOps->alphType->minVal;
130 long long maxVal = keyOps->alphType->maxVal;
132 long long ll = strtoll( str, 0, 10 );
134 /* Check for underflow. */
135 if ( ( errno == ERANGE && ll < 0 ) || ll < minVal) {
136 error(loc) << "literal " << str << " underflows the alphabet type" << endl;
139 /* Check for overflow. */
140 else if ( ( errno == ERANGE && ll > 0 ) || ll > maxVal ) {
141 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
145 if ( keyOps->alphType->isSigned )
146 return Key( (long)ll );
148 return Key( (unsigned long)ll );
151 /* Make an fsm key in int format (what the fsm graph uses) from an alphabet
152 * number returned by the parser. Validates that the number doesn't overflow
153 * the alphabet type. */
154 Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd )
156 /* Switch on hex/decimal format. */
157 if ( str[0] == '0' && str[1] == 'x' )
158 return makeFsmKeyHex( str, loc, pd );
160 return makeFsmKeyDec( str, loc, pd );
163 /* Make an fsm int format (what the fsm graph uses) from a single character.
164 * Performs proper conversion depending on signed/unsigned property of the
166 Key makeFsmKeyChar( char c, ParseData *pd )
168 if ( keyOps->isSigned ) {
169 /* Copy from a char type. */
173 /* Copy from an unsigned byte type. */
174 return Key( (unsigned char)c );
178 /* Make an fsm key array in int format (what the fsm graph uses) from a string
179 * of characters. Performs proper conversion depending on signed/unsigned
180 * property of the alphabet. */
181 void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd )
183 if ( keyOps->isSigned ) {
184 /* Copy from a char star type. */
186 for ( int i = 0; i < len; i++ )
187 result[i] = Key(src[i]);
190 /* Copy from an unsigned byte ptr type. */
191 unsigned char *src = (unsigned char*) data;
192 for ( int i = 0; i < len; i++ )
193 result[i] = Key(src[i]);
197 /* Like makeFsmKeyArray except the result has only unique keys. They ordering
198 * will be changed. */
199 void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
200 bool caseInsensitive, ParseData *pd )
202 /* Use a transitions list for getting unique keys. */
203 if ( keyOps->isSigned ) {
204 /* Copy from a char star type. */
206 for ( int si = 0; si < len; si++ ) {
208 result.insert( key );
209 if ( caseInsensitive ) {
211 result.insert( key.toUpper() );
212 else if ( key.isUpper() )
213 result.insert( key.toLower() );
218 /* Copy from an unsigned byte ptr type. */
219 unsigned char *src = (unsigned char*) data;
220 for ( int si = 0; si < len; si++ ) {
222 result.insert( key );
223 if ( caseInsensitive ) {
225 result.insert( key.toUpper() );
226 else if ( key.isUpper() )
227 result.insert( key.toLower() );
233 FsmAp *dotFsm( ParseData *pd )
235 FsmAp *retFsm = new FsmAp();
236 retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey );
240 FsmAp *dotStarFsm( ParseData *pd )
242 FsmAp *retFsm = new FsmAp();
243 retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey );
247 /* Make a builtin type. Depends on the signed nature of the alphabet type. */
248 FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd )
250 /* FsmAp created to return. */
252 bool isSigned = keyOps->isSigned;
256 /* All characters. */
257 retFsm = dotFsm( pd );
261 /* Ascii characters 0 to 127. */
262 retFsm = new FsmAp();
263 retFsm->rangeFsm( 0, 127 );
267 /* Ascii extended characters. This is the full byte range. Dependent
268 * on signed, vs no signed. If the alphabet is one byte then just use
271 retFsm = new FsmAp();
272 retFsm->rangeFsm( -128, 127 );
275 retFsm = new FsmAp();
276 retFsm->rangeFsm( 0, 255 );
281 /* Alpha [A-Za-z]. */
282 FsmAp *upper = new FsmAp(), *lower = new FsmAp();
283 upper->rangeFsm( 'A', 'Z' );
284 lower->rangeFsm( 'a', 'z' );
285 upper->unionOp( lower );
286 upper->minimizePartition2();
292 retFsm = new FsmAp();
293 retFsm->rangeFsm( '0', '9' );
297 /* Alpha numerics [0-9A-Za-z]. */
298 FsmAp *digit = new FsmAp(), *lower = new FsmAp();
299 FsmAp *upper = new FsmAp();
300 digit->rangeFsm( '0', '9' );
301 upper->rangeFsm( 'A', 'Z' );
302 lower->rangeFsm( 'a', 'z' );
303 digit->unionOp( upper );
304 digit->unionOp( lower );
305 digit->minimizePartition2();
310 /* Lower case characters. */
311 retFsm = new FsmAp();
312 retFsm->rangeFsm( 'a', 'z' );
316 /* Upper case characters. */
317 retFsm = new FsmAp();
318 retFsm->rangeFsm( 'A', 'Z' );
322 /* Control characters. */
323 FsmAp *cntrl = new FsmAp();
324 FsmAp *highChar = new FsmAp();
325 cntrl->rangeFsm( 0, 31 );
326 highChar->concatFsm( 127 );
327 cntrl->unionOp( highChar );
328 cntrl->minimizePartition2();
333 /* Graphical ascii characters [!-~]. */
334 retFsm = new FsmAp();
335 retFsm->rangeFsm( '!', '~' );
339 /* Printable characters. Same as graph except includes space. */
340 retFsm = new FsmAp();
341 retFsm->rangeFsm( ' ', '~' );
346 FsmAp *range1 = new FsmAp();
347 FsmAp *range2 = new FsmAp();
348 FsmAp *range3 = new FsmAp();
349 FsmAp *range4 = new FsmAp();
350 range1->rangeFsm( '!', '/' );
351 range2->rangeFsm( ':', '@' );
352 range3->rangeFsm( '[', '`' );
353 range4->rangeFsm( '{', '~' );
354 range1->unionOp( range2 );
355 range1->unionOp( range3 );
356 range1->unionOp( range4 );
357 range1->minimizePartition2();
362 /* Whitespace: [\t\v\f\n\r ]. */
363 FsmAp *cntrl = new FsmAp();
364 FsmAp *space = new FsmAp();
365 cntrl->rangeFsm( '\t', '\r' );
366 space->concatFsm( ' ' );
367 cntrl->unionOp( space );
368 cntrl->minimizePartition2();
373 /* Hex digits [0-9A-Fa-f]. */
374 FsmAp *digit = new FsmAp();
375 FsmAp *upper = new FsmAp();
376 FsmAp *lower = new FsmAp();
377 digit->rangeFsm( '0', '9' );
378 upper->rangeFsm( 'A', 'F' );
379 lower->rangeFsm( 'a', 'f' );
380 digit->unionOp( upper );
381 digit->unionOp( lower );
382 digit->minimizePartition2();
387 retFsm = new FsmAp();
392 retFsm = new FsmAp();
400 /* Check if this name inst or any name inst below is referenced. */
401 bool NameInst::anyRefsRec()
406 /* Recurse on children until true. */
407 for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) {
408 if ( (*ch)->anyRefsRec() )
419 /* Initialize the structure that will collect info during the parse of a
421 ParseData::ParseData( const char *fileName, char *sectionName,
422 const InputLoc §ionLoc )
425 generatingSectionSubset(false),
427 /* 0 is reserved for global error actions. */
449 sectionName(sectionName),
450 sectionLoc(sectionLoc),
455 nextEpsilonResolvedLink(0),
456 nextLongestMatchId(1),
457 lmRequiresErrorState(false),
460 /* Initialize the dictionary of graphs. This is our symbol table. The
461 * initialization needs to be done on construction which happens at the
462 * beginning of a machine spec so any assignment operators can reference
467 /* Clean up the data collected during a parse. */
468 ParseData::~ParseData()
470 /* Delete all the nodes in the action list. Will cause all the
471 * string data that represents the actions to be deallocated. */
475 /* Make a name id in the current name instantiation scope if it is not
477 NameInst *ParseData::addNameInst( const InputLoc &loc, const char *data, bool isLabel )
479 /* Create the name instantitaion object and insert it. */
480 NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel );
481 curNameInst->childVect.append( newNameInst );
483 curNameInst->children.insertMulti( data, newNameInst );
487 void ParseData::initNameWalk()
489 curNameInst = rootName;
493 void ParseData::initExportsNameWalk()
495 curNameInst = exportsRootName;
499 /* Goes into the next child scope. The number of the child is already set up.
500 * We need this for the syncronous name tree and parse tree walk to work
501 * properly. It is reset on entry into a scope and advanced on poping of a
502 * scope. A call to enterNameScope should be accompanied by a corresponding
504 NameFrame ParseData::enterNameScope( bool isLocal, int numScopes )
506 /* Save off the current data. */
508 retFrame.prevNameInst = curNameInst;
509 retFrame.prevNameChild = curNameChild;
510 retFrame.prevLocalScope = localNameScope;
512 /* Enter into the new name scope. */
513 for ( int i = 0; i < numScopes; i++ ) {
514 curNameInst = curNameInst->childVect[curNameChild];
519 localNameScope = curNameInst;
524 /* Return from a child scope to a parent. The parent info must be specified as
525 * an argument and is obtained from the corresponding call to enterNameScope.
527 void ParseData::popNameScope( const NameFrame &frame )
529 /* Pop the name scope. */
530 curNameInst = frame.prevNameInst;
531 curNameChild = frame.prevNameChild+1;
532 localNameScope = frame.prevLocalScope;
535 void ParseData::resetNameScope( const NameFrame &frame )
537 /* Pop the name scope. */
538 curNameInst = frame.prevNameInst;
539 curNameChild = frame.prevNameChild;
540 localNameScope = frame.prevLocalScope;
544 void ParseData::unsetObsoleteEntries( FsmAp *graph )
546 /* Loop the reference names and increment the usage. Names that are no
547 * longer needed will be unset in graph. */
548 for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) {
550 NameInst *name = *ref;
553 /* If the name is no longer needed unset its corresponding entry. */
554 if ( name->numUses == name->numRefs ) {
555 assert( graph->entryPoints.find( name->id ) != 0 );
556 graph->unsetEntry( name->id );
557 assert( graph->entryPoints.find( name->id ) == 0 );
562 NameSet ParseData::resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly )
564 /* Queue needed for breadth-first search, load it with the start node. */
565 NameInstList nameQueue;
566 nameQueue.append( refFrom );
569 while ( nameQueue.length() > 0 ) {
570 /* Pull the next from location off the queue. */
571 NameInst *from = nameQueue.detachFirst();
573 /* Look for the name. */
574 NameMapEl *low, *high;
575 if ( from->children.findMulti( data, low, high ) ) {
576 /* Record all instances of the name. */
577 for ( ; low <= high; low++ )
578 result.insert( low->value );
581 /* Name not there, do breadth-first operation of appending all
582 * childrent to the processing queue. */
583 for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) {
584 if ( !recLabelsOnly || (*name)->isLabel )
585 nameQueue.append( *name );
589 /* Queue exhausted and name never found. */
593 void ParseData::resolveFrom( NameSet &result, NameInst *refFrom,
594 const NameRef &nameRef, int namePos )
596 /* Look for the name in the owning scope of the factor with aug. */
597 NameSet partResult = resolvePart( refFrom, nameRef[namePos], false );
599 /* If there are more parts to the name then continue on. */
600 if ( ++namePos < nameRef.length() ) {
601 /* There are more components to the name, search using all the part
602 * results as the base. */
603 for ( NameSet::Iter name = partResult; name.lte(); name++ )
604 resolveFrom( result, *name, nameRef, namePos );
607 /* This is the last component, append the part results to the final
609 result.insert( partResult );
613 /* Write out a name reference. */
614 ostream &operator<<( ostream &out, const NameRef &nameRef )
617 if ( nameRef[pos] == 0 ) {
621 out << nameRef[pos++];
622 for ( ; pos < nameRef.length(); pos++ )
623 out << "::" << nameRef[pos];
627 ostream &operator<<( ostream &out, const NameInst &nameInst )
629 /* Count the number fully qualified name parts. */
631 NameInst *curParent = nameInst.parent;
632 while ( curParent != 0 ) {
634 curParent = curParent->parent;
637 /* Make an array and fill it in. */
638 curParent = nameInst.parent;
639 NameInst **parents = new NameInst*[numParents];
640 for ( int p = numParents-1; p >= 0; p-- ) {
641 parents[p] = curParent;
642 curParent = curParent->parent;
645 /* Write the parents out, skip the root. */
646 for ( int p = 1; p < numParents; p++ )
647 out << "::" << ( parents[p]->name != 0 ? parents[p]->name : "<ANON>" );
649 /* Write the name and cleanup. */
650 out << "::" << ( nameInst.name != 0 ? nameInst.name : "<ANON>" );
655 struct CmpNameInstLoc
657 static int compare( const NameInst *ni1, const NameInst *ni2 )
659 if ( ni1->loc.line < ni2->loc.line )
661 else if ( ni1->loc.line > ni2->loc.line )
663 else if ( ni1->loc.col < ni2->loc.col )
665 else if ( ni1->loc.col > ni2->loc.col )
671 void errorStateLabels( const NameSet &resolved )
673 MergeSort<NameInst*, CmpNameInstLoc> mergeSort;
674 mergeSort.sort( resolved.data, resolved.length() );
675 for ( NameSet::Iter res = resolved; res.lte(); res++ )
676 error((*res)->loc) << " -> " << **res << endl;
680 NameInst *ParseData::resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action )
682 NameInst *nameInst = 0;
684 /* Do the local search if the name is not strictly a root level name
686 if ( nameRef[0] != 0 ) {
687 /* If the action is referenced, resolve all of them. */
688 if ( action != 0 && action->actionRefs.length() > 0 ) {
689 /* Look for the name in all referencing scopes. */
691 for ( ActionRefs::Iter actRef = action->actionRefs; actRef.lte(); actRef++ )
692 resolveFrom( resolved, *actRef, nameRef, 0 );
694 if ( resolved.length() > 0 ) {
695 /* Take the first one. */
696 nameInst = resolved[0];
697 if ( resolved.length() > 1 ) {
698 /* Complain about the multiple references. */
699 error(loc) << "state reference " << nameRef <<
700 " resolves to multiple entry points" << endl;
701 errorStateLabels( resolved );
707 /* If not found in the local scope, look in global. */
708 if ( nameInst == 0 ) {
710 int fromPos = nameRef[0] != 0 ? 0 : 1;
711 resolveFrom( resolved, rootName, nameRef, fromPos );
713 if ( resolved.length() > 0 ) {
714 /* Take the first. */
715 nameInst = resolved[0];
716 if ( resolved.length() > 1 ) {
717 /* Complain about the multiple references. */
718 error(loc) << "state reference " << nameRef <<
719 " resolves to multiple entry points" << endl;
720 errorStateLabels( resolved );
725 if ( nameInst == 0 ) {
726 /* If not found then complain. */
727 error(loc) << "could not resolve state reference " << nameRef << endl;
732 void ParseData::resolveNameRefs( InlineList *inlineList, Action *action )
734 for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
735 switch ( item->type ) {
736 case InlineItem::Entry: case InlineItem::Goto:
737 case InlineItem::Call: case InlineItem::Next: {
738 /* Resolve, pass action for local search. */
739 NameInst *target = resolveStateRef( *item->nameRef, item->loc, action );
741 /* Name lookup error reporting is handled by resolveStateRef. */
743 /* Check if the target goes into a longest match. */
744 NameInst *search = target->parent;
745 while ( search != 0 ) {
746 if ( search->isLongestMatch ) {
747 error(item->loc) << "cannot enter inside a longest "
748 "match construction as an entry point" << endl;
751 search = search->parent;
754 /* Record the reference in the name. This will cause the
755 * entry point to survive to the end of the graph
756 * generating walk. */
757 target->numRefs += 1;
760 item->nameTarg = target;
767 /* Some of the item types may have children. */
768 if ( item->children != 0 )
769 resolveNameRefs( item->children, action );
773 /* Resolve references to labels in actions. */
774 void ParseData::resolveActionNameRefs()
776 for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
777 /* Only care about the actions that are referenced. */
778 if ( act->actionRefs.length() > 0 )
779 resolveNameRefs( act->inlineList, act );
783 /* Walk a name tree starting at from and fill the name index. */
784 void ParseData::fillNameIndex( NameInst *from )
786 /* Fill the value for from in the name index. */
787 nameIndex[from->id] = from;
789 /* Recurse on the implicit final state and then all children. */
790 if ( from->final != 0 )
791 fillNameIndex( from->final );
792 for ( NameVect::Iter name = from->childVect; name.lte(); name++ )
793 fillNameIndex( *name );
796 void ParseData::makeRootNames()
798 /* Create the root name. */
799 rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
800 exportsRootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
803 /* Build the name tree and supporting data structures. */
804 void ParseData::makeNameTree( GraphDictEl *dictEl )
806 /* Set up curNameInst for the walk. */
810 /* A start location has been specified. */
811 dictEl->value->makeNameTree( dictEl->loc, this );
814 /* First make the name tree. */
815 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
816 /* Recurse on the instance. */
817 glel->value->makeNameTree( glel->loc, this );
821 /* The number of nodes in the tree can now be given by nextNameId */
822 nameIndex = new NameInst*[nextNameId];
823 memset( nameIndex, 0, sizeof(NameInst*)*nextNameId );
824 fillNameIndex( rootName );
825 fillNameIndex( exportsRootName );
829 void ParseData::createBuiltin( const char *name, BuiltinMachine builtin )
831 Expression *expression = new Expression( builtin );
832 Join *join = new Join( expression );
833 JoinOrLm *joinOrLm = new JoinOrLm( join );
834 VarDef *varDef = new VarDef( name, joinOrLm );
835 GraphDictEl *graphDictEl = new GraphDictEl( name, varDef );
836 graphDict.insert( graphDictEl );
839 /* Initialize the graph dict with builtin types. */
840 void ParseData::initGraphDict( )
842 createBuiltin( "any", BT_Any );
843 createBuiltin( "ascii", BT_Ascii );
844 createBuiltin( "extend", BT_Extend );
845 createBuiltin( "alpha", BT_Alpha );
846 createBuiltin( "digit", BT_Digit );
847 createBuiltin( "alnum", BT_Alnum );
848 createBuiltin( "lower", BT_Lower );
849 createBuiltin( "upper", BT_Upper );
850 createBuiltin( "cntrl", BT_Cntrl );
851 createBuiltin( "graph", BT_Graph );
852 createBuiltin( "print", BT_Print );
853 createBuiltin( "punct", BT_Punct );
854 createBuiltin( "space", BT_Space );
855 createBuiltin( "xdigit", BT_Xdigit );
856 createBuiltin( "null", BT_Lambda );
857 createBuiltin( "zlen", BT_Lambda );
858 createBuiltin( "empty", BT_Empty );
861 /* Set the alphabet type. If the types are not valid returns false. */
862 bool ParseData::setAlphType( const InputLoc &loc, char *s1, char *s2 )
865 userAlphType = findAlphType( s1, s2 );
867 return userAlphType != 0;
870 /* Set the alphabet type. If the types are not valid returns false. */
871 bool ParseData::setAlphType( const InputLoc &loc, char *s1 )
874 userAlphType = findAlphType( s1 );
876 return userAlphType != 0;
879 bool ParseData::setVariable( char *var, InlineList *inlineList )
883 if ( strcmp( var, "p" ) == 0 )
885 else if ( strcmp( var, "pe" ) == 0 )
887 else if ( strcmp( var, "eof" ) == 0 )
888 eofExpr = inlineList;
889 else if ( strcmp( var, "cs" ) == 0 )
891 else if ( strcmp( var, "data" ) == 0 )
892 dataExpr = inlineList;
893 else if ( strcmp( var, "top" ) == 0 )
894 topExpr = inlineList;
895 else if ( strcmp( var, "stack" ) == 0 )
896 stackExpr = inlineList;
897 else if ( strcmp( var, "act" ) == 0 )
898 actExpr = inlineList;
899 else if ( strcmp( var, "ts" ) == 0 )
900 tokstartExpr = inlineList;
901 else if ( strcmp( var, "te" ) == 0 )
902 tokendExpr = inlineList;
909 /* Initialize the key operators object that will be referenced by all fsms
911 void ParseData::initKeyOps( )
913 /* Signedness and bounds. */
914 HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType;
915 thisKeyOps.setAlphType( alphType );
917 if ( lowerNum != 0 ) {
918 /* If ranges are given then interpret the alphabet type. */
919 thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this );
920 thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
923 thisCondData.lastCondKey = thisKeyOps.maxKey;
926 void ParseData::printNameInst( NameInst *nameInst, int level )
928 for ( int i = 0; i < level; i++ )
930 cerr << (nameInst->name != 0 ? nameInst->name : "<ANON>") <<
931 " id: " << nameInst->id <<
932 " refs: " << nameInst->numRefs <<
933 " uses: " << nameInst->numUses << endl;
934 for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ )
935 printNameInst( *name, level+1 );
938 /* Remove duplicates of unique actions from an action table. */
939 void ParseData::removeDups( ActionTable &table )
941 /* Scan through the table looking for unique actions to
942 * remove duplicates of. */
943 for ( int i = 0; i < table.length(); i++ ) {
944 /* Remove any duplicates ahead of i. */
945 for ( int r = i+1; r < table.length(); ) {
946 if ( table[r].value == table[i].value )
954 /* Remove duplicates from action lists. This operates only on transition and
955 * eof action lists and so should be called once all actions have been
956 * transfered to their final resting place. */
957 void ParseData::removeActionDups( FsmAp *graph )
959 /* Loop all states. */
960 for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
961 /* Loop all transitions. */
962 for ( TransList::Iter trans = state->outList; trans.lte(); trans++ )
963 removeDups( trans->actionTable );
964 removeDups( state->toStateActionTable );
965 removeDups( state->fromStateActionTable );
966 removeDups( state->eofActionTable );
970 Action *ParseData::newAction( const char *name, InlineList *inlineList )
975 loc.fileName = "<NONE>";
977 Action *action = new Action( loc, name, inlineList, nextCondId++ );
978 action->actionRefs.append( rootName );
979 actionList.append( action );
983 void ParseData::initLongestMatchData()
985 if ( lmList.length() > 0 ) {
986 /* The initTokStart action resets the token start. */
987 InlineList *il1 = new InlineList;
988 il1->append( new InlineItem( InputLoc(), InlineItem::LmInitTokStart ) );
989 initTokStart = newAction( "initts", il1 );
990 initTokStart->isLmAction = true;
992 /* The initActId action gives act a default value. */
993 InlineList *il4 = new InlineList;
994 il4->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) );
995 initActId = newAction( "initact", il4 );
996 initActId->isLmAction = true;
998 /* The setTokStart action sets tokstart. */
999 InlineList *il5 = new InlineList;
1000 il5->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) );
1001 setTokStart = newAction( "ts", il5 );
1002 setTokStart->isLmAction = true;
1004 /* The setTokEnd action sets tokend. */
1005 InlineList *il3 = new InlineList;
1006 il3->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) );
1007 setTokEnd = newAction( "te", il3 );
1008 setTokEnd->isLmAction = true;
1010 /* The action will also need an ordering: ahead of all user action
1012 initTokStartOrd = curActionOrd++;
1013 initActIdOrd = curActionOrd++;
1014 setTokStartOrd = curActionOrd++;
1015 setTokEndOrd = curActionOrd++;
1019 /* After building the graph, do some extra processing to ensure the runtime
1020 * data of the longest mactch operators is consistent. */
1021 void ParseData::setLongestMatchData( FsmAp *graph )
1023 if ( lmList.length() > 0 ) {
1024 /* Make sure all entry points (targets of fgoto, fcall, fnext, fentry)
1025 * init the tokstart. */
1026 for ( EntryMap::Iter en = graph->entryPoints; en.lte(); en++ ) {
1027 /* This is run after duplicates are removed, we must guard against
1028 * inserting a duplicate. */
1029 ActionTable &actionTable = en->value->toStateActionTable;
1030 if ( ! actionTable.hasAction( initTokStart ) )
1031 actionTable.setAction( initTokStartOrd, initTokStart );
1034 /* Find the set of states that are the target of transitions with
1035 * actions that have calls. These states will be targeted by fret
1038 for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
1039 for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
1040 for ( ActionTable::Iter ati = trans->actionTable; ati.lte(); ati++ ) {
1041 if ( ati->value->anyCall && trans->toState != 0 )
1042 states.insert( trans->toState );
1048 /* Init tokstart upon entering the above collected states. */
1049 for ( StateSet::Iter ps = states; ps.lte(); ps++ ) {
1050 /* This is run after duplicates are removed, we must guard against
1051 * inserting a duplicate. */
1052 ActionTable &actionTable = (*ps)->toStateActionTable;
1053 if ( ! actionTable.hasAction( initTokStart ) )
1054 actionTable.setAction( initTokStartOrd, initTokStart );
1059 /* Make the graph from a graph dict node. Does minimization and state sorting. */
1060 FsmAp *ParseData::makeInstance( GraphDictEl *gdNode )
1062 /* Build the graph from a walk of the parse tree. */
1063 FsmAp *graph = gdNode->value->walk( this );
1065 /* Resolve any labels that point to multiple states. Any labels that are
1066 * still around are referenced only by gotos and calls and they need to be
1067 * made into deterministic entry points. */
1068 graph->deterministicEntry();
1071 * All state construction is now complete.
1074 /* Transfer actions from the out action tables to eof action tables. */
1075 for ( StateSet::Iter state = graph->finStateSet; state.lte(); state++ )
1076 graph->transferOutActions( *state );
1078 /* Transfer global error actions. */
1079 for ( StateList::Iter state = graph->stateList; state.lte(); state++ )
1080 graph->transferErrorActions( state, 0 );
1082 if ( ::wantDupsRemoved )
1083 removeActionDups( graph );
1085 /* Remove unreachable states. There should be no dead end states. The
1086 * subtract and intersection operators are the only places where they may
1087 * be created and those operators clean them up. */
1088 graph->removeUnreachableStates();
1090 /* No more fsm operations are to be done. Action ordering numbers are
1091 * no longer of use and will just hinder minimization. Clear them. */
1092 graph->nullActionKeys();
1094 /* Transition priorities are no longer of use. We can clear them
1095 * because they will just hinder minimization as well. Clear them. */
1096 graph->clearAllPriorities();
1098 if ( minimizeOpt != MinimizeNone ) {
1099 /* Minimize here even if we minimized at every op. Now that function
1100 * keys have been cleared we may get a more minimal fsm. */
1101 switch ( minimizeLevel ) {
1102 case MinimizeApprox:
1103 graph->minimizeApproximate();
1105 case MinimizeStable:
1106 graph->minimizeStable();
1108 case MinimizePartition1:
1109 graph->minimizePartition1();
1111 case MinimizePartition2:
1112 graph->minimizePartition2();
1117 graph->compressTransitions();
1122 void ParseData::printNameTree()
1124 /* Print the name instance map. */
1125 for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ )
1126 printNameInst( *name, 0 );
1128 cerr << "name index:" << endl;
1129 /* Show that the name index is correct. */
1130 for ( int ni = 0; ni < nextNameId; ni++ ) {
1132 const char *name = nameIndex[ni]->name;
1133 cerr << ( name != 0 ? name : "<ANON>" ) << endl;
1137 FsmAp *ParseData::makeSpecific( GraphDictEl *gdNode )
1139 /* Build the name tree and supporting data structures. */
1140 makeNameTree( gdNode );
1142 /* Resove name references from gdNode. */
1144 gdNode->value->resolveNameRefs( this );
1146 /* Do not resolve action references. Since we are not building the entire
1147 * graph there's a good chance that many name references will fail. This
1148 * is okay since generating part of the graph is usually only done when
1149 * inspecting the compiled machine. */
1151 /* Same story for extern entry point references. */
1153 /* Flag this case so that the XML code generator is aware that we haven't
1154 * looked up name references in actions. It can then avoid segfaulting. */
1155 generatingSectionSubset = true;
1157 /* Just building the specified graph. */
1159 FsmAp *mainGraph = makeInstance( gdNode );
1164 FsmAp *ParseData::makeAll()
1166 /* Build the name tree and supporting data structures. */
1169 /* Resove name references in the tree. */
1171 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ )
1172 glel->value->resolveNameRefs( this );
1174 /* Resolve action code name references. */
1175 resolveActionNameRefs();
1177 /* Force name references to the top level instantiations. */
1178 for ( NameVect::Iter inst = rootName->childVect; inst.lte(); inst++ )
1179 (*inst)->numRefs += 1;
1181 FsmAp *mainGraph = 0;
1182 FsmAp **graphs = new FsmAp*[instanceList.length()];
1185 /* Make all the instantiations, we know that main exists in this list. */
1187 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
1188 if ( strcmp( glel->key, mainMachine ) == 0 ) {
1189 /* Main graph is always instantiated. */
1190 mainGraph = makeInstance( glel );
1193 /* Instantiate and store in others array. */
1194 graphs[numOthers++] = makeInstance( glel );
1198 if ( mainGraph == 0 )
1199 mainGraph = graphs[--numOthers];
1201 if ( numOthers > 0 ) {
1202 /* Add all the other graphs into main. */
1203 mainGraph->globOp( graphs, numOthers );
1210 void ParseData::analyzeAction( Action *action, InlineList *inlineList )
1212 /* FIXME: Actions used as conditions should be very constrained. */
1213 for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1214 if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr )
1215 action->anyCall = true;
1217 /* Need to recurse into longest match items. */
1218 if ( item->type == InlineItem::LmSwitch ) {
1219 LongestMatch *lm = item->longestMatch;
1220 for ( LmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) {
1221 if ( lmi->action != 0 )
1222 analyzeAction( action, lmi->action->inlineList );
1226 if ( item->type == InlineItem::LmOnLast ||
1227 item->type == InlineItem::LmOnNext ||
1228 item->type == InlineItem::LmOnLagBehind )
1230 LongestMatchPart *lmi = item->longestMatchPart;
1231 if ( lmi->action != 0 )
1232 analyzeAction( action, lmi->action->inlineList );
1235 if ( item->children != 0 )
1236 analyzeAction( action, item->children );
1241 /* Check actions for bad uses of fsm directives. We don't go inside longest
1242 * match items in actions created by ragel, since we just want the user
1244 void ParseData::checkInlineList( Action *act, InlineList *inlineList )
1246 for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1248 if ( act->numEofRefs > 0 ) {
1249 switch ( item->type ) {
1250 /* Currently no checks. */
1257 if ( item->children != 0 )
1258 checkInlineList( act, item->children );
1262 void ParseData::checkAction( Action *action )
1264 /* Check for actions with calls that are embedded within a longest match
1266 if ( !action->isLmAction && action->numRefs() > 0 && action->anyCall ) {
1267 for ( ActionRefs::Iter ar = action->actionRefs; ar.lte(); ar++ ) {
1268 NameInst *check = *ar;
1269 while ( check != 0 ) {
1270 if ( check->isLongestMatch ) {
1271 error(action->loc) << "within a scanner, fcall is permitted"
1272 " only in pattern actions" << endl;
1275 check = check->parent;
1280 checkInlineList( action, action->inlineList );
1284 void ParseData::analyzeGraph( FsmAp *graph )
1286 for ( ActionList::Iter act = actionList; act.lte(); act++ )
1287 analyzeAction( act, act->inlineList );
1289 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
1290 /* The transition list. */
1291 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
1292 for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ )
1293 at->value->numTransRefs += 1;
1296 for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ )
1297 at->value->numToStateRefs += 1;
1299 for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ )
1300 at->value->numFromStateRefs += 1;
1302 for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ )
1303 at->value->numEofRefs += 1;
1305 for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) {
1306 for ( CondSet::Iter sci = sc->condSpace->condSet; sci.lte(); sci++ )
1307 (*sci)->numCondRefs += 1;
1311 /* Checks for bad usage of directives in action code. */
1312 for ( ActionList::Iter act = actionList; act.lte(); act++ )
1316 void ParseData::makeExportsNameTree()
1318 /* Make a name tree for the exports. */
1319 initExportsNameWalk();
1321 /* First make the name tree. */
1322 for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1323 if ( gdel->value->isExport ) {
1324 /* Recurse on the instance. */
1325 gdel->value->makeNameTree( gdel->loc, this );
1330 void ParseData::makeExports()
1332 makeExportsNameTree();
1334 /* Resove name references in the tree. */
1335 initExportsNameWalk();
1336 for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1337 if ( gdel->value->isExport )
1338 gdel->value->resolveNameRefs( this );
1341 /* Make all the instantiations, we know that main exists in this list. */
1342 initExportsNameWalk();
1343 for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1344 /* Check if this var def is an export. */
1345 if ( gdel->value->isExport ) {
1346 /* Build the graph from a walk of the parse tree. */
1347 FsmAp *graph = gdel->value->walk( this );
1349 /* Build the graph from a walk of the parse tree. */
1350 if ( !graph->checkSingleCharMachine() ) {
1351 error(gdel->loc) << "bad export machine, must define "
1352 "a single character" << endl;
1355 /* Safe to extract the key and declare the export. */
1356 Key exportKey = graph->startState->outList.head->lowKey;
1357 exportList.append( new Export( gdel->value->name, exportKey ) );
1364 /* Construct the machine and catch failures which can occur during
1366 void ParseData::prepareMachineGen( GraphDictEl *graphDictEl )
1369 /* This machine construction can fail. */
1370 prepareMachineGenTBWrapped( graphDictEl );
1372 catch ( FsmConstructFail fail ) {
1373 switch ( fail.reason ) {
1374 case FsmConstructFail::CondNoKeySpace: {
1375 InputLoc &loc = alphTypeSet ? alphTypeLoc : sectionLoc;
1376 error(loc) << "sorry, no more characters are "
1377 "available in the alphabet space" << endl;
1378 error(loc) << " for conditions, please use a "
1379 "smaller alphtype or reduce" << endl;
1380 error(loc) << " the span of characters on which "
1381 "conditions are embedded" << endl;
1388 void ParseData::prepareMachineGenTBWrapped( GraphDictEl *graphDictEl )
1393 initLongestMatchData();
1395 /* Make the graph, do minimization. */
1396 if ( graphDictEl == 0 )
1397 sectionGraph = makeAll();
1399 sectionGraph = makeSpecific( graphDictEl );
1401 /* Compute exports from the export definitions. */
1404 /* If any errors have occured in the input file then don't write anything. */
1405 if ( gblErrorCount > 0 )
1408 analyzeGraph( sectionGraph );
1410 /* Depends on the graph analysis. */
1411 setLongestMatchData( sectionGraph );
1413 /* Decide if an error state is necessary.
1414 * 1. There is an error transition
1415 * 2. There is a gap in the transitions
1416 * 3. The longest match operator requires it. */
1417 if ( lmRequiresErrorState || sectionGraph->hasErrorTrans() )
1418 sectionGraph->errState = sectionGraph->addState();
1420 /* State numbers need to be assigned such that all final states have a
1421 * larger state id number than all non-final states. This enables the
1422 * first_final mechanism to function correctly. We also want states to be
1423 * ordered in a predictable fashion. So we first apply a depth-first
1424 * search, then do a stable sort by final state status, then assign
1427 sectionGraph->depthFirstOrdering();
1428 sectionGraph->sortStatesByFinal();
1429 sectionGraph->setStateNumbers( 0 );
1432 void ParseData::generateReduced( InputData &inputData )
1436 cgd = makeCodeGen( inputData.inputFileName, sectionName, *inputData.outStream );
1438 /* Make the generator. */
1439 BackendGen backendGen( sectionName, this, sectionGraph, cgd );
1441 /* Write out with it. */
1442 backendGen.makeBackend();
1444 if ( printStatistics ) {
1445 cerr << "fsm name : " << sectionName << endl;
1446 cerr << "num states: " << sectionGraph->stateList.length() << endl;
1451 void ParseData::generateXML( ostream &out )
1455 /* Make the generator. */
1456 XMLCodeGen codeGen( sectionName, this, sectionGraph, out );
1458 /* Write out with it. */
1461 if ( printStatistics ) {
1462 cerr << "fsm name : " << sectionName << endl;
1463 cerr << "num states: " << sectionGraph->stateList.length() << endl;