2 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 #include "parsedata.h"
31 #include "parsetree.h"
32 #include "mergesort.h"
33 #include "xmlcodegen.h"
37 char machineMain[] = "main";
39 void Token::set( char *str, int len )
42 data = new char[len+1];
43 memcpy( data, str, len );
47 void Token::append( const Token &other )
49 int newLength = length + other.length;
50 char *newString = new char[newLength+1];
51 memcpy( newString, data, length );
52 memcpy( newString + length, other.data, other.length );
53 newString[newLength] = 0;
58 /* Perform minimization after an operation according
59 * to the command line args. */
60 void afterOpMinimize( FsmAp *fsm, bool lastInSeq )
62 /* Switch on the prefered minimization algorithm. */
63 if ( minimizeOpt == MinimizeEveryOp || minimizeOpt == MinimizeMostOps && lastInSeq ) {
64 /* First clean up the graph. FsmAp operations may leave these
65 * lying around. There should be no dead end states. The subtract
66 * intersection operators are the only places where they may be
67 * created and those operators clean them up. */
68 fsm->removeUnreachableStates();
70 switch ( minimizeLevel ) {
72 fsm->minimizeApproximate();
74 case MinimizePartition1:
75 fsm->minimizePartition1();
77 case MinimizePartition2:
78 fsm->minimizePartition2();
81 fsm->minimizeStable();
87 /* Count the transitions in the fsm by walking the state list. */
88 int countTransitions( FsmAp *fsm )
91 StateAp *state = fsm->stateList.head;
92 while ( state != 0 ) {
93 numTrans += state->outList.length();
99 Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd )
101 /* Reset errno so we can check for overflow or underflow. In the event of
102 * an error, sets the return val to the upper or lower bound being tested
105 unsigned int size = keyOps->alphType->size;
106 bool unusedBits = size < sizeof(unsigned long);
108 unsigned long ul = strtoul( str, 0, 16 );
110 if ( errno == ERANGE || unusedBits && ul >> (size * 8) ) {
111 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
112 ul = 1 << (size * 8);
115 if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) )
116 ul |= (0xffffffff >> (size*8 ) ) << (size*8);
118 return Key( (long)ul );
121 Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd )
123 /* Convert the number to a decimal. First reset errno so we can check
124 * for overflow or underflow. */
126 long long minVal = keyOps->alphType->minVal;
127 long long maxVal = keyOps->alphType->maxVal;
129 long long ll = strtoll( str, 0, 10 );
131 /* Check for underflow. */
132 if ( errno == ERANGE && ll < 0 || ll < minVal) {
133 error(loc) << "literal " << str << " underflows the alphabet type" << endl;
136 /* Check for overflow. */
137 else if ( errno == ERANGE && ll > 0 || ll > maxVal ) {
138 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
142 if ( keyOps->alphType->isSigned )
143 return Key( (long)ll );
145 return Key( (unsigned long)ll );
148 /* Make an fsm key in int format (what the fsm graph uses) from an alphabet
149 * number returned by the parser. Validates that the number doesn't overflow
150 * the alphabet type. */
151 Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd )
153 /* Switch on hex/decimal format. */
154 if ( str[0] == '0' && str[1] == 'x' )
155 return makeFsmKeyHex( str, loc, pd );
157 return makeFsmKeyDec( str, loc, pd );
160 /* Make an fsm int format (what the fsm graph uses) from a single character.
161 * Performs proper conversion depending on signed/unsigned property of the
163 Key makeFsmKeyChar( char c, ParseData *pd )
165 if ( keyOps->isSigned ) {
166 /* Copy from a char type. */
170 /* Copy from an unsigned byte type. */
171 return Key( (unsigned char)c );
175 /* Make an fsm key array in int format (what the fsm graph uses) from a string
176 * of characters. Performs proper conversion depending on signed/unsigned
177 * property of the alphabet. */
178 void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd )
180 if ( keyOps->isSigned ) {
181 /* Copy from a char star type. */
183 for ( int i = 0; i < len; i++ )
184 result[i] = Key(src[i]);
187 /* Copy from an unsigned byte ptr type. */
188 unsigned char *src = (unsigned char*) data;
189 for ( int i = 0; i < len; i++ )
190 result[i] = Key(src[i]);
194 /* Like makeFsmKeyArray except the result has only unique keys. They ordering
195 * will be changed. */
196 void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
197 bool caseInsensitive, ParseData *pd )
199 /* Use a transitions list for getting unique keys. */
200 if ( keyOps->isSigned ) {
201 /* Copy from a char star type. */
203 for ( int si = 0; si < len; si++ ) {
205 result.insert( key );
206 if ( caseInsensitive ) {
208 result.insert( key.toUpper() );
209 else if ( key.isUpper() )
210 result.insert( key.toLower() );
215 /* Copy from an unsigned byte ptr type. */
216 unsigned char *src = (unsigned char*) data;
217 for ( int si = 0; si < len; si++ ) {
219 result.insert( key );
220 if ( caseInsensitive ) {
222 result.insert( key.toUpper() );
223 else if ( key.isUpper() )
224 result.insert( key.toLower() );
230 FsmAp *dotFsm( ParseData *pd )
232 FsmAp *retFsm = new FsmAp();
233 retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey );
237 FsmAp *dotStarFsm( ParseData *pd )
239 FsmAp *retFsm = new FsmAp();
240 retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey );
244 /* Make a builtin type. Depends on the signed nature of the alphabet type. */
245 FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd )
247 /* FsmAp created to return. */
249 bool isSigned = keyOps->isSigned;
253 /* All characters. */
254 retFsm = dotFsm( pd );
258 /* Ascii characters 0 to 127. */
259 retFsm = new FsmAp();
260 retFsm->rangeFsm( 0, 127 );
264 /* Ascii extended characters. This is the full byte range. Dependent
265 * on signed, vs no signed. If the alphabet is one byte then just use
268 retFsm = new FsmAp();
269 retFsm->rangeFsm( -128, 127 );
272 retFsm = new FsmAp();
273 retFsm->rangeFsm( 0, 255 );
278 /* Alpha [A-Za-z]. */
279 FsmAp *upper = new FsmAp(), *lower = new FsmAp();
280 upper->rangeFsm( 'A', 'Z' );
281 lower->rangeFsm( 'a', 'z' );
282 upper->unionOp( lower );
283 upper->minimizePartition2();
289 retFsm = new FsmAp();
290 retFsm->rangeFsm( '0', '9' );
294 /* Alpha numerics [0-9A-Za-z]. */
295 FsmAp *digit = new FsmAp(), *lower = new FsmAp();
296 FsmAp *upper = new FsmAp();
297 digit->rangeFsm( '0', '9' );
298 upper->rangeFsm( 'A', 'Z' );
299 lower->rangeFsm( 'a', 'z' );
300 digit->unionOp( upper );
301 digit->unionOp( lower );
302 digit->minimizePartition2();
307 /* Lower case characters. */
308 retFsm = new FsmAp();
309 retFsm->rangeFsm( 'a', 'z' );
313 /* Upper case characters. */
314 retFsm = new FsmAp();
315 retFsm->rangeFsm( 'A', 'Z' );
319 /* Control characters. */
320 FsmAp *cntrl = new FsmAp();
321 FsmAp *highChar = new FsmAp();
322 cntrl->rangeFsm( 0, 31 );
323 highChar->concatFsm( 127 );
324 cntrl->unionOp( highChar );
325 cntrl->minimizePartition2();
330 /* Graphical ascii characters [!-~]. */
331 retFsm = new FsmAp();
332 retFsm->rangeFsm( '!', '~' );
336 /* Printable characters. Same as graph except includes space. */
337 retFsm = new FsmAp();
338 retFsm->rangeFsm( ' ', '~' );
343 FsmAp *range1 = new FsmAp();
344 FsmAp *range2 = new FsmAp();
345 FsmAp *range3 = new FsmAp();
346 FsmAp *range4 = new FsmAp();
347 range1->rangeFsm( '!', '/' );
348 range2->rangeFsm( ':', '@' );
349 range3->rangeFsm( '[', '`' );
350 range4->rangeFsm( '{', '~' );
351 range1->unionOp( range2 );
352 range1->unionOp( range3 );
353 range1->unionOp( range4 );
354 range1->minimizePartition2();
359 /* Whitespace: [\t\v\f\n\r ]. */
360 FsmAp *cntrl = new FsmAp();
361 FsmAp *space = new FsmAp();
362 cntrl->rangeFsm( '\t', '\r' );
363 space->concatFsm( ' ' );
364 cntrl->unionOp( space );
365 cntrl->minimizePartition2();
370 /* Hex digits [0-9A-Fa-f]. */
371 FsmAp *digit = new FsmAp();
372 FsmAp *upper = new FsmAp();
373 FsmAp *lower = new FsmAp();
374 digit->rangeFsm( '0', '9' );
375 upper->rangeFsm( 'A', 'F' );
376 lower->rangeFsm( 'a', 'f' );
377 digit->unionOp( upper );
378 digit->unionOp( lower );
379 digit->minimizePartition2();
384 retFsm = new FsmAp();
389 retFsm = new FsmAp();
397 /* Check if this name inst or any name inst below is referenced. */
398 bool NameInst::anyRefsRec()
403 /* Recurse on children until true. */
404 for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) {
405 if ( (*ch)->anyRefsRec() )
416 /* Initialize the structure that will collect info during the parse of a
418 ParseData::ParseData( char *fileName, char *sectionName,
419 const InputLoc §ionLoc )
422 generatingSectionSubset(false),
424 /* 0 is reserved for global error actions. */
435 sectionName(sectionName),
436 sectionLoc(sectionLoc),
441 nextEpsilonResolvedLink(0),
442 nextLongestMatchId(1),
443 lmRequiresErrorState(false)
445 /* Initialize the dictionary of graphs. This is our symbol table. The
446 * initialization needs to be done on construction which happens at the
447 * beginning of a machine spec so any assignment operators can reference
452 /* Clean up the data collected during a parse. */
453 ParseData::~ParseData()
455 /* Delete all the nodes in the action list. Will cause all the
456 * string data that represents the actions to be deallocated. */
460 /* Make a name id in the current name instantiation scope if it is not
462 NameInst *ParseData::addNameInst( const InputLoc &loc, char *data, bool isLabel )
464 /* Create the name instantitaion object and insert it. */
465 NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel );
466 curNameInst->childVect.append( newNameInst );
468 curNameInst->children.insertMulti( data, newNameInst );
472 void ParseData::initNameWalk()
474 curNameInst = rootName;
478 /* Goes into the next child scope. The number of the child is already set up.
479 * We need this for the syncronous name tree and parse tree walk to work
480 * properly. It is reset on entry into a scope and advanced on poping of a
481 * scope. A call to enterNameScope should be accompanied by a corresponding
483 NameFrame ParseData::enterNameScope( bool isLocal, int numScopes )
485 /* Save off the current data. */
487 retFrame.prevNameInst = curNameInst;
488 retFrame.prevNameChild = curNameChild;
489 retFrame.prevLocalScope = localNameScope;
491 /* Enter into the new name scope. */
492 for ( int i = 0; i < numScopes; i++ ) {
493 curNameInst = curNameInst->childVect[curNameChild];
498 localNameScope = curNameInst;
503 /* Return from a child scope to a parent. The parent info must be specified as
504 * an argument and is obtained from the corresponding call to enterNameScope.
506 void ParseData::popNameScope( const NameFrame &frame )
508 /* Pop the name scope. */
509 curNameInst = frame.prevNameInst;
510 curNameChild = frame.prevNameChild+1;
511 localNameScope = frame.prevLocalScope;
514 void ParseData::resetNameScope( const NameFrame &frame )
516 /* Pop the name scope. */
517 curNameInst = frame.prevNameInst;
518 curNameChild = frame.prevNameChild;
519 localNameScope = frame.prevLocalScope;
523 void ParseData::unsetObsoleteEntries( FsmAp *graph )
525 /* Loop the reference names and increment the usage. Names that are no
526 * longer needed will be unset in graph. */
527 for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) {
529 NameInst *name = *ref;
532 /* If the name is no longer needed unset its corresponding entry. */
533 if ( name->numUses == name->numRefs ) {
534 assert( graph->entryPoints.find( name->id ) != 0 );
535 graph->unsetEntry( name->id );
540 NameSet ParseData::resolvePart( NameInst *refFrom, char *data, bool recLabelsOnly )
542 /* Queue needed for breadth-first search, load it with the start node. */
543 NameInstList nameQueue;
544 nameQueue.append( refFrom );
547 while ( nameQueue.length() > 0 ) {
548 /* Pull the next from location off the queue. */
549 NameInst *from = nameQueue.detachFirst();
551 /* Look for the name. */
552 NameMapEl *low, *high;
553 if ( from->children.findMulti( data, low, high ) ) {
554 /* Record all instances of the name. */
555 for ( ; low <= high; low++ )
556 result.insert( low->value );
559 /* Name not there, do breadth-first operation of appending all
560 * childrent to the processing queue. */
561 for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) {
562 if ( !recLabelsOnly || (*name)->isLabel )
563 nameQueue.append( *name );
567 /* Queue exhausted and name never found. */
571 void ParseData::resolveFrom( NameSet &result, NameInst *refFrom,
572 const NameRef &nameRef, int namePos )
574 /* Look for the name in the owning scope of the factor with aug. */
575 NameSet partResult = resolvePart( refFrom, nameRef[namePos], false );
577 /* If there are more parts to the name then continue on. */
578 if ( ++namePos < nameRef.length() ) {
579 /* There are more components to the name, search using all the part
580 * results as the base. */
581 for ( NameSet::Iter name = partResult; name.lte(); name++ )
582 resolveFrom( result, *name, nameRef, namePos );
585 /* This is the last component, append the part results to the final
587 result.insert( partResult );
591 /* Write out a name reference. */
592 ostream &operator<<( ostream &out, const NameRef &nameRef )
595 if ( nameRef[pos] == 0 ) {
599 out << nameRef[pos++];
600 for ( ; pos < nameRef.length(); pos++ )
601 out << "::" << nameRef[pos];
605 ostream &operator<<( ostream &out, const NameInst &nameInst )
607 /* Count the number fully qualified name parts. */
609 NameInst *curParent = nameInst.parent;
610 while ( curParent != 0 ) {
612 curParent = curParent->parent;
615 /* Make an array and fill it in. */
616 curParent = nameInst.parent;
617 NameInst **parents = new NameInst*[numParents];
618 for ( int p = numParents-1; p >= 0; p-- ) {
619 parents[p] = curParent;
620 curParent = curParent->parent;
623 /* Write the parents out, skip the root. */
624 for ( int p = 1; p < numParents; p++ )
625 out << "::" << ( parents[p]->name != 0 ? parents[p]->name : "<ANON>" );
627 /* Write the name and cleanup. */
628 out << "::" << ( nameInst.name != 0 ? nameInst.name : "<ANON>" );
633 struct CmpNameInstLoc
635 static int compare( const NameInst *ni1, const NameInst *ni2 )
637 if ( ni1->loc.line < ni2->loc.line )
639 else if ( ni1->loc.line > ni2->loc.line )
641 else if ( ni1->loc.col < ni2->loc.col )
643 else if ( ni1->loc.col > ni2->loc.col )
649 void errorStateLabels( const NameSet &resolved )
651 MergeSort<NameInst*, CmpNameInstLoc> mergeSort;
652 mergeSort.sort( resolved.data, resolved.length() );
653 for ( NameSet::Iter res = resolved; res.lte(); res++ )
654 error((*res)->loc) << " -> " << **res << endl;
658 NameInst *ParseData::resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action )
660 NameInst *nameInst = 0;
662 /* Do the local search if the name is not strictly a root level name
664 if ( nameRef[0] != 0 ) {
665 /* If the action is referenced, resolve all of them. */
666 if ( action != 0 && action->actionRefs.length() > 0 ) {
667 /* Look for the name in all referencing scopes. */
669 for ( ActionRefs::Iter actRef = action->actionRefs; actRef.lte(); actRef++ )
670 resolveFrom( resolved, *actRef, nameRef, 0 );
672 if ( resolved.length() > 0 ) {
673 /* Take the first one. */
674 nameInst = resolved[0];
675 if ( resolved.length() > 1 ) {
676 /* Complain about the multiple references. */
677 error(loc) << "state reference " << nameRef <<
678 " resolves to multiple entry points" << endl;
679 errorStateLabels( resolved );
685 /* If not found in the local scope, look in global. */
686 if ( nameInst == 0 ) {
688 int fromPos = nameRef[0] != 0 ? 0 : 1;
689 resolveFrom( resolved, rootName, nameRef, fromPos );
691 if ( resolved.length() > 0 ) {
692 /* Take the first. */
693 nameInst = resolved[0];
694 if ( resolved.length() > 1 ) {
695 /* Complain about the multiple references. */
696 error(loc) << "state reference " << nameRef <<
697 " resolves to multiple entry points" << endl;
698 errorStateLabels( resolved );
703 if ( nameInst == 0 ) {
704 /* If not found then complain. */
705 error(loc) << "could not resolve state reference " << nameRef << endl;
710 void ParseData::resolveNameRefs( InlineList *inlineList, Action *action )
712 for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
713 switch ( item->type ) {
714 case InlineItem::Entry: case InlineItem::Goto:
715 case InlineItem::Call: case InlineItem::Next: {
716 /* Resolve, pass action for local search. */
717 NameInst *target = resolveStateRef( *item->nameRef, item->loc, action );
719 /* Check if the target goes into a longest match. */
720 NameInst *search = target->parent;
721 while ( search != 0 ) {
722 if ( search->isLongestMatch ) {
723 error(item->loc) << "cannot enter inside a longest "
724 "match construction as an entry point" << endl;
727 search = search->parent;
730 /* Note the reference in the name. This will cause the entry
731 * point to survive to the end of the graph generating walk. */
733 target->numRefs += 1;
734 item->nameTarg = target;
741 /* Some of the item types may have children. */
742 if ( item->children != 0 )
743 resolveNameRefs( item->children, action );
747 /* Resolve references to labels in actions. */
748 void ParseData::resolveActionNameRefs()
750 for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
751 /* Only care about the actions that are referenced. */
752 if ( act->actionRefs.length() > 0 )
753 resolveNameRefs( act->inlineList, act );
757 /* Walk a name tree starting at from and fill the name index. */
758 void ParseData::fillNameIndex( NameInst *from )
760 /* Fill the value for from in the name index. */
761 nameIndex[from->id] = from;
763 /* Recurse on the implicit final state and then all children. */
764 if ( from->final != 0 )
765 fillNameIndex( from->final );
766 for ( NameVect::Iter name = from->childVect; name.lte(); name++ )
767 fillNameIndex( *name );
770 void ParseData::makeRootName()
772 /* Create the root name. */
773 rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
776 /* Build the name tree and supporting data structures. */
777 void ParseData::makeNameTree( GraphDictEl *dictEl )
779 /* Set up curNameInst for the walk. */
780 curNameInst = rootName;
784 /* A start location has been specified. */
785 dictEl->value->makeNameTree( dictEl->loc, this );
788 /* First make the name tree. */
789 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
790 /* Recurse on the instance. */
791 glel->value->makeNameTree( glel->loc, this );
795 /* The number of nodes in the tree can now be given by nextNameId */
796 nameIndex = new NameInst*[nextNameId];
797 memset( nameIndex, 0, sizeof(NameInst*)*nextNameId );
798 fillNameIndex( rootName );
801 void ParseData::createBuiltin( char *name, BuiltinMachine builtin )
803 Expression *expression = new Expression( builtin );
804 Join *join = new Join( expression );
805 JoinOrLm *joinOrLm = new JoinOrLm( join );
806 VarDef *varDef = new VarDef( name, joinOrLm );
807 GraphDictEl *graphDictEl = new GraphDictEl( name, varDef );
808 graphDict.insert( graphDictEl );
811 /* Initialize the graph dict with builtin types. */
812 void ParseData::initGraphDict( )
814 createBuiltin( "any", BT_Any );
815 createBuiltin( "ascii", BT_Ascii );
816 createBuiltin( "extend", BT_Extend );
817 createBuiltin( "alpha", BT_Alpha );
818 createBuiltin( "digit", BT_Digit );
819 createBuiltin( "alnum", BT_Alnum );
820 createBuiltin( "lower", BT_Lower );
821 createBuiltin( "upper", BT_Upper );
822 createBuiltin( "cntrl", BT_Cntrl );
823 createBuiltin( "graph", BT_Graph );
824 createBuiltin( "print", BT_Print );
825 createBuiltin( "punct", BT_Punct );
826 createBuiltin( "space", BT_Space );
827 createBuiltin( "xdigit", BT_Xdigit );
828 createBuiltin( "null", BT_Lambda );
829 createBuiltin( "zlen", BT_Lambda );
830 createBuiltin( "empty", BT_Empty );
833 /* Set the alphabet type. If the types are not valid returns false. */
834 bool ParseData::setAlphType( char *s1, char *s2 )
837 for ( int i = 0; i < hostLang->numHostTypes; i++ ) {
838 if ( strcmp( s1, hostLang->hostTypes[i].data1 ) == 0 &&
839 hostLang->hostTypes[i].data2 != 0 &&
840 strcmp( s2, hostLang->hostTypes[i].data2 ) == 0 )
843 userAlphType = hostLang->hostTypes + i;
852 /* Set the alphabet type. If the types are not valid returns false. */
853 bool ParseData::setAlphType( char *s1 )
856 for ( int i = 0; i < hostLang->numHostTypes; i++ ) {
857 if ( strcmp( s1, hostLang->hostTypes[i].data1 ) == 0 &&
858 hostLang->hostTypes[i].data2 == 0 )
861 userAlphType = hostLang->hostTypes + i;
870 /* Initialize the key operators object that will be referenced by all fsms
872 void ParseData::initKeyOps( )
874 /* Signedness and bounds. */
875 HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType;
876 thisKeyOps.setAlphType( alphType );
878 if ( lowerNum != 0 ) {
879 /* If ranges are given then interpret the alphabet type. */
880 thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this );
881 thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
884 thisCondData.nextCondKey = thisKeyOps.maxKey;
885 thisCondData.nextCondKey.increment();
888 void ParseData::printNameInst( NameInst *nameInst, int level )
890 for ( int i = 0; i < level; i++ )
892 cerr << (nameInst->name != 0 ? nameInst->name : "<ANON>") <<
893 " id: " << nameInst->id <<
894 " refs: " << nameInst->numRefs << endl;
895 for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ )
896 printNameInst( *name, level+1 );
899 /* Remove duplicates of unique actions from an action table. */
900 void ParseData::removeDups( ActionTable &table )
902 /* Scan through the table looking for unique actions to
903 * remove duplicates of. */
904 for ( int i = 0; i < table.length(); i++ ) {
905 /* Remove any duplicates ahead of i. */
906 for ( int r = i+1; r < table.length(); ) {
907 if ( table[r].value == table[i].value )
915 /* Remove duplicates from action lists. This operates only on transition and
916 * eof action lists and so should be called once all actions have been
917 * transfered to their final resting place. */
918 void ParseData::removeActionDups( FsmAp *graph )
920 /* Loop all states. */
921 for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
922 /* Loop all transitions. */
923 for ( TransList::Iter trans = state->outList; trans.lte(); trans++ )
924 removeDups( trans->actionTable );
925 removeDups( state->toStateActionTable );
926 removeDups( state->fromStateActionTable );
927 removeDups( state->eofActionTable );
931 Action *ParseData::newAction( char *name, InlineList *inlineList )
937 Action *action = new Action( loc, name, inlineList, nextCondId++ );
938 action->actionRefs.append( rootName );
939 actionList.append( action );
943 void ParseData::initLongestMatchData()
945 if ( lmList.length() > 0 ) {
946 /* The initTokStart action resets the token start. */
947 InlineList *il1 = new InlineList;
948 il1->append( new InlineItem( InputLoc(), InlineItem::LmInitTokStart ) );
949 initTokStart = newAction( "initts", il1 );
950 initTokStart->isLmAction = true;
952 /* The initActId action gives act a default value. */
953 InlineList *il4 = new InlineList;
954 il4->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) );
955 initActId = newAction( "initact", il4 );
956 initActId->isLmAction = true;
958 /* The setTokStart action sets tokstart. */
959 InlineList *il5 = new InlineList;
960 il5->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) );
961 setTokStart = newAction( "tokstart", il5 );
962 setTokStart->isLmAction = true;
964 /* The setTokEnd action sets tokend. */
965 InlineList *il3 = new InlineList;
966 il3->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) );
967 setTokEnd = newAction( "tokend", il3 );
968 setTokEnd->isLmAction = true;
970 /* The action will also need an ordering: ahead of all user action
972 initTokStartOrd = curActionOrd++;
973 initActIdOrd = curActionOrd++;
974 setTokStartOrd = curActionOrd++;
975 setTokEndOrd = curActionOrd++;
979 /* After building the graph, do some extra processing to ensure the runtime
980 * data of the longest mactch operators is consistent. */
981 void ParseData::setLongestMatchData( FsmAp *graph )
983 if ( lmList.length() > 0 ) {
984 /* Make sure all entry points (targets of fgoto, fcall, fnext, fentry)
985 * init the tokstart. */
986 for ( EntryMap::Iter en = graph->entryPoints; en.lte(); en++ ) {
987 /* This is run after duplicates are removed, we must guard against
988 * inserting a duplicate. */
989 ActionTable &actionTable = en->value->toStateActionTable;
990 if ( ! actionTable.hasAction( initTokStart ) )
991 actionTable.setAction( initTokStartOrd, initTokStart );
994 /* Find the set of states that are the target of transitions with
995 * actions that have calls. These states will be targeted by fret
998 for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
999 for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
1000 for ( ActionTable::Iter ati = trans->actionTable; ati.lte(); ati++ ) {
1001 if ( ati->value->anyCall && trans->toState != 0 )
1002 states.insert( trans->toState );
1008 /* Init tokstart upon entering the above collected states. */
1009 for ( StateSet::Iter ps = states; ps.lte(); ps++ ) {
1010 /* This is run after duplicates are removed, we must guard against
1011 * inserting a duplicate. */
1012 ActionTable &actionTable = (*ps)->toStateActionTable;
1013 if ( ! actionTable.hasAction( initTokStart ) )
1014 actionTable.setAction( initTokStartOrd, initTokStart );
1019 /* Make the graph from a graph dict node. Does minimization and state sorting. */
1020 FsmAp *ParseData::makeInstance( GraphDictEl *gdNode )
1022 /* Build the graph from a walk of the parse tree. */
1023 FsmAp *graph = gdNode->value->walk( this );
1025 /* Resolve any labels that point to multiple states. Any labels that are
1026 * still around are referenced only by gotos and calls and they need to be
1027 * made into deterministic entry points. */
1028 graph->deterministicEntry();
1031 * All state construction is now complete.
1034 /* Transfer global error actions. */
1035 for ( StateList::Iter state = graph->stateList; state.lte(); state++ )
1036 graph->transferErrorActions( state, 0 );
1038 removeActionDups( graph );
1040 /* Remove unreachable states. There should be no dead end states. The
1041 * subtract and intersection operators are the only places where they may
1042 * be created and those operators clean them up. */
1043 graph->removeUnreachableStates();
1045 /* No more fsm operations are to be done. Action ordering numbers are
1046 * no longer of use and will just hinder minimization. Clear them. */
1047 graph->nullActionKeys();
1049 /* Transition priorities are no longer of use. We can clear them
1050 * because they will just hinder minimization as well. Clear them. */
1051 graph->clearAllPriorities();
1053 if ( minimizeOpt != MinimizeNone ) {
1054 /* Minimize here even if we minimized at every op. Now that function
1055 * keys have been cleared we may get a more minimal fsm. */
1056 switch ( minimizeLevel ) {
1057 case MinimizeApprox:
1058 graph->minimizeApproximate();
1060 case MinimizeStable:
1061 graph->minimizeStable();
1063 case MinimizePartition1:
1064 graph->minimizePartition1();
1066 case MinimizePartition2:
1067 graph->minimizePartition2();
1072 graph->compressTransitions();
1077 void ParseData::printNameTree()
1079 /* Print the name instance map. */
1080 for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ )
1081 printNameInst( *name, 0 );
1083 cerr << "name index:" << endl;
1084 /* Show that the name index is correct. */
1085 for ( int ni = 0; ni < nextNameId; ni++ ) {
1087 char *name = nameIndex[ni]->name;
1088 cerr << ( name != 0 ? name : "<ANON>" ) << endl;
1092 FsmAp *ParseData::makeSpecific( GraphDictEl *gdNode )
1094 /* Build the name tree and supporting data structures. */
1095 makeNameTree( gdNode );
1097 /* Resove name references from gdNode. */
1099 gdNode->value->resolveNameRefs( this );
1101 /* Do not resolve action references. Since we are not building the entire
1102 * graph there's a good chance that many name references will fail. This
1103 * is okay since generating part of the graph is usually only done when
1104 * inspecting the compiled machine. */
1106 /* Flag this case so that the XML code generator is aware that we haven't
1107 * looked up name references in actions. It can then avoid segfaulting. */
1108 generatingSectionSubset = true;
1110 /* Just building the specified graph. */
1112 FsmAp *mainGraph = makeInstance( gdNode );
1117 FsmAp *ParseData::makeAll()
1119 /* Build the name tree and supporting data structures. */
1122 /* Resove name references in the tree. */
1124 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ )
1125 glel->value->resolveNameRefs( this );
1127 /* Resolve action code name references. */
1128 resolveActionNameRefs();
1130 FsmAp *mainGraph = 0;
1131 FsmAp **graphs = new FsmAp*[instanceList.length()];
1134 /* Make all the instantiations, we know that main exists in this list. */
1136 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
1137 if ( strcmp( glel->key, machineMain ) == 0 ) {
1138 /* Main graph is always instantiated. */
1139 mainGraph = makeInstance( glel );
1142 /* Check to see if the instance is ever referenced. */
1143 NameInst *nameInst = nextNameScope();
1144 if ( nameInst->anyRefsRec() )
1145 graphs[numOthers++] = makeInstance( glel );
1147 /* Need to walk over the name tree item. */
1148 NameFrame nameFrame = enterNameScope( true, 1 );
1149 popNameScope( nameFrame );
1154 if ( numOthers > 0 ) {
1155 /* Add all the other graphs into main. */
1156 mainGraph->globOp( graphs, numOthers );
1163 void ParseData::analyzeAction( Action *action, InlineList *inlineList )
1165 /* FIXME: Actions used as conditions should be very constrained. */
1166 for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1167 if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr )
1168 action->anyCall = true;
1170 /* Need to recurse into longest match items. */
1171 if ( item->type == InlineItem::LmSwitch ) {
1172 LongestMatch *lm = item->longestMatch;
1173 for ( LmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) {
1174 if ( lmi->action != 0 )
1175 analyzeAction( action, lmi->action->inlineList );
1179 if ( item->type == InlineItem::LmOnLast ||
1180 item->type == InlineItem::LmOnNext ||
1181 item->type == InlineItem::LmOnLagBehind )
1183 LongestMatchPart *lmi = item->longestMatchPart;
1184 if ( lmi->action != 0 )
1185 analyzeAction( action, lmi->action->inlineList );
1188 if ( item->children != 0 )
1189 analyzeAction( action, item->children );
1194 /* Check actions for bad uses of fsm directives. We don't go inside longest
1195 * match items in actions created by ragel, since we just want the user
1197 void ParseData::checkInlineList( Action *act, InlineList *inlineList )
1199 for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1201 if ( act->numEofRefs > 0 ) {
1202 switch ( item->type ) {
1203 case InlineItem::PChar:
1204 error(item->loc) << "pointer to current element does not exist in "
1205 "EOF action code" << endl;
1207 case InlineItem::Char:
1208 error(item->loc) << "current element does not exist in "
1209 "EOF action code" << endl;
1211 case InlineItem::Hold:
1212 error(item->loc) << "changing the current element not possible in "
1213 "EOF action code" << endl;
1215 case InlineItem::Exec:
1216 error(item->loc) << "changing the current element not possible in "
1217 "EOF action code" << endl;
1219 case InlineItem::Goto: case InlineItem::Call:
1220 case InlineItem::Next: case InlineItem::GotoExpr:
1221 case InlineItem::CallExpr: case InlineItem::NextExpr:
1222 case InlineItem::Ret:
1223 error(item->loc) << "changing the current state not possible in "
1224 "EOF action code" << endl;
1232 if ( item->children != 0 )
1233 checkInlineList( act, item->children );
1237 void ParseData::checkAction( Action *action )
1239 /* Check for actions with calls that are embedded within a longest match
1241 if ( !action->isLmAction && action->numRefs() > 0 && action->anyCall ) {
1242 for ( ActionRefs::Iter ar = action->actionRefs; ar.lte(); ar++ ) {
1243 NameInst *check = *ar;
1244 while ( check != 0 ) {
1245 if ( check->isLongestMatch ) {
1246 error(action->loc) << "within a scanner, fcall is permitted"
1247 " only in pattern actions" << endl;
1250 check = check->parent;
1255 checkInlineList( action, action->inlineList );
1259 void ParseData::analyzeGraph( FsmAp *graph )
1261 for ( ActionList::Iter act = actionList; act.lte(); act++ )
1262 analyzeAction( act, act->inlineList );
1264 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
1265 /* The transition list. */
1266 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
1267 for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ )
1268 at->value->numTransRefs += 1;
1271 for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ )
1272 at->value->numToStateRefs += 1;
1274 for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ )
1275 at->value->numFromStateRefs += 1;
1277 for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ )
1278 at->value->numEofRefs += 1;
1280 for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) {
1281 for ( CondSet::Iter sci = sc->condSpace->condSet; sci.lte(); sci++ )
1282 (*sci)->numCondRefs += 1;
1286 /* Checks for bad usage of directives in action code. */
1287 for ( ActionList::Iter act = actionList; act.lte(); act++ )
1291 void ParseData::prepareMachineGen( GraphDictEl *graphDictEl )
1296 initLongestMatchData();
1298 /* Make the graph, do minimization. */
1299 if ( graphDictEl == 0 )
1300 sectionGraph = makeAll();
1302 sectionGraph = makeSpecific( graphDictEl );
1304 /* If any errors have occured in the input file then don't write anything. */
1305 if ( gblErrorCount > 0 )
1308 analyzeGraph( sectionGraph );
1310 /* Depends on the graph analysis. */
1311 setLongestMatchData( sectionGraph );
1313 /* Decide if an error state is necessary.
1314 * 1. There is an error transition
1315 * 2. There is a gap in the transitions
1316 * 3. The longest match operator requires it. */
1317 if ( lmRequiresErrorState || sectionGraph->hasErrorTrans() )
1318 sectionGraph->errState = sectionGraph->addState();
1320 /* State numbers need to be assigned such that all final states have a
1321 * larger state id number than all non-final states. This enables the
1322 * first_final mechanism to function correctly. We also want states to be
1323 * ordered in a predictable fashion. So we first apply a depth-first
1324 * search, then do a stable sort by final state status, then assign
1327 sectionGraph->depthFirstOrdering();
1328 sectionGraph->sortStatesByFinal();
1329 sectionGraph->setStateNumbers( 0 );
1332 void ParseData::generateXML( ostream &out )
1336 /* Make the generator. */
1337 XMLCodeGen codeGen( sectionName, this, sectionGraph, out );
1339 /* Write out with it. */
1342 if ( printStatistics ) {
1343 cerr << "fsm name : " << sectionName << endl;
1344 cerr << "num states: " << sectionGraph->stateList.length() << endl;
1349 /* Send eof to all parsers. */
1350 void terminateAllParsers( )
1352 /* FIXME: a proper token is needed here. Suppose we should use the
1353 * location of EOF in the last file that the parser was referenced in. */
1355 loc.fileName = "<EOF>";
1358 for ( ParserDict::Iter pdel = parserDict; pdel.lte(); pdel++ )
1359 pdel->value->token( loc, _eof, 0, 0 );
1362 void checkMachines( )
1364 for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1365 ParseData *pd = parser->value->pd;
1366 if ( pd->instanceList.length() > 0 ) {
1367 /* There must be a main graph defined. */
1368 /* No machine name. Need to have a main. Make sure it was given. */
1369 GraphDictEl *mainEl = pd->graphDict.find( machineMain );
1370 if ( mainEl == 0 ) {
1371 error(pd->sectionLoc) << "main graph not defined in \"" <<
1372 pd->sectionName << "\"" << endl;
1378 void writeLanguage( std::ostream &out )
1381 switch ( hostLangType ) {
1382 case CCode: out << "C"; break;
1383 case DCode: out << "D"; break;
1384 case JavaCode: out << "Java"; break;
1385 case RubyCode: out << "Ruby"; break;
1391 void writeMachines( std::ostream &out, std::string hostData, char *inputFileName )
1393 if ( machineSpec == 0 && machineName == 0 ) {
1394 /* No machine spec or machine name given. Generate everything. */
1395 for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1396 ParseData *pd = parser->value->pd;
1397 if ( pd->instanceList.length() > 0 )
1398 pd->prepareMachineGen( 0 );
1401 if ( gblErrorCount == 0 ) {
1402 out << "<ragel filename=\"" << inputFileName << "\"";
1403 writeLanguage( out );
1405 for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1406 ParseData *pd = parser->value->pd;
1407 if ( pd->instanceList.length() > 0 )
1408 pd->generateXML( out );
1411 out << "</ragel>\n";
1414 else if ( parserDict.length() > 0 ) {
1415 /* There is either a machine spec or machine name given. */
1416 ParseData *parseData = 0;
1417 GraphDictEl *graphDictEl = 0;
1419 /* Traverse the sections, break out when we find a section/machine
1420 * that matches the one specified. */
1421 for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1422 ParseData *checkPd = parser->value->pd;
1423 if ( machineSpec == 0 || strcmp( checkPd->sectionName, machineSpec ) == 0 ) {
1424 GraphDictEl *checkGdEl = 0;
1425 if ( machineName == 0 || (checkGdEl =
1426 checkPd->graphDict.find( machineName )) != 0 )
1428 /* Have a machine spec and/or machine name that matches
1429 * the -M/-S options. */
1430 parseData = checkPd;
1431 graphDictEl = checkGdEl;
1437 if ( parseData == 0 )
1438 error() << "could not locate machine specified with -S and/or -M" << endl;
1440 /* Section/Machine to emit was found. Prepare and emit it. */
1441 parseData->prepareMachineGen( graphDictEl );
1442 if ( gblErrorCount == 0 ) {
1443 out << "<ragel filename=\"" << inputFileName << "\"";
1444 writeLanguage( out );
1446 parseData->generateXML( out );
1448 out << "</ragel>\n";