2 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 #include "parsedata.h"
31 #include "parsetree.h"
32 #include "mergesort.h"
33 #include "xmlcodegen.h"
38 char mainMachine[] = "main";
40 void Token::set( char *str, int len )
43 data = new char[len+1];
44 memcpy( data, str, len );
48 void Token::append( const Token &other )
50 int newLength = length + other.length;
51 char *newString = new char[newLength+1];
52 memcpy( newString, data, length );
53 memcpy( newString + length, other.data, other.length );
54 newString[newLength] = 0;
59 /* Perform minimization after an operation according
60 * to the command line args. */
61 void afterOpMinimize( FsmAp *fsm, bool lastInSeq )
63 /* Switch on the prefered minimization algorithm. */
64 if ( minimizeOpt == MinimizeEveryOp || minimizeOpt == MinimizeMostOps && lastInSeq ) {
65 /* First clean up the graph. FsmAp operations may leave these
66 * lying around. There should be no dead end states. The subtract
67 * intersection operators are the only places where they may be
68 * created and those operators clean them up. */
69 fsm->removeUnreachableStates();
71 switch ( minimizeLevel ) {
73 fsm->minimizeApproximate();
75 case MinimizePartition1:
76 fsm->minimizePartition1();
78 case MinimizePartition2:
79 fsm->minimizePartition2();
82 fsm->minimizeStable();
88 /* Count the transitions in the fsm by walking the state list. */
89 int countTransitions( FsmAp *fsm )
92 StateAp *state = fsm->stateList.head;
93 while ( state != 0 ) {
94 numTrans += state->outList.length();
100 Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd )
102 /* Reset errno so we can check for overflow or underflow. In the event of
103 * an error, sets the return val to the upper or lower bound being tested
106 unsigned int size = keyOps->alphType->size;
107 bool unusedBits = size < sizeof(unsigned long);
109 unsigned long ul = strtoul( str, 0, 16 );
111 if ( errno == ERANGE || unusedBits && ul >> (size * 8) ) {
112 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
113 ul = 1 << (size * 8);
116 if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) )
117 ul |= (0xffffffff >> (size*8 ) ) << (size*8);
119 return Key( (long)ul );
122 Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd )
124 /* Convert the number to a decimal. First reset errno so we can check
125 * for overflow or underflow. */
127 long long minVal = keyOps->alphType->minVal;
128 long long maxVal = keyOps->alphType->maxVal;
130 long long ll = strtoll( str, 0, 10 );
132 /* Check for underflow. */
133 if ( errno == ERANGE && ll < 0 || ll < minVal) {
134 error(loc) << "literal " << str << " underflows the alphabet type" << endl;
137 /* Check for overflow. */
138 else if ( errno == ERANGE && ll > 0 || ll > maxVal ) {
139 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
143 if ( keyOps->alphType->isSigned )
144 return Key( (long)ll );
146 return Key( (unsigned long)ll );
149 /* Make an fsm key in int format (what the fsm graph uses) from an alphabet
150 * number returned by the parser. Validates that the number doesn't overflow
151 * the alphabet type. */
152 Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd )
154 /* Switch on hex/decimal format. */
155 if ( str[0] == '0' && str[1] == 'x' )
156 return makeFsmKeyHex( str, loc, pd );
158 return makeFsmKeyDec( str, loc, pd );
161 /* Make an fsm int format (what the fsm graph uses) from a single character.
162 * Performs proper conversion depending on signed/unsigned property of the
164 Key makeFsmKeyChar( char c, ParseData *pd )
166 if ( keyOps->isSigned ) {
167 /* Copy from a char type. */
171 /* Copy from an unsigned byte type. */
172 return Key( (unsigned char)c );
176 /* Make an fsm key array in int format (what the fsm graph uses) from a string
177 * of characters. Performs proper conversion depending on signed/unsigned
178 * property of the alphabet. */
179 void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd )
181 if ( keyOps->isSigned ) {
182 /* Copy from a char star type. */
184 for ( int i = 0; i < len; i++ )
185 result[i] = Key(src[i]);
188 /* Copy from an unsigned byte ptr type. */
189 unsigned char *src = (unsigned char*) data;
190 for ( int i = 0; i < len; i++ )
191 result[i] = Key(src[i]);
195 /* Like makeFsmKeyArray except the result has only unique keys. They ordering
196 * will be changed. */
197 void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
198 bool caseInsensitive, ParseData *pd )
200 /* Use a transitions list for getting unique keys. */
201 if ( keyOps->isSigned ) {
202 /* Copy from a char star type. */
204 for ( int si = 0; si < len; si++ ) {
206 result.insert( key );
207 if ( caseInsensitive ) {
209 result.insert( key.toUpper() );
210 else if ( key.isUpper() )
211 result.insert( key.toLower() );
216 /* Copy from an unsigned byte ptr type. */
217 unsigned char *src = (unsigned char*) data;
218 for ( int si = 0; si < len; si++ ) {
220 result.insert( key );
221 if ( caseInsensitive ) {
223 result.insert( key.toUpper() );
224 else if ( key.isUpper() )
225 result.insert( key.toLower() );
231 FsmAp *dotFsm( ParseData *pd )
233 FsmAp *retFsm = new FsmAp();
234 retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey );
238 FsmAp *dotStarFsm( ParseData *pd )
240 FsmAp *retFsm = new FsmAp();
241 retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey );
245 /* Make a builtin type. Depends on the signed nature of the alphabet type. */
246 FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd )
248 /* FsmAp created to return. */
250 bool isSigned = keyOps->isSigned;
254 /* All characters. */
255 retFsm = dotFsm( pd );
259 /* Ascii characters 0 to 127. */
260 retFsm = new FsmAp();
261 retFsm->rangeFsm( 0, 127 );
265 /* Ascii extended characters. This is the full byte range. Dependent
266 * on signed, vs no signed. If the alphabet is one byte then just use
269 retFsm = new FsmAp();
270 retFsm->rangeFsm( -128, 127 );
273 retFsm = new FsmAp();
274 retFsm->rangeFsm( 0, 255 );
279 /* Alpha [A-Za-z]. */
280 FsmAp *upper = new FsmAp(), *lower = new FsmAp();
281 upper->rangeFsm( 'A', 'Z' );
282 lower->rangeFsm( 'a', 'z' );
283 upper->unionOp( lower );
284 upper->minimizePartition2();
290 retFsm = new FsmAp();
291 retFsm->rangeFsm( '0', '9' );
295 /* Alpha numerics [0-9A-Za-z]. */
296 FsmAp *digit = new FsmAp(), *lower = new FsmAp();
297 FsmAp *upper = new FsmAp();
298 digit->rangeFsm( '0', '9' );
299 upper->rangeFsm( 'A', 'Z' );
300 lower->rangeFsm( 'a', 'z' );
301 digit->unionOp( upper );
302 digit->unionOp( lower );
303 digit->minimizePartition2();
308 /* Lower case characters. */
309 retFsm = new FsmAp();
310 retFsm->rangeFsm( 'a', 'z' );
314 /* Upper case characters. */
315 retFsm = new FsmAp();
316 retFsm->rangeFsm( 'A', 'Z' );
320 /* Control characters. */
321 FsmAp *cntrl = new FsmAp();
322 FsmAp *highChar = new FsmAp();
323 cntrl->rangeFsm( 0, 31 );
324 highChar->concatFsm( 127 );
325 cntrl->unionOp( highChar );
326 cntrl->minimizePartition2();
331 /* Graphical ascii characters [!-~]. */
332 retFsm = new FsmAp();
333 retFsm->rangeFsm( '!', '~' );
337 /* Printable characters. Same as graph except includes space. */
338 retFsm = new FsmAp();
339 retFsm->rangeFsm( ' ', '~' );
344 FsmAp *range1 = new FsmAp();
345 FsmAp *range2 = new FsmAp();
346 FsmAp *range3 = new FsmAp();
347 FsmAp *range4 = new FsmAp();
348 range1->rangeFsm( '!', '/' );
349 range2->rangeFsm( ':', '@' );
350 range3->rangeFsm( '[', '`' );
351 range4->rangeFsm( '{', '~' );
352 range1->unionOp( range2 );
353 range1->unionOp( range3 );
354 range1->unionOp( range4 );
355 range1->minimizePartition2();
360 /* Whitespace: [\t\v\f\n\r ]. */
361 FsmAp *cntrl = new FsmAp();
362 FsmAp *space = new FsmAp();
363 cntrl->rangeFsm( '\t', '\r' );
364 space->concatFsm( ' ' );
365 cntrl->unionOp( space );
366 cntrl->minimizePartition2();
371 /* Hex digits [0-9A-Fa-f]. */
372 FsmAp *digit = new FsmAp();
373 FsmAp *upper = new FsmAp();
374 FsmAp *lower = new FsmAp();
375 digit->rangeFsm( '0', '9' );
376 upper->rangeFsm( 'A', 'F' );
377 lower->rangeFsm( 'a', 'f' );
378 digit->unionOp( upper );
379 digit->unionOp( lower );
380 digit->minimizePartition2();
385 retFsm = new FsmAp();
390 retFsm = new FsmAp();
398 /* Check if this name inst or any name inst below is referenced. */
399 bool NameInst::anyRefsRec()
404 /* Recurse on children until true. */
405 for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) {
406 if ( (*ch)->anyRefsRec() )
417 /* Initialize the structure that will collect info during the parse of a
419 ParseData::ParseData( char *fileName, char *sectionName,
420 const InputLoc §ionLoc )
423 generatingSectionSubset(false),
425 /* 0 is reserved for global error actions. */
446 sectionName(sectionName),
447 sectionLoc(sectionLoc),
452 nextEpsilonResolvedLink(0),
453 nextLongestMatchId(1),
454 lmRequiresErrorState(false)
456 /* Initialize the dictionary of graphs. This is our symbol table. The
457 * initialization needs to be done on construction which happens at the
458 * beginning of a machine spec so any assignment operators can reference
463 /* Clean up the data collected during a parse. */
464 ParseData::~ParseData()
466 /* Delete all the nodes in the action list. Will cause all the
467 * string data that represents the actions to be deallocated. */
471 /* Make a name id in the current name instantiation scope if it is not
473 NameInst *ParseData::addNameInst( const InputLoc &loc, char *data, bool isLabel )
475 /* Create the name instantitaion object and insert it. */
476 NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel );
477 curNameInst->childVect.append( newNameInst );
479 curNameInst->children.insertMulti( data, newNameInst );
483 void ParseData::initNameWalk()
485 curNameInst = rootName;
489 void ParseData::initExportsNameWalk()
491 curNameInst = exportsRootName;
495 /* Goes into the next child scope. The number of the child is already set up.
496 * We need this for the syncronous name tree and parse tree walk to work
497 * properly. It is reset on entry into a scope and advanced on poping of a
498 * scope. A call to enterNameScope should be accompanied by a corresponding
500 NameFrame ParseData::enterNameScope( bool isLocal, int numScopes )
502 /* Save off the current data. */
504 retFrame.prevNameInst = curNameInst;
505 retFrame.prevNameChild = curNameChild;
506 retFrame.prevLocalScope = localNameScope;
508 /* Enter into the new name scope. */
509 for ( int i = 0; i < numScopes; i++ ) {
510 curNameInst = curNameInst->childVect[curNameChild];
515 localNameScope = curNameInst;
520 /* Return from a child scope to a parent. The parent info must be specified as
521 * an argument and is obtained from the corresponding call to enterNameScope.
523 void ParseData::popNameScope( const NameFrame &frame )
525 /* Pop the name scope. */
526 curNameInst = frame.prevNameInst;
527 curNameChild = frame.prevNameChild+1;
528 localNameScope = frame.prevLocalScope;
531 void ParseData::resetNameScope( const NameFrame &frame )
533 /* Pop the name scope. */
534 curNameInst = frame.prevNameInst;
535 curNameChild = frame.prevNameChild;
536 localNameScope = frame.prevLocalScope;
540 void ParseData::unsetObsoleteEntries( FsmAp *graph )
542 /* Loop the reference names and increment the usage. Names that are no
543 * longer needed will be unset in graph. */
544 for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) {
546 NameInst *name = *ref;
549 /* If the name is no longer needed unset its corresponding entry. */
550 if ( name->numUses == name->numRefs ) {
551 assert( graph->entryPoints.find( name->id ) != 0 );
552 graph->unsetEntry( name->id );
553 assert( graph->entryPoints.find( name->id ) == 0 );
558 NameSet ParseData::resolvePart( NameInst *refFrom, char *data, bool recLabelsOnly )
560 /* Queue needed for breadth-first search, load it with the start node. */
561 NameInstList nameQueue;
562 nameQueue.append( refFrom );
565 while ( nameQueue.length() > 0 ) {
566 /* Pull the next from location off the queue. */
567 NameInst *from = nameQueue.detachFirst();
569 /* Look for the name. */
570 NameMapEl *low, *high;
571 if ( from->children.findMulti( data, low, high ) ) {
572 /* Record all instances of the name. */
573 for ( ; low <= high; low++ )
574 result.insert( low->value );
577 /* Name not there, do breadth-first operation of appending all
578 * childrent to the processing queue. */
579 for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) {
580 if ( !recLabelsOnly || (*name)->isLabel )
581 nameQueue.append( *name );
585 /* Queue exhausted and name never found. */
589 void ParseData::resolveFrom( NameSet &result, NameInst *refFrom,
590 const NameRef &nameRef, int namePos )
592 /* Look for the name in the owning scope of the factor with aug. */
593 NameSet partResult = resolvePart( refFrom, nameRef[namePos], false );
595 /* If there are more parts to the name then continue on. */
596 if ( ++namePos < nameRef.length() ) {
597 /* There are more components to the name, search using all the part
598 * results as the base. */
599 for ( NameSet::Iter name = partResult; name.lte(); name++ )
600 resolveFrom( result, *name, nameRef, namePos );
603 /* This is the last component, append the part results to the final
605 result.insert( partResult );
609 /* Write out a name reference. */
610 ostream &operator<<( ostream &out, const NameRef &nameRef )
613 if ( nameRef[pos] == 0 ) {
617 out << nameRef[pos++];
618 for ( ; pos < nameRef.length(); pos++ )
619 out << "::" << nameRef[pos];
623 ostream &operator<<( ostream &out, const NameInst &nameInst )
625 /* Count the number fully qualified name parts. */
627 NameInst *curParent = nameInst.parent;
628 while ( curParent != 0 ) {
630 curParent = curParent->parent;
633 /* Make an array and fill it in. */
634 curParent = nameInst.parent;
635 NameInst **parents = new NameInst*[numParents];
636 for ( int p = numParents-1; p >= 0; p-- ) {
637 parents[p] = curParent;
638 curParent = curParent->parent;
641 /* Write the parents out, skip the root. */
642 for ( int p = 1; p < numParents; p++ )
643 out << "::" << ( parents[p]->name != 0 ? parents[p]->name : "<ANON>" );
645 /* Write the name and cleanup. */
646 out << "::" << ( nameInst.name != 0 ? nameInst.name : "<ANON>" );
651 struct CmpNameInstLoc
653 static int compare( const NameInst *ni1, const NameInst *ni2 )
655 if ( ni1->loc.line < ni2->loc.line )
657 else if ( ni1->loc.line > ni2->loc.line )
659 else if ( ni1->loc.col < ni2->loc.col )
661 else if ( ni1->loc.col > ni2->loc.col )
667 void errorStateLabels( const NameSet &resolved )
669 MergeSort<NameInst*, CmpNameInstLoc> mergeSort;
670 mergeSort.sort( resolved.data, resolved.length() );
671 for ( NameSet::Iter res = resolved; res.lte(); res++ )
672 error((*res)->loc) << " -> " << **res << endl;
676 NameInst *ParseData::resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action )
678 NameInst *nameInst = 0;
680 /* Do the local search if the name is not strictly a root level name
682 if ( nameRef[0] != 0 ) {
683 /* If the action is referenced, resolve all of them. */
684 if ( action != 0 && action->actionRefs.length() > 0 ) {
685 /* Look for the name in all referencing scopes. */
687 for ( ActionRefs::Iter actRef = action->actionRefs; actRef.lte(); actRef++ )
688 resolveFrom( resolved, *actRef, nameRef, 0 );
690 if ( resolved.length() > 0 ) {
691 /* Take the first one. */
692 nameInst = resolved[0];
693 if ( resolved.length() > 1 ) {
694 /* Complain about the multiple references. */
695 error(loc) << "state reference " << nameRef <<
696 " resolves to multiple entry points" << endl;
697 errorStateLabels( resolved );
703 /* If not found in the local scope, look in global. */
704 if ( nameInst == 0 ) {
706 int fromPos = nameRef[0] != 0 ? 0 : 1;
707 resolveFrom( resolved, rootName, nameRef, fromPos );
709 if ( resolved.length() > 0 ) {
710 /* Take the first. */
711 nameInst = resolved[0];
712 if ( resolved.length() > 1 ) {
713 /* Complain about the multiple references. */
714 error(loc) << "state reference " << nameRef <<
715 " resolves to multiple entry points" << endl;
716 errorStateLabels( resolved );
721 if ( nameInst == 0 ) {
722 /* If not found then complain. */
723 error(loc) << "could not resolve state reference " << nameRef << endl;
728 void ParseData::resolveNameRefs( InlineList *inlineList, Action *action )
730 for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
731 switch ( item->type ) {
732 case InlineItem::Entry: case InlineItem::Goto:
733 case InlineItem::Call: case InlineItem::Next: {
734 /* Resolve, pass action for local search. */
735 NameInst *target = resolveStateRef( *item->nameRef, item->loc, action );
737 /* Check if the target goes into a longest match. */
738 NameInst *search = target->parent;
739 while ( search != 0 ) {
740 if ( search->isLongestMatch ) {
741 error(item->loc) << "cannot enter inside a longest "
742 "match construction as an entry point" << endl;
745 search = search->parent;
748 /* Note the reference in the name. This will cause the entry
749 * point to survive to the end of the graph generating walk. */
751 target->numRefs += 1;
752 item->nameTarg = target;
759 /* Some of the item types may have children. */
760 if ( item->children != 0 )
761 resolveNameRefs( item->children, action );
765 /* Resolve references to labels in actions. */
766 void ParseData::resolveActionNameRefs()
768 for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
769 /* Only care about the actions that are referenced. */
770 if ( act->actionRefs.length() > 0 )
771 resolveNameRefs( act->inlineList, act );
775 /* Walk a name tree starting at from and fill the name index. */
776 void ParseData::fillNameIndex( NameInst *from )
778 /* Fill the value for from in the name index. */
779 nameIndex[from->id] = from;
781 /* Recurse on the implicit final state and then all children. */
782 if ( from->final != 0 )
783 fillNameIndex( from->final );
784 for ( NameVect::Iter name = from->childVect; name.lte(); name++ )
785 fillNameIndex( *name );
788 void ParseData::makeRootNames()
790 /* Create the root name. */
791 rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
792 exportsRootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
795 /* Build the name tree and supporting data structures. */
796 void ParseData::makeNameTree( GraphDictEl *dictEl )
798 /* Set up curNameInst for the walk. */
802 /* A start location has been specified. */
803 dictEl->value->makeNameTree( dictEl->loc, this );
806 /* First make the name tree. */
807 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
808 /* Recurse on the instance. */
809 glel->value->makeNameTree( glel->loc, this );
813 /* The number of nodes in the tree can now be given by nextNameId */
814 nameIndex = new NameInst*[nextNameId];
815 memset( nameIndex, 0, sizeof(NameInst*)*nextNameId );
816 fillNameIndex( rootName );
817 fillNameIndex( exportsRootName );
821 void ParseData::createBuiltin( char *name, BuiltinMachine builtin )
823 Expression *expression = new Expression( builtin );
824 Join *join = new Join( expression );
825 JoinOrLm *joinOrLm = new JoinOrLm( join );
826 VarDef *varDef = new VarDef( name, joinOrLm );
827 GraphDictEl *graphDictEl = new GraphDictEl( name, varDef );
828 graphDict.insert( graphDictEl );
831 /* Initialize the graph dict with builtin types. */
832 void ParseData::initGraphDict( )
834 createBuiltin( "any", BT_Any );
835 createBuiltin( "ascii", BT_Ascii );
836 createBuiltin( "extend", BT_Extend );
837 createBuiltin( "alpha", BT_Alpha );
838 createBuiltin( "digit", BT_Digit );
839 createBuiltin( "alnum", BT_Alnum );
840 createBuiltin( "lower", BT_Lower );
841 createBuiltin( "upper", BT_Upper );
842 createBuiltin( "cntrl", BT_Cntrl );
843 createBuiltin( "graph", BT_Graph );
844 createBuiltin( "print", BT_Print );
845 createBuiltin( "punct", BT_Punct );
846 createBuiltin( "space", BT_Space );
847 createBuiltin( "xdigit", BT_Xdigit );
848 createBuiltin( "null", BT_Lambda );
849 createBuiltin( "zlen", BT_Lambda );
850 createBuiltin( "empty", BT_Empty );
853 /* Set the alphabet type. If the types are not valid returns false. */
854 bool ParseData::setAlphType( const InputLoc &loc, char *s1, char *s2 )
857 userAlphType = findAlphType( s1, s2 );
859 return userAlphType != 0;
862 /* Set the alphabet type. If the types are not valid returns false. */
863 bool ParseData::setAlphType( const InputLoc &loc, char *s1 )
866 userAlphType = findAlphType( s1 );
868 return userAlphType != 0;
871 bool ParseData::setVariable( char *var, InlineList *inlineList )
875 if ( strcmp( var, "p" ) == 0 )
877 else if ( strcmp( var, "pe" ) == 0 )
879 else if ( strcmp( var, "cs" ) == 0 )
881 else if ( strcmp( var, "data" ) == 0 )
882 dataExpr = inlineList;
883 else if ( strcmp( var, "top" ) == 0 )
884 topExpr = inlineList;
885 else if ( strcmp( var, "stack" ) == 0 )
886 stackExpr = inlineList;
887 else if ( strcmp( var, "act" ) == 0 )
888 actExpr = inlineList;
889 else if ( strcmp( var, "tokstart" ) == 0 )
890 tokstartExpr = inlineList;
891 else if ( strcmp( var, "tokend" ) == 0 )
892 tokendExpr = inlineList;
899 /* Initialize the key operators object that will be referenced by all fsms
901 void ParseData::initKeyOps( )
903 /* Signedness and bounds. */
904 HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType;
905 thisKeyOps.setAlphType( alphType );
907 if ( lowerNum != 0 ) {
908 /* If ranges are given then interpret the alphabet type. */
909 thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this );
910 thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
913 thisCondData.lastCondKey = thisKeyOps.maxKey;
916 void ParseData::printNameInst( NameInst *nameInst, int level )
918 for ( int i = 0; i < level; i++ )
920 cerr << (nameInst->name != 0 ? nameInst->name : "<ANON>") <<
921 " id: " << nameInst->id <<
922 " refs: " << nameInst->numRefs <<
923 " uses: " << nameInst->numUses << endl;
924 for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ )
925 printNameInst( *name, level+1 );
928 /* Remove duplicates of unique actions from an action table. */
929 void ParseData::removeDups( ActionTable &table )
931 /* Scan through the table looking for unique actions to
932 * remove duplicates of. */
933 for ( int i = 0; i < table.length(); i++ ) {
934 /* Remove any duplicates ahead of i. */
935 for ( int r = i+1; r < table.length(); ) {
936 if ( table[r].value == table[i].value )
944 /* Remove duplicates from action lists. This operates only on transition and
945 * eof action lists and so should be called once all actions have been
946 * transfered to their final resting place. */
947 void ParseData::removeActionDups( FsmAp *graph )
949 /* Loop all states. */
950 for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
951 /* Loop all transitions. */
952 for ( TransList::Iter trans = state->outList; trans.lte(); trans++ )
953 removeDups( trans->actionTable );
954 removeDups( state->toStateActionTable );
955 removeDups( state->fromStateActionTable );
956 removeDups( state->eofActionTable );
960 Action *ParseData::newAction( char *name, InlineList *inlineList )
965 loc.fileName = "<NONE>";
967 Action *action = new Action( loc, name, inlineList, nextCondId++ );
968 action->actionRefs.append( rootName );
969 actionList.append( action );
973 void ParseData::initLongestMatchData()
975 if ( lmList.length() > 0 ) {
976 /* The initTokStart action resets the token start. */
977 InlineList *il1 = new InlineList;
978 il1->append( new InlineItem( InputLoc(), InlineItem::LmInitTokStart ) );
979 initTokStart = newAction( "initts", il1 );
980 initTokStart->isLmAction = true;
982 /* The initActId action gives act a default value. */
983 InlineList *il4 = new InlineList;
984 il4->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) );
985 initActId = newAction( "initact", il4 );
986 initActId->isLmAction = true;
988 /* The setTokStart action sets tokstart. */
989 InlineList *il5 = new InlineList;
990 il5->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) );
991 setTokStart = newAction( "tokstart", il5 );
992 setTokStart->isLmAction = true;
994 /* The setTokEnd action sets tokend. */
995 InlineList *il3 = new InlineList;
996 il3->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) );
997 setTokEnd = newAction( "tokend", il3 );
998 setTokEnd->isLmAction = true;
1000 /* The action will also need an ordering: ahead of all user action
1002 initTokStartOrd = curActionOrd++;
1003 initActIdOrd = curActionOrd++;
1004 setTokStartOrd = curActionOrd++;
1005 setTokEndOrd = curActionOrd++;
1009 /* After building the graph, do some extra processing to ensure the runtime
1010 * data of the longest mactch operators is consistent. */
1011 void ParseData::setLongestMatchData( FsmAp *graph )
1013 if ( lmList.length() > 0 ) {
1014 /* Make sure all entry points (targets of fgoto, fcall, fnext, fentry)
1015 * init the tokstart. */
1016 for ( EntryMap::Iter en = graph->entryPoints; en.lte(); en++ ) {
1017 /* This is run after duplicates are removed, we must guard against
1018 * inserting a duplicate. */
1019 ActionTable &actionTable = en->value->toStateActionTable;
1020 if ( ! actionTable.hasAction( initTokStart ) )
1021 actionTable.setAction( initTokStartOrd, initTokStart );
1024 /* Find the set of states that are the target of transitions with
1025 * actions that have calls. These states will be targeted by fret
1028 for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
1029 for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
1030 for ( ActionTable::Iter ati = trans->actionTable; ati.lte(); ati++ ) {
1031 if ( ati->value->anyCall && trans->toState != 0 )
1032 states.insert( trans->toState );
1038 /* Init tokstart upon entering the above collected states. */
1039 for ( StateSet::Iter ps = states; ps.lte(); ps++ ) {
1040 /* This is run after duplicates are removed, we must guard against
1041 * inserting a duplicate. */
1042 ActionTable &actionTable = (*ps)->toStateActionTable;
1043 if ( ! actionTable.hasAction( initTokStart ) )
1044 actionTable.setAction( initTokStartOrd, initTokStart );
1049 /* Make the graph from a graph dict node. Does minimization and state sorting. */
1050 FsmAp *ParseData::makeInstance( GraphDictEl *gdNode )
1052 /* Build the graph from a walk of the parse tree. */
1053 FsmAp *graph = gdNode->value->walk( this );
1055 /* Resolve any labels that point to multiple states. Any labels that are
1056 * still around are referenced only by gotos and calls and they need to be
1057 * made into deterministic entry points. */
1058 graph->deterministicEntry();
1061 * All state construction is now complete.
1064 /* Transfer global error actions. */
1065 for ( StateList::Iter state = graph->stateList; state.lte(); state++ )
1066 graph->transferErrorActions( state, 0 );
1068 removeActionDups( graph );
1070 /* Remove unreachable states. There should be no dead end states. The
1071 * subtract and intersection operators are the only places where they may
1072 * be created and those operators clean them up. */
1073 graph->removeUnreachableStates();
1075 /* No more fsm operations are to be done. Action ordering numbers are
1076 * no longer of use and will just hinder minimization. Clear them. */
1077 graph->nullActionKeys();
1079 /* Transition priorities are no longer of use. We can clear them
1080 * because they will just hinder minimization as well. Clear them. */
1081 graph->clearAllPriorities();
1083 if ( minimizeOpt != MinimizeNone ) {
1084 /* Minimize here even if we minimized at every op. Now that function
1085 * keys have been cleared we may get a more minimal fsm. */
1086 switch ( minimizeLevel ) {
1087 case MinimizeApprox:
1088 graph->minimizeApproximate();
1090 case MinimizeStable:
1091 graph->minimizeStable();
1093 case MinimizePartition1:
1094 graph->minimizePartition1();
1096 case MinimizePartition2:
1097 graph->minimizePartition2();
1102 graph->compressTransitions();
1107 void ParseData::printNameTree()
1109 /* Print the name instance map. */
1110 for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ )
1111 printNameInst( *name, 0 );
1113 cerr << "name index:" << endl;
1114 /* Show that the name index is correct. */
1115 for ( int ni = 0; ni < nextNameId; ni++ ) {
1117 char *name = nameIndex[ni]->name;
1118 cerr << ( name != 0 ? name : "<ANON>" ) << endl;
1122 FsmAp *ParseData::makeSpecific( GraphDictEl *gdNode )
1124 /* Build the name tree and supporting data structures. */
1125 makeNameTree( gdNode );
1127 /* Resove name references from gdNode. */
1129 gdNode->value->resolveNameRefs( this );
1131 /* Do not resolve action references. Since we are not building the entire
1132 * graph there's a good chance that many name references will fail. This
1133 * is okay since generating part of the graph is usually only done when
1134 * inspecting the compiled machine. */
1136 /* Same story for extern entry point references. */
1138 /* Flag this case so that the XML code generator is aware that we haven't
1139 * looked up name references in actions. It can then avoid segfaulting. */
1140 generatingSectionSubset = true;
1142 /* Just building the specified graph. */
1144 FsmAp *mainGraph = makeInstance( gdNode );
1149 FsmAp *ParseData::makeAll()
1151 /* Build the name tree and supporting data structures. */
1154 /* Resove name references in the tree. */
1156 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ )
1157 glel->value->resolveNameRefs( this );
1159 /* Resolve action code name references. */
1160 resolveActionNameRefs();
1162 /* Force name references to the top level instantiations. */
1163 for ( NameVect::Iter inst = rootName->childVect; inst.lte(); inst++ )
1164 (*inst)->numRefs += 1;
1166 FsmAp *mainGraph = 0;
1167 FsmAp **graphs = new FsmAp*[instanceList.length()];
1170 /* Make all the instantiations, we know that main exists in this list. */
1172 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
1173 if ( strcmp( glel->key, mainMachine ) == 0 ) {
1174 /* Main graph is always instantiated. */
1175 mainGraph = makeInstance( glel );
1178 /* Instantiate and store in others array. */
1179 graphs[numOthers++] = makeInstance( glel );
1183 if ( mainGraph == 0 )
1184 mainGraph = graphs[--numOthers];
1186 if ( numOthers > 0 ) {
1187 /* Add all the other graphs into main. */
1188 mainGraph->globOp( graphs, numOthers );
1195 void ParseData::analyzeAction( Action *action, InlineList *inlineList )
1197 /* FIXME: Actions used as conditions should be very constrained. */
1198 for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1199 if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr )
1200 action->anyCall = true;
1202 /* Need to recurse into longest match items. */
1203 if ( item->type == InlineItem::LmSwitch ) {
1204 LongestMatch *lm = item->longestMatch;
1205 for ( LmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) {
1206 if ( lmi->action != 0 )
1207 analyzeAction( action, lmi->action->inlineList );
1211 if ( item->type == InlineItem::LmOnLast ||
1212 item->type == InlineItem::LmOnNext ||
1213 item->type == InlineItem::LmOnLagBehind )
1215 LongestMatchPart *lmi = item->longestMatchPart;
1216 if ( lmi->action != 0 )
1217 analyzeAction( action, lmi->action->inlineList );
1220 if ( item->children != 0 )
1221 analyzeAction( action, item->children );
1226 /* Check actions for bad uses of fsm directives. We don't go inside longest
1227 * match items in actions created by ragel, since we just want the user
1229 void ParseData::checkInlineList( Action *act, InlineList *inlineList )
1231 for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1233 if ( act->numEofRefs > 0 ) {
1234 switch ( item->type ) {
1235 case InlineItem::PChar:
1236 error(item->loc) << "pointer to current element does not exist in "
1237 "EOF action code" << endl;
1239 case InlineItem::Char:
1240 error(item->loc) << "current element does not exist in "
1241 "EOF action code" << endl;
1243 case InlineItem::Hold:
1244 error(item->loc) << "changing the current element not possible in "
1245 "EOF action code" << endl;
1247 case InlineItem::Exec:
1248 error(item->loc) << "changing the current element not possible in "
1249 "EOF action code" << endl;
1251 case InlineItem::Goto: case InlineItem::Call:
1252 case InlineItem::Next: case InlineItem::GotoExpr:
1253 case InlineItem::CallExpr: case InlineItem::NextExpr:
1254 case InlineItem::Ret:
1255 error(item->loc) << "changing the current state not possible in "
1256 "EOF action code" << endl;
1264 if ( item->children != 0 )
1265 checkInlineList( act, item->children );
1269 void ParseData::checkAction( Action *action )
1271 /* Check for actions with calls that are embedded within a longest match
1273 if ( !action->isLmAction && action->numRefs() > 0 && action->anyCall ) {
1274 for ( ActionRefs::Iter ar = action->actionRefs; ar.lte(); ar++ ) {
1275 NameInst *check = *ar;
1276 while ( check != 0 ) {
1277 if ( check->isLongestMatch ) {
1278 error(action->loc) << "within a scanner, fcall is permitted"
1279 " only in pattern actions" << endl;
1282 check = check->parent;
1287 checkInlineList( action, action->inlineList );
1291 void ParseData::analyzeGraph( FsmAp *graph )
1293 for ( ActionList::Iter act = actionList; act.lte(); act++ )
1294 analyzeAction( act, act->inlineList );
1296 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
1297 /* The transition list. */
1298 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
1299 for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ )
1300 at->value->numTransRefs += 1;
1303 for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ )
1304 at->value->numToStateRefs += 1;
1306 for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ )
1307 at->value->numFromStateRefs += 1;
1309 for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ )
1310 at->value->numEofRefs += 1;
1312 for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) {
1313 for ( CondSet::Iter sci = sc->condSpace->condSet; sci.lte(); sci++ )
1314 (*sci)->numCondRefs += 1;
1318 /* Checks for bad usage of directives in action code. */
1319 for ( ActionList::Iter act = actionList; act.lte(); act++ )
1323 void ParseData::makeExportsNameTree()
1325 /* Make a name tree for the exports. */
1326 initExportsNameWalk();
1328 /* First make the name tree. */
1329 for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1330 if ( gdel->value->isExport ) {
1331 /* Recurse on the instance. */
1332 gdel->value->makeNameTree( gdel->loc, this );
1337 void ParseData::makeExports()
1339 makeExportsNameTree();
1341 /* Resove name references in the tree. */
1342 initExportsNameWalk();
1343 for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1344 if ( gdel->value->isExport )
1345 gdel->value->resolveNameRefs( this );
1348 /* Make all the instantiations, we know that main exists in this list. */
1349 initExportsNameWalk();
1350 for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1351 /* Check if this var def is an export. */
1352 if ( gdel->value->isExport ) {
1353 /* Build the graph from a walk of the parse tree. */
1354 FsmAp *graph = gdel->value->walk( this );
1356 /* Build the graph from a walk of the parse tree. */
1357 if ( !graph->checkSingleCharMachine() ) {
1358 error(gdel->loc) << "bad export machine, must define "
1359 "a single character" << endl;
1362 /* Safe to extract the key and declare the export. */
1363 Key exportKey = graph->startState->outList.head->lowKey;
1364 exportList.append( new Export( gdel->value->name, exportKey ) );
1371 /* Construct the machine and catch failures which can occur during
1373 void ParseData::prepareMachineGen( GraphDictEl *graphDictEl )
1376 /* This machine construction can fail. */
1377 prepareMachineGenTBWrapped( graphDictEl );
1379 catch ( FsmConstructFail fail ) {
1380 switch ( fail.reason ) {
1381 case FsmConstructFail::CondNoKeySpace: {
1382 InputLoc &loc = alphTypeSet ? alphTypeLoc : sectionLoc;
1383 error(loc) << "sorry, no more characters are "
1384 "available in the alphabet space" << endl;
1385 error(loc) << " for conditions, please use a "
1386 "smaller alphtype or reduce" << endl;
1387 error(loc) << " the span of characters on which "
1388 "conditions are embedded" << endl;
1395 void ParseData::prepareMachineGenTBWrapped( GraphDictEl *graphDictEl )
1400 initLongestMatchData();
1402 /* Make the graph, do minimization. */
1403 if ( graphDictEl == 0 )
1404 sectionGraph = makeAll();
1406 sectionGraph = makeSpecific( graphDictEl );
1408 /* Compute exports from the export definitions. */
1411 /* If any errors have occured in the input file then don't write anything. */
1412 if ( gblErrorCount > 0 )
1415 analyzeGraph( sectionGraph );
1417 /* Depends on the graph analysis. */
1418 setLongestMatchData( sectionGraph );
1420 /* Decide if an error state is necessary.
1421 * 1. There is an error transition
1422 * 2. There is a gap in the transitions
1423 * 3. The longest match operator requires it. */
1424 if ( lmRequiresErrorState || sectionGraph->hasErrorTrans() )
1425 sectionGraph->errState = sectionGraph->addState();
1427 /* State numbers need to be assigned such that all final states have a
1428 * larger state id number than all non-final states. This enables the
1429 * first_final mechanism to function correctly. We also want states to be
1430 * ordered in a predictable fashion. So we first apply a depth-first
1431 * search, then do a stable sort by final state status, then assign
1434 sectionGraph->depthFirstOrdering();
1435 sectionGraph->sortStatesByFinal();
1436 sectionGraph->setStateNumbers( 0 );
1439 void ParseData::generateXML( ostream &out )
1443 /* Make the generator. */
1444 XMLCodeGen codeGen( sectionName, this, sectionGraph, out );
1446 /* Write out with it. */
1449 if ( printStatistics ) {
1450 cerr << "fsm name : " << sectionName << endl;
1451 cerr << "num states: " << sectionGraph->stateList.length() << endl;
1456 /* Send eof to all parsers. */
1457 void terminateAllParsers( )
1459 /* FIXME: a proper token is needed here. Suppose we should use the
1460 * location of EOF in the last file that the parser was referenced in. */
1462 loc.fileName = "<EOF>";
1465 for ( ParserDict::Iter pdel = parserDict; pdel.lte(); pdel++ )
1466 pdel->value->token( loc, _eof, 0, 0 );
1469 void writeLanguage( std::ostream &out )
1472 switch ( hostLang->lang ) {
1473 case HostLang::C: out << "C"; break;
1474 case HostLang::D: out << "D"; break;
1475 case HostLang::Java: out << "Java"; break;
1476 case HostLang::Ruby: out << "Ruby"; break;
1482 void writeMachines( std::ostream &out, std::string hostData, char *inputFileName )
1484 if ( machineSpec == 0 && machineName == 0 ) {
1485 /* No machine spec or machine name given. Generate everything. */
1486 for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1487 ParseData *pd = parser->value->pd;
1488 if ( pd->instanceList.length() > 0 )
1489 pd->prepareMachineGen( 0 );
1492 if ( gblErrorCount == 0 ) {
1493 out << "<ragel version=\"" VERSION "\" filename=\"" << inputFileName << "\"";
1494 writeLanguage( out );
1496 for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1497 ParseData *pd = parser->value->pd;
1498 if ( pd->instanceList.length() > 0 )
1499 pd->generateXML( out );
1502 out << "</ragel>\n";
1505 else if ( parserDict.length() > 0 ) {
1506 /* There is either a machine spec or machine name given. */
1507 ParseData *parseData = 0;
1508 GraphDictEl *graphDictEl = 0;
1510 /* Traverse the sections, break out when we find a section/machine
1511 * that matches the one specified. */
1512 for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1513 ParseData *checkPd = parser->value->pd;
1514 if ( machineSpec == 0 || strcmp( checkPd->sectionName, machineSpec ) == 0 ) {
1515 GraphDictEl *checkGdEl = 0;
1516 if ( machineName == 0 || (checkGdEl =
1517 checkPd->graphDict.find( machineName )) != 0 )
1519 /* Have a machine spec and/or machine name that matches
1520 * the -M/-S options. */
1521 parseData = checkPd;
1522 graphDictEl = checkGdEl;
1528 if ( parseData == 0 )
1529 error() << "could not locate machine specified with -S and/or -M" << endl;
1531 /* Section/Machine to emit was found. Prepare and emit it. */
1532 parseData->prepareMachineGen( graphDictEl );
1533 if ( gblErrorCount == 0 ) {
1534 out << "<ragel version=\"" VERSION "\" filename=\"" << inputFileName << "\"";
1535 writeLanguage( out );
1537 parseData->generateXML( out );
1539 out << "</ragel>\n";