ragel/parsedata.cpp

   1 /*
   2  *  Copyright 2001-2008 Adrian Thurston <thurston@complang.org>
   3  */
   4
   5 /*  This file is part of Ragel.
   6  *
   7  *  Ragel is free software; you can redistribute it and/or modify
   8  *  it under the terms of the GNU General Public License as published by
   9  *  the Free Software Foundation; either version 2 of the License, or
  10  *  (at your option) any later version.
  11  *
  12  *  Ragel is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  *  GNU General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU General Public License
  18  *  along with Ragel; if not, write to the Free Software
  19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  */
  21
  22 #include <iostream>
  23 #include <iomanip>
  24 #include <errno.h>
  25 #include <stdlib.h>
  26 #include <limits.h>
  27
  28 #include "ragel.h"
  29 #include "rlparse.h"
  30 #include "parsedata.h"
  31 #include "parsetree.h"
  32 #include "mergesort.h"
  33 #include "xmlcodegen.h"
  34 #include "version.h"
  35 #include "inputdata.h"
  36
  37 using namespace std;
  38
  39 char mainMachine[] = "main";
  40
  41 void Token::set( const char *str, int len )
  42 {
  43         length = len;
  44         data = new char[len+1];
  45         memcpy( data, str, len );
  46         data[len] = 0;
  47 }
  48
  49 void Token::append( const Token &other )
  50 {
  51         int newLength = length + other.length;
  52         char *newString = new char[newLength+1];
  53         memcpy( newString, data, length );
  54         memcpy( newString + length, other.data, other.length );
  55         newString[newLength] = 0;
  56         data = newString;
  57         length = newLength;
  58 }
  59
  60 /* Perform minimization after an operation according
  61  * to the command line args. */
  62 void afterOpMinimize( FsmAp *fsm, bool lastInSeq )
  63 {
  64         /* Switch on the prefered minimization algorithm. */
  65         if ( minimizeOpt == MinimizeEveryOp || ( minimizeOpt == MinimizeMostOps && lastInSeq ) ) {
  66                 /* First clean up the graph. FsmAp operations may leave these
  67                  * lying around. There should be no dead end states. The subtract
  68                  * intersection operators are the only places where they may be
  69                  * created and those operators clean them up. */
  70                 fsm->removeUnreachableStates();
  71
  72                 switch ( minimizeLevel ) {
  73                         case MinimizeApprox:
  74                                 fsm->minimizeApproximate();
  75                                 break;
  76                         case MinimizePartition1:
  77                                 fsm->minimizePartition1();
  78                                 break;
  79                         case MinimizePartition2:
  80                                 fsm->minimizePartition2();
  81                                 break;
  82                         case MinimizeStable:
  83                                 fsm->minimizeStable();
  84                                 break;
  85                 }
  86         }
  87 }
  88
  89 /* Count the transitions in the fsm by walking the state list. */
  90 int countTransitions( FsmAp *fsm )
  91 {
  92         int numTrans = 0;
  93         StateAp *state = fsm->stateList.head;
  94         while ( state != 0 ) {
  95                 numTrans += state->outList.length();
  96                 state = state->next;
  97         }
  98         return numTrans;
  99 }
 100
 101 Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd )
 102 {
 103         /* Reset errno so we can check for overflow or underflow. In the event of
 104          * an error, sets the return val to the upper or lower bound being tested
 105          * against. */
 106         errno = 0;
 107         unsigned int size = keyOps->alphType->size;
 108         bool unusedBits = size < sizeof(unsigned long);
 109
 110         unsigned long ul = strtoul( str, 0, 16 );
 111
 112         if ( errno == ERANGE || ( unusedBits && ul >> (size * 8) ) ) {
 113                 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
 114                 ul = 1 << (size * 8);
 115         }
 116
 117         if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) )
 118                 ul |= ( -1L >> (size*8) ) << (size*8);
 119
 120         return Key( (long)ul );
 121 }
 122
 123 Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd )
 124 {
 125         /* Convert the number to a decimal. First reset errno so we can check
 126          * for overflow or underflow. */
 127         errno = 0;
 128         long long minVal = keyOps->alphType->minVal;
 129         long long maxVal = keyOps->alphType->maxVal;
 130
 131         long long ll = strtoll( str, 0, 10 );
 132
 133         /* Check for underflow. */
 134         if ( ( errno == ERANGE && ll < 0 ) || ll < minVal) {
 135                 error(loc) << "literal " << str << " underflows the alphabet type" << endl;
 136                 ll = minVal;
 137         }
 138         /* Check for overflow. */
 139         else if ( ( errno == ERANGE && ll > 0 ) || ll > maxVal ) {
 140                 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
 141                 ll = maxVal;
 142         }
 143
 144         if ( keyOps->alphType->isSigned )
 145                 return Key( (long)ll );
 146         else
 147                 return Key( (unsigned long)ll );
 148 }
 149
 150 /* Make an fsm key in int format (what the fsm graph uses) from an alphabet
 151  * number returned by the parser. Validates that the number doesn't overflow
 152  * the alphabet type. */
 153 Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd )
 154 {
 155         /* Switch on hex/decimal format. */
 156         if ( str[0] == '0' && str[1] == 'x' )
 157                 return makeFsmKeyHex( str, loc, pd );
 158         else
 159                 return makeFsmKeyDec( str, loc, pd );
 160 }
 161
 162 /* Make an fsm int format (what the fsm graph uses) from a single character.
 163  * Performs proper conversion depending on signed/unsigned property of the
 164  * alphabet. */
 165 Key makeFsmKeyChar( char c, ParseData *pd )
 166 {
 167         if ( keyOps->isSigned ) {
 168                 /* Copy from a char type. */
 169                 return Key( c );
 170         }
 171         else {
 172                 /* Copy from an unsigned byte type. */
 173                 return Key( (unsigned char)c );
 174         }
 175 }
 176
 177 /* Make an fsm key array in int format (what the fsm graph uses) from a string
 178  * of characters. Performs proper conversion depending on signed/unsigned
 179  * property of the alphabet. */
 180 void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd )
 181 {
 182         if ( keyOps->isSigned ) {
 183                 /* Copy from a char star type. */
 184                 char *src = data;
 185                 for ( int i = 0; i < len; i++ )
 186                         result[i] = Key(src[i]);
 187         }
 188         else {
 189                 /* Copy from an unsigned byte ptr type. */
 190                 unsigned char *src = (unsigned char*) data;
 191                 for ( int i = 0; i < len; i++ )
 192                         result[i] = Key(src[i]);
 193         }
 194 }
 195
 196 /* Like makeFsmKeyArray except the result has only unique keys. They ordering
 197  * will be changed. */
 198 void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
 199                 bool caseInsensitive, ParseData *pd )
 200 {
 201         /* Use a transitions list for getting unique keys. */
 202         if ( keyOps->isSigned ) {
 203                 /* Copy from a char star type. */
 204                 char *src = data;
 205                 for ( int si = 0; si < len; si++ ) {
 206                         Key key( src[si] );
 207                         result.insert( key );
 208                         if ( caseInsensitive ) {
 209                                 if ( key.isLower() )
 210                                         result.insert( key.toUpper() );
 211                                 else if ( key.isUpper() )
 212                                         result.insert( key.toLower() );
 213                         }
 214                 }
 215         }
 216         else {
 217                 /* Copy from an unsigned byte ptr type. */
 218                 unsigned char *src = (unsigned char*) data;
 219                 for ( int si = 0; si < len; si++ ) {
 220                         Key key( src[si] );
 221                         result.insert( key );
 222                         if ( caseInsensitive ) {
 223                                 if ( key.isLower() )
 224                                         result.insert( key.toUpper() );
 225                                 else if ( key.isUpper() )
 226                                         result.insert( key.toLower() );
 227                         }
 228                 }
 229         }
 230 }
 231
 232 FsmAp *dotFsm( ParseData *pd )
 233 {
 234         FsmAp *retFsm = new FsmAp();
 235         retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey );
 236         return retFsm;
 237 }
 238
 239 FsmAp *dotStarFsm( ParseData *pd )
 240 {
 241         FsmAp *retFsm = new FsmAp();
 242         retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey );
 243         return retFsm;
 244 }
 245
 246 /* Make a builtin type. Depends on the signed nature of the alphabet type. */
 247 FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd )
 248 {
 249         /* FsmAp created to return. */
 250         FsmAp *retFsm = 0;
 251         bool isSigned = keyOps->isSigned;
 252
 253         switch ( builtin ) {
 254         case BT_Any: {
 255                 /* All characters. */
 256                 retFsm = dotFsm( pd );
 257                 break;
 258         }
 259         case BT_Ascii: {
 260                 /* Ascii characters 0 to 127. */
 261                 retFsm = new FsmAp();
 262                 retFsm->rangeFsm( 0, 127 );
 263                 break;
 264         }
 265         case BT_Extend: {
 266                 /* Ascii extended characters. This is the full byte range. Dependent
 267                  * on signed, vs no signed. If the alphabet is one byte then just use
 268                  * dot fsm. */
 269                 if ( isSigned ) {
 270                         retFsm = new FsmAp();
 271                         retFsm->rangeFsm( -128, 127 );
 272                 }
 273                 else {
 274                         retFsm = new FsmAp();
 275                         retFsm->rangeFsm( 0, 255 );
 276                 }
 277                 break;
 278         }
 279         case BT_Alpha: {
 280                 /* Alpha [A-Za-z]. */
 281                 FsmAp *upper = new FsmAp(), *lower = new FsmAp();
 282                 upper->rangeFsm( 'A', 'Z' );
 283                 lower->rangeFsm( 'a', 'z' );
 284                 upper->unionOp( lower );
 285                 upper->minimizePartition2();
 286                 retFsm = upper;
 287                 break;
 288         }
 289         case BT_Digit: {
 290                 /* Digits [0-9]. */
 291                 retFsm = new FsmAp();
 292                 retFsm->rangeFsm( '0', '9' );
 293                 break;
 294         }
 295         case BT_Alnum: {
 296                 /* Alpha numerics [0-9A-Za-z]. */
 297                 FsmAp *digit = new FsmAp(), *lower = new FsmAp();
 298                 FsmAp *upper = new FsmAp();
 299                 digit->rangeFsm( '0', '9' );
 300                 upper->rangeFsm( 'A', 'Z' );
 301                 lower->rangeFsm( 'a', 'z' );
 302                 digit->unionOp( upper );
 303                 digit->unionOp( lower );
 304                 digit->minimizePartition2();
 305                 retFsm = digit;
 306                 break;
 307         }
 308         case BT_Lower: {
 309                 /* Lower case characters. */
 310                 retFsm = new FsmAp();
 311                 retFsm->rangeFsm( 'a', 'z' );
 312                 break;
 313         }
 314         case BT_Upper: {
 315                 /* Upper case characters. */
 316                 retFsm = new FsmAp();
 317                 retFsm->rangeFsm( 'A', 'Z' );
 318                 break;
 319         }
 320         case BT_Cntrl: {
 321                 /* Control characters. */
 322                 FsmAp *cntrl = new FsmAp();
 323                 FsmAp *highChar = new FsmAp();
 324                 cntrl->rangeFsm( 0, 31 );
 325                 highChar->concatFsm( 127 );
 326                 cntrl->unionOp( highChar );
 327                 cntrl->minimizePartition2();
 328                 retFsm = cntrl;
 329                 break;
 330         }
 331         case BT_Graph: {
 332                 /* Graphical ascii characters [!-~]. */
 333                 retFsm = new FsmAp();
 334                 retFsm->rangeFsm( '!', '~' );
 335                 break;
 336         }
 337         case BT_Print: {
 338                 /* Printable characters. Same as graph except includes space. */
 339                 retFsm = new FsmAp();
 340                 retFsm->rangeFsm( ' ', '~' );
 341                 break;
 342         }
 343         case BT_Punct: {
 344                 /* Punctuation. */
 345                 FsmAp *range1 = new FsmAp();
 346                 FsmAp *range2 = new FsmAp();
 347                 FsmAp *range3 = new FsmAp();
 348                 FsmAp *range4 = new FsmAp();
 349                 range1->rangeFsm( '!', '/' );
 350                 range2->rangeFsm( ':', '@' );
 351                 range3->rangeFsm( '[', '`' );
 352                 range4->rangeFsm( '{', '~' );
 353                 range1->unionOp( range2 );
 354                 range1->unionOp( range3 );
 355                 range1->unionOp( range4 );
 356                 range1->minimizePartition2();
 357                 retFsm = range1;
 358                 break;
 359         }
 360         case BT_Space: {
 361                 /* Whitespace: [\t\v\f\n\r ]. */
 362                 FsmAp *cntrl = new FsmAp();
 363                 FsmAp *space = new FsmAp();
 364                 cntrl->rangeFsm( '\t', '\r' );
 365                 space->concatFsm( ' ' );
 366                 cntrl->unionOp( space );
 367                 cntrl->minimizePartition2();
 368                 retFsm = cntrl;
 369                 break;
 370         }
 371         case BT_Xdigit: {
 372                 /* Hex digits [0-9A-Fa-f]. */
 373                 FsmAp *digit = new FsmAp();
 374                 FsmAp *upper = new FsmAp();
 375                 FsmAp *lower = new FsmAp();
 376                 digit->rangeFsm( '0', '9' );
 377                 upper->rangeFsm( 'A', 'F' );
 378                 lower->rangeFsm( 'a', 'f' );
 379                 digit->unionOp( upper );
 380                 digit->unionOp( lower );
 381                 digit->minimizePartition2();
 382                 retFsm = digit;
 383                 break;
 384         }
 385         case BT_Lambda: {
 386                 retFsm = new FsmAp();
 387                 retFsm->lambdaFsm();
 388                 break;
 389         }
 390         case BT_Empty: {
 391                 retFsm = new FsmAp();
 392                 retFsm->emptyFsm();
 393                 break;
 394         }}
 395
 396         return retFsm;
 397 }
 398
 399 /* Check if this name inst or any name inst below is referenced. */
 400 bool NameInst::anyRefsRec()
 401 {
 402         if ( numRefs > 0 )
 403                 return true;
 404
 405         /* Recurse on children until true. */
 406         for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) {
 407                 if ( (*ch)->anyRefsRec() )
 408                         return true;
 409         }
 410
 411         return false;
 412 }
 413
 414 /*
 415  * ParseData
 416  */
 417
 418 /* Initialize the structure that will collect info during the parse of a
 419  * machine. */
 420 ParseData::ParseData( const char *fileName, char *sectionName,
 421                 const InputLoc &sectionLoc )
 422 :
 423         sectionGraph(0),
 424         generatingSectionSubset(false),
 425         nextPriorKey(0),
 426         /* 0 is reserved for global error actions. */
 427         nextLocalErrKey(1),
 428         nextNameId(0),
 429         nextCondId(0),
 430         alphTypeSet(false),
 431         getKeyExpr(0),
 432         accessExpr(0),
 433         prePushExpr(0),
 434         postPopExpr(0),
 435         pExpr(0),
 436         peExpr(0),
 437         eofExpr(0),
 438         csExpr(0),
 439         topExpr(0),
 440         stackExpr(0),
 441         actExpr(0),
 442         tokstartExpr(0),
 443         tokendExpr(0),
 444         dataExpr(0),
 445         lowerNum(0),
 446         upperNum(0),
 447         fileName(fileName),
 448         sectionName(sectionName),
 449         sectionLoc(sectionLoc),
 450         curActionOrd(0),
 451         curPriorOrd(0),
 452         rootName(0),
 453         exportsRootName(0),
 454         nextEpsilonResolvedLink(0),
 455         nextLongestMatchId(1),
 456         lmRequiresErrorState(false),
 457         cgd(0)
 458 {
 459         /* Initialize the dictionary of graphs. This is our symbol table. The
 460          * initialization needs to be done on construction which happens at the
 461          * beginning of a machine spec so any assignment operators can reference
 462          * the builtins. */
 463         initGraphDict();
 464 }
 465
 466 /* Clean up the data collected during a parse. */
 467 ParseData::~ParseData()
 468 {
 469         /* Delete all the nodes in the action list. Will cause all the
 470          * string data that represents the actions to be deallocated. */
 471         actionList.empty();
 472 }
 473
 474 /* Make a name id in the current name instantiation scope if it is not
 475  * already there. */
 476 NameInst *ParseData::addNameInst( const InputLoc &loc, const char *data, bool isLabel )
 477 {
 478         /* Create the name instantitaion object and insert it. */
 479         NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel );
 480         curNameInst->childVect.append( newNameInst );
 481         if ( data != 0 )
 482                 curNameInst->children.insertMulti( data, newNameInst );
 483         return newNameInst;
 484 }
 485
 486 void ParseData::initNameWalk()
 487 {
 488         curNameInst = rootName;
 489         curNameChild = 0;
 490 }
 491
 492 void ParseData::initExportsNameWalk()
 493 {
 494         curNameInst = exportsRootName;
 495         curNameChild = 0;
 496 }
 497
 498 /* Goes into the next child scope. The number of the child is already set up.
 499  * We need this for the syncronous name tree and parse tree walk to work
 500  * properly. It is reset on entry into a scope and advanced on poping of a
 501  * scope. A call to enterNameScope should be accompanied by a corresponding
 502  * popNameScope. */
 503 NameFrame ParseData::enterNameScope( bool isLocal, int numScopes )
 504 {
 505         /* Save off the current data. */
 506         NameFrame retFrame;
 507         retFrame.prevNameInst = curNameInst;
 508         retFrame.prevNameChild = curNameChild;
 509         retFrame.prevLocalScope = localNameScope;
 510
 511         /* Enter into the new name scope. */
 512         for ( int i = 0; i < numScopes; i++ ) {
 513                 curNameInst = curNameInst->childVect[curNameChild];
 514                 curNameChild = 0;
 515         }
 516
 517         if ( isLocal )
 518                 localNameScope = curNameInst;
 519
 520         return retFrame;
 521 }
 522
 523 /* Return from a child scope to a parent. The parent info must be specified as
 524  * an argument and is obtained from the corresponding call to enterNameScope.
 525  * */
 526 void ParseData::popNameScope( const NameFrame &frame )
 527 {
 528         /* Pop the name scope. */
 529         curNameInst = frame.prevNameInst;
 530         curNameChild = frame.prevNameChild+1;
 531         localNameScope = frame.prevLocalScope;
 532 }
 533
 534 void ParseData::resetNameScope( const NameFrame &frame )
 535 {
 536         /* Pop the name scope. */
 537         curNameInst = frame.prevNameInst;
 538         curNameChild = frame.prevNameChild;
 539         localNameScope = frame.prevLocalScope;
 540 }
 541
 542
 543 void ParseData::unsetObsoleteEntries( FsmAp *graph )
 544 {
 545         /* Loop the reference names and increment the usage. Names that are no
 546          * longer needed will be unset in graph. */
 547         for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) {
 548                 /* Get the name. */
 549                 NameInst *name = *ref;
 550                 name->numUses += 1;
 551
 552                 /* If the name is no longer needed unset its corresponding entry. */
 553                 if ( name->numUses == name->numRefs ) {
 554                         assert( graph->entryPoints.find( name->id ) != 0 );
 555                         graph->unsetEntry( name->id );
 556                         assert( graph->entryPoints.find( name->id ) == 0 );
 557                 }
 558         }
 559 }
 560
 561 NameSet ParseData::resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly )
 562 {
 563         /* Queue needed for breadth-first search, load it with the start node. */
 564         NameInstList nameQueue;
 565         nameQueue.append( refFrom );
 566
 567         NameSet result;
 568         while ( nameQueue.length() > 0 ) {
 569                 /* Pull the next from location off the queue. */
 570                 NameInst *from = nameQueue.detachFirst();
 571
 572                 /* Look for the name. */
 573                 NameMapEl *low, *high;
 574                 if ( from->children.findMulti( data, low, high ) ) {
 575                         /* Record all instances of the name. */
 576                         for ( ; low <= high; low++ )
 577                                 result.insert( low->value );
 578                 }
 579
 580                 /* Name not there, do breadth-first operation of appending all
 581                  * childrent to the processing queue. */
 582                 for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) {
 583                         if ( !recLabelsOnly || (*name)->isLabel )
 584                                 nameQueue.append( *name );
 585                 }
 586         }
 587
 588         /* Queue exhausted and name never found. */
 589         return result;
 590 }
 591
 592 void ParseData::resolveFrom( NameSet &result, NameInst *refFrom,
 593                 const NameRef &nameRef, int namePos )
 594 {
 595         /* Look for the name in the owning scope of the factor with aug. */
 596         NameSet partResult = resolvePart( refFrom, nameRef[namePos], false );
 597
 598         /* If there are more parts to the name then continue on. */
 599         if ( ++namePos < nameRef.length() ) {
 600                 /* There are more components to the name, search using all the part
 601                  * results as the base. */
 602                 for ( NameSet::Iter name = partResult; name.lte(); name++ )
 603                         resolveFrom( result, *name, nameRef, namePos );
 604         }
 605         else {
 606                 /* This is the last component, append the part results to the final
 607                  * results. */
 608                 result.insert( partResult );
 609         }
 610 }
 611
 612 /* Write out a name reference. */
 613 ostream &operator<<( ostream &out, const NameRef &nameRef )
 614 {
 615         int pos = 0;
 616         if ( nameRef[pos] == 0 ) {
 617                 out << "::";
 618                 pos += 1;
 619         }
 620         out << nameRef[pos++];
 621         for ( ; pos < nameRef.length(); pos++ )
 622                 out << "::" << nameRef[pos];
 623         return out;
 624 }
 625
 626 ostream &operator<<( ostream &out, const NameInst &nameInst )
 627 {
 628         /* Count the number fully qualified name parts. */
 629         int numParents = 0;
 630         NameInst *curParent = nameInst.parent;
 631         while ( curParent != 0 ) {
 632                 numParents += 1;
 633                 curParent = curParent->parent;
 634         }
 635
 636         /* Make an array and fill it in. */
 637         curParent = nameInst.parent;
 638         NameInst **parents = new NameInst*[numParents];
 639         for ( int p = numParents-1; p >= 0; p-- ) {
 640                 parents[p] = curParent;
 641                 curParent = curParent->parent;
 642         }
 643
 644         /* Write the parents out, skip the root. */
 645         for ( int p = 1; p < numParents; p++ )
 646                 out << "::" << ( parents[p]->name != 0 ? parents[p]->name : "<ANON>" );
 647
 648         /* Write the name and cleanup. */
 649         out << "::" << ( nameInst.name != 0 ? nameInst.name : "<ANON>" );
 650         delete[] parents;
 651         return out;
 652 }
 653
 654 struct CmpNameInstLoc
 655 {
 656         static int compare( const NameInst *ni1, const NameInst *ni2 )
 657         {
 658                 if ( ni1->loc.line < ni2->loc.line )
 659                         return -1;
 660                 else if ( ni1->loc.line > ni2->loc.line )
 661                         return 1;
 662                 else if ( ni1->loc.col < ni2->loc.col )
 663                         return -1;
 664                 else if ( ni1->loc.col > ni2->loc.col )
 665                         return 1;
 666                 return 0;
 667         }
 668 };
 669
 670 void errorStateLabels( const NameSet &resolved )
 671 {
 672         MergeSort<NameInst*, CmpNameInstLoc> mergeSort;
 673         mergeSort.sort( resolved.data, resolved.length() );
 674         for ( NameSet::Iter res = resolved; res.lte(); res++ )
 675                 error((*res)->loc) << "  -> " << **res << endl;
 676 }
 677
 678
 679 NameInst *ParseData::resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action )
 680 {
 681         NameInst *nameInst = 0;
 682
 683         /* Do the local search if the name is not strictly a root level name
 684          * search. */
 685         if ( nameRef[0] != 0 ) {
 686                 /* If the action is referenced, resolve all of them. */
 687                 if ( action != 0 && action->actionRefs.length() > 0 ) {
 688                         /* Look for the name in all referencing scopes. */
 689                         NameSet resolved;
 690                         for ( ActionRefs::Iter actRef = action->actionRefs; actRef.lte(); actRef++ )
 691                                 resolveFrom( resolved, *actRef, nameRef, 0 );
 692
 693                         if ( resolved.length() > 0 ) {
 694                                 /* Take the first one. */
 695                                 nameInst = resolved[0];
 696                                 if ( resolved.length() > 1 ) {
 697                                         /* Complain about the multiple references. */
 698                                         error(loc) << "state reference " << nameRef <<
 699                                                         " resolves to multiple entry points" << endl;
 700                                         errorStateLabels( resolved );
 701                                 }
 702                         }
 703                 }
 704         }
 705
 706         /* If not found in the local scope, look in global. */
 707         if ( nameInst == 0 ) {
 708                 NameSet resolved;
 709                 int fromPos = nameRef[0] != 0 ? 0 : 1;
 710                 resolveFrom( resolved, rootName, nameRef, fromPos );
 711
 712                 if ( resolved.length() > 0 ) {
 713                         /* Take the first. */
 714                         nameInst = resolved[0];
 715                         if ( resolved.length() > 1 ) {
 716                                 /* Complain about the multiple references. */
 717                                 error(loc) << "state reference " << nameRef <<
 718                                                 " resolves to multiple entry points" << endl;
 719                                 errorStateLabels( resolved );
 720                         }
 721                 }
 722         }
 723
 724         if ( nameInst == 0 ) {
 725                 /* If not found then complain. */
 726                 error(loc) << "could not resolve state reference " << nameRef << endl;
 727         }
 728         return nameInst;
 729 }
 730
 731 void ParseData::resolveNameRefs( InlineList *inlineList, Action *action )
 732 {
 733         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
 734                 switch ( item->type ) {
 735                         case InlineItem::Entry: case InlineItem::Goto:
 736                         case InlineItem::Call: case InlineItem::Next: {
 737                                 /* Resolve, pass action for local search. */
 738                                 NameInst *target = resolveStateRef( *item->nameRef, item->loc, action );
 739
 740                                 /* Name lookup error reporting is handled by resolveStateRef. */
 741                                 if ( target != 0 ) {
 742                                         /* Check if the target goes into a longest match. */
 743                                         NameInst *search = target->parent;
 744                                         while ( search != 0 ) {
 745                                                 if ( search->isLongestMatch ) {
 746                                                         error(item->loc) << "cannot enter inside a longest "
 747                                                                         "match construction as an entry point" << endl;
 748                                                         break;
 749                                                 }
 750                                                 search = search->parent;
 751                                         }
 752
 753                                         /* Record the reference in the name. This will cause the
 754                                          * entry point to survive to the end of the graph
 755                                          * generating walk. */
 756                                         target->numRefs += 1;
 757                                 }
 758
 759                                 item->nameTarg = target;
 760                                 break;
 761                         }
 762                         default:
 763                                 break;
 764                 }
 765
 766                 /* Some of the item types may have children. */
 767                 if ( item->children != 0 )
 768                         resolveNameRefs( item->children, action );
 769         }
 770 }
 771
 772 /* Resolve references to labels in actions. */
 773 void ParseData::resolveActionNameRefs()
 774 {
 775         for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
 776                 /* Only care about the actions that are referenced. */
 777                 if ( act->actionRefs.length() > 0 )
 778                         resolveNameRefs( act->inlineList, act );
 779         }
 780 }
 781
 782 /* Walk a name tree starting at from and fill the name index. */
 783 void ParseData::fillNameIndex( NameInst *from )
 784 {
 785         /* Fill the value for from in the name index. */
 786         nameIndex[from->id] = from;
 787
 788         /* Recurse on the implicit final state and then all children. */
 789         if ( from->final != 0 )
 790                 fillNameIndex( from->final );
 791         for ( NameVect::Iter name = from->childVect; name.lte(); name++ )
 792                 fillNameIndex( *name );
 793 }
 794
 795 void ParseData::makeRootNames()
 796 {
 797         /* Create the root name. */
 798         rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
 799         exportsRootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
 800 }
 801
 802 /* Build the name tree and supporting data structures. */
 803 void ParseData::makeNameTree( GraphDictEl *dictEl )
 804 {
 805         /* Set up curNameInst for the walk. */
 806         initNameWalk();
 807
 808         if ( dictEl != 0 ) {
 809                 /* A start location has been specified. */
 810                 dictEl->value->makeNameTree( dictEl->loc, this );
 811         }
 812         else {
 813                 /* First make the name tree. */
 814                 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
 815                         /* Recurse on the instance. */
 816                         glel->value->makeNameTree( glel->loc, this );
 817                 }
 818         }
 819
 820         /* The number of nodes in the tree can now be given by nextNameId */
 821         nameIndex = new NameInst*[nextNameId];
 822         memset( nameIndex, 0, sizeof(NameInst*)*nextNameId );
 823         fillNameIndex( rootName );
 824         fillNameIndex( exportsRootName );
 825 }
 826
 827
 828 void ParseData::createBuiltin( const char *name, BuiltinMachine builtin )
 829 {
 830         Expression *expression = new Expression( builtin );
 831         Join *join = new Join( expression );
 832         MachineDef *machineDef = new MachineDef( join );
 833         VarDef *varDef = new VarDef( name, machineDef );
 834         GraphDictEl *graphDictEl = new GraphDictEl( name, varDef );
 835         graphDict.insert( graphDictEl );
 836 }
 837
 838 /* Initialize the graph dict with builtin types. */
 839 void ParseData::initGraphDict( )
 840 {
 841         createBuiltin( "any", BT_Any );
 842         createBuiltin( "ascii", BT_Ascii );
 843         createBuiltin( "extend", BT_Extend );
 844         createBuiltin( "alpha", BT_Alpha );
 845         createBuiltin( "digit", BT_Digit );
 846         createBuiltin( "alnum", BT_Alnum );
 847         createBuiltin( "lower", BT_Lower );
 848         createBuiltin( "upper", BT_Upper );
 849         createBuiltin( "cntrl", BT_Cntrl );
 850         createBuiltin( "graph", BT_Graph );
 851         createBuiltin( "print", BT_Print );
 852         createBuiltin( "punct", BT_Punct );
 853         createBuiltin( "space", BT_Space );
 854         createBuiltin( "xdigit", BT_Xdigit );
 855         createBuiltin( "null", BT_Lambda );
 856         createBuiltin( "zlen", BT_Lambda );
 857         createBuiltin( "empty", BT_Empty );
 858 }
 859
 860 /* Set the alphabet type. If the types are not valid returns false. */
 861 bool ParseData::setAlphType( const InputLoc &loc, char *s1, char *s2 )
 862 {
 863         alphTypeLoc = loc;
 864         userAlphType = findAlphType( s1, s2 );
 865         alphTypeSet = true;
 866         return userAlphType != 0;
 867 }
 868
 869 /* Set the alphabet type. If the types are not valid returns false. */
 870 bool ParseData::setAlphType( const InputLoc &loc, char *s1 )
 871 {
 872         alphTypeLoc = loc;
 873         userAlphType = findAlphType( s1 );
 874         alphTypeSet = true;
 875         return userAlphType != 0;
 876 }
 877
 878 bool ParseData::setVariable( char *var, InlineList *inlineList )
 879 {
 880         bool set = true;
 881
 882         if ( strcmp( var, "p" ) == 0 )
 883                 pExpr = inlineList;
 884         else if ( strcmp( var, "pe" ) == 0 )
 885                 peExpr = inlineList;
 886         else if ( strcmp( var, "eof" ) == 0 )
 887                 eofExpr = inlineList;
 888         else if ( strcmp( var, "cs" ) == 0 )
 889                 csExpr = inlineList;
 890         else if ( strcmp( var, "data" ) == 0 )
 891                 dataExpr = inlineList;
 892         else if ( strcmp( var, "top" ) == 0 )
 893                 topExpr = inlineList;
 894         else if ( strcmp( var, "stack" ) == 0 )
 895                 stackExpr = inlineList;
 896         else if ( strcmp( var, "act" ) == 0 )
 897                 actExpr = inlineList;
 898         else if ( strcmp( var, "ts" ) == 0 )
 899                 tokstartExpr = inlineList;
 900         else if ( strcmp( var, "te" ) == 0 )
 901                 tokendExpr = inlineList;
 902         else
 903                 set = false;
 904
 905         return set;
 906 }
 907
 908 /* Initialize the key operators object that will be referenced by all fsms
 909  * created. */
 910 void ParseData::initKeyOps( )
 911 {
 912         /* Signedness and bounds. */
 913         HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType;
 914         thisKeyOps.setAlphType( alphType );
 915
 916         if ( lowerNum != 0 ) {
 917                 /* If ranges are given then interpret the alphabet type. */
 918                 thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this );
 919                 thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
 920         }
 921
 922         thisCondData.lastCondKey = thisKeyOps.maxKey;
 923 }
 924
 925 void ParseData::printNameInst( NameInst *nameInst, int level )
 926 {
 927         for ( int i = 0; i < level; i++ )
 928                 cerr << "  ";
 929         cerr << (nameInst->name != 0 ? nameInst->name : "<ANON>") <<
 930                         "  id: " << nameInst->id <<
 931                         "  refs: " << nameInst->numRefs <<
 932                         "  uses: " << nameInst->numUses << endl;
 933         for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ )
 934                 printNameInst( *name, level+1 );
 935 }
 936
 937 /* Remove duplicates of unique actions from an action table. */
 938 void ParseData::removeDups( ActionTable &table )
 939 {
 940         /* Scan through the table looking for unique actions to
 941          * remove duplicates of. */
 942         for ( int i = 0; i < table.length(); i++ ) {
 943                 /* Remove any duplicates ahead of i. */
 944                 for ( int r = i+1; r < table.length(); ) {
 945                         if ( table[r].value == table[i].value )
 946                                 table.vremove(r);
 947                         else
 948                                 r += 1;
 949                 }
 950         }
 951 }
 952
 953 /* Remove duplicates from action lists. This operates only on transition and
 954  * eof action lists and so should be called once all actions have been
 955  * transfered to their final resting place. */
 956 void ParseData::removeActionDups( FsmAp *graph )
 957 {
 958         /* Loop all states. */
 959         for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
 960                 /* Loop all transitions. */
 961                 for ( TransList::Iter trans = state->outList; trans.lte(); trans++ )
 962                         removeDups( trans->actionTable );
 963                 removeDups( state->toStateActionTable );
 964                 removeDups( state->fromStateActionTable );
 965                 removeDups( state->eofActionTable );
 966         }
 967 }
 968
 969 Action *ParseData::newAction( const char *name, InlineList *inlineList )
 970 {
 971         InputLoc loc;
 972         loc.line = 1;
 973         loc.col = 1;
 974         loc.fileName = "NONE";
 975
 976         Action *action = new Action( loc, name, inlineList, nextCondId++ );
 977         action->actionRefs.append( rootName );
 978         actionList.append( action );
 979         return action;
 980 }
 981
 982 void ParseData::initLongestMatchData()
 983 {
 984         if ( lmList.length() > 0 ) {
 985                 /* The initTokStart action resets the token start. */
 986                 InlineList *il1 = new InlineList;
 987                 il1->append( new InlineItem( InputLoc(), InlineItem::LmInitTokStart ) );
 988                 initTokStart = newAction( "initts", il1 );
 989                 initTokStart->isLmAction = true;
 990
 991                 /* The initActId action gives act a default value. */
 992                 InlineList *il4 = new InlineList;
 993                 il4->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) );
 994                 initActId = newAction( "initact", il4 );
 995                 initActId->isLmAction = true;
 996
 997                 /* The setTokStart action sets tokstart. */
 998                 InlineList *il5 = new InlineList;
 999                 il5->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) );
1000                 setTokStart = newAction( "ts", il5 );
1001                 setTokStart->isLmAction = true;
1002
1003                 /* The setTokEnd action sets tokend. */
1004                 InlineList *il3 = new InlineList;
1005                 il3->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) );
1006                 setTokEnd = newAction( "te", il3 );
1007                 setTokEnd->isLmAction = true;
1008
1009                 /* The action will also need an ordering: ahead of all user action
1010                  * embeddings. */
1011                 initTokStartOrd = curActionOrd++;
1012                 initActIdOrd = curActionOrd++;
1013                 setTokStartOrd = curActionOrd++;
1014                 setTokEndOrd = curActionOrd++;
1015         }
1016 }
1017
1018 /* After building the graph, do some extra processing to ensure the runtime
1019  * data of the longest mactch operators is consistent. */
1020 void ParseData::setLongestMatchData( FsmAp *graph )
1021 {
1022         if ( lmList.length() > 0 ) {
1023                 /* Make sure all entry points (targets of fgoto, fcall, fnext, fentry)
1024                  * init the tokstart. */
1025                 for ( EntryMap::Iter en = graph->entryPoints; en.lte(); en++ ) {
1026                         /* This is run after duplicates are removed, we must guard against
1027                          * inserting a duplicate. */
1028                         ActionTable &actionTable = en->value->toStateActionTable;
1029                         if ( ! actionTable.hasAction( initTokStart ) )
1030                                 actionTable.setAction( initTokStartOrd, initTokStart );
1031                 }
1032
1033                 /* Find the set of states that are the target of transitions with
1034                  * actions that have calls. These states will be targeted by fret
1035                  * statements. */
1036                 StateSet states;
1037                 for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
1038                         for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
1039                                 for ( ActionTable::Iter ati = trans->actionTable; ati.lte(); ati++ ) {
1040                                         if ( ati->value->anyCall && trans->toState != 0 )
1041                                                 states.insert( trans->toState );
1042                                 }
1043                         }
1044                 }
1045
1046
1047                 /* Init tokstart upon entering the above collected states. */
1048                 for ( StateSet::Iter ps = states; ps.lte(); ps++ ) {
1049                         /* This is run after duplicates are removed, we must guard against
1050                          * inserting a duplicate. */
1051                         ActionTable &actionTable = (*ps)->toStateActionTable;
1052                         if ( ! actionTable.hasAction( initTokStart ) )
1053                                 actionTable.setAction( initTokStartOrd, initTokStart );
1054                 }
1055         }
1056 }
1057
1058 /* Make the graph from a graph dict node. Does minimization and state sorting. */
1059 FsmAp *ParseData::makeInstance( GraphDictEl *gdNode )
1060 {
1061         /* Build the graph from a walk of the parse tree. */
1062         FsmAp *graph = gdNode->value->walk( this );
1063
1064         /* Resolve any labels that point to multiple states. Any labels that are
1065          * still around are referenced only by gotos and calls and they need to be
1066          * made into deterministic entry points. */
1067         graph->deterministicEntry();
1068
1069         /*
1070          * All state construction is now complete.
1071          */
1072
1073         /* Transfer actions from the out action tables to eof action tables. */
1074         for ( StateSet::Iter state = graph->finStateSet; state.lte(); state++ )
1075                 graph->transferOutActions( *state );
1076
1077         /* Transfer global error actions. */
1078         for ( StateList::Iter state = graph->stateList; state.lte(); state++ )
1079                 graph->transferErrorActions( state, 0 );
1080
1081         if ( ::wantDupsRemoved )
1082                 removeActionDups( graph );
1083
1084         /* Remove unreachable states. There should be no dead end states. The
1085          * subtract and intersection operators are the only places where they may
1086          * be created and those operators clean them up. */
1087         graph->removeUnreachableStates();
1088
1089         /* No more fsm operations are to be done. Action ordering numbers are
1090          * no longer of use and will just hinder minimization. Clear them. */
1091         graph->nullActionKeys();
1092
1093         /* Transition priorities are no longer of use. We can clear them
1094          * because they will just hinder minimization as well. Clear them. */
1095         graph->clearAllPriorities();
1096
1097         if ( minimizeOpt != MinimizeNone ) {
1098                 /* Minimize here even if we minimized at every op. Now that function
1099                  * keys have been cleared we may get a more minimal fsm. */
1100                 switch ( minimizeLevel ) {
1101                         case MinimizeApprox:
1102                                 graph->minimizeApproximate();
1103                                 break;
1104                         case MinimizeStable:
1105                                 graph->minimizeStable();
1106                                 break;
1107                         case MinimizePartition1:
1108                                 graph->minimizePartition1();
1109                                 break;
1110                         case MinimizePartition2:
1111                                 graph->minimizePartition2();
1112                                 break;
1113                 }
1114         }
1115
1116         graph->compressTransitions();
1117
1118         return graph;
1119 }
1120
1121 void ParseData::printNameTree()
1122 {
1123         /* Print the name instance map. */
1124         for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ )
1125                 printNameInst( *name, 0 );
1126
1127         cerr << "name index:" << endl;
1128         /* Show that the name index is correct. */
1129         for ( int ni = 0; ni < nextNameId; ni++ ) {
1130                 cerr << ni << ": ";
1131                 const char *name = nameIndex[ni]->name;
1132                 cerr << ( name != 0 ? name : "<ANON>" ) << endl;
1133         }
1134 }
1135
1136 FsmAp *ParseData::makeSpecific( GraphDictEl *gdNode )
1137 {
1138         /* Build the name tree and supporting data structures. */
1139         makeNameTree( gdNode );
1140
1141         /* Resove name references from gdNode. */
1142         initNameWalk();
1143         gdNode->value->resolveNameRefs( this );
1144
1145         /* Do not resolve action references. Since we are not building the entire
1146          * graph there's a good chance that many name references will fail. This
1147          * is okay since generating part of the graph is usually only done when
1148          * inspecting the compiled machine. */
1149
1150         /* Same story for extern entry point references. */
1151
1152         /* Flag this case so that the XML code generator is aware that we haven't
1153          * looked up name references in actions. It can then avoid segfaulting. */
1154         generatingSectionSubset = true;
1155
1156         /* Just building the specified graph. */
1157         initNameWalk();
1158         FsmAp *mainGraph = makeInstance( gdNode );
1159
1160         return mainGraph;
1161 }
1162
1163 FsmAp *ParseData::makeAll()
1164 {
1165         /* Build the name tree and supporting data structures. */
1166         makeNameTree( 0 );
1167
1168         /* Resove name references in the tree. */
1169         initNameWalk();
1170         for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ )
1171                 glel->value->resolveNameRefs( this );
1172
1173         /* Resolve action code name references. */
1174         resolveActionNameRefs();
1175
1176         /* Force name references to the top level instantiations. */
1177         for ( NameVect::Iter inst = rootName->childVect; inst.lte(); inst++ )
1178                 (*inst)->numRefs += 1;
1179
1180         FsmAp *mainGraph = 0;
1181         FsmAp **graphs = new FsmAp*[instanceList.length()];
1182         int numOthers = 0;
1183
1184         /* Make all the instantiations, we know that main exists in this list. */
1185         initNameWalk();
1186         for ( GraphList::Iter glel = instanceList; glel.lte();  glel++ ) {
1187                 if ( strcmp( glel->key, mainMachine ) == 0 ) {
1188                         /* Main graph is always instantiated. */
1189                         mainGraph = makeInstance( glel );
1190                 }
1191                 else {
1192                         /* Instantiate and store in others array. */
1193                         graphs[numOthers++] = makeInstance( glel );
1194                 }
1195         }
1196
1197         if ( mainGraph == 0 )
1198                 mainGraph = graphs[--numOthers];
1199
1200         if ( numOthers > 0 ) {
1201                 /* Add all the other graphs into main. */
1202                 mainGraph->globOp( graphs, numOthers );
1203         }
1204
1205         delete[] graphs;
1206         return mainGraph;
1207 }
1208
1209 void ParseData::analyzeAction( Action *action, InlineList *inlineList )
1210 {
1211         /* FIXME: Actions used as conditions should be very constrained. */
1212         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1213                 if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr )
1214                         action->anyCall = true;
1215
1216                 /* Need to recurse into longest match items. */
1217                 if ( item->type == InlineItem::LmSwitch ) {
1218                         LongestMatch *lm = item->longestMatch;
1219                         for ( LmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) {
1220                                 if ( lmi->action != 0 )
1221                                         analyzeAction( action, lmi->action->inlineList );
1222                         }
1223                 }
1224
1225                 if ( item->type == InlineItem::LmOnLast ||
1226                                 item->type == InlineItem::LmOnNext ||
1227                                 item->type == InlineItem::LmOnLagBehind )
1228                 {
1229                         LongestMatchPart *lmi = item->longestMatchPart;
1230                         if ( lmi->action != 0 )
1231                                 analyzeAction( action, lmi->action->inlineList );
1232                 }
1233
1234                 if ( item->children != 0 )
1235                         analyzeAction( action, item->children );
1236         }
1237 }
1238
1239
1240 /* Check actions for bad uses of fsm directives. We don't go inside longest
1241  * match items in actions created by ragel, since we just want the user
1242  * actions. */
1243 void ParseData::checkInlineList( Action *act, InlineList *inlineList )
1244 {
1245         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1246                 /* EOF checks. */
1247                 if ( act->numEofRefs > 0 ) {
1248                         switch ( item->type ) {
1249                                 /* Currently no checks. */
1250                                 default:
1251                                         break;
1252                         }
1253                 }
1254
1255                 /* Recurse. */
1256                 if ( item->children != 0 )
1257                         checkInlineList( act, item->children );
1258         }
1259 }
1260
1261 void ParseData::checkAction( Action *action )
1262 {
1263         /* Check for actions with calls that are embedded within a longest match
1264          * machine. */
1265         if ( !action->isLmAction && action->numRefs() > 0 && action->anyCall ) {
1266                 for ( ActionRefs::Iter ar = action->actionRefs; ar.lte(); ar++ ) {
1267                         NameInst *check = *ar;
1268                         while ( check != 0 ) {
1269                                 if ( check->isLongestMatch ) {
1270                                         error(action->loc) << "within a scanner, fcall is permitted"
1271                                                 " only in pattern actions" << endl;
1272                                         break;
1273                                 }
1274                                 check = check->parent;
1275                         }
1276                 }
1277         }
1278
1279         checkInlineList( action, action->inlineList );
1280 }
1281
1282
1283 void ParseData::analyzeGraph( FsmAp *graph )
1284 {
1285         for ( ActionList::Iter act = actionList; act.lte(); act++ )
1286                 analyzeAction( act, act->inlineList );
1287
1288         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
1289                 /* The transition list. */
1290                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
1291                         for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ )
1292                                 at->value->numTransRefs += 1;
1293                 }
1294
1295                 for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ )
1296                         at->value->numToStateRefs += 1;
1297
1298                 for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ )
1299                         at->value->numFromStateRefs += 1;
1300
1301                 for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ )
1302                         at->value->numEofRefs += 1;
1303
1304                 for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) {
1305                         for ( CondSet::Iter sci = sc->condSpace->condSet; sci.lte(); sci++ )
1306                                 (*sci)->numCondRefs += 1;
1307                 }
1308         }
1309
1310         /* Checks for bad usage of directives in action code. */
1311         for ( ActionList::Iter act = actionList; act.lte(); act++ )
1312                 checkAction( act );
1313 }
1314
1315 void ParseData::makeExportsNameTree()
1316 {
1317         /* Make a name tree for the exports. */
1318         initExportsNameWalk();
1319
1320         /* First make the name tree. */
1321         for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1322                 if ( gdel->value->isExport ) {
1323                         /* Recurse on the instance. */
1324                         gdel->value->makeNameTree( gdel->loc, this );
1325                 }
1326         }
1327 }
1328
1329 void ParseData::makeExports()
1330 {
1331         makeExportsNameTree();
1332
1333         /* Resove name references in the tree. */
1334         initExportsNameWalk();
1335         for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1336                 if ( gdel->value->isExport )
1337                         gdel->value->resolveNameRefs( this );
1338         }
1339
1340         /* Make all the instantiations, we know that main exists in this list. */
1341         initExportsNameWalk();
1342         for ( GraphDict::Iter gdel = graphDict; gdel.lte();  gdel++ ) {
1343                 /* Check if this var def is an export. */
1344                 if ( gdel->value->isExport ) {
1345                         /* Build the graph from a walk of the parse tree. */
1346                         FsmAp *graph = gdel->value->walk( this );
1347
1348                         /* Build the graph from a walk of the parse tree. */
1349                         if ( !graph->checkSingleCharMachine() ) {
1350                                 error(gdel->loc) << "bad export machine, must define "
1351                                                 "a single character" << endl;
1352                         }
1353                         else {
1354                                 /* Safe to extract the key and declare the export. */
1355                                 Key exportKey = graph->startState->outList.head->lowKey;
1356                                 exportList.append( new Export( gdel->value->name, exportKey ) );
1357                         }
1358                 }
1359         }
1360
1361 }
1362
1363 /* Construct the machine and catch failures which can occur during
1364  * construction. */
1365 void ParseData::prepareMachineGen( GraphDictEl *graphDictEl )
1366 {
1367         try {
1368                 /* This machine construction can fail. */
1369                 prepareMachineGenTBWrapped( graphDictEl );
1370         }
1371         catch ( FsmConstructFail fail ) {
1372                 switch ( fail.reason ) {
1373                         case FsmConstructFail::CondNoKeySpace: {
1374                                 InputLoc &loc = alphTypeSet ? alphTypeLoc : sectionLoc;
1375                                 error(loc) << "sorry, no more characters are "
1376                                                 "available in the alphabet space" << endl;
1377                                 error(loc) << "  for conditions, please use a "
1378                                                 "smaller alphtype or reduce" << endl;
1379                                 error(loc) << "  the span of characters on which "
1380                                                 "conditions are embedded" << endl;
1381                                 break;
1382                         }
1383                 }
1384         }
1385 }
1386
1387 void ParseData::prepareMachineGenTBWrapped( GraphDictEl *graphDictEl )
1388 {
1389         beginProcessing();
1390         initKeyOps();
1391         makeRootNames();
1392         initLongestMatchData();
1393
1394         /* Make the graph, do minimization. */
1395         if ( graphDictEl == 0 )
1396                 sectionGraph = makeAll();
1397         else
1398                 sectionGraph = makeSpecific( graphDictEl );
1399
1400         /* Compute exports from the export definitions. */
1401         makeExports();
1402
1403         /* If any errors have occured in the input file then don't write anything. */
1404         if ( gblErrorCount > 0 )
1405                 return;
1406
1407         analyzeGraph( sectionGraph );
1408
1409         /* Depends on the graph analysis. */
1410         setLongestMatchData( sectionGraph );
1411
1412         /* Decide if an error state is necessary.
1413          *  1. There is an error transition
1414          *  2. There is a gap in the transitions
1415          *  3. The longest match operator requires it. */
1416         if ( lmRequiresErrorState || sectionGraph->hasErrorTrans() )
1417                 sectionGraph->errState = sectionGraph->addState();
1418
1419         /* State numbers need to be assigned such that all final states have a
1420          * larger state id number than all non-final states. This enables the
1421          * first_final mechanism to function correctly. We also want states to be
1422          * ordered in a predictable fashion. So we first apply a depth-first
1423          * search, then do a stable sort by final state status, then assign
1424          * numbers. */
1425
1426         sectionGraph->depthFirstOrdering();
1427         sectionGraph->sortStatesByFinal();
1428         sectionGraph->setStateNumbers( 0 );
1429 }
1430
1431 void ParseData::generateReduced( InputData &inputData )
1432 {
1433         beginProcessing();
1434
1435         cgd = makeCodeGen( inputData.inputFileName, sectionName, *inputData.outStream );
1436
1437         /* Make the generator. */
1438         BackendGen backendGen( sectionName, this, sectionGraph, cgd );
1439
1440         /* Write out with it. */
1441         backendGen.makeBackend();
1442
1443         if ( printStatistics ) {
1444                 cerr << "fsm name  : " << sectionName << endl;
1445                 cerr << "num states: " << sectionGraph->stateList.length() << endl;
1446                 cerr << endl;
1447         }
1448 }
1449
1450 void ParseData::generateXML( ostream &out )
1451 {
1452         beginProcessing();
1453
1454         /* Make the generator. */
1455         XMLCodeGen codeGen( sectionName, this, sectionGraph, out );
1456
1457         /* Write out with it. */
1458         codeGen.writeXML();
1459
1460         if ( printStatistics ) {
1461                 cerr << "fsm name  : " << sectionName << endl;
1462                 cerr << "num states: " << sectionGraph->stateList.length() << endl;
1463                 cerr << endl;
1464         }
1465 }
1466