ragel/parsedata.cpp

   1 /*
   2  *  Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
   3  */
   4
   5 /*  This file is part of Ragel.
   6  *
   7  *  Ragel is free software; you can redistribute it and/or modify
   8  *  it under the terms of the GNU General Public License as published by
   9  *  the Free Software Foundation; either version 2 of the License, or
  10  *  (at your option) any later version.
  11  *
  12  *  Ragel is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  *  GNU General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU General Public License
  18  *  along with Ragel; if not, write to the Free Software
  19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  */
  21
  22 #include <iostream>
  23 #include <iomanip>
  24 #include <errno.h>
  25 #include <stdlib.h>
  26 #include <limits.h>
  27
  28 #include "ragel.h"
  29 #include "rlparse.h"
  30 #include "parsedata.h"
  31 #include "parsetree.h"
  32 #include "mergesort.h"
  33 #include "xmlcodegen.h"
  34 #include "version.h"
  35
  36 using namespace std;
  37
  38 char mainMachine[] = "main";
  39
  40 void Token::set( const char *str, int len )
  41 {
  42         length = len;
  43         data = new char[len+1];
  44         memcpy( data, str, len );
  45         data[len] = 0;
  46 }
  47
  48 void Token::append( const Token &other )
  49 {
  50         int newLength = length + other.length;
  51         char *newString = new char[newLength+1];
  52         memcpy( newString, data, length );
  53         memcpy( newString + length, other.data, other.length );
  54         newString[newLength] = 0;
  55         data = newString;
  56         length = newLength;
  57 }
  58
  59 /* Perform minimization after an operation according
  60  * to the command line args. */
  61 void afterOpMinimize( FsmAp *fsm, bool lastInSeq )
  62 {
  63         /* Switch on the prefered minimization algorithm. */
  64         if ( minimizeOpt == MinimizeEveryOp || ( minimizeOpt == MinimizeMostOps && lastInSeq ) ) {
  65                 /* First clean up the graph. FsmAp operations may leave these
  66                  * lying around. There should be no dead end states. The subtract
  67                  * intersection operators are the only places where they may be
  68                  * created and those operators clean them up. */
  69                 fsm->removeUnreachableStates();
  70
  71                 switch ( minimizeLevel ) {
  72                         case MinimizeApprox:
  73                                 fsm->minimizeApproximate();
  74                                 break;
  75                         case MinimizePartition1:
  76                                 fsm->minimizePartition1();
  77                                 break;
  78                         case MinimizePartition2:
  79                                 fsm->minimizePartition2();
  80                                 break;
  81                         case MinimizeStable:
  82                                 fsm->minimizeStable();
  83                                 break;
  84                 }
  85         }
  86 }
  87
  88 /* Count the transitions in the fsm by walking the state list. */
  89 int countTransitions( FsmAp *fsm )
  90 {
  91         int numTrans = 0;
  92         StateAp *state = fsm->stateList.head;
  93         while ( state != 0 ) {
  94                 numTrans += state->outList.length();
  95                 state = state->next;
  96         }
  97         return numTrans;
  98 }
  99
 100 Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd )
 101 {
 102         /* Reset errno so we can check for overflow or underflow. In the event of
 103          * an error, sets the return val to the upper or lower bound being tested
 104          * against. */
 105         errno = 0;
 106         unsigned int size = keyOps->alphType->size;
 107         bool unusedBits = size < sizeof(unsigned long);
 108
 109         unsigned long ul = strtoul( str, 0, 16 );
 110
 111         if ( errno == ERANGE || ( unusedBits && ul >> (size * 8) ) ) {
 112                 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
 113                 ul = 1 << (size * 8);
 114         }
 115
 116         if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) )
 117                 ul |= (0xffffffff >> (size*8 ) ) << (size*8);
 118
 119         return Key( (long)ul );
 120 }
 121
 122 Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd )
 123 {
 124         /* Convert the number to a decimal. First reset errno so we can check
 125          * for overflow or underflow. */
 126         errno = 0;
 127         long long minVal = keyOps->alphType->minVal;
 128         long long maxVal = keyOps->alphType->maxVal;
 129
 130         long long ll = strtoll( str, 0, 10 );
 131
 132         /* Check for underflow. */
 133         if ( ( errno == ERANGE && ll < 0 ) || ll < minVal) {
 134                 error(loc) << "literal " << str << " underflows the alphabet type" << endl;
 135                 ll = minVal;
 136         }
 137         /* Check for overflow. */
 138         else if ( ( errno == ERANGE && ll > 0 ) || ll > maxVal ) {
 139                 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
 140                 ll = maxVal;
 141         }
 142
 143         if ( keyOps->alphType->isSigned )
 144                 return Key( (long)ll );
 145         else
 146                 return Key( (unsigned long)ll );
 147 }
 148
 149 /* Make an fsm key in int format (what the fsm graph uses) from an alphabet
 150  * number returned by the parser. Validates that the number doesn't overflow
 151  * the alphabet type. */
 152 Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd )
 153 {
 154         /* Switch on hex/decimal format. */
 155         if ( str[0] == '0' && str[1] == 'x' )
 156                 return makeFsmKeyHex( str, loc, pd );
 157         else
 158                 return makeFsmKeyDec( str, loc, pd );
 159 }
 160
 161 /* Make an fsm int format (what the fsm graph uses) from a single character.
 162  * Performs proper conversion depending on signed/unsigned property of the
 163  * alphabet. */
 164 Key makeFsmKeyChar( char c, ParseData *pd )
 165 {
 166         if ( keyOps->isSigned ) {
 167                 /* Copy from a char type. */
 168                 return Key( c );
 169         }
 170         else {
 171                 /* Copy from an unsigned byte type. */
 172                 return Key( (unsigned char)c );
 173         }
 174 }
 175
 176 /* Make an fsm key array in int format (what the fsm graph uses) from a string
 177  * of characters. Performs proper conversion depending on signed/unsigned
 178  * property of the alphabet. */
 179 void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd )
 180 {
 181         if ( keyOps->isSigned ) {
 182                 /* Copy from a char star type. */
 183                 char *src = data;
 184                 for ( int i = 0; i < len; i++ )
 185                         result[i] = Key(src[i]);
 186         }
 187         else {
 188                 /* Copy from an unsigned byte ptr type. */
 189                 unsigned char *src = (unsigned char*) data;
 190                 for ( int i = 0; i < len; i++ )
 191                         result[i] = Key(src[i]);
 192         }
 193 }
 194
 195 /* Like makeFsmKeyArray except the result has only unique keys. They ordering
 196  * will be changed. */
 197 void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
 198                 bool caseInsensitive, ParseData *pd )
 199 {
 200         /* Use a transitions list for getting unique keys. */
 201         if ( keyOps->isSigned ) {
 202                 /* Copy from a char star type. */
 203                 char *src = data;
 204                 for ( int si = 0; si < len; si++ ) {
 205                         Key key( src[si] );
 206                         result.insert( key );
 207                         if ( caseInsensitive ) {
 208                                 if ( key.isLower() )
 209                                         result.insert( key.toUpper() );
 210                                 else if ( key.isUpper() )
 211                                         result.insert( key.toLower() );
 212                         }
 213                 }
 214         }
 215         else {
 216                 /* Copy from an unsigned byte ptr type. */
 217                 unsigned char *src = (unsigned char*) data;
 218                 for ( int si = 0; si < len; si++ ) {
 219                         Key key( src[si] );
 220                         result.insert( key );
 221                         if ( caseInsensitive ) {
 222                                 if ( key.isLower() )
 223                                         result.insert( key.toUpper() );
 224                                 else if ( key.isUpper() )
 225                                         result.insert( key.toLower() );
 226                         }
 227                 }
 228         }
 229 }
 230
 231 FsmAp *dotFsm( ParseData *pd )
 232 {
 233         FsmAp *retFsm = new FsmAp();
 234         retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey );
 235         return retFsm;
 236 }
 237
 238 FsmAp *dotStarFsm( ParseData *pd )
 239 {
 240         FsmAp *retFsm = new FsmAp();
 241         retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey );
 242         return retFsm;
 243 }
 244
 245 /* Make a builtin type. Depends on the signed nature of the alphabet type. */
 246 FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd )
 247 {
 248         /* FsmAp created to return. */
 249         FsmAp *retFsm = 0;
 250         bool isSigned = keyOps->isSigned;
 251
 252         switch ( builtin ) {
 253         case BT_Any: {
 254                 /* All characters. */
 255                 retFsm = dotFsm( pd );
 256                 break;
 257         }
 258         case BT_Ascii: {
 259                 /* Ascii characters 0 to 127. */
 260                 retFsm = new FsmAp();
 261                 retFsm->rangeFsm( 0, 127 );
 262                 break;
 263         }
 264         case BT_Extend: {
 265                 /* Ascii extended characters. This is the full byte range. Dependent
 266                  * on signed, vs no signed. If the alphabet is one byte then just use
 267                  * dot fsm. */
 268                 if ( isSigned ) {
 269                         retFsm = new FsmAp();
 270                         retFsm->rangeFsm( -128, 127 );
 271                 }
 272                 else {
 273                         retFsm = new FsmAp();
 274                         retFsm->rangeFsm( 0, 255 );
 275                 }
 276                 break;
 277         }
 278         case BT_Alpha: {
 279                 /* Alpha [A-Za-z]. */
 280                 FsmAp *upper = new FsmAp(), *lower = new FsmAp();
 281                 upper->rangeFsm( 'A', 'Z' );
 282                 lower->rangeFsm( 'a', 'z' );
 283                 upper->unionOp( lower );
 284                 upper->minimizePartition2();
 285                 retFsm = upper;
 286                 break;
 287         }
 288         case BT_Digit: {
 289                 /* Digits [0-9]. */
 290                 retFsm = new FsmAp();
 291                 retFsm->rangeFsm( '0', '9' );
 292                 break;
 293         }
 294         case BT_Alnum: {
 295                 /* Alpha numerics [0-9A-Za-z]. */
 296                 FsmAp *digit = new FsmAp(), *lower = new FsmAp();
 297                 FsmAp *upper = new FsmAp();
 298                 digit->rangeFsm( '0', '9' );
 299                 upper->rangeFsm( 'A', 'Z' );
 300                 lower->rangeFsm( 'a', 'z' );
 301                 digit->unionOp( upper );
 302                 digit->unionOp( lower );
 303                 digit->minimizePartition2();
 304                 retFsm = digit;
 305                 break;
 306         }
 307         case BT_Lower: {
 308                 /* Lower case characters. */
 309                 retFsm = new FsmAp();
 310                 retFsm->rangeFsm( 'a', 'z' );
 311                 break;
 312         }
 313         case BT_Upper: {
 314                 /* Upper case characters. */
 315                 retFsm = new FsmAp();
 316                 retFsm->rangeFsm( 'A', 'Z' );
 317                 break;
 318         }
 319         case BT_Cntrl: {
 320                 /* Control characters. */
 321                 FsmAp *cntrl = new FsmAp();
 322                 FsmAp *highChar = new FsmAp();
 323                 cntrl->rangeFsm( 0, 31 );
 324                 highChar->concatFsm( 127 );
 325                 cntrl->unionOp( highChar );
 326                 cntrl->minimizePartition2();
 327                 retFsm = cntrl;
 328                 break;
 329         }
 330         case BT_Graph: {
 331                 /* Graphical ascii characters [!-~]. */
 332                 retFsm = new FsmAp();
 333                 retFsm->rangeFsm( '!', '~' );
 334                 break;
 335         }
 336         case BT_Print: {
 337                 /* Printable characters. Same as graph except includes space. */
 338                 retFsm = new FsmAp();
 339                 retFsm->rangeFsm( ' ', '~' );
 340                 break;
 341         }
 342         case BT_Punct: {
 343                 /* Punctuation. */
 344                 FsmAp *range1 = new FsmAp();
 345                 FsmAp *range2 = new FsmAp();
 346                 FsmAp *range3 = new FsmAp();
 347                 FsmAp *range4 = new FsmAp();
 348                 range1->rangeFsm( '!', '/' );
 349                 range2->rangeFsm( ':', '@' );
 350                 range3->rangeFsm( '[', '`' );
 351                 range4->rangeFsm( '{', '~' );
 352                 range1->unionOp( range2 );
 353                 range1->unionOp( range3 );
 354                 range1->unionOp( range4 );
 355                 range1->minimizePartition2();
 356                 retFsm = range1;
 357                 break;
 358         }
 359         case BT_Space: {
 360                 /* Whitespace: [\t\v\f\n\r ]. */
 361                 FsmAp *cntrl = new FsmAp();
 362                 FsmAp *space = new FsmAp();
 363                 cntrl->rangeFsm( '\t', '\r' );
 364                 space->concatFsm( ' ' );
 365                 cntrl->unionOp( space );
 366                 cntrl->minimizePartition2();
 367                 retFsm = cntrl;
 368                 break;
 369         }
 370         case BT_Xdigit: {
 371                 /* Hex digits [0-9A-Fa-f]. */
 372                 FsmAp *digit = new FsmAp();
 373                 FsmAp *upper = new FsmAp();
 374                 FsmAp *lower = new FsmAp();
 375                 digit->rangeFsm( '0', '9' );
 376                 upper->rangeFsm( 'A', 'F' );
 377                 lower->rangeFsm( 'a', 'f' );
 378                 digit->unionOp( upper );
 379                 digit->unionOp( lower );
 380                 digit->minimizePartition2();
 381                 retFsm = digit;
 382                 break;
 383         }
 384         case BT_Lambda: {
 385                 retFsm = new FsmAp();
 386                 retFsm->lambdaFsm();
 387                 break;
 388         }
 389         case BT_Empty: {
 390                 retFsm = new FsmAp();
 391                 retFsm->emptyFsm();
 392                 break;
 393         }}
 394
 395         return retFsm;
 396 }
 397
 398 /* Check if this name inst or any name inst below is referenced. */
 399 bool NameInst::anyRefsRec()
 400 {
 401         if ( numRefs > 0 )
 402                 return true;
 403
 404         /* Recurse on children until true. */
 405         for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) {
 406                 if ( (*ch)->anyRefsRec() )
 407                         return true;
 408         }
 409
 410         return false;
 411 }
 412
 413 /*
 414  * ParseData
 415  */
 416
 417 /* Initialize the structure that will collect info during the parse of a
 418  * machine. */
 419 ParseData::ParseData( char *fileName, char *sectionName,
 420                 const InputLoc &sectionLoc )
 421 :
 422         sectionGraph(0),
 423         generatingSectionSubset(false),
 424         nextPriorKey(0),
 425         /* 0 is reserved for global error actions. */
 426         nextLocalErrKey(1),
 427         nextNameId(0),
 428         nextCondId(0),
 429         alphTypeSet(false),
 430         getKeyExpr(0),
 431         accessExpr(0),
 432         prePushExpr(0),
 433         postPopExpr(0),
 434         pExpr(0),
 435         peExpr(0),
 436         eofExpr(0),
 437         csExpr(0),
 438         topExpr(0),
 439         stackExpr(0),
 440         actExpr(0),
 441         tokstartExpr(0),
 442         tokendExpr(0),
 443         dataExpr(0),
 444         lowerNum(0),
 445         upperNum(0),
 446         fileName(fileName),
 447         sectionName(sectionName),
 448         sectionLoc(sectionLoc),
 449         curActionOrd(0),
 450         curPriorOrd(0),
 451         rootName(0),
 452         exportsRootName(0),
 453         nextEpsilonResolvedLink(0),
 454         nextLongestMatchId(1),
 455         lmRequiresErrorState(false)
 456 {
 457         /* Initialize the dictionary of graphs. This is our symbol table. The
 458          * initialization needs to be done on construction which happens at the
 459          * beginning of a machine spec so any assignment operators can reference
 460          * the builtins. */
 461         initGraphDict();
 462 }
 463
 464 /* Clean up the data collected during a parse. */
 465 ParseData::~ParseData()
 466 {
 467         /* Delete all the nodes in the action list. Will cause all the
 468          * string data that represents the actions to be deallocated. */
 469         actionList.empty();
 470 }
 471
 472 /* Make a name id in the current name instantiation scope if it is not
 473  * already there. */
 474 NameInst *ParseData::addNameInst( const InputLoc &loc, const char *data, bool isLabel )
 475 {
 476         /* Create the name instantitaion object and insert it. */
 477         NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel );
 478         curNameInst->childVect.append( newNameInst );
 479         if ( data != 0 )
 480                 curNameInst->children.insertMulti( data, newNameInst );
 481         return newNameInst;
 482 }
 483
 484 void ParseData::initNameWalk()
 485 {
 486         curNameInst = rootName;
 487         curNameChild = 0;
 488 }
 489
 490 void ParseData::initExportsNameWalk()
 491 {
 492         curNameInst = exportsRootName;
 493         curNameChild = 0;
 494 }
 495
 496 /* Goes into the next child scope. The number of the child is already set up.
 497  * We need this for the syncronous name tree and parse tree walk to work
 498  * properly. It is reset on entry into a scope and advanced on poping of a
 499  * scope. A call to enterNameScope should be accompanied by a corresponding
 500  * popNameScope. */
 501 NameFrame ParseData::enterNameScope( bool isLocal, int numScopes )
 502 {
 503         /* Save off the current data. */
 504         NameFrame retFrame;
 505         retFrame.prevNameInst = curNameInst;
 506         retFrame.prevNameChild = curNameChild;
 507         retFrame.prevLocalScope = localNameScope;
 508
 509         /* Enter into the new name scope. */
 510         for ( int i = 0; i < numScopes; i++ ) {
 511                 curNameInst = curNameInst->childVect[curNameChild];
 512                 curNameChild = 0;
 513         }
 514
 515         if ( isLocal )
 516                 localNameScope = curNameInst;
 517
 518         return retFrame;
 519 }
 520
 521 /* Return from a child scope to a parent. The parent info must be specified as
 522  * an argument and is obtained from the corresponding call to enterNameScope.
 523  * */
 524 void ParseData::popNameScope( const NameFrame &frame )
 525 {
 526         /* Pop the name scope. */
 527         curNameInst = frame.prevNameInst;
 528         curNameChild = frame.prevNameChild+1;
 529         localNameScope = frame.prevLocalScope;
 530 }
 531
 532 void ParseData::resetNameScope( const NameFrame &frame )
 533 {
 534         /* Pop the name scope. */
 535         curNameInst = frame.prevNameInst;
 536         curNameChild = frame.prevNameChild;
 537         localNameScope = frame.prevLocalScope;
 538 }
 539
 540
 541 void ParseData::unsetObsoleteEntries( FsmAp *graph )
 542 {
 543         /* Loop the reference names and increment the usage. Names that are no
 544          * longer needed will be unset in graph. */
 545         for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) {
 546                 /* Get the name. */
 547                 NameInst *name = *ref;
 548                 name->numUses += 1;
 549
 550                 /* If the name is no longer needed unset its corresponding entry. */
 551                 if ( name->numUses == name->numRefs ) {
 552                         assert( graph->entryPoints.find( name->id ) != 0 );
 553                         graph->unsetEntry( name->id );
 554                         assert( graph->entryPoints.find( name->id ) == 0 );
 555                 }
 556         }
 557 }
 558
 559 NameSet ParseData::resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly )
 560 {
 561         /* Queue needed for breadth-first search, load it with the start node. */
 562         NameInstList nameQueue;
 563         nameQueue.append( refFrom );
 564
 565         NameSet result;
 566         while ( nameQueue.length() > 0 ) {
 567                 /* Pull the next from location off the queue. */
 568                 NameInst *from = nameQueue.detachFirst();
 569
 570                 /* Look for the name. */
 571                 NameMapEl *low, *high;
 572                 if ( from->children.findMulti( data, low, high ) ) {
 573                         /* Record all instances of the name. */
 574                         for ( ; low <= high; low++ )
 575                                 result.insert( low->value );
 576                 }
 577
 578                 /* Name not there, do breadth-first operation of appending all
 579                  * childrent to the processing queue. */
 580                 for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) {
 581                         if ( !recLabelsOnly || (*name)->isLabel )
 582                                 nameQueue.append( *name );
 583                 }
 584         }
 585
 586         /* Queue exhausted and name never found. */
 587         return result;
 588 }
 589
 590 void ParseData::resolveFrom( NameSet &result, NameInst *refFrom,
 591                 const NameRef &nameRef, int namePos )
 592 {
 593         /* Look for the name in the owning scope of the factor with aug. */
 594         NameSet partResult = resolvePart( refFrom, nameRef[namePos], false );
 595
 596         /* If there are more parts to the name then continue on. */
 597         if ( ++namePos < nameRef.length() ) {
 598                 /* There are more components to the name, search using all the part
 599                  * results as the base. */
 600                 for ( NameSet::Iter name = partResult; name.lte(); name++ )
 601                         resolveFrom( result, *name, nameRef, namePos );
 602         }
 603         else {
 604                 /* This is the last component, append the part results to the final
 605                  * results. */
 606                 result.insert( partResult );
 607         }
 608 }
 609
 610 /* Write out a name reference. */
 611 ostream &operator<<( ostream &out, const NameRef &nameRef )
 612 {
 613         int pos = 0;
 614         if ( nameRef[pos] == 0 ) {
 615                 out << "::";
 616                 pos += 1;
 617         }
 618         out << nameRef[pos++];
 619         for ( ; pos < nameRef.length(); pos++ )
 620                 out << "::" << nameRef[pos];
 621         return out;
 622 }
 623
 624 ostream &operator<<( ostream &out, const NameInst &nameInst )
 625 {
 626         /* Count the number fully qualified name parts. */
 627         int numParents = 0;
 628         NameInst *curParent = nameInst.parent;
 629         while ( curParent != 0 ) {
 630                 numParents += 1;
 631                 curParent = curParent->parent;
 632         }
 633
 634         /* Make an array and fill it in. */
 635         curParent = nameInst.parent;
 636         NameInst **parents = new NameInst*[numParents];
 637         for ( int p = numParents-1; p >= 0; p-- ) {
 638                 parents[p] = curParent;
 639                 curParent = curParent->parent;
 640         }
 641
 642         /* Write the parents out, skip the root. */
 643         for ( int p = 1; p < numParents; p++ )
 644                 out << "::" << ( parents[p]->name != 0 ? parents[p]->name : "<ANON>" );
 645
 646         /* Write the name and cleanup. */
 647         out << "::" << ( nameInst.name != 0 ? nameInst.name : "<ANON>" );
 648         delete[] parents;
 649         return out;
 650 }
 651
 652 struct CmpNameInstLoc
 653 {
 654         static int compare( const NameInst *ni1, const NameInst *ni2 )
 655         {
 656                 if ( ni1->loc.line < ni2->loc.line )
 657                         return -1;
 658                 else if ( ni1->loc.line > ni2->loc.line )
 659                         return 1;
 660                 else if ( ni1->loc.col < ni2->loc.col )
 661                         return -1;
 662                 else if ( ni1->loc.col > ni2->loc.col )
 663                         return 1;
 664                 return 0;
 665         }
 666 };
 667
 668 void errorStateLabels( const NameSet &resolved )
 669 {
 670         MergeSort<NameInst*, CmpNameInstLoc> mergeSort;
 671         mergeSort.sort( resolved.data, resolved.length() );
 672         for ( NameSet::Iter res = resolved; res.lte(); res++ )
 673                 error((*res)->loc) << "  -> " << **res << endl;
 674 }
 675
 676
 677 NameInst *ParseData::resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action )
 678 {
 679         NameInst *nameInst = 0;
 680
 681         /* Do the local search if the name is not strictly a root level name
 682          * search. */
 683         if ( nameRef[0] != 0 ) {
 684                 /* If the action is referenced, resolve all of them. */
 685                 if ( action != 0 && action->actionRefs.length() > 0 ) {
 686                         /* Look for the name in all referencing scopes. */
 687                         NameSet resolved;
 688                         for ( ActionRefs::Iter actRef = action->actionRefs; actRef.lte(); actRef++ )
 689                                 resolveFrom( resolved, *actRef, nameRef, 0 );
 690
 691                         if ( resolved.length() > 0 ) {
 692                                 /* Take the first one. */
 693                                 nameInst = resolved[0];
 694                                 if ( resolved.length() > 1 ) {
 695                                         /* Complain about the multiple references. */
 696                                         error(loc) << "state reference " << nameRef <<
 697                                                         " resolves to multiple entry points" << endl;
 698                                         errorStateLabels( resolved );
 699                                 }
 700                         }
 701                 }
 702         }
 703
 704         /* If not found in the local scope, look in global. */
 705         if ( nameInst == 0 ) {
 706                 NameSet resolved;
 707                 int fromPos = nameRef[0] != 0 ? 0 : 1;
 708                 resolveFrom( resolved, rootName, nameRef, fromPos );
 709
 710                 if ( resolved.length() > 0 ) {
 711                         /* Take the first. */
 712                         nameInst = resolved[0];
 713                         if ( resolved.length() > 1 ) {
 714                                 /* Complain about the multiple references. */
 715                                 error(loc) << "state reference " << nameRef <<
 716                                                 " resolves to multiple entry points" << endl;
 717                                 errorStateLabels( resolved );
 718                         }
 719                 }
 720         }
 721
 722         if ( nameInst == 0 ) {
 723                 /* If not found then complain. */
 724                 error(loc) << "could not resolve state reference " << nameRef << endl;
 725         }
 726         return nameInst;
 727 }
 728
 729 void ParseData::resolveNameRefs( InlineList *inlineList, Action *action )
 730 {
 731         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
 732                 switch ( item->type ) {
 733                         case InlineItem::Entry: case InlineItem::Goto:
 734                         case InlineItem::Call: case InlineItem::Next: {
 735                                 /* Resolve, pass action for local search. */
 736                                 NameInst *target = resolveStateRef( *item->nameRef, item->loc, action );
 737
 738                                 /* Name lookup error reporting is handled by resolveStateRef. */
 739                                 if ( target != 0 ) {
 740                                         /* Check if the target goes into a longest match. */
 741                                         NameInst *search = target->parent;
 742                                         while ( search != 0 ) {
 743                                                 if ( search->isLongestMatch ) {
 744                                                         error(item->loc) << "cannot enter inside a longest "
 745                                                                         "match construction as an entry point" << endl;
 746                                                         break;
 747                                                 }
 748                                                 search = search->parent;
 749                                         }
 750
 751                                         /* Record the reference in the name. This will cause the
 752                                          * entry point to survive to the end of the graph
 753                                          * generating walk. */
 754                                         target->numRefs += 1;
 755                                 }
 756
 757                                 item->nameTarg = target;
 758                                 break;
 759                         }
 760                         default:
 761                                 break;
 762                 }
 763
 764                 /* Some of the item types may have children. */
 765                 if ( item->children != 0 )
 766                         resolveNameRefs( item->children, action );
 767         }
 768 }
 769
 770 /* Resolve references to labels in actions. */
 771 void ParseData::resolveActionNameRefs()
 772 {
 773         for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
 774                 /* Only care about the actions that are referenced. */
 775                 if ( act->actionRefs.length() > 0 )
 776                         resolveNameRefs( act->inlineList, act );
 777         }
 778 }
 779
 780 /* Walk a name tree starting at from and fill the name index. */
 781 void ParseData::fillNameIndex( NameInst *from )
 782 {
 783         /* Fill the value for from in the name index. */
 784         nameIndex[from->id] = from;
 785
 786         /* Recurse on the implicit final state and then all children. */
 787         if ( from->final != 0 )
 788                 fillNameIndex( from->final );
 789         for ( NameVect::Iter name = from->childVect; name.lte(); name++ )
 790                 fillNameIndex( *name );
 791 }
 792
 793 void ParseData::makeRootNames()
 794 {
 795         /* Create the root name. */
 796         rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
 797         exportsRootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
 798 }
 799
 800 /* Build the name tree and supporting data structures. */
 801 void ParseData::makeNameTree( GraphDictEl *dictEl )
 802 {
 803         /* Set up curNameInst for the walk. */
 804         initNameWalk();
 805
 806         if ( dictEl != 0 ) {
 807                 /* A start location has been specified. */
 808                 dictEl->value->makeNameTree( dictEl->loc, this );
 809         }
 810         else {
 811                 /* First make the name tree. */
 812                 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
 813                         /* Recurse on the instance. */
 814                         glel->value->makeNameTree( glel->loc, this );
 815                 }
 816         }
 817
 818         /* The number of nodes in the tree can now be given by nextNameId */
 819         nameIndex = new NameInst*[nextNameId];
 820         memset( nameIndex, 0, sizeof(NameInst*)*nextNameId );
 821         fillNameIndex( rootName );
 822         fillNameIndex( exportsRootName );
 823 }
 824
 825
 826 void ParseData::createBuiltin( const char *name, BuiltinMachine builtin )
 827 {
 828         Expression *expression = new Expression( builtin );
 829         Join *join = new Join( expression );
 830         JoinOrLm *joinOrLm = new JoinOrLm( join );
 831         VarDef *varDef = new VarDef( name, joinOrLm );
 832         GraphDictEl *graphDictEl = new GraphDictEl( name, varDef );
 833         graphDict.insert( graphDictEl );
 834 }
 835
 836 /* Initialize the graph dict with builtin types. */
 837 void ParseData::initGraphDict( )
 838 {
 839         createBuiltin( "any", BT_Any );
 840         createBuiltin( "ascii", BT_Ascii );
 841         createBuiltin( "extend", BT_Extend );
 842         createBuiltin( "alpha", BT_Alpha );
 843         createBuiltin( "digit", BT_Digit );
 844         createBuiltin( "alnum", BT_Alnum );
 845         createBuiltin( "lower", BT_Lower );
 846         createBuiltin( "upper", BT_Upper );
 847         createBuiltin( "cntrl", BT_Cntrl );
 848         createBuiltin( "graph", BT_Graph );
 849         createBuiltin( "print", BT_Print );
 850         createBuiltin( "punct", BT_Punct );
 851         createBuiltin( "space", BT_Space );
 852         createBuiltin( "xdigit", BT_Xdigit );
 853         createBuiltin( "null", BT_Lambda );
 854         createBuiltin( "zlen", BT_Lambda );
 855         createBuiltin( "empty", BT_Empty );
 856 }
 857
 858 /* Set the alphabet type. If the types are not valid returns false. */
 859 bool ParseData::setAlphType( const InputLoc &loc, char *s1, char *s2 )
 860 {
 861         alphTypeLoc = loc;
 862         userAlphType = findAlphType( s1, s2 );
 863         alphTypeSet = true;
 864         return userAlphType != 0;
 865 }
 866
 867 /* Set the alphabet type. If the types are not valid returns false. */
 868 bool ParseData::setAlphType( const InputLoc &loc, char *s1 )
 869 {
 870         alphTypeLoc = loc;
 871         userAlphType = findAlphType( s1 );
 872         alphTypeSet = true;
 873         return userAlphType != 0;
 874 }
 875
 876 bool ParseData::setVariable( char *var, InlineList *inlineList )
 877 {
 878         bool set = true;
 879
 880         if ( strcmp( var, "p" ) == 0 )
 881                 pExpr = inlineList;
 882         else if ( strcmp( var, "pe" ) == 0 )
 883                 peExpr = inlineList;
 884         else if ( strcmp( var, "eof" ) == 0 )
 885                 eofExpr = inlineList;
 886         else if ( strcmp( var, "cs" ) == 0 )
 887                 csExpr = inlineList;
 888         else if ( strcmp( var, "data" ) == 0 )
 889                 dataExpr = inlineList;
 890         else if ( strcmp( var, "top" ) == 0 )
 891                 topExpr = inlineList;
 892         else if ( strcmp( var, "stack" ) == 0 )
 893                 stackExpr = inlineList;
 894         else if ( strcmp( var, "act" ) == 0 )
 895                 actExpr = inlineList;
 896         else if ( strcmp( var, "ts" ) == 0 )
 897                 tokstartExpr = inlineList;
 898         else if ( strcmp( var, "te" ) == 0 )
 899                 tokendExpr = inlineList;
 900         else
 901                 set = false;
 902
 903         return set;
 904 }
 905
 906 /* Initialize the key operators object that will be referenced by all fsms
 907  * created. */
 908 void ParseData::initKeyOps( )
 909 {
 910         /* Signedness and bounds. */
 911         HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType;
 912         thisKeyOps.setAlphType( alphType );
 913
 914         if ( lowerNum != 0 ) {
 915                 /* If ranges are given then interpret the alphabet type. */
 916                 thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this );
 917                 thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
 918         }
 919
 920         thisCondData.lastCondKey = thisKeyOps.maxKey;
 921 }
 922
 923 void ParseData::printNameInst( NameInst *nameInst, int level )
 924 {
 925         for ( int i = 0; i < level; i++ )
 926                 cerr << "  ";
 927         cerr << (nameInst->name != 0 ? nameInst->name : "<ANON>") <<
 928                         "  id: " << nameInst->id <<
 929                         "  refs: " << nameInst->numRefs <<
 930                         "  uses: " << nameInst->numUses << endl;
 931         for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ )
 932                 printNameInst( *name, level+1 );
 933 }
 934
 935 /* Remove duplicates of unique actions from an action table. */
 936 void ParseData::removeDups( ActionTable &table )
 937 {
 938         /* Scan through the table looking for unique actions to
 939          * remove duplicates of. */
 940         for ( int i = 0; i < table.length(); i++ ) {
 941                 /* Remove any duplicates ahead of i. */
 942                 for ( int r = i+1; r < table.length(); ) {
 943                         if ( table[r].value == table[i].value )
 944                                 table.vremove(r);
 945                         else
 946                                 r += 1;
 947                 }
 948         }
 949 }
 950
 951 /* Remove duplicates from action lists. This operates only on transition and
 952  * eof action lists and so should be called once all actions have been
 953  * transfered to their final resting place. */
 954 void ParseData::removeActionDups( FsmAp *graph )
 955 {
 956         /* Loop all states. */
 957         for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
 958                 /* Loop all transitions. */
 959                 for ( TransList::Iter trans = state->outList; trans.lte(); trans++ )
 960                         removeDups( trans->actionTable );
 961                 removeDups( state->toStateActionTable );
 962                 removeDups( state->fromStateActionTable );
 963                 removeDups( state->eofActionTable );
 964         }
 965 }
 966
 967 Action *ParseData::newAction( const char *name, InlineList *inlineList )
 968 {
 969         InputLoc loc;
 970         loc.line = 1;
 971         loc.col = 1;
 972         loc.fileName = "<NONE>";
 973
 974         Action *action = new Action( loc, name, inlineList, nextCondId++ );
 975         action->actionRefs.append( rootName );
 976         actionList.append( action );
 977         return action;
 978 }
 979
 980 void ParseData::initLongestMatchData()
 981 {
 982         if ( lmList.length() > 0 ) {
 983                 /* The initTokStart action resets the token start. */
 984                 InlineList *il1 = new InlineList;
 985                 il1->append( new InlineItem( InputLoc(), InlineItem::LmInitTokStart ) );
 986                 initTokStart = newAction( "initts", il1 );
 987                 initTokStart->isLmAction = true;
 988
 989                 /* The initActId action gives act a default value. */
 990                 InlineList *il4 = new InlineList;
 991                 il4->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) );
 992                 initActId = newAction( "initact", il4 );
 993                 initActId->isLmAction = true;
 994
 995                 /* The setTokStart action sets tokstart. */
 996                 InlineList *il5 = new InlineList;
 997                 il5->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) );
 998                 setTokStart = newAction( "ts", il5 );
 999                 setTokStart->isLmAction = true;
1000
1001                 /* The setTokEnd action sets tokend. */
1002                 InlineList *il3 = new InlineList;
1003                 il3->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) );
1004                 setTokEnd = newAction( "te", il3 );
1005                 setTokEnd->isLmAction = true;
1006
1007                 /* The action will also need an ordering: ahead of all user action
1008                  * embeddings. */
1009                 initTokStartOrd = curActionOrd++;
1010                 initActIdOrd = curActionOrd++;
1011                 setTokStartOrd = curActionOrd++;
1012                 setTokEndOrd = curActionOrd++;
1013         }
1014 }
1015
1016 /* After building the graph, do some extra processing to ensure the runtime
1017  * data of the longest mactch operators is consistent. */
1018 void ParseData::setLongestMatchData( FsmAp *graph )
1019 {
1020         if ( lmList.length() > 0 ) {
1021                 /* Make sure all entry points (targets of fgoto, fcall, fnext, fentry)
1022                  * init the tokstart. */
1023                 for ( EntryMap::Iter en = graph->entryPoints; en.lte(); en++ ) {
1024                         /* This is run after duplicates are removed, we must guard against
1025                          * inserting a duplicate. */
1026                         ActionTable &actionTable = en->value->toStateActionTable;
1027                         if ( ! actionTable.hasAction( initTokStart ) )
1028                                 actionTable.setAction( initTokStartOrd, initTokStart );
1029                 }
1030
1031                 /* Find the set of states that are the target of transitions with
1032                  * actions that have calls. These states will be targeted by fret
1033                  * statements. */
1034                 StateSet states;
1035                 for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
1036                         for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
1037                                 for ( ActionTable::Iter ati = trans->actionTable; ati.lte(); ati++ ) {
1038                                         if ( ati->value->anyCall && trans->toState != 0 )
1039                                                 states.insert( trans->toState );
1040                                 }
1041                         }
1042                 }
1043
1044
1045                 /* Init tokstart upon entering the above collected states. */
1046                 for ( StateSet::Iter ps = states; ps.lte(); ps++ ) {
1047                         /* This is run after duplicates are removed, we must guard against
1048                          * inserting a duplicate. */
1049                         ActionTable &actionTable = (*ps)->toStateActionTable;
1050                         if ( ! actionTable.hasAction( initTokStart ) )
1051                                 actionTable.setAction( initTokStartOrd, initTokStart );
1052                 }
1053         }
1054 }
1055
1056 /* Make the graph from a graph dict node. Does minimization and state sorting. */
1057 FsmAp *ParseData::makeInstance( GraphDictEl *gdNode )
1058 {
1059         /* Build the graph from a walk of the parse tree. */
1060         FsmAp *graph = gdNode->value->walk( this );
1061
1062         /* Resolve any labels that point to multiple states. Any labels that are
1063          * still around are referenced only by gotos and calls and they need to be
1064          * made into deterministic entry points. */
1065         graph->deterministicEntry();
1066
1067         /*
1068          * All state construction is now complete.
1069          */
1070
1071         /* Transfer actions from the out action tables to eof action tables. */
1072         for ( StateSet::Iter state = graph->finStateSet; state.lte(); state++ )
1073                 graph->transferOutActions( *state );
1074
1075         /* Transfer global error actions. */
1076         for ( StateList::Iter state = graph->stateList; state.lte(); state++ )
1077                 graph->transferErrorActions( state, 0 );
1078
1079         if ( ::wantDupsRemoved )
1080                 removeActionDups( graph );
1081
1082         /* Remove unreachable states. There should be no dead end states. The
1083          * subtract and intersection operators are the only places where they may
1084          * be created and those operators clean them up. */
1085         graph->removeUnreachableStates();
1086
1087         /* No more fsm operations are to be done. Action ordering numbers are
1088          * no longer of use and will just hinder minimization. Clear them. */
1089         graph->nullActionKeys();
1090
1091         /* Transition priorities are no longer of use. We can clear them
1092          * because they will just hinder minimization as well. Clear them. */
1093         graph->clearAllPriorities();
1094
1095         if ( minimizeOpt != MinimizeNone ) {
1096                 /* Minimize here even if we minimized at every op. Now that function
1097                  * keys have been cleared we may get a more minimal fsm. */
1098                 switch ( minimizeLevel ) {
1099                         case MinimizeApprox:
1100                                 graph->minimizeApproximate();
1101                                 break;
1102                         case MinimizeStable:
1103                                 graph->minimizeStable();
1104                                 break;
1105                         case MinimizePartition1:
1106                                 graph->minimizePartition1();
1107                                 break;
1108                         case MinimizePartition2:
1109                                 graph->minimizePartition2();
1110                                 break;
1111                 }
1112         }
1113
1114         graph->compressTransitions();
1115
1116         return graph;
1117 }
1118
1119 void ParseData::printNameTree()
1120 {
1121         /* Print the name instance map. */
1122         for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ )
1123                 printNameInst( *name, 0 );
1124
1125         cerr << "name index:" << endl;
1126         /* Show that the name index is correct. */
1127         for ( int ni = 0; ni < nextNameId; ni++ ) {
1128                 cerr << ni << ": ";
1129                 const char *name = nameIndex[ni]->name;
1130                 cerr << ( name != 0 ? name : "<ANON>" ) << endl;
1131         }
1132 }
1133
1134 FsmAp *ParseData::makeSpecific( GraphDictEl *gdNode )
1135 {
1136         /* Build the name tree and supporting data structures. */
1137         makeNameTree( gdNode );
1138
1139         /* Resove name references from gdNode. */
1140         initNameWalk();
1141         gdNode->value->resolveNameRefs( this );
1142
1143         /* Do not resolve action references. Since we are not building the entire
1144          * graph there's a good chance that many name references will fail. This
1145          * is okay since generating part of the graph is usually only done when
1146          * inspecting the compiled machine. */
1147
1148         /* Same story for extern entry point references. */
1149
1150         /* Flag this case so that the XML code generator is aware that we haven't
1151          * looked up name references in actions. It can then avoid segfaulting. */
1152         generatingSectionSubset = true;
1153
1154         /* Just building the specified graph. */
1155         initNameWalk();
1156         FsmAp *mainGraph = makeInstance( gdNode );
1157
1158         return mainGraph;
1159 }
1160
1161 FsmAp *ParseData::makeAll()
1162 {
1163         /* Build the name tree and supporting data structures. */
1164         makeNameTree( 0 );
1165
1166         /* Resove name references in the tree. */
1167         initNameWalk();
1168         for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ )
1169                 glel->value->resolveNameRefs( this );
1170
1171         /* Resolve action code name references. */
1172         resolveActionNameRefs();
1173
1174         /* Force name references to the top level instantiations. */
1175         for ( NameVect::Iter inst = rootName->childVect; inst.lte(); inst++ )
1176                 (*inst)->numRefs += 1;
1177
1178         FsmAp *mainGraph = 0;
1179         FsmAp **graphs = new FsmAp*[instanceList.length()];
1180         int numOthers = 0;
1181
1182         /* Make all the instantiations, we know that main exists in this list. */
1183         initNameWalk();
1184         for ( GraphList::Iter glel = instanceList; glel.lte();  glel++ ) {
1185                 if ( strcmp( glel->key, mainMachine ) == 0 ) {
1186                         /* Main graph is always instantiated. */
1187                         mainGraph = makeInstance( glel );
1188                 }
1189                 else {
1190                         /* Instantiate and store in others array. */
1191                         graphs[numOthers++] = makeInstance( glel );
1192                 }
1193         }
1194
1195         if ( mainGraph == 0 )
1196                 mainGraph = graphs[--numOthers];
1197
1198         if ( numOthers > 0 ) {
1199                 /* Add all the other graphs into main. */
1200                 mainGraph->globOp( graphs, numOthers );
1201         }
1202
1203         delete[] graphs;
1204         return mainGraph;
1205 }
1206
1207 void ParseData::analyzeAction( Action *action, InlineList *inlineList )
1208 {
1209         /* FIXME: Actions used as conditions should be very constrained. */
1210         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1211                 if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr )
1212                         action->anyCall = true;
1213
1214                 /* Need to recurse into longest match items. */
1215                 if ( item->type == InlineItem::LmSwitch ) {
1216                         LongestMatch *lm = item->longestMatch;
1217                         for ( LmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) {
1218                                 if ( lmi->action != 0 )
1219                                         analyzeAction( action, lmi->action->inlineList );
1220                         }
1221                 }
1222
1223                 if ( item->type == InlineItem::LmOnLast ||
1224                                 item->type == InlineItem::LmOnNext ||
1225                                 item->type == InlineItem::LmOnLagBehind )
1226                 {
1227                         LongestMatchPart *lmi = item->longestMatchPart;
1228                         if ( lmi->action != 0 )
1229                                 analyzeAction( action, lmi->action->inlineList );
1230                 }
1231
1232                 if ( item->children != 0 )
1233                         analyzeAction( action, item->children );
1234         }
1235 }
1236
1237
1238 /* Check actions for bad uses of fsm directives. We don't go inside longest
1239  * match items in actions created by ragel, since we just want the user
1240  * actions. */
1241 void ParseData::checkInlineList( Action *act, InlineList *inlineList )
1242 {
1243         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1244                 /* EOF checks. */
1245                 if ( act->numEofRefs > 0 ) {
1246                         switch ( item->type ) {
1247                                 /* Currently no checks. */
1248                                 default:
1249                                         break;
1250                         }
1251                 }
1252
1253                 /* Recurse. */
1254                 if ( item->children != 0 )
1255                         checkInlineList( act, item->children );
1256         }
1257 }
1258
1259 void ParseData::checkAction( Action *action )
1260 {
1261         /* Check for actions with calls that are embedded within a longest match
1262          * machine. */
1263         if ( !action->isLmAction && action->numRefs() > 0 && action->anyCall ) {
1264                 for ( ActionRefs::Iter ar = action->actionRefs; ar.lte(); ar++ ) {
1265                         NameInst *check = *ar;
1266                         while ( check != 0 ) {
1267                                 if ( check->isLongestMatch ) {
1268                                         error(action->loc) << "within a scanner, fcall is permitted"
1269                                                 " only in pattern actions" << endl;
1270                                         break;
1271                                 }
1272                                 check = check->parent;
1273                         }
1274                 }
1275         }
1276
1277         checkInlineList( action, action->inlineList );
1278 }
1279
1280
1281 void ParseData::analyzeGraph( FsmAp *graph )
1282 {
1283         for ( ActionList::Iter act = actionList; act.lte(); act++ )
1284                 analyzeAction( act, act->inlineList );
1285
1286         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
1287                 /* The transition list. */
1288                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
1289                         for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ )
1290                                 at->value->numTransRefs += 1;
1291                 }
1292
1293                 for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ )
1294                         at->value->numToStateRefs += 1;
1295
1296                 for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ )
1297                         at->value->numFromStateRefs += 1;
1298
1299                 for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ )
1300                         at->value->numEofRefs += 1;
1301
1302                 for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) {
1303                         for ( CondSet::Iter sci = sc->condSpace->condSet; sci.lte(); sci++ )
1304                                 (*sci)->numCondRefs += 1;
1305                 }
1306         }
1307
1308         /* Checks for bad usage of directives in action code. */
1309         for ( ActionList::Iter act = actionList; act.lte(); act++ )
1310                 checkAction( act );
1311 }
1312
1313 void ParseData::makeExportsNameTree()
1314 {
1315         /* Make a name tree for the exports. */
1316         initExportsNameWalk();
1317
1318         /* First make the name tree. */
1319         for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1320                 if ( gdel->value->isExport ) {
1321                         /* Recurse on the instance. */
1322                         gdel->value->makeNameTree( gdel->loc, this );
1323                 }
1324         }
1325 }
1326
1327 void ParseData::makeExports()
1328 {
1329         makeExportsNameTree();
1330
1331         /* Resove name references in the tree. */
1332         initExportsNameWalk();
1333         for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1334                 if ( gdel->value->isExport )
1335                         gdel->value->resolveNameRefs( this );
1336         }
1337
1338         /* Make all the instantiations, we know that main exists in this list. */
1339         initExportsNameWalk();
1340         for ( GraphDict::Iter gdel = graphDict; gdel.lte();  gdel++ ) {
1341                 /* Check if this var def is an export. */
1342                 if ( gdel->value->isExport ) {
1343                         /* Build the graph from a walk of the parse tree. */
1344                         FsmAp *graph = gdel->value->walk( this );
1345
1346                         /* Build the graph from a walk of the parse tree. */
1347                         if ( !graph->checkSingleCharMachine() ) {
1348                                 error(gdel->loc) << "bad export machine, must define "
1349                                                 "a single character" << endl;
1350                         }
1351                         else {
1352                                 /* Safe to extract the key and declare the export. */
1353                                 Key exportKey = graph->startState->outList.head->lowKey;
1354                                 exportList.append( new Export( gdel->value->name, exportKey ) );
1355                         }
1356                 }
1357         }
1358
1359 }
1360
1361 /* Construct the machine and catch failures which can occur during
1362  * construction. */
1363 void ParseData::prepareMachineGen( GraphDictEl *graphDictEl )
1364 {
1365         try {
1366                 /* This machine construction can fail. */
1367                 prepareMachineGenTBWrapped( graphDictEl );
1368         }
1369         catch ( FsmConstructFail fail ) {
1370                 switch ( fail.reason ) {
1371                         case FsmConstructFail::CondNoKeySpace: {
1372                                 InputLoc &loc = alphTypeSet ? alphTypeLoc : sectionLoc;
1373                                 error(loc) << "sorry, no more characters are "
1374                                                 "available in the alphabet space" << endl;
1375                                 error(loc) << "  for conditions, please use a "
1376                                                 "smaller alphtype or reduce" << endl;
1377                                 error(loc) << "  the span of characters on which "
1378                                                 "conditions are embedded" << endl;
1379                                 break;
1380                         }
1381                 }
1382         }
1383 }
1384
1385 void ParseData::prepareMachineGenTBWrapped( GraphDictEl *graphDictEl )
1386 {
1387         beginProcessing();
1388         initKeyOps();
1389         makeRootNames();
1390         initLongestMatchData();
1391
1392         /* Make the graph, do minimization. */
1393         if ( graphDictEl == 0 )
1394                 sectionGraph = makeAll();
1395         else
1396                 sectionGraph = makeSpecific( graphDictEl );
1397
1398         /* Compute exports from the export definitions. */
1399         makeExports();
1400
1401         /* If any errors have occured in the input file then don't write anything. */
1402         if ( gblErrorCount > 0 )
1403                 return;
1404
1405         analyzeGraph( sectionGraph );
1406
1407         /* Depends on the graph analysis. */
1408         setLongestMatchData( sectionGraph );
1409
1410         /* Decide if an error state is necessary.
1411          *  1. There is an error transition
1412          *  2. There is a gap in the transitions
1413          *  3. The longest match operator requires it. */
1414         if ( lmRequiresErrorState || sectionGraph->hasErrorTrans() )
1415                 sectionGraph->errState = sectionGraph->addState();
1416
1417         /* State numbers need to be assigned such that all final states have a
1418          * larger state id number than all non-final states. This enables the
1419          * first_final mechanism to function correctly. We also want states to be
1420          * ordered in a predictable fashion. So we first apply a depth-first
1421          * search, then do a stable sort by final state status, then assign
1422          * numbers. */
1423
1424         sectionGraph->depthFirstOrdering();
1425         sectionGraph->sortStatesByFinal();
1426         sectionGraph->setStateNumbers( 0 );
1427 }
1428
1429 void ParseData::generateXML( ostream &out )
1430 {
1431         beginProcessing();
1432
1433         /* Make the generator. */
1434         XMLCodeGen codeGen( sectionName, this, sectionGraph, out );
1435
1436         /* Write out with it. */
1437         codeGen.writeXML();
1438
1439         if ( printStatistics ) {
1440                 cerr << "fsm name  : " << sectionName << endl;
1441                 cerr << "num states: " << sectionGraph->stateList.length() << endl;
1442                 cerr << endl;
1443         }
1444 }
1445
1446 /* Send eof to all parsers. */
1447 void terminateAllParsers( )
1448 {
1449         /* FIXME: a proper token is needed here. Suppose we should use the
1450          * location of EOF in the last file that the parser was referenced in. */
1451         InputLoc loc;
1452         loc.fileName = "<EOF>";
1453         loc.line = 0;
1454         loc.col = 0;
1455         for ( ParserDict::Iter pdel = parserDict; pdel.lte(); pdel++ )
1456                 pdel->value->token( loc, _eof, 0, 0 );
1457 }
1458
1459 void writeLanguage( std::ostream &out )
1460 {
1461         out << " lang=\"";
1462         switch ( hostLang->lang ) {
1463                 case HostLang::C:    out << "C"; break;
1464                 case HostLang::D:    out << "D"; break;
1465                 case HostLang::Java: out << "Java"; break;
1466                 case HostLang::Ruby: out << "Ruby"; break;
1467                 case HostLang::CSharp: out << "C#"; break;
1468         }
1469         out << "\"";
1470
1471 }
1472
1473 void writeMachines( std::ostream &out, std::string hostData, char *inputFileName )
1474 {
1475         if ( machineSpec == 0 && machineName == 0 ) {
1476                 /* No machine spec or machine name given. Generate everything. */
1477                 for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1478                         ParseData *pd = parser->value->pd;
1479                         if ( pd->instanceList.length() > 0 )
1480                                 pd->prepareMachineGen( 0 );
1481                 }
1482
1483                 if ( gblErrorCount == 0 ) {
1484                         out << "<ragel version=\"" VERSION "\" filename=\"" << inputFileName << "\"";
1485                         writeLanguage( out );
1486                         out << ">\n";
1487                         for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1488                                 ParseData *pd = parser->value->pd;
1489                                 if ( pd->instanceList.length() > 0 )
1490                                         pd->generateXML( out );
1491                         }
1492                         out << hostData;
1493                         out << "</ragel>\n";
1494                 }
1495         }
1496         else if ( parserDict.length() > 0 ) {
1497                 /* There is either a machine spec or machine name given. */
1498                 ParseData *parseData = 0;
1499                 GraphDictEl *graphDictEl = 0;
1500
1501                 /* Traverse the sections, break out when we find a section/machine
1502                  * that matches the one specified. */
1503                 for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1504                         ParseData *checkPd = parser->value->pd;
1505                         if ( machineSpec == 0 || strcmp( checkPd->sectionName, machineSpec ) == 0 ) {
1506                                 GraphDictEl *checkGdEl = 0;
1507                                 if ( machineName == 0 || (checkGdEl =
1508                                                 checkPd->graphDict.find( machineName )) != 0 )
1509                                 {
1510                                         /* Have a machine spec and/or machine name that matches
1511                                          * the -M/-S options. */
1512                                         parseData = checkPd;
1513                                         graphDictEl = checkGdEl;
1514                                         break;
1515                                 }
1516                         }
1517                 }
1518
1519                 if ( parseData == 0 )
1520                         error() << "could not locate machine specified with -S and/or -M" << endl;
1521                 else {
1522                         /* Section/Machine to emit was found. Prepare and emit it. */
1523                         parseData->prepareMachineGen( graphDictEl );
1524                         if ( gblErrorCount == 0 ) {
1525                                 out << "<ragel version=\"" VERSION "\" filename=\"" << inputFileName << "\"";
1526                                 writeLanguage( out );
1527                                 out << ">\n";
1528                                 parseData->generateXML( out );
1529                                 out << hostData;
1530                                 out << "</ragel>\n";
1531                         }
1532                 }
1533         }
1534 }