ragel/parsedata.cpp

   1 /*
   2  *  Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
   3  */
   4
   5 /*  This file is part of Ragel.
   6  *
   7  *  Ragel is free software; you can redistribute it and/or modify
   8  *  it under the terms of the GNU General Public License as published by
   9  *  the Free Software Foundation; either version 2 of the License, or
  10  *  (at your option) any later version.
  11  *
  12  *  Ragel is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  *  GNU General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU General Public License
  18  *  along with Ragel; if not, write to the Free Software
  19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  */
  21
  22 #include <iostream>
  23 #include <iomanip>
  24 #include <errno.h>
  25 #include <stdlib.h>
  26 #include <limits.h>
  27
  28 #include "ragel.h"
  29 #include "rlparse.h"
  30 #include "parsedata.h"
  31 #include "parsetree.h"
  32 #include "mergesort.h"
  33 #include "xmlcodegen.h"
  34
  35 using namespace std;
  36
  37 char mainMachine[] = "main";
  38
  39 void Token::set( char *str, int len )
  40 {
  41         length = len;
  42         data = new char[len+1];
  43         memcpy( data, str, len );
  44         data[len] = 0;
  45 }
  46
  47 void Token::append( const Token &other )
  48 {
  49         int newLength = length + other.length;
  50         char *newString = new char[newLength+1];
  51         memcpy( newString, data, length );
  52         memcpy( newString + length, other.data, other.length );
  53         newString[newLength] = 0;
  54         data = newString;
  55         length = newLength;
  56 }
  57
  58 /* Perform minimization after an operation according
  59  * to the command line args. */
  60 void afterOpMinimize( FsmAp *fsm, bool lastInSeq )
  61 {
  62         /* Switch on the prefered minimization algorithm. */
  63         if ( minimizeOpt == MinimizeEveryOp || minimizeOpt == MinimizeMostOps && lastInSeq ) {
  64                 /* First clean up the graph. FsmAp operations may leave these
  65                  * lying around. There should be no dead end states. The subtract
  66                  * intersection operators are the only places where they may be
  67                  * created and those operators clean them up. */
  68                 fsm->removeUnreachableStates();
  69
  70                 switch ( minimizeLevel ) {
  71                         case MinimizeApprox:
  72                                 fsm->minimizeApproximate();
  73                                 break;
  74                         case MinimizePartition1:
  75                                 fsm->minimizePartition1();
  76                                 break;
  77                         case MinimizePartition2:
  78                                 fsm->minimizePartition2();
  79                                 break;
  80                         case MinimizeStable:
  81                                 fsm->minimizeStable();
  82                                 break;
  83                 }
  84         }
  85 }
  86
  87 /* Count the transitions in the fsm by walking the state list. */
  88 int countTransitions( FsmAp *fsm )
  89 {
  90         int numTrans = 0;
  91         StateAp *state = fsm->stateList.head;
  92         while ( state != 0 ) {
  93                 numTrans += state->outList.length();
  94                 state = state->next;
  95         }
  96         return numTrans;
  97 }
  98
  99 Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd )
 100 {
 101         /* Reset errno so we can check for overflow or underflow. In the event of
 102          * an error, sets the return val to the upper or lower bound being tested
 103          * against. */
 104         errno = 0;
 105         unsigned int size = keyOps->alphType->size;
 106         bool unusedBits = size < sizeof(unsigned long);
 107
 108         unsigned long ul = strtoul( str, 0, 16 );
 109
 110         if ( errno == ERANGE || unusedBits && ul >> (size * 8) ) {
 111                 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
 112                 ul = 1 << (size * 8);
 113         }
 114
 115         if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) )
 116                 ul |= (0xffffffff >> (size*8 ) ) << (size*8);
 117
 118         return Key( (long)ul );
 119 }
 120
 121 Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd )
 122 {
 123         /* Convert the number to a decimal. First reset errno so we can check
 124          * for overflow or underflow. */
 125         errno = 0;
 126         long long minVal = keyOps->alphType->minVal;
 127         long long maxVal = keyOps->alphType->maxVal;
 128
 129         long long ll = strtoll( str, 0, 10 );
 130
 131         /* Check for underflow. */
 132         if ( errno == ERANGE && ll < 0 || ll < minVal) {
 133                 error(loc) << "literal " << str << " underflows the alphabet type" << endl;
 134                 ll = minVal;
 135         }
 136         /* Check for overflow. */
 137         else if ( errno == ERANGE && ll > 0 || ll > maxVal ) {
 138                 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
 139                 ll = maxVal;
 140         }
 141
 142         if ( keyOps->alphType->isSigned )
 143                 return Key( (long)ll );
 144         else
 145                 return Key( (unsigned long)ll );
 146 }
 147
 148 /* Make an fsm key in int format (what the fsm graph uses) from an alphabet
 149  * number returned by the parser. Validates that the number doesn't overflow
 150  * the alphabet type. */
 151 Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd )
 152 {
 153         /* Switch on hex/decimal format. */
 154         if ( str[0] == '0' && str[1] == 'x' )
 155                 return makeFsmKeyHex( str, loc, pd );
 156         else
 157                 return makeFsmKeyDec( str, loc, pd );
 158 }
 159
 160 /* Make an fsm int format (what the fsm graph uses) from a single character.
 161  * Performs proper conversion depending on signed/unsigned property of the
 162  * alphabet. */
 163 Key makeFsmKeyChar( char c, ParseData *pd )
 164 {
 165         if ( keyOps->isSigned ) {
 166                 /* Copy from a char type. */
 167                 return Key( c );
 168         }
 169         else {
 170                 /* Copy from an unsigned byte type. */
 171                 return Key( (unsigned char)c );
 172         }
 173 }
 174
 175 /* Make an fsm key array in int format (what the fsm graph uses) from a string
 176  * of characters. Performs proper conversion depending on signed/unsigned
 177  * property of the alphabet. */
 178 void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd )
 179 {
 180         if ( keyOps->isSigned ) {
 181                 /* Copy from a char star type. */
 182                 char *src = data;
 183                 for ( int i = 0; i < len; i++ )
 184                         result[i] = Key(src[i]);
 185         }
 186         else {
 187                 /* Copy from an unsigned byte ptr type. */
 188                 unsigned char *src = (unsigned char*) data;
 189                 for ( int i = 0; i < len; i++ )
 190                         result[i] = Key(src[i]);
 191         }
 192 }
 193
 194 /* Like makeFsmKeyArray except the result has only unique keys. They ordering
 195  * will be changed. */
 196 void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
 197                 bool caseInsensitive, ParseData *pd )
 198 {
 199         /* Use a transitions list for getting unique keys. */
 200         if ( keyOps->isSigned ) {
 201                 /* Copy from a char star type. */
 202                 char *src = data;
 203                 for ( int si = 0; si < len; si++ ) {
 204                         Key key( src[si] );
 205                         result.insert( key );
 206                         if ( caseInsensitive ) {
 207                                 if ( key.isLower() )
 208                                         result.insert( key.toUpper() );
 209                                 else if ( key.isUpper() )
 210                                         result.insert( key.toLower() );
 211                         }
 212                 }
 213         }
 214         else {
 215                 /* Copy from an unsigned byte ptr type. */
 216                 unsigned char *src = (unsigned char*) data;
 217                 for ( int si = 0; si < len; si++ ) {
 218                         Key key( src[si] );
 219                         result.insert( key );
 220                         if ( caseInsensitive ) {
 221                                 if ( key.isLower() )
 222                                         result.insert( key.toUpper() );
 223                                 else if ( key.isUpper() )
 224                                         result.insert( key.toLower() );
 225                         }
 226                 }
 227         }
 228 }
 229
 230 FsmAp *dotFsm( ParseData *pd )
 231 {
 232         FsmAp *retFsm = new FsmAp();
 233         retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey );
 234         return retFsm;
 235 }
 236
 237 FsmAp *dotStarFsm( ParseData *pd )
 238 {
 239         FsmAp *retFsm = new FsmAp();
 240         retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey );
 241         return retFsm;
 242 }
 243
 244 /* Make a builtin type. Depends on the signed nature of the alphabet type. */
 245 FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd )
 246 {
 247         /* FsmAp created to return. */
 248         FsmAp *retFsm = 0;
 249         bool isSigned = keyOps->isSigned;
 250
 251         switch ( builtin ) {
 252         case BT_Any: {
 253                 /* All characters. */
 254                 retFsm = dotFsm( pd );
 255                 break;
 256         }
 257         case BT_Ascii: {
 258                 /* Ascii characters 0 to 127. */
 259                 retFsm = new FsmAp();
 260                 retFsm->rangeFsm( 0, 127 );
 261                 break;
 262         }
 263         case BT_Extend: {
 264                 /* Ascii extended characters. This is the full byte range. Dependent
 265                  * on signed, vs no signed. If the alphabet is one byte then just use
 266                  * dot fsm. */
 267                 if ( isSigned ) {
 268                         retFsm = new FsmAp();
 269                         retFsm->rangeFsm( -128, 127 );
 270                 }
 271                 else {
 272                         retFsm = new FsmAp();
 273                         retFsm->rangeFsm( 0, 255 );
 274                 }
 275                 break;
 276         }
 277         case BT_Alpha: {
 278                 /* Alpha [A-Za-z]. */
 279                 FsmAp *upper = new FsmAp(), *lower = new FsmAp();
 280                 upper->rangeFsm( 'A', 'Z' );
 281                 lower->rangeFsm( 'a', 'z' );
 282                 upper->unionOp( lower );
 283                 upper->minimizePartition2();
 284                 retFsm = upper;
 285                 break;
 286         }
 287         case BT_Digit: {
 288                 /* Digits [0-9]. */
 289                 retFsm = new FsmAp();
 290                 retFsm->rangeFsm( '0', '9' );
 291                 break;
 292         }
 293         case BT_Alnum: {
 294                 /* Alpha numerics [0-9A-Za-z]. */
 295                 FsmAp *digit = new FsmAp(), *lower = new FsmAp();
 296                 FsmAp *upper = new FsmAp();
 297                 digit->rangeFsm( '0', '9' );
 298                 upper->rangeFsm( 'A', 'Z' );
 299                 lower->rangeFsm( 'a', 'z' );
 300                 digit->unionOp( upper );
 301                 digit->unionOp( lower );
 302                 digit->minimizePartition2();
 303                 retFsm = digit;
 304                 break;
 305         }
 306         case BT_Lower: {
 307                 /* Lower case characters. */
 308                 retFsm = new FsmAp();
 309                 retFsm->rangeFsm( 'a', 'z' );
 310                 break;
 311         }
 312         case BT_Upper: {
 313                 /* Upper case characters. */
 314                 retFsm = new FsmAp();
 315                 retFsm->rangeFsm( 'A', 'Z' );
 316                 break;
 317         }
 318         case BT_Cntrl: {
 319                 /* Control characters. */
 320                 FsmAp *cntrl = new FsmAp();
 321                 FsmAp *highChar = new FsmAp();
 322                 cntrl->rangeFsm( 0, 31 );
 323                 highChar->concatFsm( 127 );
 324                 cntrl->unionOp( highChar );
 325                 cntrl->minimizePartition2();
 326                 retFsm = cntrl;
 327                 break;
 328         }
 329         case BT_Graph: {
 330                 /* Graphical ascii characters [!-~]. */
 331                 retFsm = new FsmAp();
 332                 retFsm->rangeFsm( '!', '~' );
 333                 break;
 334         }
 335         case BT_Print: {
 336                 /* Printable characters. Same as graph except includes space. */
 337                 retFsm = new FsmAp();
 338                 retFsm->rangeFsm( ' ', '~' );
 339                 break;
 340         }
 341         case BT_Punct: {
 342                 /* Punctuation. */
 343                 FsmAp *range1 = new FsmAp();
 344                 FsmAp *range2 = new FsmAp();
 345                 FsmAp *range3 = new FsmAp();
 346                 FsmAp *range4 = new FsmAp();
 347                 range1->rangeFsm( '!', '/' );
 348                 range2->rangeFsm( ':', '@' );
 349                 range3->rangeFsm( '[', '`' );
 350                 range4->rangeFsm( '{', '~' );
 351                 range1->unionOp( range2 );
 352                 range1->unionOp( range3 );
 353                 range1->unionOp( range4 );
 354                 range1->minimizePartition2();
 355                 retFsm = range1;
 356                 break;
 357         }
 358         case BT_Space: {
 359                 /* Whitespace: [\t\v\f\n\r ]. */
 360                 FsmAp *cntrl = new FsmAp();
 361                 FsmAp *space = new FsmAp();
 362                 cntrl->rangeFsm( '\t', '\r' );
 363                 space->concatFsm( ' ' );
 364                 cntrl->unionOp( space );
 365                 cntrl->minimizePartition2();
 366                 retFsm = cntrl;
 367                 break;
 368         }
 369         case BT_Xdigit: {
 370                 /* Hex digits [0-9A-Fa-f]. */
 371                 FsmAp *digit = new FsmAp();
 372                 FsmAp *upper = new FsmAp();
 373                 FsmAp *lower = new FsmAp();
 374                 digit->rangeFsm( '0', '9' );
 375                 upper->rangeFsm( 'A', 'F' );
 376                 lower->rangeFsm( 'a', 'f' );
 377                 digit->unionOp( upper );
 378                 digit->unionOp( lower );
 379                 digit->minimizePartition2();
 380                 retFsm = digit;
 381                 break;
 382         }
 383         case BT_Lambda: {
 384                 retFsm = new FsmAp();
 385                 retFsm->lambdaFsm();
 386                 break;
 387         }
 388         case BT_Empty: {
 389                 retFsm = new FsmAp();
 390                 retFsm->emptyFsm();
 391                 break;
 392         }}
 393
 394         return retFsm;
 395 }
 396
 397 /* Check if this name inst or any name inst below is referenced. */
 398 bool NameInst::anyRefsRec()
 399 {
 400         if ( numRefs > 0 )
 401                 return true;
 402
 403         /* Recurse on children until true. */
 404         for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) {
 405                 if ( (*ch)->anyRefsRec() )
 406                         return true;
 407         }
 408
 409         return false;
 410 }
 411
 412 /*
 413  * ParseData
 414  */
 415
 416 /* Initialize the structure that will collect info during the parse of a
 417  * machine. */
 418 ParseData::ParseData( char *fileName, char *sectionName,
 419                 const InputLoc &sectionLoc )
 420 :
 421         sectionGraph(0),
 422         generatingSectionSubset(false),
 423         nextPriorKey(0),
 424         /* 0 is reserved for global error actions. */
 425         nextLocalErrKey(1),
 426         nextNameId(0),
 427         nextCondId(0),
 428         alphTypeSet(false),
 429         getKeyExpr(0),
 430         accessExpr(0),
 431         curStateExpr(0),
 432         lowerNum(0),
 433         upperNum(0),
 434         fileName(fileName),
 435         sectionName(sectionName),
 436         sectionLoc(sectionLoc),
 437         errorCount(0),
 438         curActionOrd(0),
 439         curPriorOrd(0),
 440         rootName(0),
 441         exportsRootName(0),
 442         nextEpsilonResolvedLink(0),
 443         nextLongestMatchId(1),
 444         lmRequiresErrorState(false)
 445 {
 446         /* Initialize the dictionary of graphs. This is our symbol table. The
 447          * initialization needs to be done on construction which happens at the
 448          * beginning of a machine spec so any assignment operators can reference
 449          * the builtins. */
 450         initGraphDict();
 451 }
 452
 453 /* Clean up the data collected during a parse. */
 454 ParseData::~ParseData()
 455 {
 456         /* Delete all the nodes in the action list. Will cause all the
 457          * string data that represents the actions to be deallocated. */
 458         actionList.empty();
 459 }
 460
 461 /* Make a name id in the current name instantiation scope if it is not
 462  * already there. */
 463 NameInst *ParseData::addNameInst( const InputLoc &loc, char *data, bool isLabel )
 464 {
 465         /* Create the name instantitaion object and insert it. */
 466         NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel );
 467         curNameInst->childVect.append( newNameInst );
 468         if ( data != 0 )
 469                 curNameInst->children.insertMulti( data, newNameInst );
 470         return newNameInst;
 471 }
 472
 473 void ParseData::initNameWalk()
 474 {
 475         curNameInst = rootName;
 476         curNameChild = 0;
 477 }
 478
 479 void ParseData::initExportsNameWalk()
 480 {
 481         curNameInst = exportsRootName;
 482         curNameChild = 0;
 483 }
 484
 485 /* Goes into the next child scope. The number of the child is already set up.
 486  * We need this for the syncronous name tree and parse tree walk to work
 487  * properly. It is reset on entry into a scope and advanced on poping of a
 488  * scope. A call to enterNameScope should be accompanied by a corresponding
 489  * popNameScope. */
 490 NameFrame ParseData::enterNameScope( bool isLocal, int numScopes )
 491 {
 492         /* Save off the current data. */
 493         NameFrame retFrame;
 494         retFrame.prevNameInst = curNameInst;
 495         retFrame.prevNameChild = curNameChild;
 496         retFrame.prevLocalScope = localNameScope;
 497
 498         /* Enter into the new name scope. */
 499         for ( int i = 0; i < numScopes; i++ ) {
 500                 curNameInst = curNameInst->childVect[curNameChild];
 501                 curNameChild = 0;
 502         }
 503
 504         if ( isLocal )
 505                 localNameScope = curNameInst;
 506
 507         return retFrame;
 508 }
 509
 510 /* Return from a child scope to a parent. The parent info must be specified as
 511  * an argument and is obtained from the corresponding call to enterNameScope.
 512  * */
 513 void ParseData::popNameScope( const NameFrame &frame )
 514 {
 515         /* Pop the name scope. */
 516         curNameInst = frame.prevNameInst;
 517         curNameChild = frame.prevNameChild+1;
 518         localNameScope = frame.prevLocalScope;
 519 }
 520
 521 void ParseData::resetNameScope( const NameFrame &frame )
 522 {
 523         /* Pop the name scope. */
 524         curNameInst = frame.prevNameInst;
 525         curNameChild = frame.prevNameChild;
 526         localNameScope = frame.prevLocalScope;
 527 }
 528
 529
 530 void ParseData::unsetObsoleteEntries( FsmAp *graph )
 531 {
 532         /* Loop the reference names and increment the usage. Names that are no
 533          * longer needed will be unset in graph. */
 534         for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) {
 535                 /* Get the name. */
 536                 NameInst *name = *ref;
 537                 name->numUses += 1;
 538
 539                 /* If the name is no longer needed unset its corresponding entry. */
 540                 if ( name->numUses == name->numRefs ) {
 541                         assert( graph->entryPoints.find( name->id ) != 0 );
 542                         graph->unsetEntry( name->id );
 543                         assert( graph->entryPoints.find( name->id ) == 0 );
 544                 }
 545         }
 546 }
 547
 548 NameSet ParseData::resolvePart( NameInst *refFrom, char *data, bool recLabelsOnly )
 549 {
 550         /* Queue needed for breadth-first search, load it with the start node. */
 551         NameInstList nameQueue;
 552         nameQueue.append( refFrom );
 553
 554         NameSet result;
 555         while ( nameQueue.length() > 0 ) {
 556                 /* Pull the next from location off the queue. */
 557                 NameInst *from = nameQueue.detachFirst();
 558
 559                 /* Look for the name. */
 560                 NameMapEl *low, *high;
 561                 if ( from->children.findMulti( data, low, high ) ) {
 562                         /* Record all instances of the name. */
 563                         for ( ; low <= high; low++ )
 564                                 result.insert( low->value );
 565                 }
 566
 567                 /* Name not there, do breadth-first operation of appending all
 568                  * childrent to the processing queue. */
 569                 for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) {
 570                         if ( !recLabelsOnly || (*name)->isLabel )
 571                                 nameQueue.append( *name );
 572                 }
 573         }
 574
 575         /* Queue exhausted and name never found. */
 576         return result;
 577 }
 578
 579 void ParseData::resolveFrom( NameSet &result, NameInst *refFrom,
 580                 const NameRef &nameRef, int namePos )
 581 {
 582         /* Look for the name in the owning scope of the factor with aug. */
 583         NameSet partResult = resolvePart( refFrom, nameRef[namePos], false );
 584
 585         /* If there are more parts to the name then continue on. */
 586         if ( ++namePos < nameRef.length() ) {
 587                 /* There are more components to the name, search using all the part
 588                  * results as the base. */
 589                 for ( NameSet::Iter name = partResult; name.lte(); name++ )
 590                         resolveFrom( result, *name, nameRef, namePos );
 591         }
 592         else {
 593                 /* This is the last component, append the part results to the final
 594                  * results. */
 595                 result.insert( partResult );
 596         }
 597 }
 598
 599 /* Write out a name reference. */
 600 ostream &operator<<( ostream &out, const NameRef &nameRef )
 601 {
 602         int pos = 0;
 603         if ( nameRef[pos] == 0 ) {
 604                 out << "::";
 605                 pos += 1;
 606         }
 607         out << nameRef[pos++];
 608         for ( ; pos < nameRef.length(); pos++ )
 609                 out << "::" << nameRef[pos];
 610         return out;
 611 }
 612
 613 ostream &operator<<( ostream &out, const NameInst &nameInst )
 614 {
 615         /* Count the number fully qualified name parts. */
 616         int numParents = 0;
 617         NameInst *curParent = nameInst.parent;
 618         while ( curParent != 0 ) {
 619                 numParents += 1;
 620                 curParent = curParent->parent;
 621         }
 622
 623         /* Make an array and fill it in. */
 624         curParent = nameInst.parent;
 625         NameInst **parents = new NameInst*[numParents];
 626         for ( int p = numParents-1; p >= 0; p-- ) {
 627                 parents[p] = curParent;
 628                 curParent = curParent->parent;
 629         }
 630
 631         /* Write the parents out, skip the root. */
 632         for ( int p = 1; p < numParents; p++ )
 633                 out << "::" << ( parents[p]->name != 0 ? parents[p]->name : "<ANON>" );
 634
 635         /* Write the name and cleanup. */
 636         out << "::" << ( nameInst.name != 0 ? nameInst.name : "<ANON>" );
 637         delete[] parents;
 638         return out;
 639 }
 640
 641 struct CmpNameInstLoc
 642 {
 643         static int compare( const NameInst *ni1, const NameInst *ni2 )
 644         {
 645                 if ( ni1->loc.line < ni2->loc.line )
 646                         return -1;
 647                 else if ( ni1->loc.line > ni2->loc.line )
 648                         return 1;
 649                 else if ( ni1->loc.col < ni2->loc.col )
 650                         return -1;
 651                 else if ( ni1->loc.col > ni2->loc.col )
 652                         return 1;
 653                 return 0;
 654         }
 655 };
 656
 657 void errorStateLabels( const NameSet &resolved )
 658 {
 659         MergeSort<NameInst*, CmpNameInstLoc> mergeSort;
 660         mergeSort.sort( resolved.data, resolved.length() );
 661         for ( NameSet::Iter res = resolved; res.lte(); res++ )
 662                 error((*res)->loc) << "  -> " << **res << endl;
 663 }
 664
 665
 666 NameInst *ParseData::resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action )
 667 {
 668         NameInst *nameInst = 0;
 669
 670         /* Do the local search if the name is not strictly a root level name
 671          * search. */
 672         if ( nameRef[0] != 0 ) {
 673                 /* If the action is referenced, resolve all of them. */
 674                 if ( action != 0 && action->actionRefs.length() > 0 ) {
 675                         /* Look for the name in all referencing scopes. */
 676                         NameSet resolved;
 677                         for ( ActionRefs::Iter actRef = action->actionRefs; actRef.lte(); actRef++ )
 678                                 resolveFrom( resolved, *actRef, nameRef, 0 );
 679
 680                         if ( resolved.length() > 0 ) {
 681                                 /* Take the first one. */
 682                                 nameInst = resolved[0];
 683                                 if ( resolved.length() > 1 ) {
 684                                         /* Complain about the multiple references. */
 685                                         error(loc) << "state reference " << nameRef <<
 686                                                         " resolves to multiple entry points" << endl;
 687                                         errorStateLabels( resolved );
 688                                 }
 689                         }
 690                 }
 691         }
 692
 693         /* If not found in the local scope, look in global. */
 694         if ( nameInst == 0 ) {
 695                 NameSet resolved;
 696                 int fromPos = nameRef[0] != 0 ? 0 : 1;
 697                 resolveFrom( resolved, rootName, nameRef, fromPos );
 698
 699                 if ( resolved.length() > 0 ) {
 700                         /* Take the first. */
 701                         nameInst = resolved[0];
 702                         if ( resolved.length() > 1 ) {
 703                                 /* Complain about the multiple references. */
 704                                 error(loc) << "state reference " << nameRef <<
 705                                                 " resolves to multiple entry points" << endl;
 706                                 errorStateLabels( resolved );
 707                         }
 708                 }
 709         }
 710
 711         if ( nameInst == 0 ) {
 712                 /* If not found then complain. */
 713                 error(loc) << "could not resolve state reference " << nameRef << endl;
 714         }
 715         return nameInst;
 716 }
 717
 718 void ParseData::resolveNameRefs( InlineList *inlineList, Action *action )
 719 {
 720         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
 721                 switch ( item->type ) {
 722                         case InlineItem::Entry: case InlineItem::Goto:
 723                         case InlineItem::Call: case InlineItem::Next: {
 724                                 /* Resolve, pass action for local search. */
 725                                 NameInst *target = resolveStateRef( *item->nameRef, item->loc, action );
 726
 727                                 /* Check if the target goes into a longest match. */
 728                                 NameInst *search = target->parent;
 729                                 while ( search != 0 ) {
 730                                         if ( search->isLongestMatch ) {
 731                                                 error(item->loc) << "cannot enter inside a longest "
 732                                                                 "match construction as an entry point" << endl;
 733                                                 break;
 734                                         }
 735                                         search = search->parent;
 736                                 }
 737
 738                                 /* Note the reference in the name. This will cause the entry
 739                                  * point to survive to the end of the graph generating walk. */
 740                                 if ( target != 0 )
 741                                         target->numRefs += 1;
 742                                 item->nameTarg = target;
 743                                 break;
 744                         }
 745                         default:
 746                                 break;
 747                 }
 748
 749                 /* Some of the item types may have children. */
 750                 if ( item->children != 0 )
 751                         resolveNameRefs( item->children, action );
 752         }
 753 }
 754
 755 /* Resolve references to labels in actions. */
 756 void ParseData::resolveActionNameRefs()
 757 {
 758         for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
 759                 /* Only care about the actions that are referenced. */
 760                 if ( act->actionRefs.length() > 0 )
 761                         resolveNameRefs( act->inlineList, act );
 762         }
 763 }
 764
 765 /* Walk a name tree starting at from and fill the name index. */
 766 void ParseData::fillNameIndex( NameInst *from )
 767 {
 768         /* Fill the value for from in the name index. */
 769         nameIndex[from->id] = from;
 770
 771         /* Recurse on the implicit final state and then all children. */
 772         if ( from->final != 0 )
 773                 fillNameIndex( from->final );
 774         for ( NameVect::Iter name = from->childVect; name.lte(); name++ )
 775                 fillNameIndex( *name );
 776 }
 777
 778 void ParseData::makeRootNames()
 779 {
 780         /* Create the root name. */
 781         rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
 782         exportsRootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
 783 }
 784
 785 /* Build the name tree and supporting data structures. */
 786 void ParseData::makeNameTree( GraphDictEl *dictEl )
 787 {
 788         /* Set up curNameInst for the walk. */
 789         initNameWalk();
 790
 791         if ( dictEl != 0 ) {
 792                 /* A start location has been specified. */
 793                 dictEl->value->makeNameTree( dictEl->loc, this );
 794         }
 795         else {
 796                 /* First make the name tree. */
 797                 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
 798                         /* Recurse on the instance. */
 799                         glel->value->makeNameTree( glel->loc, this );
 800                 }
 801         }
 802
 803         /* The number of nodes in the tree can now be given by nextNameId */
 804         nameIndex = new NameInst*[nextNameId];
 805         memset( nameIndex, 0, sizeof(NameInst*)*nextNameId );
 806         fillNameIndex( rootName );
 807         fillNameIndex( exportsRootName );
 808 }
 809
 810
 811 void ParseData::createBuiltin( char *name, BuiltinMachine builtin )
 812 {
 813         Expression *expression = new Expression( builtin );
 814         Join *join = new Join( expression );
 815         JoinOrLm *joinOrLm = new JoinOrLm( join );
 816         VarDef *varDef = new VarDef( name, joinOrLm );
 817         GraphDictEl *graphDictEl = new GraphDictEl( name, varDef );
 818         graphDict.insert( graphDictEl );
 819 }
 820
 821 /* Initialize the graph dict with builtin types. */
 822 void ParseData::initGraphDict( )
 823 {
 824         createBuiltin( "any", BT_Any );
 825         createBuiltin( "ascii", BT_Ascii );
 826         createBuiltin( "extend", BT_Extend );
 827         createBuiltin( "alpha", BT_Alpha );
 828         createBuiltin( "digit", BT_Digit );
 829         createBuiltin( "alnum", BT_Alnum );
 830         createBuiltin( "lower", BT_Lower );
 831         createBuiltin( "upper", BT_Upper );
 832         createBuiltin( "cntrl", BT_Cntrl );
 833         createBuiltin( "graph", BT_Graph );
 834         createBuiltin( "print", BT_Print );
 835         createBuiltin( "punct", BT_Punct );
 836         createBuiltin( "space", BT_Space );
 837         createBuiltin( "xdigit", BT_Xdigit );
 838         createBuiltin( "null", BT_Lambda );
 839         createBuiltin( "zlen", BT_Lambda );
 840         createBuiltin( "empty", BT_Empty );
 841 }
 842
 843 /* Set the alphabet type. If the types are not valid returns false. */
 844 bool ParseData::setAlphType( char *s1, char *s2 )
 845 {
 846         bool valid = false;
 847         for ( int i = 0; i < hostLang->numHostTypes; i++ ) {
 848                 if ( strcmp( s1, hostLang->hostTypes[i].data1 ) == 0 &&
 849                                 hostLang->hostTypes[i].data2 != 0 &&
 850                                 strcmp( s2, hostLang->hostTypes[i].data2 ) == 0 )
 851                 {
 852                         valid = true;
 853                         userAlphType = hostLang->hostTypes + i;
 854                         break;
 855                 }
 856         }
 857
 858         alphTypeSet = true;
 859         return valid;
 860 }
 861
 862 /* Set the alphabet type. If the types are not valid returns false. */
 863 bool ParseData::setAlphType( char *s1 )
 864 {
 865         bool valid = false;
 866         for ( int i = 0; i < hostLang->numHostTypes; i++ ) {
 867                 if ( strcmp( s1, hostLang->hostTypes[i].data1 ) == 0 &&
 868                                 hostLang->hostTypes[i].data2 == 0 )
 869                 {
 870                         valid = true;
 871                         userAlphType = hostLang->hostTypes + i;
 872                         break;
 873                 }
 874         }
 875
 876         alphTypeSet = true;
 877         return valid;
 878 }
 879
 880 /* Initialize the key operators object that will be referenced by all fsms
 881  * created. */
 882 void ParseData::initKeyOps( )
 883 {
 884         /* Signedness and bounds. */
 885         HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType;
 886         thisKeyOps.setAlphType( alphType );
 887
 888         if ( lowerNum != 0 ) {
 889                 /* If ranges are given then interpret the alphabet type. */
 890                 thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this );
 891                 thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
 892         }
 893
 894         thisCondData.nextCondKey = thisKeyOps.maxKey;
 895         thisCondData.nextCondKey.increment();
 896 }
 897
 898 void ParseData::printNameInst( NameInst *nameInst, int level )
 899 {
 900         for ( int i = 0; i < level; i++ )
 901                 cerr << "  ";
 902         cerr << (nameInst->name != 0 ? nameInst->name : "<ANON>") <<
 903                         "  id: " << nameInst->id <<
 904                         "  refs: " << nameInst->numRefs <<
 905                         "  uses: " << nameInst->numUses << endl;
 906         for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ )
 907                 printNameInst( *name, level+1 );
 908 }
 909
 910 /* Remove duplicates of unique actions from an action table. */
 911 void ParseData::removeDups( ActionTable &table )
 912 {
 913         /* Scan through the table looking for unique actions to
 914          * remove duplicates of. */
 915         for ( int i = 0; i < table.length(); i++ ) {
 916                 /* Remove any duplicates ahead of i. */
 917                 for ( int r = i+1; r < table.length(); ) {
 918                         if ( table[r].value == table[i].value )
 919                                 table.vremove(r);
 920                         else
 921                                 r += 1;
 922                 }
 923         }
 924 }
 925
 926 /* Remove duplicates from action lists. This operates only on transition and
 927  * eof action lists and so should be called once all actions have been
 928  * transfered to their final resting place. */
 929 void ParseData::removeActionDups( FsmAp *graph )
 930 {
 931         /* Loop all states. */
 932         for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
 933                 /* Loop all transitions. */
 934                 for ( TransList::Iter trans = state->outList; trans.lte(); trans++ )
 935                         removeDups( trans->actionTable );
 936                 removeDups( state->toStateActionTable );
 937                 removeDups( state->fromStateActionTable );
 938                 removeDups( state->eofActionTable );
 939         }
 940 }
 941
 942 Action *ParseData::newAction( char *name, InlineList *inlineList )
 943 {
 944         InputLoc loc;
 945         loc.line = 1;
 946         loc.col = 1;
 947
 948         Action *action = new Action( loc, name, inlineList, nextCondId++ );
 949         action->actionRefs.append( rootName );
 950         actionList.append( action );
 951         return action;
 952 }
 953
 954 void ParseData::initLongestMatchData()
 955 {
 956         if ( lmList.length() > 0 ) {
 957                 /* The initTokStart action resets the token start. */
 958                 InlineList *il1 = new InlineList;
 959                 il1->append( new InlineItem( InputLoc(), InlineItem::LmInitTokStart ) );
 960                 initTokStart = newAction( "initts", il1 );
 961                 initTokStart->isLmAction = true;
 962
 963                 /* The initActId action gives act a default value. */
 964                 InlineList *il4 = new InlineList;
 965                 il4->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) );
 966                 initActId = newAction( "initact", il4 );
 967                 initActId->isLmAction = true;
 968
 969                 /* The setTokStart action sets tokstart. */
 970                 InlineList *il5 = new InlineList;
 971                 il5->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) );
 972                 setTokStart = newAction( "tokstart", il5 );
 973                 setTokStart->isLmAction = true;
 974
 975                 /* The setTokEnd action sets tokend. */
 976                 InlineList *il3 = new InlineList;
 977                 il3->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) );
 978                 setTokEnd = newAction( "tokend", il3 );
 979                 setTokEnd->isLmAction = true;
 980
 981                 /* The action will also need an ordering: ahead of all user action
 982                  * embeddings. */
 983                 initTokStartOrd = curActionOrd++;
 984                 initActIdOrd = curActionOrd++;
 985                 setTokStartOrd = curActionOrd++;
 986                 setTokEndOrd = curActionOrd++;
 987         }
 988 }
 989
 990 /* After building the graph, do some extra processing to ensure the runtime
 991  * data of the longest mactch operators is consistent. */
 992 void ParseData::setLongestMatchData( FsmAp *graph )
 993 {
 994         if ( lmList.length() > 0 ) {
 995                 /* Make sure all entry points (targets of fgoto, fcall, fnext, fentry)
 996                  * init the tokstart. */
 997                 for ( EntryMap::Iter en = graph->entryPoints; en.lte(); en++ ) {
 998                         /* This is run after duplicates are removed, we must guard against
 999                          * inserting a duplicate. */
1000                         ActionTable &actionTable = en->value->toStateActionTable;
1001                         if ( ! actionTable.hasAction( initTokStart ) )
1002                                 actionTable.setAction( initTokStartOrd, initTokStart );
1003                 }
1004
1005                 /* Find the set of states that are the target of transitions with
1006                  * actions that have calls. These states will be targeted by fret
1007                  * statements. */
1008                 StateSet states;
1009                 for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
1010                         for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
1011                                 for ( ActionTable::Iter ati = trans->actionTable; ati.lte(); ati++ ) {
1012                                         if ( ati->value->anyCall && trans->toState != 0 )
1013                                                 states.insert( trans->toState );
1014                                 }
1015                         }
1016                 }
1017
1018
1019                 /* Init tokstart upon entering the above collected states. */
1020                 for ( StateSet::Iter ps = states; ps.lte(); ps++ ) {
1021                         /* This is run after duplicates are removed, we must guard against
1022                          * inserting a duplicate. */
1023                         ActionTable &actionTable = (*ps)->toStateActionTable;
1024                         if ( ! actionTable.hasAction( initTokStart ) )
1025                                 actionTable.setAction( initTokStartOrd, initTokStart );
1026                 }
1027         }
1028 }
1029
1030 /* Make the graph from a graph dict node. Does minimization and state sorting. */
1031 FsmAp *ParseData::makeInstance( GraphDictEl *gdNode )
1032 {
1033         /* Build the graph from a walk of the parse tree. */
1034         FsmAp *graph = gdNode->value->walk( this );
1035
1036         /* Resolve any labels that point to multiple states. Any labels that are
1037          * still around are referenced only by gotos and calls and they need to be
1038          * made into deterministic entry points. */
1039         graph->deterministicEntry();
1040
1041         /*
1042          * All state construction is now complete.
1043          */
1044
1045         /* Transfer global error actions. */
1046         for ( StateList::Iter state = graph->stateList; state.lte(); state++ )
1047                 graph->transferErrorActions( state, 0 );
1048
1049         removeActionDups( graph );
1050
1051         /* Remove unreachable states. There should be no dead end states. The
1052          * subtract and intersection operators are the only places where they may
1053          * be created and those operators clean them up. */
1054         graph->removeUnreachableStates();
1055
1056         /* No more fsm operations are to be done. Action ordering numbers are
1057          * no longer of use and will just hinder minimization. Clear them. */
1058         graph->nullActionKeys();
1059
1060         /* Transition priorities are no longer of use. We can clear them
1061          * because they will just hinder minimization as well. Clear them. */
1062         graph->clearAllPriorities();
1063
1064         if ( minimizeOpt != MinimizeNone ) {
1065                 /* Minimize here even if we minimized at every op. Now that function
1066                  * keys have been cleared we may get a more minimal fsm. */
1067                 switch ( minimizeLevel ) {
1068                         case MinimizeApprox:
1069                                 graph->minimizeApproximate();
1070                                 break;
1071                         case MinimizeStable:
1072                                 graph->minimizeStable();
1073                                 break;
1074                         case MinimizePartition1:
1075                                 graph->minimizePartition1();
1076                                 break;
1077                         case MinimizePartition2:
1078                                 graph->minimizePartition2();
1079                                 break;
1080                 }
1081         }
1082
1083         graph->compressTransitions();
1084
1085         return graph;
1086 }
1087
1088 void ParseData::printNameTree()
1089 {
1090         /* Print the name instance map. */
1091         for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ )
1092                 printNameInst( *name, 0 );
1093
1094         cerr << "name index:" << endl;
1095         /* Show that the name index is correct. */
1096         for ( int ni = 0; ni < nextNameId; ni++ ) {
1097                 cerr << ni << ": ";
1098                 char *name = nameIndex[ni]->name;
1099                 cerr << ( name != 0 ? name : "<ANON>" ) << endl;
1100         }
1101 }
1102
1103 FsmAp *ParseData::makeSpecific( GraphDictEl *gdNode )
1104 {
1105         /* Build the name tree and supporting data structures. */
1106         makeNameTree( gdNode );
1107
1108         /* Resove name references from gdNode. */
1109         initNameWalk();
1110         gdNode->value->resolveNameRefs( this );
1111
1112         /* Do not resolve action references. Since we are not building the entire
1113          * graph there's a good chance that many name references will fail. This
1114          * is okay since generating part of the graph is usually only done when
1115          * inspecting the compiled machine. */
1116
1117         /* Same story for extern entry point references. */
1118
1119         /* Flag this case so that the XML code generator is aware that we haven't
1120          * looked up name references in actions. It can then avoid segfaulting. */
1121         generatingSectionSubset = true;
1122
1123         /* Just building the specified graph. */
1124         initNameWalk();
1125         FsmAp *mainGraph = makeInstance( gdNode );
1126
1127         return mainGraph;
1128 }
1129
1130 FsmAp *ParseData::makeAll()
1131 {
1132         /* Build the name tree and supporting data structures. */
1133         makeNameTree( 0 );
1134
1135         /* Resove name references in the tree. */
1136         initNameWalk();
1137         for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ )
1138                 glel->value->resolveNameRefs( this );
1139
1140         /* Resolve action code name references. */
1141         resolveActionNameRefs();
1142
1143         /* Force name references to the top level instantiations. */
1144         for ( NameVect::Iter inst = rootName->childVect; inst.lte(); inst++ )
1145                 (*inst)->numRefs += 1;
1146
1147         FsmAp *mainGraph = 0;
1148         FsmAp **graphs = new FsmAp*[instanceList.length()];
1149         int numOthers = 0;
1150
1151         /* Make all the instantiations, we know that main exists in this list. */
1152         initNameWalk();
1153         for ( GraphList::Iter glel = instanceList; glel.lte();  glel++ ) {
1154                 if ( strcmp( glel->key, mainMachine ) == 0 ) {
1155                         /* Main graph is always instantiated. */
1156                         mainGraph = makeInstance( glel );
1157                 }
1158                 else {
1159                         /* Instantiate and store in others array. */
1160                         graphs[numOthers++] = makeInstance( glel );
1161                 }
1162         }
1163
1164         if ( mainGraph == 0 )
1165                 mainGraph = graphs[--numOthers];
1166
1167         if ( numOthers > 0 ) {
1168                 /* Add all the other graphs into main. */
1169                 mainGraph->globOp( graphs, numOthers );
1170         }
1171
1172         delete[] graphs;
1173         return mainGraph;
1174 }
1175
1176 void ParseData::analyzeAction( Action *action, InlineList *inlineList )
1177 {
1178         /* FIXME: Actions used as conditions should be very constrained. */
1179         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1180                 if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr )
1181                         action->anyCall = true;
1182
1183                 /* Need to recurse into longest match items. */
1184                 if ( item->type == InlineItem::LmSwitch ) {
1185                         LongestMatch *lm = item->longestMatch;
1186                         for ( LmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) {
1187                                 if ( lmi->action != 0 )
1188                                         analyzeAction( action, lmi->action->inlineList );
1189                         }
1190                 }
1191
1192                 if ( item->type == InlineItem::LmOnLast ||
1193                                 item->type == InlineItem::LmOnNext ||
1194                                 item->type == InlineItem::LmOnLagBehind )
1195                 {
1196                         LongestMatchPart *lmi = item->longestMatchPart;
1197                         if ( lmi->action != 0 )
1198                                 analyzeAction( action, lmi->action->inlineList );
1199                 }
1200
1201                 if ( item->children != 0 )
1202                         analyzeAction( action, item->children );
1203         }
1204 }
1205
1206
1207 /* Check actions for bad uses of fsm directives. We don't go inside longest
1208  * match items in actions created by ragel, since we just want the user
1209  * actions. */
1210 void ParseData::checkInlineList( Action *act, InlineList *inlineList )
1211 {
1212         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1213                 /* EOF checks. */
1214                 if ( act->numEofRefs > 0 ) {
1215                         switch ( item->type ) {
1216                         case InlineItem::PChar:
1217                                 error(item->loc) << "pointer to current element does not exist in "
1218                                                 "EOF action code" << endl;
1219                                 break;
1220                         case InlineItem::Char:
1221                                 error(item->loc) << "current element does not exist in "
1222                                                 "EOF action code" << endl;
1223                                 break;
1224                         case InlineItem::Hold:
1225                                 error(item->loc) << "changing the current element not possible in "
1226                                                 "EOF action code" << endl;
1227                                 break;
1228                         case InlineItem::Exec:
1229                                 error(item->loc) << "changing the current element not possible in "
1230                                                 "EOF action code" << endl;
1231                                 break;
1232                         case InlineItem::Goto: case InlineItem::Call:
1233                         case InlineItem::Next: case InlineItem::GotoExpr:
1234                         case InlineItem::CallExpr: case InlineItem::NextExpr:
1235                         case InlineItem::Ret:
1236                                 error(item->loc) << "changing the current state not possible in "
1237                                                 "EOF action code" << endl;
1238                                 break;
1239                         default:
1240                                 break;
1241                         }
1242                 }
1243
1244                 /* Recurse. */
1245                 if ( item->children != 0 )
1246                         checkInlineList( act, item->children );
1247         }
1248 }
1249
1250 void ParseData::checkAction( Action *action )
1251 {
1252         /* Check for actions with calls that are embedded within a longest match
1253          * machine. */
1254         if ( !action->isLmAction && action->numRefs() > 0 && action->anyCall ) {
1255                 for ( ActionRefs::Iter ar = action->actionRefs; ar.lte(); ar++ ) {
1256                         NameInst *check = *ar;
1257                         while ( check != 0 ) {
1258                                 if ( check->isLongestMatch ) {
1259                                         error(action->loc) << "within a scanner, fcall is permitted"
1260                                                 " only in pattern actions" << endl;
1261                                         break;
1262                                 }
1263                                 check = check->parent;
1264                         }
1265                 }
1266         }
1267
1268         checkInlineList( action, action->inlineList );
1269 }
1270
1271
1272 void ParseData::analyzeGraph( FsmAp *graph )
1273 {
1274         for ( ActionList::Iter act = actionList; act.lte(); act++ )
1275                 analyzeAction( act, act->inlineList );
1276
1277         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
1278                 /* The transition list. */
1279                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
1280                         for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ )
1281                                 at->value->numTransRefs += 1;
1282                 }
1283
1284                 for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ )
1285                         at->value->numToStateRefs += 1;
1286
1287                 for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ )
1288                         at->value->numFromStateRefs += 1;
1289
1290                 for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ )
1291                         at->value->numEofRefs += 1;
1292
1293                 for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) {
1294                         for ( CondSet::Iter sci = sc->condSpace->condSet; sci.lte(); sci++ )
1295                                 (*sci)->numCondRefs += 1;
1296                 }
1297         }
1298
1299         /* Checks for bad usage of directives in action code. */
1300         for ( ActionList::Iter act = actionList; act.lte(); act++ )
1301                 checkAction( act );
1302 }
1303
1304 void ParseData::makeExportsNameTree()
1305 {
1306         /* Make a name tree for the exports. */
1307         initExportsNameWalk();
1308
1309         /* First make the name tree. */
1310         for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1311                 if ( gdel->value->isExport ) {
1312                         /* Recurse on the instance. */
1313                         gdel->value->makeNameTree( gdel->loc, this );
1314                 }
1315         }
1316 }
1317
1318 void ParseData::makeExports()
1319 {
1320         makeExportsNameTree();
1321
1322         /* Resove name references in the tree. */
1323         initExportsNameWalk();
1324         for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1325                 if ( gdel->value->isExport )
1326                         gdel->value->resolveNameRefs( this );
1327         }
1328
1329         /* Make all the instantiations, we know that main exists in this list. */
1330         initExportsNameWalk();
1331         for ( GraphDict::Iter gdel = graphDict; gdel.lte();  gdel++ ) {
1332                 /* Check if this var def is an export. */
1333                 if ( gdel->value->isExport ) {
1334                         /* Build the graph from a walk of the parse tree. */
1335                         FsmAp *graph = gdel->value->walk( this );
1336
1337                         /* Build the graph from a walk of the parse tree. */
1338                         if ( !graph->checkSingleCharMachine() ) {
1339                                 error(gdel->loc) << "bad export machine, must define "
1340                                                 "a single character" << endl;
1341                         }
1342                         else {
1343                                 /* Safe to extract the key and declare the export. */
1344                                 Key exportKey = graph->startState->outList.head->lowKey;
1345                                 exportList.append( new Export( gdel->value->name, exportKey ) );
1346                         }
1347                 }
1348         }
1349
1350 }
1351
1352 void ParseData::prepareMachineGen( GraphDictEl *graphDictEl )
1353 {
1354         beginProcessing();
1355         initKeyOps();
1356         makeRootNames();
1357         initLongestMatchData();
1358
1359         /* Make the graph, do minimization. */
1360         if ( graphDictEl == 0 )
1361                 sectionGraph = makeAll();
1362         else
1363                 sectionGraph = makeSpecific( graphDictEl );
1364
1365         /* Compute exports from the export definitions. */
1366         makeExports();
1367
1368         /* If any errors have occured in the input file then don't write anything. */
1369         if ( gblErrorCount > 0 )
1370                 return;
1371
1372         analyzeGraph( sectionGraph );
1373
1374         /* Depends on the graph analysis. */
1375         setLongestMatchData( sectionGraph );
1376
1377         /* Decide if an error state is necessary.
1378          *  1. There is an error transition
1379          *  2. There is a gap in the transitions
1380          *  3. The longest match operator requires it. */
1381         if ( lmRequiresErrorState || sectionGraph->hasErrorTrans() )
1382                 sectionGraph->errState = sectionGraph->addState();
1383
1384         /* State numbers need to be assigned such that all final states have a
1385          * larger state id number than all non-final states. This enables the
1386          * first_final mechanism to function correctly. We also want states to be
1387          * ordered in a predictable fashion. So we first apply a depth-first
1388          * search, then do a stable sort by final state status, then assign
1389          * numbers. */
1390
1391         sectionGraph->depthFirstOrdering();
1392         sectionGraph->sortStatesByFinal();
1393         sectionGraph->setStateNumbers( 0 );
1394 }
1395
1396 void ParseData::generateXML( ostream &out )
1397 {
1398         beginProcessing();
1399
1400         /* Make the generator. */
1401         XMLCodeGen codeGen( sectionName, this, sectionGraph, out );
1402
1403         /* Write out with it. */
1404         codeGen.writeXML();
1405
1406         if ( printStatistics ) {
1407                 cerr << "fsm name  : " << sectionName << endl;
1408                 cerr << "num states: " << sectionGraph->stateList.length() << endl;
1409                 cerr << endl;
1410         }
1411 }
1412
1413 /* Send eof to all parsers. */
1414 void terminateAllParsers( )
1415 {
1416         /* FIXME: a proper token is needed here. Suppose we should use the
1417          * location of EOF in the last file that the parser was referenced in. */
1418         InputLoc loc;
1419         loc.fileName = "<EOF>";
1420         loc.line = 0;
1421         loc.col = 0;
1422         for ( ParserDict::Iter pdel = parserDict; pdel.lte(); pdel++ )
1423                 pdel->value->token( loc, _eof, 0, 0 );
1424 }
1425
1426 void writeLanguage( std::ostream &out )
1427 {
1428         out << " lang=\"";
1429         switch ( hostLangType ) {
1430                 case CCode:    out << "C"; break;
1431                 case DCode:    out << "D"; break;
1432                 case JavaCode: out << "Java"; break;
1433                 case RubyCode: out << "Ruby"; break;
1434         }
1435         out << "\"";
1436
1437 }
1438
1439 void writeMachines( std::ostream &out, std::string hostData, char *inputFileName )
1440 {
1441         if ( machineSpec == 0 && machineName == 0 ) {
1442                 /* No machine spec or machine name given. Generate everything. */
1443                 for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1444                         ParseData *pd = parser->value->pd;
1445                         if ( pd->instanceList.length() > 0 )
1446                                 pd->prepareMachineGen( 0 );
1447                 }
1448
1449                 if ( gblErrorCount == 0 ) {
1450                         out << "<ragel filename=\"" << inputFileName << "\"";
1451                         writeLanguage( out );
1452                         out << ">\n";
1453                         for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1454                                 ParseData *pd = parser->value->pd;
1455                                 if ( pd->instanceList.length() > 0 )
1456                                         pd->generateXML( out );
1457                         }
1458                         out << hostData;
1459                         out << "</ragel>\n";
1460                 }
1461         }
1462         else if ( parserDict.length() > 0 ) {
1463                 /* There is either a machine spec or machine name given. */
1464                 ParseData *parseData = 0;
1465                 GraphDictEl *graphDictEl = 0;
1466
1467                 /* Traverse the sections, break out when we find a section/machine
1468                  * that matches the one specified. */
1469                 for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1470                         ParseData *checkPd = parser->value->pd;
1471                         if ( machineSpec == 0 || strcmp( checkPd->sectionName, machineSpec ) == 0 ) {
1472                                 GraphDictEl *checkGdEl = 0;
1473                                 if ( machineName == 0 || (checkGdEl =
1474                                                 checkPd->graphDict.find( machineName )) != 0 )
1475                                 {
1476                                         /* Have a machine spec and/or machine name that matches
1477                                          * the -M/-S options. */
1478                                         parseData = checkPd;
1479                                         graphDictEl = checkGdEl;
1480                                         break;
1481                                 }
1482                         }
1483                 }
1484
1485                 if ( parseData == 0 )
1486                         error() << "could not locate machine specified with -S and/or -M" << endl;
1487                 else {
1488                         /* Section/Machine to emit was found. Prepare and emit it. */
1489                         parseData->prepareMachineGen( graphDictEl );
1490                         if ( gblErrorCount == 0 ) {
1491                                 out << "<ragel filename=\"" << inputFileName << "\"";
1492                                 writeLanguage( out );
1493                                 out << ">\n";
1494                                 parseData->generateXML( out );
1495                                 out << hostData;
1496                                 out << "</ragel>\n";
1497                         }
1498                 }
1499         }
1500 }