ragel/parsedata.cpp

   1 /*
   2  *  Copyright 2001-2008 Adrian Thurston <thurston@complang.org>
   3  */
   4
   5 /*  This file is part of Ragel.
   6  *
   7  *  Ragel is free software; you can redistribute it and/or modify
   8  *  it under the terms of the GNU General Public License as published by
   9  *  the Free Software Foundation; either version 2 of the License, or
  10  *  (at your option) any later version.
  11  *
  12  *  Ragel is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  *  GNU General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU General Public License
  18  *  along with Ragel; if not, write to the Free Software
  19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  */
  21
  22 #include <iostream>
  23 #include <iomanip>
  24 #include <errno.h>
  25 #include <stdlib.h>
  26 #include <limits.h>
  27
  28 #include "ragel.h"
  29 #include "rlparse.h"
  30 #include "parsedata.h"
  31 #include "parsetree.h"
  32 #include "mergesort.h"
  33 #include "xml-codegen.h"
  34 #include "version.h"
  35 #include "xmlparse.h"
  36 #include "inputdata.h"
  37
  38 using namespace std;
  39
  40 char mainMachine[] = "main";
  41
  42 void Token::set( const char *str, int len )
  43 {
  44         length = len;
  45         data = new char[len+1];
  46         memcpy( data, str, len );
  47         data[len] = 0;
  48 }
  49
  50 void Token::append( const Token &other )
  51 {
  52         int newLength = length + other.length;
  53         char *newString = new char[newLength+1];
  54         memcpy( newString, data, length );
  55         memcpy( newString + length, other.data, other.length );
  56         newString[newLength] = 0;
  57         data = newString;
  58         length = newLength;
  59 }
  60
  61 /* Perform minimization after an operation according
  62  * to the command line args. */
  63 void afterOpMinimize( FsmAp *fsm, bool lastInSeq )
  64 {
  65         /* Switch on the prefered minimization algorithm. */
  66         if ( minimizeOpt == MinimizeEveryOp || ( minimizeOpt == MinimizeMostOps && lastInSeq ) ) {
  67                 /* First clean up the graph. FsmAp operations may leave these
  68                  * lying around. There should be no dead end states. The subtract
  69                  * intersection operators are the only places where they may be
  70                  * created and those operators clean them up. */
  71                 fsm->removeUnreachableStates();
  72
  73                 switch ( minimizeLevel ) {
  74                         case MinimizeApprox:
  75                                 fsm->minimizeApproximate();
  76                                 break;
  77                         case MinimizePartition1:
  78                                 fsm->minimizePartition1();
  79                                 break;
  80                         case MinimizePartition2:
  81                                 fsm->minimizePartition2();
  82                                 break;
  83                         case MinimizeStable:
  84                                 fsm->minimizeStable();
  85                                 break;
  86                 }
  87         }
  88 }
  89
  90 /* Count the transitions in the fsm by walking the state list. */
  91 int countTransitions( FsmAp *fsm )
  92 {
  93         int numTrans = 0;
  94         StateAp *state = fsm->stateList.head;
  95         while ( state != 0 ) {
  96                 numTrans += state->outList.length();
  97                 state = state->next;
  98         }
  99         return numTrans;
 100 }
 101
 102 Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd )
 103 {
 104         /* Reset errno so we can check for overflow or underflow. In the event of
 105          * an error, sets the return val to the upper or lower bound being tested
 106          * against. */
 107         errno = 0;
 108         unsigned int size = keyOps->alphType->size;
 109         bool unusedBits = size < sizeof(unsigned long);
 110
 111         unsigned long ul = strtoul( str, 0, 16 );
 112
 113         if ( errno == ERANGE || ( unusedBits && ul >> (size * 8) ) ) {
 114                 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
 115                 ul = 1 << (size * 8);
 116         }
 117
 118         if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) )
 119                 ul |= (0xffffffff >> (size*8 ) ) << (size*8);
 120
 121         return Key( (long)ul );
 122 }
 123
 124 Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd )
 125 {
 126         /* Convert the number to a decimal. First reset errno so we can check
 127          * for overflow or underflow. */
 128         errno = 0;
 129         long long minVal = keyOps->alphType->minVal;
 130         long long maxVal = keyOps->alphType->maxVal;
 131
 132         long long ll = strtoll( str, 0, 10 );
 133
 134         /* Check for underflow. */
 135         if ( ( errno == ERANGE && ll < 0 ) || ll < minVal) {
 136                 error(loc) << "literal " << str << " underflows the alphabet type" << endl;
 137                 ll = minVal;
 138         }
 139         /* Check for overflow. */
 140         else if ( ( errno == ERANGE && ll > 0 ) || ll > maxVal ) {
 141                 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
 142                 ll = maxVal;
 143         }
 144
 145         if ( keyOps->alphType->isSigned )
 146                 return Key( (long)ll );
 147         else
 148                 return Key( (unsigned long)ll );
 149 }
 150
 151 /* Make an fsm key in int format (what the fsm graph uses) from an alphabet
 152  * number returned by the parser. Validates that the number doesn't overflow
 153  * the alphabet type. */
 154 Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd )
 155 {
 156         /* Switch on hex/decimal format. */
 157         if ( str[0] == '0' && str[1] == 'x' )
 158                 return makeFsmKeyHex( str, loc, pd );
 159         else
 160                 return makeFsmKeyDec( str, loc, pd );
 161 }
 162
 163 /* Make an fsm int format (what the fsm graph uses) from a single character.
 164  * Performs proper conversion depending on signed/unsigned property of the
 165  * alphabet. */
 166 Key makeFsmKeyChar( char c, ParseData *pd )
 167 {
 168         if ( keyOps->isSigned ) {
 169                 /* Copy from a char type. */
 170                 return Key( c );
 171         }
 172         else {
 173                 /* Copy from an unsigned byte type. */
 174                 return Key( (unsigned char)c );
 175         }
 176 }
 177
 178 /* Make an fsm key array in int format (what the fsm graph uses) from a string
 179  * of characters. Performs proper conversion depending on signed/unsigned
 180  * property of the alphabet. */
 181 void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd )
 182 {
 183         if ( keyOps->isSigned ) {
 184                 /* Copy from a char star type. */
 185                 char *src = data;
 186                 for ( int i = 0; i < len; i++ )
 187                         result[i] = Key(src[i]);
 188         }
 189         else {
 190                 /* Copy from an unsigned byte ptr type. */
 191                 unsigned char *src = (unsigned char*) data;
 192                 for ( int i = 0; i < len; i++ )
 193                         result[i] = Key(src[i]);
 194         }
 195 }
 196
 197 /* Like makeFsmKeyArray except the result has only unique keys. They ordering
 198  * will be changed. */
 199 void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
 200                 bool caseInsensitive, ParseData *pd )
 201 {
 202         /* Use a transitions list for getting unique keys. */
 203         if ( keyOps->isSigned ) {
 204                 /* Copy from a char star type. */
 205                 char *src = data;
 206                 for ( int si = 0; si < len; si++ ) {
 207                         Key key( src[si] );
 208                         result.insert( key );
 209                         if ( caseInsensitive ) {
 210                                 if ( key.isLower() )
 211                                         result.insert( key.toUpper() );
 212                                 else if ( key.isUpper() )
 213                                         result.insert( key.toLower() );
 214                         }
 215                 }
 216         }
 217         else {
 218                 /* Copy from an unsigned byte ptr type. */
 219                 unsigned char *src = (unsigned char*) data;
 220                 for ( int si = 0; si < len; si++ ) {
 221                         Key key( src[si] );
 222                         result.insert( key );
 223                         if ( caseInsensitive ) {
 224                                 if ( key.isLower() )
 225                                         result.insert( key.toUpper() );
 226                                 else if ( key.isUpper() )
 227                                         result.insert( key.toLower() );
 228                         }
 229                 }
 230         }
 231 }
 232
 233 FsmAp *dotFsm( ParseData *pd )
 234 {
 235         FsmAp *retFsm = new FsmAp();
 236         retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey );
 237         return retFsm;
 238 }
 239
 240 FsmAp *dotStarFsm( ParseData *pd )
 241 {
 242         FsmAp *retFsm = new FsmAp();
 243         retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey );
 244         return retFsm;
 245 }
 246
 247 /* Make a builtin type. Depends on the signed nature of the alphabet type. */
 248 FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd )
 249 {
 250         /* FsmAp created to return. */
 251         FsmAp *retFsm = 0;
 252         bool isSigned = keyOps->isSigned;
 253
 254         switch ( builtin ) {
 255         case BT_Any: {
 256                 /* All characters. */
 257                 retFsm = dotFsm( pd );
 258                 break;
 259         }
 260         case BT_Ascii: {
 261                 /* Ascii characters 0 to 127. */
 262                 retFsm = new FsmAp();
 263                 retFsm->rangeFsm( 0, 127 );
 264                 break;
 265         }
 266         case BT_Extend: {
 267                 /* Ascii extended characters. This is the full byte range. Dependent
 268                  * on signed, vs no signed. If the alphabet is one byte then just use
 269                  * dot fsm. */
 270                 if ( isSigned ) {
 271                         retFsm = new FsmAp();
 272                         retFsm->rangeFsm( -128, 127 );
 273                 }
 274                 else {
 275                         retFsm = new FsmAp();
 276                         retFsm->rangeFsm( 0, 255 );
 277                 }
 278                 break;
 279         }
 280         case BT_Alpha: {
 281                 /* Alpha [A-Za-z]. */
 282                 FsmAp *upper = new FsmAp(), *lower = new FsmAp();
 283                 upper->rangeFsm( 'A', 'Z' );
 284                 lower->rangeFsm( 'a', 'z' );
 285                 upper->unionOp( lower );
 286                 upper->minimizePartition2();
 287                 retFsm = upper;
 288                 break;
 289         }
 290         case BT_Digit: {
 291                 /* Digits [0-9]. */
 292                 retFsm = new FsmAp();
 293                 retFsm->rangeFsm( '0', '9' );
 294                 break;
 295         }
 296         case BT_Alnum: {
 297                 /* Alpha numerics [0-9A-Za-z]. */
 298                 FsmAp *digit = new FsmAp(), *lower = new FsmAp();
 299                 FsmAp *upper = new FsmAp();
 300                 digit->rangeFsm( '0', '9' );
 301                 upper->rangeFsm( 'A', 'Z' );
 302                 lower->rangeFsm( 'a', 'z' );
 303                 digit->unionOp( upper );
 304                 digit->unionOp( lower );
 305                 digit->minimizePartition2();
 306                 retFsm = digit;
 307                 break;
 308         }
 309         case BT_Lower: {
 310                 /* Lower case characters. */
 311                 retFsm = new FsmAp();
 312                 retFsm->rangeFsm( 'a', 'z' );
 313                 break;
 314         }
 315         case BT_Upper: {
 316                 /* Upper case characters. */
 317                 retFsm = new FsmAp();
 318                 retFsm->rangeFsm( 'A', 'Z' );
 319                 break;
 320         }
 321         case BT_Cntrl: {
 322                 /* Control characters. */
 323                 FsmAp *cntrl = new FsmAp();
 324                 FsmAp *highChar = new FsmAp();
 325                 cntrl->rangeFsm( 0, 31 );
 326                 highChar->concatFsm( 127 );
 327                 cntrl->unionOp( highChar );
 328                 cntrl->minimizePartition2();
 329                 retFsm = cntrl;
 330                 break;
 331         }
 332         case BT_Graph: {
 333                 /* Graphical ascii characters [!-~]. */
 334                 retFsm = new FsmAp();
 335                 retFsm->rangeFsm( '!', '~' );
 336                 break;
 337         }
 338         case BT_Print: {
 339                 /* Printable characters. Same as graph except includes space. */
 340                 retFsm = new FsmAp();
 341                 retFsm->rangeFsm( ' ', '~' );
 342                 break;
 343         }
 344         case BT_Punct: {
 345                 /* Punctuation. */
 346                 FsmAp *range1 = new FsmAp();
 347                 FsmAp *range2 = new FsmAp();
 348                 FsmAp *range3 = new FsmAp();
 349                 FsmAp *range4 = new FsmAp();
 350                 range1->rangeFsm( '!', '/' );
 351                 range2->rangeFsm( ':', '@' );
 352                 range3->rangeFsm( '[', '`' );
 353                 range4->rangeFsm( '{', '~' );
 354                 range1->unionOp( range2 );
 355                 range1->unionOp( range3 );
 356                 range1->unionOp( range4 );
 357                 range1->minimizePartition2();
 358                 retFsm = range1;
 359                 break;
 360         }
 361         case BT_Space: {
 362                 /* Whitespace: [\t\v\f\n\r ]. */
 363                 FsmAp *cntrl = new FsmAp();
 364                 FsmAp *space = new FsmAp();
 365                 cntrl->rangeFsm( '\t', '\r' );
 366                 space->concatFsm( ' ' );
 367                 cntrl->unionOp( space );
 368                 cntrl->minimizePartition2();
 369                 retFsm = cntrl;
 370                 break;
 371         }
 372         case BT_Xdigit: {
 373                 /* Hex digits [0-9A-Fa-f]. */
 374                 FsmAp *digit = new FsmAp();
 375                 FsmAp *upper = new FsmAp();
 376                 FsmAp *lower = new FsmAp();
 377                 digit->rangeFsm( '0', '9' );
 378                 upper->rangeFsm( 'A', 'F' );
 379                 lower->rangeFsm( 'a', 'f' );
 380                 digit->unionOp( upper );
 381                 digit->unionOp( lower );
 382                 digit->minimizePartition2();
 383                 retFsm = digit;
 384                 break;
 385         }
 386         case BT_Lambda: {
 387                 retFsm = new FsmAp();
 388                 retFsm->lambdaFsm();
 389                 break;
 390         }
 391         case BT_Empty: {
 392                 retFsm = new FsmAp();
 393                 retFsm->emptyFsm();
 394                 break;
 395         }}
 396
 397         return retFsm;
 398 }
 399
 400 /* Check if this name inst or any name inst below is referenced. */
 401 bool NameInst::anyRefsRec()
 402 {
 403         if ( numRefs > 0 )
 404                 return true;
 405
 406         /* Recurse on children until true. */
 407         for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) {
 408                 if ( (*ch)->anyRefsRec() )
 409                         return true;
 410         }
 411
 412         return false;
 413 }
 414
 415 /*
 416  * ParseData
 417  */
 418
 419 /* Initialize the structure that will collect info during the parse of a
 420  * machine. */
 421 ParseData::ParseData( const char *fileName, char *sectionName,
 422                 const InputLoc &sectionLoc )
 423 :
 424         sectionGraph(0),
 425         generatingSectionSubset(false),
 426         nextPriorKey(0),
 427         /* 0 is reserved for global error actions. */
 428         nextLocalErrKey(1),
 429         nextNameId(0),
 430         nextCondId(0),
 431         alphTypeSet(false),
 432         getKeyExpr(0),
 433         accessExpr(0),
 434         prePushExpr(0),
 435         postPopExpr(0),
 436         pExpr(0),
 437         peExpr(0),
 438         eofExpr(0),
 439         csExpr(0),
 440         topExpr(0),
 441         stackExpr(0),
 442         actExpr(0),
 443         tokstartExpr(0),
 444         tokendExpr(0),
 445         dataExpr(0),
 446         lowerNum(0),
 447         upperNum(0),
 448         fileName(fileName),
 449         sectionName(sectionName),
 450         sectionLoc(sectionLoc),
 451         curActionOrd(0),
 452         curPriorOrd(0),
 453         rootName(0),
 454         exportsRootName(0),
 455         nextEpsilonResolvedLink(0),
 456         nextLongestMatchId(1),
 457         lmRequiresErrorState(false)
 458 {
 459         /* Initialize the dictionary of graphs. This is our symbol table. The
 460          * initialization needs to be done on construction which happens at the
 461          * beginning of a machine spec so any assignment operators can reference
 462          * the builtins. */
 463         initGraphDict();
 464 }
 465
 466 /* Clean up the data collected during a parse. */
 467 ParseData::~ParseData()
 468 {
 469         /* Delete all the nodes in the action list. Will cause all the
 470          * string data that represents the actions to be deallocated. */
 471         actionList.empty();
 472 }
 473
 474 /* Make a name id in the current name instantiation scope if it is not
 475  * already there. */
 476 NameInst *ParseData::addNameInst( const InputLoc &loc, const char *data, bool isLabel )
 477 {
 478         /* Create the name instantitaion object and insert it. */
 479         NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel );
 480         curNameInst->childVect.append( newNameInst );
 481         if ( data != 0 )
 482                 curNameInst->children.insertMulti( data, newNameInst );
 483         return newNameInst;
 484 }
 485
 486 void ParseData::initNameWalk()
 487 {
 488         curNameInst = rootName;
 489         curNameChild = 0;
 490 }
 491
 492 void ParseData::initExportsNameWalk()
 493 {
 494         curNameInst = exportsRootName;
 495         curNameChild = 0;
 496 }
 497
 498 /* Goes into the next child scope. The number of the child is already set up.
 499  * We need this for the syncronous name tree and parse tree walk to work
 500  * properly. It is reset on entry into a scope and advanced on poping of a
 501  * scope. A call to enterNameScope should be accompanied by a corresponding
 502  * popNameScope. */
 503 NameFrame ParseData::enterNameScope( bool isLocal, int numScopes )
 504 {
 505         /* Save off the current data. */
 506         NameFrame retFrame;
 507         retFrame.prevNameInst = curNameInst;
 508         retFrame.prevNameChild = curNameChild;
 509         retFrame.prevLocalScope = localNameScope;
 510
 511         /* Enter into the new name scope. */
 512         for ( int i = 0; i < numScopes; i++ ) {
 513                 curNameInst = curNameInst->childVect[curNameChild];
 514                 curNameChild = 0;
 515         }
 516
 517         if ( isLocal )
 518                 localNameScope = curNameInst;
 519
 520         return retFrame;
 521 }
 522
 523 /* Return from a child scope to a parent. The parent info must be specified as
 524  * an argument and is obtained from the corresponding call to enterNameScope.
 525  * */
 526 void ParseData::popNameScope( const NameFrame &frame )
 527 {
 528         /* Pop the name scope. */
 529         curNameInst = frame.prevNameInst;
 530         curNameChild = frame.prevNameChild+1;
 531         localNameScope = frame.prevLocalScope;
 532 }
 533
 534 void ParseData::resetNameScope( const NameFrame &frame )
 535 {
 536         /* Pop the name scope. */
 537         curNameInst = frame.prevNameInst;
 538         curNameChild = frame.prevNameChild;
 539         localNameScope = frame.prevLocalScope;
 540 }
 541
 542
 543 void ParseData::unsetObsoleteEntries( FsmAp *graph )
 544 {
 545         /* Loop the reference names and increment the usage. Names that are no
 546          * longer needed will be unset in graph. */
 547         for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) {
 548                 /* Get the name. */
 549                 NameInst *name = *ref;
 550                 name->numUses += 1;
 551
 552                 /* If the name is no longer needed unset its corresponding entry. */
 553                 if ( name->numUses == name->numRefs ) {
 554                         assert( graph->entryPoints.find( name->id ) != 0 );
 555                         graph->unsetEntry( name->id );
 556                         assert( graph->entryPoints.find( name->id ) == 0 );
 557                 }
 558         }
 559 }
 560
 561 NameSet ParseData::resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly )
 562 {
 563         /* Queue needed for breadth-first search, load it with the start node. */
 564         NameInstList nameQueue;
 565         nameQueue.append( refFrom );
 566
 567         NameSet result;
 568         while ( nameQueue.length() > 0 ) {
 569                 /* Pull the next from location off the queue. */
 570                 NameInst *from = nameQueue.detachFirst();
 571
 572                 /* Look for the name. */
 573                 NameMapEl *low, *high;
 574                 if ( from->children.findMulti( data, low, high ) ) {
 575                         /* Record all instances of the name. */
 576                         for ( ; low <= high; low++ )
 577                                 result.insert( low->value );
 578                 }
 579
 580                 /* Name not there, do breadth-first operation of appending all
 581                  * childrent to the processing queue. */
 582                 for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) {
 583                         if ( !recLabelsOnly || (*name)->isLabel )
 584                                 nameQueue.append( *name );
 585                 }
 586         }
 587
 588         /* Queue exhausted and name never found. */
 589         return result;
 590 }
 591
 592 void ParseData::resolveFrom( NameSet &result, NameInst *refFrom,
 593                 const NameRef &nameRef, int namePos )
 594 {
 595         /* Look for the name in the owning scope of the factor with aug. */
 596         NameSet partResult = resolvePart( refFrom, nameRef[namePos], false );
 597
 598         /* If there are more parts to the name then continue on. */
 599         if ( ++namePos < nameRef.length() ) {
 600                 /* There are more components to the name, search using all the part
 601                  * results as the base. */
 602                 for ( NameSet::Iter name = partResult; name.lte(); name++ )
 603                         resolveFrom( result, *name, nameRef, namePos );
 604         }
 605         else {
 606                 /* This is the last component, append the part results to the final
 607                  * results. */
 608                 result.insert( partResult );
 609         }
 610 }
 611
 612 /* Write out a name reference. */
 613 ostream &operator<<( ostream &out, const NameRef &nameRef )
 614 {
 615         int pos = 0;
 616         if ( nameRef[pos] == 0 ) {
 617                 out << "::";
 618                 pos += 1;
 619         }
 620         out << nameRef[pos++];
 621         for ( ; pos < nameRef.length(); pos++ )
 622                 out << "::" << nameRef[pos];
 623         return out;
 624 }
 625
 626 ostream &operator<<( ostream &out, const NameInst &nameInst )
 627 {
 628         /* Count the number fully qualified name parts. */
 629         int numParents = 0;
 630         NameInst *curParent = nameInst.parent;
 631         while ( curParent != 0 ) {
 632                 numParents += 1;
 633                 curParent = curParent->parent;
 634         }
 635
 636         /* Make an array and fill it in. */
 637         curParent = nameInst.parent;
 638         NameInst **parents = new NameInst*[numParents];
 639         for ( int p = numParents-1; p >= 0; p-- ) {
 640                 parents[p] = curParent;
 641                 curParent = curParent->parent;
 642         }
 643
 644         /* Write the parents out, skip the root. */
 645         for ( int p = 1; p < numParents; p++ )
 646                 out << "::" << ( parents[p]->name != 0 ? parents[p]->name : "<ANON>" );
 647
 648         /* Write the name and cleanup. */
 649         out << "::" << ( nameInst.name != 0 ? nameInst.name : "<ANON>" );
 650         delete[] parents;
 651         return out;
 652 }
 653
 654 struct CmpNameInstLoc
 655 {
 656         static int compare( const NameInst *ni1, const NameInst *ni2 )
 657         {
 658                 if ( ni1->loc.line < ni2->loc.line )
 659                         return -1;
 660                 else if ( ni1->loc.line > ni2->loc.line )
 661                         return 1;
 662                 else if ( ni1->loc.col < ni2->loc.col )
 663                         return -1;
 664                 else if ( ni1->loc.col > ni2->loc.col )
 665                         return 1;
 666                 return 0;
 667         }
 668 };
 669
 670 void errorStateLabels( const NameSet &resolved )
 671 {
 672         MergeSort<NameInst*, CmpNameInstLoc> mergeSort;
 673         mergeSort.sort( resolved.data, resolved.length() );
 674         for ( NameSet::Iter res = resolved; res.lte(); res++ )
 675                 error((*res)->loc) << "  -> " << **res << endl;
 676 }
 677
 678
 679 NameInst *ParseData::resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action )
 680 {
 681         NameInst *nameInst = 0;
 682
 683         /* Do the local search if the name is not strictly a root level name
 684          * search. */
 685         if ( nameRef[0] != 0 ) {
 686                 /* If the action is referenced, resolve all of them. */
 687                 if ( action != 0 && action->actionRefs.length() > 0 ) {
 688                         /* Look for the name in all referencing scopes. */
 689                         NameSet resolved;
 690                         for ( ActionRefs::Iter actRef = action->actionRefs; actRef.lte(); actRef++ )
 691                                 resolveFrom( resolved, *actRef, nameRef, 0 );
 692
 693                         if ( resolved.length() > 0 ) {
 694                                 /* Take the first one. */
 695                                 nameInst = resolved[0];
 696                                 if ( resolved.length() > 1 ) {
 697                                         /* Complain about the multiple references. */
 698                                         error(loc) << "state reference " << nameRef <<
 699                                                         " resolves to multiple entry points" << endl;
 700                                         errorStateLabels( resolved );
 701                                 }
 702                         }
 703                 }
 704         }
 705
 706         /* If not found in the local scope, look in global. */
 707         if ( nameInst == 0 ) {
 708                 NameSet resolved;
 709                 int fromPos = nameRef[0] != 0 ? 0 : 1;
 710                 resolveFrom( resolved, rootName, nameRef, fromPos );
 711
 712                 if ( resolved.length() > 0 ) {
 713                         /* Take the first. */
 714                         nameInst = resolved[0];
 715                         if ( resolved.length() > 1 ) {
 716                                 /* Complain about the multiple references. */
 717                                 error(loc) << "state reference " << nameRef <<
 718                                                 " resolves to multiple entry points" << endl;
 719                                 errorStateLabels( resolved );
 720                         }
 721                 }
 722         }
 723
 724         if ( nameInst == 0 ) {
 725                 /* If not found then complain. */
 726                 error(loc) << "could not resolve state reference " << nameRef << endl;
 727         }
 728         return nameInst;
 729 }
 730
 731 void ParseData::resolveNameRefs( InlineList *inlineList, Action *action )
 732 {
 733         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
 734                 switch ( item->type ) {
 735                         case InlineItem::Entry: case InlineItem::Goto:
 736                         case InlineItem::Call: case InlineItem::Next: {
 737                                 /* Resolve, pass action for local search. */
 738                                 NameInst *target = resolveStateRef( *item->nameRef, item->loc, action );
 739
 740                                 /* Name lookup error reporting is handled by resolveStateRef. */
 741                                 if ( target != 0 ) {
 742                                         /* Check if the target goes into a longest match. */
 743                                         NameInst *search = target->parent;
 744                                         while ( search != 0 ) {
 745                                                 if ( search->isLongestMatch ) {
 746                                                         error(item->loc) << "cannot enter inside a longest "
 747                                                                         "match construction as an entry point" << endl;
 748                                                         break;
 749                                                 }
 750                                                 search = search->parent;
 751                                         }
 752
 753                                         /* Record the reference in the name. This will cause the
 754                                          * entry point to survive to the end of the graph
 755                                          * generating walk. */
 756                                         target->numRefs += 1;
 757                                 }
 758
 759                                 item->nameTarg = target;
 760                                 break;
 761                         }
 762                         default:
 763                                 break;
 764                 }
 765
 766                 /* Some of the item types may have children. */
 767                 if ( item->children != 0 )
 768                         resolveNameRefs( item->children, action );
 769         }
 770 }
 771
 772 /* Resolve references to labels in actions. */
 773 void ParseData::resolveActionNameRefs()
 774 {
 775         for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
 776                 /* Only care about the actions that are referenced. */
 777                 if ( act->actionRefs.length() > 0 )
 778                         resolveNameRefs( act->inlineList, act );
 779         }
 780 }
 781
 782 /* Walk a name tree starting at from and fill the name index. */
 783 void ParseData::fillNameIndex( NameInst *from )
 784 {
 785         /* Fill the value for from in the name index. */
 786         nameIndex[from->id] = from;
 787
 788         /* Recurse on the implicit final state and then all children. */
 789         if ( from->final != 0 )
 790                 fillNameIndex( from->final );
 791         for ( NameVect::Iter name = from->childVect; name.lte(); name++ )
 792                 fillNameIndex( *name );
 793 }
 794
 795 void ParseData::makeRootNames()
 796 {
 797         /* Create the root name. */
 798         rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
 799         exportsRootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
 800 }
 801
 802 /* Build the name tree and supporting data structures. */
 803 void ParseData::makeNameTree( GraphDictEl *dictEl )
 804 {
 805         /* Set up curNameInst for the walk. */
 806         initNameWalk();
 807
 808         if ( dictEl != 0 ) {
 809                 /* A start location has been specified. */
 810                 dictEl->value->makeNameTree( dictEl->loc, this );
 811         }
 812         else {
 813                 /* First make the name tree. */
 814                 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
 815                         /* Recurse on the instance. */
 816                         glel->value->makeNameTree( glel->loc, this );
 817                 }
 818         }
 819
 820         /* The number of nodes in the tree can now be given by nextNameId */
 821         nameIndex = new NameInst*[nextNameId];
 822         memset( nameIndex, 0, sizeof(NameInst*)*nextNameId );
 823         fillNameIndex( rootName );
 824         fillNameIndex( exportsRootName );
 825 }
 826
 827
 828 void ParseData::createBuiltin( const char *name, BuiltinMachine builtin )
 829 {
 830         Expression *expression = new Expression( builtin );
 831         Join *join = new Join( expression );
 832         JoinOrLm *joinOrLm = new JoinOrLm( join );
 833         VarDef *varDef = new VarDef( name, joinOrLm );
 834         GraphDictEl *graphDictEl = new GraphDictEl( name, varDef );
 835         graphDict.insert( graphDictEl );
 836 }
 837
 838 /* Initialize the graph dict with builtin types. */
 839 void ParseData::initGraphDict( )
 840 {
 841         createBuiltin( "any", BT_Any );
 842         createBuiltin( "ascii", BT_Ascii );
 843         createBuiltin( "extend", BT_Extend );
 844         createBuiltin( "alpha", BT_Alpha );
 845         createBuiltin( "digit", BT_Digit );
 846         createBuiltin( "alnum", BT_Alnum );
 847         createBuiltin( "lower", BT_Lower );
 848         createBuiltin( "upper", BT_Upper );
 849         createBuiltin( "cntrl", BT_Cntrl );
 850         createBuiltin( "graph", BT_Graph );
 851         createBuiltin( "print", BT_Print );
 852         createBuiltin( "punct", BT_Punct );
 853         createBuiltin( "space", BT_Space );
 854         createBuiltin( "xdigit", BT_Xdigit );
 855         createBuiltin( "null", BT_Lambda );
 856         createBuiltin( "zlen", BT_Lambda );
 857         createBuiltin( "empty", BT_Empty );
 858 }
 859
 860 /* Set the alphabet type. If the types are not valid returns false. */
 861 bool ParseData::setAlphType( const InputLoc &loc, char *s1, char *s2 )
 862 {
 863         alphTypeLoc = loc;
 864         userAlphType = findAlphType( s1, s2 );
 865         alphTypeSet = true;
 866         return userAlphType != 0;
 867 }
 868
 869 /* Set the alphabet type. If the types are not valid returns false. */
 870 bool ParseData::setAlphType( const InputLoc &loc, char *s1 )
 871 {
 872         alphTypeLoc = loc;
 873         userAlphType = findAlphType( s1 );
 874         alphTypeSet = true;
 875         return userAlphType != 0;
 876 }
 877
 878 bool ParseData::setVariable( char *var, InlineList *inlineList )
 879 {
 880         bool set = true;
 881
 882         if ( strcmp( var, "p" ) == 0 )
 883                 pExpr = inlineList;
 884         else if ( strcmp( var, "pe" ) == 0 )
 885                 peExpr = inlineList;
 886         else if ( strcmp( var, "eof" ) == 0 )
 887                 eofExpr = inlineList;
 888         else if ( strcmp( var, "cs" ) == 0 )
 889                 csExpr = inlineList;
 890         else if ( strcmp( var, "data" ) == 0 )
 891                 dataExpr = inlineList;
 892         else if ( strcmp( var, "top" ) == 0 )
 893                 topExpr = inlineList;
 894         else if ( strcmp( var, "stack" ) == 0 )
 895                 stackExpr = inlineList;
 896         else if ( strcmp( var, "act" ) == 0 )
 897                 actExpr = inlineList;
 898         else if ( strcmp( var, "ts" ) == 0 )
 899                 tokstartExpr = inlineList;
 900         else if ( strcmp( var, "te" ) == 0 )
 901                 tokendExpr = inlineList;
 902         else
 903                 set = false;
 904
 905         return set;
 906 }
 907
 908 /* Initialize the key operators object that will be referenced by all fsms
 909  * created. */
 910 void ParseData::initKeyOps( )
 911 {
 912         /* Signedness and bounds. */
 913         HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType;
 914         thisKeyOps.setAlphType( alphType );
 915
 916         if ( lowerNum != 0 ) {
 917                 /* If ranges are given then interpret the alphabet type. */
 918                 thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this );
 919                 thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
 920         }
 921
 922         thisCondData.lastCondKey = thisKeyOps.maxKey;
 923 }
 924
 925 void ParseData::printNameInst( NameInst *nameInst, int level )
 926 {
 927         for ( int i = 0; i < level; i++ )
 928                 cerr << "  ";
 929         cerr << (nameInst->name != 0 ? nameInst->name : "<ANON>") <<
 930                         "  id: " << nameInst->id <<
 931                         "  refs: " << nameInst->numRefs <<
 932                         "  uses: " << nameInst->numUses << endl;
 933         for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ )
 934                 printNameInst( *name, level+1 );
 935 }
 936
 937 /* Remove duplicates of unique actions from an action table. */
 938 void ParseData::removeDups( ActionTable &table )
 939 {
 940         /* Scan through the table looking for unique actions to
 941          * remove duplicates of. */
 942         for ( int i = 0; i < table.length(); i++ ) {
 943                 /* Remove any duplicates ahead of i. */
 944                 for ( int r = i+1; r < table.length(); ) {
 945                         if ( table[r].value == table[i].value )
 946                                 table.vremove(r);
 947                         else
 948                                 r += 1;
 949                 }
 950         }
 951 }
 952
 953 /* Remove duplicates from action lists. This operates only on transition and
 954  * eof action lists and so should be called once all actions have been
 955  * transfered to their final resting place. */
 956 void ParseData::removeActionDups( FsmAp *graph )
 957 {
 958         /* Loop all states. */
 959         for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
 960                 /* Loop all transitions. */
 961                 for ( TransList::Iter trans = state->outList; trans.lte(); trans++ )
 962                         removeDups( trans->actionTable );
 963                 removeDups( state->toStateActionTable );
 964                 removeDups( state->fromStateActionTable );
 965                 removeDups( state->eofActionTable );
 966         }
 967 }
 968
 969 Action *ParseData::newAction( const char *name, InlineList *inlineList )
 970 {
 971         InputLoc loc;
 972         loc.line = 1;
 973         loc.col = 1;
 974         loc.fileName = "<NONE>";
 975
 976         Action *action = new Action( loc, name, inlineList, nextCondId++ );
 977         action->actionRefs.append( rootName );
 978         actionList.append( action );
 979         return action;
 980 }
 981
 982 void ParseData::initLongestMatchData()
 983 {
 984         if ( lmList.length() > 0 ) {
 985                 /* The initTokStart action resets the token start. */
 986                 InlineList *il1 = new InlineList;
 987                 il1->append( new InlineItem( InputLoc(), InlineItem::LmInitTokStart ) );
 988                 initTokStart = newAction( "initts", il1 );
 989                 initTokStart->isLmAction = true;
 990
 991                 /* The initActId action gives act a default value. */
 992                 InlineList *il4 = new InlineList;
 993                 il4->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) );
 994                 initActId = newAction( "initact", il4 );
 995                 initActId->isLmAction = true;
 996
 997                 /* The setTokStart action sets tokstart. */
 998                 InlineList *il5 = new InlineList;
 999                 il5->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) );
1000                 setTokStart = newAction( "ts", il5 );
1001                 setTokStart->isLmAction = true;
1002
1003                 /* The setTokEnd action sets tokend. */
1004                 InlineList *il3 = new InlineList;
1005                 il3->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) );
1006                 setTokEnd = newAction( "te", il3 );
1007                 setTokEnd->isLmAction = true;
1008
1009                 /* The action will also need an ordering: ahead of all user action
1010                  * embeddings. */
1011                 initTokStartOrd = curActionOrd++;
1012                 initActIdOrd = curActionOrd++;
1013                 setTokStartOrd = curActionOrd++;
1014                 setTokEndOrd = curActionOrd++;
1015         }
1016 }
1017
1018 /* After building the graph, do some extra processing to ensure the runtime
1019  * data of the longest mactch operators is consistent. */
1020 void ParseData::setLongestMatchData( FsmAp *graph )
1021 {
1022         if ( lmList.length() > 0 ) {
1023                 /* Make sure all entry points (targets of fgoto, fcall, fnext, fentry)
1024                  * init the tokstart. */
1025                 for ( EntryMap::Iter en = graph->entryPoints; en.lte(); en++ ) {
1026                         /* This is run after duplicates are removed, we must guard against
1027                          * inserting a duplicate. */
1028                         ActionTable &actionTable = en->value->toStateActionTable;
1029                         if ( ! actionTable.hasAction( initTokStart ) )
1030                                 actionTable.setAction( initTokStartOrd, initTokStart );
1031                 }
1032
1033                 /* Find the set of states that are the target of transitions with
1034                  * actions that have calls. These states will be targeted by fret
1035                  * statements. */
1036                 StateSet states;
1037                 for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
1038                         for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
1039                                 for ( ActionTable::Iter ati = trans->actionTable; ati.lte(); ati++ ) {
1040                                         if ( ati->value->anyCall && trans->toState != 0 )
1041                                                 states.insert( trans->toState );
1042                                 }
1043                         }
1044                 }
1045
1046
1047                 /* Init tokstart upon entering the above collected states. */
1048                 for ( StateSet::Iter ps = states; ps.lte(); ps++ ) {
1049                         /* This is run after duplicates are removed, we must guard against
1050                          * inserting a duplicate. */
1051                         ActionTable &actionTable = (*ps)->toStateActionTable;
1052                         if ( ! actionTable.hasAction( initTokStart ) )
1053                                 actionTable.setAction( initTokStartOrd, initTokStart );
1054                 }
1055         }
1056 }
1057
1058 /* Make the graph from a graph dict node. Does minimization and state sorting. */
1059 FsmAp *ParseData::makeInstance( GraphDictEl *gdNode )
1060 {
1061         /* Build the graph from a walk of the parse tree. */
1062         FsmAp *graph = gdNode->value->walk( this );
1063
1064         /* Resolve any labels that point to multiple states. Any labels that are
1065          * still around are referenced only by gotos and calls and they need to be
1066          * made into deterministic entry points. */
1067         graph->deterministicEntry();
1068
1069         /*
1070          * All state construction is now complete.
1071          */
1072
1073         /* Transfer actions from the out action tables to eof action tables. */
1074         for ( StateSet::Iter state = graph->finStateSet; state.lte(); state++ )
1075                 graph->transferOutActions( *state );
1076
1077         /* Transfer global error actions. */
1078         for ( StateList::Iter state = graph->stateList; state.lte(); state++ )
1079                 graph->transferErrorActions( state, 0 );
1080
1081         if ( ::wantDupsRemoved )
1082                 removeActionDups( graph );
1083
1084         /* Remove unreachable states. There should be no dead end states. The
1085          * subtract and intersection operators are the only places where they may
1086          * be created and those operators clean them up. */
1087         graph->removeUnreachableStates();
1088
1089         /* No more fsm operations are to be done. Action ordering numbers are
1090          * no longer of use and will just hinder minimization. Clear them. */
1091         graph->nullActionKeys();
1092
1093         /* Transition priorities are no longer of use. We can clear them
1094          * because they will just hinder minimization as well. Clear them. */
1095         graph->clearAllPriorities();
1096
1097         if ( minimizeOpt != MinimizeNone ) {
1098                 /* Minimize here even if we minimized at every op. Now that function
1099                  * keys have been cleared we may get a more minimal fsm. */
1100                 switch ( minimizeLevel ) {
1101                         case MinimizeApprox:
1102                                 graph->minimizeApproximate();
1103                                 break;
1104                         case MinimizeStable:
1105                                 graph->minimizeStable();
1106                                 break;
1107                         case MinimizePartition1:
1108                                 graph->minimizePartition1();
1109                                 break;
1110                         case MinimizePartition2:
1111                                 graph->minimizePartition2();
1112                                 break;
1113                 }
1114         }
1115
1116         graph->compressTransitions();
1117
1118         return graph;
1119 }
1120
1121 void ParseData::printNameTree()
1122 {
1123         /* Print the name instance map. */
1124         for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ )
1125                 printNameInst( *name, 0 );
1126
1127         cerr << "name index:" << endl;
1128         /* Show that the name index is correct. */
1129         for ( int ni = 0; ni < nextNameId; ni++ ) {
1130                 cerr << ni << ": ";
1131                 const char *name = nameIndex[ni]->name;
1132                 cerr << ( name != 0 ? name : "<ANON>" ) << endl;
1133         }
1134 }
1135
1136 FsmAp *ParseData::makeSpecific( GraphDictEl *gdNode )
1137 {
1138         /* Build the name tree and supporting data structures. */
1139         makeNameTree( gdNode );
1140
1141         /* Resove name references from gdNode. */
1142         initNameWalk();
1143         gdNode->value->resolveNameRefs( this );
1144
1145         /* Do not resolve action references. Since we are not building the entire
1146          * graph there's a good chance that many name references will fail. This
1147          * is okay since generating part of the graph is usually only done when
1148          * inspecting the compiled machine. */
1149
1150         /* Same story for extern entry point references. */
1151
1152         /* Flag this case so that the XML code generator is aware that we haven't
1153          * looked up name references in actions. It can then avoid segfaulting. */
1154         generatingSectionSubset = true;
1155
1156         /* Just building the specified graph. */
1157         initNameWalk();
1158         FsmAp *mainGraph = makeInstance( gdNode );
1159
1160         return mainGraph;
1161 }
1162
1163 FsmAp *ParseData::makeAll()
1164 {
1165         /* Build the name tree and supporting data structures. */
1166         makeNameTree( 0 );
1167
1168         /* Resove name references in the tree. */
1169         initNameWalk();
1170         for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ )
1171                 glel->value->resolveNameRefs( this );
1172
1173         /* Resolve action code name references. */
1174         resolveActionNameRefs();
1175
1176         /* Force name references to the top level instantiations. */
1177         for ( NameVect::Iter inst = rootName->childVect; inst.lte(); inst++ )
1178                 (*inst)->numRefs += 1;
1179
1180         FsmAp *mainGraph = 0;
1181         FsmAp **graphs = new FsmAp*[instanceList.length()];
1182         int numOthers = 0;
1183
1184         /* Make all the instantiations, we know that main exists in this list. */
1185         initNameWalk();
1186         for ( GraphList::Iter glel = instanceList; glel.lte();  glel++ ) {
1187                 if ( strcmp( glel->key, mainMachine ) == 0 ) {
1188                         /* Main graph is always instantiated. */
1189                         mainGraph = makeInstance( glel );
1190                 }
1191                 else {
1192                         /* Instantiate and store in others array. */
1193                         graphs[numOthers++] = makeInstance( glel );
1194                 }
1195         }
1196
1197         if ( mainGraph == 0 )
1198                 mainGraph = graphs[--numOthers];
1199
1200         if ( numOthers > 0 ) {
1201                 /* Add all the other graphs into main. */
1202                 mainGraph->globOp( graphs, numOthers );
1203         }
1204
1205         delete[] graphs;
1206         return mainGraph;
1207 }
1208
1209 void ParseData::analyzeAction( Action *action, InlineList *inlineList )
1210 {
1211         /* FIXME: Actions used as conditions should be very constrained. */
1212         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1213                 if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr )
1214                         action->anyCall = true;
1215
1216                 /* Need to recurse into longest match items. */
1217                 if ( item->type == InlineItem::LmSwitch ) {
1218                         LongestMatch *lm = item->longestMatch;
1219                         for ( LmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) {
1220                                 if ( lmi->action != 0 )
1221                                         analyzeAction( action, lmi->action->inlineList );
1222                         }
1223                 }
1224
1225                 if ( item->type == InlineItem::LmOnLast ||
1226                                 item->type == InlineItem::LmOnNext ||
1227                                 item->type == InlineItem::LmOnLagBehind )
1228                 {
1229                         LongestMatchPart *lmi = item->longestMatchPart;
1230                         if ( lmi->action != 0 )
1231                                 analyzeAction( action, lmi->action->inlineList );
1232                 }
1233
1234                 if ( item->children != 0 )
1235                         analyzeAction( action, item->children );
1236         }
1237 }
1238
1239
1240 /* Check actions for bad uses of fsm directives. We don't go inside longest
1241  * match items in actions created by ragel, since we just want the user
1242  * actions. */
1243 void ParseData::checkInlineList( Action *act, InlineList *inlineList )
1244 {
1245         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1246                 /* EOF checks. */
1247                 if ( act->numEofRefs > 0 ) {
1248                         switch ( item->type ) {
1249                                 /* Currently no checks. */
1250                                 default:
1251                                         break;
1252                         }
1253                 }
1254
1255                 /* Recurse. */
1256                 if ( item->children != 0 )
1257                         checkInlineList( act, item->children );
1258         }
1259 }
1260
1261 void ParseData::checkAction( Action *action )
1262 {
1263         /* Check for actions with calls that are embedded within a longest match
1264          * machine. */
1265         if ( !action->isLmAction && action->numRefs() > 0 && action->anyCall ) {
1266                 for ( ActionRefs::Iter ar = action->actionRefs; ar.lte(); ar++ ) {
1267                         NameInst *check = *ar;
1268                         while ( check != 0 ) {
1269                                 if ( check->isLongestMatch ) {
1270                                         error(action->loc) << "within a scanner, fcall is permitted"
1271                                                 " only in pattern actions" << endl;
1272                                         break;
1273                                 }
1274                                 check = check->parent;
1275                         }
1276                 }
1277         }
1278
1279         checkInlineList( action, action->inlineList );
1280 }
1281
1282
1283 void ParseData::analyzeGraph( FsmAp *graph )
1284 {
1285         for ( ActionList::Iter act = actionList; act.lte(); act++ )
1286                 analyzeAction( act, act->inlineList );
1287
1288         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
1289                 /* The transition list. */
1290                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
1291                         for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ )
1292                                 at->value->numTransRefs += 1;
1293                 }
1294
1295                 for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ )
1296                         at->value->numToStateRefs += 1;
1297
1298                 for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ )
1299                         at->value->numFromStateRefs += 1;
1300
1301                 for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ )
1302                         at->value->numEofRefs += 1;
1303
1304                 for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) {
1305                         for ( CondSet::Iter sci = sc->condSpace->condSet; sci.lte(); sci++ )
1306                                 (*sci)->numCondRefs += 1;
1307                 }
1308         }
1309
1310         /* Checks for bad usage of directives in action code. */
1311         for ( ActionList::Iter act = actionList; act.lte(); act++ )
1312                 checkAction( act );
1313 }
1314
1315 void ParseData::makeExportsNameTree()
1316 {
1317         /* Make a name tree for the exports. */
1318         initExportsNameWalk();
1319
1320         /* First make the name tree. */
1321         for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1322                 if ( gdel->value->isExport ) {
1323                         /* Recurse on the instance. */
1324                         gdel->value->makeNameTree( gdel->loc, this );
1325                 }
1326         }
1327 }
1328
1329 void ParseData::makeExports()
1330 {
1331         makeExportsNameTree();
1332
1333         /* Resove name references in the tree. */
1334         initExportsNameWalk();
1335         for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1336                 if ( gdel->value->isExport )
1337                         gdel->value->resolveNameRefs( this );
1338         }
1339
1340         /* Make all the instantiations, we know that main exists in this list. */
1341         initExportsNameWalk();
1342         for ( GraphDict::Iter gdel = graphDict; gdel.lte();  gdel++ ) {
1343                 /* Check if this var def is an export. */
1344                 if ( gdel->value->isExport ) {
1345                         /* Build the graph from a walk of the parse tree. */
1346                         FsmAp *graph = gdel->value->walk( this );
1347
1348                         /* Build the graph from a walk of the parse tree. */
1349                         if ( !graph->checkSingleCharMachine() ) {
1350                                 error(gdel->loc) << "bad export machine, must define "
1351                                                 "a single character" << endl;
1352                         }
1353                         else {
1354                                 /* Safe to extract the key and declare the export. */
1355                                 Key exportKey = graph->startState->outList.head->lowKey;
1356                                 exportList.append( new Export( gdel->value->name, exportKey ) );
1357                         }
1358                 }
1359         }
1360
1361 }
1362
1363 /* Construct the machine and catch failures which can occur during
1364  * construction. */
1365 void ParseData::prepareMachineGen( GraphDictEl *graphDictEl )
1366 {
1367         try {
1368                 /* This machine construction can fail. */
1369                 prepareMachineGenTBWrapped( graphDictEl );
1370         }
1371         catch ( FsmConstructFail fail ) {
1372                 switch ( fail.reason ) {
1373                         case FsmConstructFail::CondNoKeySpace: {
1374                                 InputLoc &loc = alphTypeSet ? alphTypeLoc : sectionLoc;
1375                                 error(loc) << "sorry, no more characters are "
1376                                                 "available in the alphabet space" << endl;
1377                                 error(loc) << "  for conditions, please use a "
1378                                                 "smaller alphtype or reduce" << endl;
1379                                 error(loc) << "  the span of characters on which "
1380                                                 "conditions are embedded" << endl;
1381                                 break;
1382                         }
1383                 }
1384         }
1385 }
1386
1387 void ParseData::prepareMachineGenTBWrapped( GraphDictEl *graphDictEl )
1388 {
1389         beginProcessing();
1390         initKeyOps();
1391         makeRootNames();
1392         initLongestMatchData();
1393
1394         /* Make the graph, do minimization. */
1395         if ( graphDictEl == 0 )
1396                 sectionGraph = makeAll();
1397         else
1398                 sectionGraph = makeSpecific( graphDictEl );
1399
1400         /* Compute exports from the export definitions. */
1401         makeExports();
1402
1403         /* If any errors have occured in the input file then don't write anything. */
1404         if ( gblErrorCount > 0 )
1405                 return;
1406
1407         analyzeGraph( sectionGraph );
1408
1409         /* Depends on the graph analysis. */
1410         setLongestMatchData( sectionGraph );
1411
1412         /* Decide if an error state is necessary.
1413          *  1. There is an error transition
1414          *  2. There is a gap in the transitions
1415          *  3. The longest match operator requires it. */
1416         if ( lmRequiresErrorState || sectionGraph->hasErrorTrans() )
1417                 sectionGraph->errState = sectionGraph->addState();
1418
1419         /* State numbers need to be assigned such that all final states have a
1420          * larger state id number than all non-final states. This enables the
1421          * first_final mechanism to function correctly. We also want states to be
1422          * ordered in a predictable fashion. So we first apply a depth-first
1423          * search, then do a stable sort by final state status, then assign
1424          * numbers. */
1425
1426         sectionGraph->depthFirstOrdering();
1427         sectionGraph->sortStatesByFinal();
1428         sectionGraph->setStateNumbers( 0 );
1429 }
1430
1431 void ParseData::generateReduced( InputData &inputData )
1432 {
1433         beginProcessing();
1434
1435         /* Open the definition. */
1436         CodeGenMapEl *mapEl = inputData.codeGenMap.find( sectionName );
1437         CodeGenData *cgd = 0;
1438         if ( mapEl != 0 )
1439                 cgd = mapEl->value;
1440         else {
1441                 cgd = makeCodeGen( inputData.inputFileName, sectionName,
1442                                 *inputData.outStream, inputData.wantComplete );
1443                 inputData.codeGenMap.insert( sectionName, cgd );
1444         }
1445
1446         /* Make the generator. */
1447         BackendGen backendGen( sectionName, this, sectionGraph, cgd );
1448
1449         /* Write out with it. */
1450         backendGen.makeBackend();
1451
1452         if ( printStatistics ) {
1453                 cerr << "fsm name  : " << sectionName << endl;
1454                 cerr << "num states: " << sectionGraph->stateList.length() << endl;
1455                 cerr << endl;
1456         }
1457 }
1458
1459 /* Send eof to all parsers. */
1460 void terminateAllParsers( )
1461 {
1462         /* FIXME: a proper token is needed here. Suppose we should use the
1463          * location of EOF in the last file that the parser was referenced in. */
1464         InputLoc loc;
1465         loc.fileName = "<EOF>";
1466         loc.line = 0;
1467         loc.col = 0;
1468         for ( ParserDict::Iter pdel = parserDict; pdel.lte(); pdel++ )
1469                 pdel->value->token( loc, Parser_tk_eof, 0, 0 );
1470 }
1471
1472 void writeLanguage( std::ostream &out )
1473 {
1474         out << " lang=\"";
1475         switch ( hostLang->lang ) {
1476                 case HostLang::C:    out << "C"; break;
1477                 case HostLang::D:    out << "D"; break;
1478                 case HostLang::Java: out << "Java"; break;
1479                 case HostLang::Ruby: out << "Ruby"; break;
1480                 case HostLang::CSharp: out << "C#"; break;
1481         }
1482         out << "\"";
1483 }
1484
1485 void ParseData::generateXML( ostream &out )
1486 {
1487         beginProcessing();
1488
1489         /* Make the generator. */
1490         XMLCodeGen codeGen( sectionName, this, sectionGraph, out );
1491
1492         /* Write out with it. */
1493         codeGen.writeXML();
1494
1495         if ( printStatistics ) {
1496                 cerr << "fsm name  : " << sectionName << endl;
1497                 cerr << "num states: " << sectionGraph->stateList.length() << endl;
1498                 cerr << endl;
1499         }
1500 }
1501
1502 void writeMachines( std::ostream &out, std::string hostData, const char *inputFileName )
1503 {
1504         if ( machineSpec == 0 && machineName == 0 ) {
1505                 /* No machine spec or machine name given. Generate everything. */
1506                 for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1507                         ParseData *pd = parser->value->pd;
1508                         if ( pd->instanceList.length() > 0 )
1509                                 pd->prepareMachineGen( 0 );
1510                 }
1511
1512                 if ( gblErrorCount == 0 ) {
1513                         out << "<ragel version=\"" VERSION "\" filename=\"" << inputFileName << "\"";
1514                         writeLanguage( out );
1515                         out << ">\n";
1516                         for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1517                                 ParseData *pd = parser->value->pd;
1518                                 if ( pd->instanceList.length() > 0 )
1519                                         pd->generateXML( out );
1520                         }
1521                         out << hostData;
1522                         out << "</ragel>\n";
1523                 }
1524         }
1525         else if ( parserDict.length() > 0 ) {
1526                 /* There is either a machine spec or machine name given. */
1527                 ParseData *parseData = 0;
1528                 GraphDictEl *graphDictEl = 0;
1529
1530                 /* Traverse the sections, break out when we find a section/machine
1531                  * that matches the one specified. */
1532                 for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1533                         ParseData *checkPd = parser->value->pd;
1534                         if ( machineSpec == 0 || strcmp( checkPd->sectionName, machineSpec ) == 0 ) {
1535                                 GraphDictEl *checkGdEl = 0;
1536                                 if ( machineName == 0 || (checkGdEl =
1537                                                 checkPd->graphDict.find( machineName )) != 0 )
1538                                 {
1539                                         /* Have a machine spec and/or machine name that matches
1540                                          * the -M/-S options. */
1541                                         parseData = checkPd;
1542                                         graphDictEl = checkGdEl;
1543                                         break;
1544                                 }
1545                         }
1546                 }
1547
1548                 if ( parseData == 0 )
1549                         error() << "could not locate machine specified with -S and/or -M" << endl;
1550                 else {
1551                         /* Section/Machine to emit was found. Prepare and emit it. */
1552                         parseData->prepareMachineGen( graphDictEl );
1553                         if ( gblErrorCount == 0 ) {
1554                                 out << "<ragel version=\"" VERSION "\" filename=\"" << inputFileName << "\"";
1555                                 writeLanguage( out );
1556                                 out << ">\n";
1557                                 parseData->generateXML( out );
1558                                 out << hostData;
1559                                 out << "</ragel>\n";
1560                         }
1561                 }
1562         }
1563 }
1564