ragel/parsedata.cpp

   1 /*
   2  *  Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
   3  */
   4
   5 /*  This file is part of Ragel.
   6  *
   7  *  Ragel is free software; you can redistribute it and/or modify
   8  *  it under the terms of the GNU General Public License as published by
   9  *  the Free Software Foundation; either version 2 of the License, or
  10  *  (at your option) any later version.
  11  *
  12  *  Ragel is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  *  GNU General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU General Public License
  18  *  along with Ragel; if not, write to the Free Software
  19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  */
  21
  22 #include <iostream>
  23 #include <iomanip>
  24 #include <errno.h>
  25 #include <stdlib.h>
  26 #include <limits.h>
  27
  28 #include "ragel.h"
  29 #include "rlparse.h"
  30 #include "parsedata.h"
  31 #include "parsetree.h"
  32 #include "mergesort.h"
  33 #include "xmlcodegen.h"
  34
  35 using namespace std;
  36
  37 char mainMachine[] = "main";
  38
  39 void Token::set( char *str, int len )
  40 {
  41         length = len;
  42         data = new char[len+1];
  43         memcpy( data, str, len );
  44         data[len] = 0;
  45 }
  46
  47 void Token::append( const Token &other )
  48 {
  49         int newLength = length + other.length;
  50         char *newString = new char[newLength+1];
  51         memcpy( newString, data, length );
  52         memcpy( newString + length, other.data, other.length );
  53         newString[newLength] = 0;
  54         data = newString;
  55         length = newLength;
  56 }
  57
  58 /* Perform minimization after an operation according
  59  * to the command line args. */
  60 void afterOpMinimize( FsmAp *fsm, bool lastInSeq )
  61 {
  62         /* Switch on the prefered minimization algorithm. */
  63         if ( minimizeOpt == MinimizeEveryOp || minimizeOpt == MinimizeMostOps && lastInSeq ) {
  64                 /* First clean up the graph. FsmAp operations may leave these
  65                  * lying around. There should be no dead end states. The subtract
  66                  * intersection operators are the only places where they may be
  67                  * created and those operators clean them up. */
  68                 fsm->removeUnreachableStates();
  69
  70                 switch ( minimizeLevel ) {
  71                         case MinimizeApprox:
  72                                 fsm->minimizeApproximate();
  73                                 break;
  74                         case MinimizePartition1:
  75                                 fsm->minimizePartition1();
  76                                 break;
  77                         case MinimizePartition2:
  78                                 fsm->minimizePartition2();
  79                                 break;
  80                         case MinimizeStable:
  81                                 fsm->minimizeStable();
  82                                 break;
  83                 }
  84         }
  85 }
  86
  87 /* Count the transitions in the fsm by walking the state list. */
  88 int countTransitions( FsmAp *fsm )
  89 {
  90         int numTrans = 0;
  91         StateAp *state = fsm->stateList.head;
  92         while ( state != 0 ) {
  93                 numTrans += state->outList.length();
  94                 state = state->next;
  95         }
  96         return numTrans;
  97 }
  98
  99 Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd )
 100 {
 101         /* Reset errno so we can check for overflow or underflow. In the event of
 102          * an error, sets the return val to the upper or lower bound being tested
 103          * against. */
 104         errno = 0;
 105         unsigned int size = keyOps->alphType->size;
 106         bool unusedBits = size < sizeof(unsigned long);
 107
 108         unsigned long ul = strtoul( str, 0, 16 );
 109
 110         if ( errno == ERANGE || unusedBits && ul >> (size * 8) ) {
 111                 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
 112                 ul = 1 << (size * 8);
 113         }
 114
 115         if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) )
 116                 ul |= (0xffffffff >> (size*8 ) ) << (size*8);
 117
 118         return Key( (long)ul );
 119 }
 120
 121 Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd )
 122 {
 123         /* Convert the number to a decimal. First reset errno so we can check
 124          * for overflow or underflow. */
 125         errno = 0;
 126         long long minVal = keyOps->alphType->minVal;
 127         long long maxVal = keyOps->alphType->maxVal;
 128
 129         long long ll = strtoll( str, 0, 10 );
 130
 131         /* Check for underflow. */
 132         if ( errno == ERANGE && ll < 0 || ll < minVal) {
 133                 error(loc) << "literal " << str << " underflows the alphabet type" << endl;
 134                 ll = minVal;
 135         }
 136         /* Check for overflow. */
 137         else if ( errno == ERANGE && ll > 0 || ll > maxVal ) {
 138                 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
 139                 ll = maxVal;
 140         }
 141
 142         if ( keyOps->alphType->isSigned )
 143                 return Key( (long)ll );
 144         else
 145                 return Key( (unsigned long)ll );
 146 }
 147
 148 /* Make an fsm key in int format (what the fsm graph uses) from an alphabet
 149  * number returned by the parser. Validates that the number doesn't overflow
 150  * the alphabet type. */
 151 Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd )
 152 {
 153         /* Switch on hex/decimal format. */
 154         if ( str[0] == '0' && str[1] == 'x' )
 155                 return makeFsmKeyHex( str, loc, pd );
 156         else
 157                 return makeFsmKeyDec( str, loc, pd );
 158 }
 159
 160 /* Make an fsm int format (what the fsm graph uses) from a single character.
 161  * Performs proper conversion depending on signed/unsigned property of the
 162  * alphabet. */
 163 Key makeFsmKeyChar( char c, ParseData *pd )
 164 {
 165         if ( keyOps->isSigned ) {
 166                 /* Copy from a char type. */
 167                 return Key( c );
 168         }
 169         else {
 170                 /* Copy from an unsigned byte type. */
 171                 return Key( (unsigned char)c );
 172         }
 173 }
 174
 175 /* Make an fsm key array in int format (what the fsm graph uses) from a string
 176  * of characters. Performs proper conversion depending on signed/unsigned
 177  * property of the alphabet. */
 178 void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd )
 179 {
 180         if ( keyOps->isSigned ) {
 181                 /* Copy from a char star type. */
 182                 char *src = data;
 183                 for ( int i = 0; i < len; i++ )
 184                         result[i] = Key(src[i]);
 185         }
 186         else {
 187                 /* Copy from an unsigned byte ptr type. */
 188                 unsigned char *src = (unsigned char*) data;
 189                 for ( int i = 0; i < len; i++ )
 190                         result[i] = Key(src[i]);
 191         }
 192 }
 193
 194 /* Like makeFsmKeyArray except the result has only unique keys. They ordering
 195  * will be changed. */
 196 void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
 197                 bool caseInsensitive, ParseData *pd )
 198 {
 199         /* Use a transitions list for getting unique keys. */
 200         if ( keyOps->isSigned ) {
 201                 /* Copy from a char star type. */
 202                 char *src = data;
 203                 for ( int si = 0; si < len; si++ ) {
 204                         Key key( src[si] );
 205                         result.insert( key );
 206                         if ( caseInsensitive ) {
 207                                 if ( key.isLower() )
 208                                         result.insert( key.toUpper() );
 209                                 else if ( key.isUpper() )
 210                                         result.insert( key.toLower() );
 211                         }
 212                 }
 213         }
 214         else {
 215                 /* Copy from an unsigned byte ptr type. */
 216                 unsigned char *src = (unsigned char*) data;
 217                 for ( int si = 0; si < len; si++ ) {
 218                         Key key( src[si] );
 219                         result.insert( key );
 220                         if ( caseInsensitive ) {
 221                                 if ( key.isLower() )
 222                                         result.insert( key.toUpper() );
 223                                 else if ( key.isUpper() )
 224                                         result.insert( key.toLower() );
 225                         }
 226                 }
 227         }
 228 }
 229
 230 FsmAp *dotFsm( ParseData *pd )
 231 {
 232         FsmAp *retFsm = new FsmAp();
 233         retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey );
 234         return retFsm;
 235 }
 236
 237 FsmAp *dotStarFsm( ParseData *pd )
 238 {
 239         FsmAp *retFsm = new FsmAp();
 240         retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey );
 241         return retFsm;
 242 }
 243
 244 /* Make a builtin type. Depends on the signed nature of the alphabet type. */
 245 FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd )
 246 {
 247         /* FsmAp created to return. */
 248         FsmAp *retFsm = 0;
 249         bool isSigned = keyOps->isSigned;
 250
 251         switch ( builtin ) {
 252         case BT_Any: {
 253                 /* All characters. */
 254                 retFsm = dotFsm( pd );
 255                 break;
 256         }
 257         case BT_Ascii: {
 258                 /* Ascii characters 0 to 127. */
 259                 retFsm = new FsmAp();
 260                 retFsm->rangeFsm( 0, 127 );
 261                 break;
 262         }
 263         case BT_Extend: {
 264                 /* Ascii extended characters. This is the full byte range. Dependent
 265                  * on signed, vs no signed. If the alphabet is one byte then just use
 266                  * dot fsm. */
 267                 if ( isSigned ) {
 268                         retFsm = new FsmAp();
 269                         retFsm->rangeFsm( -128, 127 );
 270                 }
 271                 else {
 272                         retFsm = new FsmAp();
 273                         retFsm->rangeFsm( 0, 255 );
 274                 }
 275                 break;
 276         }
 277         case BT_Alpha: {
 278                 /* Alpha [A-Za-z]. */
 279                 FsmAp *upper = new FsmAp(), *lower = new FsmAp();
 280                 upper->rangeFsm( 'A', 'Z' );
 281                 lower->rangeFsm( 'a', 'z' );
 282                 upper->unionOp( lower );
 283                 upper->minimizePartition2();
 284                 retFsm = upper;
 285                 break;
 286         }
 287         case BT_Digit: {
 288                 /* Digits [0-9]. */
 289                 retFsm = new FsmAp();
 290                 retFsm->rangeFsm( '0', '9' );
 291                 break;
 292         }
 293         case BT_Alnum: {
 294                 /* Alpha numerics [0-9A-Za-z]. */
 295                 FsmAp *digit = new FsmAp(), *lower = new FsmAp();
 296                 FsmAp *upper = new FsmAp();
 297                 digit->rangeFsm( '0', '9' );
 298                 upper->rangeFsm( 'A', 'Z' );
 299                 lower->rangeFsm( 'a', 'z' );
 300                 digit->unionOp( upper );
 301                 digit->unionOp( lower );
 302                 digit->minimizePartition2();
 303                 retFsm = digit;
 304                 break;
 305         }
 306         case BT_Lower: {
 307                 /* Lower case characters. */
 308                 retFsm = new FsmAp();
 309                 retFsm->rangeFsm( 'a', 'z' );
 310                 break;
 311         }
 312         case BT_Upper: {
 313                 /* Upper case characters. */
 314                 retFsm = new FsmAp();
 315                 retFsm->rangeFsm( 'A', 'Z' );
 316                 break;
 317         }
 318         case BT_Cntrl: {
 319                 /* Control characters. */
 320                 FsmAp *cntrl = new FsmAp();
 321                 FsmAp *highChar = new FsmAp();
 322                 cntrl->rangeFsm( 0, 31 );
 323                 highChar->concatFsm( 127 );
 324                 cntrl->unionOp( highChar );
 325                 cntrl->minimizePartition2();
 326                 retFsm = cntrl;
 327                 break;
 328         }
 329         case BT_Graph: {
 330                 /* Graphical ascii characters [!-~]. */
 331                 retFsm = new FsmAp();
 332                 retFsm->rangeFsm( '!', '~' );
 333                 break;
 334         }
 335         case BT_Print: {
 336                 /* Printable characters. Same as graph except includes space. */
 337                 retFsm = new FsmAp();
 338                 retFsm->rangeFsm( ' ', '~' );
 339                 break;
 340         }
 341         case BT_Punct: {
 342                 /* Punctuation. */
 343                 FsmAp *range1 = new FsmAp();
 344                 FsmAp *range2 = new FsmAp();
 345                 FsmAp *range3 = new FsmAp();
 346                 FsmAp *range4 = new FsmAp();
 347                 range1->rangeFsm( '!', '/' );
 348                 range2->rangeFsm( ':', '@' );
 349                 range3->rangeFsm( '[', '`' );
 350                 range4->rangeFsm( '{', '~' );
 351                 range1->unionOp( range2 );
 352                 range1->unionOp( range3 );
 353                 range1->unionOp( range4 );
 354                 range1->minimizePartition2();
 355                 retFsm = range1;
 356                 break;
 357         }
 358         case BT_Space: {
 359                 /* Whitespace: [\t\v\f\n\r ]. */
 360                 FsmAp *cntrl = new FsmAp();
 361                 FsmAp *space = new FsmAp();
 362                 cntrl->rangeFsm( '\t', '\r' );
 363                 space->concatFsm( ' ' );
 364                 cntrl->unionOp( space );
 365                 cntrl->minimizePartition2();
 366                 retFsm = cntrl;
 367                 break;
 368         }
 369         case BT_Xdigit: {
 370                 /* Hex digits [0-9A-Fa-f]. */
 371                 FsmAp *digit = new FsmAp();
 372                 FsmAp *upper = new FsmAp();
 373                 FsmAp *lower = new FsmAp();
 374                 digit->rangeFsm( '0', '9' );
 375                 upper->rangeFsm( 'A', 'F' );
 376                 lower->rangeFsm( 'a', 'f' );
 377                 digit->unionOp( upper );
 378                 digit->unionOp( lower );
 379                 digit->minimizePartition2();
 380                 retFsm = digit;
 381                 break;
 382         }
 383         case BT_Lambda: {
 384                 retFsm = new FsmAp();
 385                 retFsm->lambdaFsm();
 386                 break;
 387         }
 388         case BT_Empty: {
 389                 retFsm = new FsmAp();
 390                 retFsm->emptyFsm();
 391                 break;
 392         }}
 393
 394         return retFsm;
 395 }
 396
 397 /* Check if this name inst or any name inst below is referenced. */
 398 bool NameInst::anyRefsRec()
 399 {
 400         if ( numRefs > 0 )
 401                 return true;
 402
 403         /* Recurse on children until true. */
 404         for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) {
 405                 if ( (*ch)->anyRefsRec() )
 406                         return true;
 407         }
 408
 409         return false;
 410 }
 411
 412 /*
 413  * ParseData
 414  */
 415
 416 /* Initialize the structure that will collect info during the parse of a
 417  * machine. */
 418 ParseData::ParseData( char *fileName, char *sectionName,
 419                 const InputLoc &sectionLoc )
 420 :
 421         sectionGraph(0),
 422         generatingSectionSubset(false),
 423         nextPriorKey(0),
 424         /* 0 is reserved for global error actions. */
 425         nextLocalErrKey(1),
 426         nextNameId(0),
 427         nextCondId(0),
 428         alphTypeSet(false),
 429         getKeyExpr(0),
 430         accessExpr(0),
 431         pExpr(0),
 432         peExpr(0),
 433         csExpr(0),
 434         topExpr(0),
 435         stackExpr(0),
 436         actExpr(0),
 437         tokstartExpr(0),
 438         tokendExpr(0),
 439         lowerNum(0),
 440         upperNum(0),
 441         fileName(fileName),
 442         sectionName(sectionName),
 443         sectionLoc(sectionLoc),
 444         curActionOrd(0),
 445         curPriorOrd(0),
 446         rootName(0),
 447         exportsRootName(0),
 448         nextEpsilonResolvedLink(0),
 449         nextLongestMatchId(1),
 450         lmRequiresErrorState(false)
 451 {
 452         /* Initialize the dictionary of graphs. This is our symbol table. The
 453          * initialization needs to be done on construction which happens at the
 454          * beginning of a machine spec so any assignment operators can reference
 455          * the builtins. */
 456         initGraphDict();
 457 }
 458
 459 /* Clean up the data collected during a parse. */
 460 ParseData::~ParseData()
 461 {
 462         /* Delete all the nodes in the action list. Will cause all the
 463          * string data that represents the actions to be deallocated. */
 464         actionList.empty();
 465 }
 466
 467 /* Make a name id in the current name instantiation scope if it is not
 468  * already there. */
 469 NameInst *ParseData::addNameInst( const InputLoc &loc, char *data, bool isLabel )
 470 {
 471         /* Create the name instantitaion object and insert it. */
 472         NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel );
 473         curNameInst->childVect.append( newNameInst );
 474         if ( data != 0 )
 475                 curNameInst->children.insertMulti( data, newNameInst );
 476         return newNameInst;
 477 }
 478
 479 void ParseData::initNameWalk()
 480 {
 481         curNameInst = rootName;
 482         curNameChild = 0;
 483 }
 484
 485 void ParseData::initExportsNameWalk()
 486 {
 487         curNameInst = exportsRootName;
 488         curNameChild = 0;
 489 }
 490
 491 /* Goes into the next child scope. The number of the child is already set up.
 492  * We need this for the syncronous name tree and parse tree walk to work
 493  * properly. It is reset on entry into a scope and advanced on poping of a
 494  * scope. A call to enterNameScope should be accompanied by a corresponding
 495  * popNameScope. */
 496 NameFrame ParseData::enterNameScope( bool isLocal, int numScopes )
 497 {
 498         /* Save off the current data. */
 499         NameFrame retFrame;
 500         retFrame.prevNameInst = curNameInst;
 501         retFrame.prevNameChild = curNameChild;
 502         retFrame.prevLocalScope = localNameScope;
 503
 504         /* Enter into the new name scope. */
 505         for ( int i = 0; i < numScopes; i++ ) {
 506                 curNameInst = curNameInst->childVect[curNameChild];
 507                 curNameChild = 0;
 508         }
 509
 510         if ( isLocal )
 511                 localNameScope = curNameInst;
 512
 513         return retFrame;
 514 }
 515
 516 /* Return from a child scope to a parent. The parent info must be specified as
 517  * an argument and is obtained from the corresponding call to enterNameScope.
 518  * */
 519 void ParseData::popNameScope( const NameFrame &frame )
 520 {
 521         /* Pop the name scope. */
 522         curNameInst = frame.prevNameInst;
 523         curNameChild = frame.prevNameChild+1;
 524         localNameScope = frame.prevLocalScope;
 525 }
 526
 527 void ParseData::resetNameScope( const NameFrame &frame )
 528 {
 529         /* Pop the name scope. */
 530         curNameInst = frame.prevNameInst;
 531         curNameChild = frame.prevNameChild;
 532         localNameScope = frame.prevLocalScope;
 533 }
 534
 535
 536 void ParseData::unsetObsoleteEntries( FsmAp *graph )
 537 {
 538         /* Loop the reference names and increment the usage. Names that are no
 539          * longer needed will be unset in graph. */
 540         for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) {
 541                 /* Get the name. */
 542                 NameInst *name = *ref;
 543                 name->numUses += 1;
 544
 545                 /* If the name is no longer needed unset its corresponding entry. */
 546                 if ( name->numUses == name->numRefs ) {
 547                         assert( graph->entryPoints.find( name->id ) != 0 );
 548                         graph->unsetEntry( name->id );
 549                         assert( graph->entryPoints.find( name->id ) == 0 );
 550                 }
 551         }
 552 }
 553
 554 NameSet ParseData::resolvePart( NameInst *refFrom, char *data, bool recLabelsOnly )
 555 {
 556         /* Queue needed for breadth-first search, load it with the start node. */
 557         NameInstList nameQueue;
 558         nameQueue.append( refFrom );
 559
 560         NameSet result;
 561         while ( nameQueue.length() > 0 ) {
 562                 /* Pull the next from location off the queue. */
 563                 NameInst *from = nameQueue.detachFirst();
 564
 565                 /* Look for the name. */
 566                 NameMapEl *low, *high;
 567                 if ( from->children.findMulti( data, low, high ) ) {
 568                         /* Record all instances of the name. */
 569                         for ( ; low <= high; low++ )
 570                                 result.insert( low->value );
 571                 }
 572
 573                 /* Name not there, do breadth-first operation of appending all
 574                  * childrent to the processing queue. */
 575                 for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) {
 576                         if ( !recLabelsOnly || (*name)->isLabel )
 577                                 nameQueue.append( *name );
 578                 }
 579         }
 580
 581         /* Queue exhausted and name never found. */
 582         return result;
 583 }
 584
 585 void ParseData::resolveFrom( NameSet &result, NameInst *refFrom,
 586                 const NameRef &nameRef, int namePos )
 587 {
 588         /* Look for the name in the owning scope of the factor with aug. */
 589         NameSet partResult = resolvePart( refFrom, nameRef[namePos], false );
 590
 591         /* If there are more parts to the name then continue on. */
 592         if ( ++namePos < nameRef.length() ) {
 593                 /* There are more components to the name, search using all the part
 594                  * results as the base. */
 595                 for ( NameSet::Iter name = partResult; name.lte(); name++ )
 596                         resolveFrom( result, *name, nameRef, namePos );
 597         }
 598         else {
 599                 /* This is the last component, append the part results to the final
 600                  * results. */
 601                 result.insert( partResult );
 602         }
 603 }
 604
 605 /* Write out a name reference. */
 606 ostream &operator<<( ostream &out, const NameRef &nameRef )
 607 {
 608         int pos = 0;
 609         if ( nameRef[pos] == 0 ) {
 610                 out << "::";
 611                 pos += 1;
 612         }
 613         out << nameRef[pos++];
 614         for ( ; pos < nameRef.length(); pos++ )
 615                 out << "::" << nameRef[pos];
 616         return out;
 617 }
 618
 619 ostream &operator<<( ostream &out, const NameInst &nameInst )
 620 {
 621         /* Count the number fully qualified name parts. */
 622         int numParents = 0;
 623         NameInst *curParent = nameInst.parent;
 624         while ( curParent != 0 ) {
 625                 numParents += 1;
 626                 curParent = curParent->parent;
 627         }
 628
 629         /* Make an array and fill it in. */
 630         curParent = nameInst.parent;
 631         NameInst **parents = new NameInst*[numParents];
 632         for ( int p = numParents-1; p >= 0; p-- ) {
 633                 parents[p] = curParent;
 634                 curParent = curParent->parent;
 635         }
 636
 637         /* Write the parents out, skip the root. */
 638         for ( int p = 1; p < numParents; p++ )
 639                 out << "::" << ( parents[p]->name != 0 ? parents[p]->name : "<ANON>" );
 640
 641         /* Write the name and cleanup. */
 642         out << "::" << ( nameInst.name != 0 ? nameInst.name : "<ANON>" );
 643         delete[] parents;
 644         return out;
 645 }
 646
 647 struct CmpNameInstLoc
 648 {
 649         static int compare( const NameInst *ni1, const NameInst *ni2 )
 650         {
 651                 if ( ni1->loc.line < ni2->loc.line )
 652                         return -1;
 653                 else if ( ni1->loc.line > ni2->loc.line )
 654                         return 1;
 655                 else if ( ni1->loc.col < ni2->loc.col )
 656                         return -1;
 657                 else if ( ni1->loc.col > ni2->loc.col )
 658                         return 1;
 659                 return 0;
 660         }
 661 };
 662
 663 void errorStateLabels( const NameSet &resolved )
 664 {
 665         MergeSort<NameInst*, CmpNameInstLoc> mergeSort;
 666         mergeSort.sort( resolved.data, resolved.length() );
 667         for ( NameSet::Iter res = resolved; res.lte(); res++ )
 668                 error((*res)->loc) << "  -> " << **res << endl;
 669 }
 670
 671
 672 NameInst *ParseData::resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action )
 673 {
 674         NameInst *nameInst = 0;
 675
 676         /* Do the local search if the name is not strictly a root level name
 677          * search. */
 678         if ( nameRef[0] != 0 ) {
 679                 /* If the action is referenced, resolve all of them. */
 680                 if ( action != 0 && action->actionRefs.length() > 0 ) {
 681                         /* Look for the name in all referencing scopes. */
 682                         NameSet resolved;
 683                         for ( ActionRefs::Iter actRef = action->actionRefs; actRef.lte(); actRef++ )
 684                                 resolveFrom( resolved, *actRef, nameRef, 0 );
 685
 686                         if ( resolved.length() > 0 ) {
 687                                 /* Take the first one. */
 688                                 nameInst = resolved[0];
 689                                 if ( resolved.length() > 1 ) {
 690                                         /* Complain about the multiple references. */
 691                                         error(loc) << "state reference " << nameRef <<
 692                                                         " resolves to multiple entry points" << endl;
 693                                         errorStateLabels( resolved );
 694                                 }
 695                         }
 696                 }
 697         }
 698
 699         /* If not found in the local scope, look in global. */
 700         if ( nameInst == 0 ) {
 701                 NameSet resolved;
 702                 int fromPos = nameRef[0] != 0 ? 0 : 1;
 703                 resolveFrom( resolved, rootName, nameRef, fromPos );
 704
 705                 if ( resolved.length() > 0 ) {
 706                         /* Take the first. */
 707                         nameInst = resolved[0];
 708                         if ( resolved.length() > 1 ) {
 709                                 /* Complain about the multiple references. */
 710                                 error(loc) << "state reference " << nameRef <<
 711                                                 " resolves to multiple entry points" << endl;
 712                                 errorStateLabels( resolved );
 713                         }
 714                 }
 715         }
 716
 717         if ( nameInst == 0 ) {
 718                 /* If not found then complain. */
 719                 error(loc) << "could not resolve state reference " << nameRef << endl;
 720         }
 721         return nameInst;
 722 }
 723
 724 void ParseData::resolveNameRefs( InlineList *inlineList, Action *action )
 725 {
 726         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
 727                 switch ( item->type ) {
 728                         case InlineItem::Entry: case InlineItem::Goto:
 729                         case InlineItem::Call: case InlineItem::Next: {
 730                                 /* Resolve, pass action for local search. */
 731                                 NameInst *target = resolveStateRef( *item->nameRef, item->loc, action );
 732
 733                                 /* Check if the target goes into a longest match. */
 734                                 NameInst *search = target->parent;
 735                                 while ( search != 0 ) {
 736                                         if ( search->isLongestMatch ) {
 737                                                 error(item->loc) << "cannot enter inside a longest "
 738                                                                 "match construction as an entry point" << endl;
 739                                                 break;
 740                                         }
 741                                         search = search->parent;
 742                                 }
 743
 744                                 /* Note the reference in the name. This will cause the entry
 745                                  * point to survive to the end of the graph generating walk. */
 746                                 if ( target != 0 )
 747                                         target->numRefs += 1;
 748                                 item->nameTarg = target;
 749                                 break;
 750                         }
 751                         default:
 752                                 break;
 753                 }
 754
 755                 /* Some of the item types may have children. */
 756                 if ( item->children != 0 )
 757                         resolveNameRefs( item->children, action );
 758         }
 759 }
 760
 761 /* Resolve references to labels in actions. */
 762 void ParseData::resolveActionNameRefs()
 763 {
 764         for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
 765                 /* Only care about the actions that are referenced. */
 766                 if ( act->actionRefs.length() > 0 )
 767                         resolveNameRefs( act->inlineList, act );
 768         }
 769 }
 770
 771 /* Walk a name tree starting at from and fill the name index. */
 772 void ParseData::fillNameIndex( NameInst *from )
 773 {
 774         /* Fill the value for from in the name index. */
 775         nameIndex[from->id] = from;
 776
 777         /* Recurse on the implicit final state and then all children. */
 778         if ( from->final != 0 )
 779                 fillNameIndex( from->final );
 780         for ( NameVect::Iter name = from->childVect; name.lte(); name++ )
 781                 fillNameIndex( *name );
 782 }
 783
 784 void ParseData::makeRootNames()
 785 {
 786         /* Create the root name. */
 787         rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
 788         exportsRootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
 789 }
 790
 791 /* Build the name tree and supporting data structures. */
 792 void ParseData::makeNameTree( GraphDictEl *dictEl )
 793 {
 794         /* Set up curNameInst for the walk. */
 795         initNameWalk();
 796
 797         if ( dictEl != 0 ) {
 798                 /* A start location has been specified. */
 799                 dictEl->value->makeNameTree( dictEl->loc, this );
 800         }
 801         else {
 802                 /* First make the name tree. */
 803                 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
 804                         /* Recurse on the instance. */
 805                         glel->value->makeNameTree( glel->loc, this );
 806                 }
 807         }
 808
 809         /* The number of nodes in the tree can now be given by nextNameId */
 810         nameIndex = new NameInst*[nextNameId];
 811         memset( nameIndex, 0, sizeof(NameInst*)*nextNameId );
 812         fillNameIndex( rootName );
 813         fillNameIndex( exportsRootName );
 814 }
 815
 816
 817 void ParseData::createBuiltin( char *name, BuiltinMachine builtin )
 818 {
 819         Expression *expression = new Expression( builtin );
 820         Join *join = new Join( expression );
 821         JoinOrLm *joinOrLm = new JoinOrLm( join );
 822         VarDef *varDef = new VarDef( name, joinOrLm );
 823         GraphDictEl *graphDictEl = new GraphDictEl( name, varDef );
 824         graphDict.insert( graphDictEl );
 825 }
 826
 827 /* Initialize the graph dict with builtin types. */
 828 void ParseData::initGraphDict( )
 829 {
 830         createBuiltin( "any", BT_Any );
 831         createBuiltin( "ascii", BT_Ascii );
 832         createBuiltin( "extend", BT_Extend );
 833         createBuiltin( "alpha", BT_Alpha );
 834         createBuiltin( "digit", BT_Digit );
 835         createBuiltin( "alnum", BT_Alnum );
 836         createBuiltin( "lower", BT_Lower );
 837         createBuiltin( "upper", BT_Upper );
 838         createBuiltin( "cntrl", BT_Cntrl );
 839         createBuiltin( "graph", BT_Graph );
 840         createBuiltin( "print", BT_Print );
 841         createBuiltin( "punct", BT_Punct );
 842         createBuiltin( "space", BT_Space );
 843         createBuiltin( "xdigit", BT_Xdigit );
 844         createBuiltin( "null", BT_Lambda );
 845         createBuiltin( "zlen", BT_Lambda );
 846         createBuiltin( "empty", BT_Empty );
 847 }
 848
 849 /* Set the alphabet type. If the types are not valid returns false. */
 850 bool ParseData::setAlphType( char *s1, char *s2 )
 851 {
 852         bool valid = false;
 853         for ( int i = 0; i < hostLang->numHostTypes; i++ ) {
 854                 if ( strcmp( s1, hostLang->hostTypes[i].data1 ) == 0 &&
 855                                 hostLang->hostTypes[i].data2 != 0 &&
 856                                 strcmp( s2, hostLang->hostTypes[i].data2 ) == 0 )
 857                 {
 858                         valid = true;
 859                         userAlphType = hostLang->hostTypes + i;
 860                         break;
 861                 }
 862         }
 863
 864         alphTypeSet = true;
 865         return valid;
 866 }
 867
 868 /* Set the alphabet type. If the types are not valid returns false. */
 869 bool ParseData::setAlphType( char *s1 )
 870 {
 871         bool valid = false;
 872         for ( int i = 0; i < hostLang->numHostTypes; i++ ) {
 873                 if ( strcmp( s1, hostLang->hostTypes[i].data1 ) == 0 &&
 874                                 hostLang->hostTypes[i].data2 == 0 )
 875                 {
 876                         valid = true;
 877                         userAlphType = hostLang->hostTypes + i;
 878                         break;
 879                 }
 880         }
 881
 882         alphTypeSet = true;
 883         return valid;
 884 }
 885
 886 bool ParseData::setVariable( char *var, InlineList *inlineList )
 887 {
 888         bool set = true;
 889
 890         if ( strcmp( var, "p" ) == 0 )
 891                 pExpr = inlineList;
 892         else if ( strcmp( var, "pe" ) == 0 )
 893                 peExpr = inlineList;
 894         else if ( strcmp( var, "cs" ) == 0 )
 895                 csExpr = inlineList;
 896         else if ( strcmp( var, "top" ) == 0 )
 897                 topExpr = inlineList;
 898         else if ( strcmp( var, "stack" ) == 0 )
 899                 stackExpr = inlineList;
 900         else if ( strcmp( var, "act" ) == 0 )
 901                 actExpr = inlineList;
 902         else if ( strcmp( var, "tokstart" ) == 0 )
 903                 tokstartExpr = inlineList;
 904         else if ( strcmp( var, "tokend" ) == 0 )
 905                 tokendExpr = inlineList;
 906         else
 907                 set = false;
 908
 909         return set;
 910 }
 911
 912 /* Initialize the key operators object that will be referenced by all fsms
 913  * created. */
 914 void ParseData::initKeyOps( )
 915 {
 916         /* Signedness and bounds. */
 917         HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType;
 918         thisKeyOps.setAlphType( alphType );
 919
 920         if ( lowerNum != 0 ) {
 921                 /* If ranges are given then interpret the alphabet type. */
 922                 thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this );
 923                 thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
 924         }
 925
 926         thisCondData.nextCondKey = thisKeyOps.maxKey;
 927         thisCondData.nextCondKey.increment();
 928 }
 929
 930 void ParseData::printNameInst( NameInst *nameInst, int level )
 931 {
 932         for ( int i = 0; i < level; i++ )
 933                 cerr << "  ";
 934         cerr << (nameInst->name != 0 ? nameInst->name : "<ANON>") <<
 935                         "  id: " << nameInst->id <<
 936                         "  refs: " << nameInst->numRefs <<
 937                         "  uses: " << nameInst->numUses << endl;
 938         for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ )
 939                 printNameInst( *name, level+1 );
 940 }
 941
 942 /* Remove duplicates of unique actions from an action table. */
 943 void ParseData::removeDups( ActionTable &table )
 944 {
 945         /* Scan through the table looking for unique actions to
 946          * remove duplicates of. */
 947         for ( int i = 0; i < table.length(); i++ ) {
 948                 /* Remove any duplicates ahead of i. */
 949                 for ( int r = i+1; r < table.length(); ) {
 950                         if ( table[r].value == table[i].value )
 951                                 table.vremove(r);
 952                         else
 953                                 r += 1;
 954                 }
 955         }
 956 }
 957
 958 /* Remove duplicates from action lists. This operates only on transition and
 959  * eof action lists and so should be called once all actions have been
 960  * transfered to their final resting place. */
 961 void ParseData::removeActionDups( FsmAp *graph )
 962 {
 963         /* Loop all states. */
 964         for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
 965                 /* Loop all transitions. */
 966                 for ( TransList::Iter trans = state->outList; trans.lte(); trans++ )
 967                         removeDups( trans->actionTable );
 968                 removeDups( state->toStateActionTable );
 969                 removeDups( state->fromStateActionTable );
 970                 removeDups( state->eofActionTable );
 971         }
 972 }
 973
 974 Action *ParseData::newAction( char *name, InlineList *inlineList )
 975 {
 976         InputLoc loc;
 977         loc.line = 1;
 978         loc.col = 1;
 979         loc.fileName = "<NONE>";
 980
 981         Action *action = new Action( loc, name, inlineList, nextCondId++ );
 982         action->actionRefs.append( rootName );
 983         actionList.append( action );
 984         return action;
 985 }
 986
 987 void ParseData::initLongestMatchData()
 988 {
 989         if ( lmList.length() > 0 ) {
 990                 /* The initTokStart action resets the token start. */
 991                 InlineList *il1 = new InlineList;
 992                 il1->append( new InlineItem( InputLoc(), InlineItem::LmInitTokStart ) );
 993                 initTokStart = newAction( "initts", il1 );
 994                 initTokStart->isLmAction = true;
 995
 996                 /* The initActId action gives act a default value. */
 997                 InlineList *il4 = new InlineList;
 998                 il4->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) );
 999                 initActId = newAction( "initact", il4 );
1000                 initActId->isLmAction = true;
1001
1002                 /* The setTokStart action sets tokstart. */
1003                 InlineList *il5 = new InlineList;
1004                 il5->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) );
1005                 setTokStart = newAction( "tokstart", il5 );
1006                 setTokStart->isLmAction = true;
1007
1008                 /* The setTokEnd action sets tokend. */
1009                 InlineList *il3 = new InlineList;
1010                 il3->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) );
1011                 setTokEnd = newAction( "tokend", il3 );
1012                 setTokEnd->isLmAction = true;
1013
1014                 /* The action will also need an ordering: ahead of all user action
1015                  * embeddings. */
1016                 initTokStartOrd = curActionOrd++;
1017                 initActIdOrd = curActionOrd++;
1018                 setTokStartOrd = curActionOrd++;
1019                 setTokEndOrd = curActionOrd++;
1020         }
1021 }
1022
1023 /* After building the graph, do some extra processing to ensure the runtime
1024  * data of the longest mactch operators is consistent. */
1025 void ParseData::setLongestMatchData( FsmAp *graph )
1026 {
1027         if ( lmList.length() > 0 ) {
1028                 /* Make sure all entry points (targets of fgoto, fcall, fnext, fentry)
1029                  * init the tokstart. */
1030                 for ( EntryMap::Iter en = graph->entryPoints; en.lte(); en++ ) {
1031                         /* This is run after duplicates are removed, we must guard against
1032                          * inserting a duplicate. */
1033                         ActionTable &actionTable = en->value->toStateActionTable;
1034                         if ( ! actionTable.hasAction( initTokStart ) )
1035                                 actionTable.setAction( initTokStartOrd, initTokStart );
1036                 }
1037
1038                 /* Find the set of states that are the target of transitions with
1039                  * actions that have calls. These states will be targeted by fret
1040                  * statements. */
1041                 StateSet states;
1042                 for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
1043                         for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
1044                                 for ( ActionTable::Iter ati = trans->actionTable; ati.lte(); ati++ ) {
1045                                         if ( ati->value->anyCall && trans->toState != 0 )
1046                                                 states.insert( trans->toState );
1047                                 }
1048                         }
1049                 }
1050
1051
1052                 /* Init tokstart upon entering the above collected states. */
1053                 for ( StateSet::Iter ps = states; ps.lte(); ps++ ) {
1054                         /* This is run after duplicates are removed, we must guard against
1055                          * inserting a duplicate. */
1056                         ActionTable &actionTable = (*ps)->toStateActionTable;
1057                         if ( ! actionTable.hasAction( initTokStart ) )
1058                                 actionTable.setAction( initTokStartOrd, initTokStart );
1059                 }
1060         }
1061 }
1062
1063 /* Make the graph from a graph dict node. Does minimization and state sorting. */
1064 FsmAp *ParseData::makeInstance( GraphDictEl *gdNode )
1065 {
1066         /* Build the graph from a walk of the parse tree. */
1067         FsmAp *graph = gdNode->value->walk( this );
1068
1069         /* Resolve any labels that point to multiple states. Any labels that are
1070          * still around are referenced only by gotos and calls and they need to be
1071          * made into deterministic entry points. */
1072         graph->deterministicEntry();
1073
1074         /*
1075          * All state construction is now complete.
1076          */
1077
1078         /* Transfer global error actions. */
1079         for ( StateList::Iter state = graph->stateList; state.lte(); state++ )
1080                 graph->transferErrorActions( state, 0 );
1081
1082         removeActionDups( graph );
1083
1084         /* Remove unreachable states. There should be no dead end states. The
1085          * subtract and intersection operators are the only places where they may
1086          * be created and those operators clean them up. */
1087         graph->removeUnreachableStates();
1088
1089         /* No more fsm operations are to be done. Action ordering numbers are
1090          * no longer of use and will just hinder minimization. Clear them. */
1091         graph->nullActionKeys();
1092
1093         /* Transition priorities are no longer of use. We can clear them
1094          * because they will just hinder minimization as well. Clear them. */
1095         graph->clearAllPriorities();
1096
1097         if ( minimizeOpt != MinimizeNone ) {
1098                 /* Minimize here even if we minimized at every op. Now that function
1099                  * keys have been cleared we may get a more minimal fsm. */
1100                 switch ( minimizeLevel ) {
1101                         case MinimizeApprox:
1102                                 graph->minimizeApproximate();
1103                                 break;
1104                         case MinimizeStable:
1105                                 graph->minimizeStable();
1106                                 break;
1107                         case MinimizePartition1:
1108                                 graph->minimizePartition1();
1109                                 break;
1110                         case MinimizePartition2:
1111                                 graph->minimizePartition2();
1112                                 break;
1113                 }
1114         }
1115
1116         graph->compressTransitions();
1117
1118         return graph;
1119 }
1120
1121 void ParseData::printNameTree()
1122 {
1123         /* Print the name instance map. */
1124         for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ )
1125                 printNameInst( *name, 0 );
1126
1127         cerr << "name index:" << endl;
1128         /* Show that the name index is correct. */
1129         for ( int ni = 0; ni < nextNameId; ni++ ) {
1130                 cerr << ni << ": ";
1131                 char *name = nameIndex[ni]->name;
1132                 cerr << ( name != 0 ? name : "<ANON>" ) << endl;
1133         }
1134 }
1135
1136 FsmAp *ParseData::makeSpecific( GraphDictEl *gdNode )
1137 {
1138         /* Build the name tree and supporting data structures. */
1139         makeNameTree( gdNode );
1140
1141         /* Resove name references from gdNode. */
1142         initNameWalk();
1143         gdNode->value->resolveNameRefs( this );
1144
1145         /* Do not resolve action references. Since we are not building the entire
1146          * graph there's a good chance that many name references will fail. This
1147          * is okay since generating part of the graph is usually only done when
1148          * inspecting the compiled machine. */
1149
1150         /* Same story for extern entry point references. */
1151
1152         /* Flag this case so that the XML code generator is aware that we haven't
1153          * looked up name references in actions. It can then avoid segfaulting. */
1154         generatingSectionSubset = true;
1155
1156         /* Just building the specified graph. */
1157         initNameWalk();
1158         FsmAp *mainGraph = makeInstance( gdNode );
1159
1160         return mainGraph;
1161 }
1162
1163 FsmAp *ParseData::makeAll()
1164 {
1165         /* Build the name tree and supporting data structures. */
1166         makeNameTree( 0 );
1167
1168         /* Resove name references in the tree. */
1169         initNameWalk();
1170         for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ )
1171                 glel->value->resolveNameRefs( this );
1172
1173         /* Resolve action code name references. */
1174         resolveActionNameRefs();
1175
1176         /* Force name references to the top level instantiations. */
1177         for ( NameVect::Iter inst = rootName->childVect; inst.lte(); inst++ )
1178                 (*inst)->numRefs += 1;
1179
1180         FsmAp *mainGraph = 0;
1181         FsmAp **graphs = new FsmAp*[instanceList.length()];
1182         int numOthers = 0;
1183
1184         /* Make all the instantiations, we know that main exists in this list. */
1185         initNameWalk();
1186         for ( GraphList::Iter glel = instanceList; glel.lte();  glel++ ) {
1187                 if ( strcmp( glel->key, mainMachine ) == 0 ) {
1188                         /* Main graph is always instantiated. */
1189                         mainGraph = makeInstance( glel );
1190                 }
1191                 else {
1192                         /* Instantiate and store in others array. */
1193                         graphs[numOthers++] = makeInstance( glel );
1194                 }
1195         }
1196
1197         if ( mainGraph == 0 )
1198                 mainGraph = graphs[--numOthers];
1199
1200         if ( numOthers > 0 ) {
1201                 /* Add all the other graphs into main. */
1202                 mainGraph->globOp( graphs, numOthers );
1203         }
1204
1205         delete[] graphs;
1206         return mainGraph;
1207 }
1208
1209 void ParseData::analyzeAction( Action *action, InlineList *inlineList )
1210 {
1211         /* FIXME: Actions used as conditions should be very constrained. */
1212         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1213                 if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr )
1214                         action->anyCall = true;
1215
1216                 /* Need to recurse into longest match items. */
1217                 if ( item->type == InlineItem::LmSwitch ) {
1218                         LongestMatch *lm = item->longestMatch;
1219                         for ( LmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) {
1220                                 if ( lmi->action != 0 )
1221                                         analyzeAction( action, lmi->action->inlineList );
1222                         }
1223                 }
1224
1225                 if ( item->type == InlineItem::LmOnLast ||
1226                                 item->type == InlineItem::LmOnNext ||
1227                                 item->type == InlineItem::LmOnLagBehind )
1228                 {
1229                         LongestMatchPart *lmi = item->longestMatchPart;
1230                         if ( lmi->action != 0 )
1231                                 analyzeAction( action, lmi->action->inlineList );
1232                 }
1233
1234                 if ( item->children != 0 )
1235                         analyzeAction( action, item->children );
1236         }
1237 }
1238
1239
1240 /* Check actions for bad uses of fsm directives. We don't go inside longest
1241  * match items in actions created by ragel, since we just want the user
1242  * actions. */
1243 void ParseData::checkInlineList( Action *act, InlineList *inlineList )
1244 {
1245         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1246                 /* EOF checks. */
1247                 if ( act->numEofRefs > 0 ) {
1248                         switch ( item->type ) {
1249                         case InlineItem::PChar:
1250                                 error(item->loc) << "pointer to current element does not exist in "
1251                                                 "EOF action code" << endl;
1252                                 break;
1253                         case InlineItem::Char:
1254                                 error(item->loc) << "current element does not exist in "
1255                                                 "EOF action code" << endl;
1256                                 break;
1257                         case InlineItem::Hold:
1258                                 error(item->loc) << "changing the current element not possible in "
1259                                                 "EOF action code" << endl;
1260                                 break;
1261                         case InlineItem::Exec:
1262                                 error(item->loc) << "changing the current element not possible in "
1263                                                 "EOF action code" << endl;
1264                                 break;
1265                         case InlineItem::Goto: case InlineItem::Call:
1266                         case InlineItem::Next: case InlineItem::GotoExpr:
1267                         case InlineItem::CallExpr: case InlineItem::NextExpr:
1268                         case InlineItem::Ret:
1269                                 error(item->loc) << "changing the current state not possible in "
1270                                                 "EOF action code" << endl;
1271                                 break;
1272                         default:
1273                                 break;
1274                         }
1275                 }
1276
1277                 /* Recurse. */
1278                 if ( item->children != 0 )
1279                         checkInlineList( act, item->children );
1280         }
1281 }
1282
1283 void ParseData::checkAction( Action *action )
1284 {
1285         /* Check for actions with calls that are embedded within a longest match
1286          * machine. */
1287         if ( !action->isLmAction && action->numRefs() > 0 && action->anyCall ) {
1288                 for ( ActionRefs::Iter ar = action->actionRefs; ar.lte(); ar++ ) {
1289                         NameInst *check = *ar;
1290                         while ( check != 0 ) {
1291                                 if ( check->isLongestMatch ) {
1292                                         error(action->loc) << "within a scanner, fcall is permitted"
1293                                                 " only in pattern actions" << endl;
1294                                         break;
1295                                 }
1296                                 check = check->parent;
1297                         }
1298                 }
1299         }
1300
1301         checkInlineList( action, action->inlineList );
1302 }
1303
1304
1305 void ParseData::analyzeGraph( FsmAp *graph )
1306 {
1307         for ( ActionList::Iter act = actionList; act.lte(); act++ )
1308                 analyzeAction( act, act->inlineList );
1309
1310         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
1311                 /* The transition list. */
1312                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
1313                         for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ )
1314                                 at->value->numTransRefs += 1;
1315                 }
1316
1317                 for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ )
1318                         at->value->numToStateRefs += 1;
1319
1320                 for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ )
1321                         at->value->numFromStateRefs += 1;
1322
1323                 for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ )
1324                         at->value->numEofRefs += 1;
1325
1326                 for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) {
1327                         for ( CondSet::Iter sci = sc->condSpace->condSet; sci.lte(); sci++ )
1328                                 (*sci)->numCondRefs += 1;
1329                 }
1330         }
1331
1332         /* Checks for bad usage of directives in action code. */
1333         for ( ActionList::Iter act = actionList; act.lte(); act++ )
1334                 checkAction( act );
1335 }
1336
1337 void ParseData::makeExportsNameTree()
1338 {
1339         /* Make a name tree for the exports. */
1340         initExportsNameWalk();
1341
1342         /* First make the name tree. */
1343         for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1344                 if ( gdel->value->isExport ) {
1345                         /* Recurse on the instance. */
1346                         gdel->value->makeNameTree( gdel->loc, this );
1347                 }
1348         }
1349 }
1350
1351 void ParseData::makeExports()
1352 {
1353         makeExportsNameTree();
1354
1355         /* Resove name references in the tree. */
1356         initExportsNameWalk();
1357         for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1358                 if ( gdel->value->isExport )
1359                         gdel->value->resolveNameRefs( this );
1360         }
1361
1362         /* Make all the instantiations, we know that main exists in this list. */
1363         initExportsNameWalk();
1364         for ( GraphDict::Iter gdel = graphDict; gdel.lte();  gdel++ ) {
1365                 /* Check if this var def is an export. */
1366                 if ( gdel->value->isExport ) {
1367                         /* Build the graph from a walk of the parse tree. */
1368                         FsmAp *graph = gdel->value->walk( this );
1369
1370                         /* Build the graph from a walk of the parse tree. */
1371                         if ( !graph->checkSingleCharMachine() ) {
1372                                 error(gdel->loc) << "bad export machine, must define "
1373                                                 "a single character" << endl;
1374                         }
1375                         else {
1376                                 /* Safe to extract the key and declare the export. */
1377                                 Key exportKey = graph->startState->outList.head->lowKey;
1378                                 exportList.append( new Export( gdel->value->name, exportKey ) );
1379                         }
1380                 }
1381         }
1382
1383 }
1384
1385 void ParseData::prepareMachineGen( GraphDictEl *graphDictEl )
1386 {
1387         beginProcessing();
1388         initKeyOps();
1389         makeRootNames();
1390         initLongestMatchData();
1391
1392         /* Make the graph, do minimization. */
1393         if ( graphDictEl == 0 )
1394                 sectionGraph = makeAll();
1395         else
1396                 sectionGraph = makeSpecific( graphDictEl );
1397
1398         /* Compute exports from the export definitions. */
1399         makeExports();
1400
1401         /* If any errors have occured in the input file then don't write anything. */
1402         if ( gblErrorCount > 0 )
1403                 return;
1404
1405         analyzeGraph( sectionGraph );
1406
1407         /* Depends on the graph analysis. */
1408         setLongestMatchData( sectionGraph );
1409
1410         /* Decide if an error state is necessary.
1411          *  1. There is an error transition
1412          *  2. There is a gap in the transitions
1413          *  3. The longest match operator requires it. */
1414         if ( lmRequiresErrorState || sectionGraph->hasErrorTrans() )
1415                 sectionGraph->errState = sectionGraph->addState();
1416
1417         /* State numbers need to be assigned such that all final states have a
1418          * larger state id number than all non-final states. This enables the
1419          * first_final mechanism to function correctly. We also want states to be
1420          * ordered in a predictable fashion. So we first apply a depth-first
1421          * search, then do a stable sort by final state status, then assign
1422          * numbers. */
1423
1424         sectionGraph->depthFirstOrdering();
1425         sectionGraph->sortStatesByFinal();
1426         sectionGraph->setStateNumbers( 0 );
1427 }
1428
1429 void ParseData::generateXML( ostream &out )
1430 {
1431         beginProcessing();
1432
1433         /* Make the generator. */
1434         XMLCodeGen codeGen( sectionName, this, sectionGraph, out );
1435
1436         /* Write out with it. */
1437         codeGen.writeXML();
1438
1439         if ( printStatistics ) {
1440                 cerr << "fsm name  : " << sectionName << endl;
1441                 cerr << "num states: " << sectionGraph->stateList.length() << endl;
1442                 cerr << endl;
1443         }
1444 }
1445
1446 /* Send eof to all parsers. */
1447 void terminateAllParsers( )
1448 {
1449         /* FIXME: a proper token is needed here. Suppose we should use the
1450          * location of EOF in the last file that the parser was referenced in. */
1451         InputLoc loc;
1452         loc.fileName = "<EOF>";
1453         loc.line = 0;
1454         loc.col = 0;
1455         for ( ParserDict::Iter pdel = parserDict; pdel.lte(); pdel++ )
1456                 pdel->value->token( loc, _eof, 0, 0 );
1457 }
1458
1459 void writeLanguage( std::ostream &out )
1460 {
1461         out << " lang=\"";
1462         switch ( hostLangType ) {
1463                 case CCode:    out << "C"; break;
1464                 case DCode:    out << "D"; break;
1465                 case JavaCode: out << "Java"; break;
1466                 case RubyCode: out << "Ruby"; break;
1467         }
1468         out << "\"";
1469
1470 }
1471
1472 void writeMachines( std::ostream &out, std::string hostData, char *inputFileName )
1473 {
1474         if ( machineSpec == 0 && machineName == 0 ) {
1475                 /* No machine spec or machine name given. Generate everything. */
1476                 for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1477                         ParseData *pd = parser->value->pd;
1478                         if ( pd->instanceList.length() > 0 )
1479                                 pd->prepareMachineGen( 0 );
1480                 }
1481
1482                 if ( gblErrorCount == 0 ) {
1483                         out << "<ragel filename=\"" << inputFileName << "\"";
1484                         writeLanguage( out );
1485                         out << ">\n";
1486                         for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1487                                 ParseData *pd = parser->value->pd;
1488                                 if ( pd->instanceList.length() > 0 )
1489                                         pd->generateXML( out );
1490                         }
1491                         out << hostData;
1492                         out << "</ragel>\n";
1493                 }
1494         }
1495         else if ( parserDict.length() > 0 ) {
1496                 /* There is either a machine spec or machine name given. */
1497                 ParseData *parseData = 0;
1498                 GraphDictEl *graphDictEl = 0;
1499
1500                 /* Traverse the sections, break out when we find a section/machine
1501                  * that matches the one specified. */
1502                 for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1503                         ParseData *checkPd = parser->value->pd;
1504                         if ( machineSpec == 0 || strcmp( checkPd->sectionName, machineSpec ) == 0 ) {
1505                                 GraphDictEl *checkGdEl = 0;
1506                                 if ( machineName == 0 || (checkGdEl =
1507                                                 checkPd->graphDict.find( machineName )) != 0 )
1508                                 {
1509                                         /* Have a machine spec and/or machine name that matches
1510                                          * the -M/-S options. */
1511                                         parseData = checkPd;
1512                                         graphDictEl = checkGdEl;
1513                                         break;
1514                                 }
1515                         }
1516                 }
1517
1518                 if ( parseData == 0 )
1519                         error() << "could not locate machine specified with -S and/or -M" << endl;
1520                 else {
1521                         /* Section/Machine to emit was found. Prepare and emit it. */
1522                         parseData->prepareMachineGen( graphDictEl );
1523                         if ( gblErrorCount == 0 ) {
1524                                 out << "<ragel filename=\"" << inputFileName << "\"";
1525                                 writeLanguage( out );
1526                                 out << ">\n";
1527                                 parseData->generateXML( out );
1528                                 out << hostData;
1529                                 out << "</ragel>\n";
1530                         }
1531                 }
1532         }
1533 }