ragel/parsedata.cpp

   1 /*
   2  *  Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
   3  */
   4
   5 /*  This file is part of Ragel.
   6  *
   7  *  Ragel is free software; you can redistribute it and/or modify
   8  *  it under the terms of the GNU General Public License as published by
   9  *  the Free Software Foundation; either version 2 of the License, or
  10  *  (at your option) any later version.
  11  *
  12  *  Ragel is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  *  GNU General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU General Public License
  18  *  along with Ragel; if not, write to the Free Software
  19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  */
  21
  22 #include <iostream>
  23 #include <iomanip>
  24 #include <errno.h>
  25 #include <stdlib.h>
  26 #include <limits.h>
  27
  28 #include "ragel.h"
  29 #include "rlparse.h"
  30 #include "parsedata.h"
  31 #include "parsetree.h"
  32 #include "mergesort.h"
  33 #include "xmlcodegen.h"
  34
  35 using namespace std;
  36
  37 char mainMachine[] = "main";
  38
  39 void Token::set( char *str, int len )
  40 {
  41         length = len;
  42         data = new char[len+1];
  43         memcpy( data, str, len );
  44         data[len] = 0;
  45 }
  46
  47 void Token::append( const Token &other )
  48 {
  49         int newLength = length + other.length;
  50         char *newString = new char[newLength+1];
  51         memcpy( newString, data, length );
  52         memcpy( newString + length, other.data, other.length );
  53         newString[newLength] = 0;
  54         data = newString;
  55         length = newLength;
  56 }
  57
  58 /* Perform minimization after an operation according
  59  * to the command line args. */
  60 void afterOpMinimize( FsmAp *fsm, bool lastInSeq )
  61 {
  62         /* Switch on the prefered minimization algorithm. */
  63         if ( minimizeOpt == MinimizeEveryOp || minimizeOpt == MinimizeMostOps && lastInSeq ) {
  64                 /* First clean up the graph. FsmAp operations may leave these
  65                  * lying around. There should be no dead end states. The subtract
  66                  * intersection operators are the only places where they may be
  67                  * created and those operators clean them up. */
  68                 fsm->removeUnreachableStates();
  69
  70                 switch ( minimizeLevel ) {
  71                         case MinimizeApprox:
  72                                 fsm->minimizeApproximate();
  73                                 break;
  74                         case MinimizePartition1:
  75                                 fsm->minimizePartition1();
  76                                 break;
  77                         case MinimizePartition2:
  78                                 fsm->minimizePartition2();
  79                                 break;
  80                         case MinimizeStable:
  81                                 fsm->minimizeStable();
  82                                 break;
  83                 }
  84         }
  85 }
  86
  87 /* Count the transitions in the fsm by walking the state list. */
  88 int countTransitions( FsmAp *fsm )
  89 {
  90         int numTrans = 0;
  91         StateAp *state = fsm->stateList.head;
  92         while ( state != 0 ) {
  93                 numTrans += state->outList.length();
  94                 state = state->next;
  95         }
  96         return numTrans;
  97 }
  98
  99 Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd )
 100 {
 101         /* Reset errno so we can check for overflow or underflow. In the event of
 102          * an error, sets the return val to the upper or lower bound being tested
 103          * against. */
 104         errno = 0;
 105         unsigned int size = keyOps->alphType->size;
 106         bool unusedBits = size < sizeof(unsigned long);
 107
 108         unsigned long ul = strtoul( str, 0, 16 );
 109
 110         if ( errno == ERANGE || unusedBits && ul >> (size * 8) ) {
 111                 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
 112                 ul = 1 << (size * 8);
 113         }
 114
 115         if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) )
 116                 ul |= (0xffffffff >> (size*8 ) ) << (size*8);
 117
 118         return Key( (long)ul );
 119 }
 120
 121 Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd )
 122 {
 123         /* Convert the number to a decimal. First reset errno so we can check
 124          * for overflow or underflow. */
 125         errno = 0;
 126         long long minVal = keyOps->alphType->minVal;
 127         long long maxVal = keyOps->alphType->maxVal;
 128
 129         long long ll = strtoll( str, 0, 10 );
 130
 131         /* Check for underflow. */
 132         if ( errno == ERANGE && ll < 0 || ll < minVal) {
 133                 error(loc) << "literal " << str << " underflows the alphabet type" << endl;
 134                 ll = minVal;
 135         }
 136         /* Check for overflow. */
 137         else if ( errno == ERANGE && ll > 0 || ll > maxVal ) {
 138                 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
 139                 ll = maxVal;
 140         }
 141
 142         if ( keyOps->alphType->isSigned )
 143                 return Key( (long)ll );
 144         else
 145                 return Key( (unsigned long)ll );
 146 }
 147
 148 /* Make an fsm key in int format (what the fsm graph uses) from an alphabet
 149  * number returned by the parser. Validates that the number doesn't overflow
 150  * the alphabet type. */
 151 Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd )
 152 {
 153         /* Switch on hex/decimal format. */
 154         if ( str[0] == '0' && str[1] == 'x' )
 155                 return makeFsmKeyHex( str, loc, pd );
 156         else
 157                 return makeFsmKeyDec( str, loc, pd );
 158 }
 159
 160 /* Make an fsm int format (what the fsm graph uses) from a single character.
 161  * Performs proper conversion depending on signed/unsigned property of the
 162  * alphabet. */
 163 Key makeFsmKeyChar( char c, ParseData *pd )
 164 {
 165         if ( keyOps->isSigned ) {
 166                 /* Copy from a char type. */
 167                 return Key( c );
 168         }
 169         else {
 170                 /* Copy from an unsigned byte type. */
 171                 return Key( (unsigned char)c );
 172         }
 173 }
 174
 175 /* Make an fsm key array in int format (what the fsm graph uses) from a string
 176  * of characters. Performs proper conversion depending on signed/unsigned
 177  * property of the alphabet. */
 178 void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd )
 179 {
 180         if ( keyOps->isSigned ) {
 181                 /* Copy from a char star type. */
 182                 char *src = data;
 183                 for ( int i = 0; i < len; i++ )
 184                         result[i] = Key(src[i]);
 185         }
 186         else {
 187                 /* Copy from an unsigned byte ptr type. */
 188                 unsigned char *src = (unsigned char*) data;
 189                 for ( int i = 0; i < len; i++ )
 190                         result[i] = Key(src[i]);
 191         }
 192 }
 193
 194 /* Like makeFsmKeyArray except the result has only unique keys. They ordering
 195  * will be changed. */
 196 void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
 197                 bool caseInsensitive, ParseData *pd )
 198 {
 199         /* Use a transitions list for getting unique keys. */
 200         if ( keyOps->isSigned ) {
 201                 /* Copy from a char star type. */
 202                 char *src = data;
 203                 for ( int si = 0; si < len; si++ ) {
 204                         Key key( src[si] );
 205                         result.insert( key );
 206                         if ( caseInsensitive ) {
 207                                 if ( key.isLower() )
 208                                         result.insert( key.toUpper() );
 209                                 else if ( key.isUpper() )
 210                                         result.insert( key.toLower() );
 211                         }
 212                 }
 213         }
 214         else {
 215                 /* Copy from an unsigned byte ptr type. */
 216                 unsigned char *src = (unsigned char*) data;
 217                 for ( int si = 0; si < len; si++ ) {
 218                         Key key( src[si] );
 219                         result.insert( key );
 220                         if ( caseInsensitive ) {
 221                                 if ( key.isLower() )
 222                                         result.insert( key.toUpper() );
 223                                 else if ( key.isUpper() )
 224                                         result.insert( key.toLower() );
 225                         }
 226                 }
 227         }
 228 }
 229
 230 FsmAp *dotFsm( ParseData *pd )
 231 {
 232         FsmAp *retFsm = new FsmAp();
 233         retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey );
 234         return retFsm;
 235 }
 236
 237 FsmAp *dotStarFsm( ParseData *pd )
 238 {
 239         FsmAp *retFsm = new FsmAp();
 240         retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey );
 241         return retFsm;
 242 }
 243
 244 /* Make a builtin type. Depends on the signed nature of the alphabet type. */
 245 FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd )
 246 {
 247         /* FsmAp created to return. */
 248         FsmAp *retFsm = 0;
 249         bool isSigned = keyOps->isSigned;
 250
 251         switch ( builtin ) {
 252         case BT_Any: {
 253                 /* All characters. */
 254                 retFsm = dotFsm( pd );
 255                 break;
 256         }
 257         case BT_Ascii: {
 258                 /* Ascii characters 0 to 127. */
 259                 retFsm = new FsmAp();
 260                 retFsm->rangeFsm( 0, 127 );
 261                 break;
 262         }
 263         case BT_Extend: {
 264                 /* Ascii extended characters. This is the full byte range. Dependent
 265                  * on signed, vs no signed. If the alphabet is one byte then just use
 266                  * dot fsm. */
 267                 if ( isSigned ) {
 268                         retFsm = new FsmAp();
 269                         retFsm->rangeFsm( -128, 127 );
 270                 }
 271                 else {
 272                         retFsm = new FsmAp();
 273                         retFsm->rangeFsm( 0, 255 );
 274                 }
 275                 break;
 276         }
 277         case BT_Alpha: {
 278                 /* Alpha [A-Za-z]. */
 279                 FsmAp *upper = new FsmAp(), *lower = new FsmAp();
 280                 upper->rangeFsm( 'A', 'Z' );
 281                 lower->rangeFsm( 'a', 'z' );
 282                 upper->unionOp( lower );
 283                 upper->minimizePartition2();
 284                 retFsm = upper;
 285                 break;
 286         }
 287         case BT_Digit: {
 288                 /* Digits [0-9]. */
 289                 retFsm = new FsmAp();
 290                 retFsm->rangeFsm( '0', '9' );
 291                 break;
 292         }
 293         case BT_Alnum: {
 294                 /* Alpha numerics [0-9A-Za-z]. */
 295                 FsmAp *digit = new FsmAp(), *lower = new FsmAp();
 296                 FsmAp *upper = new FsmAp();
 297                 digit->rangeFsm( '0', '9' );
 298                 upper->rangeFsm( 'A', 'Z' );
 299                 lower->rangeFsm( 'a', 'z' );
 300                 digit->unionOp( upper );
 301                 digit->unionOp( lower );
 302                 digit->minimizePartition2();
 303                 retFsm = digit;
 304                 break;
 305         }
 306         case BT_Lower: {
 307                 /* Lower case characters. */
 308                 retFsm = new FsmAp();
 309                 retFsm->rangeFsm( 'a', 'z' );
 310                 break;
 311         }
 312         case BT_Upper: {
 313                 /* Upper case characters. */
 314                 retFsm = new FsmAp();
 315                 retFsm->rangeFsm( 'A', 'Z' );
 316                 break;
 317         }
 318         case BT_Cntrl: {
 319                 /* Control characters. */
 320                 FsmAp *cntrl = new FsmAp();
 321                 FsmAp *highChar = new FsmAp();
 322                 cntrl->rangeFsm( 0, 31 );
 323                 highChar->concatFsm( 127 );
 324                 cntrl->unionOp( highChar );
 325                 cntrl->minimizePartition2();
 326                 retFsm = cntrl;
 327                 break;
 328         }
 329         case BT_Graph: {
 330                 /* Graphical ascii characters [!-~]. */
 331                 retFsm = new FsmAp();
 332                 retFsm->rangeFsm( '!', '~' );
 333                 break;
 334         }
 335         case BT_Print: {
 336                 /* Printable characters. Same as graph except includes space. */
 337                 retFsm = new FsmAp();
 338                 retFsm->rangeFsm( ' ', '~' );
 339                 break;
 340         }
 341         case BT_Punct: {
 342                 /* Punctuation. */
 343                 FsmAp *range1 = new FsmAp();
 344                 FsmAp *range2 = new FsmAp();
 345                 FsmAp *range3 = new FsmAp();
 346                 FsmAp *range4 = new FsmAp();
 347                 range1->rangeFsm( '!', '/' );
 348                 range2->rangeFsm( ':', '@' );
 349                 range3->rangeFsm( '[', '`' );
 350                 range4->rangeFsm( '{', '~' );
 351                 range1->unionOp( range2 );
 352                 range1->unionOp( range3 );
 353                 range1->unionOp( range4 );
 354                 range1->minimizePartition2();
 355                 retFsm = range1;
 356                 break;
 357         }
 358         case BT_Space: {
 359                 /* Whitespace: [\t\v\f\n\r ]. */
 360                 FsmAp *cntrl = new FsmAp();
 361                 FsmAp *space = new FsmAp();
 362                 cntrl->rangeFsm( '\t', '\r' );
 363                 space->concatFsm( ' ' );
 364                 cntrl->unionOp( space );
 365                 cntrl->minimizePartition2();
 366                 retFsm = cntrl;
 367                 break;
 368         }
 369         case BT_Xdigit: {
 370                 /* Hex digits [0-9A-Fa-f]. */
 371                 FsmAp *digit = new FsmAp();
 372                 FsmAp *upper = new FsmAp();
 373                 FsmAp *lower = new FsmAp();
 374                 digit->rangeFsm( '0', '9' );
 375                 upper->rangeFsm( 'A', 'F' );
 376                 lower->rangeFsm( 'a', 'f' );
 377                 digit->unionOp( upper );
 378                 digit->unionOp( lower );
 379                 digit->minimizePartition2();
 380                 retFsm = digit;
 381                 break;
 382         }
 383         case BT_Lambda: {
 384                 retFsm = new FsmAp();
 385                 retFsm->lambdaFsm();
 386                 break;
 387         }
 388         case BT_Empty: {
 389                 retFsm = new FsmAp();
 390                 retFsm->emptyFsm();
 391                 break;
 392         }}
 393
 394         return retFsm;
 395 }
 396
 397 /* Check if this name inst or any name inst below is referenced. */
 398 bool NameInst::anyRefsRec()
 399 {
 400         if ( numRefs > 0 )
 401                 return true;
 402
 403         /* Recurse on children until true. */
 404         for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) {
 405                 if ( (*ch)->anyRefsRec() )
 406                         return true;
 407         }
 408
 409         return false;
 410 }
 411
 412 /*
 413  * ParseData
 414  */
 415
 416 /* Initialize the structure that will collect info during the parse of a
 417  * machine. */
 418 ParseData::ParseData( char *fileName, char *sectionName,
 419                 const InputLoc &sectionLoc )
 420 :
 421         sectionGraph(0),
 422         generatingSectionSubset(false),
 423         nextPriorKey(0),
 424         /* 0 is reserved for global error actions. */
 425         nextLocalErrKey(1),
 426         nextNameId(0),
 427         nextCondId(0),
 428         alphTypeSet(false),
 429         getKeyExpr(0),
 430         accessExpr(0),
 431         pExpr(0),
 432         peExpr(0),
 433         csExpr(0),
 434         topExpr(0),
 435         stackExpr(0),
 436         actExpr(0),
 437         tokstartExpr(0),
 438         tokendExpr(0),
 439         lowerNum(0),
 440         upperNum(0),
 441         fileName(fileName),
 442         sectionName(sectionName),
 443         sectionLoc(sectionLoc),
 444         errorCount(0),
 445         curActionOrd(0),
 446         curPriorOrd(0),
 447         rootName(0),
 448         exportsRootName(0),
 449         nextEpsilonResolvedLink(0),
 450         nextLongestMatchId(1),
 451         lmRequiresErrorState(false)
 452 {
 453         /* Initialize the dictionary of graphs. This is our symbol table. The
 454          * initialization needs to be done on construction which happens at the
 455          * beginning of a machine spec so any assignment operators can reference
 456          * the builtins. */
 457         initGraphDict();
 458 }
 459
 460 /* Clean up the data collected during a parse. */
 461 ParseData::~ParseData()
 462 {
 463         /* Delete all the nodes in the action list. Will cause all the
 464          * string data that represents the actions to be deallocated. */
 465         actionList.empty();
 466 }
 467
 468 /* Make a name id in the current name instantiation scope if it is not
 469  * already there. */
 470 NameInst *ParseData::addNameInst( const InputLoc &loc, char *data, bool isLabel )
 471 {
 472         /* Create the name instantitaion object and insert it. */
 473         NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel );
 474         curNameInst->childVect.append( newNameInst );
 475         if ( data != 0 )
 476                 curNameInst->children.insertMulti( data, newNameInst );
 477         return newNameInst;
 478 }
 479
 480 void ParseData::initNameWalk()
 481 {
 482         curNameInst = rootName;
 483         curNameChild = 0;
 484 }
 485
 486 void ParseData::initExportsNameWalk()
 487 {
 488         curNameInst = exportsRootName;
 489         curNameChild = 0;
 490 }
 491
 492 /* Goes into the next child scope. The number of the child is already set up.
 493  * We need this for the syncronous name tree and parse tree walk to work
 494  * properly. It is reset on entry into a scope and advanced on poping of a
 495  * scope. A call to enterNameScope should be accompanied by a corresponding
 496  * popNameScope. */
 497 NameFrame ParseData::enterNameScope( bool isLocal, int numScopes )
 498 {
 499         /* Save off the current data. */
 500         NameFrame retFrame;
 501         retFrame.prevNameInst = curNameInst;
 502         retFrame.prevNameChild = curNameChild;
 503         retFrame.prevLocalScope = localNameScope;
 504
 505         /* Enter into the new name scope. */
 506         for ( int i = 0; i < numScopes; i++ ) {
 507                 curNameInst = curNameInst->childVect[curNameChild];
 508                 curNameChild = 0;
 509         }
 510
 511         if ( isLocal )
 512                 localNameScope = curNameInst;
 513
 514         return retFrame;
 515 }
 516
 517 /* Return from a child scope to a parent. The parent info must be specified as
 518  * an argument and is obtained from the corresponding call to enterNameScope.
 519  * */
 520 void ParseData::popNameScope( const NameFrame &frame )
 521 {
 522         /* Pop the name scope. */
 523         curNameInst = frame.prevNameInst;
 524         curNameChild = frame.prevNameChild+1;
 525         localNameScope = frame.prevLocalScope;
 526 }
 527
 528 void ParseData::resetNameScope( const NameFrame &frame )
 529 {
 530         /* Pop the name scope. */
 531         curNameInst = frame.prevNameInst;
 532         curNameChild = frame.prevNameChild;
 533         localNameScope = frame.prevLocalScope;
 534 }
 535
 536
 537 void ParseData::unsetObsoleteEntries( FsmAp *graph )
 538 {
 539         /* Loop the reference names and increment the usage. Names that are no
 540          * longer needed will be unset in graph. */
 541         for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) {
 542                 /* Get the name. */
 543                 NameInst *name = *ref;
 544                 name->numUses += 1;
 545
 546                 /* If the name is no longer needed unset its corresponding entry. */
 547                 if ( name->numUses == name->numRefs ) {
 548                         assert( graph->entryPoints.find( name->id ) != 0 );
 549                         graph->unsetEntry( name->id );
 550                         assert( graph->entryPoints.find( name->id ) == 0 );
 551                 }
 552         }
 553 }
 554
 555 NameSet ParseData::resolvePart( NameInst *refFrom, char *data, bool recLabelsOnly )
 556 {
 557         /* Queue needed for breadth-first search, load it with the start node. */
 558         NameInstList nameQueue;
 559         nameQueue.append( refFrom );
 560
 561         NameSet result;
 562         while ( nameQueue.length() > 0 ) {
 563                 /* Pull the next from location off the queue. */
 564                 NameInst *from = nameQueue.detachFirst();
 565
 566                 /* Look for the name. */
 567                 NameMapEl *low, *high;
 568                 if ( from->children.findMulti( data, low, high ) ) {
 569                         /* Record all instances of the name. */
 570                         for ( ; low <= high; low++ )
 571                                 result.insert( low->value );
 572                 }
 573
 574                 /* Name not there, do breadth-first operation of appending all
 575                  * childrent to the processing queue. */
 576                 for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) {
 577                         if ( !recLabelsOnly || (*name)->isLabel )
 578                                 nameQueue.append( *name );
 579                 }
 580         }
 581
 582         /* Queue exhausted and name never found. */
 583         return result;
 584 }
 585
 586 void ParseData::resolveFrom( NameSet &result, NameInst *refFrom,
 587                 const NameRef &nameRef, int namePos )
 588 {
 589         /* Look for the name in the owning scope of the factor with aug. */
 590         NameSet partResult = resolvePart( refFrom, nameRef[namePos], false );
 591
 592         /* If there are more parts to the name then continue on. */
 593         if ( ++namePos < nameRef.length() ) {
 594                 /* There are more components to the name, search using all the part
 595                  * results as the base. */
 596                 for ( NameSet::Iter name = partResult; name.lte(); name++ )
 597                         resolveFrom( result, *name, nameRef, namePos );
 598         }
 599         else {
 600                 /* This is the last component, append the part results to the final
 601                  * results. */
 602                 result.insert( partResult );
 603         }
 604 }
 605
 606 /* Write out a name reference. */
 607 ostream &operator<<( ostream &out, const NameRef &nameRef )
 608 {
 609         int pos = 0;
 610         if ( nameRef[pos] == 0 ) {
 611                 out << "::";
 612                 pos += 1;
 613         }
 614         out << nameRef[pos++];
 615         for ( ; pos < nameRef.length(); pos++ )
 616                 out << "::" << nameRef[pos];
 617         return out;
 618 }
 619
 620 ostream &operator<<( ostream &out, const NameInst &nameInst )
 621 {
 622         /* Count the number fully qualified name parts. */
 623         int numParents = 0;
 624         NameInst *curParent = nameInst.parent;
 625         while ( curParent != 0 ) {
 626                 numParents += 1;
 627                 curParent = curParent->parent;
 628         }
 629
 630         /* Make an array and fill it in. */
 631         curParent = nameInst.parent;
 632         NameInst **parents = new NameInst*[numParents];
 633         for ( int p = numParents-1; p >= 0; p-- ) {
 634                 parents[p] = curParent;
 635                 curParent = curParent->parent;
 636         }
 637
 638         /* Write the parents out, skip the root. */
 639         for ( int p = 1; p < numParents; p++ )
 640                 out << "::" << ( parents[p]->name != 0 ? parents[p]->name : "<ANON>" );
 641
 642         /* Write the name and cleanup. */
 643         out << "::" << ( nameInst.name != 0 ? nameInst.name : "<ANON>" );
 644         delete[] parents;
 645         return out;
 646 }
 647
 648 struct CmpNameInstLoc
 649 {
 650         static int compare( const NameInst *ni1, const NameInst *ni2 )
 651         {
 652                 if ( ni1->loc.line < ni2->loc.line )
 653                         return -1;
 654                 else if ( ni1->loc.line > ni2->loc.line )
 655                         return 1;
 656                 else if ( ni1->loc.col < ni2->loc.col )
 657                         return -1;
 658                 else if ( ni1->loc.col > ni2->loc.col )
 659                         return 1;
 660                 return 0;
 661         }
 662 };
 663
 664 void errorStateLabels( const NameSet &resolved )
 665 {
 666         MergeSort<NameInst*, CmpNameInstLoc> mergeSort;
 667         mergeSort.sort( resolved.data, resolved.length() );
 668         for ( NameSet::Iter res = resolved; res.lte(); res++ )
 669                 error((*res)->loc) << "  -> " << **res << endl;
 670 }
 671
 672
 673 NameInst *ParseData::resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action )
 674 {
 675         NameInst *nameInst = 0;
 676
 677         /* Do the local search if the name is not strictly a root level name
 678          * search. */
 679         if ( nameRef[0] != 0 ) {
 680                 /* If the action is referenced, resolve all of them. */
 681                 if ( action != 0 && action->actionRefs.length() > 0 ) {
 682                         /* Look for the name in all referencing scopes. */
 683                         NameSet resolved;
 684                         for ( ActionRefs::Iter actRef = action->actionRefs; actRef.lte(); actRef++ )
 685                                 resolveFrom( resolved, *actRef, nameRef, 0 );
 686
 687                         if ( resolved.length() > 0 ) {
 688                                 /* Take the first one. */
 689                                 nameInst = resolved[0];
 690                                 if ( resolved.length() > 1 ) {
 691                                         /* Complain about the multiple references. */
 692                                         error(loc) << "state reference " << nameRef <<
 693                                                         " resolves to multiple entry points" << endl;
 694                                         errorStateLabels( resolved );
 695                                 }
 696                         }
 697                 }
 698         }
 699
 700         /* If not found in the local scope, look in global. */
 701         if ( nameInst == 0 ) {
 702                 NameSet resolved;
 703                 int fromPos = nameRef[0] != 0 ? 0 : 1;
 704                 resolveFrom( resolved, rootName, nameRef, fromPos );
 705
 706                 if ( resolved.length() > 0 ) {
 707                         /* Take the first. */
 708                         nameInst = resolved[0];
 709                         if ( resolved.length() > 1 ) {
 710                                 /* Complain about the multiple references. */
 711                                 error(loc) << "state reference " << nameRef <<
 712                                                 " resolves to multiple entry points" << endl;
 713                                 errorStateLabels( resolved );
 714                         }
 715                 }
 716         }
 717
 718         if ( nameInst == 0 ) {
 719                 /* If not found then complain. */
 720                 error(loc) << "could not resolve state reference " << nameRef << endl;
 721         }
 722         return nameInst;
 723 }
 724
 725 void ParseData::resolveNameRefs( InlineList *inlineList, Action *action )
 726 {
 727         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
 728                 switch ( item->type ) {
 729                         case InlineItem::Entry: case InlineItem::Goto:
 730                         case InlineItem::Call: case InlineItem::Next: {
 731                                 /* Resolve, pass action for local search. */
 732                                 NameInst *target = resolveStateRef( *item->nameRef, item->loc, action );
 733
 734                                 /* Check if the target goes into a longest match. */
 735                                 NameInst *search = target->parent;
 736                                 while ( search != 0 ) {
 737                                         if ( search->isLongestMatch ) {
 738                                                 error(item->loc) << "cannot enter inside a longest "
 739                                                                 "match construction as an entry point" << endl;
 740                                                 break;
 741                                         }
 742                                         search = search->parent;
 743                                 }
 744
 745                                 /* Note the reference in the name. This will cause the entry
 746                                  * point to survive to the end of the graph generating walk. */
 747                                 if ( target != 0 )
 748                                         target->numRefs += 1;
 749                                 item->nameTarg = target;
 750                                 break;
 751                         }
 752                         default:
 753                                 break;
 754                 }
 755
 756                 /* Some of the item types may have children. */
 757                 if ( item->children != 0 )
 758                         resolveNameRefs( item->children, action );
 759         }
 760 }
 761
 762 /* Resolve references to labels in actions. */
 763 void ParseData::resolveActionNameRefs()
 764 {
 765         for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
 766                 /* Only care about the actions that are referenced. */
 767                 if ( act->actionRefs.length() > 0 )
 768                         resolveNameRefs( act->inlineList, act );
 769         }
 770 }
 771
 772 /* Walk a name tree starting at from and fill the name index. */
 773 void ParseData::fillNameIndex( NameInst *from )
 774 {
 775         /* Fill the value for from in the name index. */
 776         nameIndex[from->id] = from;
 777
 778         /* Recurse on the implicit final state and then all children. */
 779         if ( from->final != 0 )
 780                 fillNameIndex( from->final );
 781         for ( NameVect::Iter name = from->childVect; name.lte(); name++ )
 782                 fillNameIndex( *name );
 783 }
 784
 785 void ParseData::makeRootNames()
 786 {
 787         /* Create the root name. */
 788         rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
 789         exportsRootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
 790 }
 791
 792 /* Build the name tree and supporting data structures. */
 793 void ParseData::makeNameTree( GraphDictEl *dictEl )
 794 {
 795         /* Set up curNameInst for the walk. */
 796         initNameWalk();
 797
 798         if ( dictEl != 0 ) {
 799                 /* A start location has been specified. */
 800                 dictEl->value->makeNameTree( dictEl->loc, this );
 801         }
 802         else {
 803                 /* First make the name tree. */
 804                 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
 805                         /* Recurse on the instance. */
 806                         glel->value->makeNameTree( glel->loc, this );
 807                 }
 808         }
 809
 810         /* The number of nodes in the tree can now be given by nextNameId */
 811         nameIndex = new NameInst*[nextNameId];
 812         memset( nameIndex, 0, sizeof(NameInst*)*nextNameId );
 813         fillNameIndex( rootName );
 814         fillNameIndex( exportsRootName );
 815 }
 816
 817
 818 void ParseData::createBuiltin( char *name, BuiltinMachine builtin )
 819 {
 820         Expression *expression = new Expression( builtin );
 821         Join *join = new Join( expression );
 822         JoinOrLm *joinOrLm = new JoinOrLm( join );
 823         VarDef *varDef = new VarDef( name, joinOrLm );
 824         GraphDictEl *graphDictEl = new GraphDictEl( name, varDef );
 825         graphDict.insert( graphDictEl );
 826 }
 827
 828 /* Initialize the graph dict with builtin types. */
 829 void ParseData::initGraphDict( )
 830 {
 831         createBuiltin( "any", BT_Any );
 832         createBuiltin( "ascii", BT_Ascii );
 833         createBuiltin( "extend", BT_Extend );
 834         createBuiltin( "alpha", BT_Alpha );
 835         createBuiltin( "digit", BT_Digit );
 836         createBuiltin( "alnum", BT_Alnum );
 837         createBuiltin( "lower", BT_Lower );
 838         createBuiltin( "upper", BT_Upper );
 839         createBuiltin( "cntrl", BT_Cntrl );
 840         createBuiltin( "graph", BT_Graph );
 841         createBuiltin( "print", BT_Print );
 842         createBuiltin( "punct", BT_Punct );
 843         createBuiltin( "space", BT_Space );
 844         createBuiltin( "xdigit", BT_Xdigit );
 845         createBuiltin( "null", BT_Lambda );
 846         createBuiltin( "zlen", BT_Lambda );
 847         createBuiltin( "empty", BT_Empty );
 848 }
 849
 850 /* Set the alphabet type. If the types are not valid returns false. */
 851 bool ParseData::setAlphType( char *s1, char *s2 )
 852 {
 853         bool valid = false;
 854         for ( int i = 0; i < hostLang->numHostTypes; i++ ) {
 855                 if ( strcmp( s1, hostLang->hostTypes[i].data1 ) == 0 &&
 856                                 hostLang->hostTypes[i].data2 != 0 &&
 857                                 strcmp( s2, hostLang->hostTypes[i].data2 ) == 0 )
 858                 {
 859                         valid = true;
 860                         userAlphType = hostLang->hostTypes + i;
 861                         break;
 862                 }
 863         }
 864
 865         alphTypeSet = true;
 866         return valid;
 867 }
 868
 869 /* Set the alphabet type. If the types are not valid returns false. */
 870 bool ParseData::setAlphType( char *s1 )
 871 {
 872         bool valid = false;
 873         for ( int i = 0; i < hostLang->numHostTypes; i++ ) {
 874                 if ( strcmp( s1, hostLang->hostTypes[i].data1 ) == 0 &&
 875                                 hostLang->hostTypes[i].data2 == 0 )
 876                 {
 877                         valid = true;
 878                         userAlphType = hostLang->hostTypes + i;
 879                         break;
 880                 }
 881         }
 882
 883         alphTypeSet = true;
 884         return valid;
 885 }
 886
 887 bool ParseData::setVariable( char *var, InlineList *inlineList )
 888 {
 889         bool set = true;
 890
 891         if ( strcmp( var, "p" ) == 0 )
 892                 pExpr = inlineList;
 893         else if ( strcmp( var, "pe" ) == 0 )
 894                 peExpr = inlineList;
 895         else if ( strcmp( var, "cs" ) == 0 )
 896                 csExpr = inlineList;
 897         else if ( strcmp( var, "top" ) == 0 )
 898                 topExpr = inlineList;
 899         else if ( strcmp( var, "stack" ) == 0 )
 900                 stackExpr = inlineList;
 901         else if ( strcmp( var, "act" ) == 0 )
 902                 actExpr = inlineList;
 903         else if ( strcmp( var, "tokstart" ) == 0 )
 904                 tokstartExpr = inlineList;
 905         else if ( strcmp( var, "tokend" ) == 0 )
 906                 tokendExpr = inlineList;
 907         else
 908                 set = false;
 909
 910         return set;
 911 }
 912
 913 /* Initialize the key operators object that will be referenced by all fsms
 914  * created. */
 915 void ParseData::initKeyOps( )
 916 {
 917         /* Signedness and bounds. */
 918         HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType;
 919         thisKeyOps.setAlphType( alphType );
 920
 921         if ( lowerNum != 0 ) {
 922                 /* If ranges are given then interpret the alphabet type. */
 923                 thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this );
 924                 thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
 925         }
 926
 927         thisCondData.nextCondKey = thisKeyOps.maxKey;
 928         thisCondData.nextCondKey.increment();
 929 }
 930
 931 void ParseData::printNameInst( NameInst *nameInst, int level )
 932 {
 933         for ( int i = 0; i < level; i++ )
 934                 cerr << "  ";
 935         cerr << (nameInst->name != 0 ? nameInst->name : "<ANON>") <<
 936                         "  id: " << nameInst->id <<
 937                         "  refs: " << nameInst->numRefs <<
 938                         "  uses: " << nameInst->numUses << endl;
 939         for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ )
 940                 printNameInst( *name, level+1 );
 941 }
 942
 943 /* Remove duplicates of unique actions from an action table. */
 944 void ParseData::removeDups( ActionTable &table )
 945 {
 946         /* Scan through the table looking for unique actions to
 947          * remove duplicates of. */
 948         for ( int i = 0; i < table.length(); i++ ) {
 949                 /* Remove any duplicates ahead of i. */
 950                 for ( int r = i+1; r < table.length(); ) {
 951                         if ( table[r].value == table[i].value )
 952                                 table.vremove(r);
 953                         else
 954                                 r += 1;
 955                 }
 956         }
 957 }
 958
 959 /* Remove duplicates from action lists. This operates only on transition and
 960  * eof action lists and so should be called once all actions have been
 961  * transfered to their final resting place. */
 962 void ParseData::removeActionDups( FsmAp *graph )
 963 {
 964         /* Loop all states. */
 965         for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
 966                 /* Loop all transitions. */
 967                 for ( TransList::Iter trans = state->outList; trans.lte(); trans++ )
 968                         removeDups( trans->actionTable );
 969                 removeDups( state->toStateActionTable );
 970                 removeDups( state->fromStateActionTable );
 971                 removeDups( state->eofActionTable );
 972         }
 973 }
 974
 975 Action *ParseData::newAction( char *name, InlineList *inlineList )
 976 {
 977         InputLoc loc;
 978         loc.line = 1;
 979         loc.col = 1;
 980         loc.fileName = "<NONE>";
 981
 982         Action *action = new Action( loc, name, inlineList, nextCondId++ );
 983         action->actionRefs.append( rootName );
 984         actionList.append( action );
 985         return action;
 986 }
 987
 988 void ParseData::initLongestMatchData()
 989 {
 990         if ( lmList.length() > 0 ) {
 991                 /* The initTokStart action resets the token start. */
 992                 InlineList *il1 = new InlineList;
 993                 il1->append( new InlineItem( InputLoc(), InlineItem::LmInitTokStart ) );
 994                 initTokStart = newAction( "initts", il1 );
 995                 initTokStart->isLmAction = true;
 996
 997                 /* The initActId action gives act a default value. */
 998                 InlineList *il4 = new InlineList;
 999                 il4->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) );
1000                 initActId = newAction( "initact", il4 );
1001                 initActId->isLmAction = true;
1002
1003                 /* The setTokStart action sets tokstart. */
1004                 InlineList *il5 = new InlineList;
1005                 il5->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) );
1006                 setTokStart = newAction( "tokstart", il5 );
1007                 setTokStart->isLmAction = true;
1008
1009                 /* The setTokEnd action sets tokend. */
1010                 InlineList *il3 = new InlineList;
1011                 il3->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) );
1012                 setTokEnd = newAction( "tokend", il3 );
1013                 setTokEnd->isLmAction = true;
1014
1015                 /* The action will also need an ordering: ahead of all user action
1016                  * embeddings. */
1017                 initTokStartOrd = curActionOrd++;
1018                 initActIdOrd = curActionOrd++;
1019                 setTokStartOrd = curActionOrd++;
1020                 setTokEndOrd = curActionOrd++;
1021         }
1022 }
1023
1024 /* After building the graph, do some extra processing to ensure the runtime
1025  * data of the longest mactch operators is consistent. */
1026 void ParseData::setLongestMatchData( FsmAp *graph )
1027 {
1028         if ( lmList.length() > 0 ) {
1029                 /* Make sure all entry points (targets of fgoto, fcall, fnext, fentry)
1030                  * init the tokstart. */
1031                 for ( EntryMap::Iter en = graph->entryPoints; en.lte(); en++ ) {
1032                         /* This is run after duplicates are removed, we must guard against
1033                          * inserting a duplicate. */
1034                         ActionTable &actionTable = en->value->toStateActionTable;
1035                         if ( ! actionTable.hasAction( initTokStart ) )
1036                                 actionTable.setAction( initTokStartOrd, initTokStart );
1037                 }
1038
1039                 /* Find the set of states that are the target of transitions with
1040                  * actions that have calls. These states will be targeted by fret
1041                  * statements. */
1042                 StateSet states;
1043                 for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
1044                         for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
1045                                 for ( ActionTable::Iter ati = trans->actionTable; ati.lte(); ati++ ) {
1046                                         if ( ati->value->anyCall && trans->toState != 0 )
1047                                                 states.insert( trans->toState );
1048                                 }
1049                         }
1050                 }
1051
1052
1053                 /* Init tokstart upon entering the above collected states. */
1054                 for ( StateSet::Iter ps = states; ps.lte(); ps++ ) {
1055                         /* This is run after duplicates are removed, we must guard against
1056                          * inserting a duplicate. */
1057                         ActionTable &actionTable = (*ps)->toStateActionTable;
1058                         if ( ! actionTable.hasAction( initTokStart ) )
1059                                 actionTable.setAction( initTokStartOrd, initTokStart );
1060                 }
1061         }
1062 }
1063
1064 /* Make the graph from a graph dict node. Does minimization and state sorting. */
1065 FsmAp *ParseData::makeInstance( GraphDictEl *gdNode )
1066 {
1067         /* Build the graph from a walk of the parse tree. */
1068         FsmAp *graph = gdNode->value->walk( this );
1069
1070         /* Resolve any labels that point to multiple states. Any labels that are
1071          * still around are referenced only by gotos and calls and they need to be
1072          * made into deterministic entry points. */
1073         graph->deterministicEntry();
1074
1075         /*
1076          * All state construction is now complete.
1077          */
1078
1079         /* Transfer global error actions. */
1080         for ( StateList::Iter state = graph->stateList; state.lte(); state++ )
1081                 graph->transferErrorActions( state, 0 );
1082
1083         removeActionDups( graph );
1084
1085         /* Remove unreachable states. There should be no dead end states. The
1086          * subtract and intersection operators are the only places where they may
1087          * be created and those operators clean them up. */
1088         graph->removeUnreachableStates();
1089
1090         /* No more fsm operations are to be done. Action ordering numbers are
1091          * no longer of use and will just hinder minimization. Clear them. */
1092         graph->nullActionKeys();
1093
1094         /* Transition priorities are no longer of use. We can clear them
1095          * because they will just hinder minimization as well. Clear them. */
1096         graph->clearAllPriorities();
1097
1098         if ( minimizeOpt != MinimizeNone ) {
1099                 /* Minimize here even if we minimized at every op. Now that function
1100                  * keys have been cleared we may get a more minimal fsm. */
1101                 switch ( minimizeLevel ) {
1102                         case MinimizeApprox:
1103                                 graph->minimizeApproximate();
1104                                 break;
1105                         case MinimizeStable:
1106                                 graph->minimizeStable();
1107                                 break;
1108                         case MinimizePartition1:
1109                                 graph->minimizePartition1();
1110                                 break;
1111                         case MinimizePartition2:
1112                                 graph->minimizePartition2();
1113                                 break;
1114                 }
1115         }
1116
1117         graph->compressTransitions();
1118
1119         return graph;
1120 }
1121
1122 void ParseData::printNameTree()
1123 {
1124         /* Print the name instance map. */
1125         for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ )
1126                 printNameInst( *name, 0 );
1127
1128         cerr << "name index:" << endl;
1129         /* Show that the name index is correct. */
1130         for ( int ni = 0; ni < nextNameId; ni++ ) {
1131                 cerr << ni << ": ";
1132                 char *name = nameIndex[ni]->name;
1133                 cerr << ( name != 0 ? name : "<ANON>" ) << endl;
1134         }
1135 }
1136
1137 FsmAp *ParseData::makeSpecific( GraphDictEl *gdNode )
1138 {
1139         /* Build the name tree and supporting data structures. */
1140         makeNameTree( gdNode );
1141
1142         /* Resove name references from gdNode. */
1143         initNameWalk();
1144         gdNode->value->resolveNameRefs( this );
1145
1146         /* Do not resolve action references. Since we are not building the entire
1147          * graph there's a good chance that many name references will fail. This
1148          * is okay since generating part of the graph is usually only done when
1149          * inspecting the compiled machine. */
1150
1151         /* Same story for extern entry point references. */
1152
1153         /* Flag this case so that the XML code generator is aware that we haven't
1154          * looked up name references in actions. It can then avoid segfaulting. */
1155         generatingSectionSubset = true;
1156
1157         /* Just building the specified graph. */
1158         initNameWalk();
1159         FsmAp *mainGraph = makeInstance( gdNode );
1160
1161         return mainGraph;
1162 }
1163
1164 FsmAp *ParseData::makeAll()
1165 {
1166         /* Build the name tree and supporting data structures. */
1167         makeNameTree( 0 );
1168
1169         /* Resove name references in the tree. */
1170         initNameWalk();
1171         for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ )
1172                 glel->value->resolveNameRefs( this );
1173
1174         /* Resolve action code name references. */
1175         resolveActionNameRefs();
1176
1177         /* Force name references to the top level instantiations. */
1178         for ( NameVect::Iter inst = rootName->childVect; inst.lte(); inst++ )
1179                 (*inst)->numRefs += 1;
1180
1181         FsmAp *mainGraph = 0;
1182         FsmAp **graphs = new FsmAp*[instanceList.length()];
1183         int numOthers = 0;
1184
1185         /* Make all the instantiations, we know that main exists in this list. */
1186         initNameWalk();
1187         for ( GraphList::Iter glel = instanceList; glel.lte();  glel++ ) {
1188                 if ( strcmp( glel->key, mainMachine ) == 0 ) {
1189                         /* Main graph is always instantiated. */
1190                         mainGraph = makeInstance( glel );
1191                 }
1192                 else {
1193                         /* Instantiate and store in others array. */
1194                         graphs[numOthers++] = makeInstance( glel );
1195                 }
1196         }
1197
1198         if ( mainGraph == 0 )
1199                 mainGraph = graphs[--numOthers];
1200
1201         if ( numOthers > 0 ) {
1202                 /* Add all the other graphs into main. */
1203                 mainGraph->globOp( graphs, numOthers );
1204         }
1205
1206         delete[] graphs;
1207         return mainGraph;
1208 }
1209
1210 void ParseData::analyzeAction( Action *action, InlineList *inlineList )
1211 {
1212         /* FIXME: Actions used as conditions should be very constrained. */
1213         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1214                 if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr )
1215                         action->anyCall = true;
1216
1217                 /* Need to recurse into longest match items. */
1218                 if ( item->type == InlineItem::LmSwitch ) {
1219                         LongestMatch *lm = item->longestMatch;
1220                         for ( LmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) {
1221                                 if ( lmi->action != 0 )
1222                                         analyzeAction( action, lmi->action->inlineList );
1223                         }
1224                 }
1225
1226                 if ( item->type == InlineItem::LmOnLast ||
1227                                 item->type == InlineItem::LmOnNext ||
1228                                 item->type == InlineItem::LmOnLagBehind )
1229                 {
1230                         LongestMatchPart *lmi = item->longestMatchPart;
1231                         if ( lmi->action != 0 )
1232                                 analyzeAction( action, lmi->action->inlineList );
1233                 }
1234
1235                 if ( item->children != 0 )
1236                         analyzeAction( action, item->children );
1237         }
1238 }
1239
1240
1241 /* Check actions for bad uses of fsm directives. We don't go inside longest
1242  * match items in actions created by ragel, since we just want the user
1243  * actions. */
1244 void ParseData::checkInlineList( Action *act, InlineList *inlineList )
1245 {
1246         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1247                 /* EOF checks. */
1248                 if ( act->numEofRefs > 0 ) {
1249                         switch ( item->type ) {
1250                         case InlineItem::PChar:
1251                                 error(item->loc) << "pointer to current element does not exist in "
1252                                                 "EOF action code" << endl;
1253                                 break;
1254                         case InlineItem::Char:
1255                                 error(item->loc) << "current element does not exist in "
1256                                                 "EOF action code" << endl;
1257                                 break;
1258                         case InlineItem::Hold:
1259                                 error(item->loc) << "changing the current element not possible in "
1260                                                 "EOF action code" << endl;
1261                                 break;
1262                         case InlineItem::Exec:
1263                                 error(item->loc) << "changing the current element not possible in "
1264                                                 "EOF action code" << endl;
1265                                 break;
1266                         case InlineItem::Goto: case InlineItem::Call:
1267                         case InlineItem::Next: case InlineItem::GotoExpr:
1268                         case InlineItem::CallExpr: case InlineItem::NextExpr:
1269                         case InlineItem::Ret:
1270                                 error(item->loc) << "changing the current state not possible in "
1271                                                 "EOF action code" << endl;
1272                                 break;
1273                         default:
1274                                 break;
1275                         }
1276                 }
1277
1278                 /* Recurse. */
1279                 if ( item->children != 0 )
1280                         checkInlineList( act, item->children );
1281         }
1282 }
1283
1284 void ParseData::checkAction( Action *action )
1285 {
1286         /* Check for actions with calls that are embedded within a longest match
1287          * machine. */
1288         if ( !action->isLmAction && action->numRefs() > 0 && action->anyCall ) {
1289                 for ( ActionRefs::Iter ar = action->actionRefs; ar.lte(); ar++ ) {
1290                         NameInst *check = *ar;
1291                         while ( check != 0 ) {
1292                                 if ( check->isLongestMatch ) {
1293                                         error(action->loc) << "within a scanner, fcall is permitted"
1294                                                 " only in pattern actions" << endl;
1295                                         break;
1296                                 }
1297                                 check = check->parent;
1298                         }
1299                 }
1300         }
1301
1302         checkInlineList( action, action->inlineList );
1303 }
1304
1305
1306 void ParseData::analyzeGraph( FsmAp *graph )
1307 {
1308         for ( ActionList::Iter act = actionList; act.lte(); act++ )
1309                 analyzeAction( act, act->inlineList );
1310
1311         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
1312                 /* The transition list. */
1313                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
1314                         for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ )
1315                                 at->value->numTransRefs += 1;
1316                 }
1317
1318                 for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ )
1319                         at->value->numToStateRefs += 1;
1320
1321                 for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ )
1322                         at->value->numFromStateRefs += 1;
1323
1324                 for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ )
1325                         at->value->numEofRefs += 1;
1326
1327                 for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) {
1328                         for ( CondSet::Iter sci = sc->condSpace->condSet; sci.lte(); sci++ )
1329                                 (*sci)->numCondRefs += 1;
1330                 }
1331         }
1332
1333         /* Checks for bad usage of directives in action code. */
1334         for ( ActionList::Iter act = actionList; act.lte(); act++ )
1335                 checkAction( act );
1336 }
1337
1338 void ParseData::makeExportsNameTree()
1339 {
1340         /* Make a name tree for the exports. */
1341         initExportsNameWalk();
1342
1343         /* First make the name tree. */
1344         for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1345                 if ( gdel->value->isExport ) {
1346                         /* Recurse on the instance. */
1347                         gdel->value->makeNameTree( gdel->loc, this );
1348                 }
1349         }
1350 }
1351
1352 void ParseData::makeExports()
1353 {
1354         makeExportsNameTree();
1355
1356         /* Resove name references in the tree. */
1357         initExportsNameWalk();
1358         for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1359                 if ( gdel->value->isExport )
1360                         gdel->value->resolveNameRefs( this );
1361         }
1362
1363         /* Make all the instantiations, we know that main exists in this list. */
1364         initExportsNameWalk();
1365         for ( GraphDict::Iter gdel = graphDict; gdel.lte();  gdel++ ) {
1366                 /* Check if this var def is an export. */
1367                 if ( gdel->value->isExport ) {
1368                         /* Build the graph from a walk of the parse tree. */
1369                         FsmAp *graph = gdel->value->walk( this );
1370
1371                         /* Build the graph from a walk of the parse tree. */
1372                         if ( !graph->checkSingleCharMachine() ) {
1373                                 error(gdel->loc) << "bad export machine, must define "
1374                                                 "a single character" << endl;
1375                         }
1376                         else {
1377                                 /* Safe to extract the key and declare the export. */
1378                                 Key exportKey = graph->startState->outList.head->lowKey;
1379                                 exportList.append( new Export( gdel->value->name, exportKey ) );
1380                         }
1381                 }
1382         }
1383
1384 }
1385
1386 void ParseData::prepareMachineGen( GraphDictEl *graphDictEl )
1387 {
1388         beginProcessing();
1389         initKeyOps();
1390         makeRootNames();
1391         initLongestMatchData();
1392
1393         /* Make the graph, do minimization. */
1394         if ( graphDictEl == 0 )
1395                 sectionGraph = makeAll();
1396         else
1397                 sectionGraph = makeSpecific( graphDictEl );
1398
1399         /* Compute exports from the export definitions. */
1400         makeExports();
1401
1402         /* If any errors have occured in the input file then don't write anything. */
1403         if ( gblErrorCount > 0 )
1404                 return;
1405
1406         analyzeGraph( sectionGraph );
1407
1408         /* Depends on the graph analysis. */
1409         setLongestMatchData( sectionGraph );
1410
1411         /* Decide if an error state is necessary.
1412          *  1. There is an error transition
1413          *  2. There is a gap in the transitions
1414          *  3. The longest match operator requires it. */
1415         if ( lmRequiresErrorState || sectionGraph->hasErrorTrans() )
1416                 sectionGraph->errState = sectionGraph->addState();
1417
1418         /* State numbers need to be assigned such that all final states have a
1419          * larger state id number than all non-final states. This enables the
1420          * first_final mechanism to function correctly. We also want states to be
1421          * ordered in a predictable fashion. So we first apply a depth-first
1422          * search, then do a stable sort by final state status, then assign
1423          * numbers. */
1424
1425         sectionGraph->depthFirstOrdering();
1426         sectionGraph->sortStatesByFinal();
1427         sectionGraph->setStateNumbers( 0 );
1428 }
1429
1430 void ParseData::generateXML( ostream &out )
1431 {
1432         beginProcessing();
1433
1434         /* Make the generator. */
1435         XMLCodeGen codeGen( sectionName, this, sectionGraph, out );
1436
1437         /* Write out with it. */
1438         codeGen.writeXML();
1439
1440         if ( printStatistics ) {
1441                 cerr << "fsm name  : " << sectionName << endl;
1442                 cerr << "num states: " << sectionGraph->stateList.length() << endl;
1443                 cerr << endl;
1444         }
1445 }
1446
1447 /* Send eof to all parsers. */
1448 void terminateAllParsers( )
1449 {
1450         /* FIXME: a proper token is needed here. Suppose we should use the
1451          * location of EOF in the last file that the parser was referenced in. */
1452         InputLoc loc;
1453         loc.fileName = "<EOF>";
1454         loc.line = 0;
1455         loc.col = 0;
1456         for ( ParserDict::Iter pdel = parserDict; pdel.lte(); pdel++ )
1457                 pdel->value->token( loc, _eof, 0, 0 );
1458 }
1459
1460 void writeLanguage( std::ostream &out )
1461 {
1462         out << " lang=\"";
1463         switch ( hostLangType ) {
1464                 case CCode:    out << "C"; break;
1465                 case DCode:    out << "D"; break;
1466                 case JavaCode: out << "Java"; break;
1467                 case RubyCode: out << "Ruby"; break;
1468         }
1469         out << "\"";
1470
1471 }
1472
1473 void writeMachines( std::ostream &out, std::string hostData, char *inputFileName )
1474 {
1475         if ( machineSpec == 0 && machineName == 0 ) {
1476                 /* No machine spec or machine name given. Generate everything. */
1477                 for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1478                         ParseData *pd = parser->value->pd;
1479                         if ( pd->instanceList.length() > 0 )
1480                                 pd->prepareMachineGen( 0 );
1481                 }
1482
1483                 if ( gblErrorCount == 0 ) {
1484                         out << "<ragel filename=\"" << inputFileName << "\"";
1485                         writeLanguage( out );
1486                         out << ">\n";
1487                         for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1488                                 ParseData *pd = parser->value->pd;
1489                                 if ( pd->instanceList.length() > 0 )
1490                                         pd->generateXML( out );
1491                         }
1492                         out << hostData;
1493                         out << "</ragel>\n";
1494                 }
1495         }
1496         else if ( parserDict.length() > 0 ) {
1497                 /* There is either a machine spec or machine name given. */
1498                 ParseData *parseData = 0;
1499                 GraphDictEl *graphDictEl = 0;
1500
1501                 /* Traverse the sections, break out when we find a section/machine
1502                  * that matches the one specified. */
1503                 for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1504                         ParseData *checkPd = parser->value->pd;
1505                         if ( machineSpec == 0 || strcmp( checkPd->sectionName, machineSpec ) == 0 ) {
1506                                 GraphDictEl *checkGdEl = 0;
1507                                 if ( machineName == 0 || (checkGdEl =
1508                                                 checkPd->graphDict.find( machineName )) != 0 )
1509                                 {
1510                                         /* Have a machine spec and/or machine name that matches
1511                                          * the -M/-S options. */
1512                                         parseData = checkPd;
1513                                         graphDictEl = checkGdEl;
1514                                         break;
1515                                 }
1516                         }
1517                 }
1518
1519                 if ( parseData == 0 )
1520                         error() << "could not locate machine specified with -S and/or -M" << endl;
1521                 else {
1522                         /* Section/Machine to emit was found. Prepare and emit it. */
1523                         parseData->prepareMachineGen( graphDictEl );
1524                         if ( gblErrorCount == 0 ) {
1525                                 out << "<ragel filename=\"" << inputFileName << "\"";
1526                                 writeLanguage( out );
1527                                 out << ">\n";
1528                                 parseData->generateXML( out );
1529                                 out << hostData;
1530                                 out << "</ragel>\n";
1531                         }
1532                 }
1533         }
1534 }