ragel/parsedata.cpp

   1 /*
   2  *  Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
   3  */
   4
   5 /*  This file is part of Ragel.
   6  *
   7  *  Ragel is free software; you can redistribute it and/or modify
   8  *  it under the terms of the GNU General Public License as published by
   9  *  the Free Software Foundation; either version 2 of the License, or
  10  *  (at your option) any later version.
  11  *
  12  *  Ragel is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  *  GNU General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU General Public License
  18  *  along with Ragel; if not, write to the Free Software
  19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  */
  21
  22 #include <iostream>
  23 #include <iomanip>
  24 #include <errno.h>
  25 #include <stdlib.h>
  26 #include <limits.h>
  27
  28 #include "ragel.h"
  29 #include "rlparse.h"
  30 #include "parsedata.h"
  31 #include "parsetree.h"
  32 #include "mergesort.h"
  33 #include "xmlcodegen.h"
  34
  35 using namespace std;
  36
  37 char machineMain[] = "main";
  38
  39 void Token::set( char *str, int len )
  40 {
  41         length = len;
  42         data = new char[len+1];
  43         memcpy( data, str, len );
  44         data[len] = 0;
  45 }
  46
  47 void Token::append( const Token &other )
  48 {
  49         int newLength = length + other.length;
  50         char *newString = new char[newLength+1];
  51         memcpy( newString, data, length );
  52         memcpy( newString + length, other.data, other.length );
  53         newString[newLength] = 0;
  54         data = newString;
  55         length = newLength;
  56 }
  57
  58 /* Perform minimization after an operation according
  59  * to the command line args. */
  60 void afterOpMinimize( FsmAp *fsm, bool lastInSeq )
  61 {
  62         /* Switch on the prefered minimization algorithm. */
  63         if ( minimizeOpt == MinimizeEveryOp || minimizeOpt == MinimizeMostOps && lastInSeq ) {
  64                 /* First clean up the graph. FsmAp operations may leave these
  65                  * lying around. There should be no dead end states. The subtract
  66                  * intersection operators are the only places where they may be
  67                  * created and those operators clean them up. */
  68                 fsm->removeUnreachableStates();
  69
  70                 switch ( minimizeLevel ) {
  71                         case MinimizeApprox:
  72                                 fsm->minimizeApproximate();
  73                                 break;
  74                         case MinimizePartition1:
  75                                 fsm->minimizePartition1();
  76                                 break;
  77                         case MinimizePartition2:
  78                                 fsm->minimizePartition2();
  79                                 break;
  80                         case MinimizeStable:
  81                                 fsm->minimizeStable();
  82                                 break;
  83                 }
  84         }
  85 }
  86
  87 /* Count the transitions in the fsm by walking the state list. */
  88 int countTransitions( FsmAp *fsm )
  89 {
  90         int numTrans = 0;
  91         StateAp *state = fsm->stateList.head;
  92         while ( state != 0 ) {
  93                 numTrans += state->outList.length();
  94                 state = state->next;
  95         }
  96         return numTrans;
  97 }
  98
  99 Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd )
 100 {
 101         /* Reset errno so we can check for overflow or underflow. In the event of
 102          * an error, sets the return val to the upper or lower bound being tested
 103          * against. */
 104         errno = 0;
 105         unsigned int size = keyOps->alphType->size;
 106         bool unusedBits = size < sizeof(unsigned long);
 107
 108         unsigned long ul = strtoul( str, 0, 16 );
 109
 110         if ( errno == ERANGE || unusedBits && ul >> (size * 8) ) {
 111                 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
 112                 ul = 1 << (size * 8);
 113         }
 114
 115         if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) )
 116                 ul |= (0xffffffff >> (size*8 ) ) << (size*8);
 117
 118         return Key( (long)ul );
 119 }
 120
 121 Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd )
 122 {
 123         /* Convert the number to a decimal. First reset errno so we can check
 124          * for overflow or underflow. */
 125         errno = 0;
 126         long long minVal = keyOps->alphType->minVal;
 127         long long maxVal = keyOps->alphType->maxVal;
 128
 129         long long ll = strtoll( str, 0, 10 );
 130
 131         /* Check for underflow. */
 132         if ( errno == ERANGE && ll < 0 || ll < minVal) {
 133                 error(loc) << "literal " << str << " underflows the alphabet type" << endl;
 134                 ll = minVal;
 135         }
 136         /* Check for overflow. */
 137         else if ( errno == ERANGE && ll > 0 || ll > maxVal ) {
 138                 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
 139                 ll = maxVal;
 140         }
 141
 142         if ( keyOps->alphType->isSigned )
 143                 return Key( (long)ll );
 144         else
 145                 return Key( (unsigned long)ll );
 146 }
 147
 148 /* Make an fsm key in int format (what the fsm graph uses) from an alphabet
 149  * number returned by the parser. Validates that the number doesn't overflow
 150  * the alphabet type. */
 151 Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd )
 152 {
 153         /* Switch on hex/decimal format. */
 154         if ( str[0] == '0' && str[1] == 'x' )
 155                 return makeFsmKeyHex( str, loc, pd );
 156         else
 157                 return makeFsmKeyDec( str, loc, pd );
 158 }
 159
 160 /* Make an fsm int format (what the fsm graph uses) from a single character.
 161  * Performs proper conversion depending on signed/unsigned property of the
 162  * alphabet. */
 163 Key makeFsmKeyChar( char c, ParseData *pd )
 164 {
 165         if ( keyOps->isSigned ) {
 166                 /* Copy from a char type. */
 167                 return Key( c );
 168         }
 169         else {
 170                 /* Copy from an unsigned byte type. */
 171                 return Key( (unsigned char)c );
 172         }
 173 }
 174
 175 /* Make an fsm key array in int format (what the fsm graph uses) from a string
 176  * of characters. Performs proper conversion depending on signed/unsigned
 177  * property of the alphabet. */
 178 void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd )
 179 {
 180         if ( keyOps->isSigned ) {
 181                 /* Copy from a char star type. */
 182                 char *src = data;
 183                 for ( int i = 0; i < len; i++ )
 184                         result[i] = Key(src[i]);
 185         }
 186         else {
 187                 /* Copy from an unsigned byte ptr type. */
 188                 unsigned char *src = (unsigned char*) data;
 189                 for ( int i = 0; i < len; i++ )
 190                         result[i] = Key(src[i]);
 191         }
 192 }
 193
 194 /* Like makeFsmKeyArray except the result has only unique keys. They ordering
 195  * will be changed. */
 196 void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
 197                 bool caseInsensitive, ParseData *pd )
 198 {
 199         /* Use a transitions list for getting unique keys. */
 200         if ( keyOps->isSigned ) {
 201                 /* Copy from a char star type. */
 202                 char *src = data;
 203                 for ( int si = 0; si < len; si++ ) {
 204                         Key key( src[si] );
 205                         result.insert( key );
 206                         if ( caseInsensitive ) {
 207                                 if ( key.isLower() )
 208                                         result.insert( key.toUpper() );
 209                                 else if ( key.isUpper() )
 210                                         result.insert( key.toLower() );
 211                         }
 212                 }
 213         }
 214         else {
 215                 /* Copy from an unsigned byte ptr type. */
 216                 unsigned char *src = (unsigned char*) data;
 217                 for ( int si = 0; si < len; si++ ) {
 218                         Key key( src[si] );
 219                         result.insert( key );
 220                         if ( caseInsensitive ) {
 221                                 if ( key.isLower() )
 222                                         result.insert( key.toUpper() );
 223                                 else if ( key.isUpper() )
 224                                         result.insert( key.toLower() );
 225                         }
 226                 }
 227         }
 228 }
 229
 230 FsmAp *dotFsm( ParseData *pd )
 231 {
 232         FsmAp *retFsm = new FsmAp();
 233         retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey );
 234         return retFsm;
 235 }
 236
 237 FsmAp *dotStarFsm( ParseData *pd )
 238 {
 239         FsmAp *retFsm = new FsmAp();
 240         retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey );
 241         return retFsm;
 242 }
 243
 244 /* Make a builtin type. Depends on the signed nature of the alphabet type. */
 245 FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd )
 246 {
 247         /* FsmAp created to return. */
 248         FsmAp *retFsm = 0;
 249         bool isSigned = keyOps->isSigned;
 250
 251         switch ( builtin ) {
 252         case BT_Any: {
 253                 /* All characters. */
 254                 retFsm = dotFsm( pd );
 255                 break;
 256         }
 257         case BT_Ascii: {
 258                 /* Ascii characters 0 to 127. */
 259                 retFsm = new FsmAp();
 260                 retFsm->rangeFsm( 0, 127 );
 261                 break;
 262         }
 263         case BT_Extend: {
 264                 /* Ascii extended characters. This is the full byte range. Dependent
 265                  * on signed, vs no signed. If the alphabet is one byte then just use
 266                  * dot fsm. */
 267                 if ( isSigned ) {
 268                         retFsm = new FsmAp();
 269                         retFsm->rangeFsm( -128, 127 );
 270                 }
 271                 else {
 272                         retFsm = new FsmAp();
 273                         retFsm->rangeFsm( 0, 255 );
 274                 }
 275                 break;
 276         }
 277         case BT_Alpha: {
 278                 /* Alpha [A-Za-z]. */
 279                 FsmAp *upper = new FsmAp(), *lower = new FsmAp();
 280                 upper->rangeFsm( 'A', 'Z' );
 281                 lower->rangeFsm( 'a', 'z' );
 282                 upper->unionOp( lower );
 283                 upper->minimizePartition2();
 284                 retFsm = upper;
 285                 break;
 286         }
 287         case BT_Digit: {
 288                 /* Digits [0-9]. */
 289                 retFsm = new FsmAp();
 290                 retFsm->rangeFsm( '0', '9' );
 291                 break;
 292         }
 293         case BT_Alnum: {
 294                 /* Alpha numerics [0-9A-Za-z]. */
 295                 FsmAp *digit = new FsmAp(), *lower = new FsmAp();
 296                 FsmAp *upper = new FsmAp();
 297                 digit->rangeFsm( '0', '9' );
 298                 upper->rangeFsm( 'A', 'Z' );
 299                 lower->rangeFsm( 'a', 'z' );
 300                 digit->unionOp( upper );
 301                 digit->unionOp( lower );
 302                 digit->minimizePartition2();
 303                 retFsm = digit;
 304                 break;
 305         }
 306         case BT_Lower: {
 307                 /* Lower case characters. */
 308                 retFsm = new FsmAp();
 309                 retFsm->rangeFsm( 'a', 'z' );
 310                 break;
 311         }
 312         case BT_Upper: {
 313                 /* Upper case characters. */
 314                 retFsm = new FsmAp();
 315                 retFsm->rangeFsm( 'A', 'Z' );
 316                 break;
 317         }
 318         case BT_Cntrl: {
 319                 /* Control characters. */
 320                 FsmAp *cntrl = new FsmAp();
 321                 FsmAp *highChar = new FsmAp();
 322                 cntrl->rangeFsm( 0, 31 );
 323                 highChar->concatFsm( 127 );
 324                 cntrl->unionOp( highChar );
 325                 cntrl->minimizePartition2();
 326                 retFsm = cntrl;
 327                 break;
 328         }
 329         case BT_Graph: {
 330                 /* Graphical ascii characters [!-~]. */
 331                 retFsm = new FsmAp();
 332                 retFsm->rangeFsm( '!', '~' );
 333                 break;
 334         }
 335         case BT_Print: {
 336                 /* Printable characters. Same as graph except includes space. */
 337                 retFsm = new FsmAp();
 338                 retFsm->rangeFsm( ' ', '~' );
 339                 break;
 340         }
 341         case BT_Punct: {
 342                 /* Punctuation. */
 343                 FsmAp *range1 = new FsmAp();
 344                 FsmAp *range2 = new FsmAp();
 345                 FsmAp *range3 = new FsmAp();
 346                 FsmAp *range4 = new FsmAp();
 347                 range1->rangeFsm( '!', '/' );
 348                 range2->rangeFsm( ':', '@' );
 349                 range3->rangeFsm( '[', '`' );
 350                 range4->rangeFsm( '{', '~' );
 351                 range1->unionOp( range2 );
 352                 range1->unionOp( range3 );
 353                 range1->unionOp( range4 );
 354                 range1->minimizePartition2();
 355                 retFsm = range1;
 356                 break;
 357         }
 358         case BT_Space: {
 359                 /* Whitespace: [\t\v\f\n\r ]. */
 360                 FsmAp *cntrl = new FsmAp();
 361                 FsmAp *space = new FsmAp();
 362                 cntrl->rangeFsm( '\t', '\r' );
 363                 space->concatFsm( ' ' );
 364                 cntrl->unionOp( space );
 365                 cntrl->minimizePartition2();
 366                 retFsm = cntrl;
 367                 break;
 368         }
 369         case BT_Xdigit: {
 370                 /* Hex digits [0-9A-Fa-f]. */
 371                 FsmAp *digit = new FsmAp();
 372                 FsmAp *upper = new FsmAp();
 373                 FsmAp *lower = new FsmAp();
 374                 digit->rangeFsm( '0', '9' );
 375                 upper->rangeFsm( 'A', 'F' );
 376                 lower->rangeFsm( 'a', 'f' );
 377                 digit->unionOp( upper );
 378                 digit->unionOp( lower );
 379                 digit->minimizePartition2();
 380                 retFsm = digit;
 381                 break;
 382         }
 383         case BT_Lambda: {
 384                 retFsm = new FsmAp();
 385                 retFsm->lambdaFsm();
 386                 break;
 387         }
 388         case BT_Empty: {
 389                 retFsm = new FsmAp();
 390                 retFsm->emptyFsm();
 391                 break;
 392         }}
 393
 394         return retFsm;
 395 }
 396
 397 /* Check if this name inst or any name inst below is referenced. */
 398 bool NameInst::anyRefsRec()
 399 {
 400         if ( numRefs > 0 )
 401                 return true;
 402
 403         /* Recurse on children until true. */
 404         for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) {
 405                 if ( (*ch)->anyRefsRec() )
 406                         return true;
 407         }
 408
 409         return false;
 410 }
 411
 412 /*
 413  * ParseData
 414  */
 415
 416 /* Initialize the structure that will collect info during the parse of a
 417  * machine. */
 418 ParseData::ParseData( char *fileName, char *sectionName,
 419                 const InputLoc &sectionLoc )
 420 :
 421         sectionGraph(0),
 422         generatingSectionSubset(false),
 423         nextPriorKey(0),
 424         /* 0 is reserved for global error actions. */
 425         nextLocalErrKey(1),
 426         nextNameId(0),
 427         nextCondId(0),
 428         alphTypeSet(false),
 429         getKeyExpr(0),
 430         accessExpr(0),
 431         curStateExpr(0),
 432         lowerNum(0),
 433         upperNum(0),
 434         fileName(fileName),
 435         sectionName(sectionName),
 436         sectionLoc(sectionLoc),
 437         errorCount(0),
 438         curActionOrd(0),
 439         curPriorOrd(0),
 440         rootName(0),
 441         nextEpsilonResolvedLink(0),
 442         nextLongestMatchId(1),
 443         lmRequiresErrorState(false)
 444 {
 445         /* Initialize the dictionary of graphs. This is our symbol table. The
 446          * initialization needs to be done on construction which happens at the
 447          * beginning of a machine spec so any assignment operators can reference
 448          * the builtins. */
 449         initGraphDict();
 450 }
 451
 452 /* Clean up the data collected during a parse. */
 453 ParseData::~ParseData()
 454 {
 455         /* Delete all the nodes in the action list. Will cause all the
 456          * string data that represents the actions to be deallocated. */
 457         actionList.empty();
 458 }
 459
 460 /* Make a name id in the current name instantiation scope if it is not
 461  * already there. */
 462 NameInst *ParseData::addNameInst( const InputLoc &loc, char *data, bool isLabel )
 463 {
 464         /* Create the name instantitaion object and insert it. */
 465         NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel );
 466         curNameInst->childVect.append( newNameInst );
 467         if ( data != 0 )
 468                 curNameInst->children.insertMulti( data, newNameInst );
 469         return newNameInst;
 470 }
 471
 472 void ParseData::initNameWalk()
 473 {
 474         curNameInst = rootName;
 475         curNameChild = 0;
 476 }
 477
 478 /* Goes into the next child scope. The number of the child is already set up.
 479  * We need this for the syncronous name tree and parse tree walk to work
 480  * properly. It is reset on entry into a scope and advanced on poping of a
 481  * scope. A call to enterNameScope should be accompanied by a corresponding
 482  * popNameScope. */
 483 NameFrame ParseData::enterNameScope( bool isLocal, int numScopes )
 484 {
 485         /* Save off the current data. */
 486         NameFrame retFrame;
 487         retFrame.prevNameInst = curNameInst;
 488         retFrame.prevNameChild = curNameChild;
 489         retFrame.prevLocalScope = localNameScope;
 490
 491         /* Enter into the new name scope. */
 492         for ( int i = 0; i < numScopes; i++ ) {
 493                 curNameInst = curNameInst->childVect[curNameChild];
 494                 curNameChild = 0;
 495         }
 496
 497         if ( isLocal )
 498                 localNameScope = curNameInst;
 499
 500         return retFrame;
 501 }
 502
 503 /* Return from a child scope to a parent. The parent info must be specified as
 504  * an argument and is obtained from the corresponding call to enterNameScope.
 505  * */
 506 void ParseData::popNameScope( const NameFrame &frame )
 507 {
 508         /* Pop the name scope. */
 509         curNameInst = frame.prevNameInst;
 510         curNameChild = frame.prevNameChild+1;
 511         localNameScope = frame.prevLocalScope;
 512 }
 513
 514 void ParseData::resetNameScope( const NameFrame &frame )
 515 {
 516         /* Pop the name scope. */
 517         curNameInst = frame.prevNameInst;
 518         curNameChild = frame.prevNameChild;
 519         localNameScope = frame.prevLocalScope;
 520 }
 521
 522
 523 void ParseData::unsetObsoleteEntries( FsmAp *graph )
 524 {
 525         /* Loop the reference names and increment the usage. Names that are no
 526          * longer needed will be unset in graph. */
 527         for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) {
 528                 /* Get the name. */
 529                 NameInst *name = *ref;
 530                 name->numUses += 1;
 531
 532                 /* If the name is no longer needed unset its corresponding entry. */
 533                 if ( name->numUses == name->numRefs ) {
 534                         assert( graph->entryPoints.find( name->id ) != 0 );
 535                         graph->unsetEntry( name->id );
 536                 }
 537         }
 538 }
 539
 540 NameSet ParseData::resolvePart( NameInst *refFrom, char *data, bool recLabelsOnly )
 541 {
 542         /* Queue needed for breadth-first search, load it with the start node. */
 543         NameInstList nameQueue;
 544         nameQueue.append( refFrom );
 545
 546         NameSet result;
 547         while ( nameQueue.length() > 0 ) {
 548                 /* Pull the next from location off the queue. */
 549                 NameInst *from = nameQueue.detachFirst();
 550
 551                 /* Look for the name. */
 552                 NameMapEl *low, *high;
 553                 if ( from->children.findMulti( data, low, high ) ) {
 554                         /* Record all instances of the name. */
 555                         for ( ; low <= high; low++ )
 556                                 result.insert( low->value );
 557                 }
 558
 559                 /* Name not there, do breadth-first operation of appending all
 560                  * childrent to the processing queue. */
 561                 for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) {
 562                         if ( !recLabelsOnly || (*name)->isLabel )
 563                                 nameQueue.append( *name );
 564                 }
 565         }
 566
 567         /* Queue exhausted and name never found. */
 568         return result;
 569 }
 570
 571 void ParseData::resolveFrom( NameSet &result, NameInst *refFrom,
 572                 const NameRef &nameRef, int namePos )
 573 {
 574         /* Look for the name in the owning scope of the factor with aug. */
 575         NameSet partResult = resolvePart( refFrom, nameRef[namePos], false );
 576
 577         /* If there are more parts to the name then continue on. */
 578         if ( ++namePos < nameRef.length() ) {
 579                 /* There are more components to the name, search using all the part
 580                  * results as the base. */
 581                 for ( NameSet::Iter name = partResult; name.lte(); name++ )
 582                         resolveFrom( result, *name, nameRef, namePos );
 583         }
 584         else {
 585                 /* This is the last component, append the part results to the final
 586                  * results. */
 587                 result.insert( partResult );
 588         }
 589 }
 590
 591 /* Write out a name reference. */
 592 ostream &operator<<( ostream &out, const NameRef &nameRef )
 593 {
 594         int pos = 0;
 595         if ( nameRef[pos] == 0 ) {
 596                 out << "::";
 597                 pos += 1;
 598         }
 599         out << nameRef[pos++];
 600         for ( ; pos < nameRef.length(); pos++ )
 601                 out << "::" << nameRef[pos];
 602         return out;
 603 }
 604
 605 ostream &operator<<( ostream &out, const NameInst &nameInst )
 606 {
 607         /* Count the number fully qualified name parts. */
 608         int numParents = 0;
 609         NameInst *curParent = nameInst.parent;
 610         while ( curParent != 0 ) {
 611                 numParents += 1;
 612                 curParent = curParent->parent;
 613         }
 614
 615         /* Make an array and fill it in. */
 616         curParent = nameInst.parent;
 617         NameInst **parents = new NameInst*[numParents];
 618         for ( int p = numParents-1; p >= 0; p-- ) {
 619                 parents[p] = curParent;
 620                 curParent = curParent->parent;
 621         }
 622
 623         /* Write the parents out, skip the root. */
 624         for ( int p = 1; p < numParents; p++ )
 625                 out << "::" << ( parents[p]->name != 0 ? parents[p]->name : "<ANON>" );
 626
 627         /* Write the name and cleanup. */
 628         out << "::" << ( nameInst.name != 0 ? nameInst.name : "<ANON>" );
 629         delete[] parents;
 630         return out;
 631 }
 632
 633 struct CmpNameInstLoc
 634 {
 635         static int compare( const NameInst *ni1, const NameInst *ni2 )
 636         {
 637                 if ( ni1->loc.line < ni2->loc.line )
 638                         return -1;
 639                 else if ( ni1->loc.line > ni2->loc.line )
 640                         return 1;
 641                 else if ( ni1->loc.col < ni2->loc.col )
 642                         return -1;
 643                 else if ( ni1->loc.col > ni2->loc.col )
 644                         return 1;
 645                 return 0;
 646         }
 647 };
 648
 649 void errorStateLabels( const NameSet &resolved )
 650 {
 651         MergeSort<NameInst*, CmpNameInstLoc> mergeSort;
 652         mergeSort.sort( resolved.data, resolved.length() );
 653         for ( NameSet::Iter res = resolved; res.lte(); res++ )
 654                 error((*res)->loc) << "  -> " << **res << endl;
 655 }
 656
 657
 658 NameInst *ParseData::resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action )
 659 {
 660         NameInst *nameInst = 0;
 661
 662         /* Do the local search if the name is not strictly a root level name
 663          * search. */
 664         if ( nameRef[0] != 0 ) {
 665                 /* If the action is referenced, resolve all of them. */
 666                 if ( action != 0 && action->actionRefs.length() > 0 ) {
 667                         /* Look for the name in all referencing scopes. */
 668                         NameSet resolved;
 669                         for ( ActionRefs::Iter actRef = action->actionRefs; actRef.lte(); actRef++ )
 670                                 resolveFrom( resolved, *actRef, nameRef, 0 );
 671
 672                         if ( resolved.length() > 0 ) {
 673                                 /* Take the first one. */
 674                                 nameInst = resolved[0];
 675                                 if ( resolved.length() > 1 ) {
 676                                         /* Complain about the multiple references. */
 677                                         error(loc) << "state reference " << nameRef <<
 678                                                         " resolves to multiple entry points" << endl;
 679                                         errorStateLabels( resolved );
 680                                 }
 681                         }
 682                 }
 683         }
 684
 685         /* If not found in the local scope, look in global. */
 686         if ( nameInst == 0 ) {
 687                 NameSet resolved;
 688                 int fromPos = nameRef[0] != 0 ? 0 : 1;
 689                 resolveFrom( resolved, rootName, nameRef, fromPos );
 690
 691                 if ( resolved.length() > 0 ) {
 692                         /* Take the first. */
 693                         nameInst = resolved[0];
 694                         if ( resolved.length() > 1 ) {
 695                                 /* Complain about the multiple references. */
 696                                 error(loc) << "state reference " << nameRef <<
 697                                                 " resolves to multiple entry points" << endl;
 698                                 errorStateLabels( resolved );
 699                         }
 700                 }
 701         }
 702
 703         if ( nameInst == 0 ) {
 704                 /* If not found then complain. */
 705                 error(loc) << "could not resolve state reference " << nameRef << endl;
 706         }
 707         return nameInst;
 708 }
 709
 710 void ParseData::resolveNameRefs( InlineList *inlineList, Action *action )
 711 {
 712         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
 713                 switch ( item->type ) {
 714                         case InlineItem::Entry: case InlineItem::Goto:
 715                         case InlineItem::Call: case InlineItem::Next: {
 716                                 /* Resolve, pass action for local search. */
 717                                 NameInst *target = resolveStateRef( *item->nameRef, item->loc, action );
 718
 719                                 /* Check if the target goes into a longest match. */
 720                                 NameInst *search = target->parent;
 721                                 while ( search != 0 ) {
 722                                         if ( search->isLongestMatch ) {
 723                                                 error(item->loc) << "cannot enter inside a longest "
 724                                                                 "match construction as an entry point" << endl;
 725                                                 break;
 726                                         }
 727                                         search = search->parent;
 728                                 }
 729
 730                                 /* Note the reference in the name. This will cause the entry
 731                                  * point to survive to the end of the graph generating walk. */
 732                                 if ( target != 0 )
 733                                         target->numRefs += 1;
 734                                 item->nameTarg = target;
 735                                 break;
 736                         }
 737                         default:
 738                                 break;
 739                 }
 740
 741                 /* Some of the item types may have children. */
 742                 if ( item->children != 0 )
 743                         resolveNameRefs( item->children, action );
 744         }
 745 }
 746
 747 /* Resolve references to labels in actions. */
 748 void ParseData::resolveActionNameRefs()
 749 {
 750         for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
 751                 /* Only care about the actions that are referenced. */
 752                 if ( act->actionRefs.length() > 0 )
 753                         resolveNameRefs( act->inlineList, act );
 754         }
 755 }
 756
 757 /* Walk a name tree starting at from and fill the name index. */
 758 void ParseData::fillNameIndex( NameInst *from )
 759 {
 760         /* Fill the value for from in the name index. */
 761         nameIndex[from->id] = from;
 762
 763         /* Recurse on the implicit final state and then all children. */
 764         if ( from->final != 0 )
 765                 fillNameIndex( from->final );
 766         for ( NameVect::Iter name = from->childVect; name.lte(); name++ )
 767                 fillNameIndex( *name );
 768 }
 769
 770 void ParseData::makeRootName()
 771 {
 772         /* Create the root name. */
 773         rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
 774 }
 775
 776 /* Build the name tree and supporting data structures. */
 777 void ParseData::makeNameTree( GraphDictEl *dictEl )
 778 {
 779         /* Set up curNameInst for the walk. */
 780         curNameInst = rootName;
 781         curNameChild = 0;
 782
 783         if ( dictEl != 0 ) {
 784                 /* A start location has been specified. */
 785                 dictEl->value->makeNameTree( dictEl->loc, this );
 786         }
 787         else {
 788                 /* First make the name tree. */
 789                 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
 790                         /* Recurse on the instance. */
 791                         glel->value->makeNameTree( glel->loc, this );
 792                 }
 793         }
 794
 795         /* The number of nodes in the tree can now be given by nextNameId */
 796         nameIndex = new NameInst*[nextNameId];
 797         memset( nameIndex, 0, sizeof(NameInst*)*nextNameId );
 798         fillNameIndex( rootName );
 799 }
 800
 801 void ParseData::createBuiltin( char *name, BuiltinMachine builtin )
 802 {
 803         Expression *expression = new Expression( builtin );
 804         Join *join = new Join( expression );
 805         JoinOrLm *joinOrLm = new JoinOrLm( join );
 806         VarDef *varDef = new VarDef( name, joinOrLm );
 807         GraphDictEl *graphDictEl = new GraphDictEl( name, varDef );
 808         graphDict.insert( graphDictEl );
 809 }
 810
 811 /* Initialize the graph dict with builtin types. */
 812 void ParseData::initGraphDict( )
 813 {
 814         createBuiltin( "any", BT_Any );
 815         createBuiltin( "ascii", BT_Ascii );
 816         createBuiltin( "extend", BT_Extend );
 817         createBuiltin( "alpha", BT_Alpha );
 818         createBuiltin( "digit", BT_Digit );
 819         createBuiltin( "alnum", BT_Alnum );
 820         createBuiltin( "lower", BT_Lower );
 821         createBuiltin( "upper", BT_Upper );
 822         createBuiltin( "cntrl", BT_Cntrl );
 823         createBuiltin( "graph", BT_Graph );
 824         createBuiltin( "print", BT_Print );
 825         createBuiltin( "punct", BT_Punct );
 826         createBuiltin( "space", BT_Space );
 827         createBuiltin( "xdigit", BT_Xdigit );
 828         createBuiltin( "null", BT_Lambda );
 829         createBuiltin( "zlen", BT_Lambda );
 830         createBuiltin( "empty", BT_Empty );
 831 }
 832
 833 /* Set the alphabet type. If the types are not valid returns false. */
 834 bool ParseData::setAlphType( char *s1, char *s2 )
 835 {
 836         bool valid = false;
 837         for ( int i = 0; i < hostLang->numHostTypes; i++ ) {
 838                 if ( strcmp( s1, hostLang->hostTypes[i].data1 ) == 0 &&
 839                                 hostLang->hostTypes[i].data2 != 0 &&
 840                                 strcmp( s2, hostLang->hostTypes[i].data2 ) == 0 )
 841                 {
 842                         valid = true;
 843                         userAlphType = hostLang->hostTypes + i;
 844                         break;
 845                 }
 846         }
 847
 848         alphTypeSet = true;
 849         return valid;
 850 }
 851
 852 /* Set the alphabet type. If the types are not valid returns false. */
 853 bool ParseData::setAlphType( char *s1 )
 854 {
 855         bool valid = false;
 856         for ( int i = 0; i < hostLang->numHostTypes; i++ ) {
 857                 if ( strcmp( s1, hostLang->hostTypes[i].data1 ) == 0 &&
 858                                 hostLang->hostTypes[i].data2 == 0 )
 859                 {
 860                         valid = true;
 861                         userAlphType = hostLang->hostTypes + i;
 862                         break;
 863                 }
 864         }
 865
 866         alphTypeSet = true;
 867         return valid;
 868 }
 869
 870 /* Initialize the key operators object that will be referenced by all fsms
 871  * created. */
 872 void ParseData::initKeyOps( )
 873 {
 874         /* Signedness and bounds. */
 875         HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType;
 876         thisKeyOps.setAlphType( alphType );
 877
 878         if ( lowerNum != 0 ) {
 879                 /* If ranges are given then interpret the alphabet type. */
 880                 thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this );
 881                 thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
 882         }
 883
 884         thisCondData.nextCondKey = thisKeyOps.maxKey;
 885         thisCondData.nextCondKey.increment();
 886 }
 887
 888 void ParseData::printNameInst( NameInst *nameInst, int level )
 889 {
 890         for ( int i = 0; i < level; i++ )
 891                 cerr << "  ";
 892         cerr << (nameInst->name != 0 ? nameInst->name : "<ANON>") <<
 893                         "  id: " << nameInst->id <<
 894                         "  refs: " << nameInst->numRefs << endl;
 895         for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ )
 896                 printNameInst( *name, level+1 );
 897 }
 898
 899 /* Remove duplicates of unique actions from an action table. */
 900 void ParseData::removeDups( ActionTable &table )
 901 {
 902         /* Scan through the table looking for unique actions to
 903          * remove duplicates of. */
 904         for ( int i = 0; i < table.length(); i++ ) {
 905                 /* Remove any duplicates ahead of i. */
 906                 for ( int r = i+1; r < table.length(); ) {
 907                         if ( table[r].value == table[i].value )
 908                                 table.vremove(r);
 909                         else
 910                                 r += 1;
 911                 }
 912         }
 913 }
 914
 915 /* Remove duplicates from action lists. This operates only on transition and
 916  * eof action lists and so should be called once all actions have been
 917  * transfered to their final resting place. */
 918 void ParseData::removeActionDups( FsmAp *graph )
 919 {
 920         /* Loop all states. */
 921         for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
 922                 /* Loop all transitions. */
 923                 for ( TransList::Iter trans = state->outList; trans.lte(); trans++ )
 924                         removeDups( trans->actionTable );
 925                 removeDups( state->toStateActionTable );
 926                 removeDups( state->fromStateActionTable );
 927                 removeDups( state->eofActionTable );
 928         }
 929 }
 930
 931 Action *ParseData::newAction( char *name, InlineList *inlineList )
 932 {
 933         InputLoc loc;
 934         loc.line = 1;
 935         loc.col = 1;
 936
 937         Action *action = new Action( loc, name, inlineList, nextCondId++ );
 938         action->actionRefs.append( rootName );
 939         actionList.append( action );
 940         return action;
 941 }
 942
 943 void ParseData::initLongestMatchData()
 944 {
 945         if ( lmList.length() > 0 ) {
 946                 /* The initTokStart action resets the token start. */
 947                 InlineList *il1 = new InlineList;
 948                 il1->append( new InlineItem( InputLoc(), InlineItem::LmInitTokStart ) );
 949                 initTokStart = newAction( "initts", il1 );
 950                 initTokStart->isLmAction = true;
 951
 952                 /* The initActId action gives act a default value. */
 953                 InlineList *il4 = new InlineList;
 954                 il4->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) );
 955                 initActId = newAction( "initact", il4 );
 956                 initActId->isLmAction = true;
 957
 958                 /* The setTokStart action sets tokstart. */
 959                 InlineList *il5 = new InlineList;
 960                 il5->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) );
 961                 setTokStart = newAction( "tokstart", il5 );
 962                 setTokStart->isLmAction = true;
 963
 964                 /* The setTokEnd action sets tokend. */
 965                 InlineList *il3 = new InlineList;
 966                 il3->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) );
 967                 setTokEnd = newAction( "tokend", il3 );
 968                 setTokEnd->isLmAction = true;
 969
 970                 /* The action will also need an ordering: ahead of all user action
 971                  * embeddings. */
 972                 initTokStartOrd = curActionOrd++;
 973                 initActIdOrd = curActionOrd++;
 974                 setTokStartOrd = curActionOrd++;
 975                 setTokEndOrd = curActionOrd++;
 976         }
 977 }
 978
 979 /* After building the graph, do some extra processing to ensure the runtime
 980  * data of the longest mactch operators is consistent. */
 981 void ParseData::setLongestMatchData( FsmAp *graph )
 982 {
 983         if ( lmList.length() > 0 ) {
 984                 /* Make sure all entry points (targets of fgoto, fcall, fnext, fentry)
 985                  * init the tokstart. */
 986                 for ( EntryMap::Iter en = graph->entryPoints; en.lte(); en++ ) {
 987                         /* This is run after duplicates are removed, we must guard against
 988                          * inserting a duplicate. */
 989                         ActionTable &actionTable = en->value->toStateActionTable;
 990                         if ( ! actionTable.hasAction( initTokStart ) )
 991                                 actionTable.setAction( initTokStartOrd, initTokStart );
 992                 }
 993
 994                 /* Find the set of states that are the target of transitions with
 995                  * actions that have calls. These states will be targeted by fret
 996                  * statements. */
 997                 StateSet states;
 998                 for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
 999                         for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
1000                                 for ( ActionTable::Iter ati = trans->actionTable; ati.lte(); ati++ ) {
1001                                         if ( ati->value->anyCall && trans->toState != 0 )
1002                                                 states.insert( trans->toState );
1003                                 }
1004                         }
1005                 }
1006
1007
1008                 /* Init tokstart upon entering the above collected states. */
1009                 for ( StateSet::Iter ps = states; ps.lte(); ps++ ) {
1010                         /* This is run after duplicates are removed, we must guard against
1011                          * inserting a duplicate. */
1012                         ActionTable &actionTable = (*ps)->toStateActionTable;
1013                         if ( ! actionTable.hasAction( initTokStart ) )
1014                                 actionTable.setAction( initTokStartOrd, initTokStart );
1015                 }
1016         }
1017 }
1018
1019 /* Make the graph from a graph dict node. Does minimization and state sorting. */
1020 FsmAp *ParseData::makeInstance( GraphDictEl *gdNode )
1021 {
1022         /* Build the graph from a walk of the parse tree. */
1023         FsmAp *graph = gdNode->value->walk( this );
1024
1025         /* Resolve any labels that point to multiple states. Any labels that are
1026          * still around are referenced only by gotos and calls and they need to be
1027          * made into deterministic entry points. */
1028         graph->deterministicEntry();
1029
1030         /*
1031          * All state construction is now complete.
1032          */
1033
1034         /* Transfer global error actions. */
1035         for ( StateList::Iter state = graph->stateList; state.lte(); state++ )
1036                 graph->transferErrorActions( state, 0 );
1037
1038         removeActionDups( graph );
1039
1040         /* Remove unreachable states. There should be no dead end states. The
1041          * subtract and intersection operators are the only places where they may
1042          * be created and those operators clean them up. */
1043         graph->removeUnreachableStates();
1044
1045         /* No more fsm operations are to be done. Action ordering numbers are
1046          * no longer of use and will just hinder minimization. Clear them. */
1047         graph->nullActionKeys();
1048
1049         /* Transition priorities are no longer of use. We can clear them
1050          * because they will just hinder minimization as well. Clear them. */
1051         graph->clearAllPriorities();
1052
1053         if ( minimizeOpt != MinimizeNone ) {
1054                 /* Minimize here even if we minimized at every op. Now that function
1055                  * keys have been cleared we may get a more minimal fsm. */
1056                 switch ( minimizeLevel ) {
1057                         case MinimizeApprox:
1058                                 graph->minimizeApproximate();
1059                                 break;
1060                         case MinimizeStable:
1061                                 graph->minimizeStable();
1062                                 break;
1063                         case MinimizePartition1:
1064                                 graph->minimizePartition1();
1065                                 break;
1066                         case MinimizePartition2:
1067                                 graph->minimizePartition2();
1068                                 break;
1069                 }
1070         }
1071
1072         graph->compressTransitions();
1073
1074         return graph;
1075 }
1076
1077 void ParseData::printNameTree()
1078 {
1079         /* Print the name instance map. */
1080         for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ )
1081                 printNameInst( *name, 0 );
1082
1083         cerr << "name index:" << endl;
1084         /* Show that the name index is correct. */
1085         for ( int ni = 0; ni < nextNameId; ni++ ) {
1086                 cerr << ni << ": ";
1087                 char *name = nameIndex[ni]->name;
1088                 cerr << ( name != 0 ? name : "<ANON>" ) << endl;
1089         }
1090 }
1091
1092 FsmAp *ParseData::makeSpecific( GraphDictEl *gdNode )
1093 {
1094         /* Build the name tree and supporting data structures. */
1095         makeNameTree( gdNode );
1096
1097         /* Resove name references from gdNode. */
1098         initNameWalk();
1099         gdNode->value->resolveNameRefs( this );
1100
1101         /* Do not resolve action references. Since we are not building the entire
1102          * graph there's a good chance that many name references will fail. This
1103          * is okay since generating part of the graph is usually only done when
1104          * inspecting the compiled machine. */
1105
1106         /* Flag this case so that the XML code generator is aware that we haven't
1107          * looked up name references in actions. It can then avoid segfaulting. */
1108         generatingSectionSubset = true;
1109
1110         /* Just building the specified graph. */
1111         initNameWalk();
1112         FsmAp *mainGraph = makeInstance( gdNode );
1113
1114         return mainGraph;
1115 }
1116
1117 FsmAp *ParseData::makeAll()
1118 {
1119         /* Build the name tree and supporting data structures. */
1120         makeNameTree( 0 );
1121
1122         /* Resove name references in the tree. */
1123         initNameWalk();
1124         for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ )
1125                 glel->value->resolveNameRefs( this );
1126
1127         /* Resolve action code name references. */
1128         resolveActionNameRefs();
1129
1130         FsmAp *mainGraph = 0;
1131         FsmAp **graphs = new FsmAp*[instanceList.length()];
1132         int numOthers = 0;
1133
1134         /* Make all the instantiations, we know that main exists in this list. */
1135         initNameWalk();
1136         for ( GraphList::Iter glel = instanceList; glel.lte();  glel++ ) {
1137                 if ( strcmp( glel->key, machineMain ) == 0 ) {
1138                         /* Main graph is always instantiated. */
1139                         mainGraph = makeInstance( glel );
1140                 }
1141                 else {
1142                         /* Check to see if the instance is ever referenced. */
1143                         NameInst *nameInst = nextNameScope();
1144                         if ( nameInst->anyRefsRec() )
1145                                 graphs[numOthers++] = makeInstance( glel );
1146                         else {
1147                                 /* Need to walk over the name tree item. */
1148                                 NameFrame nameFrame = enterNameScope( true, 1 );
1149                                 popNameScope( nameFrame );
1150                         }
1151                 }
1152         }
1153
1154         if ( numOthers > 0 ) {
1155                 /* Add all the other graphs into main. */
1156                 mainGraph->globOp( graphs, numOthers );
1157         }
1158
1159         delete[] graphs;
1160         return mainGraph;
1161 }
1162
1163 void ParseData::analyzeAction( Action *action, InlineList *inlineList )
1164 {
1165         /* FIXME: Actions used as conditions should be very constrained. */
1166         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1167                 if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr )
1168                         action->anyCall = true;
1169
1170                 /* Need to recurse into longest match items. */
1171                 if ( item->type == InlineItem::LmSwitch ) {
1172                         LongestMatch *lm = item->longestMatch;
1173                         for ( LmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) {
1174                                 if ( lmi->action != 0 )
1175                                         analyzeAction( action, lmi->action->inlineList );
1176                         }
1177                 }
1178
1179                 if ( item->type == InlineItem::LmOnLast ||
1180                                 item->type == InlineItem::LmOnNext ||
1181                                 item->type == InlineItem::LmOnLagBehind )
1182                 {
1183                         LongestMatchPart *lmi = item->longestMatchPart;
1184                         if ( lmi->action != 0 )
1185                                 analyzeAction( action, lmi->action->inlineList );
1186                 }
1187
1188                 if ( item->children != 0 )
1189                         analyzeAction( action, item->children );
1190         }
1191 }
1192
1193
1194 /* Check actions for bad uses of fsm directives. We don't go inside longest
1195  * match items in actions created by ragel, since we just want the user
1196  * actions. */
1197 void ParseData::checkInlineList( Action *act, InlineList *inlineList )
1198 {
1199         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1200                 /* EOF checks. */
1201                 if ( act->numEofRefs > 0 ) {
1202                         switch ( item->type ) {
1203                         case InlineItem::PChar:
1204                                 error(item->loc) << "pointer to current element does not exist in "
1205                                                 "EOF action code" << endl;
1206                                 break;
1207                         case InlineItem::Char:
1208                                 error(item->loc) << "current element does not exist in "
1209                                                 "EOF action code" << endl;
1210                                 break;
1211                         case InlineItem::Hold:
1212                                 error(item->loc) << "changing the current element not possible in "
1213                                                 "EOF action code" << endl;
1214                                 break;
1215                         case InlineItem::Exec:
1216                                 error(item->loc) << "changing the current element not possible in "
1217                                                 "EOF action code" << endl;
1218                                 break;
1219                         case InlineItem::Goto: case InlineItem::Call:
1220                         case InlineItem::Next: case InlineItem::GotoExpr:
1221                         case InlineItem::CallExpr: case InlineItem::NextExpr:
1222                         case InlineItem::Ret:
1223                                 error(item->loc) << "changing the current state not possible in "
1224                                                 "EOF action code" << endl;
1225                                 break;
1226                         default:
1227                                 break;
1228                         }
1229                 }
1230
1231                 /* Recurse. */
1232                 if ( item->children != 0 )
1233                         checkInlineList( act, item->children );
1234         }
1235 }
1236
1237 void ParseData::checkAction( Action *action )
1238 {
1239         /* Check for actions with calls that are embedded within a longest match
1240          * machine. */
1241         if ( !action->isLmAction && action->numRefs() > 0 && action->anyCall ) {
1242                 for ( ActionRefs::Iter ar = action->actionRefs; ar.lte(); ar++ ) {
1243                         NameInst *check = *ar;
1244                         while ( check != 0 ) {
1245                                 if ( check->isLongestMatch ) {
1246                                         error(action->loc) << "within a scanner, fcall is permitted"
1247                                                 " only in pattern actions" << endl;
1248                                         break;
1249                                 }
1250                                 check = check->parent;
1251                         }
1252                 }
1253         }
1254
1255         checkInlineList( action, action->inlineList );
1256 }
1257
1258
1259 void ParseData::analyzeGraph( FsmAp *graph )
1260 {
1261         for ( ActionList::Iter act = actionList; act.lte(); act++ )
1262                 analyzeAction( act, act->inlineList );
1263
1264         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
1265                 /* The transition list. */
1266                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
1267                         for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ )
1268                                 at->value->numTransRefs += 1;
1269                 }
1270
1271                 for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ )
1272                         at->value->numToStateRefs += 1;
1273
1274                 for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ )
1275                         at->value->numFromStateRefs += 1;
1276
1277                 for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ )
1278                         at->value->numEofRefs += 1;
1279
1280                 for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) {
1281                         for ( CondSet::Iter sci = sc->condSpace->condSet; sci.lte(); sci++ )
1282                                 (*sci)->numCondRefs += 1;
1283                 }
1284         }
1285
1286         /* Checks for bad usage of directives in action code. */
1287         for ( ActionList::Iter act = actionList; act.lte(); act++ )
1288                 checkAction( act );
1289 }
1290
1291 void ParseData::prepareMachineGen( GraphDictEl *graphDictEl )
1292 {
1293         beginProcessing();
1294         initKeyOps();
1295         makeRootName();
1296         initLongestMatchData();
1297
1298         /* Make the graph, do minimization. */
1299         if ( graphDictEl == 0 )
1300                 sectionGraph = makeAll();
1301         else
1302                 sectionGraph = makeSpecific( graphDictEl );
1303
1304         /* If any errors have occured in the input file then don't write anything. */
1305         if ( gblErrorCount > 0 )
1306                 return;
1307
1308         analyzeGraph( sectionGraph );
1309
1310         /* Depends on the graph analysis. */
1311         setLongestMatchData( sectionGraph );
1312
1313         /* Decide if an error state is necessary.
1314          *  1. There is an error transition
1315          *  2. There is a gap in the transitions
1316          *  3. The longest match operator requires it. */
1317         if ( lmRequiresErrorState || sectionGraph->hasErrorTrans() )
1318                 sectionGraph->errState = sectionGraph->addState();
1319
1320         /* State numbers need to be assigned such that all final states have a
1321          * larger state id number than all non-final states. This enables the
1322          * first_final mechanism to function correctly. We also want states to be
1323          * ordered in a predictable fashion. So we first apply a depth-first
1324          * search, then do a stable sort by final state status, then assign
1325          * numbers. */
1326
1327         sectionGraph->depthFirstOrdering();
1328         sectionGraph->sortStatesByFinal();
1329         sectionGraph->setStateNumbers( 0 );
1330 }
1331
1332 void ParseData::generateXML( ostream &out )
1333 {
1334         beginProcessing();
1335
1336         /* Make the generator. */
1337         XMLCodeGen codeGen( sectionName, this, sectionGraph, out );
1338
1339         /* Write out with it. */
1340         codeGen.writeXML();
1341
1342         if ( printStatistics ) {
1343                 cerr << "fsm name  : " << sectionName << endl;
1344                 cerr << "num states: " << sectionGraph->stateList.length() << endl;
1345                 cerr << endl;
1346         }
1347 }
1348
1349 /* Send eof to all parsers. */
1350 void terminateAllParsers( )
1351 {
1352         /* FIXME: a proper token is needed here. Suppose we should use the
1353          * location of EOF in the last file that the parser was referenced in. */
1354         InputLoc loc;
1355         loc.fileName = "<EOF>";
1356         loc.line = 0;
1357         loc.col = 0;
1358         for ( ParserDict::Iter pdel = parserDict; pdel.lte(); pdel++ )
1359                 pdel->value->token( loc, _eof, 0, 0 );
1360 }
1361
1362 void checkMachines( )
1363 {
1364         for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1365                 ParseData *pd = parser->value->pd;
1366                 if ( pd->instanceList.length() > 0 ) {
1367                         /* There must be a main graph defined. */
1368                         /* No machine name. Need to have a main. Make sure it was given. */
1369                         GraphDictEl *mainEl = pd->graphDict.find( machineMain );
1370                         if ( mainEl == 0 ) {
1371                                 error(pd->sectionLoc) << "main graph not defined in \"" <<
1372                                                 pd->sectionName << "\"" << endl;
1373                         }
1374                 }
1375         }
1376 }
1377
1378 void writeLanguage( std::ostream &out )
1379 {
1380         out << " lang=\"";
1381         switch ( hostLangType ) {
1382                 case CCode:    out << "C"; break;
1383                 case DCode:    out << "D"; break;
1384                 case JavaCode: out << "Java"; break;
1385                 case RubyCode: out << "Ruby"; break;
1386         }
1387         out << "\"";
1388
1389 }
1390
1391 void writeMachines( std::ostream &out, std::string hostData, char *inputFileName )
1392 {
1393         if ( machineSpec == 0 && machineName == 0 ) {
1394                 /* No machine spec or machine name given. Generate everything. */
1395                 for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1396                         ParseData *pd = parser->value->pd;
1397                         if ( pd->instanceList.length() > 0 )
1398                                 pd->prepareMachineGen( 0 );
1399                 }
1400
1401                 if ( gblErrorCount == 0 ) {
1402                         out << "<ragel filename=\"" << inputFileName << "\"";
1403                         writeLanguage( out );
1404                         out << ">\n";
1405                         for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1406                                 ParseData *pd = parser->value->pd;
1407                                 if ( pd->instanceList.length() > 0 )
1408                                         pd->generateXML( out );
1409                         }
1410                         out << hostData;
1411                         out << "</ragel>\n";
1412                 }
1413         }
1414         else if ( parserDict.length() > 0 ) {
1415                 /* There is either a machine spec or machine name given. */
1416                 ParseData *parseData = 0;
1417                 GraphDictEl *graphDictEl = 0;
1418
1419                 /* Traverse the sections, break out when we find a section/machine
1420                  * that matches the one specified. */
1421                 for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1422                         ParseData *checkPd = parser->value->pd;
1423                         if ( machineSpec == 0 || strcmp( checkPd->sectionName, machineSpec ) == 0 ) {
1424                                 GraphDictEl *checkGdEl = 0;
1425                                 if ( machineName == 0 || (checkGdEl =
1426                                                 checkPd->graphDict.find( machineName )) != 0 )
1427                                 {
1428                                         /* Have a machine spec and/or machine name that matches
1429                                          * the -M/-S options. */
1430                                         parseData = checkPd;
1431                                         graphDictEl = checkGdEl;
1432                                         break;
1433                                 }
1434                         }
1435                 }
1436
1437                 if ( parseData == 0 )
1438                         error() << "could not locate machine specified with -S and/or -M" << endl;
1439                 else {
1440                         /* Section/Machine to emit was found. Prepare and emit it. */
1441                         parseData->prepareMachineGen( graphDictEl );
1442                         if ( gblErrorCount == 0 ) {
1443                                 out << "<ragel filename=\"" << inputFileName << "\"";
1444                                 writeLanguage( out );
1445                                 out << ">\n";
1446                                 parseData->generateXML( out );
1447                                 out << hostData;
1448                                 out << "</ragel>\n";
1449                         }
1450                 }
1451         }
1452 }