ragel/parsedata.cpp

   1 /*
   2  *  Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
   3  */
   4
   5 /*  This file is part of Ragel.
   6  *
   7  *  Ragel is free software; you can redistribute it and/or modify
   8  *  it under the terms of the GNU General Public License as published by
   9  *  the Free Software Foundation; either version 2 of the License, or
  10  *  (at your option) any later version.
  11  *
  12  *  Ragel is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  *  GNU General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU General Public License
  18  *  along with Ragel; if not, write to the Free Software
  19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  */
  21
  22 #include <iostream>
  23 #include <iomanip>
  24 #include <errno.h>
  25 #include <stdlib.h>
  26 #include <limits.h>
  27
  28 #include "ragel.h"
  29 #include "rlparse.h"
  30 #include "parsedata.h"
  31 #include "parsetree.h"
  32 #include "mergesort.h"
  33 #include "xmlcodegen.h"
  34
  35 using namespace std;
  36
  37 char machineMain[] = "main";
  38
  39 void Token::set( char *str, int len )
  40 {
  41         length = len;
  42         data = new char[len+1];
  43         memcpy( data, str, len );
  44         data[len] = 0;
  45 }
  46
  47 void Token::append( const Token &other )
  48 {
  49         int newLength = length + other.length;
  50         char *newString = new char[newLength+1];
  51         memcpy( newString, data, length );
  52         memcpy( newString + length, other.data, other.length );
  53         newString[newLength] = 0;
  54         data = newString;
  55         length = newLength;
  56 }
  57
  58 /* Perform minimization after an operation according
  59  * to the command line args. */
  60 void afterOpMinimize( FsmAp *fsm, bool lastInSeq )
  61 {
  62         /* Switch on the prefered minimization algorithm. */
  63         if ( minimizeOpt == MinimizeEveryOp || minimizeOpt == MinimizeMostOps && lastInSeq ) {
  64                 /* First clean up the graph. FsmAp operations may leave these
  65                  * lying around. There should be no dead end states. The subtract
  66                  * intersection operators are the only places where they may be
  67                  * created and those operators clean them up. */
  68                 fsm->removeUnreachableStates();
  69
  70                 switch ( minimizeLevel ) {
  71                         case MinimizeApprox:
  72                                 fsm->minimizeApproximate();
  73                                 break;
  74                         case MinimizePartition1:
  75                                 fsm->minimizePartition1();
  76                                 break;
  77                         case MinimizePartition2:
  78                                 fsm->minimizePartition2();
  79                                 break;
  80                         case MinimizeStable:
  81                                 fsm->minimizeStable();
  82                                 break;
  83                 }
  84         }
  85 }
  86
  87 /* Count the transitions in the fsm by walking the state list. */
  88 int countTransitions( FsmAp *fsm )
  89 {
  90         int numTrans = 0;
  91         StateAp *state = fsm->stateList.head;
  92         while ( state != 0 ) {
  93                 numTrans += state->outList.length();
  94                 state = state->next;
  95         }
  96         return numTrans;
  97 }
  98
  99 Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd )
 100 {
 101         /* Reset errno so we can check for overflow or underflow. In the event of
 102          * an error, sets the return val to the upper or lower bound being tested
 103          * against. */
 104         errno = 0;
 105         unsigned int size = keyOps->alphType->size;
 106         bool unusedBits = size < sizeof(unsigned long);
 107
 108         unsigned long ul = strtoul( str, 0, 16 );
 109
 110         if ( errno == ERANGE || unusedBits && ul >> (size * 8) ) {
 111                 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
 112                 ul = 1 << (size * 8);
 113         }
 114
 115         if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) )
 116                 ul |= (0xffffffff >> (size*8 ) ) << (size*8);
 117
 118         return Key( (long)ul );
 119 }
 120
 121 Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd )
 122 {
 123         /* Convert the number to a decimal. First reset errno so we can check
 124          * for overflow or underflow. */
 125         errno = 0;
 126         long long minVal = keyOps->alphType->minVal;
 127         long long maxVal = keyOps->alphType->maxVal;
 128
 129         long long ll = strtoll( str, 0, 10 );
 130
 131         /* Check for underflow. */
 132         if ( errno == ERANGE && ll < 0 || ll < minVal) {
 133                 error(loc) << "literal " << str << " underflows the alphabet type" << endl;
 134                 ll = minVal;
 135         }
 136         /* Check for overflow. */
 137         else if ( errno == ERANGE && ll > 0 || ll > maxVal ) {
 138                 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
 139                 ll = maxVal;
 140         }
 141
 142         if ( keyOps->alphType->isSigned )
 143                 return Key( (long)ll );
 144         else
 145                 return Key( (unsigned long)ll );
 146 }
 147
 148 /* Make an fsm key in int format (what the fsm graph uses) from an alphabet
 149  * number returned by the parser. Validates that the number doesn't overflow
 150  * the alphabet type. */
 151 Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd )
 152 {
 153         /* Switch on hex/decimal format. */
 154         if ( str[0] == '0' && str[1] == 'x' )
 155                 return makeFsmKeyHex( str, loc, pd );
 156         else
 157                 return makeFsmKeyDec( str, loc, pd );
 158 }
 159
 160 /* Make an fsm int format (what the fsm graph uses) from a single character.
 161  * Performs proper conversion depending on signed/unsigned property of the
 162  * alphabet. */
 163 Key makeFsmKeyChar( char c, ParseData *pd )
 164 {
 165         if ( keyOps->isSigned ) {
 166                 /* Copy from a char type. */
 167                 return Key( c );
 168         }
 169         else {
 170                 /* Copy from an unsigned byte type. */
 171                 return Key( (unsigned char)c );
 172         }
 173 }
 174
 175 /* Make an fsm key array in int format (what the fsm graph uses) from a string
 176  * of characters. Performs proper conversion depending on signed/unsigned
 177  * property of the alphabet. */
 178 void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd )
 179 {
 180         if ( keyOps->isSigned ) {
 181                 /* Copy from a char star type. */
 182                 char *src = data;
 183                 for ( int i = 0; i < len; i++ )
 184                         result[i] = Key(src[i]);
 185         }
 186         else {
 187                 /* Copy from an unsigned byte ptr type. */
 188                 unsigned char *src = (unsigned char*) data;
 189                 for ( int i = 0; i < len; i++ )
 190                         result[i] = Key(src[i]);
 191         }
 192 }
 193
 194 /* Like makeFsmKeyArray except the result has only unique keys. They ordering
 195  * will be changed. */
 196 void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
 197                 bool caseInsensitive, ParseData *pd )
 198 {
 199         /* Use a transitions list for getting unique keys. */
 200         if ( keyOps->isSigned ) {
 201                 /* Copy from a char star type. */
 202                 char *src = data;
 203                 for ( int si = 0; si < len; si++ ) {
 204                         Key key( src[si] );
 205                         result.insert( key );
 206                         if ( caseInsensitive ) {
 207                                 if ( key.isLower() )
 208                                         result.insert( key.toUpper() );
 209                                 else if ( key.isUpper() )
 210                                         result.insert( key.toLower() );
 211                         }
 212                 }
 213         }
 214         else {
 215                 /* Copy from an unsigned byte ptr type. */
 216                 unsigned char *src = (unsigned char*) data;
 217                 for ( int si = 0; si < len; si++ ) {
 218                         Key key( src[si] );
 219                         result.insert( key );
 220                         if ( caseInsensitive ) {
 221                                 if ( key.isLower() )
 222                                         result.insert( key.toUpper() );
 223                                 else if ( key.isUpper() )
 224                                         result.insert( key.toLower() );
 225                         }
 226                 }
 227         }
 228 }
 229
 230 FsmAp *dotFsm( ParseData *pd )
 231 {
 232         FsmAp *retFsm = new FsmAp();
 233         retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey );
 234         return retFsm;
 235 }
 236
 237 FsmAp *dotStarFsm( ParseData *pd )
 238 {
 239         FsmAp *retFsm = new FsmAp();
 240         retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey );
 241         return retFsm;
 242 }
 243
 244 /* Make a builtin type. Depends on the signed nature of the alphabet type. */
 245 FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd )
 246 {
 247         /* FsmAp created to return. */
 248         FsmAp *retFsm = 0;
 249         bool isSigned = keyOps->isSigned;
 250
 251         switch ( builtin ) {
 252         case BT_Any: {
 253                 /* All characters. */
 254                 retFsm = dotFsm( pd );
 255                 break;
 256         }
 257         case BT_Ascii: {
 258                 /* Ascii characters 0 to 127. */
 259                 retFsm = new FsmAp();
 260                 retFsm->rangeFsm( 0, 127 );
 261                 break;
 262         }
 263         case BT_Extend: {
 264                 /* Ascii extended characters. This is the full byte range. Dependent
 265                  * on signed, vs no signed. If the alphabet is one byte then just use
 266                  * dot fsm. */
 267                 if ( isSigned ) {
 268                         retFsm = new FsmAp();
 269                         retFsm->rangeFsm( -128, 127 );
 270                 }
 271                 else {
 272                         retFsm = new FsmAp();
 273                         retFsm->rangeFsm( 0, 255 );
 274                 }
 275                 break;
 276         }
 277         case BT_Alpha: {
 278                 /* Alpha [A-Za-z]. */
 279                 FsmAp *upper = new FsmAp(), *lower = new FsmAp();
 280                 upper->rangeFsm( 'A', 'Z' );
 281                 lower->rangeFsm( 'a', 'z' );
 282                 upper->unionOp( lower );
 283                 upper->minimizePartition2();
 284                 retFsm = upper;
 285                 break;
 286         }
 287         case BT_Digit: {
 288                 /* Digits [0-9]. */
 289                 retFsm = new FsmAp();
 290                 retFsm->rangeFsm( '0', '9' );
 291                 break;
 292         }
 293         case BT_Alnum: {
 294                 /* Alpha numerics [0-9A-Za-z]. */
 295                 FsmAp *digit = new FsmAp(), *lower = new FsmAp();
 296                 FsmAp *upper = new FsmAp();
 297                 digit->rangeFsm( '0', '9' );
 298                 upper->rangeFsm( 'A', 'Z' );
 299                 lower->rangeFsm( 'a', 'z' );
 300                 digit->unionOp( upper );
 301                 digit->unionOp( lower );
 302                 digit->minimizePartition2();
 303                 retFsm = digit;
 304                 break;
 305         }
 306         case BT_Lower: {
 307                 /* Lower case characters. */
 308                 retFsm = new FsmAp();
 309                 retFsm->rangeFsm( 'a', 'z' );
 310                 break;
 311         }
 312         case BT_Upper: {
 313                 /* Upper case characters. */
 314                 retFsm = new FsmAp();
 315                 retFsm->rangeFsm( 'A', 'Z' );
 316                 break;
 317         }
 318         case BT_Cntrl: {
 319                 /* Control characters. */
 320                 FsmAp *cntrl = new FsmAp();
 321                 FsmAp *highChar = new FsmAp();
 322                 cntrl->rangeFsm( 0, 31 );
 323                 highChar->concatFsm( 127 );
 324                 cntrl->unionOp( highChar );
 325                 cntrl->minimizePartition2();
 326                 retFsm = cntrl;
 327                 break;
 328         }
 329         case BT_Graph: {
 330                 /* Graphical ascii characters [!-~]. */
 331                 retFsm = new FsmAp();
 332                 retFsm->rangeFsm( '!', '~' );
 333                 break;
 334         }
 335         case BT_Print: {
 336                 /* Printable characters. Same as graph except includes space. */
 337                 retFsm = new FsmAp();
 338                 retFsm->rangeFsm( ' ', '~' );
 339                 break;
 340         }
 341         case BT_Punct: {
 342                 /* Punctuation. */
 343                 FsmAp *range1 = new FsmAp();
 344                 FsmAp *range2 = new FsmAp();
 345                 FsmAp *range3 = new FsmAp();
 346                 FsmAp *range4 = new FsmAp();
 347                 range1->rangeFsm( '!', '/' );
 348                 range2->rangeFsm( ':', '@' );
 349                 range3->rangeFsm( '[', '`' );
 350                 range4->rangeFsm( '{', '~' );
 351                 range1->unionOp( range2 );
 352                 range1->unionOp( range3 );
 353                 range1->unionOp( range4 );
 354                 range1->minimizePartition2();
 355                 retFsm = range1;
 356                 break;
 357         }
 358         case BT_Space: {
 359                 /* Whitespace: [\t\v\f\n\r ]. */
 360                 FsmAp *cntrl = new FsmAp();
 361                 FsmAp *space = new FsmAp();
 362                 cntrl->rangeFsm( '\t', '\r' );
 363                 space->concatFsm( ' ' );
 364                 cntrl->unionOp( space );
 365                 cntrl->minimizePartition2();
 366                 retFsm = cntrl;
 367                 break;
 368         }
 369         case BT_Xdigit: {
 370                 /* Hex digits [0-9A-Fa-f]. */
 371                 FsmAp *digit = new FsmAp();
 372                 FsmAp *upper = new FsmAp();
 373                 FsmAp *lower = new FsmAp();
 374                 digit->rangeFsm( '0', '9' );
 375                 upper->rangeFsm( 'A', 'F' );
 376                 lower->rangeFsm( 'a', 'f' );
 377                 digit->unionOp( upper );
 378                 digit->unionOp( lower );
 379                 digit->minimizePartition2();
 380                 retFsm = digit;
 381                 break;
 382         }
 383         case BT_Lambda: {
 384                 retFsm = new FsmAp();
 385                 retFsm->lambdaFsm();
 386                 break;
 387         }
 388         case BT_Empty: {
 389                 retFsm = new FsmAp();
 390                 retFsm->emptyFsm();
 391                 break;
 392         }}
 393
 394         return retFsm;
 395 }
 396
 397 /* Check if this name inst or any name inst below is referenced. */
 398 bool NameInst::anyRefsRec()
 399 {
 400         if ( numRefs > 0 )
 401                 return true;
 402
 403         /* Recurse on children until true. */
 404         for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) {
 405                 if ( (*ch)->anyRefsRec() )
 406                         return true;
 407         }
 408
 409         return false;
 410 }
 411
 412 /*
 413  * ParseData
 414  */
 415
 416 /* Initialize the structure that will collect info during the parse of a
 417  * machine. */
 418 ParseData::ParseData( char *fileName, char *sectionName,
 419                 const InputLoc &sectionLoc )
 420 :
 421         sectionGraph(0),
 422         generatingSectionSubset(false),
 423         nextPriorKey(0),
 424         /* 0 is reserved for global error actions. */
 425         nextLocalErrKey(1),
 426         nextNameId(0),
 427         alphTypeSet(false),
 428         getKeyExpr(0),
 429         accessExpr(0),
 430         curStateExpr(0),
 431         lowerNum(0),
 432         upperNum(0),
 433         fileName(fileName),
 434         sectionName(sectionName),
 435         sectionLoc(sectionLoc),
 436         errorCount(0),
 437         curActionOrd(0),
 438         curPriorOrd(0),
 439         rootName(0),
 440         nextEpsilonResolvedLink(0),
 441         nextLongestMatchId(1),
 442         lmRequiresErrorState(false)
 443 {
 444         /* Initialize the dictionary of graphs. This is our symbol table. The
 445          * initialization needs to be done on construction which happens at the
 446          * beginning of a machine spec so any assignment operators can reference
 447          * the builtins. */
 448         initGraphDict();
 449 }
 450
 451 /* Clean up the data collected during a parse. */
 452 ParseData::~ParseData()
 453 {
 454         /* Delete all the nodes in the action list. Will cause all the
 455          * string data that represents the actions to be deallocated. */
 456         actionList.empty();
 457 }
 458
 459 /* Make a name id in the current name instantiation scope if it is not
 460  * already there. */
 461 NameInst *ParseData::addNameInst( const InputLoc &loc, char *data, bool isLabel )
 462 {
 463         /* Create the name instantitaion object and insert it. */
 464         NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel );
 465         curNameInst->childVect.append( newNameInst );
 466         if ( data != 0 )
 467                 curNameInst->children.insertMulti( data, newNameInst );
 468         return newNameInst;
 469 }
 470
 471 void ParseData::initNameWalk()
 472 {
 473         curNameInst = rootName;
 474         curNameChild = 0;
 475 }
 476
 477 /* Goes into the next child scope. The number of the child is already set up.
 478  * We need this for the syncronous name tree and parse tree walk to work
 479  * properly. It is reset on entry into a scope and advanced on poping of a
 480  * scope. A call to enterNameScope should be accompanied by a corresponding
 481  * popNameScope. */
 482 NameFrame ParseData::enterNameScope( bool isLocal, int numScopes )
 483 {
 484         /* Save off the current data. */
 485         NameFrame retFrame;
 486         retFrame.prevNameInst = curNameInst;
 487         retFrame.prevNameChild = curNameChild;
 488         retFrame.prevLocalScope = localNameScope;
 489
 490         /* Enter into the new name scope. */
 491         for ( int i = 0; i < numScopes; i++ ) {
 492                 curNameInst = curNameInst->childVect[curNameChild];
 493                 curNameChild = 0;
 494         }
 495
 496         if ( isLocal )
 497                 localNameScope = curNameInst;
 498
 499         return retFrame;
 500 }
 501
 502 /* Return from a child scope to a parent. The parent info must be specified as
 503  * an argument and is obtained from the corresponding call to enterNameScope.
 504  * */
 505 void ParseData::popNameScope( const NameFrame &frame )
 506 {
 507         /* Pop the name scope. */
 508         curNameInst = frame.prevNameInst;
 509         curNameChild = frame.prevNameChild+1;
 510         localNameScope = frame.prevLocalScope;
 511 }
 512
 513 void ParseData::resetNameScope( const NameFrame &frame )
 514 {
 515         /* Pop the name scope. */
 516         curNameInst = frame.prevNameInst;
 517         curNameChild = frame.prevNameChild;
 518         localNameScope = frame.prevLocalScope;
 519 }
 520
 521
 522 void ParseData::unsetObsoleteEntries( FsmAp *graph )
 523 {
 524         /* Loop the reference names and increment the usage. Names that are no
 525          * longer needed will be unset in graph. */
 526         for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) {
 527                 /* Get the name. */
 528                 NameInst *name = *ref;
 529                 name->numUses += 1;
 530
 531                 /* If the name is no longer needed unset its corresponding entry. */
 532                 if ( name->numUses == name->numRefs ) {
 533                         assert( graph->entryPoints.find( name->id ) != 0 );
 534                         graph->unsetEntry( name->id );
 535                 }
 536         }
 537 }
 538
 539 NameSet ParseData::resolvePart( NameInst *refFrom, char *data, bool recLabelsOnly )
 540 {
 541         /* Queue needed for breadth-first search, load it with the start node. */
 542         NameInstList nameQueue;
 543         nameQueue.append( refFrom );
 544
 545         NameSet result;
 546         while ( nameQueue.length() > 0 ) {
 547                 /* Pull the next from location off the queue. */
 548                 NameInst *from = nameQueue.detachFirst();
 549
 550                 /* Look for the name. */
 551                 NameMapEl *low, *high;
 552                 if ( from->children.findMulti( data, low, high ) ) {
 553                         /* Record all instances of the name. */
 554                         for ( ; low <= high; low++ )
 555                                 result.insert( low->value );
 556                 }
 557
 558                 /* Name not there, do breadth-first operation of appending all
 559                  * childrent to the processing queue. */
 560                 for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) {
 561                         if ( !recLabelsOnly || (*name)->isLabel )
 562                                 nameQueue.append( *name );
 563                 }
 564         }
 565
 566         /* Queue exhausted and name never found. */
 567         return result;
 568 }
 569
 570 void ParseData::resolveFrom( NameSet &result, NameInst *refFrom,
 571                 const NameRef &nameRef, int namePos )
 572 {
 573         /* Look for the name in the owning scope of the factor with aug. */
 574         NameSet partResult = resolvePart( refFrom, nameRef[namePos], false );
 575
 576         /* If there are more parts to the name then continue on. */
 577         if ( ++namePos < nameRef.length() ) {
 578                 /* There are more components to the name, search using all the part
 579                  * results as the base. */
 580                 for ( NameSet::Iter name = partResult; name.lte(); name++ )
 581                         resolveFrom( result, *name, nameRef, namePos );
 582         }
 583         else {
 584                 /* This is the last component, append the part results to the final
 585                  * results. */
 586                 result.insert( partResult );
 587         }
 588 }
 589
 590 /* Write out a name reference. */
 591 ostream &operator<<( ostream &out, const NameRef &nameRef )
 592 {
 593         int pos = 0;
 594         if ( nameRef[pos] == 0 ) {
 595                 out << "::";
 596                 pos += 1;
 597         }
 598         out << nameRef[pos++];
 599         for ( ; pos < nameRef.length(); pos++ )
 600                 out << "::" << nameRef[pos];
 601         return out;
 602 }
 603
 604 ostream &operator<<( ostream &out, const NameInst &nameInst )
 605 {
 606         /* Count the number fully qualified name parts. */
 607         int numParents = 0;
 608         NameInst *curParent = nameInst.parent;
 609         while ( curParent != 0 ) {
 610                 numParents += 1;
 611                 curParent = curParent->parent;
 612         }
 613
 614         /* Make an array and fill it in. */
 615         curParent = nameInst.parent;
 616         NameInst **parents = new NameInst*[numParents];
 617         for ( int p = numParents-1; p >= 0; p-- ) {
 618                 parents[p] = curParent;
 619                 curParent = curParent->parent;
 620         }
 621
 622         /* Write the parents out, skip the root. */
 623         for ( int p = 1; p < numParents; p++ )
 624                 out << "::" << ( parents[p]->name != 0 ? parents[p]->name : "<ANON>" );
 625
 626         /* Write the name and cleanup. */
 627         out << "::" << ( nameInst.name != 0 ? nameInst.name : "<ANON>" );
 628         delete[] parents;
 629         return out;
 630 }
 631
 632 struct CmpNameInstLoc
 633 {
 634         static int compare( const NameInst *ni1, const NameInst *ni2 )
 635         {
 636                 if ( ni1->loc.line < ni2->loc.line )
 637                         return -1;
 638                 else if ( ni1->loc.line > ni2->loc.line )
 639                         return 1;
 640                 else if ( ni1->loc.col < ni2->loc.col )
 641                         return -1;
 642                 else if ( ni1->loc.col > ni2->loc.col )
 643                         return 1;
 644                 return 0;
 645         }
 646 };
 647
 648 void errorStateLabels( const NameSet &resolved )
 649 {
 650         MergeSort<NameInst*, CmpNameInstLoc> mergeSort;
 651         mergeSort.sort( resolved.data, resolved.length() );
 652         for ( NameSet::Iter res = resolved; res.lte(); res++ )
 653                 error((*res)->loc) << "  -> " << **res << endl;
 654 }
 655
 656
 657 NameInst *ParseData::resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action )
 658 {
 659         NameInst *nameInst = 0;
 660
 661         /* Do the local search if the name is not strictly a root level name
 662          * search. */
 663         if ( nameRef[0] != 0 ) {
 664                 /* If the action is referenced, resolve all of them. */
 665                 if ( action != 0 && action->actionRefs.length() > 0 ) {
 666                         /* Look for the name in all referencing scopes. */
 667                         NameSet resolved;
 668                         for ( ActionRefs::Iter actRef = action->actionRefs; actRef.lte(); actRef++ )
 669                                 resolveFrom( resolved, *actRef, nameRef, 0 );
 670
 671                         if ( resolved.length() > 0 ) {
 672                                 /* Take the first one. */
 673                                 nameInst = resolved[0];
 674                                 if ( resolved.length() > 1 ) {
 675                                         /* Complain about the multiple references. */
 676                                         error(loc) << "state reference " << nameRef <<
 677                                                         " resolves to multiple entry points" << endl;
 678                                         errorStateLabels( resolved );
 679                                 }
 680                         }
 681                 }
 682         }
 683
 684         /* If not found in the local scope, look in global. */
 685         if ( nameInst == 0 ) {
 686                 NameSet resolved;
 687                 int fromPos = nameRef[0] != 0 ? 0 : 1;
 688                 resolveFrom( resolved, rootName, nameRef, fromPos );
 689
 690                 if ( resolved.length() > 0 ) {
 691                         /* Take the first. */
 692                         nameInst = resolved[0];
 693                         if ( resolved.length() > 1 ) {
 694                                 /* Complain about the multiple references. */
 695                                 error(loc) << "state reference " << nameRef <<
 696                                                 " resolves to multiple entry points" << endl;
 697                                 errorStateLabels( resolved );
 698                         }
 699                 }
 700         }
 701
 702         if ( nameInst == 0 ) {
 703                 /* If not found then complain. */
 704                 error(loc) << "could not resolve state reference " << nameRef << endl;
 705         }
 706         return nameInst;
 707 }
 708
 709 void ParseData::resolveNameRefs( InlineList *inlineList, Action *action )
 710 {
 711         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
 712                 switch ( item->type ) {
 713                         case InlineItem::Entry: case InlineItem::Goto:
 714                         case InlineItem::Call: case InlineItem::Next: {
 715                                 /* Resolve, pass action for local search. */
 716                                 NameInst *target = resolveStateRef( *item->nameRef, item->loc, action );
 717
 718                                 /* Check if the target goes into a longest match. */
 719                                 NameInst *search = target->parent;
 720                                 while ( search != 0 ) {
 721                                         if ( search->isLongestMatch ) {
 722                                                 error(item->loc) << "cannot enter inside a longest "
 723                                                                 "match construction as an entry point" << endl;
 724                                                 break;
 725                                         }
 726                                         search = search->parent;
 727                                 }
 728
 729                                 /* Note the reference in the name. This will cause the entry
 730                                  * point to survive to the end of the graph generating walk. */
 731                                 if ( target != 0 )
 732                                         target->numRefs += 1;
 733                                 item->nameTarg = target;
 734                                 break;
 735                         }
 736                         default:
 737                                 break;
 738                 }
 739
 740                 /* Some of the item types may have children. */
 741                 if ( item->children != 0 )
 742                         resolveNameRefs( item->children, action );
 743         }
 744 }
 745
 746 /* Resolve references to labels in actions. */
 747 void ParseData::resolveActionNameRefs()
 748 {
 749         for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
 750                 /* Only care about the actions that are referenced. */
 751                 if ( act->actionRefs.length() > 0 )
 752                         resolveNameRefs( act->inlineList, act );
 753         }
 754 }
 755
 756 /* Walk a name tree starting at from and fill the name index. */
 757 void ParseData::fillNameIndex( NameInst *from )
 758 {
 759         /* Fill the value for from in the name index. */
 760         nameIndex[from->id] = from;
 761
 762         /* Recurse on the implicit final state and then all children. */
 763         if ( from->final != 0 )
 764                 fillNameIndex( from->final );
 765         for ( NameVect::Iter name = from->childVect; name.lte(); name++ )
 766                 fillNameIndex( *name );
 767 }
 768
 769 void ParseData::makeRootName()
 770 {
 771         /* Create the root name. */
 772         rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
 773 }
 774
 775 /* Build the name tree and supporting data structures. */
 776 void ParseData::makeNameTree( GraphDictEl *dictEl )
 777 {
 778         /* Set up curNameInst for the walk. */
 779         curNameInst = rootName;
 780         curNameChild = 0;
 781
 782         if ( dictEl != 0 ) {
 783                 /* A start location has been specified. */
 784                 dictEl->value->makeNameTree( dictEl->loc, this );
 785         }
 786         else {
 787                 /* First make the name tree. */
 788                 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
 789                         /* Recurse on the instance. */
 790                         glel->value->makeNameTree( glel->loc, this );
 791                 }
 792         }
 793
 794         /* The number of nodes in the tree can now be given by nextNameId */
 795         nameIndex = new NameInst*[nextNameId];
 796         memset( nameIndex, 0, sizeof(NameInst*)*nextNameId );
 797         fillNameIndex( rootName );
 798 }
 799
 800 void ParseData::createBuiltin( char *name, BuiltinMachine builtin )
 801 {
 802         Expression *expression = new Expression( builtin );
 803         Join *join = new Join( expression );
 804         JoinOrLm *joinOrLm = new JoinOrLm( join );
 805         VarDef *varDef = new VarDef( name, joinOrLm );
 806         GraphDictEl *graphDictEl = new GraphDictEl( name, varDef );
 807         graphDict.insert( graphDictEl );
 808 }
 809
 810 /* Initialize the graph dict with builtin types. */
 811 void ParseData::initGraphDict( )
 812 {
 813         createBuiltin( "any", BT_Any );
 814         createBuiltin( "ascii", BT_Ascii );
 815         createBuiltin( "extend", BT_Extend );
 816         createBuiltin( "alpha", BT_Alpha );
 817         createBuiltin( "digit", BT_Digit );
 818         createBuiltin( "alnum", BT_Alnum );
 819         createBuiltin( "lower", BT_Lower );
 820         createBuiltin( "upper", BT_Upper );
 821         createBuiltin( "cntrl", BT_Cntrl );
 822         createBuiltin( "graph", BT_Graph );
 823         createBuiltin( "print", BT_Print );
 824         createBuiltin( "punct", BT_Punct );
 825         createBuiltin( "space", BT_Space );
 826         createBuiltin( "xdigit", BT_Xdigit );
 827         createBuiltin( "null", BT_Lambda );
 828         createBuiltin( "zlen", BT_Lambda );
 829         createBuiltin( "empty", BT_Empty );
 830 }
 831
 832 /* Set the alphabet type. If the types are not valid returns false. */
 833 bool ParseData::setAlphType( char *s1, char *s2 )
 834 {
 835         bool valid = false;
 836         for ( int i = 0; i < hostLang->numHostTypes; i++ ) {
 837                 if ( strcmp( s1, hostLang->hostTypes[i].data1 ) == 0 &&
 838                                 hostLang->hostTypes[i].data2 != 0 &&
 839                                 strcmp( s2, hostLang->hostTypes[i].data2 ) == 0 )
 840                 {
 841                         valid = true;
 842                         userAlphType = hostLang->hostTypes + i;
 843                         break;
 844                 }
 845         }
 846
 847         alphTypeSet = true;
 848         return valid;
 849 }
 850
 851 /* Set the alphabet type. If the types are not valid returns false. */
 852 bool ParseData::setAlphType( char *s1 )
 853 {
 854         bool valid = false;
 855         for ( int i = 0; i < hostLang->numHostTypes; i++ ) {
 856                 if ( strcmp( s1, hostLang->hostTypes[i].data1 ) == 0 &&
 857                                 hostLang->hostTypes[i].data2 == 0 )
 858                 {
 859                         valid = true;
 860                         userAlphType = hostLang->hostTypes + i;
 861                         break;
 862                 }
 863         }
 864
 865         alphTypeSet = true;
 866         return valid;
 867 }
 868
 869 /* Initialize the key operators object that will be referenced by all fsms
 870  * created. */
 871 void ParseData::initKeyOps( )
 872 {
 873         /* Signedness and bounds. */
 874         HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType;
 875         thisKeyOps.setAlphType( alphType );
 876
 877         if ( lowerNum != 0 ) {
 878                 /* If ranges are given then interpret the alphabet type. */
 879                 thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this );
 880                 thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
 881         }
 882
 883         thisCondData.nextCondKey = thisKeyOps.maxKey;
 884         thisCondData.nextCondKey.increment();
 885 }
 886
 887 void ParseData::printNameInst( NameInst *nameInst, int level )
 888 {
 889         for ( int i = 0; i < level; i++ )
 890                 cerr << "  ";
 891         cerr << (nameInst->name != 0 ? nameInst->name : "<ANON>") <<
 892                         "  id: " << nameInst->id <<
 893                         "  refs: " << nameInst->numRefs << endl;
 894         for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ )
 895                 printNameInst( *name, level+1 );
 896 }
 897
 898 /* Remove duplicates of unique actions from an action table. */
 899 void ParseData::removeDups( ActionTable &table )
 900 {
 901         /* Scan through the table looking for unique actions to
 902          * remove duplicates of. */
 903         for ( int i = 0; i < table.length(); i++ ) {
 904                 /* Remove any duplicates ahead of i. */
 905                 for ( int r = i+1; r < table.length(); ) {
 906                         if ( table[r].value == table[i].value )
 907                                 table.vremove(r);
 908                         else
 909                                 r += 1;
 910                 }
 911         }
 912 }
 913
 914 /* Remove duplicates from action lists. This operates only on transition and
 915  * eof action lists and so should be called once all actions have been
 916  * transfered to their final resting place. */
 917 void ParseData::removeActionDups( FsmAp *graph )
 918 {
 919         /* Loop all states. */
 920         for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
 921                 /* Loop all transitions. */
 922                 for ( TransList::Iter trans = state->outList; trans.lte(); trans++ )
 923                         removeDups( trans->actionTable );
 924                 removeDups( state->toStateActionTable );
 925                 removeDups( state->fromStateActionTable );
 926                 removeDups( state->eofActionTable );
 927         }
 928 }
 929
 930 Action *ParseData::newAction( char *name, InlineList *inlineList )
 931 {
 932         InputLoc loc;
 933         loc.line = 1;
 934         loc.col = 1;
 935
 936         Action *action = new Action( loc, name, inlineList );
 937         action->actionRefs.append( rootName );
 938         actionList.append( action );
 939         return action;
 940 }
 941
 942 void ParseData::initLongestMatchData()
 943 {
 944         if ( lmList.length() > 0 ) {
 945                 /* The initTokStart action resets the token start. */
 946                 InlineList *il1 = new InlineList;
 947                 il1->append( new InlineItem( InputLoc(), InlineItem::LmInitTokStart ) );
 948                 initTokStart = newAction( "initts", il1 );
 949                 initTokStart->isLmAction = true;
 950
 951                 /* The initActId action gives act a default value. */
 952                 InlineList *il4 = new InlineList;
 953                 il4->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) );
 954                 initActId = newAction( "initact", il4 );
 955                 initActId->isLmAction = true;
 956
 957                 /* The setTokStart action sets tokstart. */
 958                 InlineList *il5 = new InlineList;
 959                 il5->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) );
 960                 setTokStart = newAction( "tokstart", il5 );
 961                 setTokStart->isLmAction = true;
 962
 963                 /* The setTokEnd action sets tokend. */
 964                 InlineList *il3 = new InlineList;
 965                 il3->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) );
 966                 setTokEnd = newAction( "tokend", il3 );
 967                 setTokEnd->isLmAction = true;
 968
 969                 /* The action will also need an ordering: ahead of all user action
 970                  * embeddings. */
 971                 initTokStartOrd = curActionOrd++;
 972                 initActIdOrd = curActionOrd++;
 973                 setTokStartOrd = curActionOrd++;
 974                 setTokEndOrd = curActionOrd++;
 975         }
 976 }
 977
 978 /* After building the graph, do some extra processing to ensure the runtime
 979  * data of the longest mactch operators is consistent. */
 980 void ParseData::setLongestMatchData( FsmAp *graph )
 981 {
 982         if ( lmList.length() > 0 ) {
 983                 /* Make sure all entry points (targets of fgoto, fcall, fnext, fentry)
 984                  * init the tokstart. */
 985                 for ( EntryMap::Iter en = graph->entryPoints; en.lte(); en++ ) {
 986                         /* This is run after duplicates are removed, we must guard against
 987                          * inserting a duplicate. */
 988                         ActionTable &actionTable = en->value->toStateActionTable;
 989                         if ( ! actionTable.hasAction( initTokStart ) )
 990                                 actionTable.setAction( initTokStartOrd, initTokStart );
 991                 }
 992
 993                 /* Find the set of states that are the target of transitions with
 994                  * actions that have calls. These states will be targeted by fret
 995                  * statements. */
 996                 StateSet states;
 997                 for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
 998                         for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
 999                                 for ( ActionTable::Iter ati = trans->actionTable; ati.lte(); ati++ ) {
1000                                         if ( ati->value->anyCall && trans->toState != 0 )
1001                                                 states.insert( trans->toState );
1002                                 }
1003                         }
1004                 }
1005
1006
1007                 /* Init tokstart upon entering the above collected states. */
1008                 for ( StateSet::Iter ps = states; ps.lte(); ps++ ) {
1009                         /* This is run after duplicates are removed, we must guard against
1010                          * inserting a duplicate. */
1011                         ActionTable &actionTable = (*ps)->toStateActionTable;
1012                         if ( ! actionTable.hasAction( initTokStart ) )
1013                                 actionTable.setAction( initTokStartOrd, initTokStart );
1014                 }
1015         }
1016 }
1017
1018 /* Make the graph from a graph dict node. Does minimization and state sorting. */
1019 FsmAp *ParseData::makeInstance( GraphDictEl *gdNode )
1020 {
1021         /* Build the graph from a walk of the parse tree. */
1022         FsmAp *graph = gdNode->value->walk( this );
1023
1024         /* Resolve any labels that point to multiple states. Any labels that are
1025          * still around are referenced only by gotos and calls and they need to be
1026          * made into deterministic entry points. */
1027         graph->deterministicEntry();
1028
1029         /*
1030          * All state construction is now complete.
1031          */
1032
1033         /* Transfer global error actions. */
1034         for ( StateList::Iter state = graph->stateList; state.lte(); state++ )
1035                 graph->transferErrorActions( state, 0 );
1036
1037         removeActionDups( graph );
1038
1039         /* Remove unreachable states. There should be no dead end states. The
1040          * subtract and intersection operators are the only places where they may
1041          * be created and those operators clean them up. */
1042         graph->removeUnreachableStates();
1043
1044         /* No more fsm operations are to be done. Action ordering numbers are
1045          * no longer of use and will just hinder minimization. Clear them. */
1046         graph->nullActionKeys();
1047
1048         /* Transition priorities are no longer of use. We can clear them
1049          * because they will just hinder minimization as well. Clear them. */
1050         graph->clearAllPriorities();
1051
1052         if ( minimizeOpt != MinimizeNone ) {
1053                 /* Minimize here even if we minimized at every op. Now that function
1054                  * keys have been cleared we may get a more minimal fsm. */
1055                 switch ( minimizeLevel ) {
1056                         case MinimizeApprox:
1057                                 graph->minimizeApproximate();
1058                                 break;
1059                         case MinimizeStable:
1060                                 graph->minimizeStable();
1061                                 break;
1062                         case MinimizePartition1:
1063                                 graph->minimizePartition1();
1064                                 break;
1065                         case MinimizePartition2:
1066                                 graph->minimizePartition2();
1067                                 break;
1068                 }
1069         }
1070
1071         graph->compressTransitions();
1072
1073         return graph;
1074 }
1075
1076 void ParseData::printNameTree()
1077 {
1078         /* Print the name instance map. */
1079         for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ )
1080                 printNameInst( *name, 0 );
1081
1082         cerr << "name index:" << endl;
1083         /* Show that the name index is correct. */
1084         for ( int ni = 0; ni < nextNameId; ni++ ) {
1085                 cerr << ni << ": ";
1086                 char *name = nameIndex[ni]->name;
1087                 cerr << ( name != 0 ? name : "<ANON>" ) << endl;
1088         }
1089 }
1090
1091 FsmAp *ParseData::makeSpecific( GraphDictEl *gdNode )
1092 {
1093         /* Build the name tree and supporting data structures. */
1094         makeNameTree( gdNode );
1095
1096         /* Resove name references from gdNode. */
1097         initNameWalk();
1098         gdNode->value->resolveNameRefs( this );
1099
1100         /* Do not resolve action references. Since we are not building the entire
1101          * graph there's a good chance that many name references will fail. This
1102          * is okay since generating part of the graph is usually only done when
1103          * inspecting the compiled machine. */
1104
1105         /* Flag this case so that the XML code generator is aware that we haven't
1106          * looked up name references in actions. It can then avoid segfaulting. */
1107         generatingSectionSubset = true;
1108
1109         /* Just building the specified graph. */
1110         initNameWalk();
1111         FsmAp *mainGraph = makeInstance( gdNode );
1112
1113         return mainGraph;
1114 }
1115
1116 FsmAp *ParseData::makeAll()
1117 {
1118         /* Build the name tree and supporting data structures. */
1119         makeNameTree( 0 );
1120
1121         /* Resove name references in the tree. */
1122         initNameWalk();
1123         for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ )
1124                 glel->value->resolveNameRefs( this );
1125
1126         /* Resolve action code name references. */
1127         resolveActionNameRefs();
1128
1129         FsmAp *mainGraph = 0;
1130         FsmAp **graphs = new FsmAp*[instanceList.length()];
1131         int numOthers = 0;
1132
1133         /* Make all the instantiations, we know that main exists in this list. */
1134         initNameWalk();
1135         for ( GraphList::Iter glel = instanceList; glel.lte();  glel++ ) {
1136                 if ( strcmp( glel->key, machineMain ) == 0 ) {
1137                         /* Main graph is always instantiated. */
1138                         mainGraph = makeInstance( glel );
1139                 }
1140                 else {
1141                         /* Check to see if the instance is ever referenced. */
1142                         NameInst *nameInst = nextNameScope();
1143                         if ( nameInst->anyRefsRec() )
1144                                 graphs[numOthers++] = makeInstance( glel );
1145                         else {
1146                                 /* Need to walk over the name tree item. */
1147                                 NameFrame nameFrame = enterNameScope( true, 1 );
1148                                 popNameScope( nameFrame );
1149                         }
1150                 }
1151         }
1152
1153         if ( numOthers > 0 ) {
1154                 /* Add all the other graphs into main. */
1155                 mainGraph->globOp( graphs, numOthers );
1156         }
1157
1158         delete[] graphs;
1159         return mainGraph;
1160 }
1161
1162 void ParseData::analyzeAction( Action *action, InlineList *inlineList )
1163 {
1164         /* FIXME: Actions used as conditions should be very constrained. */
1165         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1166                 if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr )
1167                         action->anyCall = true;
1168
1169                 /* Need to recurse into longest match items. */
1170                 if ( item->type == InlineItem::LmSwitch ) {
1171                         LongestMatch *lm = item->longestMatch;
1172                         for ( LmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) {
1173                                 if ( lmi->action != 0 )
1174                                         analyzeAction( action, lmi->action->inlineList );
1175                         }
1176                 }
1177
1178                 if ( item->type == InlineItem::LmOnLast ||
1179                                 item->type == InlineItem::LmOnNext ||
1180                                 item->type == InlineItem::LmOnLagBehind )
1181                 {
1182                         LongestMatchPart *lmi = item->longestMatchPart;
1183                         if ( lmi->action != 0 )
1184                                 analyzeAction( action, lmi->action->inlineList );
1185                 }
1186
1187                 if ( item->children != 0 )
1188                         analyzeAction( action, item->children );
1189         }
1190 }
1191
1192
1193 /* Check actions for bad uses of fsm directives. We don't go inside longest
1194  * match items in actions created by ragel, since we just want the user
1195  * actions. */
1196 void ParseData::checkInlineList( Action *act, InlineList *inlineList )
1197 {
1198         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1199                 /* EOF checks. */
1200                 if ( act->numEofRefs > 0 ) {
1201                         switch ( item->type ) {
1202                         case InlineItem::PChar:
1203                                 error(item->loc) << "pointer to current element does not exist in "
1204                                                 "EOF action code" << endl;
1205                                 break;
1206                         case InlineItem::Char:
1207                                 error(item->loc) << "current element does not exist in "
1208                                                 "EOF action code" << endl;
1209                                 break;
1210                         case InlineItem::Hold:
1211                                 error(item->loc) << "changing the current element not possible in "
1212                                                 "EOF action code" << endl;
1213                                 break;
1214                         case InlineItem::Exec:
1215                                 error(item->loc) << "changing the current element not possible in "
1216                                                 "EOF action code" << endl;
1217                                 break;
1218                         case InlineItem::Goto: case InlineItem::Call:
1219                         case InlineItem::Next: case InlineItem::GotoExpr:
1220                         case InlineItem::CallExpr: case InlineItem::NextExpr:
1221                         case InlineItem::Ret:
1222                                 error(item->loc) << "changing the current state not possible in "
1223                                                 "EOF action code" << endl;
1224                                 break;
1225                         default:
1226                                 break;
1227                         }
1228                 }
1229
1230                 /* Recurse. */
1231                 if ( item->children != 0 )
1232                         checkInlineList( act, item->children );
1233         }
1234 }
1235
1236 void ParseData::checkAction( Action *action )
1237 {
1238         /* Check for actions with calls that are embedded within a longest match
1239          * machine. */
1240         if ( !action->isLmAction && action->numRefs() > 0 && action->anyCall ) {
1241                 for ( ActionRefs::Iter ar = action->actionRefs; ar.lte(); ar++ ) {
1242                         NameInst *check = *ar;
1243                         while ( check != 0 ) {
1244                                 if ( check->isLongestMatch ) {
1245                                         error(action->loc) << "within a scanner, fcall is permitted"
1246                                                 " only in pattern actions" << endl;
1247                                         break;
1248                                 }
1249                                 check = check->parent;
1250                         }
1251                 }
1252         }
1253
1254         checkInlineList( action, action->inlineList );
1255 }
1256
1257
1258 void ParseData::analyzeGraph( FsmAp *graph )
1259 {
1260         for ( ActionList::Iter act = actionList; act.lte(); act++ )
1261                 analyzeAction( act, act->inlineList );
1262
1263         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
1264                 /* The transition list. */
1265                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
1266                         for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ )
1267                                 at->value->numTransRefs += 1;
1268                 }
1269
1270                 for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ )
1271                         at->value->numToStateRefs += 1;
1272
1273                 for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ )
1274                         at->value->numFromStateRefs += 1;
1275
1276                 for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ )
1277                         at->value->numEofRefs += 1;
1278
1279                 for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) {
1280                         for ( CondSet::Iter sci = sc->condSpace->condSet; sci.lte(); sci++ )
1281                                 (*sci)->numCondRefs += 1;
1282                 }
1283         }
1284
1285         /* Checks for bad usage of directives in action code. */
1286         for ( ActionList::Iter act = actionList; act.lte(); act++ )
1287                 checkAction( act );
1288 }
1289
1290 void ParseData::prepareMachineGen( GraphDictEl *graphDictEl )
1291 {
1292         beginProcessing();
1293         initKeyOps();
1294         makeRootName();
1295         initLongestMatchData();
1296
1297         /* Make the graph, do minimization. */
1298         if ( graphDictEl == 0 )
1299                 sectionGraph = makeAll();
1300         else
1301                 sectionGraph = makeSpecific( graphDictEl );
1302
1303         /* If any errors have occured in the input file then don't write anything. */
1304         if ( gblErrorCount > 0 )
1305                 return;
1306
1307         analyzeGraph( sectionGraph );
1308
1309         /* Depends on the graph analysis. */
1310         setLongestMatchData( sectionGraph );
1311
1312         /* State numbers need to be assigned such that all final states have a
1313          * larger state id number than all non-final states. This enables the
1314          * first_final mechanism to function correctly. We also want states to be
1315          * ordered in a predictable fashion. So we first apply a depth-first
1316          * search, then do a stable sort by final state status, then assign
1317          * numbers. */
1318
1319         sectionGraph->depthFirstOrdering();
1320         sectionGraph->sortStatesByFinal();
1321         sectionGraph->setStateNumbers( 1 );
1322 }
1323
1324 void ParseData::generateXML( ostream &out )
1325 {
1326         beginProcessing();
1327
1328         /* Make the generator. */
1329         XMLCodeGen codeGen( sectionName, this, sectionGraph, out );
1330
1331         /* Write out with it. */
1332         codeGen.writeXML();
1333
1334         if ( printStatistics ) {
1335                 cerr << "fsm name  : " << sectionName << endl;
1336                 cerr << "num states: " << sectionGraph->stateList.length() << endl;
1337                 cerr << endl;
1338         }
1339 }
1340
1341 /* Send eof to all parsers. */
1342 void terminateAllParsers( )
1343 {
1344         /* FIXME: a proper token is needed here. Suppose we should use the
1345          * location of EOF in the last file that the parser was referenced in. */
1346         InputLoc loc;
1347         loc.fileName = "<EOF>";
1348         loc.line = 0;
1349         loc.col = 0;
1350         for ( ParserDict::Iter pdel = parserDict; pdel.lte(); pdel++ )
1351                 pdel->value->token( loc, _eof, 0, 0 );
1352 }
1353
1354 void checkMachines( )
1355 {
1356         for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1357                 ParseData *pd = parser->value->pd;
1358                 if ( pd->instanceList.length() > 0 ) {
1359                         /* There must be a main graph defined. */
1360                         /* No machine name. Need to have a main. Make sure it was given. */
1361                         GraphDictEl *mainEl = pd->graphDict.find( machineMain );
1362                         if ( mainEl == 0 ) {
1363                                 error(pd->sectionLoc) << "main graph not defined in \"" <<
1364                                                 pd->sectionName << "\"" << endl;
1365                         }
1366                 }
1367         }
1368 }
1369
1370 void writeLanguage( std::ostream &out )
1371 {
1372         out << " lang=\"";
1373         switch ( hostLangType ) {
1374                 case CCode:    out << "C"; break;
1375                 case DCode:    out << "D"; break;
1376                 case JavaCode: out << "Java"; break;
1377                 case RubyCode: out << "Ruby"; break;
1378         }
1379         out << "\"";
1380
1381 }
1382
1383 void writeMachines( std::ostream &out, std::string hostData, char *inputFileName )
1384 {
1385         if ( machineSpec == 0 && machineName == 0 ) {
1386                 /* No machine spec or machine name given. Generate everything. */
1387                 for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1388                         ParseData *pd = parser->value->pd;
1389                         if ( pd->instanceList.length() > 0 )
1390                                 pd->prepareMachineGen( 0 );
1391                 }
1392
1393                 if ( gblErrorCount == 0 ) {
1394                         out << "<ragel filename=\"" << inputFileName << "\"";
1395                         writeLanguage( out );
1396                         out << ">\n";
1397                         for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1398                                 ParseData *pd = parser->value->pd;
1399                                 if ( pd->instanceList.length() > 0 )
1400                                         pd->generateXML( out );
1401                         }
1402                         out << hostData;
1403                         out << "</ragel>\n";
1404                 }
1405         }
1406         else if ( parserDict.length() > 0 ) {
1407                 /* There is either a machine spec or machine name given. */
1408                 ParseData *parseData = 0;
1409                 GraphDictEl *graphDictEl = 0;
1410
1411                 /* Traverse the sections, break out when we find a section/machine
1412                  * that matches the one specified. */
1413                 for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
1414                         ParseData *checkPd = parser->value->pd;
1415                         if ( machineSpec == 0 || strcmp( checkPd->sectionName, machineSpec ) == 0 ) {
1416                                 GraphDictEl *checkGdEl = 0;
1417                                 if ( machineName == 0 || (checkGdEl =
1418                                                 checkPd->graphDict.find( machineName )) != 0 )
1419                                 {
1420                                         /* Have a machine spec and/or machine name that matches
1421                                          * the -M/-S options. */
1422                                         parseData = checkPd;
1423                                         graphDictEl = checkGdEl;
1424                                         break;
1425                                 }
1426                         }
1427                 }
1428
1429                 if ( parseData == 0 )
1430                         error() << "could not locate machine specified with -S and/or -M" << endl;
1431                 else {
1432                         /* Section/Machine to emit was found. Prepare and emit it. */
1433                         parseData->prepareMachineGen( graphDictEl );
1434                         if ( gblErrorCount == 0 ) {
1435                                 out << "<ragel filename=\"" << inputFileName << "\"";
1436                                 writeLanguage( out );
1437                                 out << ">\n";
1438                                 parseData->generateXML( out );
1439                                 out << hostData;
1440                                 out << "</ragel>\n";
1441                         }
1442                 }
1443         }
1444 }