ragel/parsetree.cpp

   1 /*
   2  *  Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
   3  */
   4
   5 /*  This file is part of Ragel.
   6  *
   7  *  Ragel is free software; you can redistribute it and/or modify
   8  *  it under the terms of the GNU General Public License as published by
   9  *  the Free Software Foundation; either version 2 of the License, or
  10  *  (at your option) any later version.
  11  *
  12  *  Ragel is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  *  GNU General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU General Public License
  18  *  along with Ragel; if not, write to the Free Software
  19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  */
  21
  22 #include <iostream>
  23 #include <iomanip>
  24 #include <errno.h>
  25 #include <limits.h>
  26 #include <stdlib.h>
  27
  28 /* Parsing. */
  29 #include "ragel.h"
  30 #include "rlparse.h"
  31 #include "parsetree.h"
  32
  33 using namespace std;
  34 ostream &operator<<( ostream &out, const NameRef &nameRef );
  35 ostream &operator<<( ostream &out, const NameInst &nameInst );
  36
  37 /* Convert the literal string which comes in from the scanner into an array of
  38  * characters with escapes and options interpreted. Also null terminates the
  39  * string. Though this null termination should not be relied on for
  40  * interpreting literals in the parser because the string may contain a
  41  * literal string with \0 */
  42 void Token::prepareLitString( Token &result, bool &caseInsensitive )
  43 {
  44         result.data = new char[this->length+1];
  45         caseInsensitive = false;
  46
  47         char *src = this->data + 1;
  48         char *end = this->data + this->length - 1;
  49
  50         while ( *end != '\'' && *end != '\"' ) {
  51                 if ( *end == 'i' )
  52                         caseInsensitive = true;
  53                 else {
  54                         error( this->loc ) << "literal string '" << *end <<
  55                                         "' option not supported" << endl;
  56                 }
  57                 end -= 1;
  58         }
  59
  60         char *dest = result.data;
  61         int len = 0;
  62         while ( src != end ) {
  63                 if ( *src == '\\' ) {
  64                         switch ( src[1] ) {
  65                         case '0': dest[len++] = '\0'; break;
  66                         case 'a': dest[len++] = '\a'; break;
  67                         case 'b': dest[len++] = '\b'; break;
  68                         case 't': dest[len++] = '\t'; break;
  69                         case 'n': dest[len++] = '\n'; break;
  70                         case 'v': dest[len++] = '\v'; break;
  71                         case 'f': dest[len++] = '\f'; break;
  72                         case 'r': dest[len++] = '\r'; break;
  73                         case '\n':  break;
  74                         default: dest[len++] = src[1]; break;
  75                         }
  76                         src += 2;
  77                 }
  78                 else {
  79                         dest[len++] = *src++;
  80                 }
  81         }
  82         result.length = len;
  83         result.data[result.length] = 0;
  84 }
  85
  86
  87 FsmAp *VarDef::walk( ParseData *pd )
  88 {
  89         /* We enter into a new name scope. */
  90         NameFrame nameFrame = pd->enterNameScope( true, 1 );
  91
  92         /* Recurse on the expression. */
  93         FsmAp *rtnVal = joinOrLm->walk( pd );
  94
  95         /* Do the tranfer of local error actions. */
  96         LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name );
  97         if ( localErrDictEl != 0 ) {
  98                 for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ )
  99                         rtnVal->transferErrorActions( state, localErrDictEl->value );
 100         }
 101
 102         /* If the expression below is a join operation with multiple expressions
 103          * then it just had epsilon transisions resolved. If it is a join
 104          * with only a single expression then run the epsilon op now. */
 105         if ( joinOrLm->type == JoinOrLm::JoinType && joinOrLm->join->exprList.length() == 1 )
 106                 rtnVal->epsilonOp();
 107
 108         /* We can now unset entry points that are not longer used. */
 109         pd->unsetObsoleteEntries( rtnVal );
 110
 111         /* If the name of the variable is referenced then add the entry point to
 112          * the graph. */
 113         if ( pd->curNameInst->numRefs > 0 )
 114                 rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState );
 115
 116         /* Pop the name scope. */
 117         pd->popNameScope( nameFrame );
 118         return rtnVal;
 119 }
 120
 121 void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd )
 122 {
 123         /* The variable definition enters a new scope. */
 124         NameInst *prevNameInst = pd->curNameInst;
 125         pd->curNameInst = pd->addNameInst( loc, name, false );
 126
 127         if ( joinOrLm->type == JoinOrLm::LongestMatchType )
 128                 pd->curNameInst->isLongestMatch = true;
 129
 130         /* Recurse. */
 131         joinOrLm->makeNameTree( pd );
 132
 133         /* The name scope ends, pop the name instantiation. */
 134         pd->curNameInst = prevNameInst;
 135 }
 136
 137 void VarDef::resolveNameRefs( ParseData *pd )
 138 {
 139         /* Entering into a new scope. */
 140         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 141
 142         /* Recurse. */
 143         joinOrLm->resolveNameRefs( pd );
 144
 145         /* The name scope ends, pop the name instantiation. */
 146         pd->popNameScope( nameFrame );
 147 }
 148
 149 InputLoc LongestMatchPart::getLoc()
 150 {
 151         return action != 0 ? action->loc : semiLoc;
 152 }
 153
 154 /*
 155  * If there are any LMs then all of the following entry points must reset
 156  * tokstart:
 157  *
 158  *  1. fentry(StateRef)
 159  *  2. ftoto(StateRef), fcall(StateRef), fnext(StateRef)
 160  *  3. targt of any transition that has an fcall (the return loc).
 161  *  4. start state of all longest match routines.
 162  */
 163
 164 Action *LongestMatch::newAction( ParseData *pd, const InputLoc &loc,
 165                 const char *name, InlineList *inlineList )
 166 {
 167         Action *action = new Action( loc, name, inlineList, pd->nextCondId++ );
 168         action->actionRefs.append( pd->curNameInst );
 169         pd->actionList.append( action );
 170         action->isLmAction = true;
 171         return action;
 172 }
 173
 174 void LongestMatch::makeActions( ParseData *pd )
 175 {
 176         /* Make actions that set the action id. */
 177         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 178                 /* For each part create actions for setting the match type.  We need
 179                  * to do this so that the actions will go into the actionIndex. */
 180                 InlineList *inlineList = new InlineList;
 181                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, InlineItem::LmSetActId ) );
 182                 char *actName = new char[50];
 183                 sprintf( actName, "store%i", lmi->longestMatchId );
 184                 lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList );
 185         }
 186
 187         /* Make actions that execute the user action and restart on the last character. */
 188         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 189                 /* For each part create actions for setting the match type.  We need
 190                  * to do this so that the actions will go into the actionIndex. */
 191                 InlineList *inlineList = new InlineList;
 192                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
 193                                 InlineItem::LmOnLast ) );
 194                 char *actName = new char[50];
 195                 sprintf( actName, "last%i", lmi->longestMatchId );
 196                 lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList );
 197         }
 198
 199         /* Make actions that execute the user action and restart on the next
 200          * character.  These actions will set tokend themselves (it is the current
 201          * char). */
 202         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 203                 /* For each part create actions for setting the match type.  We need
 204                  * to do this so that the actions will go into the actionIndex. */
 205                 InlineList *inlineList = new InlineList;
 206                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
 207                                 InlineItem::LmOnNext ) );
 208                 char *actName = new char[50];
 209                 sprintf( actName, "next%i", lmi->longestMatchId );
 210                 lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList );
 211         }
 212
 213         /* Make actions that execute the user action and restart at tokend. These
 214          * actions execute some time after matching the last char. */
 215         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 216                 /* For each part create actions for setting the match type.  We need
 217                  * to do this so that the actions will go into the actionIndex. */
 218                 InlineList *inlineList = new InlineList;
 219                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
 220                                 InlineItem::LmOnLagBehind ) );
 221                 char *actName = new char[50];
 222                 sprintf( actName, "lag%i", lmi->longestMatchId );
 223                 lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList );
 224         }
 225
 226         InputLoc loc;
 227         loc.line = 1;
 228         loc.col = 1;
 229
 230         /* Create the error action. */
 231         InlineList *il6 = new InlineList;
 232         il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) );
 233         lmActSelect = newAction( pd, loc, "switch", il6 );
 234 }
 235
 236 void LongestMatch::findName( ParseData *pd )
 237 {
 238         NameInst *nameInst = pd->curNameInst;
 239         while ( nameInst->name == 0 ) {
 240                 nameInst = nameInst->parent;
 241                 /* Since every machine must must have a name, we should always find a
 242                  * name for the longest match. */
 243                 assert( nameInst != 0 );
 244         }
 245         name = nameInst->name;
 246 }
 247
 248 void LongestMatch::makeNameTree( ParseData *pd )
 249 {
 250         /* Create an anonymous scope for the longest match. Will be used for
 251          * restarting machine after matching a token. */
 252         NameInst *prevNameInst = pd->curNameInst;
 253         pd->curNameInst = pd->addNameInst( loc, 0, false );
 254
 255         /* Recurse into all parts of the longest match operator. */
 256         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ )
 257                 lmi->join->makeNameTree( pd );
 258
 259         /* Traverse the name tree upwards to find a name for this lm. */
 260         findName( pd );
 261
 262         /* Also make the longest match's actions at this point. */
 263         makeActions( pd );
 264
 265         /* The name scope ends, pop the name instantiation. */
 266         pd->curNameInst = prevNameInst;
 267 }
 268
 269 void LongestMatch::resolveNameRefs( ParseData *pd )
 270 {
 271         /* The longest match gets its own name scope. */
 272         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 273
 274         /* Take an action reference for each longest match item and recurse. */
 275         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 276                 /* Record the reference if the item has an action. */
 277                 if ( lmi->action != 0 )
 278                         lmi->action->actionRefs.append( pd->localNameScope );
 279
 280                 /* Recurse down the join. */
 281                 lmi->join->resolveNameRefs( pd );
 282         }
 283
 284         /* The name scope ends, pop the name instantiation. */
 285         pd->popNameScope( nameFrame );
 286 }
 287
 288 void LongestMatch::restart( FsmAp *graph, TransAp *trans )
 289 {
 290         StateAp *fromState = trans->fromState;
 291         graph->detachTrans( fromState, trans->toState, trans );
 292         graph->attachTrans( fromState, graph->startState, trans );
 293 }
 294
 295 void LongestMatch::runLonestMatch( ParseData *pd, FsmAp *graph )
 296 {
 297         graph->markReachableFromHereStopFinal( graph->startState );
 298         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 299                 if ( ms->stateBits & STB_ISMARKED ) {
 300                         ms->lmItemSet.insert( 0 );
 301                         ms->stateBits &= ~ STB_ISMARKED;
 302                 }
 303         }
 304
 305         /* Transfer the first item of non-empty lmAction tables to the item sets
 306          * of the states that follow. Exclude states that have no transitions out.
 307          * This must happen on a separate pass so that on each iteration of the
 308          * next pass we have the item set entries from all lmAction tables. */
 309         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
 310                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
 311                         if ( trans->lmActionTable.length() > 0 ) {
 312                                 LmActionTableEl *lmAct = trans->lmActionTable.data;
 313                                 StateAp *toState = trans->toState;
 314                                 assert( toState );
 315
 316                                 /* Check if there are transitions out, this may be a very
 317                                  * close approximation? Out transitions going nowhere?
 318                                  * FIXME: Check. */
 319                                 if ( toState->outList.length() > 0 ) {
 320                                         /* Fill the item sets. */
 321                                         graph->markReachableFromHereStopFinal( toState );
 322                                         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 323                                                 if ( ms->stateBits & STB_ISMARKED ) {
 324                                                         ms->lmItemSet.insert( lmAct->value );
 325                                                         ms->stateBits &= ~ STB_ISMARKED;
 326                                                 }
 327                                         }
 328                                 }
 329                         }
 330                 }
 331         }
 332
 333         /* The lmItem sets are now filled, telling us which longest match rules
 334          * can succeed in which states. First determine if we need to make sure
 335          * act is defaulted to zero. We need to do this if there are any states
 336          * with lmItemSet.length() > 1 and NULL is included. That is, that the
 337          * switch may get called when in fact nothing has been matched. */
 338         int maxItemSetLength = 0;
 339         graph->markReachableFromHereStopFinal( graph->startState );
 340         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 341                 if ( ms->stateBits & STB_ISMARKED ) {
 342                         if ( ms->lmItemSet.length() > maxItemSetLength )
 343                                 maxItemSetLength = ms->lmItemSet.length();
 344                         ms->stateBits &= ~ STB_ISMARKED;
 345                 }
 346         }
 347
 348         /* The actions executed on starting to match a token. */
 349         graph->isolateStartState();
 350         graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart );
 351         graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
 352         if ( maxItemSetLength > 1 ) {
 353                 /* The longest match action switch may be called when tokens are
 354                  * matched, in which case act must be initialized, there must be a
 355                  * case to handle the error, and the generated machine will require an
 356                  * error state. */
 357                 lmSwitchHandlesError = true;
 358                 pd->lmRequiresErrorState = true;
 359                 graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
 360         }
 361
 362         /* The place to store transitions to restart. It maybe possible for the
 363          * restarting to affect the searching through the graph that follows. For
 364          * now take the safe route and save the list of transitions to restart
 365          * until after all searching is done. */
 366         Vector<TransAp*> restartTrans;
 367
 368         /* Set actions that do immediate token recognition, set the longest match part
 369          * id and set the token ending. */
 370         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
 371                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
 372                         if ( trans->lmActionTable.length() > 0 ) {
 373                                 LmActionTableEl *lmAct = trans->lmActionTable.data;
 374                                 StateAp *toState = trans->toState;
 375                                 assert( toState );
 376
 377                                 /* Check if there are transitions out, this may be a very
 378                                  * close approximation? Out transitions going nowhere?
 379                                  * FIXME: Check. */
 380                                 if ( toState->outList.length() == 0 ) {
 381                                         /* Can execute the immediate action for the longest match
 382                                          * part. Redirect the action to the start state. */
 383                                         trans->actionTable.setAction( lmAct->key,
 384                                                         lmAct->value->actOnLast );
 385                                         restartTrans.append( trans );
 386                                 }
 387                                 else {
 388                                         /* Look for non final states that have a non-empty item
 389                                          * set. If these are present then we need to record the
 390                                          * end of the token.  Also Find the highest item set
 391                                          * length reachable from here (excluding at transtions to
 392                                          * final states). */
 393                                         bool nonFinalNonEmptyItemSet = false;
 394                                         maxItemSetLength = 0;
 395                                         graph->markReachableFromHereStopFinal( toState );
 396                                         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 397                                                 if ( ms->stateBits & STB_ISMARKED ) {
 398                                                         if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
 399                                                                 nonFinalNonEmptyItemSet = true;
 400                                                         if ( ms->lmItemSet.length() > maxItemSetLength )
 401                                                                 maxItemSetLength = ms->lmItemSet.length();
 402                                                         ms->stateBits &= ~ STB_ISMARKED;
 403                                                 }
 404                                         }
 405
 406                                         /* If there are reachable states that are not final and
 407                                          * have non empty item sets or that have an item set
 408                                          * length greater than one then we need to set tokend
 409                                          * because the error action that matches the token will
 410                                          * require it. */
 411                                         if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
 412                                                 trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
 413
 414                                         /* Some states may not know which longest match item to
 415                                          * execute, must set it. */
 416                                         if ( maxItemSetLength > 1 ) {
 417                                                 /* There are transitions out, another match may come. */
 418                                                 trans->actionTable.setAction( lmAct->key,
 419                                                                 lmAct->value->setActId );
 420                                         }
 421                                 }
 422                         }
 423                 }
 424         }
 425
 426         /* Now that all graph searching is done it certainly safe set the
 427          * restarting. It may be safe above, however this must be verified. */
 428         for ( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ )
 429                 restart( graph, *pt );
 430
 431         int lmErrActionOrd = pd->curActionOrd++;
 432
 433         /* Embed the error for recognizing a char. */
 434         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
 435                 if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
 436                         if ( st->isFinState() ) {
 437                                 /* On error execute the onActNext action, which knows that
 438                                  * the last character of the token was one back and restart. */
 439                                 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
 440                                                 &st->lmItemSet[0]->actOnNext, 1 );
 441                                 st->eofActionTable.setAction( lmErrActionOrd,
 442                                                 st->lmItemSet[0]->actOnNext );
 443                                 st->eofTarget = graph->startState;
 444                         }
 445                         else {
 446                                 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
 447                                                 &st->lmItemSet[0]->actLagBehind, 1 );
 448                                 st->eofActionTable.setAction( lmErrActionOrd,
 449                                                 st->lmItemSet[0]->actLagBehind );
 450                                 st->eofTarget = graph->startState;
 451                         }
 452                 }
 453                 else if ( st->lmItemSet.length() > 1 ) {
 454                         /* Need to use the select. Take note of the which items the select
 455                          * is needed for so only the necessary actions are included. */
 456                         for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
 457                                 if ( *plmi != 0 )
 458                                         (*plmi)->inLmSelect = true;
 459                         }
 460                         /* On error, execute the action select and go to the start state. */
 461                         graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
 462                                         &lmActSelect, 1 );
 463                         st->eofActionTable.setAction( lmErrActionOrd, lmActSelect );
 464                         st->eofTarget = graph->startState;
 465                 }
 466         }
 467
 468         /* Finally, the start state should be made final. */
 469         graph->setFinState( graph->startState );
 470 }
 471
 472 FsmAp *LongestMatch::walk( ParseData *pd )
 473 {
 474         /* The longest match has it's own name scope. */
 475         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 476
 477         /* Make each part of the longest match. */
 478         FsmAp **parts = new FsmAp*[longestMatchList->length()];
 479         LmPartList::Iter lmi = *longestMatchList;
 480         for ( int i = 0; lmi.lte(); lmi++, i++ ) {
 481                 /* Create the machine and embed the setting of the longest match id. */
 482                 parts[i] = lmi->join->walk( pd );
 483                 parts[i]->longMatchAction( pd->curActionOrd++, lmi );
 484         }
 485
 486         /* Union machines one and up with machine zero. The grammar dictates that
 487          * there will always be at least one part. */
 488         FsmAp *rtnVal = parts[0];
 489         for ( int i = 1; i < longestMatchList->length(); i++ ) {
 490                 rtnVal->unionOp( parts[i] );
 491                 afterOpMinimize( rtnVal );
 492         }
 493
 494         runLonestMatch( pd, rtnVal );
 495
 496         /* Pop the name scope. */
 497         pd->popNameScope( nameFrame );
 498
 499         delete[] parts;
 500         return rtnVal;
 501 }
 502
 503 FsmAp *JoinOrLm::walk( ParseData *pd )
 504 {
 505         FsmAp *rtnVal = 0;
 506         switch ( type ) {
 507         case JoinType:
 508                 rtnVal = join->walk( pd );
 509                 break;
 510         case LongestMatchType:
 511                 rtnVal = longestMatch->walk( pd );
 512                 break;
 513         }
 514         return rtnVal;
 515 }
 516
 517 void JoinOrLm::makeNameTree( ParseData *pd )
 518 {
 519         switch ( type ) {
 520         case JoinType:
 521                 join->makeNameTree( pd );
 522                 break;
 523         case LongestMatchType:
 524                 longestMatch->makeNameTree( pd );
 525                 break;
 526         }
 527 }
 528
 529 void JoinOrLm::resolveNameRefs( ParseData *pd )
 530 {
 531         switch ( type ) {
 532         case JoinType:
 533                 join->resolveNameRefs( pd );
 534                 break;
 535         case LongestMatchType:
 536                 longestMatch->resolveNameRefs( pd );
 537                 break;
 538         }
 539 }
 540
 541
 542 /* Construct with a location and the first expression. */
 543 Join::Join( const InputLoc &loc, Expression *expr )
 544 :
 545         loc(loc)
 546 {
 547         exprList.append( expr );
 548 }
 549
 550 /* Construct with a location and the first expression. */
 551 Join::Join( Expression *expr )
 552 :
 553         loc(loc)
 554 {
 555         exprList.append( expr );
 556 }
 557
 558 /* Walk an expression node. */
 559 FsmAp *Join::walk( ParseData *pd )
 560 {
 561         if ( exprList.length() > 1 )
 562                 return walkJoin( pd );
 563         else
 564                 return exprList.head->walk( pd );
 565 }
 566
 567 /* There is a list of expressions to join. */
 568 FsmAp *Join::walkJoin( ParseData *pd )
 569 {
 570         /* We enter into a new name scope. */
 571         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 572
 573         /* Evaluate the machines. */
 574         FsmAp **fsms = new FsmAp*[exprList.length()];
 575         ExprList::Iter expr = exprList;
 576         for ( int e = 0; e < exprList.length(); e++, expr++ )
 577                 fsms[e] = expr->walk( pd );
 578
 579         /* Get the start and final names. Final is
 580          * guaranteed to exist, start is not. */
 581         NameInst *startName = pd->curNameInst->start;
 582         NameInst *finalName = pd->curNameInst->final;
 583
 584         int startId = -1;
 585         if ( startName != 0 ) {
 586                 /* Take note that there was an implicit link to the start machine. */
 587                 pd->localNameScope->referencedNames.append( startName );
 588                 startId = startName->id;
 589         }
 590
 591         /* A final id of -1 indicates there is no epsilon that references the
 592          * final state, therefor do not create one or set an entry point to it. */
 593         int finalId = -1;
 594         if ( finalName->numRefs > 0 )
 595                 finalId = finalName->id;
 596
 597         /* Join machines 1 and up onto machine 0. */
 598         FsmAp *retFsm = fsms[0];
 599         retFsm->joinOp( startId, finalId, fsms+1, exprList.length()-1 );
 600
 601         /* We can now unset entry points that are not longer used. */
 602         pd->unsetObsoleteEntries( retFsm );
 603
 604         /* Pop the name scope. */
 605         pd->popNameScope( nameFrame );
 606
 607         delete[] fsms;
 608         return retFsm;
 609 }
 610
 611 void Join::makeNameTree( ParseData *pd )
 612 {
 613         if ( exprList.length() > 1 ) {
 614                 /* Create the new anonymous scope. */
 615                 NameInst *prevNameInst = pd->curNameInst;
 616                 pd->curNameInst = pd->addNameInst( loc, 0, false );
 617
 618                 /* Join scopes need an implicit "final" target. */
 619                 pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final",
 620                                 pd->nextNameId++, false );
 621
 622                 /* Recurse into all expressions in the list. */
 623                 for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
 624                         expr->makeNameTree( pd );
 625
 626                 /* The name scope ends, pop the name instantiation. */
 627                 pd->curNameInst = prevNameInst;
 628         }
 629         else {
 630                 /* Recurse into the single expression. */
 631                 exprList.head->makeNameTree( pd );
 632         }
 633 }
 634
 635
 636 void Join::resolveNameRefs( ParseData *pd )
 637 {
 638         /* Branch on whether or not there is to be a join. */
 639         if ( exprList.length() > 1 ) {
 640                 /* The variable definition enters a new scope. */
 641                 NameFrame nameFrame = pd->enterNameScope( true, 1 );
 642
 643                 /* The join scope must contain a start label. */
 644                 NameSet resolved = pd->resolvePart( pd->localNameScope, "start", true );
 645                 if ( resolved.length() > 0 ) {
 646                         /* Take the first. */
 647                         pd->curNameInst->start = resolved[0];
 648                         if ( resolved.length() > 1 ) {
 649                                 /* Complain about the multiple references. */
 650                                 error(loc) << "multiple start labels" << endl;
 651                                 errorStateLabels( resolved );
 652                         }
 653                 }
 654
 655                 /* Make sure there is a start label. */
 656                 if ( pd->curNameInst->start != 0 ) {
 657                         /* There is an implicit reference to start name. */
 658                         pd->curNameInst->start->numRefs += 1;
 659                 }
 660                 else {
 661                         /* No start label. Complain and recover by adding a label to the
 662                          * adding one. Recover ignoring the problem. */
 663                         error(loc) << "no start label" << endl;
 664                 }
 665
 666                 /* Recurse into all expressions in the list. */
 667                 for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
 668                         expr->resolveNameRefs( pd );
 669
 670                 /* The name scope ends, pop the name instantiation. */
 671                 pd->popNameScope( nameFrame );
 672         }
 673         else {
 674                 /* Recurse into the single expression. */
 675                 exprList.head->resolveNameRefs( pd );
 676         }
 677 }
 678
 679 /* Clean up after an expression node. */
 680 Expression::~Expression()
 681 {
 682         switch ( type ) {
 683                 case OrType: case IntersectType: case SubtractType:
 684                 case StrongSubtractType:
 685                         delete expression;
 686                         delete term;
 687                         break;
 688                 case TermType:
 689                         delete term;
 690                         break;
 691                 case BuiltinType:
 692                         break;
 693         }
 694 }
 695
 696 /* Evaluate a single expression node. */
 697 FsmAp *Expression::walk( ParseData *pd, bool lastInSeq )
 698 {
 699         FsmAp *rtnVal = 0;
 700         switch ( type ) {
 701                 case OrType: {
 702                         /* Evaluate the expression. */
 703                         rtnVal = expression->walk( pd, false );
 704                         /* Evaluate the term. */
 705                         FsmAp *rhs = term->walk( pd );
 706                         /* Perform union. */
 707                         rtnVal->unionOp( rhs );
 708                         afterOpMinimize( rtnVal, lastInSeq );
 709                         break;
 710                 }
 711                 case IntersectType: {
 712                         /* Evaluate the expression. */
 713                         rtnVal = expression->walk( pd );
 714                         /* Evaluate the term. */
 715                         FsmAp *rhs = term->walk( pd );
 716                         /* Perform intersection. */
 717                         rtnVal->intersectOp( rhs );
 718                         afterOpMinimize( rtnVal, lastInSeq );
 719                         break;
 720                 }
 721                 case SubtractType: {
 722                         /* Evaluate the expression. */
 723                         rtnVal = expression->walk( pd );
 724                         /* Evaluate the term. */
 725                         FsmAp *rhs = term->walk( pd );
 726                         /* Perform subtraction. */
 727                         rtnVal->subtractOp( rhs );
 728                         afterOpMinimize( rtnVal, lastInSeq );
 729                         break;
 730                 }
 731                 case StrongSubtractType: {
 732                         /* Evaluate the expression. */
 733                         rtnVal = expression->walk( pd );
 734
 735                         /* Evaluate the term and pad it with any* machines. */
 736                         FsmAp *rhs = dotStarFsm( pd );
 737                         FsmAp *termFsm = term->walk( pd );
 738                         FsmAp *trailAnyStar = dotStarFsm( pd );
 739                         rhs->concatOp( termFsm );
 740                         rhs->concatOp( trailAnyStar );
 741
 742                         /* Perform subtraction. */
 743                         rtnVal->subtractOp( rhs );
 744                         afterOpMinimize( rtnVal, lastInSeq );
 745                         break;
 746                 }
 747                 case TermType: {
 748                         /* Return result of the term. */
 749                         rtnVal = term->walk( pd );
 750                         break;
 751                 }
 752                 case BuiltinType: {
 753                         /* Duplicate the builtin. */
 754                         rtnVal = makeBuiltin( builtin, pd );
 755                         break;
 756                 }
 757         }
 758
 759         return rtnVal;
 760 }
 761
 762 void Expression::makeNameTree( ParseData *pd )
 763 {
 764         switch ( type ) {
 765         case OrType:
 766         case IntersectType:
 767         case SubtractType:
 768         case StrongSubtractType:
 769                 expression->makeNameTree( pd );
 770                 term->makeNameTree( pd );
 771                 break;
 772         case TermType:
 773                 term->makeNameTree( pd );
 774                 break;
 775         case BuiltinType:
 776                 break;
 777         }
 778 }
 779
 780 void Expression::resolveNameRefs( ParseData *pd )
 781 {
 782         switch ( type ) {
 783         case OrType:
 784         case IntersectType:
 785         case SubtractType:
 786         case StrongSubtractType:
 787                 expression->resolveNameRefs( pd );
 788                 term->resolveNameRefs( pd );
 789                 break;
 790         case TermType:
 791                 term->resolveNameRefs( pd );
 792                 break;
 793         case BuiltinType:
 794                 break;
 795         }
 796 }
 797
 798 /* Clean up after a term node. */
 799 Term::~Term()
 800 {
 801         switch ( type ) {
 802                 case ConcatType:
 803                 case RightStartType:
 804                 case RightFinishType:
 805                 case LeftType:
 806                         delete term;
 807                         delete factorWithAug;
 808                         break;
 809                 case FactorWithAugType:
 810                         delete factorWithAug;
 811                         break;
 812         }
 813 }
 814
 815 /* Evaluate a term node. */
 816 FsmAp *Term::walk( ParseData *pd, bool lastInSeq )
 817 {
 818         FsmAp *rtnVal = 0;
 819         switch ( type ) {
 820                 case ConcatType: {
 821                         /* Evaluate the Term. */
 822                         rtnVal = term->walk( pd, false );
 823                         /* Evaluate the FactorWithRep. */
 824                         FsmAp *rhs = factorWithAug->walk( pd );
 825                         /* Perform concatenation. */
 826                         rtnVal->concatOp( rhs );
 827                         afterOpMinimize( rtnVal, lastInSeq );
 828                         break;
 829                 }
 830                 case RightStartType: {
 831                         /* Evaluate the Term. */
 832                         rtnVal = term->walk( pd );
 833
 834                         /* Evaluate the FactorWithRep. */
 835                         FsmAp *rhs = factorWithAug->walk( pd );
 836
 837                         /* Set up the priority descriptors. The left machine gets the
 838                          * lower priority where as the right get the higher start priority. */
 839                         priorDescs[0].key = pd->nextPriorKey++;
 840                         priorDescs[0].priority = 0;
 841                         rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
 842
 843                         /* The start transitions of the right machine gets the higher
 844                          * priority. Use the same unique key. */
 845                         priorDescs[1].key = priorDescs[0].key;
 846                         priorDescs[1].priority = 1;
 847                         rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
 848
 849                         /* Perform concatenation. */
 850                         rtnVal->concatOp( rhs );
 851                         afterOpMinimize( rtnVal, lastInSeq );
 852                         break;
 853                 }
 854                 case RightFinishType: {
 855                         /* Evaluate the Term. */
 856                         rtnVal = term->walk( pd );
 857
 858                         /* Evaluate the FactorWithRep. */
 859                         FsmAp *rhs = factorWithAug->walk( pd );
 860
 861                         /* Set up the priority descriptors. The left machine gets the
 862                          * lower priority where as the finishing transitions to the right
 863                          * get the higher priority. */
 864                         priorDescs[0].key = pd->nextPriorKey++;
 865                         priorDescs[0].priority = 0;
 866                         rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
 867
 868                         /* The finishing transitions of the right machine get the higher
 869                          * priority. Use the same unique key. */
 870                         priorDescs[1].key = priorDescs[0].key;
 871                         priorDescs[1].priority = 1;
 872                         rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
 873
 874                         /* If the right machine's start state is final we need to guard
 875                          * against the left machine persisting by moving through the empty
 876                          * string. */
 877                         if ( rhs->startState->isFinState() ) {
 878                                 rhs->startState->outPriorTable.setPrior(
 879                                                 pd->curPriorOrd++, &priorDescs[1] );
 880                         }
 881
 882                         /* Perform concatenation. */
 883                         rtnVal->concatOp( rhs );
 884                         afterOpMinimize( rtnVal, lastInSeq );
 885                         break;
 886                 }
 887                 case LeftType: {
 888                         /* Evaluate the Term. */
 889                         rtnVal = term->walk( pd );
 890
 891                         /* Evaluate the FactorWithRep. */
 892                         FsmAp *rhs = factorWithAug->walk( pd );
 893
 894                         /* Set up the priority descriptors. The left machine gets the
 895                          * higher priority. */
 896                         priorDescs[0].key = pd->nextPriorKey++;
 897                         priorDescs[0].priority = 1;
 898                         rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
 899
 900                         /* The right machine gets the lower priority. We cannot use
 901                          * allTransPrior here in case the start state of the right machine
 902                          * is final. It would allow the right machine thread to run along
 903                          * with the left if just passing through the start state. Using
 904                          * startFsmPrior prevents this. */
 905                         priorDescs[1].key = priorDescs[0].key;
 906                         priorDescs[1].priority = 0;
 907                         rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
 908
 909                         /* Perform concatenation. */
 910                         rtnVal->concatOp( rhs );
 911                         afterOpMinimize( rtnVal, lastInSeq );
 912                         break;
 913                 }
 914                 case FactorWithAugType: {
 915                         rtnVal = factorWithAug->walk( pd );
 916                         break;
 917                 }
 918         }
 919         return rtnVal;
 920 }
 921
 922 void Term::makeNameTree( ParseData *pd )
 923 {
 924         switch ( type ) {
 925         case ConcatType:
 926         case RightStartType:
 927         case RightFinishType:
 928         case LeftType:
 929                 term->makeNameTree( pd );
 930                 factorWithAug->makeNameTree( pd );
 931                 break;
 932         case FactorWithAugType:
 933                 factorWithAug->makeNameTree( pd );
 934                 break;
 935         }
 936 }
 937
 938 void Term::resolveNameRefs( ParseData *pd )
 939 {
 940         switch ( type ) {
 941         case ConcatType:
 942         case RightStartType:
 943         case RightFinishType:
 944         case LeftType:
 945                 term->resolveNameRefs( pd );
 946                 factorWithAug->resolveNameRefs( pd );
 947                 break;
 948         case FactorWithAugType:
 949                 factorWithAug->resolveNameRefs( pd );
 950                 break;
 951         }
 952 }
 953
 954 /* Clean up after a factor with augmentation node. */
 955 FactorWithAug::~FactorWithAug()
 956 {
 957         delete factorWithRep;
 958
 959         /* Walk the vector of parser actions, deleting function names. */
 960
 961         /* Clean up priority descriptors. */
 962         if ( priorDescs != 0 )
 963                 delete[] priorDescs;
 964 }
 965
 966 void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd )
 967 {
 968         /* Assign actions. */
 969         for ( int i = 0; i < actions.length(); i++ )  {
 970                 switch ( actions[i].type ) {
 971                 /* Transition actions. */
 972                 case at_start:
 973                         graph->startFsmAction( actionOrd[i], actions[i].action );
 974                         afterOpMinimize( graph );
 975                         break;
 976                 case at_all:
 977                         graph->allTransAction( actionOrd[i], actions[i].action );
 978                         break;
 979                 case at_finish:
 980                         graph->finishFsmAction( actionOrd[i], actions[i].action );
 981                         break;
 982                 case at_leave:
 983                         graph->leaveFsmAction( actionOrd[i], actions[i].action );
 984                         break;
 985
 986                 /* Global error actions. */
 987                 case at_start_gbl_error:
 988                         graph->startErrorAction( actionOrd[i], actions[i].action, 0 );
 989                         afterOpMinimize( graph );
 990                         break;
 991                 case at_all_gbl_error:
 992                         graph->allErrorAction( actionOrd[i], actions[i].action, 0 );
 993                         break;
 994                 case at_final_gbl_error:
 995                         graph->finalErrorAction( actionOrd[i], actions[i].action, 0 );
 996                         break;
 997                 case at_not_start_gbl_error:
 998                         graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 );
 999                         break;
1000                 case at_not_final_gbl_error:
1001                         graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 );
1002                         break;
1003                 case at_middle_gbl_error:
1004                         graph->middleErrorAction( actionOrd[i], actions[i].action, 0 );
1005                         break;
1006
1007                 /* Local error actions. */
1008                 case at_start_local_error:
1009                         graph->startErrorAction( actionOrd[i], actions[i].action,
1010                                         actions[i].localErrKey );
1011                         afterOpMinimize( graph );
1012                         break;
1013                 case at_all_local_error:
1014                         graph->allErrorAction( actionOrd[i], actions[i].action,
1015                                         actions[i].localErrKey );
1016                         break;
1017                 case at_final_local_error:
1018                         graph->finalErrorAction( actionOrd[i], actions[i].action,
1019                                         actions[i].localErrKey );
1020                         break;
1021                 case at_not_start_local_error:
1022                         graph->notStartErrorAction( actionOrd[i], actions[i].action,
1023                                         actions[i].localErrKey );
1024                         break;
1025                 case at_not_final_local_error:
1026                         graph->notFinalErrorAction( actionOrd[i], actions[i].action,
1027                                         actions[i].localErrKey );
1028                         break;
1029                 case at_middle_local_error:
1030                         graph->middleErrorAction( actionOrd[i], actions[i].action,
1031                                         actions[i].localErrKey );
1032                         break;
1033
1034                 /* EOF actions. */
1035                 case at_start_eof:
1036                         graph->startEOFAction( actionOrd[i], actions[i].action );
1037                         afterOpMinimize( graph );
1038                         break;
1039                 case at_all_eof:
1040                         graph->allEOFAction( actionOrd[i], actions[i].action );
1041                         break;
1042                 case at_final_eof:
1043                         graph->finalEOFAction( actionOrd[i], actions[i].action );
1044                         break;
1045                 case at_not_start_eof:
1046                         graph->notStartEOFAction( actionOrd[i], actions[i].action );
1047                         break;
1048                 case at_not_final_eof:
1049                         graph->notFinalEOFAction( actionOrd[i], actions[i].action );
1050                         break;
1051                 case at_middle_eof:
1052                         graph->middleEOFAction( actionOrd[i], actions[i].action );
1053                         break;
1054
1055                 /* To State Actions. */
1056                 case at_start_to_state:
1057                         graph->startToStateAction( actionOrd[i], actions[i].action );
1058                         afterOpMinimize( graph );
1059                         break;
1060                 case at_all_to_state:
1061                         graph->allToStateAction( actionOrd[i], actions[i].action );
1062                         break;
1063                 case at_final_to_state:
1064                         graph->finalToStateAction( actionOrd[i], actions[i].action );
1065                         break;
1066                 case at_not_start_to_state:
1067                         graph->notStartToStateAction( actionOrd[i], actions[i].action );
1068                         break;
1069                 case at_not_final_to_state:
1070                         graph->notFinalToStateAction( actionOrd[i], actions[i].action );
1071                         break;
1072                 case at_middle_to_state:
1073                         graph->middleToStateAction( actionOrd[i], actions[i].action );
1074                         break;
1075
1076                 /* From State Actions. */
1077                 case at_start_from_state:
1078                         graph->startFromStateAction( actionOrd[i], actions[i].action );
1079                         afterOpMinimize( graph );
1080                         break;
1081                 case at_all_from_state:
1082                         graph->allFromStateAction( actionOrd[i], actions[i].action );
1083                         break;
1084                 case at_final_from_state:
1085                         graph->finalFromStateAction( actionOrd[i], actions[i].action );
1086                         break;
1087                 case at_not_start_from_state:
1088                         graph->notStartFromStateAction( actionOrd[i], actions[i].action );
1089                         break;
1090                 case at_not_final_from_state:
1091                         graph->notFinalFromStateAction( actionOrd[i], actions[i].action );
1092                         break;
1093                 case at_middle_from_state:
1094                         graph->middleFromStateAction( actionOrd[i], actions[i].action );
1095                         break;
1096
1097                 /* Remaining cases, prevented by the parser. */
1098                 default:
1099                         assert( false );
1100                         break;
1101                 }
1102         }
1103 }
1104
1105 void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd )
1106 {
1107         /* Assign priorities. */
1108         for ( int i = 0; i < priorityAugs.length(); i++ ) {
1109                 switch ( priorityAugs[i].type ) {
1110                 case at_start:
1111                         graph->startFsmPrior( priorOrd[i], &priorDescs[i]);
1112                         /* Start fsm priorities are a special case that may require
1113                          * minimization afterwards. */
1114                         afterOpMinimize( graph );
1115                         break;
1116                 case at_all:
1117                         graph->allTransPrior( priorOrd[i], &priorDescs[i] );
1118                         break;
1119                 case at_finish:
1120                         graph->finishFsmPrior( priorOrd[i], &priorDescs[i] );
1121                         break;
1122                 case at_leave:
1123                         graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] );
1124                         break;
1125
1126                 default:
1127                         /* Parser Prevents this case. */
1128                         break;
1129                 }
1130         }
1131 }
1132
1133 void FactorWithAug::assignConditions( FsmAp *graph )
1134 {
1135         for ( int i = 0; i < conditions.length(); i++ )  {
1136                 switch ( conditions[i].type ) {
1137                 /* Transition actions. */
1138                 case at_start:
1139                         graph->startFsmCondition( conditions[i].action, conditions[i].sense );
1140                         afterOpMinimize( graph );
1141                         break;
1142                 case at_all:
1143                         graph->allTransCondition( conditions[i].action, conditions[i].sense );
1144                         break;
1145                 case at_leave:
1146                         graph->leaveFsmCondition( conditions[i].action, conditions[i].sense );
1147                         break;
1148                 default:
1149                         break;
1150                 }
1151         }
1152 }
1153
1154
1155 /* Evaluate a factor with augmentation node. */
1156 FsmAp *FactorWithAug::walk( ParseData *pd )
1157 {
1158         /* Enter into the scopes created for the labels. */
1159         NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1160
1161         /* Make the array of function orderings. */
1162         int *actionOrd = 0;
1163         if ( actions.length() > 0 )
1164                 actionOrd = new int[actions.length()];
1165
1166         /* First walk the list of actions, assigning order to all starting
1167          * actions. */
1168         for ( int i = 0; i < actions.length(); i++ ) {
1169                 if ( actions[i].type == at_start ||
1170                                 actions[i].type == at_start_gbl_error ||
1171                                 actions[i].type == at_start_local_error ||
1172                                 actions[i].type == at_start_to_state ||
1173                                 actions[i].type == at_start_from_state ||
1174                                 actions[i].type == at_start_eof )
1175                         actionOrd[i] = pd->curActionOrd++;
1176         }
1177
1178         /* Evaluate the factor with repetition. */
1179         FsmAp *rtnVal = factorWithRep->walk( pd );
1180
1181         /* Compute the remaining action orderings. */
1182         for ( int i = 0; i < actions.length(); i++ ) {
1183                 if ( actions[i].type != at_start &&
1184                                 actions[i].type != at_start_gbl_error &&
1185                                 actions[i].type != at_start_local_error &&
1186                                 actions[i].type != at_start_to_state &&
1187                                 actions[i].type != at_start_from_state &&
1188                                 actions[i].type != at_start_eof )
1189                         actionOrd[i] = pd->curActionOrd++;
1190         }
1191
1192         /* Embed conditions. */
1193         assignConditions( rtnVal );
1194
1195         /* Embed actions. */
1196         assignActions( pd, rtnVal , actionOrd );
1197
1198         /* Make the array of priority orderings. Orderings are local to this walk
1199          * of the factor with augmentation. */
1200         int *priorOrd = 0;
1201         if ( priorityAugs.length() > 0 )
1202                 priorOrd = new int[priorityAugs.length()];
1203
1204         /* Walk all priorities, assigning the priority ordering. */
1205         for ( int i = 0; i < priorityAugs.length(); i++ )
1206                 priorOrd[i] = pd->curPriorOrd++;
1207
1208         /* If the priority descriptors have not been made, make them now.  Make
1209          * priority descriptors for each priority asignment that will be passed to
1210          * the fsm. Used to keep track of the key, value and used bit. */
1211         if ( priorDescs == 0 && priorityAugs.length() > 0 ) {
1212                 priorDescs = new PriorDesc[priorityAugs.length()];
1213                 for ( int i = 0; i < priorityAugs.length(); i++ ) {
1214                         /* Init the prior descriptor for the priority setting. */
1215                         priorDescs[i].key = priorityAugs[i].priorKey;
1216                         priorDescs[i].priority = priorityAugs[i].priorValue;
1217                 }
1218         }
1219
1220         /* Assign priorities into the machine. */
1221         assignPriorities( rtnVal, priorOrd );
1222
1223         /* Assign epsilon transitions. */
1224         for ( int e = 0; e < epsilonLinks.length(); e++ ) {
1225                 /* Get the name, which may not exist. If it doesn't then silently
1226                  * ignore it because an error has already been reported. */
1227                 NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++];
1228                 if ( epTarg != 0 ) {
1229                         /* Make the epsilon transitions. */
1230                         rtnVal->epsilonTrans( epTarg->id );
1231
1232                         /* Note that we have made a link to the name. */
1233                         pd->localNameScope->referencedNames.append( epTarg );
1234                 }
1235         }
1236
1237         /* Set entry points for labels. */
1238         if ( labels.length() > 0 ) {
1239                 /* Pop the names. */
1240                 pd->resetNameScope( nameFrame );
1241
1242                 /* Make labels that are referenced into entry points. */
1243                 for ( int i = 0; i < labels.length(); i++ ) {
1244                         pd->enterNameScope( false, 1 );
1245
1246                         /* Will always be found. */
1247                         NameInst *name = pd->curNameInst;
1248
1249                         /* If the name is referenced then set the entry point. */
1250                         if ( name->numRefs > 0 )
1251                                 rtnVal->setEntry( name->id, rtnVal->startState );
1252                 }
1253
1254                 pd->popNameScope( nameFrame );
1255         }
1256
1257         if ( priorOrd != 0 )
1258                 delete[] priorOrd;
1259         if ( actionOrd != 0 )
1260                 delete[] actionOrd;
1261         return rtnVal;
1262 }
1263
1264 void FactorWithAug::makeNameTree( ParseData *pd )
1265 {
1266         /* Add the labels to the tree of instantiated names. Each label
1267          * makes a new scope. */
1268         NameInst *prevNameInst = pd->curNameInst;
1269         for ( int i = 0; i < labels.length(); i++ )
1270                 pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data, true );
1271
1272         /* Recurse, then pop the names. */
1273         factorWithRep->makeNameTree( pd );
1274         pd->curNameInst = prevNameInst;
1275 }
1276
1277
1278 void FactorWithAug::resolveNameRefs( ParseData *pd )
1279 {
1280         /* Enter into the name scope created by any labels. */
1281         NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1282
1283         /* Note action references. */
1284         for ( int i = 0; i < actions.length(); i++ )
1285                 actions[i].action->actionRefs.append( pd->localNameScope );
1286
1287         /* Recurse first. IMPORTANT: we must do the exact same traversal as when
1288          * the tree is constructed. */
1289         factorWithRep->resolveNameRefs( pd );
1290
1291         /* Resolve epsilon transitions. */
1292         for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) {
1293                 /* Get the link. */
1294                 EpsilonLink &link = epsilonLinks[ep];
1295                 NameInst *resolvedName = 0;
1296
1297                 if ( link.target.length() == 1 && strcmp( link.target.data[0], "final" ) == 0 ) {
1298                         /* Epsilon drawn to an implicit final state. An implicit final is
1299                          * only available in join operations. */
1300                         resolvedName = pd->localNameScope->final;
1301                 }
1302                 else {
1303                         /* Do an search for the name. */
1304                         NameSet resolved;
1305                         pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 );
1306                         if ( resolved.length() > 0 ) {
1307                                 /* Take the first one. */
1308                                 resolvedName = resolved[0];
1309                                 if ( resolved.length() > 1 ) {
1310                                         /* Complain about the multiple references. */
1311                                         error(link.loc) << "state reference " << link.target <<
1312                                                         " resolves to multiple entry points" << endl;
1313                                         errorStateLabels( resolved );
1314                                 }
1315                         }
1316                 }
1317
1318                 /* This is tricky, we stuff resolved epsilon transitions into one long
1319                  * vector in the parse data structure. Since the name resolution and
1320                  * graph generation both do identical walks of the parse tree we
1321                  * should always find the link resolutions in the right place.  */
1322                 pd->epsilonResolvedLinks.append( resolvedName );
1323
1324                 if ( resolvedName != 0 ) {
1325                         /* Found the name, bump of the reference count on it. */
1326                         resolvedName->numRefs += 1;
1327                 }
1328                 else {
1329                         /* Complain, no recovery action, the epsilon op will ignore any
1330                          * epsilon transitions whose names did not resolve. */
1331                         error(link.loc) << "could not resolve label " << link.target << endl;
1332                 }
1333         }
1334
1335         if ( labels.length() > 0 )
1336                 pd->popNameScope( nameFrame );
1337 }
1338
1339
1340 /* Clean up after a factor with repetition node. */
1341 FactorWithRep::~FactorWithRep()
1342 {
1343         switch ( type ) {
1344                 case StarType: case StarStarType: case OptionalType: case PlusType:
1345                 case ExactType: case MaxType: case MinType: case RangeType:
1346                         delete factorWithRep;
1347                         break;
1348                 case FactorWithNegType:
1349                         delete factorWithNeg;
1350                         break;
1351         }
1352 }
1353
1354 /* Evaluate a factor with repetition node. */
1355 FsmAp *FactorWithRep::walk( ParseData *pd )
1356 {
1357         FsmAp *retFsm = 0;
1358
1359         switch ( type ) {
1360         case StarType: {
1361                 /* Evaluate the FactorWithRep. */
1362                 retFsm = factorWithRep->walk( pd );
1363                 if ( retFsm->startState->isFinState() ) {
1364                         warning(loc) << "applying kleene star to a machine that "
1365                                         "accepts zero length word" << endl;
1366                 }
1367
1368                 /* Shift over the start action orders then do the kleene star. */
1369                 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1370                 retFsm->starOp( );
1371                 afterOpMinimize( retFsm );
1372                 break;
1373         }
1374         case StarStarType: {
1375                 /* Evaluate the FactorWithRep. */
1376                 retFsm = factorWithRep->walk( pd );
1377                 if ( retFsm->startState->isFinState() ) {
1378                         warning(loc) << "applying kleene star to a machine that "
1379                                         "accepts zero length word" << endl;
1380                 }
1381
1382                 /* Set up the prior descs. All gets priority one, whereas leaving gets
1383                  * priority zero. Make a unique key so that these priorities don't
1384                  * interfere with any priorities set by the user. */
1385                 priorDescs[0].key = pd->nextPriorKey++;
1386                 priorDescs[0].priority = 1;
1387                 retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
1388
1389                 /* Leaveing gets priority 0. Use same unique key. */
1390                 priorDescs[1].key = priorDescs[0].key;
1391                 priorDescs[1].priority = 0;
1392                 retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
1393
1394                 /* Shift over the start action orders then do the kleene star. */
1395                 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1396                 retFsm->starOp( );
1397                 afterOpMinimize( retFsm );
1398                 break;
1399         }
1400         case OptionalType: {
1401                 /* Make the null fsm. */
1402                 FsmAp *nu = new FsmAp();
1403                 nu->lambdaFsm( );
1404
1405                 /* Evaluate the FactorWithRep. */
1406                 retFsm = factorWithRep->walk( pd );
1407
1408                 /* Perform the question operator. */
1409                 retFsm->unionOp( nu );
1410                 afterOpMinimize( retFsm );
1411                 break;
1412         }
1413         case PlusType: {
1414                 /* Evaluate the FactorWithRep. */
1415                 retFsm = factorWithRep->walk( pd );
1416                 if ( retFsm->startState->isFinState() ) {
1417                         warning(loc) << "applying plus operator to a machine that "
1418                                         "accpets zero length word" << endl;
1419                 }
1420
1421                 /* Need a duplicated for the star end. */
1422                 FsmAp *dup = new FsmAp( *retFsm );
1423
1424                 /* The start func orders need to be shifted before doing the star. */
1425                 pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd );
1426
1427                 /* Star the duplicate. */
1428                 dup->starOp( );
1429                 afterOpMinimize( dup );
1430
1431                 retFsm->concatOp( dup );
1432                 afterOpMinimize( retFsm );
1433                 break;
1434         }
1435         case ExactType: {
1436                 /* Get an int from the repetition amount. */
1437                 if ( lowerRep == 0 ) {
1438                         /* No copies. Don't need to evaluate the factorWithRep.
1439                          * This Defeats the purpose so give a warning. */
1440                         warning(loc) << "exactly zero repetitions results "
1441                                         "in the null machine" << endl;
1442
1443                         retFsm = new FsmAp();
1444                         retFsm->lambdaFsm();
1445                 }
1446                 else {
1447                         /* Evaluate the first FactorWithRep. */
1448                         retFsm = factorWithRep->walk( pd );
1449                         if ( retFsm->startState->isFinState() ) {
1450                                 warning(loc) << "applying repetition to a machine that "
1451                                                 "accepts zero length word" << endl;
1452                         }
1453
1454                         /* The start func orders need to be shifted before doing the
1455                          * repetition. */
1456                         pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1457
1458                         /* Do the repetition on the machine. Already guarded against n == 0 */
1459                         retFsm->repeatOp( lowerRep );
1460                         afterOpMinimize( retFsm );
1461                 }
1462                 break;
1463         }
1464         case MaxType: {
1465                 /* Get an int from the repetition amount. */
1466                 if ( upperRep == 0 ) {
1467                         /* No copies. Don't need to evaluate the factorWithRep.
1468                          * This Defeats the purpose so give a warning. */
1469                         warning(loc) << "max zero repetitions results "
1470                                         "in the null machine" << endl;
1471
1472                         retFsm = new FsmAp();
1473                         retFsm->lambdaFsm();
1474                 }
1475                 else {
1476                         /* Evaluate the first FactorWithRep. */
1477                         retFsm = factorWithRep->walk( pd );
1478                         if ( retFsm->startState->isFinState() ) {
1479                                 warning(loc) << "applying max repetition to a machine that "
1480                                                 "accepts zero length word" << endl;
1481                         }
1482
1483                         /* The start func orders need to be shifted before doing the
1484                          * repetition. */
1485                         pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1486
1487                         /* Do the repetition on the machine. Already guarded against n == 0 */
1488                         retFsm->optionalRepeatOp( upperRep );
1489                         afterOpMinimize( retFsm );
1490                 }
1491                 break;
1492         }
1493         case MinType: {
1494                 /* Evaluate the repeated machine. */
1495                 retFsm = factorWithRep->walk( pd );
1496                 if ( retFsm->startState->isFinState() ) {
1497                         warning(loc) << "applying min repetition to a machine that "
1498                                         "accepts zero length word" << endl;
1499                 }
1500
1501                 /* The start func orders need to be shifted before doing the repetition
1502                  * and the kleene star. */
1503                 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1504
1505                 if ( lowerRep == 0 ) {
1506                         /* Acts just like a star op on the machine to return. */
1507                         retFsm->starOp( );
1508                         afterOpMinimize( retFsm );
1509                 }
1510                 else {
1511                         /* Take a duplicate for the plus. */
1512                         FsmAp *dup = new FsmAp( *retFsm );
1513
1514                         /* Do repetition on the first half. */
1515                         retFsm->repeatOp( lowerRep );
1516                         afterOpMinimize( retFsm );
1517
1518                         /* Star the duplicate. */
1519                         dup->starOp( );
1520                         afterOpMinimize( dup );
1521
1522                         /* Tak on the kleene star. */
1523                         retFsm->concatOp( dup );
1524                         afterOpMinimize( retFsm );
1525                 }
1526                 break;
1527         }
1528         case RangeType: {
1529                 /* Check for bogus range. */
1530                 if ( upperRep - lowerRep < 0 ) {
1531                         error(loc) << "invalid range repetition" << endl;
1532
1533                         /* Return null machine as recovery. */
1534                         retFsm = new FsmAp();
1535                         retFsm->lambdaFsm();
1536                 }
1537                 else if ( lowerRep == 0 && upperRep == 0 ) {
1538                         /* No copies. Don't need to evaluate the factorWithRep.  This
1539                          * defeats the purpose so give a warning. */
1540                         warning(loc) << "zero to zero repetitions results "
1541                                         "in the null machine" << endl;
1542
1543                         retFsm = new FsmAp();
1544                         retFsm->lambdaFsm();
1545                 }
1546                 else {
1547                         /* Now need to evaluate the repeated machine. */
1548                         retFsm = factorWithRep->walk( pd );
1549                         if ( retFsm->startState->isFinState() ) {
1550                                 warning(loc) << "applying range repetition to a machine that "
1551                                                 "accepts zero length word" << endl;
1552                         }
1553
1554                         /* The start func orders need to be shifted before doing both kinds
1555                          * of repetition. */
1556                         pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1557
1558                         if ( lowerRep == 0 ) {
1559                                 /* Just doing max repetition. Already guarded against n == 0. */
1560                                 retFsm->optionalRepeatOp( upperRep );
1561                                 afterOpMinimize( retFsm );
1562                         }
1563                         else if ( lowerRep == upperRep ) {
1564                                 /* Just doing exact repetition. Already guarded against n == 0. */
1565                                 retFsm->repeatOp( lowerRep );
1566                                 afterOpMinimize( retFsm );
1567                         }
1568                         else {
1569                                 /* This is the case that 0 < lowerRep < upperRep. Take a
1570                                  * duplicate for the optional repeat. */
1571                                 FsmAp *dup = new FsmAp( *retFsm );
1572
1573                                 /* Do repetition on the first half. */
1574                                 retFsm->repeatOp( lowerRep );
1575                                 afterOpMinimize( retFsm );
1576
1577                                 /* Do optional repetition on the second half. */
1578                                 dup->optionalRepeatOp( upperRep - lowerRep );
1579                                 afterOpMinimize( dup );
1580
1581                                 /* Tak on the duplicate machine. */
1582                                 retFsm->concatOp( dup );
1583                                 afterOpMinimize( retFsm );
1584                         }
1585                 }
1586                 break;
1587         }
1588         case FactorWithNegType: {
1589                 /* Evaluate the Factor. Pass it up. */
1590                 retFsm = factorWithNeg->walk( pd );
1591                 break;
1592         }}
1593         return retFsm;
1594 }
1595
1596 void FactorWithRep::makeNameTree( ParseData *pd )
1597 {
1598         switch ( type ) {
1599         case StarType:
1600         case StarStarType:
1601         case OptionalType:
1602         case PlusType:
1603         case ExactType:
1604         case MaxType:
1605         case MinType:
1606         case RangeType:
1607                 factorWithRep->makeNameTree( pd );
1608                 break;
1609         case FactorWithNegType:
1610                 factorWithNeg->makeNameTree( pd );
1611                 break;
1612         }
1613 }
1614
1615 void FactorWithRep::resolveNameRefs( ParseData *pd )
1616 {
1617         switch ( type ) {
1618         case StarType:
1619         case StarStarType:
1620         case OptionalType:
1621         case PlusType:
1622         case ExactType:
1623         case MaxType:
1624         case MinType:
1625         case RangeType:
1626                 factorWithRep->resolveNameRefs( pd );
1627                 break;
1628         case FactorWithNegType:
1629                 factorWithNeg->resolveNameRefs( pd );
1630                 break;
1631         }
1632 }
1633
1634 /* Clean up after a factor with negation node. */
1635 FactorWithNeg::~FactorWithNeg()
1636 {
1637         switch ( type ) {
1638                 case NegateType:
1639                 case CharNegateType:
1640                         delete factorWithNeg;
1641                         break;
1642                 case FactorType:
1643                         delete factor;
1644                         break;
1645         }
1646 }
1647
1648 /* Evaluate a factor with negation node. */
1649 FsmAp *FactorWithNeg::walk( ParseData *pd )
1650 {
1651         FsmAp *retFsm = 0;
1652
1653         switch ( type ) {
1654         case NegateType: {
1655                 /* Evaluate the factorWithNeg. */
1656                 FsmAp *toNegate = factorWithNeg->walk( pd );
1657
1658                 /* Negation is subtract from dot-star. */
1659                 retFsm = dotStarFsm( pd );
1660                 retFsm->subtractOp( toNegate );
1661                 afterOpMinimize( retFsm );
1662                 break;
1663         }
1664         case CharNegateType: {
1665                 /* Evaluate the factorWithNeg. */
1666                 FsmAp *toNegate = factorWithNeg->walk( pd );
1667
1668                 /* CharNegation is subtract from dot. */
1669                 retFsm = dotFsm( pd );
1670                 retFsm->subtractOp( toNegate );
1671                 afterOpMinimize( retFsm );
1672                 break;
1673         }
1674         case FactorType: {
1675                 /* Evaluate the Factor. Pass it up. */
1676                 retFsm = factor->walk( pd );
1677                 break;
1678         }}
1679         return retFsm;
1680 }
1681
1682 void FactorWithNeg::makeNameTree( ParseData *pd )
1683 {
1684         switch ( type ) {
1685         case NegateType:
1686         case CharNegateType:
1687                 factorWithNeg->makeNameTree( pd );
1688                 break;
1689         case FactorType:
1690                 factor->makeNameTree( pd );
1691                 break;
1692         }
1693 }
1694
1695 void FactorWithNeg::resolveNameRefs( ParseData *pd )
1696 {
1697         switch ( type ) {
1698         case NegateType:
1699         case CharNegateType:
1700                 factorWithNeg->resolveNameRefs( pd );
1701                 break;
1702         case FactorType:
1703                 factor->resolveNameRefs( pd );
1704                 break;
1705         }
1706 }
1707
1708 /* Clean up after a factor node. */
1709 Factor::~Factor()
1710 {
1711         switch ( type ) {
1712                 case LiteralType:
1713                         delete literal;
1714                         break;
1715                 case RangeType:
1716                         delete range;
1717                         break;
1718                 case OrExprType:
1719                         delete reItem;
1720                         break;
1721                 case RegExprType:
1722                         delete regExpr;
1723                         break;
1724                 case ReferenceType:
1725                         break;
1726                 case ParenType:
1727                         delete join;
1728                         break;
1729                 case LongestMatchType:
1730                         delete longestMatch;
1731                         break;
1732         }
1733 }
1734
1735 /* Evaluate a factor node. */
1736 FsmAp *Factor::walk( ParseData *pd )
1737 {
1738         FsmAp *rtnVal = 0;
1739         switch ( type ) {
1740         case LiteralType:
1741                 rtnVal = literal->walk( pd );
1742                 break;
1743         case RangeType:
1744                 rtnVal = range->walk( pd );
1745                 break;
1746         case OrExprType:
1747                 rtnVal = reItem->walk( pd, 0 );
1748                 break;
1749         case RegExprType:
1750                 rtnVal = regExpr->walk( pd, 0 );
1751                 break;
1752         case ReferenceType:
1753                 rtnVal = varDef->walk( pd );
1754                 break;
1755         case ParenType:
1756                 rtnVal = join->walk( pd );
1757                 break;
1758         case LongestMatchType:
1759                 rtnVal = longestMatch->walk( pd );
1760                 break;
1761         }
1762
1763         return rtnVal;
1764 }
1765
1766 void Factor::makeNameTree( ParseData *pd )
1767 {
1768         switch ( type ) {
1769         case LiteralType:
1770         case RangeType:
1771         case OrExprType:
1772         case RegExprType:
1773                 break;
1774         case ReferenceType:
1775                 varDef->makeNameTree( loc, pd );
1776                 break;
1777         case ParenType:
1778                 join->makeNameTree( pd );
1779                 break;
1780         case LongestMatchType:
1781                 longestMatch->makeNameTree( pd );
1782                 break;
1783         }
1784 }
1785
1786 void Factor::resolveNameRefs( ParseData *pd )
1787 {
1788         switch ( type ) {
1789         case LiteralType:
1790         case RangeType:
1791         case OrExprType:
1792         case RegExprType:
1793                 break;
1794         case ReferenceType:
1795                 varDef->resolveNameRefs( pd );
1796                 break;
1797         case ParenType:
1798                 join->resolveNameRefs( pd );
1799                 break;
1800         case LongestMatchType:
1801                 longestMatch->resolveNameRefs( pd );
1802                 break;
1803         }
1804 }
1805
1806 /* Clean up a range object. Must delete the two literals. */
1807 Range::~Range()
1808 {
1809         delete lowerLit;
1810         delete upperLit;
1811 }
1812
1813 /* Evaluate a range. Gets the lower an upper key and makes an fsm range. */
1814 FsmAp *Range::walk( ParseData *pd )
1815 {
1816         /* Construct and verify the suitability of the lower end of the range. */
1817         FsmAp *lowerFsm = lowerLit->walk( pd );
1818         if ( !lowerFsm->checkSingleCharMachine() ) {
1819                 error(lowerLit->token.loc) <<
1820                         "bad range lower end, must be a single character" << endl;
1821         }
1822
1823         /* Construct and verify the upper end. */
1824         FsmAp *upperFsm = upperLit->walk( pd );
1825         if ( !upperFsm->checkSingleCharMachine() ) {
1826                 error(upperLit->token.loc) <<
1827                         "bad range upper end, must be a single character" << endl;
1828         }
1829
1830         /* Grab the keys from the machines, then delete them. */
1831         Key lowKey = lowerFsm->startState->outList.head->lowKey;
1832         Key highKey = upperFsm->startState->outList.head->lowKey;
1833         delete lowerFsm;
1834         delete upperFsm;
1835
1836         /* Validate the range. */
1837         if ( lowKey > highKey ) {
1838                 /* Recover by setting upper to lower; */
1839                 error(lowerLit->token.loc) << "lower end of range is greater then upper end" << endl;
1840                 highKey = lowKey;
1841         }
1842
1843         /* Return the range now that it is validated. */
1844         FsmAp *retFsm = new FsmAp();
1845         retFsm->rangeFsm( lowKey, highKey );
1846         return retFsm;
1847 }
1848
1849 /* Evaluate a literal object. */
1850 FsmAp *Literal::walk( ParseData *pd )
1851 {
1852         /* FsmAp to return, is the alphabet signed. */
1853         FsmAp *rtnVal = 0;
1854
1855         switch ( type ) {
1856         case Number: {
1857                 /* Make the fsm key in int format. */
1858                 Key fsmKey = makeFsmKeyNum( token.data, token.loc, pd );
1859                 /* Make the new machine. */
1860                 rtnVal = new FsmAp();
1861                 rtnVal->concatFsm( fsmKey );
1862                 break;
1863         }
1864         case LitString: {
1865                 /* Make the array of keys in int format. */
1866                 Token interp;
1867                 bool caseInsensitive;
1868                 token.prepareLitString( interp, caseInsensitive );
1869                 Key *arr = new Key[interp.length];
1870                 makeFsmKeyArray( arr, interp.data, interp.length, pd );
1871
1872                 /* Make the new machine. */
1873                 rtnVal = new FsmAp();
1874                 if ( caseInsensitive )
1875                         rtnVal->concatFsmCI( arr, interp.length );
1876                 else
1877                         rtnVal->concatFsm( arr, interp.length );
1878                 delete[] interp.data;
1879                 delete[] arr;
1880                 break;
1881         }}
1882         return rtnVal;
1883 }
1884
1885 /* Clean up after a regular expression object. */
1886 RegExpr::~RegExpr()
1887 {
1888         switch ( type ) {
1889                 case RecurseItem:
1890                         delete regExpr;
1891                         delete item;
1892                         break;
1893                 case Empty:
1894                         break;
1895         }
1896 }
1897
1898 /* Evaluate a regular expression object. */
1899 FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex )
1900 {
1901         /* This is the root regex, pass down a pointer to this. */
1902         if ( rootRegex == 0 )
1903                 rootRegex = this;
1904
1905         FsmAp *rtnVal = 0;
1906         switch ( type ) {
1907                 case RecurseItem: {
1908                         /* Walk both items. */
1909                         rtnVal = regExpr->walk( pd, rootRegex );
1910                         FsmAp *fsm2 = item->walk( pd, rootRegex );
1911                         rtnVal->concatOp( fsm2 );
1912                         break;
1913                 }
1914                 case Empty: {
1915                         rtnVal = new FsmAp();
1916                         rtnVal->lambdaFsm();
1917                         break;
1918                 }
1919         }
1920         return rtnVal;
1921 }
1922
1923 /* Clean up after an item in a regular expression. */
1924 ReItem::~ReItem()
1925 {
1926         switch ( type ) {
1927                 case Data:
1928                 case Dot:
1929                         break;
1930                 case OrBlock:
1931                 case NegOrBlock:
1932                         delete orBlock;
1933                         break;
1934         }
1935 }
1936
1937 /* Evaluate a regular expression object. */
1938 FsmAp *ReItem::walk( ParseData *pd, RegExpr *rootRegex )
1939 {
1940         /* The fsm to return, is the alphabet signed? */
1941         FsmAp *rtnVal = 0;
1942
1943         switch ( type ) {
1944                 case Data: {
1945                         /* Move the data into an integer array and make a concat fsm. */
1946                         Key *arr = new Key[token.length];
1947                         makeFsmKeyArray( arr, token.data, token.length, pd );
1948
1949                         /* Make the concat fsm. */
1950                         rtnVal = new FsmAp();
1951                         if ( rootRegex != 0 && rootRegex->caseInsensitive )
1952                                 rtnVal->concatFsmCI( arr, token.length );
1953                         else
1954                                 rtnVal->concatFsm( arr, token.length );
1955                         delete[] arr;
1956                         break;
1957                 }
1958                 case Dot: {
1959                         /* Make the dot fsm. */
1960                         rtnVal = dotFsm( pd );
1961                         break;
1962                 }
1963                 case OrBlock: {
1964                         /* Get the or block and minmize it. */
1965                         rtnVal = orBlock->walk( pd, rootRegex );
1966                         if ( rtnVal == 0 ) {
1967                                 rtnVal = new FsmAp();
1968                                 rtnVal->lambdaFsm();
1969                         }
1970                         rtnVal->minimizePartition2();
1971                         break;
1972                 }
1973                 case NegOrBlock: {
1974                         /* Get the or block and minimize it. */
1975                         FsmAp *fsm = orBlock->walk( pd, rootRegex );
1976                         fsm->minimizePartition2();
1977
1978                         /* Make a dot fsm and subtract from it. */
1979                         rtnVal = dotFsm( pd );
1980                         rtnVal->subtractOp( fsm );
1981                         rtnVal->minimizePartition2();
1982                         break;
1983                 }
1984         }
1985
1986         /* If the item is followed by a star, then apply the star op. */
1987         if ( star ) {
1988                 if ( rtnVal->startState->isFinState() ) {
1989                         warning(loc) << "applying kleene star to a machine that "
1990                                         "accpets zero length word" << endl;
1991                 }
1992
1993                 rtnVal->starOp();
1994                 rtnVal->minimizePartition2();
1995         }
1996         return rtnVal;
1997 }
1998
1999 /* Clean up after an or block of a regular expression. */
2000 ReOrBlock::~ReOrBlock()
2001 {
2002         switch ( type ) {
2003                 case RecurseItem:
2004                         delete orBlock;
2005                         delete item;
2006                         break;
2007                 case Empty:
2008                         break;
2009         }
2010 }
2011
2012
2013 /* Evaluate an or block of a regular expression. */
2014 FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex )
2015 {
2016         FsmAp *rtnVal = 0;
2017         switch ( type ) {
2018                 case RecurseItem: {
2019                         /* Evaluate the two fsm. */
2020                         FsmAp *fsm1 = orBlock->walk( pd, rootRegex );
2021                         FsmAp *fsm2 = item->walk( pd, rootRegex );
2022                         if ( fsm1 == 0 )
2023                                 rtnVal = fsm2;
2024                         else {
2025                                 fsm1->unionOp( fsm2 );
2026                                 rtnVal = fsm1;
2027                         }
2028                         break;
2029                 }
2030                 case Empty: {
2031                         rtnVal = 0;
2032                         break;
2033                 }
2034         }
2035         return rtnVal;;
2036 }
2037
2038 /* Evaluate an or block item of a regular expression. */
2039 FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex )
2040 {
2041         /* The return value, is the alphabet signed? */
2042         FsmAp *rtnVal = 0;
2043         switch ( type ) {
2044         case Data: {
2045                 /* Make the or machine. */
2046                 rtnVal = new FsmAp();
2047
2048                 /* Put the or data into an array of ints. Note that we find unique
2049                  * keys. Duplicates are silently ignored. The alternative would be to
2050                  * issue warning or an error but since we can't with [a0-9a] or 'a' |
2051                  * 'a' don't bother here. */
2052                 KeySet keySet;
2053                 makeFsmUniqueKeyArray( keySet, token.data, token.length,
2054                         rootRegex != 0 ? rootRegex->caseInsensitive : false, pd );
2055
2056                 /* Run the or operator. */
2057                 rtnVal->orFsm( keySet.data, keySet.length() );
2058                 break;
2059         }
2060         case Range: {
2061                 /* Make the upper and lower keys. */
2062                 Key lowKey = makeFsmKeyChar( lower, pd );
2063                 Key highKey = makeFsmKeyChar( upper, pd );
2064
2065                 /* Validate the range. */
2066                 if ( lowKey > highKey ) {
2067                         /* Recover by setting upper to lower; */
2068                         error(loc) << "lower end of range is greater then upper end" << endl;
2069                         highKey = lowKey;
2070                 }
2071
2072                 /* Make the range machine. */
2073                 rtnVal = new FsmAp();
2074                 rtnVal->rangeFsm( lowKey, highKey );
2075
2076                 if ( rootRegex != 0 && rootRegex->caseInsensitive ) {
2077                         if ( lowKey <= 'Z' && 'A' <= highKey ) {
2078                                 Key otherLow = lowKey < 'A' ? Key('A') : lowKey;
2079                                 Key otherHigh = 'Z' < highKey ? Key('Z') : highKey;
2080
2081                                 otherLow = 'a' + ( otherLow - 'A' );
2082                                 otherHigh = 'a' + ( otherHigh - 'A' );
2083
2084                                 FsmAp *otherRange = new FsmAp();
2085                                 otherRange->rangeFsm( otherLow, otherHigh );
2086                                 rtnVal->unionOp( otherRange );
2087                                 rtnVal->minimizePartition2();
2088                         }
2089                         else if ( lowKey <= 'z' && 'a' <= highKey ) {
2090                                 Key otherLow = lowKey < 'a' ? Key('a') : lowKey;
2091                                 Key otherHigh = 'z' < highKey ? Key('z') : highKey;
2092
2093                                 otherLow = 'A' + ( otherLow - 'a' );
2094                                 otherHigh = 'A' + ( otherHigh - 'a' );
2095
2096                                 FsmAp *otherRange = new FsmAp();
2097                                 otherRange->rangeFsm( otherLow, otherHigh );
2098                                 rtnVal->unionOp( otherRange );
2099                                 rtnVal->minimizePartition2();
2100                         }
2101                 }
2102
2103                 break;
2104         }}
2105         return rtnVal;
2106 }