ragel/parsetree.cpp

   1 /*
   2  *  Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
   3  */
   4
   5 /*  This file is part of Ragel.
   6  *
   7  *  Ragel is free software; you can redistribute it and/or modify
   8  *  it under the terms of the GNU General Public License as published by
   9  *  the Free Software Foundation; either version 2 of the License, or
  10  *  (at your option) any later version.
  11  *
  12  *  Ragel is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  *  GNU General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU General Public License
  18  *  along with Ragel; if not, write to the Free Software
  19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  */
  21
  22 #include <iostream>
  23 #include <iomanip>
  24 #include <errno.h>
  25 #include <limits.h>
  26 #include <stdlib.h>
  27
  28 /* Parsing. */
  29 #include "ragel.h"
  30 #include "rlparse.h"
  31 #include "parsetree.h"
  32
  33 using namespace std;
  34 ostream &operator<<( ostream &out, const NameRef &nameRef );
  35 ostream &operator<<( ostream &out, const NameInst &nameInst );
  36
  37 /* Convert the literal string which comes in from the scanner into an array of
  38  * characters with escapes and options interpreted. Also null terminates the
  39  * string. Though this null termination should not be relied on for
  40  * interpreting literals in the parser because the string may contain a
  41  * literal string with \0 */
  42 void Token::prepareLitString( Token &result, bool &caseInsensitive )
  43 {
  44         result.data = new char[this->length+1];
  45         caseInsensitive = false;
  46
  47         char *src = this->data + 1;
  48         char *end = this->data + this->length - 1;
  49
  50         while ( *end != '\'' && *end != '\"' ) {
  51                 if ( *end == 'i' )
  52                         caseInsensitive = true;
  53                 else {
  54                         error( this->loc ) << "literal string '" << *end <<
  55                                         "' option not supported" << endl;
  56                 }
  57                 end -= 1;
  58         }
  59
  60         char *dest = result.data;
  61         int len = 0;
  62         while ( src != end ) {
  63                 if ( *src == '\\' ) {
  64                         switch ( src[1] ) {
  65                         case '0': dest[len++] = '\0'; break;
  66                         case 'a': dest[len++] = '\a'; break;
  67                         case 'b': dest[len++] = '\b'; break;
  68                         case 't': dest[len++] = '\t'; break;
  69                         case 'n': dest[len++] = '\n'; break;
  70                         case 'v': dest[len++] = '\v'; break;
  71                         case 'f': dest[len++] = '\f'; break;
  72                         case 'r': dest[len++] = '\r'; break;
  73                         case '\n':  break;
  74                         default: dest[len++] = src[1]; break;
  75                         }
  76                         src += 2;
  77                 }
  78                 else {
  79                         dest[len++] = *src++;
  80                 }
  81         }
  82         result.length = len;
  83         result.data[result.length] = 0;
  84 }
  85
  86
  87 FsmAp *VarDef::walk( ParseData *pd )
  88 {
  89         /* We enter into a new name scope. */
  90         NameFrame nameFrame = pd->enterNameScope( true, 1 );
  91
  92         /* Recurse on the expression. */
  93         FsmAp *rtnVal = joinOrLm->walk( pd );
  94
  95         /* Do the tranfer of local error actions. */
  96         LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name );
  97         if ( localErrDictEl != 0 ) {
  98                 for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ )
  99                         rtnVal->transferErrorActions( state, localErrDictEl->value );
 100         }
 101
 102         /* If the expression below is a join operation with multiple expressions
 103          * then it just had epsilon transisions resolved. If it is a join
 104          * with only a single expression then run the epsilon op now. */
 105         if ( joinOrLm->type == JoinOrLm::JoinType && joinOrLm->join->exprList.length() == 1 )
 106                 rtnVal->epsilonOp();
 107
 108         /* We can now unset entry points that are not longer used. */
 109         pd->unsetObsoleteEntries( rtnVal );
 110
 111         /* If the name of the variable is referenced then add the entry point to
 112          * the graph. */
 113         if ( pd->curNameInst->numRefs > 0 )
 114                 rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState );
 115
 116         /* Pop the name scope. */
 117         pd->popNameScope( nameFrame );
 118         return rtnVal;
 119 }
 120
 121 void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd )
 122 {
 123         /* The variable definition enters a new scope. */
 124         NameInst *prevNameInst = pd->curNameInst;
 125         pd->curNameInst = pd->addNameInst( loc, name, false );
 126
 127         if ( joinOrLm->type == JoinOrLm::LongestMatchType )
 128                 pd->curNameInst->isLongestMatch = true;
 129
 130         /* Recurse. */
 131         joinOrLm->makeNameTree( pd );
 132
 133         /* The name scope ends, pop the name instantiation. */
 134         pd->curNameInst = prevNameInst;
 135 }
 136
 137 void VarDef::resolveNameRefs( ParseData *pd )
 138 {
 139         /* Entering into a new scope. */
 140         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 141
 142         /* Recurse. */
 143         joinOrLm->resolveNameRefs( pd );
 144
 145         /* The name scope ends, pop the name instantiation. */
 146         pd->popNameScope( nameFrame );
 147 }
 148
 149 InputLoc LongestMatchPart::getLoc()
 150 {
 151         return action != 0 ? action->loc : semiLoc;
 152 }
 153
 154 /*
 155  * If there are any LMs then all of the following entry points must reset
 156  * tokstart:
 157  *
 158  *  1. fentry(StateRef)
 159  *  2. ftoto(StateRef), fcall(StateRef), fnext(StateRef)
 160  *  3. targt of any transition that has an fcall (the return loc).
 161  *  4. start state of all longest match routines.
 162  */
 163
 164 Action *LongestMatch::newAction( ParseData *pd, const InputLoc &loc,
 165                 char *name, InlineList *inlineList )
 166 {
 167         Action *action = new Action( loc, name, inlineList, pd->nextCondId++ );
 168         action->actionRefs.append( pd->curNameInst );
 169         pd->actionList.append( action );
 170         action->isLmAction = true;
 171         return action;
 172 }
 173
 174 void LongestMatch::makeActions( ParseData *pd )
 175 {
 176         /* Make actions that set the action id. */
 177         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 178                 /* For each part create actions for setting the match type.  We need
 179                  * to do this so that the actions will go into the actionIndex. */
 180                 InlineList *inlineList = new InlineList;
 181                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, InlineItem::LmSetActId ) );
 182                 char *actName = new char[50];
 183                 sprintf( actName, "store%i", lmi->longestMatchId );
 184                 lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList );
 185         }
 186
 187         /* Make actions that execute the user action and restart on the last character. */
 188         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 189                 /* For each part create actions for setting the match type.  We need
 190                  * to do this so that the actions will go into the actionIndex. */
 191                 InlineList *inlineList = new InlineList;
 192                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
 193                                 InlineItem::LmOnLast ) );
 194                 char *actName = new char[50];
 195                 sprintf( actName, "imm%i", lmi->longestMatchId );
 196                 lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList );
 197         }
 198
 199         /* Make actions that execute the user action and restart on the next
 200          * character.  These actions will set tokend themselves (it is the current
 201          * char). */
 202         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 203                 /* For each part create actions for setting the match type.  We need
 204                  * to do this so that the actions will go into the actionIndex. */
 205                 InlineList *inlineList = new InlineList;
 206                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
 207                                 InlineItem::LmOnNext ) );
 208                 char *actName = new char[50];
 209                 sprintf( actName, "lagh%i", lmi->longestMatchId );
 210                 lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList );
 211         }
 212
 213         /* Make actions that execute the user action and restart at tokend. These
 214          * actions execute some time after matching the last char. */
 215         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 216                 /* For each part create actions for setting the match type.  We need
 217                  * to do this so that the actions will go into the actionIndex. */
 218                 InlineList *inlineList = new InlineList;
 219                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
 220                                 InlineItem::LmOnLagBehind ) );
 221                 char *actName = new char[50];
 222                 sprintf( actName, "lag%i", lmi->longestMatchId );
 223                 lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList );
 224         }
 225
 226         InputLoc loc;
 227         loc.line = 1;
 228         loc.col = 1;
 229
 230         /* Create the error action. */
 231         InlineList *il6 = new InlineList;
 232         il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) );
 233         lmActSelect = newAction( pd, loc, "lagsel", il6 );
 234 }
 235
 236 void LongestMatch::findName( ParseData *pd )
 237 {
 238         NameInst *nameInst = pd->curNameInst;
 239         while ( nameInst->name == 0 ) {
 240                 nameInst = nameInst->parent;
 241                 /* Since every machine must must have a name, we should always find a
 242                  * name for the longest match. */
 243                 assert( nameInst != 0 );
 244         }
 245         name = nameInst->name;
 246 }
 247
 248 void LongestMatch::makeNameTree( ParseData *pd )
 249 {
 250         /* Create an anonymous scope for the longest match. Will be used for
 251          * restarting machine after matching a token. */
 252         NameInst *prevNameInst = pd->curNameInst;
 253         pd->curNameInst = pd->addNameInst( loc, 0, false );
 254
 255         /* Recurse into all parts of the longest match operator. */
 256         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ )
 257                 lmi->join->makeNameTree( pd );
 258
 259         /* Traverse the name tree upwards to find a name for this lm. */
 260         findName( pd );
 261
 262         /* Also make the longest match's actions at this point. */
 263         makeActions( pd );
 264
 265         /* The name scope ends, pop the name instantiation. */
 266         pd->curNameInst = prevNameInst;
 267 }
 268
 269 void LongestMatch::resolveNameRefs( ParseData *pd )
 270 {
 271         /* The longest match gets its own name scope. */
 272         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 273
 274         /* Take an action reference for each longest match item and recurse. */
 275         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 276                 /* Record the reference if the item has an action. */
 277                 if ( lmi->action != 0 )
 278                         lmi->action->actionRefs.append( pd->localNameScope );
 279
 280                 /* Recurse down the join. */
 281                 lmi->join->resolveNameRefs( pd );
 282         }
 283
 284         /* The name scope ends, pop the name instantiation. */
 285         pd->popNameScope( nameFrame );
 286 }
 287
 288 void LongestMatch::restart( FsmAp *graph, TransAp *trans )
 289 {
 290         StateAp *fromState = trans->fromState;
 291         graph->detachTrans( fromState, trans->toState, trans );
 292         graph->attachTrans( fromState, graph->startState, trans );
 293 }
 294
 295 void LongestMatch::runLonestMatch( ParseData *pd, FsmAp *graph )
 296 {
 297         graph->markReachableFromHereStopFinal( graph->startState );
 298         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 299                 if ( ms->stateBits & SB_ISMARKED ) {
 300                         ms->lmItemSet.insert( 0 );
 301                         ms->stateBits &= ~ SB_ISMARKED;
 302                 }
 303         }
 304
 305         /* Transfer the first item of non-empty lmAction tables to the item sets
 306          * of the states that follow. Exclude states that have no transitions out.
 307          * This must happen on a separate pass so that on each iteration of the
 308          * next pass we have the item set entries from all lmAction tables. */
 309         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
 310                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
 311                         if ( trans->lmActionTable.length() > 0 ) {
 312                                 LmActionTableEl *lmAct = trans->lmActionTable.data;
 313                                 StateAp *toState = trans->toState;
 314                                 assert( toState );
 315
 316                                 /* Check if there are transitions out, this may be a very
 317                                  * close approximation? Out transitions going nowhere?
 318                                  * FIXME: Check. */
 319                                 if ( toState->outList.length() > 0 ) {
 320                                         /* Fill the item sets. */
 321                                         graph->markReachableFromHereStopFinal( toState );
 322                                         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 323                                                 if ( ms->stateBits & SB_ISMARKED ) {
 324                                                         ms->lmItemSet.insert( lmAct->value );
 325                                                         ms->stateBits &= ~ SB_ISMARKED;
 326                                                 }
 327                                         }
 328                                 }
 329                         }
 330                 }
 331         }
 332
 333         /* The lmItem sets are now filled, telling us which longest match rules
 334          * can succeed in which states. First determine if we need to make sure
 335          * act is defaulted to zero. We need to do this if there are any states
 336          * with lmItemSet.length() > 1 and NULL is included. That is, that the
 337          * switch may get called when in fact nothing has been matched. */
 338         int maxItemSetLength = 0;
 339         graph->markReachableFromHereStopFinal( graph->startState );
 340         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 341                 if ( ms->stateBits & SB_ISMARKED ) {
 342                         if ( ms->lmItemSet.length() > maxItemSetLength )
 343                                 maxItemSetLength = ms->lmItemSet.length();
 344                         ms->stateBits &= ~ SB_ISMARKED;
 345                 }
 346         }
 347
 348         /* The actions executed on starting to match a token. */
 349         graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart );
 350         graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
 351         if ( maxItemSetLength > 1 ) {
 352                 /* The longest match action switch may be called when tokens are
 353                  * matched, in which case act must be initialized, there must be a
 354                  * case to handle the error, and the generated machine will require an
 355                  * error state. */
 356                 lmSwitchHandlesError = true;
 357                 pd->lmRequiresErrorState = true;
 358                 graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
 359         }
 360
 361         /* The place to store transitions to restart. It maybe possible for the
 362          * restarting to affect the searching through the graph that follows. For
 363          * now take the safe route and save the list of transitions to restart
 364          * until after all searching is done. */
 365         Vector<TransAp*> restartTrans;
 366
 367         /* Set actions that do immediate token recognition, set the longest match part
 368          * id and set the token ending. */
 369         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
 370                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
 371                         if ( trans->lmActionTable.length() > 0 ) {
 372                                 LmActionTableEl *lmAct = trans->lmActionTable.data;
 373                                 StateAp *toState = trans->toState;
 374                                 assert( toState );
 375
 376                                 /* Check if there are transitions out, this may be a very
 377                                  * close approximation? Out transitions going nowhere?
 378                                  * FIXME: Check. */
 379                                 if ( toState->outList.length() == 0 ) {
 380                                         /* Can execute the immediate action for the longest match
 381                                          * part. Redirect the action to the start state. */
 382                                         trans->actionTable.setAction( lmAct->key,
 383                                                         lmAct->value->actOnLast );
 384                                         restartTrans.append( trans );
 385                                 }
 386                                 else {
 387                                         /* Look for non final states that have a non-empty item
 388                                          * set. If these are present then we need to record the
 389                                          * end of the token.  Also Find the highest item set
 390                                          * length reachable from here (excluding at transtions to
 391                                          * final states). */
 392                                         bool nonFinalNonEmptyItemSet = false;
 393                                         maxItemSetLength = 0;
 394                                         graph->markReachableFromHereStopFinal( toState );
 395                                         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 396                                                 if ( ms->stateBits & SB_ISMARKED ) {
 397                                                         if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
 398                                                                 nonFinalNonEmptyItemSet = true;
 399                                                         if ( ms->lmItemSet.length() > maxItemSetLength )
 400                                                                 maxItemSetLength = ms->lmItemSet.length();
 401                                                         ms->stateBits &= ~ SB_ISMARKED;
 402                                                 }
 403                                         }
 404
 405                                         /* If there are reachable states that are not final and
 406                                          * have non empty item sets or that have an item set
 407                                          * length greater than one then we need to set tokend
 408                                          * because the error action that matches the token will
 409                                          * require it. */
 410                                         if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
 411                                                 trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
 412
 413                                         /* Some states may not know which longest match item to
 414                                          * execute, must set it. */
 415                                         if ( maxItemSetLength > 1 ) {
 416                                                 /* There are transitions out, another match may come. */
 417                                                 trans->actionTable.setAction( lmAct->key,
 418                                                                 lmAct->value->setActId );
 419                                         }
 420                                 }
 421                         }
 422                 }
 423         }
 424
 425         /* Now that all graph searching is done it certainly safe set the
 426          * restarting. It may be safe above, however this must be verified. */
 427         for ( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ )
 428                 restart( graph, *pt );
 429
 430         int lmErrActionOrd = pd->curActionOrd++;
 431
 432         /* Embed the error for recognizing a char. */
 433         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
 434                 if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
 435                         if ( st->isFinState() ) {
 436                                 /* On error execute the onActNext action, which knows that
 437                                  * the last character of the token was one back and restart. */
 438                                 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
 439                                                 &st->lmItemSet[0]->actOnNext, 1 );
 440                         }
 441                         else {
 442                                 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
 443                                                 &st->lmItemSet[0]->actLagBehind, 1 );
 444                         }
 445                 }
 446                 else if ( st->lmItemSet.length() > 1 ) {
 447                         /* Need to use the select. Take note of the which items the select
 448                          * is needed for so only the necessary actions are included. */
 449                         for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
 450                                 if ( *plmi != 0 )
 451                                         (*plmi)->inLmSelect = true;
 452                         }
 453                         /* On error, execute the action select and go to the start state. */
 454                         graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
 455                                         &lmActSelect, 1 );
 456                 }
 457         }
 458
 459         /* Finally, the start state should be made final. */
 460         graph->setFinState( graph->startState );
 461 }
 462
 463 FsmAp *LongestMatch::walk( ParseData *pd )
 464 {
 465         /* The longest match has it's own name scope. */
 466         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 467
 468         /* Make each part of the longest match. */
 469         FsmAp **parts = new FsmAp*[longestMatchList->length()];
 470         LmPartList::Iter lmi = *longestMatchList;
 471         for ( int i = 0; lmi.lte(); lmi++, i++ ) {
 472                 /* Create the machine and embed the setting of the longest match id. */
 473                 parts[i] = lmi->join->walk( pd );
 474                 parts[i]->longMatchAction( pd->curActionOrd++, lmi );
 475         }
 476
 477         /* Union machines one and up with machine zero. The grammar dictates that
 478          * there will always be at least one part. */
 479         FsmAp *rtnVal = parts[0];
 480         for ( int i = 1; i < longestMatchList->length(); i++ ) {
 481                 rtnVal->unionOp( parts[i] );
 482                 afterOpMinimize( rtnVal );
 483         }
 484
 485         runLonestMatch( pd, rtnVal );
 486
 487         /* Pop the name scope. */
 488         pd->popNameScope( nameFrame );
 489
 490         delete[] parts;
 491         return rtnVal;
 492 }
 493
 494 FsmAp *JoinOrLm::walk( ParseData *pd )
 495 {
 496         FsmAp *rtnVal = 0;
 497         switch ( type ) {
 498         case JoinType:
 499                 rtnVal = join->walk( pd );
 500                 break;
 501         case LongestMatchType:
 502                 rtnVal = longestMatch->walk( pd );
 503                 break;
 504         }
 505         return rtnVal;
 506 }
 507
 508 void JoinOrLm::makeNameTree( ParseData *pd )
 509 {
 510         switch ( type ) {
 511         case JoinType:
 512                 join->makeNameTree( pd );
 513                 break;
 514         case LongestMatchType:
 515                 longestMatch->makeNameTree( pd );
 516                 break;
 517         }
 518 }
 519
 520 void JoinOrLm::resolveNameRefs( ParseData *pd )
 521 {
 522         switch ( type ) {
 523         case JoinType:
 524                 join->resolveNameRefs( pd );
 525                 break;
 526         case LongestMatchType:
 527                 longestMatch->resolveNameRefs( pd );
 528                 break;
 529         }
 530 }
 531
 532
 533 /* Construct with a location and the first expression. */
 534 Join::Join( const InputLoc &loc, Expression *expr )
 535 :
 536         loc(loc)
 537 {
 538         exprList.append( expr );
 539 }
 540
 541 /* Construct with a location and the first expression. */
 542 Join::Join( Expression *expr )
 543 :
 544         loc(loc)
 545 {
 546         exprList.append( expr );
 547 }
 548
 549 /* Walk an expression node. */
 550 FsmAp *Join::walk( ParseData *pd )
 551 {
 552         if ( exprList.length() > 1 )
 553                 return walkJoin( pd );
 554         else
 555                 return exprList.head->walk( pd );
 556 }
 557
 558 /* There is a list of expressions to join. */
 559 FsmAp *Join::walkJoin( ParseData *pd )
 560 {
 561         /* We enter into a new name scope. */
 562         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 563
 564         /* Evaluate the machines. */
 565         FsmAp **fsms = new FsmAp*[exprList.length()];
 566         ExprList::Iter expr = exprList;
 567         for ( int e = 0; e < exprList.length(); e++, expr++ )
 568                 fsms[e] = expr->walk( pd );
 569
 570         /* Get the start and final names. Final is
 571          * guaranteed to exist, start is not. */
 572         NameInst *startName = pd->curNameInst->start;
 573         NameInst *finalName = pd->curNameInst->final;
 574
 575         int startId = -1;
 576         if ( startName != 0 ) {
 577                 /* Take note that there was an implicit link to the start machine. */
 578                 pd->localNameScope->referencedNames.append( startName );
 579                 startId = startName->id;
 580         }
 581
 582         /* A final id of -1 indicates there is no epsilon that references the
 583          * final state, therefor do not create one or set an entry point to it. */
 584         int finalId = -1;
 585         if ( finalName->numRefs > 0 )
 586                 finalId = finalName->id;
 587
 588         /* Join machines 1 and up onto machine 0. */
 589         FsmAp *retFsm = fsms[0];
 590         retFsm->joinOp( startId, finalId, fsms+1, exprList.length()-1 );
 591
 592         /* We can now unset entry points that are not longer used. */
 593         pd->unsetObsoleteEntries( retFsm );
 594
 595         /* Pop the name scope. */
 596         pd->popNameScope( nameFrame );
 597
 598         delete[] fsms;
 599         return retFsm;
 600 }
 601
 602 void Join::makeNameTree( ParseData *pd )
 603 {
 604         if ( exprList.length() > 1 ) {
 605                 /* Create the new anonymous scope. */
 606                 NameInst *prevNameInst = pd->curNameInst;
 607                 pd->curNameInst = pd->addNameInst( loc, 0, false );
 608
 609                 /* Join scopes need an implicit "final" target. */
 610                 pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final",
 611                                 pd->nextNameId++, false );
 612
 613                 /* Recurse into all expressions in the list. */
 614                 for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
 615                         expr->makeNameTree( pd );
 616
 617                 /* The name scope ends, pop the name instantiation. */
 618                 pd->curNameInst = prevNameInst;
 619         }
 620         else {
 621                 /* Recurse into the single expression. */
 622                 exprList.head->makeNameTree( pd );
 623         }
 624 }
 625
 626
 627 void Join::resolveNameRefs( ParseData *pd )
 628 {
 629         /* Branch on whether or not there is to be a join. */
 630         if ( exprList.length() > 1 ) {
 631                 /* The variable definition enters a new scope. */
 632                 NameFrame nameFrame = pd->enterNameScope( true, 1 );
 633
 634                 /* The join scope must contain a start label. */
 635                 NameSet resolved = pd->resolvePart( pd->localNameScope, "start", true );
 636                 if ( resolved.length() > 0 ) {
 637                         /* Take the first. */
 638                         pd->curNameInst->start = resolved[0];
 639                         if ( resolved.length() > 1 ) {
 640                                 /* Complain about the multiple references. */
 641                                 error(loc) << "multiple start labels" << endl;
 642                                 errorStateLabels( resolved );
 643                         }
 644                 }
 645
 646                 /* Make sure there is a start label. */
 647                 if ( pd->curNameInst->start != 0 ) {
 648                         /* There is an implicit reference to start name. */
 649                         pd->curNameInst->start->numRefs += 1;
 650                 }
 651                 else {
 652                         /* No start label. Complain and recover by adding a label to the
 653                          * adding one. Recover ignoring the problem. */
 654                         error(loc) << "no start label" << endl;
 655                 }
 656
 657                 /* Recurse into all expressions in the list. */
 658                 for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
 659                         expr->resolveNameRefs( pd );
 660
 661                 /* The name scope ends, pop the name instantiation. */
 662                 pd->popNameScope( nameFrame );
 663         }
 664         else {
 665                 /* Recurse into the single expression. */
 666                 exprList.head->resolveNameRefs( pd );
 667         }
 668 }
 669
 670 /* Clean up after an expression node. */
 671 Expression::~Expression()
 672 {
 673         switch ( type ) {
 674                 case OrType: case IntersectType: case SubtractType:
 675                 case StrongSubtractType:
 676                         delete expression;
 677                         delete term;
 678                         break;
 679                 case TermType:
 680                         delete term;
 681                         break;
 682                 case BuiltinType:
 683                         break;
 684         }
 685 }
 686
 687 /* Evaluate a single expression node. */
 688 FsmAp *Expression::walk( ParseData *pd, bool lastInSeq )
 689 {
 690         FsmAp *rtnVal = 0;
 691         switch ( type ) {
 692                 case OrType: {
 693                         /* Evaluate the expression. */
 694                         rtnVal = expression->walk( pd, false );
 695                         /* Evaluate the term. */
 696                         FsmAp *rhs = term->walk( pd );
 697                         /* Perform union. */
 698                         rtnVal->unionOp( rhs );
 699                         afterOpMinimize( rtnVal, lastInSeq );
 700                         break;
 701                 }
 702                 case IntersectType: {
 703                         /* Evaluate the expression. */
 704                         rtnVal = expression->walk( pd );
 705                         /* Evaluate the term. */
 706                         FsmAp *rhs = term->walk( pd );
 707                         /* Perform intersection. */
 708                         rtnVal->intersectOp( rhs );
 709                         afterOpMinimize( rtnVal, lastInSeq );
 710                         break;
 711                 }
 712                 case SubtractType: {
 713                         /* Evaluate the expression. */
 714                         rtnVal = expression->walk( pd );
 715                         /* Evaluate the term. */
 716                         FsmAp *rhs = term->walk( pd );
 717                         /* Perform subtraction. */
 718                         rtnVal->subtractOp( rhs );
 719                         afterOpMinimize( rtnVal, lastInSeq );
 720                         break;
 721                 }
 722                 case StrongSubtractType: {
 723                         /* Evaluate the expression. */
 724                         rtnVal = expression->walk( pd );
 725
 726                         /* Evaluate the term and pad it with any* machines. */
 727                         FsmAp *rhs = dotStarFsm( pd );
 728                         FsmAp *termFsm = term->walk( pd );
 729                         FsmAp *trailAnyStar = dotStarFsm( pd );
 730                         rhs->concatOp( termFsm );
 731                         rhs->concatOp( trailAnyStar );
 732
 733                         /* Perform subtraction. */
 734                         rtnVal->subtractOp( rhs );
 735                         afterOpMinimize( rtnVal, lastInSeq );
 736                         break;
 737                 }
 738                 case TermType: {
 739                         /* Return result of the term. */
 740                         rtnVal = term->walk( pd );
 741                         break;
 742                 }
 743                 case BuiltinType: {
 744                         /* Duplicate the builtin. */
 745                         rtnVal = makeBuiltin( builtin, pd );
 746                         break;
 747                 }
 748         }
 749
 750         return rtnVal;
 751 }
 752
 753 void Expression::makeNameTree( ParseData *pd )
 754 {
 755         switch ( type ) {
 756         case OrType:
 757         case IntersectType:
 758         case SubtractType:
 759         case StrongSubtractType:
 760                 expression->makeNameTree( pd );
 761                 term->makeNameTree( pd );
 762                 break;
 763         case TermType:
 764                 term->makeNameTree( pd );
 765                 break;
 766         case BuiltinType:
 767                 break;
 768         }
 769 }
 770
 771 void Expression::resolveNameRefs( ParseData *pd )
 772 {
 773         switch ( type ) {
 774         case OrType:
 775         case IntersectType:
 776         case SubtractType:
 777         case StrongSubtractType:
 778                 expression->resolveNameRefs( pd );
 779                 term->resolveNameRefs( pd );
 780                 break;
 781         case TermType:
 782                 term->resolveNameRefs( pd );
 783                 break;
 784         case BuiltinType:
 785                 break;
 786         }
 787 }
 788
 789 /* Clean up after a term node. */
 790 Term::~Term()
 791 {
 792         switch ( type ) {
 793                 case ConcatType:
 794                 case RightStartType:
 795                 case RightFinishType:
 796                 case LeftType:
 797                         delete term;
 798                         delete factorWithAug;
 799                         break;
 800                 case FactorWithAugType:
 801                         delete factorWithAug;
 802                         break;
 803         }
 804 }
 805
 806 /* Evaluate a term node. */
 807 FsmAp *Term::walk( ParseData *pd, bool lastInSeq )
 808 {
 809         FsmAp *rtnVal = 0;
 810         switch ( type ) {
 811                 case ConcatType: {
 812                         /* Evaluate the Term. */
 813                         rtnVal = term->walk( pd, false );
 814                         /* Evaluate the FactorWithRep. */
 815                         FsmAp *rhs = factorWithAug->walk( pd );
 816                         /* Perform concatenation. */
 817                         rtnVal->concatOp( rhs );
 818                         afterOpMinimize( rtnVal, lastInSeq );
 819                         break;
 820                 }
 821                 case RightStartType: {
 822                         /* Evaluate the Term. */
 823                         rtnVal = term->walk( pd );
 824
 825                         /* Evaluate the FactorWithRep. */
 826                         FsmAp *rhs = factorWithAug->walk( pd );
 827
 828                         /* Set up the priority descriptors. The left machine gets the
 829                          * lower priority where as the right get the higher start priority. */
 830                         priorDescs[0].key = pd->nextPriorKey++;
 831                         priorDescs[0].priority = 0;
 832                         rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
 833
 834                         /* The start transitions right machine get the higher priority.
 835                          * Use the same unique key. */
 836                         priorDescs[1].key = priorDescs[0].key;
 837                         priorDescs[1].priority = 1;
 838                         rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
 839
 840                         /* Perform concatenation. */
 841                         rtnVal->concatOp( rhs );
 842                         afterOpMinimize( rtnVal, lastInSeq );
 843                         break;
 844                 }
 845                 case RightFinishType: {
 846                         /* Evaluate the Term. */
 847                         rtnVal = term->walk( pd );
 848
 849                         /* Evaluate the FactorWithRep. */
 850                         FsmAp *rhs = factorWithAug->walk( pd );
 851
 852                         /* Set up the priority descriptors. The left machine gets the
 853                          * lower priority where as the finishing transitions to the right
 854                          * get the higher priority. */
 855                         priorDescs[0].key = pd->nextPriorKey++;
 856                         priorDescs[0].priority = 0;
 857                         rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
 858
 859                         /* The finishing transitions of the right machine get the higher
 860                          * priority. Use the same unique key. */
 861                         priorDescs[1].key = priorDescs[0].key;
 862                         priorDescs[1].priority = 1;
 863                         rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
 864
 865                         /* Perform concatenation. */
 866                         rtnVal->concatOp( rhs );
 867                         afterOpMinimize( rtnVal, lastInSeq );
 868                         break;
 869                 }
 870                 case LeftType: {
 871                         /* Evaluate the Term. */
 872                         rtnVal = term->walk( pd );
 873
 874                         /* Evaluate the FactorWithRep. */
 875                         FsmAp *rhs = factorWithAug->walk( pd );
 876
 877                         /* Set up the priority descriptors. The left machine gets the
 878                          * higher priority. */
 879                         priorDescs[0].key = pd->nextPriorKey++;
 880                         priorDescs[0].priority = 1;
 881                         rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
 882
 883                         /* The right machine gets the lower priority.  Since
 884                          * startTransPrior might unnecessarily increase the number of
 885                          * states during the state machine construction process (due to
 886                          * isolation), we use allTransPrior instead, which has the same
 887                          * effect. */
 888                         priorDescs[1].key = priorDescs[0].key;
 889                         priorDescs[1].priority = 0;
 890                         rhs->allTransPrior( pd->curPriorOrd++, &priorDescs[1] );
 891
 892                         /* Perform concatenation. */
 893                         rtnVal->concatOp( rhs );
 894                         afterOpMinimize( rtnVal, lastInSeq );
 895                         break;
 896                 }
 897                 case FactorWithAugType: {
 898                         rtnVal = factorWithAug->walk( pd );
 899                         break;
 900                 }
 901         }
 902         return rtnVal;
 903 }
 904
 905 void Term::makeNameTree( ParseData *pd )
 906 {
 907         switch ( type ) {
 908         case ConcatType:
 909         case RightStartType:
 910         case RightFinishType:
 911         case LeftType:
 912                 term->makeNameTree( pd );
 913                 factorWithAug->makeNameTree( pd );
 914                 break;
 915         case FactorWithAugType:
 916                 factorWithAug->makeNameTree( pd );
 917                 break;
 918         }
 919 }
 920
 921 void Term::resolveNameRefs( ParseData *pd )
 922 {
 923         switch ( type ) {
 924         case ConcatType:
 925         case RightStartType:
 926         case RightFinishType:
 927         case LeftType:
 928                 term->resolveNameRefs( pd );
 929                 factorWithAug->resolveNameRefs( pd );
 930                 break;
 931         case FactorWithAugType:
 932                 factorWithAug->resolveNameRefs( pd );
 933                 break;
 934         }
 935 }
 936
 937 /* Clean up after a factor with augmentation node. */
 938 FactorWithAug::~FactorWithAug()
 939 {
 940         delete factorWithRep;
 941
 942         /* Walk the vector of parser actions, deleting function names. */
 943
 944         /* Clean up priority descriptors. */
 945         if ( priorDescs != 0 )
 946                 delete[] priorDescs;
 947 }
 948
 949 void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd )
 950 {
 951         /* Assign actions. */
 952         for ( int i = 0; i < actions.length(); i++ )  {
 953                 switch ( actions[i].type ) {
 954                 /* Transition actions. */
 955                 case at_start:
 956                         graph->startFsmAction( actionOrd[i], actions[i].action );
 957                         afterOpMinimize( graph );
 958                         break;
 959                 case at_all:
 960                         graph->allTransAction( actionOrd[i], actions[i].action );
 961                         break;
 962                 case at_finish:
 963                         graph->finishFsmAction( actionOrd[i], actions[i].action );
 964                         break;
 965                 case at_leave:
 966                         graph->leaveFsmAction( actionOrd[i], actions[i].action );
 967                         break;
 968
 969                 /* Global error actions. */
 970                 case at_start_gbl_error:
 971                         graph->startErrorAction( actionOrd[i], actions[i].action, 0 );
 972                         afterOpMinimize( graph );
 973                         break;
 974                 case at_all_gbl_error:
 975                         graph->allErrorAction( actionOrd[i], actions[i].action, 0 );
 976                         break;
 977                 case at_final_gbl_error:
 978                         graph->finalErrorAction( actionOrd[i], actions[i].action, 0 );
 979                         break;
 980                 case at_not_start_gbl_error:
 981                         graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 );
 982                         break;
 983                 case at_not_final_gbl_error:
 984                         graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 );
 985                         break;
 986                 case at_middle_gbl_error:
 987                         graph->middleErrorAction( actionOrd[i], actions[i].action, 0 );
 988                         break;
 989
 990                 /* Local error actions. */
 991                 case at_start_local_error:
 992                         graph->startErrorAction( actionOrd[i], actions[i].action,
 993                                         actions[i].localErrKey );
 994                         afterOpMinimize( graph );
 995                         break;
 996                 case at_all_local_error:
 997                         graph->allErrorAction( actionOrd[i], actions[i].action,
 998                                         actions[i].localErrKey );
 999                         break;
1000                 case at_final_local_error:
1001                         graph->finalErrorAction( actionOrd[i], actions[i].action,
1002                                         actions[i].localErrKey );
1003                         break;
1004                 case at_not_start_local_error:
1005                         graph->notStartErrorAction( actionOrd[i], actions[i].action,
1006                                         actions[i].localErrKey );
1007                         break;
1008                 case at_not_final_local_error:
1009                         graph->notFinalErrorAction( actionOrd[i], actions[i].action,
1010                                         actions[i].localErrKey );
1011                         break;
1012                 case at_middle_local_error:
1013                         graph->middleErrorAction( actionOrd[i], actions[i].action,
1014                                         actions[i].localErrKey );
1015                         break;
1016
1017                 /* EOF actions. */
1018                 case at_start_eof:
1019                         graph->startEOFAction( actionOrd[i], actions[i].action );
1020                         afterOpMinimize( graph );
1021                         break;
1022                 case at_all_eof:
1023                         graph->allEOFAction( actionOrd[i], actions[i].action );
1024                         break;
1025                 case at_final_eof:
1026                         graph->finalEOFAction( actionOrd[i], actions[i].action );
1027                         break;
1028                 case at_not_start_eof:
1029                         graph->notStartEOFAction( actionOrd[i], actions[i].action );
1030                         break;
1031                 case at_not_final_eof:
1032                         graph->notFinalEOFAction( actionOrd[i], actions[i].action );
1033                         break;
1034                 case at_middle_eof:
1035                         graph->middleEOFAction( actionOrd[i], actions[i].action );
1036                         break;
1037
1038                 /* To State Actions. */
1039                 case at_start_to_state:
1040                         graph->startToStateAction( actionOrd[i], actions[i].action );
1041                         afterOpMinimize( graph );
1042                         break;
1043                 case at_all_to_state:
1044                         graph->allToStateAction( actionOrd[i], actions[i].action );
1045                         break;
1046                 case at_final_to_state:
1047                         graph->finalToStateAction( actionOrd[i], actions[i].action );
1048                         break;
1049                 case at_not_start_to_state:
1050                         graph->notStartToStateAction( actionOrd[i], actions[i].action );
1051                         break;
1052                 case at_not_final_to_state:
1053                         graph->notFinalToStateAction( actionOrd[i], actions[i].action );
1054                         break;
1055                 case at_middle_to_state:
1056                         graph->middleToStateAction( actionOrd[i], actions[i].action );
1057                         break;
1058
1059                 /* From State Actions. */
1060                 case at_start_from_state:
1061                         graph->startFromStateAction( actionOrd[i], actions[i].action );
1062                         afterOpMinimize( graph );
1063                         break;
1064                 case at_all_from_state:
1065                         graph->allFromStateAction( actionOrd[i], actions[i].action );
1066                         break;
1067                 case at_final_from_state:
1068                         graph->finalFromStateAction( actionOrd[i], actions[i].action );
1069                         break;
1070                 case at_not_start_from_state:
1071                         graph->notStartFromStateAction( actionOrd[i], actions[i].action );
1072                         break;
1073                 case at_not_final_from_state:
1074                         graph->notFinalFromStateAction( actionOrd[i], actions[i].action );
1075                         break;
1076                 case at_middle_from_state:
1077                         graph->middleFromStateAction( actionOrd[i], actions[i].action );
1078                         break;
1079
1080                 /* Remaining cases, prevented by the parser. */
1081                 default:
1082                         assert( false );
1083                         break;
1084                 }
1085         }
1086 }
1087
1088 void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd )
1089 {
1090         /* Assign priorities. */
1091         for ( int i = 0; i < priorityAugs.length(); i++ ) {
1092                 switch ( priorityAugs[i].type ) {
1093                 case at_start:
1094                         graph->startFsmPrior( priorOrd[i], &priorDescs[i]);
1095                         /* Start fsm priorities are a special case that may require
1096                          * minimization afterwards. */
1097                         afterOpMinimize( graph );
1098                         break;
1099                 case at_all:
1100                         graph->allTransPrior( priorOrd[i], &priorDescs[i] );
1101                         break;
1102                 case at_finish:
1103                         graph->finishFsmPrior( priorOrd[i], &priorDescs[i] );
1104                         break;
1105                 case at_leave:
1106                         graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] );
1107                         break;
1108
1109                 default:
1110                         /* Parser Prevents this case. */
1111                         break;
1112                 }
1113         }
1114 }
1115
1116 void FactorWithAug::assignConditions( FsmAp *graph )
1117 {
1118         for ( int i = 0; i < conditions.length(); i++ )  {
1119                 switch ( conditions[i].type ) {
1120                 /* Transition actions. */
1121                 case at_start:
1122                         graph->startFsmCondition( conditions[i].action );
1123                         afterOpMinimize( graph );
1124                         break;
1125                 case at_all:
1126                         graph->allTransCondition( conditions[i].action );
1127                         break;
1128                 case at_leave:
1129                         graph->leaveFsmCondition( conditions[i].action );
1130                         break;
1131                 default:
1132                         break;
1133                 }
1134         }
1135 }
1136
1137
1138 /* Evaluate a factor with augmentation node. */
1139 FsmAp *FactorWithAug::walk( ParseData *pd )
1140 {
1141         /* Enter into the scopes created for the labels. */
1142         NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1143
1144         /* Make the array of function orderings. */
1145         int *actionOrd = 0;
1146         if ( actions.length() > 0 )
1147                 actionOrd = new int[actions.length()];
1148
1149         /* First walk the list of actions, assigning order to all starting
1150          * actions. */
1151         for ( int i = 0; i < actions.length(); i++ ) {
1152                 if ( actions[i].type == at_start ||
1153                                 actions[i].type == at_start_gbl_error ||
1154                                 actions[i].type == at_start_local_error ||
1155                                 actions[i].type == at_start_to_state ||
1156                                 actions[i].type == at_start_from_state ||
1157                                 actions[i].type == at_start_eof )
1158                         actionOrd[i] = pd->curActionOrd++;
1159         }
1160
1161         /* Evaluate the factor with repetition. */
1162         FsmAp *rtnVal = factorWithRep->walk( pd );
1163
1164         /* Compute the remaining action orderings. */
1165         for ( int i = 0; i < actions.length(); i++ ) {
1166                 if ( actions[i].type != at_start &&
1167                                 actions[i].type != at_start_gbl_error &&
1168                                 actions[i].type != at_start_local_error &&
1169                                 actions[i].type != at_start_to_state &&
1170                                 actions[i].type != at_start_from_state &&
1171                                 actions[i].type != at_start_eof )
1172                         actionOrd[i] = pd->curActionOrd++;
1173         }
1174
1175         /* Embed conditions. */
1176         assignConditions( rtnVal );
1177
1178         /* Embed actions. */
1179         assignActions( pd, rtnVal , actionOrd );
1180
1181         /* Make the array of priority orderings. Orderings are local to this walk
1182          * of the factor with augmentation. */
1183         int *priorOrd = 0;
1184         if ( priorityAugs.length() > 0 )
1185                 priorOrd = new int[priorityAugs.length()];
1186
1187         /* Walk all priorities, assigning the priority ordering. */
1188         for ( int i = 0; i < priorityAugs.length(); i++ )
1189                 priorOrd[i] = pd->curPriorOrd++;
1190
1191         /* If the priority descriptors have not been made, make them now.  Make
1192          * priority descriptors for each priority asignment that will be passed to
1193          * the fsm. Used to keep track of the key, value and used bit. */
1194         if ( priorDescs == 0 && priorityAugs.length() > 0 ) {
1195                 priorDescs = new PriorDesc[priorityAugs.length()];
1196                 for ( int i = 0; i < priorityAugs.length(); i++ ) {
1197                         /* Init the prior descriptor for the priority setting. */
1198                         priorDescs[i].key = priorityAugs[i].priorKey;
1199                         priorDescs[i].priority = priorityAugs[i].priorValue;
1200                 }
1201         }
1202
1203         /* Assign priorities into the machine. */
1204         assignPriorities( rtnVal, priorOrd );
1205
1206         /* Assign epsilon transitions. */
1207         for ( int e = 0; e < epsilonLinks.length(); e++ ) {
1208                 /* Get the name, which may not exist. If it doesn't then silently
1209                  * ignore it because an error has already been reported. */
1210                 NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++];
1211                 if ( epTarg != 0 ) {
1212                         /* Make the epsilon transitions. */
1213                         rtnVal->epsilonTrans( epTarg->id );
1214
1215                         /* Note that we have made a link to the name. */
1216                         pd->localNameScope->referencedNames.append( epTarg );
1217                 }
1218         }
1219
1220         /* Set entry points for labels. */
1221         if ( labels.length() > 0 ) {
1222                 /* Pop the names. */
1223                 pd->resetNameScope( nameFrame );
1224
1225                 /* Make labels that are referenced into entry points. */
1226                 for ( int i = 0; i < labels.length(); i++ ) {
1227                         pd->enterNameScope( false, 1 );
1228
1229                         /* Will always be found. */
1230                         NameInst *name = pd->curNameInst;
1231
1232                         /* If the name is referenced then set the entry point. */
1233                         if ( name->numRefs > 0 )
1234                                 rtnVal->setEntry( name->id, rtnVal->startState );
1235                 }
1236
1237                 pd->popNameScope( nameFrame );
1238         }
1239
1240         if ( priorOrd != 0 )
1241                 delete[] priorOrd;
1242         if ( actionOrd != 0 )
1243                 delete[] actionOrd;
1244         return rtnVal;
1245 }
1246
1247 void FactorWithAug::makeNameTree( ParseData *pd )
1248 {
1249         /* Add the labels to the tree of instantiated names. Each label
1250          * makes a new scope. */
1251         NameInst *prevNameInst = pd->curNameInst;
1252         for ( int i = 0; i < labels.length(); i++ )
1253                 pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data, true );
1254
1255         /* Recurse, then pop the names. */
1256         factorWithRep->makeNameTree( pd );
1257         pd->curNameInst = prevNameInst;
1258 }
1259
1260
1261 void FactorWithAug::resolveNameRefs( ParseData *pd )
1262 {
1263         /* Enter into the name scope created by any labels. */
1264         NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1265
1266         /* Note action references. */
1267         for ( int i = 0; i < actions.length(); i++ )
1268                 actions[i].action->actionRefs.append( pd->localNameScope );
1269
1270         /* Recurse first. IMPORTANT: we must do the exact same traversal as when
1271          * the tree is constructed. */
1272         factorWithRep->resolveNameRefs( pd );
1273
1274         /* Resolve epsilon transitions. */
1275         for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) {
1276                 /* Get the link. */
1277                 EpsilonLink &link = epsilonLinks[ep];
1278                 NameInst *resolvedName = 0;
1279
1280                 if ( link.target.length() == 1 && strcmp( link.target.data[0], "final" ) == 0 ) {
1281                         /* Epsilon drawn to an implicit final state. An implicit final is
1282                          * only available in join operations. */
1283                         resolvedName = pd->localNameScope->final;
1284                 }
1285                 else {
1286                         /* Do an search for the name. */
1287                         NameSet resolved;
1288                         pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 );
1289                         if ( resolved.length() > 0 ) {
1290                                 /* Take the first one. */
1291                                 resolvedName = resolved[0];
1292                                 if ( resolved.length() > 1 ) {
1293                                         /* Complain about the multiple references. */
1294                                         error(link.loc) << "state reference " << link.target <<
1295                                                         " resolves to multiple entry points" << endl;
1296                                         errorStateLabels( resolved );
1297                                 }
1298                         }
1299                 }
1300
1301                 /* This is tricky, we stuff resolved epsilon transitions into one long
1302                  * vector in the parse data structure. Since the name resolution and
1303                  * graph generation both do identical walks of the parse tree we
1304                  * should always find the link resolutions in the right place.  */
1305                 pd->epsilonResolvedLinks.append( resolvedName );
1306
1307                 if ( resolvedName != 0 ) {
1308                         /* Found the name, bump of the reference count on it. */
1309                         resolvedName->numRefs += 1;
1310                 }
1311                 else {
1312                         /* Complain, no recovery action, the epsilon op will ignore any
1313                          * epsilon transitions whose names did not resolve. */
1314                         error(link.loc) << "could not resolve label " << link.target << endl;
1315                 }
1316         }
1317
1318         if ( labels.length() > 0 )
1319                 pd->popNameScope( nameFrame );
1320 }
1321
1322
1323 /* Clean up after a factor with repetition node. */
1324 FactorWithRep::~FactorWithRep()
1325 {
1326         switch ( type ) {
1327                 case StarType: case StarStarType: case OptionalType: case PlusType:
1328                 case ExactType: case MaxType: case MinType: case RangeType:
1329                         delete factorWithRep;
1330                         break;
1331                 case FactorWithNegType:
1332                         delete factorWithNeg;
1333                         break;
1334         }
1335 }
1336
1337 /* Evaluate a factor with repetition node. */
1338 FsmAp *FactorWithRep::walk( ParseData *pd )
1339 {
1340         FsmAp *retFsm = 0;
1341
1342         switch ( type ) {
1343         case StarType: {
1344                 /* Evaluate the FactorWithRep. */
1345                 retFsm = factorWithRep->walk( pd );
1346                 if ( retFsm->startState->isFinState() ) {
1347                         warning(loc) << "applying kleene star to a machine that "
1348                                         "accepts zero length word" << endl;
1349                 }
1350
1351                 /* Shift over the start action orders then do the kleene star. */
1352                 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1353                 retFsm->starOp( );
1354                 afterOpMinimize( retFsm );
1355                 break;
1356         }
1357         case StarStarType: {
1358                 /* Evaluate the FactorWithRep. */
1359                 retFsm = factorWithRep->walk( pd );
1360                 if ( retFsm->startState->isFinState() ) {
1361                         warning(loc) << "applying kleene star to a machine that "
1362                                         "accepts zero length word" << endl;
1363                 }
1364
1365                 /* Set up the prior descs. All gets priority one, whereas leaving gets
1366                  * priority zero. Make a unique key so that these priorities don't
1367                  * interfere with any priorities set by the user. */
1368                 priorDescs[0].key = pd->nextPriorKey++;
1369                 priorDescs[0].priority = 1;
1370                 retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
1371
1372                 /* Leaveing gets priority 0. Use same unique key. */
1373                 priorDescs[1].key = priorDescs[0].key;
1374                 priorDescs[1].priority = 0;
1375                 retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
1376
1377                 /* Shift over the start action orders then do the kleene star. */
1378                 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1379                 retFsm->starOp( );
1380                 afterOpMinimize( retFsm );
1381                 break;
1382         }
1383         case OptionalType: {
1384                 /* Make the null fsm. */
1385                 FsmAp *nu = new FsmAp();
1386                 nu->lambdaFsm( );
1387
1388                 /* Evaluate the FactorWithRep. */
1389                 retFsm = factorWithRep->walk( pd );
1390
1391                 /* Perform the question operator. */
1392                 retFsm->unionOp( nu );
1393                 afterOpMinimize( retFsm );
1394                 break;
1395         }
1396         case PlusType: {
1397                 /* Evaluate the FactorWithRep. */
1398                 retFsm = factorWithRep->walk( pd );
1399                 if ( retFsm->startState->isFinState() ) {
1400                         warning(loc) << "applying plus operator to a machine that "
1401                                         "accpets zero length word" << endl;
1402                 }
1403
1404                 /* Need a duplicated for the star end. */
1405                 FsmAp *dup = new FsmAp( *retFsm );
1406
1407                 /* The start func orders need to be shifted before doing the star. */
1408                 pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd );
1409
1410                 /* Star the duplicate. */
1411                 dup->starOp( );
1412                 afterOpMinimize( dup );
1413
1414                 retFsm->concatOp( dup );
1415                 afterOpMinimize( retFsm );
1416                 break;
1417         }
1418         case ExactType: {
1419                 /* Get an int from the repetition amount. */
1420                 if ( lowerRep == 0 ) {
1421                         /* No copies. Don't need to evaluate the factorWithRep.
1422                          * This Defeats the purpose so give a warning. */
1423                         warning(loc) << "exactly zero repetitions results "
1424                                         "in the null machine" << endl;
1425
1426                         retFsm = new FsmAp();
1427                         retFsm->lambdaFsm();
1428                 }
1429                 else {
1430                         /* Evaluate the first FactorWithRep. */
1431                         retFsm = factorWithRep->walk( pd );
1432                         if ( retFsm->startState->isFinState() ) {
1433                                 warning(loc) << "applying repetition to a machine that "
1434                                                 "accepts zero length word" << endl;
1435                         }
1436
1437                         /* The start func orders need to be shifted before doing the
1438                          * repetition. */
1439                         pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1440
1441                         /* Do the repetition on the machine. Already guarded against n == 0 */
1442                         retFsm->repeatOp( lowerRep );
1443                         afterOpMinimize( retFsm );
1444                 }
1445                 break;
1446         }
1447         case MaxType: {
1448                 /* Get an int from the repetition amount. */
1449                 if ( upperRep == 0 ) {
1450                         /* No copies. Don't need to evaluate the factorWithRep.
1451                          * This Defeats the purpose so give a warning. */
1452                         warning(loc) << "max zero repetitions results "
1453                                         "in the null machine" << endl;
1454
1455                         retFsm = new FsmAp();
1456                         retFsm->lambdaFsm();
1457                 }
1458                 else {
1459                         /* Evaluate the first FactorWithRep. */
1460                         retFsm = factorWithRep->walk( pd );
1461                         if ( retFsm->startState->isFinState() ) {
1462                                 warning(loc) << "applying max repetition to a machine that "
1463                                                 "accepts zero length word" << endl;
1464                         }
1465
1466                         /* The start func orders need to be shifted before doing the
1467                          * repetition. */
1468                         pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1469
1470                         /* Do the repetition on the machine. Already guarded against n == 0 */
1471                         retFsm->optionalRepeatOp( upperRep );
1472                         afterOpMinimize( retFsm );
1473                 }
1474                 break;
1475         }
1476         case MinType: {
1477                 /* Evaluate the repeated machine. */
1478                 retFsm = factorWithRep->walk( pd );
1479                 if ( retFsm->startState->isFinState() ) {
1480                         warning(loc) << "applying min repetition to a machine that "
1481                                         "accepts zero length word" << endl;
1482                 }
1483
1484                 /* The start func orders need to be shifted before doing the repetition
1485                  * and the kleene star. */
1486                 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1487
1488                 if ( lowerRep == 0 ) {
1489                         /* Acts just like a star op on the machine to return. */
1490                         retFsm->starOp( );
1491                         afterOpMinimize( retFsm );
1492                 }
1493                 else {
1494                         /* Take a duplicate for the plus. */
1495                         FsmAp *dup = new FsmAp( *retFsm );
1496
1497                         /* Do repetition on the first half. */
1498                         retFsm->repeatOp( lowerRep );
1499                         afterOpMinimize( retFsm );
1500
1501                         /* Star the duplicate. */
1502                         dup->starOp( );
1503                         afterOpMinimize( dup );
1504
1505                         /* Tak on the kleene star. */
1506                         retFsm->concatOp( dup );
1507                         afterOpMinimize( retFsm );
1508                 }
1509                 break;
1510         }
1511         case RangeType: {
1512                 /* Check for bogus range. */
1513                 if ( upperRep - lowerRep < 0 ) {
1514                         error(loc) << "invalid range repetition" << endl;
1515
1516                         /* Return null machine as recovery. */
1517                         retFsm = new FsmAp();
1518                         retFsm->lambdaFsm();
1519                 }
1520                 else if ( lowerRep == 0 && upperRep == 0 ) {
1521                         /* No copies. Don't need to evaluate the factorWithRep.  This
1522                          * defeats the purpose so give a warning. */
1523                         warning(loc) << "zero to zero repetitions results "
1524                                         "in the null machine" << endl;
1525
1526                         retFsm = new FsmAp();
1527                         retFsm->lambdaFsm();
1528                 }
1529                 else {
1530                         /* Now need to evaluate the repeated machine. */
1531                         retFsm = factorWithRep->walk( pd );
1532                         if ( retFsm->startState->isFinState() ) {
1533                                 warning(loc) << "applying range repetition to a machine that "
1534                                                 "accepts zero length word" << endl;
1535                         }
1536
1537                         /* The start func orders need to be shifted before doing both kinds
1538                          * of repetition. */
1539                         pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1540
1541                         if ( lowerRep == 0 ) {
1542                                 /* Just doing max repetition. Already guarded against n == 0. */
1543                                 retFsm->optionalRepeatOp( upperRep );
1544                                 afterOpMinimize( retFsm );
1545                         }
1546                         else if ( lowerRep == upperRep ) {
1547                                 /* Just doing exact repetition. Already guarded against n == 0. */
1548                                 retFsm->repeatOp( lowerRep );
1549                                 afterOpMinimize( retFsm );
1550                         }
1551                         else {
1552                                 /* This is the case that 0 < lowerRep < upperRep. Take a
1553                                  * duplicate for the optional repeat. */
1554                                 FsmAp *dup = new FsmAp( *retFsm );
1555
1556                                 /* Do repetition on the first half. */
1557                                 retFsm->repeatOp( lowerRep );
1558                                 afterOpMinimize( retFsm );
1559
1560                                 /* Do optional repetition on the second half. */
1561                                 dup->optionalRepeatOp( upperRep - lowerRep );
1562                                 afterOpMinimize( dup );
1563
1564                                 /* Tak on the duplicate machine. */
1565                                 retFsm->concatOp( dup );
1566                                 afterOpMinimize( retFsm );
1567                         }
1568                 }
1569                 break;
1570         }
1571         case FactorWithNegType: {
1572                 /* Evaluate the Factor. Pass it up. */
1573                 retFsm = factorWithNeg->walk( pd );
1574                 break;
1575         }}
1576         return retFsm;
1577 }
1578
1579 void FactorWithRep::makeNameTree( ParseData *pd )
1580 {
1581         switch ( type ) {
1582         case StarType:
1583         case StarStarType:
1584         case OptionalType:
1585         case PlusType:
1586         case ExactType:
1587         case MaxType:
1588         case MinType:
1589         case RangeType:
1590                 factorWithRep->makeNameTree( pd );
1591                 break;
1592         case FactorWithNegType:
1593                 factorWithNeg->makeNameTree( pd );
1594                 break;
1595         }
1596 }
1597
1598 void FactorWithRep::resolveNameRefs( ParseData *pd )
1599 {
1600         switch ( type ) {
1601         case StarType:
1602         case StarStarType:
1603         case OptionalType:
1604         case PlusType:
1605         case ExactType:
1606         case MaxType:
1607         case MinType:
1608         case RangeType:
1609                 factorWithRep->resolveNameRefs( pd );
1610                 break;
1611         case FactorWithNegType:
1612                 factorWithNeg->resolveNameRefs( pd );
1613                 break;
1614         }
1615 }
1616
1617 /* Clean up after a factor with negation node. */
1618 FactorWithNeg::~FactorWithNeg()
1619 {
1620         switch ( type ) {
1621                 case NegateType:
1622                 case CharNegateType:
1623                         delete factorWithNeg;
1624                         break;
1625                 case FactorType:
1626                         delete factor;
1627                         break;
1628         }
1629 }
1630
1631 /* Evaluate a factor with negation node. */
1632 FsmAp *FactorWithNeg::walk( ParseData *pd )
1633 {
1634         FsmAp *retFsm = 0;
1635
1636         switch ( type ) {
1637         case NegateType: {
1638                 /* Evaluate the factorWithNeg. */
1639                 FsmAp *toNegate = factorWithNeg->walk( pd );
1640
1641                 /* Negation is subtract from dot-star. */
1642                 retFsm = dotStarFsm( pd );
1643                 retFsm->subtractOp( toNegate );
1644                 afterOpMinimize( retFsm );
1645                 break;
1646         }
1647         case CharNegateType: {
1648                 /* Evaluate the factorWithNeg. */
1649                 FsmAp *toNegate = factorWithNeg->walk( pd );
1650
1651                 /* CharNegation is subtract from dot. */
1652                 retFsm = dotFsm( pd );
1653                 retFsm->subtractOp( toNegate );
1654                 afterOpMinimize( retFsm );
1655                 break;
1656         }
1657         case FactorType: {
1658                 /* Evaluate the Factor. Pass it up. */
1659                 retFsm = factor->walk( pd );
1660                 break;
1661         }}
1662         return retFsm;
1663 }
1664
1665 void FactorWithNeg::makeNameTree( ParseData *pd )
1666 {
1667         switch ( type ) {
1668         case NegateType:
1669         case CharNegateType:
1670                 factorWithNeg->makeNameTree( pd );
1671                 break;
1672         case FactorType:
1673                 factor->makeNameTree( pd );
1674                 break;
1675         }
1676 }
1677
1678 void FactorWithNeg::resolveNameRefs( ParseData *pd )
1679 {
1680         switch ( type ) {
1681         case NegateType:
1682         case CharNegateType:
1683                 factorWithNeg->resolveNameRefs( pd );
1684                 break;
1685         case FactorType:
1686                 factor->resolveNameRefs( pd );
1687                 break;
1688         }
1689 }
1690
1691 /* Clean up after a factor node. */
1692 Factor::~Factor()
1693 {
1694         switch ( type ) {
1695                 case LiteralType:
1696                         delete literal;
1697                         break;
1698                 case RangeType:
1699                         delete range;
1700                         break;
1701                 case OrExprType:
1702                         delete reItem;
1703                         break;
1704                 case RegExprType:
1705                         delete regExpr;
1706                         break;
1707                 case ReferenceType:
1708                         break;
1709                 case ParenType:
1710                         delete join;
1711                         break;
1712                 case LongestMatchType:
1713                         delete longestMatch;
1714                         break;
1715         }
1716 }
1717
1718 /* Evaluate a factor node. */
1719 FsmAp *Factor::walk( ParseData *pd )
1720 {
1721         FsmAp *rtnVal = 0;
1722         switch ( type ) {
1723         case LiteralType:
1724                 rtnVal = literal->walk( pd );
1725                 break;
1726         case RangeType:
1727                 rtnVal = range->walk( pd );
1728                 break;
1729         case OrExprType:
1730                 rtnVal = reItem->walk( pd, 0 );
1731                 break;
1732         case RegExprType:
1733                 rtnVal = regExpr->walk( pd, 0 );
1734                 break;
1735         case ReferenceType:
1736                 rtnVal = varDef->walk( pd );
1737                 break;
1738         case ParenType:
1739                 rtnVal = join->walk( pd );
1740                 break;
1741         case LongestMatchType:
1742                 rtnVal = longestMatch->walk( pd );
1743                 break;
1744         }
1745
1746         return rtnVal;
1747 }
1748
1749 void Factor::makeNameTree( ParseData *pd )
1750 {
1751         switch ( type ) {
1752         case LiteralType:
1753         case RangeType:
1754         case OrExprType:
1755         case RegExprType:
1756                 break;
1757         case ReferenceType:
1758                 varDef->makeNameTree( loc, pd );
1759                 break;
1760         case ParenType:
1761                 join->makeNameTree( pd );
1762                 break;
1763         case LongestMatchType:
1764                 longestMatch->makeNameTree( pd );
1765                 break;
1766         }
1767 }
1768
1769 void Factor::resolveNameRefs( ParseData *pd )
1770 {
1771         switch ( type ) {
1772         case LiteralType:
1773         case RangeType:
1774         case OrExprType:
1775         case RegExprType:
1776                 break;
1777         case ReferenceType:
1778                 varDef->resolveNameRefs( pd );
1779                 break;
1780         case ParenType:
1781                 join->resolveNameRefs( pd );
1782                 break;
1783         case LongestMatchType:
1784                 longestMatch->resolveNameRefs( pd );
1785                 break;
1786         }
1787 }
1788
1789 /* Clean up a range object. Must delete the two literals. */
1790 Range::~Range()
1791 {
1792         delete lowerLit;
1793         delete upperLit;
1794 }
1795
1796 bool Range::verifyRangeFsm( FsmAp *rangeEnd )
1797 {
1798         /* Must have two states. */
1799         if ( rangeEnd->stateList.length() != 2 )
1800                 return false;
1801         /* The start state cannot be final. */
1802         if ( rangeEnd->startState->isFinState() )
1803                 return false;
1804         /* There should be only one final state. */
1805         if ( rangeEnd->finStateSet.length() != 1 )
1806                 return false;
1807         /* The final state cannot have any transitions out. */
1808         if ( rangeEnd->finStateSet[0]->outList.length() != 0 )
1809                 return false;
1810         /* The start state should have only one transition out. */
1811         if ( rangeEnd->startState->outList.length() != 1 )
1812                 return false;
1813         /* The singe transition out of the start state should not be a range. */
1814         TransAp *startTrans = rangeEnd->startState->outList.head;
1815         if ( startTrans->lowKey != startTrans->highKey )
1816                 return false;
1817         return true;
1818 }
1819
1820 /* Evaluate a range. Gets the lower an upper key and makes an fsm range. */
1821 FsmAp *Range::walk( ParseData *pd )
1822 {
1823         /* Construct and verify the suitability of the lower end of the range. */
1824         FsmAp *lowerFsm = lowerLit->walk( pd );
1825         if ( !verifyRangeFsm( lowerFsm ) ) {
1826                 error(lowerLit->token.loc) <<
1827                         "bad range lower end, must be a single character" << endl;
1828         }
1829
1830         /* Construct and verify the upper end. */
1831         FsmAp *upperFsm = upperLit->walk( pd );
1832         if ( !verifyRangeFsm( upperFsm ) ) {
1833                 error(upperLit->token.loc) <<
1834                         "bad range upper end, must be a single character" << endl;
1835         }
1836
1837         /* Grab the keys from the machines, then delete them. */
1838         Key lowKey = lowerFsm->startState->outList.head->lowKey;
1839         Key highKey = upperFsm->startState->outList.head->lowKey;
1840         delete lowerFsm;
1841         delete upperFsm;
1842
1843         /* Validate the range. */
1844         if ( lowKey > highKey ) {
1845                 /* Recover by setting upper to lower; */
1846                 error(lowerLit->token.loc) << "lower end of range is greater then upper end" << endl;
1847                 highKey = lowKey;
1848         }
1849
1850         /* Return the range now that it is validated. */
1851         FsmAp *retFsm = new FsmAp();
1852         retFsm->rangeFsm( lowKey, highKey );
1853         return retFsm;
1854 }
1855
1856 /* Evaluate a literal object. */
1857 FsmAp *Literal::walk( ParseData *pd )
1858 {
1859         /* FsmAp to return, is the alphabet signed. */
1860         FsmAp *rtnVal = 0;
1861
1862         switch ( type ) {
1863         case Number: {
1864                 /* Make the fsm key in int format. */
1865                 Key fsmKey = makeFsmKeyNum( token.data, token.loc, pd );
1866                 /* Make the new machine. */
1867                 rtnVal = new FsmAp();
1868                 rtnVal->concatFsm( fsmKey );
1869                 break;
1870         }
1871         case LitString: {
1872                 /* Make the array of keys in int format. */
1873                 Token interp;
1874                 bool caseInsensitive;
1875                 token.prepareLitString( interp, caseInsensitive );
1876                 Key *arr = new Key[interp.length];
1877                 makeFsmKeyArray( arr, interp.data, interp.length, pd );
1878
1879                 /* Make the new machine. */
1880                 rtnVal = new FsmAp();
1881                 if ( caseInsensitive )
1882                         rtnVal->concatFsmCI( arr, interp.length );
1883                 else
1884                         rtnVal->concatFsm( arr, interp.length );
1885                 delete[] interp.data;
1886                 delete[] arr;
1887                 break;
1888         }}
1889         return rtnVal;
1890 }
1891
1892 /* Clean up after a regular expression object. */
1893 RegExpr::~RegExpr()
1894 {
1895         switch ( type ) {
1896                 case RecurseItem:
1897                         delete regExpr;
1898                         delete item;
1899                         break;
1900                 case Empty:
1901                         break;
1902         }
1903 }
1904
1905 /* Evaluate a regular expression object. */
1906 FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex )
1907 {
1908         /* This is the root regex, pass down a pointer to this. */
1909         if ( rootRegex == 0 )
1910                 rootRegex = this;
1911
1912         FsmAp *rtnVal = 0;
1913         switch ( type ) {
1914                 case RecurseItem: {
1915                         /* Walk both items. */
1916                         rtnVal = regExpr->walk( pd, rootRegex );
1917                         FsmAp *fsm2 = item->walk( pd, rootRegex );
1918                         rtnVal->concatOp( fsm2 );
1919                         break;
1920                 }
1921                 case Empty: {
1922                         rtnVal = new FsmAp();
1923                         rtnVal->lambdaFsm();
1924                         break;
1925                 }
1926         }
1927         return rtnVal;
1928 }
1929
1930 /* Clean up after an item in a regular expression. */
1931 ReItem::~ReItem()
1932 {
1933         switch ( type ) {
1934                 case Data:
1935                 case Dot:
1936                         break;
1937                 case OrBlock:
1938                 case NegOrBlock:
1939                         delete orBlock;
1940                         break;
1941         }
1942 }
1943
1944 /* Evaluate a regular expression object. */
1945 FsmAp *ReItem::walk( ParseData *pd, RegExpr *rootRegex )
1946 {
1947         /* The fsm to return, is the alphabet signed? */
1948         FsmAp *rtnVal = 0;
1949
1950         switch ( type ) {
1951                 case Data: {
1952                         /* Move the data into an integer array and make a concat fsm. */
1953                         Key *arr = new Key[token.length];
1954                         makeFsmKeyArray( arr, token.data, token.length, pd );
1955
1956                         /* Make the concat fsm. */
1957                         rtnVal = new FsmAp();
1958                         if ( rootRegex != 0 && rootRegex->caseInsensitive )
1959                                 rtnVal->concatFsmCI( arr, token.length );
1960                         else
1961                                 rtnVal->concatFsm( arr, token.length );
1962                         delete[] arr;
1963                         break;
1964                 }
1965                 case Dot: {
1966                         /* Make the dot fsm. */
1967                         rtnVal = dotFsm( pd );
1968                         break;
1969                 }
1970                 case OrBlock: {
1971                         /* Get the or block and minmize it. */
1972                         rtnVal = orBlock->walk( pd, rootRegex );
1973                         if ( rtnVal == 0 ) {
1974                                 rtnVal = new FsmAp();
1975                                 rtnVal->lambdaFsm();
1976                         }
1977                         rtnVal->minimizePartition2();
1978                         break;
1979                 }
1980                 case NegOrBlock: {
1981                         /* Get the or block and minimize it. */
1982                         FsmAp *fsm = orBlock->walk( pd, rootRegex );
1983                         fsm->minimizePartition2();
1984
1985                         /* Make a dot fsm and subtract from it. */
1986                         rtnVal = dotFsm( pd );
1987                         rtnVal->subtractOp( fsm );
1988                         rtnVal->minimizePartition2();
1989                         break;
1990                 }
1991         }
1992
1993         /* If the item is followed by a star, then apply the star op. */
1994         if ( star ) {
1995                 if ( rtnVal->startState->isFinState() ) {
1996                         warning(loc) << "applying kleene star to a machine that "
1997                                         "accpets zero length word" << endl;
1998                 }
1999
2000                 rtnVal->starOp();
2001                 rtnVal->minimizePartition2();
2002         }
2003         return rtnVal;
2004 }
2005
2006 /* Clean up after an or block of a regular expression. */
2007 ReOrBlock::~ReOrBlock()
2008 {
2009         switch ( type ) {
2010                 case RecurseItem:
2011                         delete orBlock;
2012                         delete item;
2013                         break;
2014                 case Empty:
2015                         break;
2016         }
2017 }
2018
2019
2020 /* Evaluate an or block of a regular expression. */
2021 FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex )
2022 {
2023         FsmAp *rtnVal = 0;
2024         switch ( type ) {
2025                 case RecurseItem: {
2026                         /* Evaluate the two fsm. */
2027                         FsmAp *fsm1 = orBlock->walk( pd, rootRegex );
2028                         FsmAp *fsm2 = item->walk( pd, rootRegex );
2029                         if ( fsm1 == 0 )
2030                                 rtnVal = fsm2;
2031                         else {
2032                                 fsm1->unionOp( fsm2 );
2033                                 rtnVal = fsm1;
2034                         }
2035                         break;
2036                 }
2037                 case Empty: {
2038                         rtnVal = 0;
2039                         break;
2040                 }
2041         }
2042         return rtnVal;;
2043 }
2044
2045 /* Evaluate an or block item of a regular expression. */
2046 FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex )
2047 {
2048         /* The return value, is the alphabet signed? */
2049         FsmAp *rtnVal = 0;
2050         switch ( type ) {
2051         case Data: {
2052                 /* Make the or machine. */
2053                 rtnVal = new FsmAp();
2054
2055                 /* Put the or data into an array of ints. Note that we find unique
2056                  * keys. Duplicates are silently ignored. The alternative would be to
2057                  * issue warning or an error but since we can't with [a0-9a] or 'a' |
2058                  * 'a' don't bother here. */
2059                 KeySet keySet;
2060                 makeFsmUniqueKeyArray( keySet, token.data, token.length,
2061                         rootRegex != 0 ? rootRegex->caseInsensitive : false, pd );
2062
2063                 /* Run the or operator. */
2064                 rtnVal->orFsm( keySet.data, keySet.length() );
2065                 break;
2066         }
2067         case Range: {
2068                 /* Make the upper and lower keys. */
2069                 Key lowKey = makeFsmKeyChar( lower, pd );
2070                 Key highKey = makeFsmKeyChar( upper, pd );
2071
2072                 /* Validate the range. */
2073                 if ( lowKey > highKey ) {
2074                         /* Recover by setting upper to lower; */
2075                         error(loc) << "lower end of range is greater then upper end" << endl;
2076                         highKey = lowKey;
2077                 }
2078
2079                 /* Make the range machine. */
2080                 rtnVal = new FsmAp();
2081                 rtnVal->rangeFsm( lowKey, highKey );
2082
2083                 if ( rootRegex != 0 && rootRegex->caseInsensitive ) {
2084                         if ( lowKey <= 'Z' && 'A' <= highKey ) {
2085                                 Key otherLow = lowKey < 'A' ? Key('A') : lowKey;
2086                                 Key otherHigh = 'Z' < highKey ? Key('Z') : highKey;
2087
2088                                 otherLow = 'a' + ( otherLow - 'A' );
2089                                 otherHigh = 'a' + ( otherHigh - 'A' );
2090
2091                                 FsmAp *otherRange = new FsmAp();
2092                                 otherRange->rangeFsm( otherLow, otherHigh );
2093                                 rtnVal->unionOp( otherRange );
2094                                 rtnVal->minimizePartition2();
2095                         }
2096                         else if ( lowKey <= 'z' && 'a' <= highKey ) {
2097                                 Key otherLow = lowKey < 'a' ? Key('a') : lowKey;
2098                                 Key otherHigh = 'z' < highKey ? Key('z') : highKey;
2099
2100                                 otherLow = 'A' + ( otherLow - 'a' );
2101                                 otherHigh = 'A' + ( otherHigh - 'a' );
2102
2103                                 FsmAp *otherRange = new FsmAp();
2104                                 otherRange->rangeFsm( otherLow, otherHigh );
2105                                 rtnVal->unionOp( otherRange );
2106                                 rtnVal->minimizePartition2();
2107                         }
2108                 }
2109
2110                 break;
2111         }}
2112         return rtnVal;
2113 }