ragel/parsetree.cpp

   1 /*
   2  *  Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
   3  */
   4
   5 /*  This file is part of Ragel.
   6  *
   7  *  Ragel is free software; you can redistribute it and/or modify
   8  *  it under the terms of the GNU General Public License as published by
   9  *  the Free Software Foundation; either version 2 of the License, or
  10  *  (at your option) any later version.
  11  *
  12  *  Ragel is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  *  GNU General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU General Public License
  18  *  along with Ragel; if not, write to the Free Software
  19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  */
  21
  22 #include <iostream>
  23 #include <iomanip>
  24 #include <errno.h>
  25 #include <limits.h>
  26 #include <stdlib.h>
  27
  28 /* Parsing. */
  29 #include "ragel.h"
  30 #include "rlparse.h"
  31 #include "parsetree.h"
  32
  33 using namespace std;
  34 ostream &operator<<( ostream &out, const NameRef &nameRef );
  35 ostream &operator<<( ostream &out, const NameInst &nameInst );
  36
  37 /* Convert the literal string which comes in from the scanner into an array of
  38  * characters with escapes and options interpreted. Also null terminates the
  39  * string. Though this null termination should not be relied on for
  40  * interpreting literals in the parser because the string may contain a
  41  * literal string with \0 */
  42 void Token::prepareLitString( Token &result, bool &caseInsensitive )
  43 {
  44         result.data = new char[this->length+1];
  45         caseInsensitive = false;
  46
  47         char *src = this->data + 1;
  48         char *end = this->data + this->length - 1;
  49
  50         while ( *end != '\'' && *end != '\"' ) {
  51                 if ( *end == 'i' )
  52                         caseInsensitive = true;
  53                 else {
  54                         error( this->loc ) << "literal string '" << *end <<
  55                                         "' option not supported" << endl;
  56                 }
  57                 end -= 1;
  58         }
  59
  60         char *dest = result.data;
  61         int len = 0;
  62         while ( src != end ) {
  63                 if ( *src == '\\' ) {
  64                         switch ( src[1] ) {
  65                         case '0': dest[len++] = '\0'; break;
  66                         case 'a': dest[len++] = '\a'; break;
  67                         case 'b': dest[len++] = '\b'; break;
  68                         case 't': dest[len++] = '\t'; break;
  69                         case 'n': dest[len++] = '\n'; break;
  70                         case 'v': dest[len++] = '\v'; break;
  71                         case 'f': dest[len++] = '\f'; break;
  72                         case 'r': dest[len++] = '\r'; break;
  73                         case '\n':  break;
  74                         default: dest[len++] = src[1]; break;
  75                         }
  76                         src += 2;
  77                 }
  78                 else {
  79                         dest[len++] = *src++;
  80                 }
  81         }
  82         result.length = len;
  83         result.data[result.length] = 0;
  84 }
  85
  86
  87 FsmAp *VarDef::walk( ParseData *pd )
  88 {
  89         /* We enter into a new name scope. */
  90         NameFrame nameFrame = pd->enterNameScope( true, 1 );
  91
  92         /* Recurse on the expression. */
  93         FsmAp *rtnVal = joinOrLm->walk( pd );
  94
  95         /* Do the tranfer of local error actions. */
  96         LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name );
  97         if ( localErrDictEl != 0 ) {
  98                 for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ )
  99                         rtnVal->transferErrorActions( state, localErrDictEl->value );
 100         }
 101
 102         /* If the expression below is a join operation with multiple expressions
 103          * then it just had epsilon transisions resolved. If it is a join
 104          * with only a single expression then run the epsilon op now. */
 105         if ( joinOrLm->type == JoinOrLm::JoinType && joinOrLm->join->exprList.length() == 1 )
 106                 rtnVal->epsilonOp();
 107
 108         /* We can now unset entry points that are not longer used. */
 109         pd->unsetObsoleteEntries( rtnVal );
 110
 111         /* If the name of the variable is referenced then add the entry point to
 112          * the graph. */
 113         if ( pd->curNameInst->numRefs > 0 )
 114                 rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState );
 115
 116         /* Pop the name scope. */
 117         pd->popNameScope( nameFrame );
 118         return rtnVal;
 119 }
 120
 121 void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd )
 122 {
 123         /* The variable definition enters a new scope. */
 124         NameInst *prevNameInst = pd->curNameInst;
 125         pd->curNameInst = pd->addNameInst( loc, name, false );
 126
 127         if ( joinOrLm->type == JoinOrLm::LongestMatchType )
 128                 pd->curNameInst->isLongestMatch = true;
 129
 130         /* Recurse. */
 131         joinOrLm->makeNameTree( pd );
 132
 133         /* The name scope ends, pop the name instantiation. */
 134         pd->curNameInst = prevNameInst;
 135 }
 136
 137 void VarDef::resolveNameRefs( ParseData *pd )
 138 {
 139         /* Entering into a new scope. */
 140         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 141
 142         /* Recurse. */
 143         joinOrLm->resolveNameRefs( pd );
 144
 145         /* The name scope ends, pop the name instantiation. */
 146         pd->popNameScope( nameFrame );
 147 }
 148
 149 InputLoc LongestMatchPart::getLoc()
 150 {
 151         return action != 0 ? action->loc : semiLoc;
 152 }
 153
 154 /*
 155  * If there are any LMs then all of the following entry points must reset
 156  * tokstart:
 157  *
 158  *  1. fentry(StateRef)
 159  *  2. ftoto(StateRef), fcall(StateRef), fnext(StateRef)
 160  *  3. targt of any transition that has an fcall (the return loc).
 161  *  4. start state of all longest match routines.
 162  */
 163
 164 Action *LongestMatch::newAction( ParseData *pd, const InputLoc &loc,
 165                 char *name, InlineList *inlineList )
 166 {
 167         Action *action = new Action( loc, name, inlineList, pd->nextCondId++ );
 168         action->actionRefs.append( pd->curNameInst );
 169         pd->actionList.append( action );
 170         action->isLmAction = true;
 171         return action;
 172 }
 173
 174 void LongestMatch::makeActions( ParseData *pd )
 175 {
 176         /* Make actions that set the action id. */
 177         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 178                 /* For each part create actions for setting the match type.  We need
 179                  * to do this so that the actions will go into the actionIndex. */
 180                 InlineList *inlineList = new InlineList;
 181                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, InlineItem::LmSetActId ) );
 182                 char *actName = new char[50];
 183                 sprintf( actName, "store%i", lmi->longestMatchId );
 184                 lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList );
 185         }
 186
 187         /* Make actions that execute the user action and restart on the last character. */
 188         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 189                 /* For each part create actions for setting the match type.  We need
 190                  * to do this so that the actions will go into the actionIndex. */
 191                 InlineList *inlineList = new InlineList;
 192                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
 193                                 InlineItem::LmOnLast ) );
 194                 char *actName = new char[50];
 195                 sprintf( actName, "imm%i", lmi->longestMatchId );
 196                 lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList );
 197         }
 198
 199         /* Make actions that execute the user action and restart on the next
 200          * character.  These actions will set tokend themselves (it is the current
 201          * char). */
 202         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 203                 /* For each part create actions for setting the match type.  We need
 204                  * to do this so that the actions will go into the actionIndex. */
 205                 InlineList *inlineList = new InlineList;
 206                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
 207                                 InlineItem::LmOnNext ) );
 208                 char *actName = new char[50];
 209                 sprintf( actName, "lagh%i", lmi->longestMatchId );
 210                 lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList );
 211         }
 212
 213         /* Make actions that execute the user action and restart at tokend. These
 214          * actions execute some time after matching the last char. */
 215         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 216                 /* For each part create actions for setting the match type.  We need
 217                  * to do this so that the actions will go into the actionIndex. */
 218                 InlineList *inlineList = new InlineList;
 219                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
 220                                 InlineItem::LmOnLagBehind ) );
 221                 char *actName = new char[50];
 222                 sprintf( actName, "lag%i", lmi->longestMatchId );
 223                 lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList );
 224         }
 225
 226         InputLoc loc;
 227         loc.line = 1;
 228         loc.col = 1;
 229
 230         /* Create the error action. */
 231         InlineList *il6 = new InlineList;
 232         il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) );
 233         lmActSelect = newAction( pd, loc, "lagsel", il6 );
 234 }
 235
 236 void LongestMatch::findName( ParseData *pd )
 237 {
 238         NameInst *nameInst = pd->curNameInst;
 239         while ( nameInst->name == 0 ) {
 240                 nameInst = nameInst->parent;
 241                 /* Since every machine must must have a name, we should always find a
 242                  * name for the longest match. */
 243                 assert( nameInst != 0 );
 244         }
 245         name = nameInst->name;
 246 }
 247
 248 void LongestMatch::makeNameTree( ParseData *pd )
 249 {
 250         /* Create an anonymous scope for the longest match. Will be used for
 251          * restarting machine after matching a token. */
 252         NameInst *prevNameInst = pd->curNameInst;
 253         pd->curNameInst = pd->addNameInst( loc, 0, false );
 254
 255         /* Recurse into all parts of the longest match operator. */
 256         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ )
 257                 lmi->join->makeNameTree( pd );
 258
 259         /* Traverse the name tree upwards to find a name for this lm. */
 260         findName( pd );
 261
 262         /* Also make the longest match's actions at this point. */
 263         makeActions( pd );
 264
 265         /* The name scope ends, pop the name instantiation. */
 266         pd->curNameInst = prevNameInst;
 267 }
 268
 269 void LongestMatch::resolveNameRefs( ParseData *pd )
 270 {
 271         /* The longest match gets its own name scope. */
 272         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 273
 274         /* Take an action reference for each longest match item and recurse. */
 275         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 276                 /* Record the reference if the item has an action. */
 277                 if ( lmi->action != 0 )
 278                         lmi->action->actionRefs.append( pd->localNameScope );
 279
 280                 /* Recurse down the join. */
 281                 lmi->join->resolveNameRefs( pd );
 282         }
 283
 284         /* The name scope ends, pop the name instantiation. */
 285         pd->popNameScope( nameFrame );
 286 }
 287
 288 void LongestMatch::restart( FsmAp *graph, TransAp *trans )
 289 {
 290         StateAp *fromState = trans->fromState;
 291         graph->detachTrans( fromState, trans->toState, trans );
 292         graph->attachTrans( fromState, graph->startState, trans );
 293 }
 294
 295 void LongestMatch::runLonestMatch( ParseData *pd, FsmAp *graph )
 296 {
 297         graph->markReachableFromHereStopFinal( graph->startState );
 298         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 299                 if ( ms->stateBits & SB_ISMARKED ) {
 300                         ms->lmItemSet.insert( 0 );
 301                         ms->stateBits &= ~ SB_ISMARKED;
 302                 }
 303         }
 304
 305         /* Transfer the first item of non-empty lmAction tables to the item sets
 306          * of the states that follow. Exclude states that have no transitions out.
 307          * This must happen on a separate pass so that on each iteration of the
 308          * next pass we have the item set entries from all lmAction tables. */
 309         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
 310                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
 311                         if ( trans->lmActionTable.length() > 0 ) {
 312                                 LmActionTableEl *lmAct = trans->lmActionTable.data;
 313                                 StateAp *toState = trans->toState;
 314                                 assert( toState );
 315
 316                                 /* Check if there are transitions out, this may be a very
 317                                  * close approximation? Out transitions going nowhere?
 318                                  * FIXME: Check. */
 319                                 if ( toState->outList.length() > 0 ) {
 320                                         /* Fill the item sets. */
 321                                         graph->markReachableFromHereStopFinal( toState );
 322                                         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 323                                                 if ( ms->stateBits & SB_ISMARKED ) {
 324                                                         ms->lmItemSet.insert( lmAct->value );
 325                                                         ms->stateBits &= ~ SB_ISMARKED;
 326                                                 }
 327                                         }
 328                                 }
 329                         }
 330                 }
 331         }
 332
 333         /* The lmItem sets are now filled, telling us which longest match rules
 334          * can succeed in which states. First determine if we need to make sure
 335          * act is defaulted to zero. We need to do this if there are any states
 336          * with lmItemSet.length() > 1 and NULL is included. That is, that the
 337          * switch may get called when in fact nothing has been matched. */
 338         int maxItemSetLength = 0;
 339         graph->markReachableFromHereStopFinal( graph->startState );
 340         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 341                 if ( ms->stateBits & SB_ISMARKED ) {
 342                         if ( ms->lmItemSet.length() > maxItemSetLength )
 343                                 maxItemSetLength = ms->lmItemSet.length();
 344                         ms->stateBits &= ~ SB_ISMARKED;
 345                 }
 346         }
 347
 348         /* The actions executed on starting to match a token. */
 349         graph->isolateStartState();
 350         graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart );
 351         graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
 352         if ( maxItemSetLength > 1 ) {
 353                 /* The longest match action switch may be called when tokens are
 354                  * matched, in which case act must be initialized, there must be a
 355                  * case to handle the error, and the generated machine will require an
 356                  * error state. */
 357                 lmSwitchHandlesError = true;
 358                 pd->lmRequiresErrorState = true;
 359                 graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
 360         }
 361
 362         /* The place to store transitions to restart. It maybe possible for the
 363          * restarting to affect the searching through the graph that follows. For
 364          * now take the safe route and save the list of transitions to restart
 365          * until after all searching is done. */
 366         Vector<TransAp*> restartTrans;
 367
 368         /* Set actions that do immediate token recognition, set the longest match part
 369          * id and set the token ending. */
 370         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
 371                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
 372                         if ( trans->lmActionTable.length() > 0 ) {
 373                                 LmActionTableEl *lmAct = trans->lmActionTable.data;
 374                                 StateAp *toState = trans->toState;
 375                                 assert( toState );
 376
 377                                 /* Check if there are transitions out, this may be a very
 378                                  * close approximation? Out transitions going nowhere?
 379                                  * FIXME: Check. */
 380                                 if ( toState->outList.length() == 0 ) {
 381                                         /* Can execute the immediate action for the longest match
 382                                          * part. Redirect the action to the start state. */
 383                                         trans->actionTable.setAction( lmAct->key,
 384                                                         lmAct->value->actOnLast );
 385                                         restartTrans.append( trans );
 386                                 }
 387                                 else {
 388                                         /* Look for non final states that have a non-empty item
 389                                          * set. If these are present then we need to record the
 390                                          * end of the token.  Also Find the highest item set
 391                                          * length reachable from here (excluding at transtions to
 392                                          * final states). */
 393                                         bool nonFinalNonEmptyItemSet = false;
 394                                         maxItemSetLength = 0;
 395                                         graph->markReachableFromHereStopFinal( toState );
 396                                         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 397                                                 if ( ms->stateBits & SB_ISMARKED ) {
 398                                                         if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
 399                                                                 nonFinalNonEmptyItemSet = true;
 400                                                         if ( ms->lmItemSet.length() > maxItemSetLength )
 401                                                                 maxItemSetLength = ms->lmItemSet.length();
 402                                                         ms->stateBits &= ~ SB_ISMARKED;
 403                                                 }
 404                                         }
 405
 406                                         /* If there are reachable states that are not final and
 407                                          * have non empty item sets or that have an item set
 408                                          * length greater than one then we need to set tokend
 409                                          * because the error action that matches the token will
 410                                          * require it. */
 411                                         if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
 412                                                 trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
 413
 414                                         /* Some states may not know which longest match item to
 415                                          * execute, must set it. */
 416                                         if ( maxItemSetLength > 1 ) {
 417                                                 /* There are transitions out, another match may come. */
 418                                                 trans->actionTable.setAction( lmAct->key,
 419                                                                 lmAct->value->setActId );
 420                                         }
 421                                 }
 422                         }
 423                 }
 424         }
 425
 426         /* Now that all graph searching is done it certainly safe set the
 427          * restarting. It may be safe above, however this must be verified. */
 428         for ( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ )
 429                 restart( graph, *pt );
 430
 431         int lmErrActionOrd = pd->curActionOrd++;
 432
 433         /* Embed the error for recognizing a char. */
 434         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
 435                 if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
 436                         if ( st->isFinState() ) {
 437                                 /* On error execute the onActNext action, which knows that
 438                                  * the last character of the token was one back and restart. */
 439                                 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
 440                                                 &st->lmItemSet[0]->actOnNext, 1 );
 441                         }
 442                         else {
 443                                 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
 444                                                 &st->lmItemSet[0]->actLagBehind, 1 );
 445                         }
 446                 }
 447                 else if ( st->lmItemSet.length() > 1 ) {
 448                         /* Need to use the select. Take note of the which items the select
 449                          * is needed for so only the necessary actions are included. */
 450                         for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
 451                                 if ( *plmi != 0 )
 452                                         (*plmi)->inLmSelect = true;
 453                         }
 454                         /* On error, execute the action select and go to the start state. */
 455                         graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
 456                                         &lmActSelect, 1 );
 457                 }
 458         }
 459
 460         /* Finally, the start state should be made final. */
 461         graph->setFinState( graph->startState );
 462 }
 463
 464 FsmAp *LongestMatch::walk( ParseData *pd )
 465 {
 466         /* The longest match has it's own name scope. */
 467         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 468
 469         /* Make each part of the longest match. */
 470         FsmAp **parts = new FsmAp*[longestMatchList->length()];
 471         LmPartList::Iter lmi = *longestMatchList;
 472         for ( int i = 0; lmi.lte(); lmi++, i++ ) {
 473                 /* Create the machine and embed the setting of the longest match id. */
 474                 parts[i] = lmi->join->walk( pd );
 475                 parts[i]->longMatchAction( pd->curActionOrd++, lmi );
 476         }
 477
 478         /* Union machines one and up with machine zero. The grammar dictates that
 479          * there will always be at least one part. */
 480         FsmAp *rtnVal = parts[0];
 481         for ( int i = 1; i < longestMatchList->length(); i++ ) {
 482                 rtnVal->unionOp( parts[i] );
 483                 afterOpMinimize( rtnVal );
 484         }
 485
 486         runLonestMatch( pd, rtnVal );
 487
 488         /* Pop the name scope. */
 489         pd->popNameScope( nameFrame );
 490
 491         delete[] parts;
 492         return rtnVal;
 493 }
 494
 495 FsmAp *JoinOrLm::walk( ParseData *pd )
 496 {
 497         FsmAp *rtnVal = 0;
 498         switch ( type ) {
 499         case JoinType:
 500                 rtnVal = join->walk( pd );
 501                 break;
 502         case LongestMatchType:
 503                 rtnVal = longestMatch->walk( pd );
 504                 break;
 505         }
 506         return rtnVal;
 507 }
 508
 509 void JoinOrLm::makeNameTree( ParseData *pd )
 510 {
 511         switch ( type ) {
 512         case JoinType:
 513                 join->makeNameTree( pd );
 514                 break;
 515         case LongestMatchType:
 516                 longestMatch->makeNameTree( pd );
 517                 break;
 518         }
 519 }
 520
 521 void JoinOrLm::resolveNameRefs( ParseData *pd )
 522 {
 523         switch ( type ) {
 524         case JoinType:
 525                 join->resolveNameRefs( pd );
 526                 break;
 527         case LongestMatchType:
 528                 longestMatch->resolveNameRefs( pd );
 529                 break;
 530         }
 531 }
 532
 533
 534 /* Construct with a location and the first expression. */
 535 Join::Join( const InputLoc &loc, Expression *expr )
 536 :
 537         loc(loc)
 538 {
 539         exprList.append( expr );
 540 }
 541
 542 /* Construct with a location and the first expression. */
 543 Join::Join( Expression *expr )
 544 :
 545         loc(loc)
 546 {
 547         exprList.append( expr );
 548 }
 549
 550 /* Walk an expression node. */
 551 FsmAp *Join::walk( ParseData *pd )
 552 {
 553         if ( exprList.length() > 1 )
 554                 return walkJoin( pd );
 555         else
 556                 return exprList.head->walk( pd );
 557 }
 558
 559 /* There is a list of expressions to join. */
 560 FsmAp *Join::walkJoin( ParseData *pd )
 561 {
 562         /* We enter into a new name scope. */
 563         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 564
 565         /* Evaluate the machines. */
 566         FsmAp **fsms = new FsmAp*[exprList.length()];
 567         ExprList::Iter expr = exprList;
 568         for ( int e = 0; e < exprList.length(); e++, expr++ )
 569                 fsms[e] = expr->walk( pd );
 570
 571         /* Get the start and final names. Final is
 572          * guaranteed to exist, start is not. */
 573         NameInst *startName = pd->curNameInst->start;
 574         NameInst *finalName = pd->curNameInst->final;
 575
 576         int startId = -1;
 577         if ( startName != 0 ) {
 578                 /* Take note that there was an implicit link to the start machine. */
 579                 pd->localNameScope->referencedNames.append( startName );
 580                 startId = startName->id;
 581         }
 582
 583         /* A final id of -1 indicates there is no epsilon that references the
 584          * final state, therefor do not create one or set an entry point to it. */
 585         int finalId = -1;
 586         if ( finalName->numRefs > 0 )
 587                 finalId = finalName->id;
 588
 589         /* Join machines 1 and up onto machine 0. */
 590         FsmAp *retFsm = fsms[0];
 591         retFsm->joinOp( startId, finalId, fsms+1, exprList.length()-1 );
 592
 593         /* We can now unset entry points that are not longer used. */
 594         pd->unsetObsoleteEntries( retFsm );
 595
 596         /* Pop the name scope. */
 597         pd->popNameScope( nameFrame );
 598
 599         delete[] fsms;
 600         return retFsm;
 601 }
 602
 603 void Join::makeNameTree( ParseData *pd )
 604 {
 605         if ( exprList.length() > 1 ) {
 606                 /* Create the new anonymous scope. */
 607                 NameInst *prevNameInst = pd->curNameInst;
 608                 pd->curNameInst = pd->addNameInst( loc, 0, false );
 609
 610                 /* Join scopes need an implicit "final" target. */
 611                 pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final",
 612                                 pd->nextNameId++, false );
 613
 614                 /* Recurse into all expressions in the list. */
 615                 for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
 616                         expr->makeNameTree( pd );
 617
 618                 /* The name scope ends, pop the name instantiation. */
 619                 pd->curNameInst = prevNameInst;
 620         }
 621         else {
 622                 /* Recurse into the single expression. */
 623                 exprList.head->makeNameTree( pd );
 624         }
 625 }
 626
 627
 628 void Join::resolveNameRefs( ParseData *pd )
 629 {
 630         /* Branch on whether or not there is to be a join. */
 631         if ( exprList.length() > 1 ) {
 632                 /* The variable definition enters a new scope. */
 633                 NameFrame nameFrame = pd->enterNameScope( true, 1 );
 634
 635                 /* The join scope must contain a start label. */
 636                 NameSet resolved = pd->resolvePart( pd->localNameScope, "start", true );
 637                 if ( resolved.length() > 0 ) {
 638                         /* Take the first. */
 639                         pd->curNameInst->start = resolved[0];
 640                         if ( resolved.length() > 1 ) {
 641                                 /* Complain about the multiple references. */
 642                                 error(loc) << "multiple start labels" << endl;
 643                                 errorStateLabels( resolved );
 644                         }
 645                 }
 646
 647                 /* Make sure there is a start label. */
 648                 if ( pd->curNameInst->start != 0 ) {
 649                         /* There is an implicit reference to start name. */
 650                         pd->curNameInst->start->numRefs += 1;
 651                 }
 652                 else {
 653                         /* No start label. Complain and recover by adding a label to the
 654                          * adding one. Recover ignoring the problem. */
 655                         error(loc) << "no start label" << endl;
 656                 }
 657
 658                 /* Recurse into all expressions in the list. */
 659                 for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
 660                         expr->resolveNameRefs( pd );
 661
 662                 /* The name scope ends, pop the name instantiation. */
 663                 pd->popNameScope( nameFrame );
 664         }
 665         else {
 666                 /* Recurse into the single expression. */
 667                 exprList.head->resolveNameRefs( pd );
 668         }
 669 }
 670
 671 /* Clean up after an expression node. */
 672 Expression::~Expression()
 673 {
 674         switch ( type ) {
 675                 case OrType: case IntersectType: case SubtractType:
 676                 case StrongSubtractType:
 677                         delete expression;
 678                         delete term;
 679                         break;
 680                 case TermType:
 681                         delete term;
 682                         break;
 683                 case BuiltinType:
 684                         break;
 685         }
 686 }
 687
 688 /* Evaluate a single expression node. */
 689 FsmAp *Expression::walk( ParseData *pd, bool lastInSeq )
 690 {
 691         FsmAp *rtnVal = 0;
 692         switch ( type ) {
 693                 case OrType: {
 694                         /* Evaluate the expression. */
 695                         rtnVal = expression->walk( pd, false );
 696                         /* Evaluate the term. */
 697                         FsmAp *rhs = term->walk( pd );
 698                         /* Perform union. */
 699                         rtnVal->unionOp( rhs );
 700                         afterOpMinimize( rtnVal, lastInSeq );
 701                         break;
 702                 }
 703                 case IntersectType: {
 704                         /* Evaluate the expression. */
 705                         rtnVal = expression->walk( pd );
 706                         /* Evaluate the term. */
 707                         FsmAp *rhs = term->walk( pd );
 708                         /* Perform intersection. */
 709                         rtnVal->intersectOp( rhs );
 710                         afterOpMinimize( rtnVal, lastInSeq );
 711                         break;
 712                 }
 713                 case SubtractType: {
 714                         /* Evaluate the expression. */
 715                         rtnVal = expression->walk( pd );
 716                         /* Evaluate the term. */
 717                         FsmAp *rhs = term->walk( pd );
 718                         /* Perform subtraction. */
 719                         rtnVal->subtractOp( rhs );
 720                         afterOpMinimize( rtnVal, lastInSeq );
 721                         break;
 722                 }
 723                 case StrongSubtractType: {
 724                         /* Evaluate the expression. */
 725                         rtnVal = expression->walk( pd );
 726
 727                         /* Evaluate the term and pad it with any* machines. */
 728                         FsmAp *rhs = dotStarFsm( pd );
 729                         FsmAp *termFsm = term->walk( pd );
 730                         FsmAp *trailAnyStar = dotStarFsm( pd );
 731                         rhs->concatOp( termFsm );
 732                         rhs->concatOp( trailAnyStar );
 733
 734                         /* Perform subtraction. */
 735                         rtnVal->subtractOp( rhs );
 736                         afterOpMinimize( rtnVal, lastInSeq );
 737                         break;
 738                 }
 739                 case TermType: {
 740                         /* Return result of the term. */
 741                         rtnVal = term->walk( pd );
 742                         break;
 743                 }
 744                 case BuiltinType: {
 745                         /* Duplicate the builtin. */
 746                         rtnVal = makeBuiltin( builtin, pd );
 747                         break;
 748                 }
 749         }
 750
 751         return rtnVal;
 752 }
 753
 754 void Expression::makeNameTree( ParseData *pd )
 755 {
 756         switch ( type ) {
 757         case OrType:
 758         case IntersectType:
 759         case SubtractType:
 760         case StrongSubtractType:
 761                 expression->makeNameTree( pd );
 762                 term->makeNameTree( pd );
 763                 break;
 764         case TermType:
 765                 term->makeNameTree( pd );
 766                 break;
 767         case BuiltinType:
 768                 break;
 769         }
 770 }
 771
 772 void Expression::resolveNameRefs( ParseData *pd )
 773 {
 774         switch ( type ) {
 775         case OrType:
 776         case IntersectType:
 777         case SubtractType:
 778         case StrongSubtractType:
 779                 expression->resolveNameRefs( pd );
 780                 term->resolveNameRefs( pd );
 781                 break;
 782         case TermType:
 783                 term->resolveNameRefs( pd );
 784                 break;
 785         case BuiltinType:
 786                 break;
 787         }
 788 }
 789
 790 /* Clean up after a term node. */
 791 Term::~Term()
 792 {
 793         switch ( type ) {
 794                 case ConcatType:
 795                 case RightStartType:
 796                 case RightFinishType:
 797                 case LeftType:
 798                         delete term;
 799                         delete factorWithAug;
 800                         break;
 801                 case FactorWithAugType:
 802                         delete factorWithAug;
 803                         break;
 804         }
 805 }
 806
 807 /* Evaluate a term node. */
 808 FsmAp *Term::walk( ParseData *pd, bool lastInSeq )
 809 {
 810         FsmAp *rtnVal = 0;
 811         switch ( type ) {
 812                 case ConcatType: {
 813                         /* Evaluate the Term. */
 814                         rtnVal = term->walk( pd, false );
 815                         /* Evaluate the FactorWithRep. */
 816                         FsmAp *rhs = factorWithAug->walk( pd );
 817                         /* Perform concatenation. */
 818                         rtnVal->concatOp( rhs );
 819                         afterOpMinimize( rtnVal, lastInSeq );
 820                         break;
 821                 }
 822                 case RightStartType: {
 823                         /* Evaluate the Term. */
 824                         rtnVal = term->walk( pd );
 825
 826                         /* Evaluate the FactorWithRep. */
 827                         FsmAp *rhs = factorWithAug->walk( pd );
 828
 829                         /* Set up the priority descriptors. The left machine gets the
 830                          * lower priority where as the right get the higher start priority. */
 831                         priorDescs[0].key = pd->nextPriorKey++;
 832                         priorDescs[0].priority = 0;
 833                         rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
 834
 835                         /* The start transitions right machine get the higher priority.
 836                          * Use the same unique key. */
 837                         priorDescs[1].key = priorDescs[0].key;
 838                         priorDescs[1].priority = 1;
 839                         rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
 840
 841                         /* Perform concatenation. */
 842                         rtnVal->concatOp( rhs );
 843                         afterOpMinimize( rtnVal, lastInSeq );
 844                         break;
 845                 }
 846                 case RightFinishType: {
 847                         /* Evaluate the Term. */
 848                         rtnVal = term->walk( pd );
 849
 850                         /* Evaluate the FactorWithRep. */
 851                         FsmAp *rhs = factorWithAug->walk( pd );
 852
 853                         /* Set up the priority descriptors. The left machine gets the
 854                          * lower priority where as the finishing transitions to the right
 855                          * get the higher priority. */
 856                         priorDescs[0].key = pd->nextPriorKey++;
 857                         priorDescs[0].priority = 0;
 858                         rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
 859
 860                         /* The finishing transitions of the right machine get the higher
 861                          * priority. Use the same unique key. */
 862                         priorDescs[1].key = priorDescs[0].key;
 863                         priorDescs[1].priority = 1;
 864                         rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
 865
 866                         /* Perform concatenation. */
 867                         rtnVal->concatOp( rhs );
 868                         afterOpMinimize( rtnVal, lastInSeq );
 869                         break;
 870                 }
 871                 case LeftType: {
 872                         /* Evaluate the Term. */
 873                         rtnVal = term->walk( pd );
 874
 875                         /* Evaluate the FactorWithRep. */
 876                         FsmAp *rhs = factorWithAug->walk( pd );
 877
 878                         /* Set up the priority descriptors. The left machine gets the
 879                          * higher priority. */
 880                         priorDescs[0].key = pd->nextPriorKey++;
 881                         priorDescs[0].priority = 1;
 882                         rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
 883
 884                         /* The right machine gets the lower priority.  Since
 885                          * startTransPrior might unnecessarily increase the number of
 886                          * states during the state machine construction process (due to
 887                          * isolation), we use allTransPrior instead, which has the same
 888                          * effect. */
 889                         priorDescs[1].key = priorDescs[0].key;
 890                         priorDescs[1].priority = 0;
 891                         rhs->allTransPrior( pd->curPriorOrd++, &priorDescs[1] );
 892
 893                         /* Perform concatenation. */
 894                         rtnVal->concatOp( rhs );
 895                         afterOpMinimize( rtnVal, lastInSeq );
 896                         break;
 897                 }
 898                 case FactorWithAugType: {
 899                         rtnVal = factorWithAug->walk( pd );
 900                         break;
 901                 }
 902         }
 903         return rtnVal;
 904 }
 905
 906 void Term::makeNameTree( ParseData *pd )
 907 {
 908         switch ( type ) {
 909         case ConcatType:
 910         case RightStartType:
 911         case RightFinishType:
 912         case LeftType:
 913                 term->makeNameTree( pd );
 914                 factorWithAug->makeNameTree( pd );
 915                 break;
 916         case FactorWithAugType:
 917                 factorWithAug->makeNameTree( pd );
 918                 break;
 919         }
 920 }
 921
 922 void Term::resolveNameRefs( ParseData *pd )
 923 {
 924         switch ( type ) {
 925         case ConcatType:
 926         case RightStartType:
 927         case RightFinishType:
 928         case LeftType:
 929                 term->resolveNameRefs( pd );
 930                 factorWithAug->resolveNameRefs( pd );
 931                 break;
 932         case FactorWithAugType:
 933                 factorWithAug->resolveNameRefs( pd );
 934                 break;
 935         }
 936 }
 937
 938 /* Clean up after a factor with augmentation node. */
 939 FactorWithAug::~FactorWithAug()
 940 {
 941         delete factorWithRep;
 942
 943         /* Walk the vector of parser actions, deleting function names. */
 944
 945         /* Clean up priority descriptors. */
 946         if ( priorDescs != 0 )
 947                 delete[] priorDescs;
 948 }
 949
 950 void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd )
 951 {
 952         /* Assign actions. */
 953         for ( int i = 0; i < actions.length(); i++ )  {
 954                 switch ( actions[i].type ) {
 955                 /* Transition actions. */
 956                 case at_start:
 957                         graph->startFsmAction( actionOrd[i], actions[i].action );
 958                         afterOpMinimize( graph );
 959                         break;
 960                 case at_all:
 961                         graph->allTransAction( actionOrd[i], actions[i].action );
 962                         break;
 963                 case at_finish:
 964                         graph->finishFsmAction( actionOrd[i], actions[i].action );
 965                         break;
 966                 case at_leave:
 967                         graph->leaveFsmAction( actionOrd[i], actions[i].action );
 968                         break;
 969
 970                 /* Global error actions. */
 971                 case at_start_gbl_error:
 972                         graph->startErrorAction( actionOrd[i], actions[i].action, 0 );
 973                         afterOpMinimize( graph );
 974                         break;
 975                 case at_all_gbl_error:
 976                         graph->allErrorAction( actionOrd[i], actions[i].action, 0 );
 977                         break;
 978                 case at_final_gbl_error:
 979                         graph->finalErrorAction( actionOrd[i], actions[i].action, 0 );
 980                         break;
 981                 case at_not_start_gbl_error:
 982                         graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 );
 983                         break;
 984                 case at_not_final_gbl_error:
 985                         graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 );
 986                         break;
 987                 case at_middle_gbl_error:
 988                         graph->middleErrorAction( actionOrd[i], actions[i].action, 0 );
 989                         break;
 990
 991                 /* Local error actions. */
 992                 case at_start_local_error:
 993                         graph->startErrorAction( actionOrd[i], actions[i].action,
 994                                         actions[i].localErrKey );
 995                         afterOpMinimize( graph );
 996                         break;
 997                 case at_all_local_error:
 998                         graph->allErrorAction( actionOrd[i], actions[i].action,
 999                                         actions[i].localErrKey );
1000                         break;
1001                 case at_final_local_error:
1002                         graph->finalErrorAction( actionOrd[i], actions[i].action,
1003                                         actions[i].localErrKey );
1004                         break;
1005                 case at_not_start_local_error:
1006                         graph->notStartErrorAction( actionOrd[i], actions[i].action,
1007                                         actions[i].localErrKey );
1008                         break;
1009                 case at_not_final_local_error:
1010                         graph->notFinalErrorAction( actionOrd[i], actions[i].action,
1011                                         actions[i].localErrKey );
1012                         break;
1013                 case at_middle_local_error:
1014                         graph->middleErrorAction( actionOrd[i], actions[i].action,
1015                                         actions[i].localErrKey );
1016                         break;
1017
1018                 /* EOF actions. */
1019                 case at_start_eof:
1020                         graph->startEOFAction( actionOrd[i], actions[i].action );
1021                         afterOpMinimize( graph );
1022                         break;
1023                 case at_all_eof:
1024                         graph->allEOFAction( actionOrd[i], actions[i].action );
1025                         break;
1026                 case at_final_eof:
1027                         graph->finalEOFAction( actionOrd[i], actions[i].action );
1028                         break;
1029                 case at_not_start_eof:
1030                         graph->notStartEOFAction( actionOrd[i], actions[i].action );
1031                         break;
1032                 case at_not_final_eof:
1033                         graph->notFinalEOFAction( actionOrd[i], actions[i].action );
1034                         break;
1035                 case at_middle_eof:
1036                         graph->middleEOFAction( actionOrd[i], actions[i].action );
1037                         break;
1038
1039                 /* To State Actions. */
1040                 case at_start_to_state:
1041                         graph->startToStateAction( actionOrd[i], actions[i].action );
1042                         afterOpMinimize( graph );
1043                         break;
1044                 case at_all_to_state:
1045                         graph->allToStateAction( actionOrd[i], actions[i].action );
1046                         break;
1047                 case at_final_to_state:
1048                         graph->finalToStateAction( actionOrd[i], actions[i].action );
1049                         break;
1050                 case at_not_start_to_state:
1051                         graph->notStartToStateAction( actionOrd[i], actions[i].action );
1052                         break;
1053                 case at_not_final_to_state:
1054                         graph->notFinalToStateAction( actionOrd[i], actions[i].action );
1055                         break;
1056                 case at_middle_to_state:
1057                         graph->middleToStateAction( actionOrd[i], actions[i].action );
1058                         break;
1059
1060                 /* From State Actions. */
1061                 case at_start_from_state:
1062                         graph->startFromStateAction( actionOrd[i], actions[i].action );
1063                         afterOpMinimize( graph );
1064                         break;
1065                 case at_all_from_state:
1066                         graph->allFromStateAction( actionOrd[i], actions[i].action );
1067                         break;
1068                 case at_final_from_state:
1069                         graph->finalFromStateAction( actionOrd[i], actions[i].action );
1070                         break;
1071                 case at_not_start_from_state:
1072                         graph->notStartFromStateAction( actionOrd[i], actions[i].action );
1073                         break;
1074                 case at_not_final_from_state:
1075                         graph->notFinalFromStateAction( actionOrd[i], actions[i].action );
1076                         break;
1077                 case at_middle_from_state:
1078                         graph->middleFromStateAction( actionOrd[i], actions[i].action );
1079                         break;
1080
1081                 /* Remaining cases, prevented by the parser. */
1082                 default:
1083                         assert( false );
1084                         break;
1085                 }
1086         }
1087 }
1088
1089 void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd )
1090 {
1091         /* Assign priorities. */
1092         for ( int i = 0; i < priorityAugs.length(); i++ ) {
1093                 switch ( priorityAugs[i].type ) {
1094                 case at_start:
1095                         graph->startFsmPrior( priorOrd[i], &priorDescs[i]);
1096                         /* Start fsm priorities are a special case that may require
1097                          * minimization afterwards. */
1098                         afterOpMinimize( graph );
1099                         break;
1100                 case at_all:
1101                         graph->allTransPrior( priorOrd[i], &priorDescs[i] );
1102                         break;
1103                 case at_finish:
1104                         graph->finishFsmPrior( priorOrd[i], &priorDescs[i] );
1105                         break;
1106                 case at_leave:
1107                         graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] );
1108                         break;
1109
1110                 default:
1111                         /* Parser Prevents this case. */
1112                         break;
1113                 }
1114         }
1115 }
1116
1117 void FactorWithAug::assignConditions( FsmAp *graph )
1118 {
1119         for ( int i = 0; i < conditions.length(); i++ )  {
1120                 switch ( conditions[i].type ) {
1121                 /* Transition actions. */
1122                 case at_start:
1123                         graph->startFsmCondition( conditions[i].action, conditions[i].sense );
1124                         afterOpMinimize( graph );
1125                         break;
1126                 case at_all:
1127                         graph->allTransCondition( conditions[i].action, conditions[i].sense );
1128                         break;
1129                 case at_leave:
1130                         graph->leaveFsmCondition( conditions[i].action, conditions[i].sense );
1131                         break;
1132                 default:
1133                         break;
1134                 }
1135         }
1136 }
1137
1138
1139 /* Evaluate a factor with augmentation node. */
1140 FsmAp *FactorWithAug::walk( ParseData *pd )
1141 {
1142         /* Enter into the scopes created for the labels. */
1143         NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1144
1145         /* Make the array of function orderings. */
1146         int *actionOrd = 0;
1147         if ( actions.length() > 0 )
1148                 actionOrd = new int[actions.length()];
1149
1150         /* First walk the list of actions, assigning order to all starting
1151          * actions. */
1152         for ( int i = 0; i < actions.length(); i++ ) {
1153                 if ( actions[i].type == at_start ||
1154                                 actions[i].type == at_start_gbl_error ||
1155                                 actions[i].type == at_start_local_error ||
1156                                 actions[i].type == at_start_to_state ||
1157                                 actions[i].type == at_start_from_state ||
1158                                 actions[i].type == at_start_eof )
1159                         actionOrd[i] = pd->curActionOrd++;
1160         }
1161
1162         /* Evaluate the factor with repetition. */
1163         FsmAp *rtnVal = factorWithRep->walk( pd );
1164
1165         /* Compute the remaining action orderings. */
1166         for ( int i = 0; i < actions.length(); i++ ) {
1167                 if ( actions[i].type != at_start &&
1168                                 actions[i].type != at_start_gbl_error &&
1169                                 actions[i].type != at_start_local_error &&
1170                                 actions[i].type != at_start_to_state &&
1171                                 actions[i].type != at_start_from_state &&
1172                                 actions[i].type != at_start_eof )
1173                         actionOrd[i] = pd->curActionOrd++;
1174         }
1175
1176         /* Embed conditions. */
1177         assignConditions( rtnVal );
1178
1179         /* Embed actions. */
1180         assignActions( pd, rtnVal , actionOrd );
1181
1182         /* Make the array of priority orderings. Orderings are local to this walk
1183          * of the factor with augmentation. */
1184         int *priorOrd = 0;
1185         if ( priorityAugs.length() > 0 )
1186                 priorOrd = new int[priorityAugs.length()];
1187
1188         /* Walk all priorities, assigning the priority ordering. */
1189         for ( int i = 0; i < priorityAugs.length(); i++ )
1190                 priorOrd[i] = pd->curPriorOrd++;
1191
1192         /* If the priority descriptors have not been made, make them now.  Make
1193          * priority descriptors for each priority asignment that will be passed to
1194          * the fsm. Used to keep track of the key, value and used bit. */
1195         if ( priorDescs == 0 && priorityAugs.length() > 0 ) {
1196                 priorDescs = new PriorDesc[priorityAugs.length()];
1197                 for ( int i = 0; i < priorityAugs.length(); i++ ) {
1198                         /* Init the prior descriptor for the priority setting. */
1199                         priorDescs[i].key = priorityAugs[i].priorKey;
1200                         priorDescs[i].priority = priorityAugs[i].priorValue;
1201                 }
1202         }
1203
1204         /* Assign priorities into the machine. */
1205         assignPriorities( rtnVal, priorOrd );
1206
1207         /* Assign epsilon transitions. */
1208         for ( int e = 0; e < epsilonLinks.length(); e++ ) {
1209                 /* Get the name, which may not exist. If it doesn't then silently
1210                  * ignore it because an error has already been reported. */
1211                 NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++];
1212                 if ( epTarg != 0 ) {
1213                         /* Make the epsilon transitions. */
1214                         rtnVal->epsilonTrans( epTarg->id );
1215
1216                         /* Note that we have made a link to the name. */
1217                         pd->localNameScope->referencedNames.append( epTarg );
1218                 }
1219         }
1220
1221         /* Set entry points for labels. */
1222         if ( labels.length() > 0 ) {
1223                 /* Pop the names. */
1224                 pd->resetNameScope( nameFrame );
1225
1226                 /* Make labels that are referenced into entry points. */
1227                 for ( int i = 0; i < labels.length(); i++ ) {
1228                         pd->enterNameScope( false, 1 );
1229
1230                         /* Will always be found. */
1231                         NameInst *name = pd->curNameInst;
1232
1233                         /* If the name is referenced then set the entry point. */
1234                         if ( name->numRefs > 0 )
1235                                 rtnVal->setEntry( name->id, rtnVal->startState );
1236                 }
1237
1238                 pd->popNameScope( nameFrame );
1239         }
1240
1241         if ( priorOrd != 0 )
1242                 delete[] priorOrd;
1243         if ( actionOrd != 0 )
1244                 delete[] actionOrd;
1245         return rtnVal;
1246 }
1247
1248 void FactorWithAug::makeNameTree( ParseData *pd )
1249 {
1250         /* Add the labels to the tree of instantiated names. Each label
1251          * makes a new scope. */
1252         NameInst *prevNameInst = pd->curNameInst;
1253         for ( int i = 0; i < labels.length(); i++ )
1254                 pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data, true );
1255
1256         /* Recurse, then pop the names. */
1257         factorWithRep->makeNameTree( pd );
1258         pd->curNameInst = prevNameInst;
1259 }
1260
1261
1262 void FactorWithAug::resolveNameRefs( ParseData *pd )
1263 {
1264         /* Enter into the name scope created by any labels. */
1265         NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1266
1267         /* Note action references. */
1268         for ( int i = 0; i < actions.length(); i++ )
1269                 actions[i].action->actionRefs.append( pd->localNameScope );
1270
1271         /* Recurse first. IMPORTANT: we must do the exact same traversal as when
1272          * the tree is constructed. */
1273         factorWithRep->resolveNameRefs( pd );
1274
1275         /* Resolve epsilon transitions. */
1276         for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) {
1277                 /* Get the link. */
1278                 EpsilonLink &link = epsilonLinks[ep];
1279                 NameInst *resolvedName = 0;
1280
1281                 if ( link.target.length() == 1 && strcmp( link.target.data[0], "final" ) == 0 ) {
1282                         /* Epsilon drawn to an implicit final state. An implicit final is
1283                          * only available in join operations. */
1284                         resolvedName = pd->localNameScope->final;
1285                 }
1286                 else {
1287                         /* Do an search for the name. */
1288                         NameSet resolved;
1289                         pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 );
1290                         if ( resolved.length() > 0 ) {
1291                                 /* Take the first one. */
1292                                 resolvedName = resolved[0];
1293                                 if ( resolved.length() > 1 ) {
1294                                         /* Complain about the multiple references. */
1295                                         error(link.loc) << "state reference " << link.target <<
1296                                                         " resolves to multiple entry points" << endl;
1297                                         errorStateLabels( resolved );
1298                                 }
1299                         }
1300                 }
1301
1302                 /* This is tricky, we stuff resolved epsilon transitions into one long
1303                  * vector in the parse data structure. Since the name resolution and
1304                  * graph generation both do identical walks of the parse tree we
1305                  * should always find the link resolutions in the right place.  */
1306                 pd->epsilonResolvedLinks.append( resolvedName );
1307
1308                 if ( resolvedName != 0 ) {
1309                         /* Found the name, bump of the reference count on it. */
1310                         resolvedName->numRefs += 1;
1311                 }
1312                 else {
1313                         /* Complain, no recovery action, the epsilon op will ignore any
1314                          * epsilon transitions whose names did not resolve. */
1315                         error(link.loc) << "could not resolve label " << link.target << endl;
1316                 }
1317         }
1318
1319         if ( labels.length() > 0 )
1320                 pd->popNameScope( nameFrame );
1321 }
1322
1323
1324 /* Clean up after a factor with repetition node. */
1325 FactorWithRep::~FactorWithRep()
1326 {
1327         switch ( type ) {
1328                 case StarType: case StarStarType: case OptionalType: case PlusType:
1329                 case ExactType: case MaxType: case MinType: case RangeType:
1330                         delete factorWithRep;
1331                         break;
1332                 case FactorWithNegType:
1333                         delete factorWithNeg;
1334                         break;
1335         }
1336 }
1337
1338 /* Evaluate a factor with repetition node. */
1339 FsmAp *FactorWithRep::walk( ParseData *pd )
1340 {
1341         FsmAp *retFsm = 0;
1342
1343         switch ( type ) {
1344         case StarType: {
1345                 /* Evaluate the FactorWithRep. */
1346                 retFsm = factorWithRep->walk( pd );
1347                 if ( retFsm->startState->isFinState() ) {
1348                         warning(loc) << "applying kleene star to a machine that "
1349                                         "accepts zero length word" << endl;
1350                 }
1351
1352                 /* Shift over the start action orders then do the kleene star. */
1353                 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1354                 retFsm->starOp( );
1355                 afterOpMinimize( retFsm );
1356                 break;
1357         }
1358         case StarStarType: {
1359                 /* Evaluate the FactorWithRep. */
1360                 retFsm = factorWithRep->walk( pd );
1361                 if ( retFsm->startState->isFinState() ) {
1362                         warning(loc) << "applying kleene star to a machine that "
1363                                         "accepts zero length word" << endl;
1364                 }
1365
1366                 /* Set up the prior descs. All gets priority one, whereas leaving gets
1367                  * priority zero. Make a unique key so that these priorities don't
1368                  * interfere with any priorities set by the user. */
1369                 priorDescs[0].key = pd->nextPriorKey++;
1370                 priorDescs[0].priority = 1;
1371                 retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
1372
1373                 /* Leaveing gets priority 0. Use same unique key. */
1374                 priorDescs[1].key = priorDescs[0].key;
1375                 priorDescs[1].priority = 0;
1376                 retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
1377
1378                 /* Shift over the start action orders then do the kleene star. */
1379                 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1380                 retFsm->starOp( );
1381                 afterOpMinimize( retFsm );
1382                 break;
1383         }
1384         case OptionalType: {
1385                 /* Make the null fsm. */
1386                 FsmAp *nu = new FsmAp();
1387                 nu->lambdaFsm( );
1388
1389                 /* Evaluate the FactorWithRep. */
1390                 retFsm = factorWithRep->walk( pd );
1391
1392                 /* Perform the question operator. */
1393                 retFsm->unionOp( nu );
1394                 afterOpMinimize( retFsm );
1395                 break;
1396         }
1397         case PlusType: {
1398                 /* Evaluate the FactorWithRep. */
1399                 retFsm = factorWithRep->walk( pd );
1400                 if ( retFsm->startState->isFinState() ) {
1401                         warning(loc) << "applying plus operator to a machine that "
1402                                         "accpets zero length word" << endl;
1403                 }
1404
1405                 /* Need a duplicated for the star end. */
1406                 FsmAp *dup = new FsmAp( *retFsm );
1407
1408                 /* The start func orders need to be shifted before doing the star. */
1409                 pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd );
1410
1411                 /* Star the duplicate. */
1412                 dup->starOp( );
1413                 afterOpMinimize( dup );
1414
1415                 retFsm->concatOp( dup );
1416                 afterOpMinimize( retFsm );
1417                 break;
1418         }
1419         case ExactType: {
1420                 /* Get an int from the repetition amount. */
1421                 if ( lowerRep == 0 ) {
1422                         /* No copies. Don't need to evaluate the factorWithRep.
1423                          * This Defeats the purpose so give a warning. */
1424                         warning(loc) << "exactly zero repetitions results "
1425                                         "in the null machine" << endl;
1426
1427                         retFsm = new FsmAp();
1428                         retFsm->lambdaFsm();
1429                 }
1430                 else {
1431                         /* Evaluate the first FactorWithRep. */
1432                         retFsm = factorWithRep->walk( pd );
1433                         if ( retFsm->startState->isFinState() ) {
1434                                 warning(loc) << "applying repetition to a machine that "
1435                                                 "accepts zero length word" << endl;
1436                         }
1437
1438                         /* The start func orders need to be shifted before doing the
1439                          * repetition. */
1440                         pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1441
1442                         /* Do the repetition on the machine. Already guarded against n == 0 */
1443                         retFsm->repeatOp( lowerRep );
1444                         afterOpMinimize( retFsm );
1445                 }
1446                 break;
1447         }
1448         case MaxType: {
1449                 /* Get an int from the repetition amount. */
1450                 if ( upperRep == 0 ) {
1451                         /* No copies. Don't need to evaluate the factorWithRep.
1452                          * This Defeats the purpose so give a warning. */
1453                         warning(loc) << "max zero repetitions results "
1454                                         "in the null machine" << endl;
1455
1456                         retFsm = new FsmAp();
1457                         retFsm->lambdaFsm();
1458                 }
1459                 else {
1460                         /* Evaluate the first FactorWithRep. */
1461                         retFsm = factorWithRep->walk( pd );
1462                         if ( retFsm->startState->isFinState() ) {
1463                                 warning(loc) << "applying max repetition to a machine that "
1464                                                 "accepts zero length word" << endl;
1465                         }
1466
1467                         /* The start func orders need to be shifted before doing the
1468                          * repetition. */
1469                         pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1470
1471                         /* Do the repetition on the machine. Already guarded against n == 0 */
1472                         retFsm->optionalRepeatOp( upperRep );
1473                         afterOpMinimize( retFsm );
1474                 }
1475                 break;
1476         }
1477         case MinType: {
1478                 /* Evaluate the repeated machine. */
1479                 retFsm = factorWithRep->walk( pd );
1480                 if ( retFsm->startState->isFinState() ) {
1481                         warning(loc) << "applying min repetition to a machine that "
1482                                         "accepts zero length word" << endl;
1483                 }
1484
1485                 /* The start func orders need to be shifted before doing the repetition
1486                  * and the kleene star. */
1487                 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1488
1489                 if ( lowerRep == 0 ) {
1490                         /* Acts just like a star op on the machine to return. */
1491                         retFsm->starOp( );
1492                         afterOpMinimize( retFsm );
1493                 }
1494                 else {
1495                         /* Take a duplicate for the plus. */
1496                         FsmAp *dup = new FsmAp( *retFsm );
1497
1498                         /* Do repetition on the first half. */
1499                         retFsm->repeatOp( lowerRep );
1500                         afterOpMinimize( retFsm );
1501
1502                         /* Star the duplicate. */
1503                         dup->starOp( );
1504                         afterOpMinimize( dup );
1505
1506                         /* Tak on the kleene star. */
1507                         retFsm->concatOp( dup );
1508                         afterOpMinimize( retFsm );
1509                 }
1510                 break;
1511         }
1512         case RangeType: {
1513                 /* Check for bogus range. */
1514                 if ( upperRep - lowerRep < 0 ) {
1515                         error(loc) << "invalid range repetition" << endl;
1516
1517                         /* Return null machine as recovery. */
1518                         retFsm = new FsmAp();
1519                         retFsm->lambdaFsm();
1520                 }
1521                 else if ( lowerRep == 0 && upperRep == 0 ) {
1522                         /* No copies. Don't need to evaluate the factorWithRep.  This
1523                          * defeats the purpose so give a warning. */
1524                         warning(loc) << "zero to zero repetitions results "
1525                                         "in the null machine" << endl;
1526
1527                         retFsm = new FsmAp();
1528                         retFsm->lambdaFsm();
1529                 }
1530                 else {
1531                         /* Now need to evaluate the repeated machine. */
1532                         retFsm = factorWithRep->walk( pd );
1533                         if ( retFsm->startState->isFinState() ) {
1534                                 warning(loc) << "applying range repetition to a machine that "
1535                                                 "accepts zero length word" << endl;
1536                         }
1537
1538                         /* The start func orders need to be shifted before doing both kinds
1539                          * of repetition. */
1540                         pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1541
1542                         if ( lowerRep == 0 ) {
1543                                 /* Just doing max repetition. Already guarded against n == 0. */
1544                                 retFsm->optionalRepeatOp( upperRep );
1545                                 afterOpMinimize( retFsm );
1546                         }
1547                         else if ( lowerRep == upperRep ) {
1548                                 /* Just doing exact repetition. Already guarded against n == 0. */
1549                                 retFsm->repeatOp( lowerRep );
1550                                 afterOpMinimize( retFsm );
1551                         }
1552                         else {
1553                                 /* This is the case that 0 < lowerRep < upperRep. Take a
1554                                  * duplicate for the optional repeat. */
1555                                 FsmAp *dup = new FsmAp( *retFsm );
1556
1557                                 /* Do repetition on the first half. */
1558                                 retFsm->repeatOp( lowerRep );
1559                                 afterOpMinimize( retFsm );
1560
1561                                 /* Do optional repetition on the second half. */
1562                                 dup->optionalRepeatOp( upperRep - lowerRep );
1563                                 afterOpMinimize( dup );
1564
1565                                 /* Tak on the duplicate machine. */
1566                                 retFsm->concatOp( dup );
1567                                 afterOpMinimize( retFsm );
1568                         }
1569                 }
1570                 break;
1571         }
1572         case FactorWithNegType: {
1573                 /* Evaluate the Factor. Pass it up. */
1574                 retFsm = factorWithNeg->walk( pd );
1575                 break;
1576         }}
1577         return retFsm;
1578 }
1579
1580 void FactorWithRep::makeNameTree( ParseData *pd )
1581 {
1582         switch ( type ) {
1583         case StarType:
1584         case StarStarType:
1585         case OptionalType:
1586         case PlusType:
1587         case ExactType:
1588         case MaxType:
1589         case MinType:
1590         case RangeType:
1591                 factorWithRep->makeNameTree( pd );
1592                 break;
1593         case FactorWithNegType:
1594                 factorWithNeg->makeNameTree( pd );
1595                 break;
1596         }
1597 }
1598
1599 void FactorWithRep::resolveNameRefs( ParseData *pd )
1600 {
1601         switch ( type ) {
1602         case StarType:
1603         case StarStarType:
1604         case OptionalType:
1605         case PlusType:
1606         case ExactType:
1607         case MaxType:
1608         case MinType:
1609         case RangeType:
1610                 factorWithRep->resolveNameRefs( pd );
1611                 break;
1612         case FactorWithNegType:
1613                 factorWithNeg->resolveNameRefs( pd );
1614                 break;
1615         }
1616 }
1617
1618 /* Clean up after a factor with negation node. */
1619 FactorWithNeg::~FactorWithNeg()
1620 {
1621         switch ( type ) {
1622                 case NegateType:
1623                 case CharNegateType:
1624                         delete factorWithNeg;
1625                         break;
1626                 case FactorType:
1627                         delete factor;
1628                         break;
1629         }
1630 }
1631
1632 /* Evaluate a factor with negation node. */
1633 FsmAp *FactorWithNeg::walk( ParseData *pd )
1634 {
1635         FsmAp *retFsm = 0;
1636
1637         switch ( type ) {
1638         case NegateType: {
1639                 /* Evaluate the factorWithNeg. */
1640                 FsmAp *toNegate = factorWithNeg->walk( pd );
1641
1642                 /* Negation is subtract from dot-star. */
1643                 retFsm = dotStarFsm( pd );
1644                 retFsm->subtractOp( toNegate );
1645                 afterOpMinimize( retFsm );
1646                 break;
1647         }
1648         case CharNegateType: {
1649                 /* Evaluate the factorWithNeg. */
1650                 FsmAp *toNegate = factorWithNeg->walk( pd );
1651
1652                 /* CharNegation is subtract from dot. */
1653                 retFsm = dotFsm( pd );
1654                 retFsm->subtractOp( toNegate );
1655                 afterOpMinimize( retFsm );
1656                 break;
1657         }
1658         case FactorType: {
1659                 /* Evaluate the Factor. Pass it up. */
1660                 retFsm = factor->walk( pd );
1661                 break;
1662         }}
1663         return retFsm;
1664 }
1665
1666 void FactorWithNeg::makeNameTree( ParseData *pd )
1667 {
1668         switch ( type ) {
1669         case NegateType:
1670         case CharNegateType:
1671                 factorWithNeg->makeNameTree( pd );
1672                 break;
1673         case FactorType:
1674                 factor->makeNameTree( pd );
1675                 break;
1676         }
1677 }
1678
1679 void FactorWithNeg::resolveNameRefs( ParseData *pd )
1680 {
1681         switch ( type ) {
1682         case NegateType:
1683         case CharNegateType:
1684                 factorWithNeg->resolveNameRefs( pd );
1685                 break;
1686         case FactorType:
1687                 factor->resolveNameRefs( pd );
1688                 break;
1689         }
1690 }
1691
1692 /* Clean up after a factor node. */
1693 Factor::~Factor()
1694 {
1695         switch ( type ) {
1696                 case LiteralType:
1697                         delete literal;
1698                         break;
1699                 case RangeType:
1700                         delete range;
1701                         break;
1702                 case OrExprType:
1703                         delete reItem;
1704                         break;
1705                 case RegExprType:
1706                         delete regExpr;
1707                         break;
1708                 case ReferenceType:
1709                         break;
1710                 case ParenType:
1711                         delete join;
1712                         break;
1713                 case LongestMatchType:
1714                         delete longestMatch;
1715                         break;
1716         }
1717 }
1718
1719 /* Evaluate a factor node. */
1720 FsmAp *Factor::walk( ParseData *pd )
1721 {
1722         FsmAp *rtnVal = 0;
1723         switch ( type ) {
1724         case LiteralType:
1725                 rtnVal = literal->walk( pd );
1726                 break;
1727         case RangeType:
1728                 rtnVal = range->walk( pd );
1729                 break;
1730         case OrExprType:
1731                 rtnVal = reItem->walk( pd, 0 );
1732                 break;
1733         case RegExprType:
1734                 rtnVal = regExpr->walk( pd, 0 );
1735                 break;
1736         case ReferenceType:
1737                 rtnVal = varDef->walk( pd );
1738                 break;
1739         case ParenType:
1740                 rtnVal = join->walk( pd );
1741                 break;
1742         case LongestMatchType:
1743                 rtnVal = longestMatch->walk( pd );
1744                 break;
1745         }
1746
1747         return rtnVal;
1748 }
1749
1750 void Factor::makeNameTree( ParseData *pd )
1751 {
1752         switch ( type ) {
1753         case LiteralType:
1754         case RangeType:
1755         case OrExprType:
1756         case RegExprType:
1757                 break;
1758         case ReferenceType:
1759                 varDef->makeNameTree( loc, pd );
1760                 break;
1761         case ParenType:
1762                 join->makeNameTree( pd );
1763                 break;
1764         case LongestMatchType:
1765                 longestMatch->makeNameTree( pd );
1766                 break;
1767         }
1768 }
1769
1770 void Factor::resolveNameRefs( ParseData *pd )
1771 {
1772         switch ( type ) {
1773         case LiteralType:
1774         case RangeType:
1775         case OrExprType:
1776         case RegExprType:
1777                 break;
1778         case ReferenceType:
1779                 varDef->resolveNameRefs( pd );
1780                 break;
1781         case ParenType:
1782                 join->resolveNameRefs( pd );
1783                 break;
1784         case LongestMatchType:
1785                 longestMatch->resolveNameRefs( pd );
1786                 break;
1787         }
1788 }
1789
1790 /* Clean up a range object. Must delete the two literals. */
1791 Range::~Range()
1792 {
1793         delete lowerLit;
1794         delete upperLit;
1795 }
1796
1797 /* Evaluate a range. Gets the lower an upper key and makes an fsm range. */
1798 FsmAp *Range::walk( ParseData *pd )
1799 {
1800         /* Construct and verify the suitability of the lower end of the range. */
1801         FsmAp *lowerFsm = lowerLit->walk( pd );
1802         if ( !lowerFsm->checkSingleCharMachine() ) {
1803                 error(lowerLit->token.loc) <<
1804                         "bad range lower end, must be a single character" << endl;
1805         }
1806
1807         /* Construct and verify the upper end. */
1808         FsmAp *upperFsm = upperLit->walk( pd );
1809         if ( !upperFsm->checkSingleCharMachine() ) {
1810                 error(upperLit->token.loc) <<
1811                         "bad range upper end, must be a single character" << endl;
1812         }
1813
1814         /* Grab the keys from the machines, then delete them. */
1815         Key lowKey = lowerFsm->startState->outList.head->lowKey;
1816         Key highKey = upperFsm->startState->outList.head->lowKey;
1817         delete lowerFsm;
1818         delete upperFsm;
1819
1820         /* Validate the range. */
1821         if ( lowKey > highKey ) {
1822                 /* Recover by setting upper to lower; */
1823                 error(lowerLit->token.loc) << "lower end of range is greater then upper end" << endl;
1824                 highKey = lowKey;
1825         }
1826
1827         /* Return the range now that it is validated. */
1828         FsmAp *retFsm = new FsmAp();
1829         retFsm->rangeFsm( lowKey, highKey );
1830         return retFsm;
1831 }
1832
1833 /* Evaluate a literal object. */
1834 FsmAp *Literal::walk( ParseData *pd )
1835 {
1836         /* FsmAp to return, is the alphabet signed. */
1837         FsmAp *rtnVal = 0;
1838
1839         switch ( type ) {
1840         case Number: {
1841                 /* Make the fsm key in int format. */
1842                 Key fsmKey = makeFsmKeyNum( token.data, token.loc, pd );
1843                 /* Make the new machine. */
1844                 rtnVal = new FsmAp();
1845                 rtnVal->concatFsm( fsmKey );
1846                 break;
1847         }
1848         case LitString: {
1849                 /* Make the array of keys in int format. */
1850                 Token interp;
1851                 bool caseInsensitive;
1852                 token.prepareLitString( interp, caseInsensitive );
1853                 Key *arr = new Key[interp.length];
1854                 makeFsmKeyArray( arr, interp.data, interp.length, pd );
1855
1856                 /* Make the new machine. */
1857                 rtnVal = new FsmAp();
1858                 if ( caseInsensitive )
1859                         rtnVal->concatFsmCI( arr, interp.length );
1860                 else
1861                         rtnVal->concatFsm( arr, interp.length );
1862                 delete[] interp.data;
1863                 delete[] arr;
1864                 break;
1865         }}
1866         return rtnVal;
1867 }
1868
1869 /* Clean up after a regular expression object. */
1870 RegExpr::~RegExpr()
1871 {
1872         switch ( type ) {
1873                 case RecurseItem:
1874                         delete regExpr;
1875                         delete item;
1876                         break;
1877                 case Empty:
1878                         break;
1879         }
1880 }
1881
1882 /* Evaluate a regular expression object. */
1883 FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex )
1884 {
1885         /* This is the root regex, pass down a pointer to this. */
1886         if ( rootRegex == 0 )
1887                 rootRegex = this;
1888
1889         FsmAp *rtnVal = 0;
1890         switch ( type ) {
1891                 case RecurseItem: {
1892                         /* Walk both items. */
1893                         rtnVal = regExpr->walk( pd, rootRegex );
1894                         FsmAp *fsm2 = item->walk( pd, rootRegex );
1895                         rtnVal->concatOp( fsm2 );
1896                         break;
1897                 }
1898                 case Empty: {
1899                         rtnVal = new FsmAp();
1900                         rtnVal->lambdaFsm();
1901                         break;
1902                 }
1903         }
1904         return rtnVal;
1905 }
1906
1907 /* Clean up after an item in a regular expression. */
1908 ReItem::~ReItem()
1909 {
1910         switch ( type ) {
1911                 case Data:
1912                 case Dot:
1913                         break;
1914                 case OrBlock:
1915                 case NegOrBlock:
1916                         delete orBlock;
1917                         break;
1918         }
1919 }
1920
1921 /* Evaluate a regular expression object. */
1922 FsmAp *ReItem::walk( ParseData *pd, RegExpr *rootRegex )
1923 {
1924         /* The fsm to return, is the alphabet signed? */
1925         FsmAp *rtnVal = 0;
1926
1927         switch ( type ) {
1928                 case Data: {
1929                         /* Move the data into an integer array and make a concat fsm. */
1930                         Key *arr = new Key[token.length];
1931                         makeFsmKeyArray( arr, token.data, token.length, pd );
1932
1933                         /* Make the concat fsm. */
1934                         rtnVal = new FsmAp();
1935                         if ( rootRegex != 0 && rootRegex->caseInsensitive )
1936                                 rtnVal->concatFsmCI( arr, token.length );
1937                         else
1938                                 rtnVal->concatFsm( arr, token.length );
1939                         delete[] arr;
1940                         break;
1941                 }
1942                 case Dot: {
1943                         /* Make the dot fsm. */
1944                         rtnVal = dotFsm( pd );
1945                         break;
1946                 }
1947                 case OrBlock: {
1948                         /* Get the or block and minmize it. */
1949                         rtnVal = orBlock->walk( pd, rootRegex );
1950                         if ( rtnVal == 0 ) {
1951                                 rtnVal = new FsmAp();
1952                                 rtnVal->lambdaFsm();
1953                         }
1954                         rtnVal->minimizePartition2();
1955                         break;
1956                 }
1957                 case NegOrBlock: {
1958                         /* Get the or block and minimize it. */
1959                         FsmAp *fsm = orBlock->walk( pd, rootRegex );
1960                         fsm->minimizePartition2();
1961
1962                         /* Make a dot fsm and subtract from it. */
1963                         rtnVal = dotFsm( pd );
1964                         rtnVal->subtractOp( fsm );
1965                         rtnVal->minimizePartition2();
1966                         break;
1967                 }
1968         }
1969
1970         /* If the item is followed by a star, then apply the star op. */
1971         if ( star ) {
1972                 if ( rtnVal->startState->isFinState() ) {
1973                         warning(loc) << "applying kleene star to a machine that "
1974                                         "accpets zero length word" << endl;
1975                 }
1976
1977                 rtnVal->starOp();
1978                 rtnVal->minimizePartition2();
1979         }
1980         return rtnVal;
1981 }
1982
1983 /* Clean up after an or block of a regular expression. */
1984 ReOrBlock::~ReOrBlock()
1985 {
1986         switch ( type ) {
1987                 case RecurseItem:
1988                         delete orBlock;
1989                         delete item;
1990                         break;
1991                 case Empty:
1992                         break;
1993         }
1994 }
1995
1996
1997 /* Evaluate an or block of a regular expression. */
1998 FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex )
1999 {
2000         FsmAp *rtnVal = 0;
2001         switch ( type ) {
2002                 case RecurseItem: {
2003                         /* Evaluate the two fsm. */
2004                         FsmAp *fsm1 = orBlock->walk( pd, rootRegex );
2005                         FsmAp *fsm2 = item->walk( pd, rootRegex );
2006                         if ( fsm1 == 0 )
2007                                 rtnVal = fsm2;
2008                         else {
2009                                 fsm1->unionOp( fsm2 );
2010                                 rtnVal = fsm1;
2011                         }
2012                         break;
2013                 }
2014                 case Empty: {
2015                         rtnVal = 0;
2016                         break;
2017                 }
2018         }
2019         return rtnVal;;
2020 }
2021
2022 /* Evaluate an or block item of a regular expression. */
2023 FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex )
2024 {
2025         /* The return value, is the alphabet signed? */
2026         FsmAp *rtnVal = 0;
2027         switch ( type ) {
2028         case Data: {
2029                 /* Make the or machine. */
2030                 rtnVal = new FsmAp();
2031
2032                 /* Put the or data into an array of ints. Note that we find unique
2033                  * keys. Duplicates are silently ignored. The alternative would be to
2034                  * issue warning or an error but since we can't with [a0-9a] or 'a' |
2035                  * 'a' don't bother here. */
2036                 KeySet keySet;
2037                 makeFsmUniqueKeyArray( keySet, token.data, token.length,
2038                         rootRegex != 0 ? rootRegex->caseInsensitive : false, pd );
2039
2040                 /* Run the or operator. */
2041                 rtnVal->orFsm( keySet.data, keySet.length() );
2042                 break;
2043         }
2044         case Range: {
2045                 /* Make the upper and lower keys. */
2046                 Key lowKey = makeFsmKeyChar( lower, pd );
2047                 Key highKey = makeFsmKeyChar( upper, pd );
2048
2049                 /* Validate the range. */
2050                 if ( lowKey > highKey ) {
2051                         /* Recover by setting upper to lower; */
2052                         error(loc) << "lower end of range is greater then upper end" << endl;
2053                         highKey = lowKey;
2054                 }
2055
2056                 /* Make the range machine. */
2057                 rtnVal = new FsmAp();
2058                 rtnVal->rangeFsm( lowKey, highKey );
2059
2060                 if ( rootRegex != 0 && rootRegex->caseInsensitive ) {
2061                         if ( lowKey <= 'Z' && 'A' <= highKey ) {
2062                                 Key otherLow = lowKey < 'A' ? Key('A') : lowKey;
2063                                 Key otherHigh = 'Z' < highKey ? Key('Z') : highKey;
2064
2065                                 otherLow = 'a' + ( otherLow - 'A' );
2066                                 otherHigh = 'a' + ( otherHigh - 'A' );
2067
2068                                 FsmAp *otherRange = new FsmAp();
2069                                 otherRange->rangeFsm( otherLow, otherHigh );
2070                                 rtnVal->unionOp( otherRange );
2071                                 rtnVal->minimizePartition2();
2072                         }
2073                         else if ( lowKey <= 'z' && 'a' <= highKey ) {
2074                                 Key otherLow = lowKey < 'a' ? Key('a') : lowKey;
2075                                 Key otherHigh = 'z' < highKey ? Key('z') : highKey;
2076
2077                                 otherLow = 'A' + ( otherLow - 'a' );
2078                                 otherHigh = 'A' + ( otherHigh - 'a' );
2079
2080                                 FsmAp *otherRange = new FsmAp();
2081                                 otherRange->rangeFsm( otherLow, otherHigh );
2082                                 rtnVal->unionOp( otherRange );
2083                                 rtnVal->minimizePartition2();
2084                         }
2085                 }
2086
2087                 break;
2088         }}
2089         return rtnVal;
2090 }