ragel/parsetree.cpp

   1 /*
   2  *  Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
   3  */
   4
   5 /*  This file is part of Ragel.
   6  *
   7  *  Ragel is free software; you can redistribute it and/or modify
   8  *  it under the terms of the GNU General Public License as published by
   9  *  the Free Software Foundation; either version 2 of the License, or
  10  *  (at your option) any later version.
  11  *
  12  *  Ragel is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  *  GNU General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU General Public License
  18  *  along with Ragel; if not, write to the Free Software
  19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  */
  21
  22 #include <iostream>
  23 #include <iomanip>
  24 #include <errno.h>
  25 #include <limits.h>
  26 #include <stdlib.h>
  27
  28 /* Parsing. */
  29 #include "ragel.h"
  30 #include "rlparse.h"
  31 #include "parsetree.h"
  32
  33 using namespace std;
  34 ostream &operator<<( ostream &out, const NameRef &nameRef );
  35 ostream &operator<<( ostream &out, const NameInst &nameInst );
  36
  37 /* Convert the literal string which comes in from the scanner into an array of
  38  * characters with escapes and options interpreted. Also null terminates the
  39  * string. Though this null termination should not be relied on for
  40  * interpreting literals in the parser because the string may contain a
  41  * literal string with \0 */
  42 void Token::prepareLitString( Token &result, bool &caseInsensitive )
  43 {
  44         result.data = new char[this->length+1];
  45         caseInsensitive = false;
  46
  47         char *src = this->data + 1;
  48         char *end = this->data + this->length - 1;
  49
  50         while ( *end != '\'' && *end != '\"' ) {
  51                 if ( *end == 'i' )
  52                         caseInsensitive = true;
  53                 else {
  54                         error( this->loc ) << "literal string '" << *end <<
  55                                         "' option not supported" << endl;
  56                 }
  57                 end -= 1;
  58         }
  59
  60         char *dest = result.data;
  61         int len = 0;
  62         while ( src != end ) {
  63                 if ( *src == '\\' ) {
  64                         switch ( src[1] ) {
  65                         case '0': dest[len++] = '\0'; break;
  66                         case 'a': dest[len++] = '\a'; break;
  67                         case 'b': dest[len++] = '\b'; break;
  68                         case 't': dest[len++] = '\t'; break;
  69                         case 'n': dest[len++] = '\n'; break;
  70                         case 'v': dest[len++] = '\v'; break;
  71                         case 'f': dest[len++] = '\f'; break;
  72                         case 'r': dest[len++] = '\r'; break;
  73                         case '\n':  break;
  74                         default: dest[len++] = src[1]; break;
  75                         }
  76                         src += 2;
  77                 }
  78                 else {
  79                         dest[len++] = *src++;
  80                 }
  81         }
  82         result.length = len;
  83         result.data[result.length] = 0;
  84 }
  85
  86
  87 FsmAp *VarDef::walk( ParseData *pd )
  88 {
  89         /* We enter into a new name scope. */
  90         NameFrame nameFrame = pd->enterNameScope( true, 1 );
  91
  92         /* Recurse on the expression. */
  93         FsmAp *rtnVal = joinOrLm->walk( pd );
  94
  95         /* Do the tranfer of local error actions. */
  96         LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name );
  97         if ( localErrDictEl != 0 ) {
  98                 for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ )
  99                         rtnVal->transferErrorActions( state, localErrDictEl->value );
 100         }
 101
 102         /* If the expression below is a join operation with multiple expressions
 103          * then it just had epsilon transisions resolved. If it is a join
 104          * with only a single expression then run the epsilon op now. */
 105         if ( joinOrLm->type == JoinOrLm::JoinType && joinOrLm->join->exprList.length() == 1 )
 106                 rtnVal->epsilonOp();
 107
 108         /* We can now unset entry points that are not longer used. */
 109         pd->unsetObsoleteEntries( rtnVal );
 110
 111         /* If the name of the variable is referenced then add the entry point to
 112          * the graph. */
 113         if ( pd->curNameInst->numRefs > 0 )
 114                 rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState );
 115
 116         /* Pop the name scope. */
 117         pd->popNameScope( nameFrame );
 118         return rtnVal;
 119 }
 120
 121 void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd )
 122 {
 123         /* The variable definition enters a new scope. */
 124         NameInst *prevNameInst = pd->curNameInst;
 125         pd->curNameInst = pd->addNameInst( loc, name, false );
 126
 127         if ( joinOrLm->type == JoinOrLm::LongestMatchType )
 128                 pd->curNameInst->isLongestMatch = true;
 129
 130         /* Recurse. */
 131         joinOrLm->makeNameTree( pd );
 132
 133         /* The name scope ends, pop the name instantiation. */
 134         pd->curNameInst = prevNameInst;
 135 }
 136
 137 void VarDef::resolveNameRefs( ParseData *pd )
 138 {
 139         /* Entering into a new scope. */
 140         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 141
 142         /* Recurse. */
 143         joinOrLm->resolveNameRefs( pd );
 144
 145         /* The name scope ends, pop the name instantiation. */
 146         pd->popNameScope( nameFrame );
 147 }
 148
 149 InputLoc LongestMatchPart::getLoc()
 150 {
 151         return action != 0 ? action->loc : semiLoc;
 152 }
 153
 154 /*
 155  * If there are any LMs then all of the following entry points must reset
 156  * tokstart:
 157  *
 158  *  1. fentry(StateRef)
 159  *  2. ftoto(StateRef), fcall(StateRef), fnext(StateRef)
 160  *  3. targt of any transition that has an fcall (the return loc).
 161  *  4. start state of all longest match routines.
 162  */
 163
 164 Action *LongestMatch::newAction( ParseData *pd, const InputLoc &loc,
 165                 const char *name, InlineList *inlineList )
 166 {
 167         Action *action = new Action( loc, name, inlineList, pd->nextCondId++ );
 168         action->actionRefs.append( pd->curNameInst );
 169         pd->actionList.append( action );
 170         action->isLmAction = true;
 171         return action;
 172 }
 173
 174 void LongestMatch::makeActions( ParseData *pd )
 175 {
 176         /* Make actions that set the action id. */
 177         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 178                 /* For each part create actions for setting the match type.  We need
 179                  * to do this so that the actions will go into the actionIndex. */
 180                 InlineList *inlineList = new InlineList;
 181                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
 182                                 InlineItem::LmSetActId ) );
 183                 char *actName = new char[50];
 184                 sprintf( actName, "store%i", lmi->longestMatchId );
 185                 lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList );
 186         }
 187
 188         /* Make actions that execute the user action and restart on the last
 189          * character. */
 190         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 191                 /* For each part create actions for setting the match type.  We need
 192                  * to do this so that the actions will go into the actionIndex. */
 193                 InlineList *inlineList = new InlineList;
 194                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
 195                                 InlineItem::LmOnLast ) );
 196                 char *actName = new char[50];
 197                 sprintf( actName, "last%i", lmi->longestMatchId );
 198                 lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList );
 199         }
 200
 201         /* Make actions that execute the user action and restart on the next
 202          * character.  These actions will set tokend themselves (it is the current
 203          * char). */
 204         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 205                 /* For each part create actions for setting the match type.  We need
 206                  * to do this so that the actions will go into the actionIndex. */
 207                 InlineList *inlineList = new InlineList;
 208                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
 209                                 InlineItem::LmOnNext ) );
 210                 char *actName = new char[50];
 211                 sprintf( actName, "next%i", lmi->longestMatchId );
 212                 lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList );
 213         }
 214
 215         /* Make actions that execute the user action and restart at tokend. These
 216          * actions execute some time after matching the last char. */
 217         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 218                 /* For each part create actions for setting the match type.  We need
 219                  * to do this so that the actions will go into the actionIndex. */
 220                 InlineList *inlineList = new InlineList;
 221                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
 222                                 InlineItem::LmOnLagBehind ) );
 223                 char *actName = new char[50];
 224                 sprintf( actName, "lag%i", lmi->longestMatchId );
 225                 lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList );
 226         }
 227
 228         InputLoc loc;
 229         loc.line = 1;
 230         loc.col = 1;
 231
 232         /* Create the error action. */
 233         InlineList *il6 = new InlineList;
 234         il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) );
 235         lmActSelect = newAction( pd, loc, "switch", il6 );
 236 }
 237
 238 void LongestMatch::findName( ParseData *pd )
 239 {
 240         NameInst *nameInst = pd->curNameInst;
 241         while ( nameInst->name == 0 ) {
 242                 nameInst = nameInst->parent;
 243                 /* Since every machine must must have a name, we should always find a
 244                  * name for the longest match. */
 245                 assert( nameInst != 0 );
 246         }
 247         name = nameInst->name;
 248 }
 249
 250 void LongestMatch::makeNameTree( ParseData *pd )
 251 {
 252         /* Create an anonymous scope for the longest match. Will be used for
 253          * restarting machine after matching a token. */
 254         NameInst *prevNameInst = pd->curNameInst;
 255         pd->curNameInst = pd->addNameInst( loc, 0, false );
 256
 257         /* Recurse into all parts of the longest match operator. */
 258         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ )
 259                 lmi->join->makeNameTree( pd );
 260
 261         /* Traverse the name tree upwards to find a name for this lm. */
 262         findName( pd );
 263
 264         /* Also make the longest match's actions at this point. */
 265         makeActions( pd );
 266
 267         /* The name scope ends, pop the name instantiation. */
 268         pd->curNameInst = prevNameInst;
 269 }
 270
 271 void LongestMatch::resolveNameRefs( ParseData *pd )
 272 {
 273         /* The longest match gets its own name scope. */
 274         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 275
 276         /* Take an action reference for each longest match item and recurse. */
 277         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 278                 /* Record the reference if the item has an action. */
 279                 if ( lmi->action != 0 )
 280                         lmi->action->actionRefs.append( pd->localNameScope );
 281
 282                 /* Recurse down the join. */
 283                 lmi->join->resolveNameRefs( pd );
 284         }
 285
 286         /* The name scope ends, pop the name instantiation. */
 287         pd->popNameScope( nameFrame );
 288 }
 289
 290 void LongestMatch::restart( FsmAp *graph, TransAp *trans )
 291 {
 292         StateAp *fromState = trans->fromState;
 293         graph->detachTrans( fromState, trans->toState, trans );
 294         graph->attachTrans( fromState, graph->startState, trans );
 295 }
 296
 297 void LongestMatch::runLongestMatch( ParseData *pd, FsmAp *graph )
 298 {
 299         graph->markReachableFromHereStopFinal( graph->startState );
 300         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 301                 if ( ms->stateBits & STB_ISMARKED ) {
 302                         ms->lmItemSet.insert( 0 );
 303                         ms->stateBits &= ~ STB_ISMARKED;
 304                 }
 305         }
 306
 307         /* Transfer the first item of non-empty lmAction tables to the item sets
 308          * of the states that follow. Exclude states that have no transitions out.
 309          * This must happen on a separate pass so that on each iteration of the
 310          * next pass we have the item set entries from all lmAction tables. */
 311         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
 312                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
 313                         if ( trans->lmActionTable.length() > 0 ) {
 314                                 LmActionTableEl *lmAct = trans->lmActionTable.data;
 315                                 StateAp *toState = trans->toState;
 316                                 assert( toState );
 317
 318                                 /* Can only optimize this if there are no transitions out.
 319                                  * Note there can be out transitions going nowhere with
 320                                  * actions and they too must inhibit this optimization. */
 321                                 if ( toState->outList.length() > 0 ) {
 322                                         /* Fill the item sets. */
 323                                         graph->markReachableFromHereStopFinal( toState );
 324                                         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 325                                                 if ( ms->stateBits & STB_ISMARKED ) {
 326                                                         ms->lmItemSet.insert( lmAct->value );
 327                                                         ms->stateBits &= ~ STB_ISMARKED;
 328                                                 }
 329                                         }
 330                                 }
 331                         }
 332                 }
 333         }
 334
 335         /* The lmItem sets are now filled, telling us which longest match rules
 336          * can succeed in which states. First determine if we need to make sure
 337          * act is defaulted to zero. We need to do this if there are any states
 338          * with lmItemSet.length() > 1 and NULL is included. That is, that the
 339          * switch may get called when in fact nothing has been matched. */
 340         int maxItemSetLength = 0;
 341         graph->markReachableFromHereStopFinal( graph->startState );
 342         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 343                 if ( ms->stateBits & STB_ISMARKED ) {
 344                         if ( ms->lmItemSet.length() > maxItemSetLength )
 345                                 maxItemSetLength = ms->lmItemSet.length();
 346                         ms->stateBits &= ~ STB_ISMARKED;
 347                 }
 348         }
 349
 350         /* The actions executed on starting to match a token. */
 351         graph->isolateStartState();
 352         graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart );
 353         graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
 354         if ( maxItemSetLength > 1 ) {
 355                 /* The longest match action switch may be called when tokens are
 356                  * matched, in which case act must be initialized, there must be a
 357                  * case to handle the error, and the generated machine will require an
 358                  * error state. */
 359                 lmSwitchHandlesError = true;
 360                 pd->lmRequiresErrorState = true;
 361                 graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
 362         }
 363
 364         /* The place to store transitions to restart. It maybe possible for the
 365          * restarting to affect the searching through the graph that follows. For
 366          * now take the safe route and save the list of transitions to restart
 367          * until after all searching is done. */
 368         Vector<TransAp*> restartTrans;
 369
 370         /* Set actions that do immediate token recognition, set the longest match part
 371          * id and set the token ending. */
 372         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
 373                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
 374                         if ( trans->lmActionTable.length() > 0 ) {
 375                                 LmActionTableEl *lmAct = trans->lmActionTable.data;
 376                                 StateAp *toState = trans->toState;
 377                                 assert( toState );
 378
 379                                 /* Can only optimize this if there are no transitions out.
 380                                  * Note there can be out transitions going nowhere with
 381                                  * actions and they too must inhibit this optimization. */
 382                                 if ( toState->outList.length() == 0 ) {
 383                                         /* Can execute the immediate action for the longest match
 384                                          * part. Redirect the action to the start state.
 385                                          *
 386                                          * NOTE: When we need to inhibit on_last due to leaving
 387                                          * actions the above test suffices. If the state has out
 388                                          * actions then it will fail because the out action will
 389                                          * have been transferred to an error transition, which
 390                                          * makes the outlist non-empty. */
 391                                         trans->actionTable.setAction( lmAct->key,
 392                                                         lmAct->value->actOnLast );
 393                                         restartTrans.append( trans );
 394                                 }
 395                                 else {
 396                                         /* Look for non final states that have a non-empty item
 397                                          * set. If these are present then we need to record the
 398                                          * end of the token.  Also Find the highest item set
 399                                          * length reachable from here (excluding at transtions to
 400                                          * final states). */
 401                                         bool nonFinalNonEmptyItemSet = false;
 402                                         maxItemSetLength = 0;
 403                                         graph->markReachableFromHereStopFinal( toState );
 404                                         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 405                                                 if ( ms->stateBits & STB_ISMARKED ) {
 406                                                         if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
 407                                                                 nonFinalNonEmptyItemSet = true;
 408                                                         if ( ms->lmItemSet.length() > maxItemSetLength )
 409                                                                 maxItemSetLength = ms->lmItemSet.length();
 410                                                         ms->stateBits &= ~ STB_ISMARKED;
 411                                                 }
 412                                         }
 413
 414                                         /* If there are reachable states that are not final and
 415                                          * have non empty item sets or that have an item set
 416                                          * length greater than one then we need to set tokend
 417                                          * because the error action that matches the token will
 418                                          * require it. */
 419                                         if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
 420                                                 trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
 421
 422                                         /* Some states may not know which longest match item to
 423                                          * execute, must set it. */
 424                                         if ( maxItemSetLength > 1 ) {
 425                                                 /* There are transitions out, another match may come. */
 426                                                 trans->actionTable.setAction( lmAct->key,
 427                                                                 lmAct->value->setActId );
 428                                         }
 429                                 }
 430                         }
 431                 }
 432         }
 433
 434         /* Now that all graph searching is done it certainly safe set the
 435          * restarting. It may be safe above, however this must be verified. */
 436         for ( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ )
 437                 restart( graph, *pt );
 438
 439         int lmErrActionOrd = pd->curActionOrd++;
 440
 441         /* Embed the error for recognizing a char. */
 442         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
 443                 if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
 444                         if ( st->isFinState() ) {
 445                                 /* On error execute the onActNext action, which knows that
 446                                  * the last character of the token was one back and restart. */
 447                                 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
 448                                                 &st->lmItemSet[0]->actOnNext, 1 );
 449                                 st->eofActionTable.setAction( lmErrActionOrd,
 450                                                 st->lmItemSet[0]->actOnNext );
 451                                 st->eofTarget = graph->startState;
 452                         }
 453                         else {
 454                                 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
 455                                                 &st->lmItemSet[0]->actLagBehind, 1 );
 456                                 st->eofActionTable.setAction( lmErrActionOrd,
 457                                                 st->lmItemSet[0]->actLagBehind );
 458                                 st->eofTarget = graph->startState;
 459                         }
 460                 }
 461                 else if ( st->lmItemSet.length() > 1 ) {
 462                         /* Need to use the select. Take note of which items the select
 463                          * is needed for so only the necessary actions are included. */
 464                         for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
 465                                 if ( *plmi != 0 )
 466                                         (*plmi)->inLmSelect = true;
 467                         }
 468                         /* On error, execute the action select and go to the start state. */
 469                         graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
 470                                         &lmActSelect, 1 );
 471                         st->eofActionTable.setAction( lmErrActionOrd, lmActSelect );
 472                         st->eofTarget = graph->startState;
 473                 }
 474         }
 475
 476         /* Finally, the start state should be made final. */
 477         graph->setFinState( graph->startState );
 478 }
 479
 480 void LongestMatch::transferScannerLeavingActions( FsmAp *graph )
 481 {
 482         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
 483                 if ( st->outActionTable.length() > 0 )
 484                         graph->setErrorActions( st, st->outActionTable );
 485         }
 486 }
 487
 488 FsmAp *LongestMatch::walk( ParseData *pd )
 489 {
 490         /* The longest match has it's own name scope. */
 491         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 492
 493         /* Make each part of the longest match. */
 494         FsmAp **parts = new FsmAp*[longestMatchList->length()];
 495         LmPartList::Iter lmi = *longestMatchList;
 496         for ( int i = 0; lmi.lte(); lmi++, i++ ) {
 497                 /* Create the machine and embed the setting of the longest match id. */
 498                 parts[i] = lmi->join->walk( pd );
 499                 parts[i]->longMatchAction( pd->curActionOrd++, lmi );
 500         }
 501
 502         /* Before we union the patterns we need to deal with leaving actions. They
 503          * are transfered to error transitions out of the final states (like local
 504          * error actions) and to eof actions. In the scanner we need to forbid
 505          * on_last for any final state that has an leaving action. */
 506         for ( int i = 0; i < longestMatchList->length(); i++ )
 507                 transferScannerLeavingActions( parts[i] );
 508
 509         /* Union machines one and up with machine zero. The grammar dictates that
 510          * there will always be at least one part. */
 511         FsmAp *rtnVal = parts[0];
 512         for ( int i = 1; i < longestMatchList->length(); i++ ) {
 513                 rtnVal->unionOp( parts[i] );
 514                 afterOpMinimize( rtnVal );
 515         }
 516
 517         runLongestMatch( pd, rtnVal );
 518
 519         /* Pop the name scope. */
 520         pd->popNameScope( nameFrame );
 521
 522         delete[] parts;
 523         return rtnVal;
 524 }
 525
 526 FsmAp *JoinOrLm::walk( ParseData *pd )
 527 {
 528         FsmAp *rtnVal = 0;
 529         switch ( type ) {
 530         case JoinType:
 531                 rtnVal = join->walk( pd );
 532                 break;
 533         case LongestMatchType:
 534                 rtnVal = longestMatch->walk( pd );
 535                 break;
 536         }
 537         return rtnVal;
 538 }
 539
 540 void JoinOrLm::makeNameTree( ParseData *pd )
 541 {
 542         switch ( type ) {
 543         case JoinType:
 544                 join->makeNameTree( pd );
 545                 break;
 546         case LongestMatchType:
 547                 longestMatch->makeNameTree( pd );
 548                 break;
 549         }
 550 }
 551
 552 void JoinOrLm::resolveNameRefs( ParseData *pd )
 553 {
 554         switch ( type ) {
 555         case JoinType:
 556                 join->resolveNameRefs( pd );
 557                 break;
 558         case LongestMatchType:
 559                 longestMatch->resolveNameRefs( pd );
 560                 break;
 561         }
 562 }
 563
 564
 565 /* Construct with a location and the first expression. */
 566 Join::Join( const InputLoc &loc, Expression *expr )
 567 :
 568         loc(loc)
 569 {
 570         exprList.append( expr );
 571 }
 572
 573 /* Construct with a location and the first expression. */
 574 Join::Join( Expression *expr )
 575 :
 576         loc(loc)
 577 {
 578         exprList.append( expr );
 579 }
 580
 581 /* Walk an expression node. */
 582 FsmAp *Join::walk( ParseData *pd )
 583 {
 584         if ( exprList.length() > 1 )
 585                 return walkJoin( pd );
 586         else
 587                 return exprList.head->walk( pd );
 588 }
 589
 590 /* There is a list of expressions to join. */
 591 FsmAp *Join::walkJoin( ParseData *pd )
 592 {
 593         /* We enter into a new name scope. */
 594         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 595
 596         /* Evaluate the machines. */
 597         FsmAp **fsms = new FsmAp*[exprList.length()];
 598         ExprList::Iter expr = exprList;
 599         for ( int e = 0; e < exprList.length(); e++, expr++ )
 600                 fsms[e] = expr->walk( pd );
 601
 602         /* Get the start and final names. Final is
 603          * guaranteed to exist, start is not. */
 604         NameInst *startName = pd->curNameInst->start;
 605         NameInst *finalName = pd->curNameInst->final;
 606
 607         int startId = -1;
 608         if ( startName != 0 ) {
 609                 /* Take note that there was an implicit link to the start machine. */
 610                 pd->localNameScope->referencedNames.append( startName );
 611                 startId = startName->id;
 612         }
 613
 614         /* A final id of -1 indicates there is no epsilon that references the
 615          * final state, therefor do not create one or set an entry point to it. */
 616         int finalId = -1;
 617         if ( finalName->numRefs > 0 )
 618                 finalId = finalName->id;
 619
 620         /* Join machines 1 and up onto machine 0. */
 621         FsmAp *retFsm = fsms[0];
 622         retFsm->joinOp( startId, finalId, fsms+1, exprList.length()-1 );
 623
 624         /* We can now unset entry points that are not longer used. */
 625         pd->unsetObsoleteEntries( retFsm );
 626
 627         /* Pop the name scope. */
 628         pd->popNameScope( nameFrame );
 629
 630         delete[] fsms;
 631         return retFsm;
 632 }
 633
 634 void Join::makeNameTree( ParseData *pd )
 635 {
 636         if ( exprList.length() > 1 ) {
 637                 /* Create the new anonymous scope. */
 638                 NameInst *prevNameInst = pd->curNameInst;
 639                 pd->curNameInst = pd->addNameInst( loc, 0, false );
 640
 641                 /* Join scopes need an implicit "final" target. */
 642                 pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final",
 643                                 pd->nextNameId++, false );
 644
 645                 /* Recurse into all expressions in the list. */
 646                 for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
 647                         expr->makeNameTree( pd );
 648
 649                 /* The name scope ends, pop the name instantiation. */
 650                 pd->curNameInst = prevNameInst;
 651         }
 652         else {
 653                 /* Recurse into the single expression. */
 654                 exprList.head->makeNameTree( pd );
 655         }
 656 }
 657
 658
 659 void Join::resolveNameRefs( ParseData *pd )
 660 {
 661         /* Branch on whether or not there is to be a join. */
 662         if ( exprList.length() > 1 ) {
 663                 /* The variable definition enters a new scope. */
 664                 NameFrame nameFrame = pd->enterNameScope( true, 1 );
 665
 666                 /* The join scope must contain a start label. */
 667                 NameSet resolved = pd->resolvePart( pd->localNameScope, "start", true );
 668                 if ( resolved.length() > 0 ) {
 669                         /* Take the first. */
 670                         pd->curNameInst->start = resolved[0];
 671                         if ( resolved.length() > 1 ) {
 672                                 /* Complain about the multiple references. */
 673                                 error(loc) << "multiple start labels" << endl;
 674                                 errorStateLabels( resolved );
 675                         }
 676                 }
 677
 678                 /* Make sure there is a start label. */
 679                 if ( pd->curNameInst->start != 0 ) {
 680                         /* There is an implicit reference to start name. */
 681                         pd->curNameInst->start->numRefs += 1;
 682                 }
 683                 else {
 684                         /* No start label. Complain and recover by adding a label to the
 685                          * adding one. Recover ignoring the problem. */
 686                         error(loc) << "no start label" << endl;
 687                 }
 688
 689                 /* Recurse into all expressions in the list. */
 690                 for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
 691                         expr->resolveNameRefs( pd );
 692
 693                 /* The name scope ends, pop the name instantiation. */
 694                 pd->popNameScope( nameFrame );
 695         }
 696         else {
 697                 /* Recurse into the single expression. */
 698                 exprList.head->resolveNameRefs( pd );
 699         }
 700 }
 701
 702 /* Clean up after an expression node. */
 703 Expression::~Expression()
 704 {
 705         switch ( type ) {
 706                 case OrType: case IntersectType: case SubtractType:
 707                 case StrongSubtractType:
 708                         delete expression;
 709                         delete term;
 710                         break;
 711                 case TermType:
 712                         delete term;
 713                         break;
 714                 case BuiltinType:
 715                         break;
 716         }
 717 }
 718
 719 /* Evaluate a single expression node. */
 720 FsmAp *Expression::walk( ParseData *pd, bool lastInSeq )
 721 {
 722         FsmAp *rtnVal = 0;
 723         switch ( type ) {
 724                 case OrType: {
 725                         /* Evaluate the expression. */
 726                         rtnVal = expression->walk( pd, false );
 727                         /* Evaluate the term. */
 728                         FsmAp *rhs = term->walk( pd );
 729                         /* Perform union. */
 730                         rtnVal->unionOp( rhs );
 731                         afterOpMinimize( rtnVal, lastInSeq );
 732                         break;
 733                 }
 734                 case IntersectType: {
 735                         /* Evaluate the expression. */
 736                         rtnVal = expression->walk( pd );
 737                         /* Evaluate the term. */
 738                         FsmAp *rhs = term->walk( pd );
 739                         /* Perform intersection. */
 740                         rtnVal->intersectOp( rhs );
 741                         afterOpMinimize( rtnVal, lastInSeq );
 742                         break;
 743                 }
 744                 case SubtractType: {
 745                         /* Evaluate the expression. */
 746                         rtnVal = expression->walk( pd );
 747                         /* Evaluate the term. */
 748                         FsmAp *rhs = term->walk( pd );
 749                         /* Perform subtraction. */
 750                         rtnVal->subtractOp( rhs );
 751                         afterOpMinimize( rtnVal, lastInSeq );
 752                         break;
 753                 }
 754                 case StrongSubtractType: {
 755                         /* Evaluate the expression. */
 756                         rtnVal = expression->walk( pd );
 757
 758                         /* Evaluate the term and pad it with any* machines. */
 759                         FsmAp *rhs = dotStarFsm( pd );
 760                         FsmAp *termFsm = term->walk( pd );
 761                         FsmAp *trailAnyStar = dotStarFsm( pd );
 762                         rhs->concatOp( termFsm );
 763                         rhs->concatOp( trailAnyStar );
 764
 765                         /* Perform subtraction. */
 766                         rtnVal->subtractOp( rhs );
 767                         afterOpMinimize( rtnVal, lastInSeq );
 768                         break;
 769                 }
 770                 case TermType: {
 771                         /* Return result of the term. */
 772                         rtnVal = term->walk( pd );
 773                         break;
 774                 }
 775                 case BuiltinType: {
 776                         /* Duplicate the builtin. */
 777                         rtnVal = makeBuiltin( builtin, pd );
 778                         break;
 779                 }
 780         }
 781
 782         return rtnVal;
 783 }
 784
 785 void Expression::makeNameTree( ParseData *pd )
 786 {
 787         switch ( type ) {
 788         case OrType:
 789         case IntersectType:
 790         case SubtractType:
 791         case StrongSubtractType:
 792                 expression->makeNameTree( pd );
 793                 term->makeNameTree( pd );
 794                 break;
 795         case TermType:
 796                 term->makeNameTree( pd );
 797                 break;
 798         case BuiltinType:
 799                 break;
 800         }
 801 }
 802
 803 void Expression::resolveNameRefs( ParseData *pd )
 804 {
 805         switch ( type ) {
 806         case OrType:
 807         case IntersectType:
 808         case SubtractType:
 809         case StrongSubtractType:
 810                 expression->resolveNameRefs( pd );
 811                 term->resolveNameRefs( pd );
 812                 break;
 813         case TermType:
 814                 term->resolveNameRefs( pd );
 815                 break;
 816         case BuiltinType:
 817                 break;
 818         }
 819 }
 820
 821 /* Clean up after a term node. */
 822 Term::~Term()
 823 {
 824         switch ( type ) {
 825                 case ConcatType:
 826                 case RightStartType:
 827                 case RightFinishType:
 828                 case LeftType:
 829                         delete term;
 830                         delete factorWithAug;
 831                         break;
 832                 case FactorWithAugType:
 833                         delete factorWithAug;
 834                         break;
 835         }
 836 }
 837
 838 /* Evaluate a term node. */
 839 FsmAp *Term::walk( ParseData *pd, bool lastInSeq )
 840 {
 841         FsmAp *rtnVal = 0;
 842         switch ( type ) {
 843                 case ConcatType: {
 844                         /* Evaluate the Term. */
 845                         rtnVal = term->walk( pd, false );
 846                         /* Evaluate the FactorWithRep. */
 847                         FsmAp *rhs = factorWithAug->walk( pd );
 848                         /* Perform concatenation. */
 849                         rtnVal->concatOp( rhs );
 850                         afterOpMinimize( rtnVal, lastInSeq );
 851                         break;
 852                 }
 853                 case RightStartType: {
 854                         /* Evaluate the Term. */
 855                         rtnVal = term->walk( pd );
 856
 857                         /* Evaluate the FactorWithRep. */
 858                         FsmAp *rhs = factorWithAug->walk( pd );
 859
 860                         /* Set up the priority descriptors. The left machine gets the
 861                          * lower priority where as the right get the higher start priority. */
 862                         priorDescs[0].key = pd->nextPriorKey++;
 863                         priorDescs[0].priority = 0;
 864                         rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
 865
 866                         /* The start transitions of the right machine gets the higher
 867                          * priority. Use the same unique key. */
 868                         priorDescs[1].key = priorDescs[0].key;
 869                         priorDescs[1].priority = 1;
 870                         rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
 871
 872                         /* Perform concatenation. */
 873                         rtnVal->concatOp( rhs );
 874                         afterOpMinimize( rtnVal, lastInSeq );
 875                         break;
 876                 }
 877                 case RightFinishType: {
 878                         /* Evaluate the Term. */
 879                         rtnVal = term->walk( pd );
 880
 881                         /* Evaluate the FactorWithRep. */
 882                         FsmAp *rhs = factorWithAug->walk( pd );
 883
 884                         /* Set up the priority descriptors. The left machine gets the
 885                          * lower priority where as the finishing transitions to the right
 886                          * get the higher priority. */
 887                         priorDescs[0].key = pd->nextPriorKey++;
 888                         priorDescs[0].priority = 0;
 889                         rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
 890
 891                         /* The finishing transitions of the right machine get the higher
 892                          * priority. Use the same unique key. */
 893                         priorDescs[1].key = priorDescs[0].key;
 894                         priorDescs[1].priority = 1;
 895                         rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
 896
 897                         /* If the right machine's start state is final we need to guard
 898                          * against the left machine persisting by moving through the empty
 899                          * string. */
 900                         if ( rhs->startState->isFinState() ) {
 901                                 rhs->startState->outPriorTable.setPrior(
 902                                                 pd->curPriorOrd++, &priorDescs[1] );
 903                         }
 904
 905                         /* Perform concatenation. */
 906                         rtnVal->concatOp( rhs );
 907                         afterOpMinimize( rtnVal, lastInSeq );
 908                         break;
 909                 }
 910                 case LeftType: {
 911                         /* Evaluate the Term. */
 912                         rtnVal = term->walk( pd );
 913
 914                         /* Evaluate the FactorWithRep. */
 915                         FsmAp *rhs = factorWithAug->walk( pd );
 916
 917                         /* Set up the priority descriptors. The left machine gets the
 918                          * higher priority. */
 919                         priorDescs[0].key = pd->nextPriorKey++;
 920                         priorDescs[0].priority = 1;
 921                         rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
 922
 923                         /* The right machine gets the lower priority. We cannot use
 924                          * allTransPrior here in case the start state of the right machine
 925                          * is final. It would allow the right machine thread to run along
 926                          * with the left if just passing through the start state. Using
 927                          * startFsmPrior prevents this. */
 928                         priorDescs[1].key = priorDescs[0].key;
 929                         priorDescs[1].priority = 0;
 930                         rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
 931
 932                         /* Perform concatenation. */
 933                         rtnVal->concatOp( rhs );
 934                         afterOpMinimize( rtnVal, lastInSeq );
 935                         break;
 936                 }
 937                 case FactorWithAugType: {
 938                         rtnVal = factorWithAug->walk( pd );
 939                         break;
 940                 }
 941         }
 942         return rtnVal;
 943 }
 944
 945 void Term::makeNameTree( ParseData *pd )
 946 {
 947         switch ( type ) {
 948         case ConcatType:
 949         case RightStartType:
 950         case RightFinishType:
 951         case LeftType:
 952                 term->makeNameTree( pd );
 953                 factorWithAug->makeNameTree( pd );
 954                 break;
 955         case FactorWithAugType:
 956                 factorWithAug->makeNameTree( pd );
 957                 break;
 958         }
 959 }
 960
 961 void Term::resolveNameRefs( ParseData *pd )
 962 {
 963         switch ( type ) {
 964         case ConcatType:
 965         case RightStartType:
 966         case RightFinishType:
 967         case LeftType:
 968                 term->resolveNameRefs( pd );
 969                 factorWithAug->resolveNameRefs( pd );
 970                 break;
 971         case FactorWithAugType:
 972                 factorWithAug->resolveNameRefs( pd );
 973                 break;
 974         }
 975 }
 976
 977 /* Clean up after a factor with augmentation node. */
 978 FactorWithAug::~FactorWithAug()
 979 {
 980         delete factorWithRep;
 981
 982         /* Walk the vector of parser actions, deleting function names. */
 983
 984         /* Clean up priority descriptors. */
 985         if ( priorDescs != 0 )
 986                 delete[] priorDescs;
 987 }
 988
 989 void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd )
 990 {
 991         /* Assign actions. */
 992         for ( int i = 0; i < actions.length(); i++ )  {
 993                 switch ( actions[i].type ) {
 994                 /* Transition actions. */
 995                 case at_start:
 996                         graph->startFsmAction( actionOrd[i], actions[i].action );
 997                         afterOpMinimize( graph );
 998                         break;
 999                 case at_all:
1000                         graph->allTransAction( actionOrd[i], actions[i].action );
1001                         break;
1002                 case at_finish:
1003                         graph->finishFsmAction( actionOrd[i], actions[i].action );
1004                         break;
1005                 case at_leave:
1006                         graph->leaveFsmAction( actionOrd[i], actions[i].action );
1007                         break;
1008
1009                 /* Global error actions. */
1010                 case at_start_gbl_error:
1011                         graph->startErrorAction( actionOrd[i], actions[i].action, 0 );
1012                         afterOpMinimize( graph );
1013                         break;
1014                 case at_all_gbl_error:
1015                         graph->allErrorAction( actionOrd[i], actions[i].action, 0 );
1016                         break;
1017                 case at_final_gbl_error:
1018                         graph->finalErrorAction( actionOrd[i], actions[i].action, 0 );
1019                         break;
1020                 case at_not_start_gbl_error:
1021                         graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 );
1022                         break;
1023                 case at_not_final_gbl_error:
1024                         graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 );
1025                         break;
1026                 case at_middle_gbl_error:
1027                         graph->middleErrorAction( actionOrd[i], actions[i].action, 0 );
1028                         break;
1029
1030                 /* Local error actions. */
1031                 case at_start_local_error:
1032                         graph->startErrorAction( actionOrd[i], actions[i].action,
1033                                         actions[i].localErrKey );
1034                         afterOpMinimize( graph );
1035                         break;
1036                 case at_all_local_error:
1037                         graph->allErrorAction( actionOrd[i], actions[i].action,
1038                                         actions[i].localErrKey );
1039                         break;
1040                 case at_final_local_error:
1041                         graph->finalErrorAction( actionOrd[i], actions[i].action,
1042                                         actions[i].localErrKey );
1043                         break;
1044                 case at_not_start_local_error:
1045                         graph->notStartErrorAction( actionOrd[i], actions[i].action,
1046                                         actions[i].localErrKey );
1047                         break;
1048                 case at_not_final_local_error:
1049                         graph->notFinalErrorAction( actionOrd[i], actions[i].action,
1050                                         actions[i].localErrKey );
1051                         break;
1052                 case at_middle_local_error:
1053                         graph->middleErrorAction( actionOrd[i], actions[i].action,
1054                                         actions[i].localErrKey );
1055                         break;
1056
1057                 /* EOF actions. */
1058                 case at_start_eof:
1059                         graph->startEOFAction( actionOrd[i], actions[i].action );
1060                         afterOpMinimize( graph );
1061                         break;
1062                 case at_all_eof:
1063                         graph->allEOFAction( actionOrd[i], actions[i].action );
1064                         break;
1065                 case at_final_eof:
1066                         graph->finalEOFAction( actionOrd[i], actions[i].action );
1067                         break;
1068                 case at_not_start_eof:
1069                         graph->notStartEOFAction( actionOrd[i], actions[i].action );
1070                         break;
1071                 case at_not_final_eof:
1072                         graph->notFinalEOFAction( actionOrd[i], actions[i].action );
1073                         break;
1074                 case at_middle_eof:
1075                         graph->middleEOFAction( actionOrd[i], actions[i].action );
1076                         break;
1077
1078                 /* To State Actions. */
1079                 case at_start_to_state:
1080                         graph->startToStateAction( actionOrd[i], actions[i].action );
1081                         afterOpMinimize( graph );
1082                         break;
1083                 case at_all_to_state:
1084                         graph->allToStateAction( actionOrd[i], actions[i].action );
1085                         break;
1086                 case at_final_to_state:
1087                         graph->finalToStateAction( actionOrd[i], actions[i].action );
1088                         break;
1089                 case at_not_start_to_state:
1090                         graph->notStartToStateAction( actionOrd[i], actions[i].action );
1091                         break;
1092                 case at_not_final_to_state:
1093                         graph->notFinalToStateAction( actionOrd[i], actions[i].action );
1094                         break;
1095                 case at_middle_to_state:
1096                         graph->middleToStateAction( actionOrd[i], actions[i].action );
1097                         break;
1098
1099                 /* From State Actions. */
1100                 case at_start_from_state:
1101                         graph->startFromStateAction( actionOrd[i], actions[i].action );
1102                         afterOpMinimize( graph );
1103                         break;
1104                 case at_all_from_state:
1105                         graph->allFromStateAction( actionOrd[i], actions[i].action );
1106                         break;
1107                 case at_final_from_state:
1108                         graph->finalFromStateAction( actionOrd[i], actions[i].action );
1109                         break;
1110                 case at_not_start_from_state:
1111                         graph->notStartFromStateAction( actionOrd[i], actions[i].action );
1112                         break;
1113                 case at_not_final_from_state:
1114                         graph->notFinalFromStateAction( actionOrd[i], actions[i].action );
1115                         break;
1116                 case at_middle_from_state:
1117                         graph->middleFromStateAction( actionOrd[i], actions[i].action );
1118                         break;
1119
1120                 /* Remaining cases, prevented by the parser. */
1121                 default:
1122                         assert( false );
1123                         break;
1124                 }
1125         }
1126 }
1127
1128 void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd )
1129 {
1130         /* Assign priorities. */
1131         for ( int i = 0; i < priorityAugs.length(); i++ ) {
1132                 switch ( priorityAugs[i].type ) {
1133                 case at_start:
1134                         graph->startFsmPrior( priorOrd[i], &priorDescs[i]);
1135                         /* Start fsm priorities are a special case that may require
1136                          * minimization afterwards. */
1137                         afterOpMinimize( graph );
1138                         break;
1139                 case at_all:
1140                         graph->allTransPrior( priorOrd[i], &priorDescs[i] );
1141                         break;
1142                 case at_finish:
1143                         graph->finishFsmPrior( priorOrd[i], &priorDescs[i] );
1144                         break;
1145                 case at_leave:
1146                         graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] );
1147                         break;
1148
1149                 default:
1150                         /* Parser Prevents this case. */
1151                         break;
1152                 }
1153         }
1154 }
1155
1156 void FactorWithAug::assignConditions( FsmAp *graph )
1157 {
1158         for ( int i = 0; i < conditions.length(); i++ )  {
1159                 switch ( conditions[i].type ) {
1160                 /* Transition actions. */
1161                 case at_start:
1162                         graph->startFsmCondition( conditions[i].action, conditions[i].sense );
1163                         afterOpMinimize( graph );
1164                         break;
1165                 case at_all:
1166                         graph->allTransCondition( conditions[i].action, conditions[i].sense );
1167                         break;
1168                 case at_leave:
1169                         graph->leaveFsmCondition( conditions[i].action, conditions[i].sense );
1170                         break;
1171                 default:
1172                         break;
1173                 }
1174         }
1175 }
1176
1177
1178 /* Evaluate a factor with augmentation node. */
1179 FsmAp *FactorWithAug::walk( ParseData *pd )
1180 {
1181         /* Enter into the scopes created for the labels. */
1182         NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1183
1184         /* Make the array of function orderings. */
1185         int *actionOrd = 0;
1186         if ( actions.length() > 0 )
1187                 actionOrd = new int[actions.length()];
1188
1189         /* First walk the list of actions, assigning order to all starting
1190          * actions. */
1191         for ( int i = 0; i < actions.length(); i++ ) {
1192                 if ( actions[i].type == at_start ||
1193                                 actions[i].type == at_start_gbl_error ||
1194                                 actions[i].type == at_start_local_error ||
1195                                 actions[i].type == at_start_to_state ||
1196                                 actions[i].type == at_start_from_state ||
1197                                 actions[i].type == at_start_eof )
1198                         actionOrd[i] = pd->curActionOrd++;
1199         }
1200
1201         /* Evaluate the factor with repetition. */
1202         FsmAp *rtnVal = factorWithRep->walk( pd );
1203
1204         /* Compute the remaining action orderings. */
1205         for ( int i = 0; i < actions.length(); i++ ) {
1206                 if ( actions[i].type != at_start &&
1207                                 actions[i].type != at_start_gbl_error &&
1208                                 actions[i].type != at_start_local_error &&
1209                                 actions[i].type != at_start_to_state &&
1210                                 actions[i].type != at_start_from_state &&
1211                                 actions[i].type != at_start_eof )
1212                         actionOrd[i] = pd->curActionOrd++;
1213         }
1214
1215         /* Embed conditions. */
1216         assignConditions( rtnVal );
1217
1218         /* Embed actions. */
1219         assignActions( pd, rtnVal , actionOrd );
1220
1221         /* Make the array of priority orderings. Orderings are local to this walk
1222          * of the factor with augmentation. */
1223         int *priorOrd = 0;
1224         if ( priorityAugs.length() > 0 )
1225                 priorOrd = new int[priorityAugs.length()];
1226
1227         /* Walk all priorities, assigning the priority ordering. */
1228         for ( int i = 0; i < priorityAugs.length(); i++ )
1229                 priorOrd[i] = pd->curPriorOrd++;
1230
1231         /* If the priority descriptors have not been made, make them now.  Make
1232          * priority descriptors for each priority asignment that will be passed to
1233          * the fsm. Used to keep track of the key, value and used bit. */
1234         if ( priorDescs == 0 && priorityAugs.length() > 0 ) {
1235                 priorDescs = new PriorDesc[priorityAugs.length()];
1236                 for ( int i = 0; i < priorityAugs.length(); i++ ) {
1237                         /* Init the prior descriptor for the priority setting. */
1238                         priorDescs[i].key = priorityAugs[i].priorKey;
1239                         priorDescs[i].priority = priorityAugs[i].priorValue;
1240                 }
1241         }
1242
1243         /* Assign priorities into the machine. */
1244         assignPriorities( rtnVal, priorOrd );
1245
1246         /* Assign epsilon transitions. */
1247         for ( int e = 0; e < epsilonLinks.length(); e++ ) {
1248                 /* Get the name, which may not exist. If it doesn't then silently
1249                  * ignore it because an error has already been reported. */
1250                 NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++];
1251                 if ( epTarg != 0 ) {
1252                         /* Make the epsilon transitions. */
1253                         rtnVal->epsilonTrans( epTarg->id );
1254
1255                         /* Note that we have made a link to the name. */
1256                         pd->localNameScope->referencedNames.append( epTarg );
1257                 }
1258         }
1259
1260         /* Set entry points for labels. */
1261         if ( labels.length() > 0 ) {
1262                 /* Pop the names. */
1263                 pd->resetNameScope( nameFrame );
1264
1265                 /* Make labels that are referenced into entry points. */
1266                 for ( int i = 0; i < labels.length(); i++ ) {
1267                         pd->enterNameScope( false, 1 );
1268
1269                         /* Will always be found. */
1270                         NameInst *name = pd->curNameInst;
1271
1272                         /* If the name is referenced then set the entry point. */
1273                         if ( name->numRefs > 0 )
1274                                 rtnVal->setEntry( name->id, rtnVal->startState );
1275                 }
1276
1277                 pd->popNameScope( nameFrame );
1278         }
1279
1280         if ( priorOrd != 0 )
1281                 delete[] priorOrd;
1282         if ( actionOrd != 0 )
1283                 delete[] actionOrd;
1284         return rtnVal;
1285 }
1286
1287 void FactorWithAug::makeNameTree( ParseData *pd )
1288 {
1289         /* Add the labels to the tree of instantiated names. Each label
1290          * makes a new scope. */
1291         NameInst *prevNameInst = pd->curNameInst;
1292         for ( int i = 0; i < labels.length(); i++ )
1293                 pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data, true );
1294
1295         /* Recurse, then pop the names. */
1296         factorWithRep->makeNameTree( pd );
1297         pd->curNameInst = prevNameInst;
1298 }
1299
1300
1301 void FactorWithAug::resolveNameRefs( ParseData *pd )
1302 {
1303         /* Enter into the name scope created by any labels. */
1304         NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1305
1306         /* Note action references. */
1307         for ( int i = 0; i < actions.length(); i++ )
1308                 actions[i].action->actionRefs.append( pd->localNameScope );
1309
1310         /* Recurse first. IMPORTANT: we must do the exact same traversal as when
1311          * the tree is constructed. */
1312         factorWithRep->resolveNameRefs( pd );
1313
1314         /* Resolve epsilon transitions. */
1315         for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) {
1316                 /* Get the link. */
1317                 EpsilonLink &link = epsilonLinks[ep];
1318                 NameInst *resolvedName = 0;
1319
1320                 if ( link.target.length() == 1 && strcmp( link.target.data[0], "final" ) == 0 ) {
1321                         /* Epsilon drawn to an implicit final state. An implicit final is
1322                          * only available in join operations. */
1323                         resolvedName = pd->localNameScope->final;
1324                 }
1325                 else {
1326                         /* Do an search for the name. */
1327                         NameSet resolved;
1328                         pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 );
1329                         if ( resolved.length() > 0 ) {
1330                                 /* Take the first one. */
1331                                 resolvedName = resolved[0];
1332                                 if ( resolved.length() > 1 ) {
1333                                         /* Complain about the multiple references. */
1334                                         error(link.loc) << "state reference " << link.target <<
1335                                                         " resolves to multiple entry points" << endl;
1336                                         errorStateLabels( resolved );
1337                                 }
1338                         }
1339                 }
1340
1341                 /* This is tricky, we stuff resolved epsilon transitions into one long
1342                  * vector in the parse data structure. Since the name resolution and
1343                  * graph generation both do identical walks of the parse tree we
1344                  * should always find the link resolutions in the right place.  */
1345                 pd->epsilonResolvedLinks.append( resolvedName );
1346
1347                 if ( resolvedName != 0 ) {
1348                         /* Found the name, bump of the reference count on it. */
1349                         resolvedName->numRefs += 1;
1350                 }
1351                 else {
1352                         /* Complain, no recovery action, the epsilon op will ignore any
1353                          * epsilon transitions whose names did not resolve. */
1354                         error(link.loc) << "could not resolve label " << link.target << endl;
1355                 }
1356         }
1357
1358         if ( labels.length() > 0 )
1359                 pd->popNameScope( nameFrame );
1360 }
1361
1362
1363 /* Clean up after a factor with repetition node. */
1364 FactorWithRep::~FactorWithRep()
1365 {
1366         switch ( type ) {
1367                 case StarType: case StarStarType: case OptionalType: case PlusType:
1368                 case ExactType: case MaxType: case MinType: case RangeType:
1369                         delete factorWithRep;
1370                         break;
1371                 case FactorWithNegType:
1372                         delete factorWithNeg;
1373                         break;
1374         }
1375 }
1376
1377 /* Evaluate a factor with repetition node. */
1378 FsmAp *FactorWithRep::walk( ParseData *pd )
1379 {
1380         FsmAp *retFsm = 0;
1381
1382         switch ( type ) {
1383         case StarType: {
1384                 /* Evaluate the FactorWithRep. */
1385                 retFsm = factorWithRep->walk( pd );
1386                 if ( retFsm->startState->isFinState() ) {
1387                         warning(loc) << "applying kleene star to a machine that "
1388                                         "accepts zero length word" << endl;
1389                 }
1390
1391                 /* Shift over the start action orders then do the kleene star. */
1392                 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1393                 retFsm->starOp( );
1394                 afterOpMinimize( retFsm );
1395                 break;
1396         }
1397         case StarStarType: {
1398                 /* Evaluate the FactorWithRep. */
1399                 retFsm = factorWithRep->walk( pd );
1400                 if ( retFsm->startState->isFinState() ) {
1401                         warning(loc) << "applying kleene star to a machine that "
1402                                         "accepts zero length word" << endl;
1403                 }
1404
1405                 /* Set up the prior descs. All gets priority one, whereas leaving gets
1406                  * priority zero. Make a unique key so that these priorities don't
1407                  * interfere with any priorities set by the user. */
1408                 priorDescs[0].key = pd->nextPriorKey++;
1409                 priorDescs[0].priority = 1;
1410                 retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
1411
1412                 /* Leaveing gets priority 0. Use same unique key. */
1413                 priorDescs[1].key = priorDescs[0].key;
1414                 priorDescs[1].priority = 0;
1415                 retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
1416
1417                 /* Shift over the start action orders then do the kleene star. */
1418                 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1419                 retFsm->starOp( );
1420                 afterOpMinimize( retFsm );
1421                 break;
1422         }
1423         case OptionalType: {
1424                 /* Make the null fsm. */
1425                 FsmAp *nu = new FsmAp();
1426                 nu->lambdaFsm( );
1427
1428                 /* Evaluate the FactorWithRep. */
1429                 retFsm = factorWithRep->walk( pd );
1430
1431                 /* Perform the question operator. */
1432                 retFsm->unionOp( nu );
1433                 afterOpMinimize( retFsm );
1434                 break;
1435         }
1436         case PlusType: {
1437                 /* Evaluate the FactorWithRep. */
1438                 retFsm = factorWithRep->walk( pd );
1439                 if ( retFsm->startState->isFinState() ) {
1440                         warning(loc) << "applying plus operator to a machine that "
1441                                         "accpets zero length word" << endl;
1442                 }
1443
1444                 /* Need a duplicated for the star end. */
1445                 FsmAp *dup = new FsmAp( *retFsm );
1446
1447                 /* The start func orders need to be shifted before doing the star. */
1448                 pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd );
1449
1450                 /* Star the duplicate. */
1451                 dup->starOp( );
1452                 afterOpMinimize( dup );
1453
1454                 retFsm->concatOp( dup );
1455                 afterOpMinimize( retFsm );
1456                 break;
1457         }
1458         case ExactType: {
1459                 /* Get an int from the repetition amount. */
1460                 if ( lowerRep == 0 ) {
1461                         /* No copies. Don't need to evaluate the factorWithRep.
1462                          * This Defeats the purpose so give a warning. */
1463                         warning(loc) << "exactly zero repetitions results "
1464                                         "in the null machine" << endl;
1465
1466                         retFsm = new FsmAp();
1467                         retFsm->lambdaFsm();
1468                 }
1469                 else {
1470                         /* Evaluate the first FactorWithRep. */
1471                         retFsm = factorWithRep->walk( pd );
1472                         if ( retFsm->startState->isFinState() ) {
1473                                 warning(loc) << "applying repetition to a machine that "
1474                                                 "accepts zero length word" << endl;
1475                         }
1476
1477                         /* The start func orders need to be shifted before doing the
1478                          * repetition. */
1479                         pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1480
1481                         /* Do the repetition on the machine. Already guarded against n == 0 */
1482                         retFsm->repeatOp( lowerRep );
1483                         afterOpMinimize( retFsm );
1484                 }
1485                 break;
1486         }
1487         case MaxType: {
1488                 /* Get an int from the repetition amount. */
1489                 if ( upperRep == 0 ) {
1490                         /* No copies. Don't need to evaluate the factorWithRep.
1491                          * This Defeats the purpose so give a warning. */
1492                         warning(loc) << "max zero repetitions results "
1493                                         "in the null machine" << endl;
1494
1495                         retFsm = new FsmAp();
1496                         retFsm->lambdaFsm();
1497                 }
1498                 else {
1499                         /* Evaluate the first FactorWithRep. */
1500                         retFsm = factorWithRep->walk( pd );
1501                         if ( retFsm->startState->isFinState() ) {
1502                                 warning(loc) << "applying max repetition to a machine that "
1503                                                 "accepts zero length word" << endl;
1504                         }
1505
1506                         /* The start func orders need to be shifted before doing the
1507                          * repetition. */
1508                         pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1509
1510                         /* Do the repetition on the machine. Already guarded against n == 0 */
1511                         retFsm->optionalRepeatOp( upperRep );
1512                         afterOpMinimize( retFsm );
1513                 }
1514                 break;
1515         }
1516         case MinType: {
1517                 /* Evaluate the repeated machine. */
1518                 retFsm = factorWithRep->walk( pd );
1519                 if ( retFsm->startState->isFinState() ) {
1520                         warning(loc) << "applying min repetition to a machine that "
1521                                         "accepts zero length word" << endl;
1522                 }
1523
1524                 /* The start func orders need to be shifted before doing the repetition
1525                  * and the kleene star. */
1526                 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1527
1528                 if ( lowerRep == 0 ) {
1529                         /* Acts just like a star op on the machine to return. */
1530                         retFsm->starOp( );
1531                         afterOpMinimize( retFsm );
1532                 }
1533                 else {
1534                         /* Take a duplicate for the plus. */
1535                         FsmAp *dup = new FsmAp( *retFsm );
1536
1537                         /* Do repetition on the first half. */
1538                         retFsm->repeatOp( lowerRep );
1539                         afterOpMinimize( retFsm );
1540
1541                         /* Star the duplicate. */
1542                         dup->starOp( );
1543                         afterOpMinimize( dup );
1544
1545                         /* Tak on the kleene star. */
1546                         retFsm->concatOp( dup );
1547                         afterOpMinimize( retFsm );
1548                 }
1549                 break;
1550         }
1551         case RangeType: {
1552                 /* Check for bogus range. */
1553                 if ( upperRep - lowerRep < 0 ) {
1554                         error(loc) << "invalid range repetition" << endl;
1555
1556                         /* Return null machine as recovery. */
1557                         retFsm = new FsmAp();
1558                         retFsm->lambdaFsm();
1559                 }
1560                 else if ( lowerRep == 0 && upperRep == 0 ) {
1561                         /* No copies. Don't need to evaluate the factorWithRep.  This
1562                          * defeats the purpose so give a warning. */
1563                         warning(loc) << "zero to zero repetitions results "
1564                                         "in the null machine" << endl;
1565
1566                         retFsm = new FsmAp();
1567                         retFsm->lambdaFsm();
1568                 }
1569                 else {
1570                         /* Now need to evaluate the repeated machine. */
1571                         retFsm = factorWithRep->walk( pd );
1572                         if ( retFsm->startState->isFinState() ) {
1573                                 warning(loc) << "applying range repetition to a machine that "
1574                                                 "accepts zero length word" << endl;
1575                         }
1576
1577                         /* The start func orders need to be shifted before doing both kinds
1578                          * of repetition. */
1579                         pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1580
1581                         if ( lowerRep == 0 ) {
1582                                 /* Just doing max repetition. Already guarded against n == 0. */
1583                                 retFsm->optionalRepeatOp( upperRep );
1584                                 afterOpMinimize( retFsm );
1585                         }
1586                         else if ( lowerRep == upperRep ) {
1587                                 /* Just doing exact repetition. Already guarded against n == 0. */
1588                                 retFsm->repeatOp( lowerRep );
1589                                 afterOpMinimize( retFsm );
1590                         }
1591                         else {
1592                                 /* This is the case that 0 < lowerRep < upperRep. Take a
1593                                  * duplicate for the optional repeat. */
1594                                 FsmAp *dup = new FsmAp( *retFsm );
1595
1596                                 /* Do repetition on the first half. */
1597                                 retFsm->repeatOp( lowerRep );
1598                                 afterOpMinimize( retFsm );
1599
1600                                 /* Do optional repetition on the second half. */
1601                                 dup->optionalRepeatOp( upperRep - lowerRep );
1602                                 afterOpMinimize( dup );
1603
1604                                 /* Tak on the duplicate machine. */
1605                                 retFsm->concatOp( dup );
1606                                 afterOpMinimize( retFsm );
1607                         }
1608                 }
1609                 break;
1610         }
1611         case FactorWithNegType: {
1612                 /* Evaluate the Factor. Pass it up. */
1613                 retFsm = factorWithNeg->walk( pd );
1614                 break;
1615         }}
1616         return retFsm;
1617 }
1618
1619 void FactorWithRep::makeNameTree( ParseData *pd )
1620 {
1621         switch ( type ) {
1622         case StarType:
1623         case StarStarType:
1624         case OptionalType:
1625         case PlusType:
1626         case ExactType:
1627         case MaxType:
1628         case MinType:
1629         case RangeType:
1630                 factorWithRep->makeNameTree( pd );
1631                 break;
1632         case FactorWithNegType:
1633                 factorWithNeg->makeNameTree( pd );
1634                 break;
1635         }
1636 }
1637
1638 void FactorWithRep::resolveNameRefs( ParseData *pd )
1639 {
1640         switch ( type ) {
1641         case StarType:
1642         case StarStarType:
1643         case OptionalType:
1644         case PlusType:
1645         case ExactType:
1646         case MaxType:
1647         case MinType:
1648         case RangeType:
1649                 factorWithRep->resolveNameRefs( pd );
1650                 break;
1651         case FactorWithNegType:
1652                 factorWithNeg->resolveNameRefs( pd );
1653                 break;
1654         }
1655 }
1656
1657 /* Clean up after a factor with negation node. */
1658 FactorWithNeg::~FactorWithNeg()
1659 {
1660         switch ( type ) {
1661                 case NegateType:
1662                 case CharNegateType:
1663                         delete factorWithNeg;
1664                         break;
1665                 case FactorType:
1666                         delete factor;
1667                         break;
1668         }
1669 }
1670
1671 /* Evaluate a factor with negation node. */
1672 FsmAp *FactorWithNeg::walk( ParseData *pd )
1673 {
1674         FsmAp *retFsm = 0;
1675
1676         switch ( type ) {
1677         case NegateType: {
1678                 /* Evaluate the factorWithNeg. */
1679                 FsmAp *toNegate = factorWithNeg->walk( pd );
1680
1681                 /* Negation is subtract from dot-star. */
1682                 retFsm = dotStarFsm( pd );
1683                 retFsm->subtractOp( toNegate );
1684                 afterOpMinimize( retFsm );
1685                 break;
1686         }
1687         case CharNegateType: {
1688                 /* Evaluate the factorWithNeg. */
1689                 FsmAp *toNegate = factorWithNeg->walk( pd );
1690
1691                 /* CharNegation is subtract from dot. */
1692                 retFsm = dotFsm( pd );
1693                 retFsm->subtractOp( toNegate );
1694                 afterOpMinimize( retFsm );
1695                 break;
1696         }
1697         case FactorType: {
1698                 /* Evaluate the Factor. Pass it up. */
1699                 retFsm = factor->walk( pd );
1700                 break;
1701         }}
1702         return retFsm;
1703 }
1704
1705 void FactorWithNeg::makeNameTree( ParseData *pd )
1706 {
1707         switch ( type ) {
1708         case NegateType:
1709         case CharNegateType:
1710                 factorWithNeg->makeNameTree( pd );
1711                 break;
1712         case FactorType:
1713                 factor->makeNameTree( pd );
1714                 break;
1715         }
1716 }
1717
1718 void FactorWithNeg::resolveNameRefs( ParseData *pd )
1719 {
1720         switch ( type ) {
1721         case NegateType:
1722         case CharNegateType:
1723                 factorWithNeg->resolveNameRefs( pd );
1724                 break;
1725         case FactorType:
1726                 factor->resolveNameRefs( pd );
1727                 break;
1728         }
1729 }
1730
1731 /* Clean up after a factor node. */
1732 Factor::~Factor()
1733 {
1734         switch ( type ) {
1735                 case LiteralType:
1736                         delete literal;
1737                         break;
1738                 case RangeType:
1739                         delete range;
1740                         break;
1741                 case OrExprType:
1742                         delete reItem;
1743                         break;
1744                 case RegExprType:
1745                         delete regExpr;
1746                         break;
1747                 case ReferenceType:
1748                         break;
1749                 case ParenType:
1750                         delete join;
1751                         break;
1752                 case LongestMatchType:
1753                         delete longestMatch;
1754                         break;
1755         }
1756 }
1757
1758 /* Evaluate a factor node. */
1759 FsmAp *Factor::walk( ParseData *pd )
1760 {
1761         FsmAp *rtnVal = 0;
1762         switch ( type ) {
1763         case LiteralType:
1764                 rtnVal = literal->walk( pd );
1765                 break;
1766         case RangeType:
1767                 rtnVal = range->walk( pd );
1768                 break;
1769         case OrExprType:
1770                 rtnVal = reItem->walk( pd, 0 );
1771                 break;
1772         case RegExprType:
1773                 rtnVal = regExpr->walk( pd, 0 );
1774                 break;
1775         case ReferenceType:
1776                 rtnVal = varDef->walk( pd );
1777                 break;
1778         case ParenType:
1779                 rtnVal = join->walk( pd );
1780                 break;
1781         case LongestMatchType:
1782                 rtnVal = longestMatch->walk( pd );
1783                 break;
1784         }
1785
1786         return rtnVal;
1787 }
1788
1789 void Factor::makeNameTree( ParseData *pd )
1790 {
1791         switch ( type ) {
1792         case LiteralType:
1793         case RangeType:
1794         case OrExprType:
1795         case RegExprType:
1796                 break;
1797         case ReferenceType:
1798                 varDef->makeNameTree( loc, pd );
1799                 break;
1800         case ParenType:
1801                 join->makeNameTree( pd );
1802                 break;
1803         case LongestMatchType:
1804                 longestMatch->makeNameTree( pd );
1805                 break;
1806         }
1807 }
1808
1809 void Factor::resolveNameRefs( ParseData *pd )
1810 {
1811         switch ( type ) {
1812         case LiteralType:
1813         case RangeType:
1814         case OrExprType:
1815         case RegExprType:
1816                 break;
1817         case ReferenceType:
1818                 varDef->resolveNameRefs( pd );
1819                 break;
1820         case ParenType:
1821                 join->resolveNameRefs( pd );
1822                 break;
1823         case LongestMatchType:
1824                 longestMatch->resolveNameRefs( pd );
1825                 break;
1826         }
1827 }
1828
1829 /* Clean up a range object. Must delete the two literals. */
1830 Range::~Range()
1831 {
1832         delete lowerLit;
1833         delete upperLit;
1834 }
1835
1836 /* Evaluate a range. Gets the lower an upper key and makes an fsm range. */
1837 FsmAp *Range::walk( ParseData *pd )
1838 {
1839         /* Construct and verify the suitability of the lower end of the range. */
1840         FsmAp *lowerFsm = lowerLit->walk( pd );
1841         if ( !lowerFsm->checkSingleCharMachine() ) {
1842                 error(lowerLit->token.loc) <<
1843                         "bad range lower end, must be a single character" << endl;
1844         }
1845
1846         /* Construct and verify the upper end. */
1847         FsmAp *upperFsm = upperLit->walk( pd );
1848         if ( !upperFsm->checkSingleCharMachine() ) {
1849                 error(upperLit->token.loc) <<
1850                         "bad range upper end, must be a single character" << endl;
1851         }
1852
1853         /* Grab the keys from the machines, then delete them. */
1854         Key lowKey = lowerFsm->startState->outList.head->lowKey;
1855         Key highKey = upperFsm->startState->outList.head->lowKey;
1856         delete lowerFsm;
1857         delete upperFsm;
1858
1859         /* Validate the range. */
1860         if ( lowKey > highKey ) {
1861                 /* Recover by setting upper to lower; */
1862                 error(lowerLit->token.loc) << "lower end of range is greater then upper end" << endl;
1863                 highKey = lowKey;
1864         }
1865
1866         /* Return the range now that it is validated. */
1867         FsmAp *retFsm = new FsmAp();
1868         retFsm->rangeFsm( lowKey, highKey );
1869         return retFsm;
1870 }
1871
1872 /* Evaluate a literal object. */
1873 FsmAp *Literal::walk( ParseData *pd )
1874 {
1875         /* FsmAp to return, is the alphabet signed. */
1876         FsmAp *rtnVal = 0;
1877
1878         switch ( type ) {
1879         case Number: {
1880                 /* Make the fsm key in int format. */
1881                 Key fsmKey = makeFsmKeyNum( token.data, token.loc, pd );
1882                 /* Make the new machine. */
1883                 rtnVal = new FsmAp();
1884                 rtnVal->concatFsm( fsmKey );
1885                 break;
1886         }
1887         case LitString: {
1888                 /* Make the array of keys in int format. */
1889                 Token interp;
1890                 bool caseInsensitive;
1891                 token.prepareLitString( interp, caseInsensitive );
1892                 Key *arr = new Key[interp.length];
1893                 makeFsmKeyArray( arr, interp.data, interp.length, pd );
1894
1895                 /* Make the new machine. */
1896                 rtnVal = new FsmAp();
1897                 if ( caseInsensitive )
1898                         rtnVal->concatFsmCI( arr, interp.length );
1899                 else
1900                         rtnVal->concatFsm( arr, interp.length );
1901                 delete[] interp.data;
1902                 delete[] arr;
1903                 break;
1904         }}
1905         return rtnVal;
1906 }
1907
1908 /* Clean up after a regular expression object. */
1909 RegExpr::~RegExpr()
1910 {
1911         switch ( type ) {
1912                 case RecurseItem:
1913                         delete regExpr;
1914                         delete item;
1915                         break;
1916                 case Empty:
1917                         break;
1918         }
1919 }
1920
1921 /* Evaluate a regular expression object. */
1922 FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex )
1923 {
1924         /* This is the root regex, pass down a pointer to this. */
1925         if ( rootRegex == 0 )
1926                 rootRegex = this;
1927
1928         FsmAp *rtnVal = 0;
1929         switch ( type ) {
1930                 case RecurseItem: {
1931                         /* Walk both items. */
1932                         rtnVal = regExpr->walk( pd, rootRegex );
1933                         FsmAp *fsm2 = item->walk( pd, rootRegex );
1934                         rtnVal->concatOp( fsm2 );
1935                         break;
1936                 }
1937                 case Empty: {
1938                         rtnVal = new FsmAp();
1939                         rtnVal->lambdaFsm();
1940                         break;
1941                 }
1942         }
1943         return rtnVal;
1944 }
1945
1946 /* Clean up after an item in a regular expression. */
1947 ReItem::~ReItem()
1948 {
1949         switch ( type ) {
1950                 case Data:
1951                 case Dot:
1952                         break;
1953                 case OrBlock:
1954                 case NegOrBlock:
1955                         delete orBlock;
1956                         break;
1957         }
1958 }
1959
1960 /* Evaluate a regular expression object. */
1961 FsmAp *ReItem::walk( ParseData *pd, RegExpr *rootRegex )
1962 {
1963         /* The fsm to return, is the alphabet signed? */
1964         FsmAp *rtnVal = 0;
1965
1966         switch ( type ) {
1967                 case Data: {
1968                         /* Move the data into an integer array and make a concat fsm. */
1969                         Key *arr = new Key[token.length];
1970                         makeFsmKeyArray( arr, token.data, token.length, pd );
1971
1972                         /* Make the concat fsm. */
1973                         rtnVal = new FsmAp();
1974                         if ( rootRegex != 0 && rootRegex->caseInsensitive )
1975                                 rtnVal->concatFsmCI( arr, token.length );
1976                         else
1977                                 rtnVal->concatFsm( arr, token.length );
1978                         delete[] arr;
1979                         break;
1980                 }
1981                 case Dot: {
1982                         /* Make the dot fsm. */
1983                         rtnVal = dotFsm( pd );
1984                         break;
1985                 }
1986                 case OrBlock: {
1987                         /* Get the or block and minmize it. */
1988                         rtnVal = orBlock->walk( pd, rootRegex );
1989                         if ( rtnVal == 0 ) {
1990                                 rtnVal = new FsmAp();
1991                                 rtnVal->lambdaFsm();
1992                         }
1993                         rtnVal->minimizePartition2();
1994                         break;
1995                 }
1996                 case NegOrBlock: {
1997                         /* Get the or block and minimize it. */
1998                         FsmAp *fsm = orBlock->walk( pd, rootRegex );
1999                         fsm->minimizePartition2();
2000
2001                         /* Make a dot fsm and subtract from it. */
2002                         rtnVal = dotFsm( pd );
2003                         rtnVal->subtractOp( fsm );
2004                         rtnVal->minimizePartition2();
2005                         break;
2006                 }
2007         }
2008
2009         /* If the item is followed by a star, then apply the star op. */
2010         if ( star ) {
2011                 if ( rtnVal->startState->isFinState() ) {
2012                         warning(loc) << "applying kleene star to a machine that "
2013                                         "accpets zero length word" << endl;
2014                 }
2015
2016                 rtnVal->starOp();
2017                 rtnVal->minimizePartition2();
2018         }
2019         return rtnVal;
2020 }
2021
2022 /* Clean up after an or block of a regular expression. */
2023 ReOrBlock::~ReOrBlock()
2024 {
2025         switch ( type ) {
2026                 case RecurseItem:
2027                         delete orBlock;
2028                         delete item;
2029                         break;
2030                 case Empty:
2031                         break;
2032         }
2033 }
2034
2035
2036 /* Evaluate an or block of a regular expression. */
2037 FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex )
2038 {
2039         FsmAp *rtnVal = 0;
2040         switch ( type ) {
2041                 case RecurseItem: {
2042                         /* Evaluate the two fsm. */
2043                         FsmAp *fsm1 = orBlock->walk( pd, rootRegex );
2044                         FsmAp *fsm2 = item->walk( pd, rootRegex );
2045                         if ( fsm1 == 0 )
2046                                 rtnVal = fsm2;
2047                         else {
2048                                 fsm1->unionOp( fsm2 );
2049                                 rtnVal = fsm1;
2050                         }
2051                         break;
2052                 }
2053                 case Empty: {
2054                         rtnVal = 0;
2055                         break;
2056                 }
2057         }
2058         return rtnVal;;
2059 }
2060
2061 /* Evaluate an or block item of a regular expression. */
2062 FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex )
2063 {
2064         /* The return value, is the alphabet signed? */
2065         FsmAp *rtnVal = 0;
2066         switch ( type ) {
2067         case Data: {
2068                 /* Make the or machine. */
2069                 rtnVal = new FsmAp();
2070
2071                 /* Put the or data into an array of ints. Note that we find unique
2072                  * keys. Duplicates are silently ignored. The alternative would be to
2073                  * issue warning or an error but since we can't with [a0-9a] or 'a' |
2074                  * 'a' don't bother here. */
2075                 KeySet keySet;
2076                 makeFsmUniqueKeyArray( keySet, token.data, token.length,
2077                         rootRegex != 0 ? rootRegex->caseInsensitive : false, pd );
2078
2079                 /* Run the or operator. */
2080                 rtnVal->orFsm( keySet.data, keySet.length() );
2081                 break;
2082         }
2083         case Range: {
2084                 /* Make the upper and lower keys. */
2085                 Key lowKey = makeFsmKeyChar( lower, pd );
2086                 Key highKey = makeFsmKeyChar( upper, pd );
2087
2088                 /* Validate the range. */
2089                 if ( lowKey > highKey ) {
2090                         /* Recover by setting upper to lower; */
2091                         error(loc) << "lower end of range is greater then upper end" << endl;
2092                         highKey = lowKey;
2093                 }
2094
2095                 /* Make the range machine. */
2096                 rtnVal = new FsmAp();
2097                 rtnVal->rangeFsm( lowKey, highKey );
2098
2099                 if ( rootRegex != 0 && rootRegex->caseInsensitive ) {
2100                         if ( lowKey <= 'Z' && 'A' <= highKey ) {
2101                                 Key otherLow = lowKey < 'A' ? Key('A') : lowKey;
2102                                 Key otherHigh = 'Z' < highKey ? Key('Z') : highKey;
2103
2104                                 otherLow = 'a' + ( otherLow - 'A' );
2105                                 otherHigh = 'a' + ( otherHigh - 'A' );
2106
2107                                 FsmAp *otherRange = new FsmAp();
2108                                 otherRange->rangeFsm( otherLow, otherHigh );
2109                                 rtnVal->unionOp( otherRange );
2110                                 rtnVal->minimizePartition2();
2111                         }
2112                         else if ( lowKey <= 'z' && 'a' <= highKey ) {
2113                                 Key otherLow = lowKey < 'a' ? Key('a') : lowKey;
2114                                 Key otherHigh = 'z' < highKey ? Key('z') : highKey;
2115
2116                                 otherLow = 'A' + ( otherLow - 'a' );
2117                                 otherHigh = 'A' + ( otherHigh - 'a' );
2118
2119                                 FsmAp *otherRange = new FsmAp();
2120                                 otherRange->rangeFsm( otherLow, otherHigh );
2121                                 rtnVal->unionOp( otherRange );
2122                                 rtnVal->minimizePartition2();
2123                         }
2124                 }
2125
2126                 break;
2127         }}
2128         return rtnVal;
2129 }