ragel/parsetree.cpp

   1 /*
   2  *  Copyright 2001-2006 Adrian Thurston <thurston@complang.org>
   3  */
   4
   5 /*  This file is part of Ragel.
   6  *
   7  *  Ragel is free software; you can redistribute it and/or modify
   8  *  it under the terms of the GNU General Public License as published by
   9  *  the Free Software Foundation; either version 2 of the License, or
  10  *  (at your option) any later version.
  11  *
  12  *  Ragel is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  *  GNU General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU General Public License
  18  *  along with Ragel; if not, write to the Free Software
  19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  */
  21
  22 #include <iostream>
  23 #include <iomanip>
  24 #include <errno.h>
  25 #include <limits.h>
  26 #include <stdlib.h>
  27
  28 /* Parsing. */
  29 #include "ragel.h"
  30 #include "rlparse.h"
  31 #include "parsetree.h"
  32
  33 using namespace std;
  34 ostream &operator<<( ostream &out, const NameRef &nameRef );
  35 ostream &operator<<( ostream &out, const NameInst &nameInst );
  36
  37 /* Convert the literal string which comes in from the scanner into an array of
  38  * characters with escapes and options interpreted. Also null terminates the
  39  * string. Though this null termination should not be relied on for
  40  * interpreting literals in the parser because the string may contain \0 */
  41 char *prepareLitString( const InputLoc &loc, const char *data, long length,
  42                 long &resLen, bool &caseInsensitive )
  43 {
  44         char *resData = new char[length+1];
  45         caseInsensitive = false;
  46
  47         const char *src = data + 1;
  48         const char *end = data + length - 1;
  49
  50         while ( *end != '\'' && *end != '\"' ) {
  51                 if ( *end == 'i' )
  52                         caseInsensitive = true;
  53                 else {
  54                         error( loc ) << "literal string '" << *end <<
  55                                         "' option not supported" << endl;
  56                 }
  57                 end -= 1;
  58         }
  59
  60         char *dest = resData;
  61         long len = 0;
  62         while ( src != end ) {
  63                 if ( *src == '\\' ) {
  64                         switch ( src[1] ) {
  65                         case '0': dest[len++] = '\0'; break;
  66                         case 'a': dest[len++] = '\a'; break;
  67                         case 'b': dest[len++] = '\b'; break;
  68                         case 't': dest[len++] = '\t'; break;
  69                         case 'n': dest[len++] = '\n'; break;
  70                         case 'v': dest[len++] = '\v'; break;
  71                         case 'f': dest[len++] = '\f'; break;
  72                         case 'r': dest[len++] = '\r'; break;
  73                         case '\n':  break;
  74                         default: dest[len++] = src[1]; break;
  75                         }
  76                         src += 2;
  77                 }
  78                 else {
  79                         dest[len++] = *src++;
  80                 }
  81         }
  82
  83         resLen = len;
  84         resData[resLen] = 0;
  85         return resData;
  86 }
  87
  88 FsmAp *VarDef::walk( ParseData *pd )
  89 {
  90         /* We enter into a new name scope. */
  91         NameFrame nameFrame = pd->enterNameScope( true, 1 );
  92
  93         /* Recurse on the expression. */
  94         FsmAp *rtnVal = machineDef->walk( pd );
  95
  96         /* Do the tranfer of local error actions. */
  97         LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name );
  98         if ( localErrDictEl != 0 ) {
  99                 for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ )
 100                         rtnVal->transferErrorActions( state, localErrDictEl->value );
 101         }
 102
 103         /* If the expression below is a join operation with multiple expressions
 104          * then it just had epsilon transisions resolved. If it is a join
 105          * with only a single expression then run the epsilon op now. */
 106         if ( machineDef->type == MachineDef::JoinType && machineDef->join->exprList.length() == 1 )
 107                 rtnVal->epsilonOp();
 108
 109         /* We can now unset entry points that are not longer used. */
 110         pd->unsetObsoleteEntries( rtnVal );
 111
 112         /* If the name of the variable is referenced then add the entry point to
 113          * the graph. */
 114         if ( pd->curNameInst->numRefs > 0 )
 115                 rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState );
 116
 117         /* Pop the name scope. */
 118         pd->popNameScope( nameFrame );
 119         return rtnVal;
 120 }
 121
 122 void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd )
 123 {
 124         /* The variable definition enters a new scope. */
 125         NameInst *prevNameInst = pd->curNameInst;
 126         pd->curNameInst = pd->addNameInst( loc, name, false );
 127
 128         if ( machineDef->type == MachineDef::LongestMatchType )
 129                 pd->curNameInst->isLongestMatch = true;
 130
 131         /* Recurse. */
 132         machineDef->makeNameTree( pd );
 133
 134         /* The name scope ends, pop the name instantiation. */
 135         pd->curNameInst = prevNameInst;
 136 }
 137
 138 void VarDef::resolveNameRefs( ParseData *pd )
 139 {
 140         /* Entering into a new scope. */
 141         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 142
 143         /* Recurse. */
 144         machineDef->resolveNameRefs( pd );
 145
 146         /* The name scope ends, pop the name instantiation. */
 147         pd->popNameScope( nameFrame );
 148 }
 149
 150 InputLoc LongestMatchPart::getLoc()
 151 {
 152         return action != 0 ? action->loc : semiLoc;
 153 }
 154
 155 /*
 156  * If there are any LMs then all of the following entry points must reset
 157  * tokstart:
 158  *
 159  *  1. fentry(StateRef)
 160  *  2. ftoto(StateRef), fcall(StateRef), fnext(StateRef)
 161  *  3. targt of any transition that has an fcall (the return loc).
 162  *  4. start state of all longest match routines.
 163  */
 164
 165 Action *LongestMatch::newAction( ParseData *pd, const InputLoc &loc,
 166                 const char *name, InlineList *inlineList )
 167 {
 168         Action *action = new Action( loc, name, inlineList, pd->nextCondId++ );
 169         action->actionRefs.append( pd->curNameInst );
 170         pd->actionList.append( action );
 171         action->isLmAction = true;
 172         return action;
 173 }
 174
 175 void LongestMatch::makeActions( ParseData *pd )
 176 {
 177         /* Make actions that set the action id. */
 178         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 179                 /* For each part create actions for setting the match type.  We need
 180                  * to do this so that the actions will go into the actionIndex. */
 181                 InlineList *inlineList = new InlineList;
 182                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
 183                                 InlineItem::LmSetActId ) );
 184                 char *actName = new char[50];
 185                 sprintf( actName, "store%i", lmi->longestMatchId );
 186                 lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList );
 187         }
 188
 189         /* Make actions that execute the user action and restart on the last
 190          * character. */
 191         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 192                 /* For each part create actions for setting the match type.  We need
 193                  * to do this so that the actions will go into the actionIndex. */
 194                 InlineList *inlineList = new InlineList;
 195                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
 196                                 InlineItem::LmOnLast ) );
 197                 char *actName = new char[50];
 198                 sprintf( actName, "last%i", lmi->longestMatchId );
 199                 lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList );
 200         }
 201
 202         /* Make actions that execute the user action and restart on the next
 203          * character.  These actions will set tokend themselves (it is the current
 204          * char). */
 205         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 206                 /* For each part create actions for setting the match type.  We need
 207                  * to do this so that the actions will go into the actionIndex. */
 208                 InlineList *inlineList = new InlineList;
 209                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
 210                                 InlineItem::LmOnNext ) );
 211                 char *actName = new char[50];
 212                 sprintf( actName, "next%i", lmi->longestMatchId );
 213                 lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList );
 214         }
 215
 216         /* Make actions that execute the user action and restart at tokend. These
 217          * actions execute some time after matching the last char. */
 218         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 219                 /* For each part create actions for setting the match type.  We need
 220                  * to do this so that the actions will go into the actionIndex. */
 221                 InlineList *inlineList = new InlineList;
 222                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
 223                                 InlineItem::LmOnLagBehind ) );
 224                 char *actName = new char[50];
 225                 sprintf( actName, "lag%i", lmi->longestMatchId );
 226                 lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList );
 227         }
 228
 229         InputLoc loc;
 230         loc.line = 1;
 231         loc.col = 1;
 232
 233         /* Create the error action. */
 234         InlineList *il6 = new InlineList;
 235         il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) );
 236         lmActSelect = newAction( pd, loc, "switch", il6 );
 237 }
 238
 239 void LongestMatch::findName( ParseData *pd )
 240 {
 241         NameInst *nameInst = pd->curNameInst;
 242         while ( nameInst->name == 0 ) {
 243                 nameInst = nameInst->parent;
 244                 /* Since every machine must must have a name, we should always find a
 245                  * name for the longest match. */
 246                 assert( nameInst != 0 );
 247         }
 248         name = nameInst->name;
 249 }
 250
 251 void LongestMatch::makeNameTree( ParseData *pd )
 252 {
 253         /* Create an anonymous scope for the longest match. Will be used for
 254          * restarting machine after matching a token. */
 255         NameInst *prevNameInst = pd->curNameInst;
 256         pd->curNameInst = pd->addNameInst( loc, 0, false );
 257
 258         /* Recurse into all parts of the longest match operator. */
 259         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ )
 260                 lmi->join->makeNameTree( pd );
 261
 262         /* Traverse the name tree upwards to find a name for this lm. */
 263         findName( pd );
 264
 265         /* Also make the longest match's actions at this point. */
 266         makeActions( pd );
 267
 268         /* The name scope ends, pop the name instantiation. */
 269         pd->curNameInst = prevNameInst;
 270 }
 271
 272 void LongestMatch::resolveNameRefs( ParseData *pd )
 273 {
 274         /* The longest match gets its own name scope. */
 275         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 276
 277         /* Take an action reference for each longest match item and recurse. */
 278         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 279                 /* Record the reference if the item has an action. */
 280                 if ( lmi->action != 0 )
 281                         lmi->action->actionRefs.append( pd->localNameScope );
 282
 283                 /* Recurse down the join. */
 284                 lmi->join->resolveNameRefs( pd );
 285         }
 286
 287         /* The name scope ends, pop the name instantiation. */
 288         pd->popNameScope( nameFrame );
 289 }
 290
 291 void LongestMatch::restart( FsmAp *graph, TransAp *trans )
 292 {
 293         StateAp *fromState = trans->fromState;
 294         graph->detachTrans( fromState, trans->toState, trans );
 295         graph->attachTrans( fromState, graph->startState, trans );
 296 }
 297
 298 void LongestMatch::runLongestMatch( ParseData *pd, FsmAp *graph )
 299 {
 300         graph->markReachableFromHereStopFinal( graph->startState );
 301         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 302                 if ( ms->stateBits & STB_ISMARKED ) {
 303                         ms->lmItemSet.insert( 0 );
 304                         ms->stateBits &= ~ STB_ISMARKED;
 305                 }
 306         }
 307
 308         /* Transfer the first item of non-empty lmAction tables to the item sets
 309          * of the states that follow. Exclude states that have no transitions out.
 310          * This must happen on a separate pass so that on each iteration of the
 311          * next pass we have the item set entries from all lmAction tables. */
 312         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
 313                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
 314                         if ( trans->lmActionTable.length() > 0 ) {
 315                                 LmActionTableEl *lmAct = trans->lmActionTable.data;
 316                                 StateAp *toState = trans->toState;
 317                                 assert( toState );
 318
 319                                 /* Can only optimize this if there are no transitions out.
 320                                  * Note there can be out transitions going nowhere with
 321                                  * actions and they too must inhibit this optimization. */
 322                                 if ( toState->outList.length() > 0 ) {
 323                                         /* Fill the item sets. */
 324                                         graph->markReachableFromHereStopFinal( toState );
 325                                         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 326                                                 if ( ms->stateBits & STB_ISMARKED ) {
 327                                                         ms->lmItemSet.insert( lmAct->value );
 328                                                         ms->stateBits &= ~ STB_ISMARKED;
 329                                                 }
 330                                         }
 331                                 }
 332                         }
 333                 }
 334         }
 335
 336         /* The lmItem sets are now filled, telling us which longest match rules
 337          * can succeed in which states. First determine if we need to make sure
 338          * act is defaulted to zero. We need to do this if there are any states
 339          * with lmItemSet.length() > 1 and NULL is included. That is, that the
 340          * switch may get called when in fact nothing has been matched. */
 341         int maxItemSetLength = 0;
 342         graph->markReachableFromHereStopFinal( graph->startState );
 343         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 344                 if ( ms->stateBits & STB_ISMARKED ) {
 345                         if ( ms->lmItemSet.length() > maxItemSetLength )
 346                                 maxItemSetLength = ms->lmItemSet.length();
 347                         ms->stateBits &= ~ STB_ISMARKED;
 348                 }
 349         }
 350
 351         /* The actions executed on starting to match a token. */
 352         graph->isolateStartState();
 353         graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart );
 354         graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
 355         if ( maxItemSetLength > 1 ) {
 356                 /* The longest match action switch may be called when tokens are
 357                  * matched, in which case act must be initialized, there must be a
 358                  * case to handle the error, and the generated machine will require an
 359                  * error state. */
 360                 lmSwitchHandlesError = true;
 361                 pd->lmRequiresErrorState = true;
 362                 graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
 363         }
 364
 365         /* The place to store transitions to restart. It maybe possible for the
 366          * restarting to affect the searching through the graph that follows. For
 367          * now take the safe route and save the list of transitions to restart
 368          * until after all searching is done. */
 369         Vector<TransAp*> restartTrans;
 370
 371         /* Set actions that do immediate token recognition, set the longest match part
 372          * id and set the token ending. */
 373         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
 374                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
 375                         if ( trans->lmActionTable.length() > 0 ) {
 376                                 LmActionTableEl *lmAct = trans->lmActionTable.data;
 377                                 StateAp *toState = trans->toState;
 378                                 assert( toState );
 379
 380                                 /* Can only optimize this if there are no transitions out.
 381                                  * Note there can be out transitions going nowhere with
 382                                  * actions and they too must inhibit this optimization. */
 383                                 if ( toState->outList.length() == 0 ) {
 384                                         /* Can execute the immediate action for the longest match
 385                                          * part. Redirect the action to the start state.
 386                                          *
 387                                          * NOTE: When we need to inhibit on_last due to leaving
 388                                          * actions the above test suffices. If the state has out
 389                                          * actions then it will fail because the out action will
 390                                          * have been transferred to an error transition, which
 391                                          * makes the outlist non-empty. */
 392                                         trans->actionTable.setAction( lmAct->key,
 393                                                         lmAct->value->actOnLast );
 394                                         restartTrans.append( trans );
 395                                 }
 396                                 else {
 397                                         /* Look for non final states that have a non-empty item
 398                                          * set. If these are present then we need to record the
 399                                          * end of the token.  Also Find the highest item set
 400                                          * length reachable from here (excluding at transtions to
 401                                          * final states). */
 402                                         bool nonFinalNonEmptyItemSet = false;
 403                                         maxItemSetLength = 0;
 404                                         graph->markReachableFromHereStopFinal( toState );
 405                                         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 406                                                 if ( ms->stateBits & STB_ISMARKED ) {
 407                                                         if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
 408                                                                 nonFinalNonEmptyItemSet = true;
 409                                                         if ( ms->lmItemSet.length() > maxItemSetLength )
 410                                                                 maxItemSetLength = ms->lmItemSet.length();
 411                                                         ms->stateBits &= ~ STB_ISMARKED;
 412                                                 }
 413                                         }
 414
 415                                         /* If there are reachable states that are not final and
 416                                          * have non empty item sets or that have an item set
 417                                          * length greater than one then we need to set tokend
 418                                          * because the error action that matches the token will
 419                                          * require it. */
 420                                         if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
 421                                                 trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
 422
 423                                         /* Some states may not know which longest match item to
 424                                          * execute, must set it. */
 425                                         if ( maxItemSetLength > 1 ) {
 426                                                 /* There are transitions out, another match may come. */
 427                                                 trans->actionTable.setAction( lmAct->key,
 428                                                                 lmAct->value->setActId );
 429                                         }
 430                                 }
 431                         }
 432                 }
 433         }
 434
 435         /* Now that all graph searching is done it certainly safe set the
 436          * restarting. It may be safe above, however this must be verified. */
 437         for ( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ )
 438                 restart( graph, *pt );
 439
 440         int lmErrActionOrd = pd->curActionOrd++;
 441
 442         /* Embed the error for recognizing a char. */
 443         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
 444                 if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
 445                         if ( st->isFinState() ) {
 446                                 /* On error execute the onActNext action, which knows that
 447                                  * the last character of the token was one back and restart. */
 448                                 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
 449                                                 &st->lmItemSet[0]->actOnNext, 1 );
 450                                 st->eofActionTable.setAction( lmErrActionOrd,
 451                                                 st->lmItemSet[0]->actOnNext );
 452                                 st->eofTarget = graph->startState;
 453                         }
 454                         else {
 455                                 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
 456                                                 &st->lmItemSet[0]->actLagBehind, 1 );
 457                                 st->eofActionTable.setAction( lmErrActionOrd,
 458                                                 st->lmItemSet[0]->actLagBehind );
 459                                 st->eofTarget = graph->startState;
 460                         }
 461                 }
 462                 else if ( st->lmItemSet.length() > 1 ) {
 463                         /* Need to use the select. Take note of which items the select
 464                          * is needed for so only the necessary actions are included. */
 465                         for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
 466                                 if ( *plmi != 0 )
 467                                         (*plmi)->inLmSelect = true;
 468                         }
 469                         /* On error, execute the action select and go to the start state. */
 470                         graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
 471                                         &lmActSelect, 1 );
 472                         st->eofActionTable.setAction( lmErrActionOrd, lmActSelect );
 473                         st->eofTarget = graph->startState;
 474                 }
 475         }
 476
 477         /* Finally, the start state should be made final. */
 478         graph->setFinState( graph->startState );
 479 }
 480
 481 void LongestMatch::transferScannerLeavingActions( FsmAp *graph )
 482 {
 483         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
 484                 if ( st->outActionTable.length() > 0 )
 485                         graph->setErrorActions( st, st->outActionTable );
 486         }
 487 }
 488
 489 FsmAp *LongestMatch::walk( ParseData *pd )
 490 {
 491         /* The longest match has it's own name scope. */
 492         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 493
 494         /* Make each part of the longest match. */
 495         FsmAp **parts = new FsmAp*[longestMatchList->length()];
 496         LmPartList::Iter lmi = *longestMatchList;
 497         for ( int i = 0; lmi.lte(); lmi++, i++ ) {
 498                 /* Create the machine and embed the setting of the longest match id. */
 499                 parts[i] = lmi->join->walk( pd );
 500                 parts[i]->longMatchAction( pd->curActionOrd++, lmi );
 501         }
 502
 503         /* Before we union the patterns we need to deal with leaving actions. They
 504          * are transfered to error transitions out of the final states (like local
 505          * error actions) and to eof actions. In the scanner we need to forbid
 506          * on_last for any final state that has an leaving action. */
 507         for ( int i = 0; i < longestMatchList->length(); i++ )
 508                 transferScannerLeavingActions( parts[i] );
 509
 510         /* Union machines one and up with machine zero. The grammar dictates that
 511          * there will always be at least one part. */
 512         FsmAp *rtnVal = parts[0];
 513         for ( int i = 1; i < longestMatchList->length(); i++ ) {
 514                 rtnVal->unionOp( parts[i] );
 515                 afterOpMinimize( rtnVal );
 516         }
 517
 518         runLongestMatch( pd, rtnVal );
 519
 520         /* Pop the name scope. */
 521         pd->popNameScope( nameFrame );
 522
 523         delete[] parts;
 524         return rtnVal;
 525 }
 526
 527 FsmAp *MachineDef::walk( ParseData *pd )
 528 {
 529         FsmAp *rtnVal = 0;
 530         switch ( type ) {
 531         case JoinType:
 532                 rtnVal = join->walk( pd );
 533                 break;
 534         case LongestMatchType:
 535                 rtnVal = longestMatch->walk( pd );
 536                 break;
 537         case LengthDefType:
 538                 condData->lastCondKey.increment();
 539                 rtnVal = new FsmAp();
 540                 rtnVal->concatFsm( condData->lastCondKey );
 541                 break;
 542         }
 543         return rtnVal;
 544 }
 545
 546 void MachineDef::makeNameTree( ParseData *pd )
 547 {
 548         switch ( type ) {
 549         case JoinType:
 550                 join->makeNameTree( pd );
 551                 break;
 552         case LongestMatchType:
 553                 longestMatch->makeNameTree( pd );
 554                 break;
 555         case LengthDefType:
 556                 break;
 557         }
 558 }
 559
 560 void MachineDef::resolveNameRefs( ParseData *pd )
 561 {
 562         switch ( type ) {
 563         case JoinType:
 564                 join->resolveNameRefs( pd );
 565                 break;
 566         case LongestMatchType:
 567                 longestMatch->resolveNameRefs( pd );
 568                 break;
 569         case LengthDefType:
 570                 break;
 571         }
 572 }
 573
 574
 575 /* Construct with a location and the first expression. */
 576 Join::Join( const InputLoc &loc, Expression *expr )
 577 :
 578         loc(loc)
 579 {
 580         exprList.append( expr );
 581 }
 582
 583 /* Construct with a location and the first expression. */
 584 Join::Join( Expression *expr )
 585 :
 586         loc(loc)
 587 {
 588         exprList.append( expr );
 589 }
 590
 591 /* Walk an expression node. */
 592 FsmAp *Join::walk( ParseData *pd )
 593 {
 594         if ( exprList.length() > 1 )
 595                 return walkJoin( pd );
 596         else
 597                 return exprList.head->walk( pd );
 598 }
 599
 600 /* There is a list of expressions to join. */
 601 FsmAp *Join::walkJoin( ParseData *pd )
 602 {
 603         /* We enter into a new name scope. */
 604         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 605
 606         /* Evaluate the machines. */
 607         FsmAp **fsms = new FsmAp*[exprList.length()];
 608         ExprList::Iter expr = exprList;
 609         for ( int e = 0; e < exprList.length(); e++, expr++ )
 610                 fsms[e] = expr->walk( pd );
 611
 612         /* Get the start and final names. Final is
 613          * guaranteed to exist, start is not. */
 614         NameInst *startName = pd->curNameInst->start;
 615         NameInst *finalName = pd->curNameInst->final;
 616
 617         int startId = -1;
 618         if ( startName != 0 ) {
 619                 /* Take note that there was an implicit link to the start machine. */
 620                 pd->localNameScope->referencedNames.append( startName );
 621                 startId = startName->id;
 622         }
 623
 624         /* A final id of -1 indicates there is no epsilon that references the
 625          * final state, therefor do not create one or set an entry point to it. */
 626         int finalId = -1;
 627         if ( finalName->numRefs > 0 )
 628                 finalId = finalName->id;
 629
 630         /* Join machines 1 and up onto machine 0. */
 631         FsmAp *retFsm = fsms[0];
 632         retFsm->joinOp( startId, finalId, fsms+1, exprList.length()-1 );
 633
 634         /* We can now unset entry points that are not longer used. */
 635         pd->unsetObsoleteEntries( retFsm );
 636
 637         /* Pop the name scope. */
 638         pd->popNameScope( nameFrame );
 639
 640         delete[] fsms;
 641         return retFsm;
 642 }
 643
 644 void Join::makeNameTree( ParseData *pd )
 645 {
 646         if ( exprList.length() > 1 ) {
 647                 /* Create the new anonymous scope. */
 648                 NameInst *prevNameInst = pd->curNameInst;
 649                 pd->curNameInst = pd->addNameInst( loc, 0, false );
 650
 651                 /* Join scopes need an implicit "final" target. */
 652                 pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final",
 653                                 pd->nextNameId++, false );
 654
 655                 /* Recurse into all expressions in the list. */
 656                 for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
 657                         expr->makeNameTree( pd );
 658
 659                 /* The name scope ends, pop the name instantiation. */
 660                 pd->curNameInst = prevNameInst;
 661         }
 662         else {
 663                 /* Recurse into the single expression. */
 664                 exprList.head->makeNameTree( pd );
 665         }
 666 }
 667
 668
 669 void Join::resolveNameRefs( ParseData *pd )
 670 {
 671         /* Branch on whether or not there is to be a join. */
 672         if ( exprList.length() > 1 ) {
 673                 /* The variable definition enters a new scope. */
 674                 NameFrame nameFrame = pd->enterNameScope( true, 1 );
 675
 676                 /* The join scope must contain a start label. */
 677                 NameSet resolved = pd->resolvePart( pd->localNameScope, "start", true );
 678                 if ( resolved.length() > 0 ) {
 679                         /* Take the first. */
 680                         pd->curNameInst->start = resolved[0];
 681                         if ( resolved.length() > 1 ) {
 682                                 /* Complain about the multiple references. */
 683                                 error(loc) << "join operation has multiple start labels" << endl;
 684                                 errorStateLabels( resolved );
 685                         }
 686                 }
 687
 688                 /* Make sure there is a start label. */
 689                 if ( pd->curNameInst->start != 0 ) {
 690                         /* There is an implicit reference to start name. */
 691                         pd->curNameInst->start->numRefs += 1;
 692                 }
 693                 else {
 694                         /* No start label. */
 695                         error(loc) << "join operation has no start label" << endl;
 696                 }
 697
 698                 /* Recurse into all expressions in the list. */
 699                 for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
 700                         expr->resolveNameRefs( pd );
 701
 702                 /* The name scope ends, pop the name instantiation. */
 703                 pd->popNameScope( nameFrame );
 704         }
 705         else {
 706                 /* Recurse into the single expression. */
 707                 exprList.head->resolveNameRefs( pd );
 708         }
 709 }
 710
 711 /* Clean up after an expression node. */
 712 Expression::~Expression()
 713 {
 714         switch ( type ) {
 715                 case OrType: case IntersectType: case SubtractType:
 716                 case StrongSubtractType:
 717                         delete expression;
 718                         delete term;
 719                         break;
 720                 case TermType:
 721                         delete term;
 722                         break;
 723                 case BuiltinType:
 724                         break;
 725         }
 726 }
 727
 728 /* Evaluate a single expression node. */
 729 FsmAp *Expression::walk( ParseData *pd, bool lastInSeq )
 730 {
 731         FsmAp *rtnVal = 0;
 732         switch ( type ) {
 733                 case OrType: {
 734                         /* Evaluate the expression. */
 735                         rtnVal = expression->walk( pd, false );
 736                         /* Evaluate the term. */
 737                         FsmAp *rhs = term->walk( pd );
 738                         /* Perform union. */
 739                         rtnVal->unionOp( rhs );
 740                         afterOpMinimize( rtnVal, lastInSeq );
 741                         break;
 742                 }
 743                 case IntersectType: {
 744                         /* Evaluate the expression. */
 745                         rtnVal = expression->walk( pd );
 746                         /* Evaluate the term. */
 747                         FsmAp *rhs = term->walk( pd );
 748                         /* Perform intersection. */
 749                         rtnVal->intersectOp( rhs );
 750                         afterOpMinimize( rtnVal, lastInSeq );
 751                         break;
 752                 }
 753                 case SubtractType: {
 754                         /* Evaluate the expression. */
 755                         rtnVal = expression->walk( pd );
 756                         /* Evaluate the term. */
 757                         FsmAp *rhs = term->walk( pd );
 758                         /* Perform subtraction. */
 759                         rtnVal->subtractOp( rhs );
 760                         afterOpMinimize( rtnVal, lastInSeq );
 761                         break;
 762                 }
 763                 case StrongSubtractType: {
 764                         /* Evaluate the expression. */
 765                         rtnVal = expression->walk( pd );
 766
 767                         /* Evaluate the term and pad it with any* machines. */
 768                         FsmAp *rhs = dotStarFsm( pd );
 769                         FsmAp *termFsm = term->walk( pd );
 770                         FsmAp *trailAnyStar = dotStarFsm( pd );
 771                         rhs->concatOp( termFsm );
 772                         rhs->concatOp( trailAnyStar );
 773
 774                         /* Perform subtraction. */
 775                         rtnVal->subtractOp( rhs );
 776                         afterOpMinimize( rtnVal, lastInSeq );
 777                         break;
 778                 }
 779                 case TermType: {
 780                         /* Return result of the term. */
 781                         rtnVal = term->walk( pd );
 782                         break;
 783                 }
 784                 case BuiltinType: {
 785                         /* Duplicate the builtin. */
 786                         rtnVal = makeBuiltin( builtin, pd );
 787                         break;
 788                 }
 789         }
 790
 791         return rtnVal;
 792 }
 793
 794 void Expression::makeNameTree( ParseData *pd )
 795 {
 796         switch ( type ) {
 797         case OrType:
 798         case IntersectType:
 799         case SubtractType:
 800         case StrongSubtractType:
 801                 expression->makeNameTree( pd );
 802                 term->makeNameTree( pd );
 803                 break;
 804         case TermType:
 805                 term->makeNameTree( pd );
 806                 break;
 807         case BuiltinType:
 808                 break;
 809         }
 810 }
 811
 812 void Expression::resolveNameRefs( ParseData *pd )
 813 {
 814         switch ( type ) {
 815         case OrType:
 816         case IntersectType:
 817         case SubtractType:
 818         case StrongSubtractType:
 819                 expression->resolveNameRefs( pd );
 820                 term->resolveNameRefs( pd );
 821                 break;
 822         case TermType:
 823                 term->resolveNameRefs( pd );
 824                 break;
 825         case BuiltinType:
 826                 break;
 827         }
 828 }
 829
 830 /* Clean up after a term node. */
 831 Term::~Term()
 832 {
 833         switch ( type ) {
 834                 case ConcatType:
 835                 case RightStartType:
 836                 case RightFinishType:
 837                 case LeftType:
 838                         delete term;
 839                         delete factorWithAug;
 840                         break;
 841                 case FactorWithAugType:
 842                         delete factorWithAug;
 843                         break;
 844         }
 845 }
 846
 847 /* Evaluate a term node. */
 848 FsmAp *Term::walk( ParseData *pd, bool lastInSeq )
 849 {
 850         FsmAp *rtnVal = 0;
 851         switch ( type ) {
 852                 case ConcatType: {
 853                         /* Evaluate the Term. */
 854                         rtnVal = term->walk( pd, false );
 855                         /* Evaluate the FactorWithRep. */
 856                         FsmAp *rhs = factorWithAug->walk( pd );
 857                         /* Perform concatenation. */
 858                         rtnVal->concatOp( rhs );
 859                         afterOpMinimize( rtnVal, lastInSeq );
 860                         break;
 861                 }
 862                 case RightStartType: {
 863                         /* Evaluate the Term. */
 864                         rtnVal = term->walk( pd );
 865
 866                         /* Evaluate the FactorWithRep. */
 867                         FsmAp *rhs = factorWithAug->walk( pd );
 868
 869                         /* Set up the priority descriptors. The left machine gets the
 870                          * lower priority where as the right get the higher start priority. */
 871                         priorDescs[0].key = pd->nextPriorKey++;
 872                         priorDescs[0].priority = 0;
 873                         rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
 874
 875                         /* The start transitions of the right machine gets the higher
 876                          * priority. Use the same unique key. */
 877                         priorDescs[1].key = priorDescs[0].key;
 878                         priorDescs[1].priority = 1;
 879                         rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
 880
 881                         /* Perform concatenation. */
 882                         rtnVal->concatOp( rhs );
 883                         afterOpMinimize( rtnVal, lastInSeq );
 884                         break;
 885                 }
 886                 case RightFinishType: {
 887                         /* Evaluate the Term. */
 888                         rtnVal = term->walk( pd );
 889
 890                         /* Evaluate the FactorWithRep. */
 891                         FsmAp *rhs = factorWithAug->walk( pd );
 892
 893                         /* Set up the priority descriptors. The left machine gets the
 894                          * lower priority where as the finishing transitions to the right
 895                          * get the higher priority. */
 896                         priorDescs[0].key = pd->nextPriorKey++;
 897                         priorDescs[0].priority = 0;
 898                         rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
 899
 900                         /* The finishing transitions of the right machine get the higher
 901                          * priority. Use the same unique key. */
 902                         priorDescs[1].key = priorDescs[0].key;
 903                         priorDescs[1].priority = 1;
 904                         rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
 905
 906                         /* If the right machine's start state is final we need to guard
 907                          * against the left machine persisting by moving through the empty
 908                          * string. */
 909                         if ( rhs->startState->isFinState() ) {
 910                                 rhs->startState->outPriorTable.setPrior(
 911                                                 pd->curPriorOrd++, &priorDescs[1] );
 912                         }
 913
 914                         /* Perform concatenation. */
 915                         rtnVal->concatOp( rhs );
 916                         afterOpMinimize( rtnVal, lastInSeq );
 917                         break;
 918                 }
 919                 case LeftType: {
 920                         /* Evaluate the Term. */
 921                         rtnVal = term->walk( pd );
 922
 923                         /* Evaluate the FactorWithRep. */
 924                         FsmAp *rhs = factorWithAug->walk( pd );
 925
 926                         /* Set up the priority descriptors. The left machine gets the
 927                          * higher priority. */
 928                         priorDescs[0].key = pd->nextPriorKey++;
 929                         priorDescs[0].priority = 1;
 930                         rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
 931
 932                         /* The right machine gets the lower priority. We cannot use
 933                          * allTransPrior here in case the start state of the right machine
 934                          * is final. It would allow the right machine thread to run along
 935                          * with the left if just passing through the start state. Using
 936                          * startFsmPrior prevents this. */
 937                         priorDescs[1].key = priorDescs[0].key;
 938                         priorDescs[1].priority = 0;
 939                         rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
 940
 941                         /* Perform concatenation. */
 942                         rtnVal->concatOp( rhs );
 943                         afterOpMinimize( rtnVal, lastInSeq );
 944                         break;
 945                 }
 946                 case FactorWithAugType: {
 947                         rtnVal = factorWithAug->walk( pd );
 948                         break;
 949                 }
 950         }
 951         return rtnVal;
 952 }
 953
 954 void Term::makeNameTree( ParseData *pd )
 955 {
 956         switch ( type ) {
 957         case ConcatType:
 958         case RightStartType:
 959         case RightFinishType:
 960         case LeftType:
 961                 term->makeNameTree( pd );
 962                 factorWithAug->makeNameTree( pd );
 963                 break;
 964         case FactorWithAugType:
 965                 factorWithAug->makeNameTree( pd );
 966                 break;
 967         }
 968 }
 969
 970 void Term::resolveNameRefs( ParseData *pd )
 971 {
 972         switch ( type ) {
 973         case ConcatType:
 974         case RightStartType:
 975         case RightFinishType:
 976         case LeftType:
 977                 term->resolveNameRefs( pd );
 978                 factorWithAug->resolveNameRefs( pd );
 979                 break;
 980         case FactorWithAugType:
 981                 factorWithAug->resolveNameRefs( pd );
 982                 break;
 983         }
 984 }
 985
 986 /* Clean up after a factor with augmentation node. */
 987 FactorWithAug::~FactorWithAug()
 988 {
 989         delete factorWithRep;
 990
 991         /* Walk the vector of parser actions, deleting function names. */
 992
 993         /* Clean up priority descriptors. */
 994         if ( priorDescs != 0 )
 995                 delete[] priorDescs;
 996 }
 997
 998 void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd )
 999 {
1000         /* Assign actions. */
1001         for ( int i = 0; i < actions.length(); i++ )  {
1002                 switch ( actions[i].type ) {
1003                 /* Transition actions. */
1004                 case at_start:
1005                         graph->startFsmAction( actionOrd[i], actions[i].action );
1006                         afterOpMinimize( graph );
1007                         break;
1008                 case at_all:
1009                         graph->allTransAction( actionOrd[i], actions[i].action );
1010                         break;
1011                 case at_finish:
1012                         graph->finishFsmAction( actionOrd[i], actions[i].action );
1013                         break;
1014                 case at_leave:
1015                         graph->leaveFsmAction( actionOrd[i], actions[i].action );
1016                         break;
1017
1018                 /* Global error actions. */
1019                 case at_start_gbl_error:
1020                         graph->startErrorAction( actionOrd[i], actions[i].action, 0 );
1021                         afterOpMinimize( graph );
1022                         break;
1023                 case at_all_gbl_error:
1024                         graph->allErrorAction( actionOrd[i], actions[i].action, 0 );
1025                         break;
1026                 case at_final_gbl_error:
1027                         graph->finalErrorAction( actionOrd[i], actions[i].action, 0 );
1028                         break;
1029                 case at_not_start_gbl_error:
1030                         graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 );
1031                         break;
1032                 case at_not_final_gbl_error:
1033                         graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 );
1034                         break;
1035                 case at_middle_gbl_error:
1036                         graph->middleErrorAction( actionOrd[i], actions[i].action, 0 );
1037                         break;
1038
1039                 /* Local error actions. */
1040                 case at_start_local_error:
1041                         graph->startErrorAction( actionOrd[i], actions[i].action,
1042                                         actions[i].localErrKey );
1043                         afterOpMinimize( graph );
1044                         break;
1045                 case at_all_local_error:
1046                         graph->allErrorAction( actionOrd[i], actions[i].action,
1047                                         actions[i].localErrKey );
1048                         break;
1049                 case at_final_local_error:
1050                         graph->finalErrorAction( actionOrd[i], actions[i].action,
1051                                         actions[i].localErrKey );
1052                         break;
1053                 case at_not_start_local_error:
1054                         graph->notStartErrorAction( actionOrd[i], actions[i].action,
1055                                         actions[i].localErrKey );
1056                         break;
1057                 case at_not_final_local_error:
1058                         graph->notFinalErrorAction( actionOrd[i], actions[i].action,
1059                                         actions[i].localErrKey );
1060                         break;
1061                 case at_middle_local_error:
1062                         graph->middleErrorAction( actionOrd[i], actions[i].action,
1063                                         actions[i].localErrKey );
1064                         break;
1065
1066                 /* EOF actions. */
1067                 case at_start_eof:
1068                         graph->startEOFAction( actionOrd[i], actions[i].action );
1069                         afterOpMinimize( graph );
1070                         break;
1071                 case at_all_eof:
1072                         graph->allEOFAction( actionOrd[i], actions[i].action );
1073                         break;
1074                 case at_final_eof:
1075                         graph->finalEOFAction( actionOrd[i], actions[i].action );
1076                         break;
1077                 case at_not_start_eof:
1078                         graph->notStartEOFAction( actionOrd[i], actions[i].action );
1079                         break;
1080                 case at_not_final_eof:
1081                         graph->notFinalEOFAction( actionOrd[i], actions[i].action );
1082                         break;
1083                 case at_middle_eof:
1084                         graph->middleEOFAction( actionOrd[i], actions[i].action );
1085                         break;
1086
1087                 /* To State Actions. */
1088                 case at_start_to_state:
1089                         graph->startToStateAction( actionOrd[i], actions[i].action );
1090                         afterOpMinimize( graph );
1091                         break;
1092                 case at_all_to_state:
1093                         graph->allToStateAction( actionOrd[i], actions[i].action );
1094                         break;
1095                 case at_final_to_state:
1096                         graph->finalToStateAction( actionOrd[i], actions[i].action );
1097                         break;
1098                 case at_not_start_to_state:
1099                         graph->notStartToStateAction( actionOrd[i], actions[i].action );
1100                         break;
1101                 case at_not_final_to_state:
1102                         graph->notFinalToStateAction( actionOrd[i], actions[i].action );
1103                         break;
1104                 case at_middle_to_state:
1105                         graph->middleToStateAction( actionOrd[i], actions[i].action );
1106                         break;
1107
1108                 /* From State Actions. */
1109                 case at_start_from_state:
1110                         graph->startFromStateAction( actionOrd[i], actions[i].action );
1111                         afterOpMinimize( graph );
1112                         break;
1113                 case at_all_from_state:
1114                         graph->allFromStateAction( actionOrd[i], actions[i].action );
1115                         break;
1116                 case at_final_from_state:
1117                         graph->finalFromStateAction( actionOrd[i], actions[i].action );
1118                         break;
1119                 case at_not_start_from_state:
1120                         graph->notStartFromStateAction( actionOrd[i], actions[i].action );
1121                         break;
1122                 case at_not_final_from_state:
1123                         graph->notFinalFromStateAction( actionOrd[i], actions[i].action );
1124                         break;
1125                 case at_middle_from_state:
1126                         graph->middleFromStateAction( actionOrd[i], actions[i].action );
1127                         break;
1128
1129                 /* Remaining cases, prevented by the parser. */
1130                 default:
1131                         assert( false );
1132                         break;
1133                 }
1134         }
1135 }
1136
1137 void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd )
1138 {
1139         /* Assign priorities. */
1140         for ( int i = 0; i < priorityAugs.length(); i++ ) {
1141                 switch ( priorityAugs[i].type ) {
1142                 case at_start:
1143                         graph->startFsmPrior( priorOrd[i], &priorDescs[i]);
1144                         /* Start fsm priorities are a special case that may require
1145                          * minimization afterwards. */
1146                         afterOpMinimize( graph );
1147                         break;
1148                 case at_all:
1149                         graph->allTransPrior( priorOrd[i], &priorDescs[i] );
1150                         break;
1151                 case at_finish:
1152                         graph->finishFsmPrior( priorOrd[i], &priorDescs[i] );
1153                         break;
1154                 case at_leave:
1155                         graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] );
1156                         break;
1157
1158                 default:
1159                         /* Parser Prevents this case. */
1160                         break;
1161                 }
1162         }
1163 }
1164
1165 void FactorWithAug::assignConditions( FsmAp *graph )
1166 {
1167         for ( int i = 0; i < conditions.length(); i++ )  {
1168                 switch ( conditions[i].type ) {
1169                 /* Transition actions. */
1170                 case at_start:
1171                         graph->startFsmCondition( conditions[i].action, conditions[i].sense );
1172                         afterOpMinimize( graph );
1173                         break;
1174                 case at_all:
1175                         graph->allTransCondition( conditions[i].action, conditions[i].sense );
1176                         break;
1177                 case at_leave:
1178                         graph->leaveFsmCondition( conditions[i].action, conditions[i].sense );
1179                         break;
1180                 default:
1181                         break;
1182                 }
1183         }
1184 }
1185
1186
1187 /* Evaluate a factor with augmentation node. */
1188 FsmAp *FactorWithAug::walk( ParseData *pd )
1189 {
1190         /* Enter into the scopes created for the labels. */
1191         NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1192
1193         /* Make the array of function orderings. */
1194         int *actionOrd = 0;
1195         if ( actions.length() > 0 )
1196                 actionOrd = new int[actions.length()];
1197
1198         /* First walk the list of actions, assigning order to all starting
1199          * actions. */
1200         for ( int i = 0; i < actions.length(); i++ ) {
1201                 if ( actions[i].type == at_start ||
1202                                 actions[i].type == at_start_gbl_error ||
1203                                 actions[i].type == at_start_local_error ||
1204                                 actions[i].type == at_start_to_state ||
1205                                 actions[i].type == at_start_from_state ||
1206                                 actions[i].type == at_start_eof )
1207                         actionOrd[i] = pd->curActionOrd++;
1208         }
1209
1210         /* Evaluate the factor with repetition. */
1211         FsmAp *rtnVal = factorWithRep->walk( pd );
1212
1213         /* Compute the remaining action orderings. */
1214         for ( int i = 0; i < actions.length(); i++ ) {
1215                 if ( actions[i].type != at_start &&
1216                                 actions[i].type != at_start_gbl_error &&
1217                                 actions[i].type != at_start_local_error &&
1218                                 actions[i].type != at_start_to_state &&
1219                                 actions[i].type != at_start_from_state &&
1220                                 actions[i].type != at_start_eof )
1221                         actionOrd[i] = pd->curActionOrd++;
1222         }
1223
1224         /* Embed conditions. */
1225         assignConditions( rtnVal );
1226
1227         /* Embed actions. */
1228         assignActions( pd, rtnVal , actionOrd );
1229
1230         /* Make the array of priority orderings. Orderings are local to this walk
1231          * of the factor with augmentation. */
1232         int *priorOrd = 0;
1233         if ( priorityAugs.length() > 0 )
1234                 priorOrd = new int[priorityAugs.length()];
1235
1236         /* Walk all priorities, assigning the priority ordering. */
1237         for ( int i = 0; i < priorityAugs.length(); i++ )
1238                 priorOrd[i] = pd->curPriorOrd++;
1239
1240         /* If the priority descriptors have not been made, make them now.  Make
1241          * priority descriptors for each priority asignment that will be passed to
1242          * the fsm. Used to keep track of the key, value and used bit. */
1243         if ( priorDescs == 0 && priorityAugs.length() > 0 ) {
1244                 priorDescs = new PriorDesc[priorityAugs.length()];
1245                 for ( int i = 0; i < priorityAugs.length(); i++ ) {
1246                         /* Init the prior descriptor for the priority setting. */
1247                         priorDescs[i].key = priorityAugs[i].priorKey;
1248                         priorDescs[i].priority = priorityAugs[i].priorValue;
1249                 }
1250         }
1251
1252         /* Assign priorities into the machine. */
1253         assignPriorities( rtnVal, priorOrd );
1254
1255         /* Assign epsilon transitions. */
1256         for ( int e = 0; e < epsilonLinks.length(); e++ ) {
1257                 /* Get the name, which may not exist. If it doesn't then silently
1258                  * ignore it because an error has already been reported. */
1259                 NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++];
1260                 if ( epTarg != 0 ) {
1261                         /* Make the epsilon transitions. */
1262                         rtnVal->epsilonTrans( epTarg->id );
1263
1264                         /* Note that we have made a link to the name. */
1265                         pd->localNameScope->referencedNames.append( epTarg );
1266                 }
1267         }
1268
1269         /* Set entry points for labels. */
1270         if ( labels.length() > 0 ) {
1271                 /* Pop the names. */
1272                 pd->resetNameScope( nameFrame );
1273
1274                 /* Make labels that are referenced into entry points. */
1275                 for ( int i = 0; i < labels.length(); i++ ) {
1276                         pd->enterNameScope( false, 1 );
1277
1278                         /* Will always be found. */
1279                         NameInst *name = pd->curNameInst;
1280
1281                         /* If the name is referenced then set the entry point. */
1282                         if ( name->numRefs > 0 )
1283                                 rtnVal->setEntry( name->id, rtnVal->startState );
1284                 }
1285
1286                 pd->popNameScope( nameFrame );
1287         }
1288
1289         if ( priorOrd != 0 )
1290                 delete[] priorOrd;
1291         if ( actionOrd != 0 )
1292                 delete[] actionOrd;
1293         return rtnVal;
1294 }
1295
1296 void FactorWithAug::makeNameTree( ParseData *pd )
1297 {
1298         /* Add the labels to the tree of instantiated names. Each label
1299          * makes a new scope. */
1300         NameInst *prevNameInst = pd->curNameInst;
1301         for ( int i = 0; i < labels.length(); i++ )
1302                 pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data, true );
1303
1304         /* Recurse, then pop the names. */
1305         factorWithRep->makeNameTree( pd );
1306         pd->curNameInst = prevNameInst;
1307 }
1308
1309
1310 void FactorWithAug::resolveNameRefs( ParseData *pd )
1311 {
1312         /* Enter into the name scope created by any labels. */
1313         NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1314
1315         /* Note action references. */
1316         for ( int i = 0; i < actions.length(); i++ )
1317                 actions[i].action->actionRefs.append( pd->localNameScope );
1318
1319         /* Recurse first. IMPORTANT: we must do the exact same traversal as when
1320          * the tree is constructed. */
1321         factorWithRep->resolveNameRefs( pd );
1322
1323         /* Resolve epsilon transitions. */
1324         for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) {
1325                 /* Get the link. */
1326                 EpsilonLink &link = epsilonLinks[ep];
1327                 NameInst *resolvedName = 0;
1328
1329                 if ( link.target.length() == 1 && strcmp( link.target.data[0], "final" ) == 0 ) {
1330                         /* Epsilon drawn to an implicit final state. An implicit final is
1331                          * only available in join operations. */
1332                         resolvedName = pd->localNameScope->final;
1333                 }
1334                 else {
1335                         /* Do an search for the name. */
1336                         NameSet resolved;
1337                         pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 );
1338                         if ( resolved.length() > 0 ) {
1339                                 /* Take the first one. */
1340                                 resolvedName = resolved[0];
1341                                 if ( resolved.length() > 1 ) {
1342                                         /* Complain about the multiple references. */
1343                                         error(link.loc) << "state reference " << link.target <<
1344                                                         " resolves to multiple entry points" << endl;
1345                                         errorStateLabels( resolved );
1346                                 }
1347                         }
1348                 }
1349
1350                 /* This is tricky, we stuff resolved epsilon transitions into one long
1351                  * vector in the parse data structure. Since the name resolution and
1352                  * graph generation both do identical walks of the parse tree we
1353                  * should always find the link resolutions in the right place.  */
1354                 pd->epsilonResolvedLinks.append( resolvedName );
1355
1356                 if ( resolvedName != 0 ) {
1357                         /* Found the name, bump of the reference count on it. */
1358                         resolvedName->numRefs += 1;
1359                 }
1360                 else {
1361                         /* Complain, no recovery action, the epsilon op will ignore any
1362                          * epsilon transitions whose names did not resolve. */
1363                         error(link.loc) << "could not resolve label " << link.target << endl;
1364                 }
1365         }
1366
1367         if ( labels.length() > 0 )
1368                 pd->popNameScope( nameFrame );
1369 }
1370
1371
1372 /* Clean up after a factor with repetition node. */
1373 FactorWithRep::~FactorWithRep()
1374 {
1375         switch ( type ) {
1376                 case StarType: case StarStarType: case OptionalType: case PlusType:
1377                 case ExactType: case MaxType: case MinType: case RangeType:
1378                         delete factorWithRep;
1379                         break;
1380                 case FactorWithNegType:
1381                         delete factorWithNeg;
1382                         break;
1383         }
1384 }
1385
1386 /* Evaluate a factor with repetition node. */
1387 FsmAp *FactorWithRep::walk( ParseData *pd )
1388 {
1389         FsmAp *retFsm = 0;
1390
1391         switch ( type ) {
1392         case StarType: {
1393                 /* Evaluate the FactorWithRep. */
1394                 retFsm = factorWithRep->walk( pd );
1395                 if ( retFsm->startState->isFinState() ) {
1396                         warning(loc) << "applying kleene star to a machine that "
1397                                         "accepts zero length word" << endl;
1398                         retFsm->unsetFinState( retFsm->startState );
1399                 }
1400
1401                 /* Shift over the start action orders then do the kleene star. */
1402                 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1403                 retFsm->starOp( );
1404                 afterOpMinimize( retFsm );
1405                 break;
1406         }
1407         case StarStarType: {
1408                 /* Evaluate the FactorWithRep. */
1409                 retFsm = factorWithRep->walk( pd );
1410                 if ( retFsm->startState->isFinState() ) {
1411                         warning(loc) << "applying kleene star to a machine that "
1412                                         "accepts zero length word" << endl;
1413                 }
1414
1415                 /* Set up the prior descs. All gets priority one, whereas leaving gets
1416                  * priority zero. Make a unique key so that these priorities don't
1417                  * interfere with any priorities set by the user. */
1418                 priorDescs[0].key = pd->nextPriorKey++;
1419                 priorDescs[0].priority = 1;
1420                 retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
1421
1422                 /* Leaveing gets priority 0. Use same unique key. */
1423                 priorDescs[1].key = priorDescs[0].key;
1424                 priorDescs[1].priority = 0;
1425                 retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
1426
1427                 /* Shift over the start action orders then do the kleene star. */
1428                 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1429                 retFsm->starOp( );
1430                 afterOpMinimize( retFsm );
1431                 break;
1432         }
1433         case OptionalType: {
1434                 /* Make the null fsm. */
1435                 FsmAp *nu = new FsmAp();
1436                 nu->lambdaFsm( );
1437
1438                 /* Evaluate the FactorWithRep. */
1439                 retFsm = factorWithRep->walk( pd );
1440
1441                 /* Perform the question operator. */
1442                 retFsm->unionOp( nu );
1443                 afterOpMinimize( retFsm );
1444                 break;
1445         }
1446         case PlusType: {
1447                 /* Evaluate the FactorWithRep. */
1448                 retFsm = factorWithRep->walk( pd );
1449                 if ( retFsm->startState->isFinState() ) {
1450                         warning(loc) << "applying plus operator to a machine that "
1451                                         "accepts zero length word" << endl;
1452                 }
1453
1454                 /* Need a duplicated for the star end. */
1455                 FsmAp *dup = new FsmAp( *retFsm );
1456
1457                 /* The start func orders need to be shifted before doing the star. */
1458                 pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd );
1459
1460                 /* Star the duplicate. */
1461                 dup->starOp( );
1462                 afterOpMinimize( dup );
1463
1464                 retFsm->concatOp( dup );
1465                 afterOpMinimize( retFsm );
1466                 break;
1467         }
1468         case ExactType: {
1469                 /* Get an int from the repetition amount. */
1470                 if ( lowerRep == 0 ) {
1471                         /* No copies. Don't need to evaluate the factorWithRep.
1472                          * This Defeats the purpose so give a warning. */
1473                         warning(loc) << "exactly zero repetitions results "
1474                                         "in the null machine" << endl;
1475
1476                         retFsm = new FsmAp();
1477                         retFsm->lambdaFsm();
1478                 }
1479                 else {
1480                         /* Evaluate the first FactorWithRep. */
1481                         retFsm = factorWithRep->walk( pd );
1482                         if ( retFsm->startState->isFinState() ) {
1483                                 warning(loc) << "applying repetition to a machine that "
1484                                                 "accepts zero length word" << endl;
1485                         }
1486
1487                         /* The start func orders need to be shifted before doing the
1488                          * repetition. */
1489                         pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1490
1491                         /* Do the repetition on the machine. Already guarded against n == 0 */
1492                         retFsm->repeatOp( lowerRep );
1493                         afterOpMinimize( retFsm );
1494                 }
1495                 break;
1496         }
1497         case MaxType: {
1498                 /* Get an int from the repetition amount. */
1499                 if ( upperRep == 0 ) {
1500                         /* No copies. Don't need to evaluate the factorWithRep.
1501                          * This Defeats the purpose so give a warning. */
1502                         warning(loc) << "max zero repetitions results "
1503                                         "in the null machine" << endl;
1504
1505                         retFsm = new FsmAp();
1506                         retFsm->lambdaFsm();
1507                 }
1508                 else {
1509                         /* Evaluate the first FactorWithRep. */
1510                         retFsm = factorWithRep->walk( pd );
1511                         if ( retFsm->startState->isFinState() ) {
1512                                 warning(loc) << "applying max repetition to a machine that "
1513                                                 "accepts zero length word" << endl;
1514                         }
1515
1516                         /* The start func orders need to be shifted before doing the
1517                          * repetition. */
1518                         pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1519
1520                         /* Do the repetition on the machine. Already guarded against n == 0 */
1521                         retFsm->optionalRepeatOp( upperRep );
1522                         afterOpMinimize( retFsm );
1523                 }
1524                 break;
1525         }
1526         case MinType: {
1527                 /* Evaluate the repeated machine. */
1528                 retFsm = factorWithRep->walk( pd );
1529                 if ( retFsm->startState->isFinState() ) {
1530                         warning(loc) << "applying min repetition to a machine that "
1531                                         "accepts zero length word" << endl;
1532                 }
1533
1534                 /* The start func orders need to be shifted before doing the repetition
1535                  * and the kleene star. */
1536                 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1537
1538                 if ( lowerRep == 0 ) {
1539                         /* Acts just like a star op on the machine to return. */
1540                         retFsm->starOp( );
1541                         afterOpMinimize( retFsm );
1542                 }
1543                 else {
1544                         /* Take a duplicate for the plus. */
1545                         FsmAp *dup = new FsmAp( *retFsm );
1546
1547                         /* Do repetition on the first half. */
1548                         retFsm->repeatOp( lowerRep );
1549                         afterOpMinimize( retFsm );
1550
1551                         /* Star the duplicate. */
1552                         dup->starOp( );
1553                         afterOpMinimize( dup );
1554
1555                         /* Tak on the kleene star. */
1556                         retFsm->concatOp( dup );
1557                         afterOpMinimize( retFsm );
1558                 }
1559                 break;
1560         }
1561         case RangeType: {
1562                 /* Check for bogus range. */
1563                 if ( upperRep - lowerRep < 0 ) {
1564                         error(loc) << "invalid range repetition" << endl;
1565
1566                         /* Return null machine as recovery. */
1567                         retFsm = new FsmAp();
1568                         retFsm->lambdaFsm();
1569                 }
1570                 else if ( lowerRep == 0 && upperRep == 0 ) {
1571                         /* No copies. Don't need to evaluate the factorWithRep.  This
1572                          * defeats the purpose so give a warning. */
1573                         warning(loc) << "zero to zero repetitions results "
1574                                         "in the null machine" << endl;
1575
1576                         retFsm = new FsmAp();
1577                         retFsm->lambdaFsm();
1578                 }
1579                 else {
1580                         /* Now need to evaluate the repeated machine. */
1581                         retFsm = factorWithRep->walk( pd );
1582                         if ( retFsm->startState->isFinState() ) {
1583                                 warning(loc) << "applying range repetition to a machine that "
1584                                                 "accepts zero length word" << endl;
1585                         }
1586
1587                         /* The start func orders need to be shifted before doing both kinds
1588                          * of repetition. */
1589                         pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1590
1591                         if ( lowerRep == 0 ) {
1592                                 /* Just doing max repetition. Already guarded against n == 0. */
1593                                 retFsm->optionalRepeatOp( upperRep );
1594                                 afterOpMinimize( retFsm );
1595                         }
1596                         else if ( lowerRep == upperRep ) {
1597                                 /* Just doing exact repetition. Already guarded against n == 0. */
1598                                 retFsm->repeatOp( lowerRep );
1599                                 afterOpMinimize( retFsm );
1600                         }
1601                         else {
1602                                 /* This is the case that 0 < lowerRep < upperRep. Take a
1603                                  * duplicate for the optional repeat. */
1604                                 FsmAp *dup = new FsmAp( *retFsm );
1605
1606                                 /* Do repetition on the first half. */
1607                                 retFsm->repeatOp( lowerRep );
1608                                 afterOpMinimize( retFsm );
1609
1610                                 /* Do optional repetition on the second half. */
1611                                 dup->optionalRepeatOp( upperRep - lowerRep );
1612                                 afterOpMinimize( dup );
1613
1614                                 /* Tak on the duplicate machine. */
1615                                 retFsm->concatOp( dup );
1616                                 afterOpMinimize( retFsm );
1617                         }
1618                 }
1619                 break;
1620         }
1621         case FactorWithNegType: {
1622                 /* Evaluate the Factor. Pass it up. */
1623                 retFsm = factorWithNeg->walk( pd );
1624                 break;
1625         }}
1626         return retFsm;
1627 }
1628
1629 void FactorWithRep::makeNameTree( ParseData *pd )
1630 {
1631         switch ( type ) {
1632         case StarType:
1633         case StarStarType:
1634         case OptionalType:
1635         case PlusType:
1636         case ExactType:
1637         case MaxType:
1638         case MinType:
1639         case RangeType:
1640                 factorWithRep->makeNameTree( pd );
1641                 break;
1642         case FactorWithNegType:
1643                 factorWithNeg->makeNameTree( pd );
1644                 break;
1645         }
1646 }
1647
1648 void FactorWithRep::resolveNameRefs( ParseData *pd )
1649 {
1650         switch ( type ) {
1651         case StarType:
1652         case StarStarType:
1653         case OptionalType:
1654         case PlusType:
1655         case ExactType:
1656         case MaxType:
1657         case MinType:
1658         case RangeType:
1659                 factorWithRep->resolveNameRefs( pd );
1660                 break;
1661         case FactorWithNegType:
1662                 factorWithNeg->resolveNameRefs( pd );
1663                 break;
1664         }
1665 }
1666
1667 /* Clean up after a factor with negation node. */
1668 FactorWithNeg::~FactorWithNeg()
1669 {
1670         switch ( type ) {
1671                 case NegateType:
1672                 case CharNegateType:
1673                         delete factorWithNeg;
1674                         break;
1675                 case FactorType:
1676                         delete factor;
1677                         break;
1678         }
1679 }
1680
1681 /* Evaluate a factor with negation node. */
1682 FsmAp *FactorWithNeg::walk( ParseData *pd )
1683 {
1684         FsmAp *retFsm = 0;
1685
1686         switch ( type ) {
1687         case NegateType: {
1688                 /* Evaluate the factorWithNeg. */
1689                 FsmAp *toNegate = factorWithNeg->walk( pd );
1690
1691                 /* Negation is subtract from dot-star. */
1692                 retFsm = dotStarFsm( pd );
1693                 retFsm->subtractOp( toNegate );
1694                 afterOpMinimize( retFsm );
1695                 break;
1696         }
1697         case CharNegateType: {
1698                 /* Evaluate the factorWithNeg. */
1699                 FsmAp *toNegate = factorWithNeg->walk( pd );
1700
1701                 /* CharNegation is subtract from dot. */
1702                 retFsm = dotFsm( pd );
1703                 retFsm->subtractOp( toNegate );
1704                 afterOpMinimize( retFsm );
1705                 break;
1706         }
1707         case FactorType: {
1708                 /* Evaluate the Factor. Pass it up. */
1709                 retFsm = factor->walk( pd );
1710                 break;
1711         }}
1712         return retFsm;
1713 }
1714
1715 void FactorWithNeg::makeNameTree( ParseData *pd )
1716 {
1717         switch ( type ) {
1718         case NegateType:
1719         case CharNegateType:
1720                 factorWithNeg->makeNameTree( pd );
1721                 break;
1722         case FactorType:
1723                 factor->makeNameTree( pd );
1724                 break;
1725         }
1726 }
1727
1728 void FactorWithNeg::resolveNameRefs( ParseData *pd )
1729 {
1730         switch ( type ) {
1731         case NegateType:
1732         case CharNegateType:
1733                 factorWithNeg->resolveNameRefs( pd );
1734                 break;
1735         case FactorType:
1736                 factor->resolveNameRefs( pd );
1737                 break;
1738         }
1739 }
1740
1741 /* Clean up after a factor node. */
1742 Factor::~Factor()
1743 {
1744         switch ( type ) {
1745                 case LiteralType:
1746                         delete literal;
1747                         break;
1748                 case RangeType:
1749                         delete range;
1750                         break;
1751                 case OrExprType:
1752                         delete reItem;
1753                         break;
1754                 case RegExprType:
1755                         delete regExpr;
1756                         break;
1757                 case ReferenceType:
1758                         break;
1759                 case ParenType:
1760                         delete join;
1761                         break;
1762                 case LongestMatchType:
1763                         delete longestMatch;
1764                         break;
1765         }
1766 }
1767
1768 /* Evaluate a factor node. */
1769 FsmAp *Factor::walk( ParseData *pd )
1770 {
1771         FsmAp *rtnVal = 0;
1772         switch ( type ) {
1773         case LiteralType:
1774                 rtnVal = literal->walk( pd );
1775                 break;
1776         case RangeType:
1777                 rtnVal = range->walk( pd );
1778                 break;
1779         case OrExprType:
1780                 rtnVal = reItem->walk( pd, 0 );
1781                 break;
1782         case RegExprType:
1783                 rtnVal = regExpr->walk( pd, 0 );
1784                 break;
1785         case ReferenceType:
1786                 rtnVal = varDef->walk( pd );
1787                 break;
1788         case ParenType:
1789                 rtnVal = join->walk( pd );
1790                 break;
1791         case LongestMatchType:
1792                 rtnVal = longestMatch->walk( pd );
1793                 break;
1794         }
1795
1796         return rtnVal;
1797 }
1798
1799 void Factor::makeNameTree( ParseData *pd )
1800 {
1801         switch ( type ) {
1802         case LiteralType:
1803         case RangeType:
1804         case OrExprType:
1805         case RegExprType:
1806                 break;
1807         case ReferenceType:
1808                 varDef->makeNameTree( loc, pd );
1809                 break;
1810         case ParenType:
1811                 join->makeNameTree( pd );
1812                 break;
1813         case LongestMatchType:
1814                 longestMatch->makeNameTree( pd );
1815                 break;
1816         }
1817 }
1818
1819 void Factor::resolveNameRefs( ParseData *pd )
1820 {
1821         switch ( type ) {
1822         case LiteralType:
1823         case RangeType:
1824         case OrExprType:
1825         case RegExprType:
1826                 break;
1827         case ReferenceType:
1828                 varDef->resolveNameRefs( pd );
1829                 break;
1830         case ParenType:
1831                 join->resolveNameRefs( pd );
1832                 break;
1833         case LongestMatchType:
1834                 longestMatch->resolveNameRefs( pd );
1835                 break;
1836         }
1837 }
1838
1839 /* Clean up a range object. Must delete the two literals. */
1840 Range::~Range()
1841 {
1842         delete lowerLit;
1843         delete upperLit;
1844 }
1845
1846 /* Evaluate a range. Gets the lower an upper key and makes an fsm range. */
1847 FsmAp *Range::walk( ParseData *pd )
1848 {
1849         /* Construct and verify the suitability of the lower end of the range. */
1850         FsmAp *lowerFsm = lowerLit->walk( pd );
1851         if ( !lowerFsm->checkSingleCharMachine() ) {
1852                 error(lowerLit->token.loc) <<
1853                         "bad range lower end, must be a single character" << endl;
1854         }
1855
1856         /* Construct and verify the upper end. */
1857         FsmAp *upperFsm = upperLit->walk( pd );
1858         if ( !upperFsm->checkSingleCharMachine() ) {
1859                 error(upperLit->token.loc) <<
1860                         "bad range upper end, must be a single character" << endl;
1861         }
1862
1863         /* Grab the keys from the machines, then delete them. */
1864         Key lowKey = lowerFsm->startState->outList.head->lowKey;
1865         Key highKey = upperFsm->startState->outList.head->lowKey;
1866         delete lowerFsm;
1867         delete upperFsm;
1868
1869         /* Validate the range. */
1870         if ( lowKey > highKey ) {
1871                 /* Recover by setting upper to lower; */
1872                 error(lowerLit->token.loc) << "lower end of range is greater then upper end" << endl;
1873                 highKey = lowKey;
1874         }
1875
1876         /* Return the range now that it is validated. */
1877         FsmAp *retFsm = new FsmAp();
1878         retFsm->rangeFsm( lowKey, highKey );
1879         return retFsm;
1880 }
1881
1882 /* Evaluate a literal object. */
1883 FsmAp *Literal::walk( ParseData *pd )
1884 {
1885         /* FsmAp to return, is the alphabet signed. */
1886         FsmAp *rtnVal = 0;
1887
1888         switch ( type ) {
1889         case Number: {
1890                 /* Make the fsm key in int format. */
1891                 Key fsmKey = makeFsmKeyNum( token.data, token.loc, pd );
1892                 /* Make the new machine. */
1893                 rtnVal = new FsmAp();
1894                 rtnVal->concatFsm( fsmKey );
1895                 break;
1896         }
1897         case LitString: {
1898                 /* Make the array of keys in int format. */
1899                 long length;
1900                 bool caseInsensitive;
1901                 char *data = prepareLitString( token.loc, token.data, token.length,
1902                                 length, caseInsensitive );
1903                 Key *arr = new Key[length];
1904                 makeFsmKeyArray( arr, data, length, pd );
1905
1906                 /* Make the new machine. */
1907                 rtnVal = new FsmAp();
1908                 if ( caseInsensitive )
1909                         rtnVal->concatFsmCI( arr, length );
1910                 else
1911                         rtnVal->concatFsm( arr, length );
1912                 delete[] data;
1913                 delete[] arr;
1914                 break;
1915         }}
1916         return rtnVal;
1917 }
1918
1919 /* Clean up after a regular expression object. */
1920 RegExpr::~RegExpr()
1921 {
1922         switch ( type ) {
1923                 case RecurseItem:
1924                         delete regExpr;
1925                         delete item;
1926                         break;
1927                 case Empty:
1928                         break;
1929         }
1930 }
1931
1932 /* Evaluate a regular expression object. */
1933 FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex )
1934 {
1935         /* This is the root regex, pass down a pointer to this. */
1936         if ( rootRegex == 0 )
1937                 rootRegex = this;
1938
1939         FsmAp *rtnVal = 0;
1940         switch ( type ) {
1941                 case RecurseItem: {
1942                         /* Walk both items. */
1943                         rtnVal = regExpr->walk( pd, rootRegex );
1944                         FsmAp *fsm2 = item->walk( pd, rootRegex );
1945                         rtnVal->concatOp( fsm2 );
1946                         break;
1947                 }
1948                 case Empty: {
1949                         rtnVal = new FsmAp();
1950                         rtnVal->lambdaFsm();
1951                         break;
1952                 }
1953         }
1954         return rtnVal;
1955 }
1956
1957 /* Clean up after an item in a regular expression. */
1958 ReItem::~ReItem()
1959 {
1960         switch ( type ) {
1961                 case Data:
1962                 case Dot:
1963                         break;
1964                 case OrBlock:
1965                 case NegOrBlock:
1966                         delete orBlock;
1967                         break;
1968         }
1969 }
1970
1971 /* Evaluate a regular expression object. */
1972 FsmAp *ReItem::walk( ParseData *pd, RegExpr *rootRegex )
1973 {
1974         /* The fsm to return, is the alphabet signed? */
1975         FsmAp *rtnVal = 0;
1976
1977         switch ( type ) {
1978                 case Data: {
1979                         /* Move the data into an integer array and make a concat fsm. */
1980                         Key *arr = new Key[token.length];
1981                         makeFsmKeyArray( arr, token.data, token.length, pd );
1982
1983                         /* Make the concat fsm. */
1984                         rtnVal = new FsmAp();
1985                         if ( rootRegex != 0 && rootRegex->caseInsensitive )
1986                                 rtnVal->concatFsmCI( arr, token.length );
1987                         else
1988                                 rtnVal->concatFsm( arr, token.length );
1989                         delete[] arr;
1990                         break;
1991                 }
1992                 case Dot: {
1993                         /* Make the dot fsm. */
1994                         rtnVal = dotFsm( pd );
1995                         break;
1996                 }
1997                 case OrBlock: {
1998                         /* Get the or block and minmize it. */
1999                         rtnVal = orBlock->walk( pd, rootRegex );
2000                         if ( rtnVal == 0 ) {
2001                                 rtnVal = new FsmAp();
2002                                 rtnVal->lambdaFsm();
2003                         }
2004                         rtnVal->minimizePartition2();
2005                         break;
2006                 }
2007                 case NegOrBlock: {
2008                         /* Get the or block and minimize it. */
2009                         FsmAp *fsm = orBlock->walk( pd, rootRegex );
2010                         fsm->minimizePartition2();
2011
2012                         /* Make a dot fsm and subtract from it. */
2013                         rtnVal = dotFsm( pd );
2014                         rtnVal->subtractOp( fsm );
2015                         rtnVal->minimizePartition2();
2016                         break;
2017                 }
2018         }
2019
2020         /* If the item is followed by a star, then apply the star op. */
2021         if ( star ) {
2022                 if ( rtnVal->startState->isFinState() ) {
2023                         warning(loc) << "applying kleene star to a machine that "
2024                                         "accepts zero length word" << endl;
2025                 }
2026
2027                 rtnVal->starOp();
2028                 rtnVal->minimizePartition2();
2029         }
2030         return rtnVal;
2031 }
2032
2033 /* Clean up after an or block of a regular expression. */
2034 ReOrBlock::~ReOrBlock()
2035 {
2036         switch ( type ) {
2037                 case RecurseItem:
2038                         delete orBlock;
2039                         delete item;
2040                         break;
2041                 case Empty:
2042                         break;
2043         }
2044 }
2045
2046
2047 /* Evaluate an or block of a regular expression. */
2048 FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex )
2049 {
2050         FsmAp *rtnVal = 0;
2051         switch ( type ) {
2052                 case RecurseItem: {
2053                         /* Evaluate the two fsm. */
2054                         FsmAp *fsm1 = orBlock->walk( pd, rootRegex );
2055                         FsmAp *fsm2 = item->walk( pd, rootRegex );
2056                         if ( fsm1 == 0 )
2057                                 rtnVal = fsm2;
2058                         else {
2059                                 fsm1->unionOp( fsm2 );
2060                                 rtnVal = fsm1;
2061                         }
2062                         break;
2063                 }
2064                 case Empty: {
2065                         rtnVal = 0;
2066                         break;
2067                 }
2068         }
2069         return rtnVal;;
2070 }
2071
2072 /* Evaluate an or block item of a regular expression. */
2073 FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex )
2074 {
2075         /* The return value, is the alphabet signed? */
2076         FsmAp *rtnVal = 0;
2077         switch ( type ) {
2078         case Data: {
2079                 /* Make the or machine. */
2080                 rtnVal = new FsmAp();
2081
2082                 /* Put the or data into an array of ints. Note that we find unique
2083                  * keys. Duplicates are silently ignored. The alternative would be to
2084                  * issue warning or an error but since we can't with [a0-9a] or 'a' |
2085                  * 'a' don't bother here. */
2086                 KeySet keySet;
2087                 makeFsmUniqueKeyArray( keySet, token.data, token.length,
2088                         rootRegex != 0 ? rootRegex->caseInsensitive : false, pd );
2089
2090                 /* Run the or operator. */
2091                 rtnVal->orFsm( keySet.data, keySet.length() );
2092                 break;
2093         }
2094         case Range: {
2095                 /* Make the upper and lower keys. */
2096                 Key lowKey = makeFsmKeyChar( lower, pd );
2097                 Key highKey = makeFsmKeyChar( upper, pd );
2098
2099                 /* Validate the range. */
2100                 if ( lowKey > highKey ) {
2101                         /* Recover by setting upper to lower; */
2102                         error(loc) << "lower end of range is greater then upper end" << endl;
2103                         highKey = lowKey;
2104                 }
2105
2106                 /* Make the range machine. */
2107                 rtnVal = new FsmAp();
2108                 rtnVal->rangeFsm( lowKey, highKey );
2109
2110                 if ( rootRegex != 0 && rootRegex->caseInsensitive ) {
2111                         if ( lowKey <= 'Z' && 'A' <= highKey ) {
2112                                 Key otherLow = lowKey < 'A' ? Key('A') : lowKey;
2113                                 Key otherHigh = 'Z' < highKey ? Key('Z') : highKey;
2114
2115                                 otherLow = 'a' + ( otherLow - 'A' );
2116                                 otherHigh = 'a' + ( otherHigh - 'A' );
2117
2118                                 FsmAp *otherRange = new FsmAp();
2119                                 otherRange->rangeFsm( otherLow, otherHigh );
2120                                 rtnVal->unionOp( otherRange );
2121                                 rtnVal->minimizePartition2();
2122                         }
2123                         else if ( lowKey <= 'z' && 'a' <= highKey ) {
2124                                 Key otherLow = lowKey < 'a' ? Key('a') : lowKey;
2125                                 Key otherHigh = 'z' < highKey ? Key('z') : highKey;
2126
2127                                 otherLow = 'A' + ( otherLow - 'a' );
2128                                 otherHigh = 'A' + ( otherHigh - 'a' );
2129
2130                                 FsmAp *otherRange = new FsmAp();
2131                                 otherRange->rangeFsm( otherLow, otherHigh );
2132                                 rtnVal->unionOp( otherRange );
2133                                 rtnVal->minimizePartition2();
2134                         }
2135                 }
2136
2137                 break;
2138         }}
2139         return rtnVal;
2140 }