ragel/parsetree.cpp

   1 /*
   2  *  Copyright 2001-2006 Adrian Thurston <thurston@complang.org>
   3  */
   4
   5 /*  This file is part of Ragel.
   6  *
   7  *  Ragel is free software; you can redistribute it and/or modify
   8  *  it under the terms of the GNU General Public License as published by
   9  *  the Free Software Foundation; either version 2 of the License, or
  10  *  (at your option) any later version.
  11  *
  12  *  Ragel is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  *  GNU General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU General Public License
  18  *  along with Ragel; if not, write to the Free Software
  19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  */
  21
  22 #include <iostream>
  23 #include <iomanip>
  24 #include <errno.h>
  25 #include <limits.h>
  26 #include <stdlib.h>
  27
  28 /* Parsing. */
  29 #include "ragel.h"
  30 #include "rlparse.h"
  31 #include "parsetree.h"
  32
  33 using namespace std;
  34 ostream &operator<<( ostream &out, const NameRef &nameRef );
  35 ostream &operator<<( ostream &out, const NameInst &nameInst );
  36
  37 /* Convert the literal string which comes in from the scanner into an array of
  38  * characters with escapes and options interpreted. Also null terminates the
  39  * string. Though this null termination should not be relied on for
  40  * interpreting literals in the parser because the string may contain \0 */
  41 char *prepareLitString( const InputLoc &loc, const char *data, long length,
  42                 long &resLen, bool &caseInsensitive )
  43 {
  44         char *resData = new char[length+1];
  45         caseInsensitive = false;
  46
  47         const char *src = data + 1;
  48         const char *end = data + length - 1;
  49
  50         while ( *end != '\'' && *end != '\"' ) {
  51                 if ( *end == 'i' )
  52                         caseInsensitive = true;
  53                 else {
  54                         error( loc ) << "literal string '" << *end <<
  55                                         "' option not supported" << endl;
  56                 }
  57                 end -= 1;
  58         }
  59
  60         char *dest = resData;
  61         long len = 0;
  62         while ( src != end ) {
  63                 if ( *src == '\\' ) {
  64                         switch ( src[1] ) {
  65                         case '0': dest[len++] = '\0'; break;
  66                         case 'a': dest[len++] = '\a'; break;
  67                         case 'b': dest[len++] = '\b'; break;
  68                         case 't': dest[len++] = '\t'; break;
  69                         case 'n': dest[len++] = '\n'; break;
  70                         case 'v': dest[len++] = '\v'; break;
  71                         case 'f': dest[len++] = '\f'; break;
  72                         case 'r': dest[len++] = '\r'; break;
  73                         case '\n':  break;
  74                         default: dest[len++] = src[1]; break;
  75                         }
  76                         src += 2;
  77                 }
  78                 else {
  79                         dest[len++] = *src++;
  80                 }
  81         }
  82
  83         resLen = len;
  84         resData[resLen] = 0;
  85         return resData;
  86 }
  87
  88 FsmAp *VarDef::walk( ParseData *pd )
  89 {
  90         /* We enter into a new name scope. */
  91         NameFrame nameFrame = pd->enterNameScope( true, 1 );
  92
  93         /* Recurse on the expression. */
  94         FsmAp *rtnVal = machineDef->walk( pd );
  95
  96         /* Do the tranfer of local error actions. */
  97         LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name );
  98         if ( localErrDictEl != 0 ) {
  99                 for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ )
 100                         rtnVal->transferErrorActions( state, localErrDictEl->value );
 101         }
 102
 103         /* If the expression below is a join operation with multiple expressions
 104          * then it just had epsilon transisions resolved. If it is a join
 105          * with only a single expression then run the epsilon op now. */
 106         if ( machineDef->type == MachineDef::JoinType && machineDef->join->exprList.length() == 1 )
 107                 rtnVal->epsilonOp();
 108
 109         /* We can now unset entry points that are not longer used. */
 110         pd->unsetObsoleteEntries( rtnVal );
 111
 112         /* If the name of the variable is referenced then add the entry point to
 113          * the graph. */
 114         if ( pd->curNameInst->numRefs > 0 )
 115                 rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState );
 116
 117         /* Pop the name scope. */
 118         pd->popNameScope( nameFrame );
 119         return rtnVal;
 120 }
 121
 122 void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd )
 123 {
 124         /* The variable definition enters a new scope. */
 125         NameInst *prevNameInst = pd->curNameInst;
 126         pd->curNameInst = pd->addNameInst( loc, name, false );
 127
 128         if ( machineDef->type == MachineDef::LongestMatchType )
 129                 pd->curNameInst->isLongestMatch = true;
 130
 131         /* Recurse. */
 132         machineDef->makeNameTree( pd );
 133
 134         /* The name scope ends, pop the name instantiation. */
 135         pd->curNameInst = prevNameInst;
 136 }
 137
 138 void VarDef::resolveNameRefs( ParseData *pd )
 139 {
 140         /* Entering into a new scope. */
 141         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 142
 143         /* Recurse. */
 144         machineDef->resolveNameRefs( pd );
 145
 146         /* The name scope ends, pop the name instantiation. */
 147         pd->popNameScope( nameFrame );
 148 }
 149
 150 InputLoc LongestMatchPart::getLoc()
 151 {
 152         return action != 0 ? action->loc : semiLoc;
 153 }
 154
 155 /*
 156  * If there are any LMs then all of the following entry points must reset
 157  * tokstart:
 158  *
 159  *  1. fentry(StateRef)
 160  *  2. ftoto(StateRef), fcall(StateRef), fnext(StateRef)
 161  *  3. targt of any transition that has an fcall (the return loc).
 162  *  4. start state of all longest match routines.
 163  */
 164
 165 Action *LongestMatch::newAction( ParseData *pd, const InputLoc &loc,
 166                 const char *name, InlineList *inlineList )
 167 {
 168         Action *action = new Action( loc, name, inlineList, pd->nextCondId++ );
 169         action->actionRefs.append( pd->curNameInst );
 170         pd->actionList.append( action );
 171         action->isLmAction = true;
 172         return action;
 173 }
 174
 175 void LongestMatch::makeActions( ParseData *pd )
 176 {
 177         /* Make actions that set the action id. */
 178         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 179                 /* For each part create actions for setting the match type.  We need
 180                  * to do this so that the actions will go into the actionIndex. */
 181                 InlineList *inlineList = new InlineList;
 182                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
 183                                 InlineItem::LmSetActId ) );
 184                 char *actName = new char[50];
 185                 sprintf( actName, "store%i", lmi->longestMatchId );
 186                 lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList );
 187         }
 188
 189         /* Make actions that execute the user action and restart on the last
 190          * character. */
 191         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 192                 /* For each part create actions for setting the match type.  We need
 193                  * to do this so that the actions will go into the actionIndex. */
 194                 InlineList *inlineList = new InlineList;
 195                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
 196                                 InlineItem::LmOnLast ) );
 197                 char *actName = new char[50];
 198                 sprintf( actName, "last%i", lmi->longestMatchId );
 199                 lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList );
 200         }
 201
 202         /* Make actions that execute the user action and restart on the next
 203          * character.  These actions will set tokend themselves (it is the current
 204          * char). */
 205         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 206                 /* For each part create actions for setting the match type.  We need
 207                  * to do this so that the actions will go into the actionIndex. */
 208                 InlineList *inlineList = new InlineList;
 209                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
 210                                 InlineItem::LmOnNext ) );
 211                 char *actName = new char[50];
 212                 sprintf( actName, "next%i", lmi->longestMatchId );
 213                 lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList );
 214         }
 215
 216         /* Make actions that execute the user action and restart at tokend. These
 217          * actions execute some time after matching the last char. */
 218         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 219                 /* For each part create actions for setting the match type.  We need
 220                  * to do this so that the actions will go into the actionIndex. */
 221                 InlineList *inlineList = new InlineList;
 222                 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
 223                                 InlineItem::LmOnLagBehind ) );
 224                 char *actName = new char[50];
 225                 sprintf( actName, "lag%i", lmi->longestMatchId );
 226                 lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList );
 227         }
 228
 229         InputLoc loc;
 230         loc.line = 1;
 231         loc.col = 1;
 232         loc.fileName = "NONE";
 233
 234         /* Create the error action. */
 235         InlineList *il6 = new InlineList;
 236         il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) );
 237         lmActSelect = newAction( pd, loc, "switch", il6 );
 238 }
 239
 240 void LongestMatch::findName( ParseData *pd )
 241 {
 242         NameInst *nameInst = pd->curNameInst;
 243         while ( nameInst->name == 0 ) {
 244                 nameInst = nameInst->parent;
 245                 /* Since every machine must must have a name, we should always find a
 246                  * name for the longest match. */
 247                 assert( nameInst != 0 );
 248         }
 249         name = nameInst->name;
 250 }
 251
 252 void LongestMatch::makeNameTree( ParseData *pd )
 253 {
 254         /* Create an anonymous scope for the longest match. Will be used for
 255          * restarting machine after matching a token. */
 256         NameInst *prevNameInst = pd->curNameInst;
 257         pd->curNameInst = pd->addNameInst( loc, 0, false );
 258
 259         /* Recurse into all parts of the longest match operator. */
 260         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ )
 261                 lmi->join->makeNameTree( pd );
 262
 263         /* Traverse the name tree upwards to find a name for this lm. */
 264         findName( pd );
 265
 266         /* Also make the longest match's actions at this point. */
 267         makeActions( pd );
 268
 269         /* The name scope ends, pop the name instantiation. */
 270         pd->curNameInst = prevNameInst;
 271 }
 272
 273 void LongestMatch::resolveNameRefs( ParseData *pd )
 274 {
 275         /* The longest match gets its own name scope. */
 276         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 277
 278         /* Take an action reference for each longest match item and recurse. */
 279         for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
 280                 /* Record the reference if the item has an action. */
 281                 if ( lmi->action != 0 )
 282                         lmi->action->actionRefs.append( pd->localNameScope );
 283
 284                 /* Recurse down the join. */
 285                 lmi->join->resolveNameRefs( pd );
 286         }
 287
 288         /* The name scope ends, pop the name instantiation. */
 289         pd->popNameScope( nameFrame );
 290 }
 291
 292 void LongestMatch::restart( FsmAp *graph, TransAp *trans )
 293 {
 294         StateAp *fromState = trans->fromState;
 295         graph->detachTrans( fromState, trans->toState, trans );
 296         graph->attachTrans( fromState, graph->startState, trans );
 297 }
 298
 299 void LongestMatch::runLongestMatch( ParseData *pd, FsmAp *graph )
 300 {
 301         graph->markReachableFromHereStopFinal( graph->startState );
 302         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 303                 if ( ms->stateBits & STB_ISMARKED ) {
 304                         ms->lmItemSet.insert( 0 );
 305                         ms->stateBits &= ~ STB_ISMARKED;
 306                 }
 307         }
 308
 309         /* Transfer the first item of non-empty lmAction tables to the item sets
 310          * of the states that follow. Exclude states that have no transitions out.
 311          * This must happen on a separate pass so that on each iteration of the
 312          * next pass we have the item set entries from all lmAction tables. */
 313         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
 314                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
 315                         if ( trans->lmActionTable.length() > 0 ) {
 316                                 LmActionTableEl *lmAct = trans->lmActionTable.data;
 317                                 StateAp *toState = trans->toState;
 318                                 assert( toState );
 319
 320                                 /* Can only optimize this if there are no transitions out.
 321                                  * Note there can be out transitions going nowhere with
 322                                  * actions and they too must inhibit this optimization. */
 323                                 if ( toState->outList.length() > 0 ) {
 324                                         /* Fill the item sets. */
 325                                         graph->markReachableFromHereStopFinal( toState );
 326                                         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 327                                                 if ( ms->stateBits & STB_ISMARKED ) {
 328                                                         ms->lmItemSet.insert( lmAct->value );
 329                                                         ms->stateBits &= ~ STB_ISMARKED;
 330                                                 }
 331                                         }
 332                                 }
 333                         }
 334                 }
 335         }
 336
 337         /* The lmItem sets are now filled, telling us which longest match rules
 338          * can succeed in which states. First determine if we need to make sure
 339          * act is defaulted to zero. We need to do this if there are any states
 340          * with lmItemSet.length() > 1 and NULL is included. That is, that the
 341          * switch may get called when in fact nothing has been matched. */
 342         int maxItemSetLength = 0;
 343         graph->markReachableFromHereStopFinal( graph->startState );
 344         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 345                 if ( ms->stateBits & STB_ISMARKED ) {
 346                         if ( ms->lmItemSet.length() > maxItemSetLength )
 347                                 maxItemSetLength = ms->lmItemSet.length();
 348                         ms->stateBits &= ~ STB_ISMARKED;
 349                 }
 350         }
 351
 352         /* The actions executed on starting to match a token. */
 353         graph->isolateStartState();
 354         graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart );
 355         graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
 356         if ( maxItemSetLength > 1 ) {
 357                 /* The longest match action switch may be called when tokens are
 358                  * matched, in which case act must be initialized, there must be a
 359                  * case to handle the error, and the generated machine will require an
 360                  * error state. */
 361                 lmSwitchHandlesError = true;
 362                 pd->lmRequiresErrorState = true;
 363                 graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
 364         }
 365
 366         /* The place to store transitions to restart. It maybe possible for the
 367          * restarting to affect the searching through the graph that follows. For
 368          * now take the safe route and save the list of transitions to restart
 369          * until after all searching is done. */
 370         Vector<TransAp*> restartTrans;
 371
 372         /* Set actions that do immediate token recognition, set the longest match part
 373          * id and set the token ending. */
 374         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
 375                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
 376                         if ( trans->lmActionTable.length() > 0 ) {
 377                                 LmActionTableEl *lmAct = trans->lmActionTable.data;
 378                                 StateAp *toState = trans->toState;
 379                                 assert( toState );
 380
 381                                 /* Can only optimize this if there are no transitions out.
 382                                  * Note there can be out transitions going nowhere with
 383                                  * actions and they too must inhibit this optimization. */
 384                                 if ( toState->outList.length() == 0 ) {
 385                                         /* Can execute the immediate action for the longest match
 386                                          * part. Redirect the action to the start state.
 387                                          *
 388                                          * NOTE: When we need to inhibit on_last due to leaving
 389                                          * actions the above test suffices. If the state has out
 390                                          * actions then it will fail because the out action will
 391                                          * have been transferred to an error transition, which
 392                                          * makes the outlist non-empty. */
 393                                         trans->actionTable.setAction( lmAct->key,
 394                                                         lmAct->value->actOnLast );
 395                                         restartTrans.append( trans );
 396                                 }
 397                                 else {
 398                                         /* Look for non final states that have a non-empty item
 399                                          * set. If these are present then we need to record the
 400                                          * end of the token.  Also Find the highest item set
 401                                          * length reachable from here (excluding at transtions to
 402                                          * final states). */
 403                                         bool nonFinalNonEmptyItemSet = false;
 404                                         maxItemSetLength = 0;
 405                                         graph->markReachableFromHereStopFinal( toState );
 406                                         for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
 407                                                 if ( ms->stateBits & STB_ISMARKED ) {
 408                                                         if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
 409                                                                 nonFinalNonEmptyItemSet = true;
 410                                                         if ( ms->lmItemSet.length() > maxItemSetLength )
 411                                                                 maxItemSetLength = ms->lmItemSet.length();
 412                                                         ms->stateBits &= ~ STB_ISMARKED;
 413                                                 }
 414                                         }
 415
 416                                         /* If there are reachable states that are not final and
 417                                          * have non empty item sets or that have an item set
 418                                          * length greater than one then we need to set tokend
 419                                          * because the error action that matches the token will
 420                                          * require it. */
 421                                         if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
 422                                                 trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
 423
 424                                         /* Some states may not know which longest match item to
 425                                          * execute, must set it. */
 426                                         if ( maxItemSetLength > 1 ) {
 427                                                 /* There are transitions out, another match may come. */
 428                                                 trans->actionTable.setAction( lmAct->key,
 429                                                                 lmAct->value->setActId );
 430                                         }
 431                                 }
 432                         }
 433                 }
 434         }
 435
 436         /* Now that all graph searching is done it certainly safe set the
 437          * restarting. It may be safe above, however this must be verified. */
 438         for ( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ )
 439                 restart( graph, *pt );
 440
 441         int lmErrActionOrd = pd->curActionOrd++;
 442
 443         /* Embed the error for recognizing a char. */
 444         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
 445                 if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
 446                         if ( st->isFinState() ) {
 447                                 /* On error execute the onActNext action, which knows that
 448                                  * the last character of the token was one back and restart. */
 449                                 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
 450                                                 &st->lmItemSet[0]->actOnNext, 1 );
 451                                 st->eofActionTable.setAction( lmErrActionOrd,
 452                                                 st->lmItemSet[0]->actOnNext );
 453                                 st->eofTarget = graph->startState;
 454                         }
 455                         else {
 456                                 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
 457                                                 &st->lmItemSet[0]->actLagBehind, 1 );
 458                                 st->eofActionTable.setAction( lmErrActionOrd,
 459                                                 st->lmItemSet[0]->actLagBehind );
 460                                 st->eofTarget = graph->startState;
 461                         }
 462                 }
 463                 else if ( st->lmItemSet.length() > 1 ) {
 464                         /* Need to use the select. Take note of which items the select
 465                          * is needed for so only the necessary actions are included. */
 466                         for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
 467                                 if ( *plmi != 0 )
 468                                         (*plmi)->inLmSelect = true;
 469                         }
 470                         /* On error, execute the action select and go to the start state. */
 471                         graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
 472                                         &lmActSelect, 1 );
 473                         st->eofActionTable.setAction( lmErrActionOrd, lmActSelect );
 474                         st->eofTarget = graph->startState;
 475                 }
 476         }
 477
 478         /* Finally, the start state should be made final. */
 479         graph->setFinState( graph->startState );
 480 }
 481
 482 void LongestMatch::transferScannerLeavingActions( FsmAp *graph )
 483 {
 484         for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
 485                 if ( st->outActionTable.length() > 0 )
 486                         graph->setErrorActions( st, st->outActionTable );
 487         }
 488 }
 489
 490 FsmAp *LongestMatch::walk( ParseData *pd )
 491 {
 492         /* The longest match has it's own name scope. */
 493         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 494
 495         /* Make each part of the longest match. */
 496         FsmAp **parts = new FsmAp*[longestMatchList->length()];
 497         LmPartList::Iter lmi = *longestMatchList;
 498         for ( int i = 0; lmi.lte(); lmi++, i++ ) {
 499                 /* Create the machine and embed the setting of the longest match id. */
 500                 parts[i] = lmi->join->walk( pd );
 501                 parts[i]->longMatchAction( pd->curActionOrd++, lmi );
 502         }
 503
 504         /* Before we union the patterns we need to deal with leaving actions. They
 505          * are transfered to error transitions out of the final states (like local
 506          * error actions) and to eof actions. In the scanner we need to forbid
 507          * on_last for any final state that has an leaving action. */
 508         for ( int i = 0; i < longestMatchList->length(); i++ )
 509                 transferScannerLeavingActions( parts[i] );
 510
 511         /* Union machines one and up with machine zero. The grammar dictates that
 512          * there will always be at least one part. */
 513         FsmAp *rtnVal = parts[0];
 514         for ( int i = 1; i < longestMatchList->length(); i++ ) {
 515                 rtnVal->unionOp( parts[i] );
 516                 afterOpMinimize( rtnVal );
 517         }
 518
 519         runLongestMatch( pd, rtnVal );
 520
 521         /* Pop the name scope. */
 522         pd->popNameScope( nameFrame );
 523
 524         delete[] parts;
 525         return rtnVal;
 526 }
 527
 528 FsmAp *MachineDef::walk( ParseData *pd )
 529 {
 530         FsmAp *rtnVal = 0;
 531         switch ( type ) {
 532         case JoinType:
 533                 rtnVal = join->walk( pd );
 534                 break;
 535         case LongestMatchType:
 536                 rtnVal = longestMatch->walk( pd );
 537                 break;
 538         case LengthDefType:
 539                 condData->lastCondKey.increment();
 540                 rtnVal = new FsmAp();
 541                 rtnVal->concatFsm( condData->lastCondKey );
 542                 break;
 543         }
 544         return rtnVal;
 545 }
 546
 547 void MachineDef::makeNameTree( ParseData *pd )
 548 {
 549         switch ( type ) {
 550         case JoinType:
 551                 join->makeNameTree( pd );
 552                 break;
 553         case LongestMatchType:
 554                 longestMatch->makeNameTree( pd );
 555                 break;
 556         case LengthDefType:
 557                 break;
 558         }
 559 }
 560
 561 void MachineDef::resolveNameRefs( ParseData *pd )
 562 {
 563         switch ( type ) {
 564         case JoinType:
 565                 join->resolveNameRefs( pd );
 566                 break;
 567         case LongestMatchType:
 568                 longestMatch->resolveNameRefs( pd );
 569                 break;
 570         case LengthDefType:
 571                 break;
 572         }
 573 }
 574
 575
 576 /* Construct with a location and the first expression. */
 577 Join::Join( const InputLoc &loc, Expression *expr )
 578 :
 579         loc(loc)
 580 {
 581         exprList.append( expr );
 582 }
 583
 584 /* Construct with a location and the first expression. */
 585 Join::Join( Expression *expr )
 586 :
 587         loc(loc)
 588 {
 589         exprList.append( expr );
 590 }
 591
 592 /* Walk an expression node. */
 593 FsmAp *Join::walk( ParseData *pd )
 594 {
 595         if ( exprList.length() > 1 )
 596                 return walkJoin( pd );
 597         else
 598                 return exprList.head->walk( pd );
 599 }
 600
 601 /* There is a list of expressions to join. */
 602 FsmAp *Join::walkJoin( ParseData *pd )
 603 {
 604         /* We enter into a new name scope. */
 605         NameFrame nameFrame = pd->enterNameScope( true, 1 );
 606
 607         /* Evaluate the machines. */
 608         FsmAp **fsms = new FsmAp*[exprList.length()];
 609         ExprList::Iter expr = exprList;
 610         for ( int e = 0; e < exprList.length(); e++, expr++ )
 611                 fsms[e] = expr->walk( pd );
 612
 613         /* Get the start and final names. Final is
 614          * guaranteed to exist, start is not. */
 615         NameInst *startName = pd->curNameInst->start;
 616         NameInst *finalName = pd->curNameInst->final;
 617
 618         int startId = -1;
 619         if ( startName != 0 ) {
 620                 /* Take note that there was an implicit link to the start machine. */
 621                 pd->localNameScope->referencedNames.append( startName );
 622                 startId = startName->id;
 623         }
 624
 625         /* A final id of -1 indicates there is no epsilon that references the
 626          * final state, therefor do not create one or set an entry point to it. */
 627         int finalId = -1;
 628         if ( finalName->numRefs > 0 )
 629                 finalId = finalName->id;
 630
 631         /* Join machines 1 and up onto machine 0. */
 632         FsmAp *retFsm = fsms[0];
 633         retFsm->joinOp( startId, finalId, fsms+1, exprList.length()-1 );
 634
 635         /* We can now unset entry points that are not longer used. */
 636         pd->unsetObsoleteEntries( retFsm );
 637
 638         /* Pop the name scope. */
 639         pd->popNameScope( nameFrame );
 640
 641         delete[] fsms;
 642         return retFsm;
 643 }
 644
 645 void Join::makeNameTree( ParseData *pd )
 646 {
 647         if ( exprList.length() > 1 ) {
 648                 /* Create the new anonymous scope. */
 649                 NameInst *prevNameInst = pd->curNameInst;
 650                 pd->curNameInst = pd->addNameInst( loc, 0, false );
 651
 652                 /* Join scopes need an implicit "final" target. */
 653                 pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final",
 654                                 pd->nextNameId++, false );
 655
 656                 /* Recurse into all expressions in the list. */
 657                 for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
 658                         expr->makeNameTree( pd );
 659
 660                 /* The name scope ends, pop the name instantiation. */
 661                 pd->curNameInst = prevNameInst;
 662         }
 663         else {
 664                 /* Recurse into the single expression. */
 665                 exprList.head->makeNameTree( pd );
 666         }
 667 }
 668
 669
 670 void Join::resolveNameRefs( ParseData *pd )
 671 {
 672         /* Branch on whether or not there is to be a join. */
 673         if ( exprList.length() > 1 ) {
 674                 /* The variable definition enters a new scope. */
 675                 NameFrame nameFrame = pd->enterNameScope( true, 1 );
 676
 677                 /* The join scope must contain a start label. */
 678                 NameSet resolved = pd->resolvePart( pd->localNameScope, "start", true );
 679                 if ( resolved.length() > 0 ) {
 680                         /* Take the first. */
 681                         pd->curNameInst->start = resolved[0];
 682                         if ( resolved.length() > 1 ) {
 683                                 /* Complain about the multiple references. */
 684                                 error(loc) << "join operation has multiple start labels" << endl;
 685                                 errorStateLabels( resolved );
 686                         }
 687                 }
 688
 689                 /* Make sure there is a start label. */
 690                 if ( pd->curNameInst->start != 0 ) {
 691                         /* There is an implicit reference to start name. */
 692                         pd->curNameInst->start->numRefs += 1;
 693                 }
 694                 else {
 695                         /* No start label. */
 696                         error(loc) << "join operation has no start label" << endl;
 697                 }
 698
 699                 /* Recurse into all expressions in the list. */
 700                 for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
 701                         expr->resolveNameRefs( pd );
 702
 703                 /* The name scope ends, pop the name instantiation. */
 704                 pd->popNameScope( nameFrame );
 705         }
 706         else {
 707                 /* Recurse into the single expression. */
 708                 exprList.head->resolveNameRefs( pd );
 709         }
 710 }
 711
 712 /* Clean up after an expression node. */
 713 Expression::~Expression()
 714 {
 715         switch ( type ) {
 716                 case OrType: case IntersectType: case SubtractType:
 717                 case StrongSubtractType:
 718                         delete expression;
 719                         delete term;
 720                         break;
 721                 case TermType:
 722                         delete term;
 723                         break;
 724                 case BuiltinType:
 725                         break;
 726         }
 727 }
 728
 729 /* Evaluate a single expression node. */
 730 FsmAp *Expression::walk( ParseData *pd, bool lastInSeq )
 731 {
 732         FsmAp *rtnVal = 0;
 733         switch ( type ) {
 734                 case OrType: {
 735                         /* Evaluate the expression. */
 736                         rtnVal = expression->walk( pd, false );
 737                         /* Evaluate the term. */
 738                         FsmAp *rhs = term->walk( pd );
 739                         /* Perform union. */
 740                         rtnVal->unionOp( rhs );
 741                         afterOpMinimize( rtnVal, lastInSeq );
 742                         break;
 743                 }
 744                 case IntersectType: {
 745                         /* Evaluate the expression. */
 746                         rtnVal = expression->walk( pd );
 747                         /* Evaluate the term. */
 748                         FsmAp *rhs = term->walk( pd );
 749                         /* Perform intersection. */
 750                         rtnVal->intersectOp( rhs );
 751                         afterOpMinimize( rtnVal, lastInSeq );
 752                         break;
 753                 }
 754                 case SubtractType: {
 755                         /* Evaluate the expression. */
 756                         rtnVal = expression->walk( pd );
 757                         /* Evaluate the term. */
 758                         FsmAp *rhs = term->walk( pd );
 759                         /* Perform subtraction. */
 760                         rtnVal->subtractOp( rhs );
 761                         afterOpMinimize( rtnVal, lastInSeq );
 762                         break;
 763                 }
 764                 case StrongSubtractType: {
 765                         /* Evaluate the expression. */
 766                         rtnVal = expression->walk( pd );
 767
 768                         /* Evaluate the term and pad it with any* machines. */
 769                         FsmAp *rhs = dotStarFsm( pd );
 770                         FsmAp *termFsm = term->walk( pd );
 771                         FsmAp *trailAnyStar = dotStarFsm( pd );
 772                         rhs->concatOp( termFsm );
 773                         rhs->concatOp( trailAnyStar );
 774
 775                         /* Perform subtraction. */
 776                         rtnVal->subtractOp( rhs );
 777                         afterOpMinimize( rtnVal, lastInSeq );
 778                         break;
 779                 }
 780                 case TermType: {
 781                         /* Return result of the term. */
 782                         rtnVal = term->walk( pd );
 783                         break;
 784                 }
 785                 case BuiltinType: {
 786                         /* Duplicate the builtin. */
 787                         rtnVal = makeBuiltin( builtin, pd );
 788                         break;
 789                 }
 790         }
 791
 792         return rtnVal;
 793 }
 794
 795 void Expression::makeNameTree( ParseData *pd )
 796 {
 797         switch ( type ) {
 798         case OrType:
 799         case IntersectType:
 800         case SubtractType:
 801         case StrongSubtractType:
 802                 expression->makeNameTree( pd );
 803                 term->makeNameTree( pd );
 804                 break;
 805         case TermType:
 806                 term->makeNameTree( pd );
 807                 break;
 808         case BuiltinType:
 809                 break;
 810         }
 811 }
 812
 813 void Expression::resolveNameRefs( ParseData *pd )
 814 {
 815         switch ( type ) {
 816         case OrType:
 817         case IntersectType:
 818         case SubtractType:
 819         case StrongSubtractType:
 820                 expression->resolveNameRefs( pd );
 821                 term->resolveNameRefs( pd );
 822                 break;
 823         case TermType:
 824                 term->resolveNameRefs( pd );
 825                 break;
 826         case BuiltinType:
 827                 break;
 828         }
 829 }
 830
 831 /* Clean up after a term node. */
 832 Term::~Term()
 833 {
 834         switch ( type ) {
 835                 case ConcatType:
 836                 case RightStartType:
 837                 case RightFinishType:
 838                 case LeftType:
 839                         delete term;
 840                         delete factorWithAug;
 841                         break;
 842                 case FactorWithAugType:
 843                         delete factorWithAug;
 844                         break;
 845         }
 846 }
 847
 848 /* Evaluate a term node. */
 849 FsmAp *Term::walk( ParseData *pd, bool lastInSeq )
 850 {
 851         FsmAp *rtnVal = 0;
 852         switch ( type ) {
 853                 case ConcatType: {
 854                         /* Evaluate the Term. */
 855                         rtnVal = term->walk( pd, false );
 856                         /* Evaluate the FactorWithRep. */
 857                         FsmAp *rhs = factorWithAug->walk( pd );
 858                         /* Perform concatenation. */
 859                         rtnVal->concatOp( rhs );
 860                         afterOpMinimize( rtnVal, lastInSeq );
 861                         break;
 862                 }
 863                 case RightStartType: {
 864                         /* Evaluate the Term. */
 865                         rtnVal = term->walk( pd );
 866
 867                         /* Evaluate the FactorWithRep. */
 868                         FsmAp *rhs = factorWithAug->walk( pd );
 869
 870                         /* Set up the priority descriptors. The left machine gets the
 871                          * lower priority where as the right get the higher start priority. */
 872                         priorDescs[0].key = pd->nextPriorKey++;
 873                         priorDescs[0].priority = 0;
 874                         rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
 875
 876                         /* The start transitions of the right machine gets the higher
 877                          * priority. Use the same unique key. */
 878                         priorDescs[1].key = priorDescs[0].key;
 879                         priorDescs[1].priority = 1;
 880                         rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
 881
 882                         /* Perform concatenation. */
 883                         rtnVal->concatOp( rhs );
 884                         afterOpMinimize( rtnVal, lastInSeq );
 885                         break;
 886                 }
 887                 case RightFinishType: {
 888                         /* Evaluate the Term. */
 889                         rtnVal = term->walk( pd );
 890
 891                         /* Evaluate the FactorWithRep. */
 892                         FsmAp *rhs = factorWithAug->walk( pd );
 893
 894                         /* Set up the priority descriptors. The left machine gets the
 895                          * lower priority where as the finishing transitions to the right
 896                          * get the higher priority. */
 897                         priorDescs[0].key = pd->nextPriorKey++;
 898                         priorDescs[0].priority = 0;
 899                         rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
 900
 901                         /* The finishing transitions of the right machine get the higher
 902                          * priority. Use the same unique key. */
 903                         priorDescs[1].key = priorDescs[0].key;
 904                         priorDescs[1].priority = 1;
 905                         rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
 906
 907                         /* If the right machine's start state is final we need to guard
 908                          * against the left machine persisting by moving through the empty
 909                          * string. */
 910                         if ( rhs->startState->isFinState() ) {
 911                                 rhs->startState->outPriorTable.setPrior(
 912                                                 pd->curPriorOrd++, &priorDescs[1] );
 913                         }
 914
 915                         /* Perform concatenation. */
 916                         rtnVal->concatOp( rhs );
 917                         afterOpMinimize( rtnVal, lastInSeq );
 918                         break;
 919                 }
 920                 case LeftType: {
 921                         /* Evaluate the Term. */
 922                         rtnVal = term->walk( pd );
 923
 924                         /* Evaluate the FactorWithRep. */
 925                         FsmAp *rhs = factorWithAug->walk( pd );
 926
 927                         /* Set up the priority descriptors. The left machine gets the
 928                          * higher priority. */
 929                         priorDescs[0].key = pd->nextPriorKey++;
 930                         priorDescs[0].priority = 1;
 931                         rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
 932
 933                         /* The right machine gets the lower priority. We cannot use
 934                          * allTransPrior here in case the start state of the right machine
 935                          * is final. It would allow the right machine thread to run along
 936                          * with the left if just passing through the start state. Using
 937                          * startFsmPrior prevents this. */
 938                         priorDescs[1].key = priorDescs[0].key;
 939                         priorDescs[1].priority = 0;
 940                         rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
 941
 942                         /* Perform concatenation. */
 943                         rtnVal->concatOp( rhs );
 944                         afterOpMinimize( rtnVal, lastInSeq );
 945                         break;
 946                 }
 947                 case FactorWithAugType: {
 948                         rtnVal = factorWithAug->walk( pd );
 949                         break;
 950                 }
 951         }
 952         return rtnVal;
 953 }
 954
 955 void Term::makeNameTree( ParseData *pd )
 956 {
 957         switch ( type ) {
 958         case ConcatType:
 959         case RightStartType:
 960         case RightFinishType:
 961         case LeftType:
 962                 term->makeNameTree( pd );
 963                 factorWithAug->makeNameTree( pd );
 964                 break;
 965         case FactorWithAugType:
 966                 factorWithAug->makeNameTree( pd );
 967                 break;
 968         }
 969 }
 970
 971 void Term::resolveNameRefs( ParseData *pd )
 972 {
 973         switch ( type ) {
 974         case ConcatType:
 975         case RightStartType:
 976         case RightFinishType:
 977         case LeftType:
 978                 term->resolveNameRefs( pd );
 979                 factorWithAug->resolveNameRefs( pd );
 980                 break;
 981         case FactorWithAugType:
 982                 factorWithAug->resolveNameRefs( pd );
 983                 break;
 984         }
 985 }
 986
 987 /* Clean up after a factor with augmentation node. */
 988 FactorWithAug::~FactorWithAug()
 989 {
 990         delete factorWithRep;
 991
 992         /* Walk the vector of parser actions, deleting function names. */
 993
 994         /* Clean up priority descriptors. */
 995         if ( priorDescs != 0 )
 996                 delete[] priorDescs;
 997 }
 998
 999 void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd )
1000 {
1001         /* Assign actions. */
1002         for ( int i = 0; i < actions.length(); i++ )  {
1003                 switch ( actions[i].type ) {
1004                 /* Transition actions. */
1005                 case at_start:
1006                         graph->startFsmAction( actionOrd[i], actions[i].action );
1007                         afterOpMinimize( graph );
1008                         break;
1009                 case at_all:
1010                         graph->allTransAction( actionOrd[i], actions[i].action );
1011                         break;
1012                 case at_finish:
1013                         graph->finishFsmAction( actionOrd[i], actions[i].action );
1014                         break;
1015                 case at_leave:
1016                         graph->leaveFsmAction( actionOrd[i], actions[i].action );
1017                         break;
1018
1019                 /* Global error actions. */
1020                 case at_start_gbl_error:
1021                         graph->startErrorAction( actionOrd[i], actions[i].action, 0 );
1022                         afterOpMinimize( graph );
1023                         break;
1024                 case at_all_gbl_error:
1025                         graph->allErrorAction( actionOrd[i], actions[i].action, 0 );
1026                         break;
1027                 case at_final_gbl_error:
1028                         graph->finalErrorAction( actionOrd[i], actions[i].action, 0 );
1029                         break;
1030                 case at_not_start_gbl_error:
1031                         graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 );
1032                         break;
1033                 case at_not_final_gbl_error:
1034                         graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 );
1035                         break;
1036                 case at_middle_gbl_error:
1037                         graph->middleErrorAction( actionOrd[i], actions[i].action, 0 );
1038                         break;
1039
1040                 /* Local error actions. */
1041                 case at_start_local_error:
1042                         graph->startErrorAction( actionOrd[i], actions[i].action,
1043                                         actions[i].localErrKey );
1044                         afterOpMinimize( graph );
1045                         break;
1046                 case at_all_local_error:
1047                         graph->allErrorAction( actionOrd[i], actions[i].action,
1048                                         actions[i].localErrKey );
1049                         break;
1050                 case at_final_local_error:
1051                         graph->finalErrorAction( actionOrd[i], actions[i].action,
1052                                         actions[i].localErrKey );
1053                         break;
1054                 case at_not_start_local_error:
1055                         graph->notStartErrorAction( actionOrd[i], actions[i].action,
1056                                         actions[i].localErrKey );
1057                         break;
1058                 case at_not_final_local_error:
1059                         graph->notFinalErrorAction( actionOrd[i], actions[i].action,
1060                                         actions[i].localErrKey );
1061                         break;
1062                 case at_middle_local_error:
1063                         graph->middleErrorAction( actionOrd[i], actions[i].action,
1064                                         actions[i].localErrKey );
1065                         break;
1066
1067                 /* EOF actions. */
1068                 case at_start_eof:
1069                         graph->startEOFAction( actionOrd[i], actions[i].action );
1070                         afterOpMinimize( graph );
1071                         break;
1072                 case at_all_eof:
1073                         graph->allEOFAction( actionOrd[i], actions[i].action );
1074                         break;
1075                 case at_final_eof:
1076                         graph->finalEOFAction( actionOrd[i], actions[i].action );
1077                         break;
1078                 case at_not_start_eof:
1079                         graph->notStartEOFAction( actionOrd[i], actions[i].action );
1080                         break;
1081                 case at_not_final_eof:
1082                         graph->notFinalEOFAction( actionOrd[i], actions[i].action );
1083                         break;
1084                 case at_middle_eof:
1085                         graph->middleEOFAction( actionOrd[i], actions[i].action );
1086                         break;
1087
1088                 /* To State Actions. */
1089                 case at_start_to_state:
1090                         graph->startToStateAction( actionOrd[i], actions[i].action );
1091                         afterOpMinimize( graph );
1092                         break;
1093                 case at_all_to_state:
1094                         graph->allToStateAction( actionOrd[i], actions[i].action );
1095                         break;
1096                 case at_final_to_state:
1097                         graph->finalToStateAction( actionOrd[i], actions[i].action );
1098                         break;
1099                 case at_not_start_to_state:
1100                         graph->notStartToStateAction( actionOrd[i], actions[i].action );
1101                         break;
1102                 case at_not_final_to_state:
1103                         graph->notFinalToStateAction( actionOrd[i], actions[i].action );
1104                         break;
1105                 case at_middle_to_state:
1106                         graph->middleToStateAction( actionOrd[i], actions[i].action );
1107                         break;
1108
1109                 /* From State Actions. */
1110                 case at_start_from_state:
1111                         graph->startFromStateAction( actionOrd[i], actions[i].action );
1112                         afterOpMinimize( graph );
1113                         break;
1114                 case at_all_from_state:
1115                         graph->allFromStateAction( actionOrd[i], actions[i].action );
1116                         break;
1117                 case at_final_from_state:
1118                         graph->finalFromStateAction( actionOrd[i], actions[i].action );
1119                         break;
1120                 case at_not_start_from_state:
1121                         graph->notStartFromStateAction( actionOrd[i], actions[i].action );
1122                         break;
1123                 case at_not_final_from_state:
1124                         graph->notFinalFromStateAction( actionOrd[i], actions[i].action );
1125                         break;
1126                 case at_middle_from_state:
1127                         graph->middleFromStateAction( actionOrd[i], actions[i].action );
1128                         break;
1129
1130                 /* Remaining cases, prevented by the parser. */
1131                 default:
1132                         assert( false );
1133                         break;
1134                 }
1135         }
1136 }
1137
1138 void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd )
1139 {
1140         /* Assign priorities. */
1141         for ( int i = 0; i < priorityAugs.length(); i++ ) {
1142                 switch ( priorityAugs[i].type ) {
1143                 case at_start:
1144                         graph->startFsmPrior( priorOrd[i], &priorDescs[i]);
1145                         /* Start fsm priorities are a special case that may require
1146                          * minimization afterwards. */
1147                         afterOpMinimize( graph );
1148                         break;
1149                 case at_all:
1150                         graph->allTransPrior( priorOrd[i], &priorDescs[i] );
1151                         break;
1152                 case at_finish:
1153                         graph->finishFsmPrior( priorOrd[i], &priorDescs[i] );
1154                         break;
1155                 case at_leave:
1156                         graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] );
1157                         break;
1158
1159                 default:
1160                         /* Parser Prevents this case. */
1161                         break;
1162                 }
1163         }
1164 }
1165
1166 void FactorWithAug::assignConditions( FsmAp *graph )
1167 {
1168         for ( int i = 0; i < conditions.length(); i++ )  {
1169                 switch ( conditions[i].type ) {
1170                 /* Transition actions. */
1171                 case at_start:
1172                         graph->startFsmCondition( conditions[i].action, conditions[i].sense );
1173                         afterOpMinimize( graph );
1174                         break;
1175                 case at_all:
1176                         graph->allTransCondition( conditions[i].action, conditions[i].sense );
1177                         break;
1178                 case at_leave:
1179                         graph->leaveFsmCondition( conditions[i].action, conditions[i].sense );
1180                         break;
1181                 default:
1182                         break;
1183                 }
1184         }
1185 }
1186
1187
1188 /* Evaluate a factor with augmentation node. */
1189 FsmAp *FactorWithAug::walk( ParseData *pd )
1190 {
1191         /* Enter into the scopes created for the labels. */
1192         NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1193
1194         /* Make the array of function orderings. */
1195         int *actionOrd = 0;
1196         if ( actions.length() > 0 )
1197                 actionOrd = new int[actions.length()];
1198
1199         /* First walk the list of actions, assigning order to all starting
1200          * actions. */
1201         for ( int i = 0; i < actions.length(); i++ ) {
1202                 if ( actions[i].type == at_start ||
1203                                 actions[i].type == at_start_gbl_error ||
1204                                 actions[i].type == at_start_local_error ||
1205                                 actions[i].type == at_start_to_state ||
1206                                 actions[i].type == at_start_from_state ||
1207                                 actions[i].type == at_start_eof )
1208                         actionOrd[i] = pd->curActionOrd++;
1209         }
1210
1211         /* Evaluate the factor with repetition. */
1212         FsmAp *rtnVal = factorWithRep->walk( pd );
1213
1214         /* Compute the remaining action orderings. */
1215         for ( int i = 0; i < actions.length(); i++ ) {
1216                 if ( actions[i].type != at_start &&
1217                                 actions[i].type != at_start_gbl_error &&
1218                                 actions[i].type != at_start_local_error &&
1219                                 actions[i].type != at_start_to_state &&
1220                                 actions[i].type != at_start_from_state &&
1221                                 actions[i].type != at_start_eof )
1222                         actionOrd[i] = pd->curActionOrd++;
1223         }
1224
1225         /* Embed conditions. */
1226         assignConditions( rtnVal );
1227
1228         /* Embed actions. */
1229         assignActions( pd, rtnVal , actionOrd );
1230
1231         /* Make the array of priority orderings. Orderings are local to this walk
1232          * of the factor with augmentation. */
1233         int *priorOrd = 0;
1234         if ( priorityAugs.length() > 0 )
1235                 priorOrd = new int[priorityAugs.length()];
1236
1237         /* Walk all priorities, assigning the priority ordering. */
1238         for ( int i = 0; i < priorityAugs.length(); i++ )
1239                 priorOrd[i] = pd->curPriorOrd++;
1240
1241         /* If the priority descriptors have not been made, make them now.  Make
1242          * priority descriptors for each priority asignment that will be passed to
1243          * the fsm. Used to keep track of the key, value and used bit. */
1244         if ( priorDescs == 0 && priorityAugs.length() > 0 ) {
1245                 priorDescs = new PriorDesc[priorityAugs.length()];
1246                 for ( int i = 0; i < priorityAugs.length(); i++ ) {
1247                         /* Init the prior descriptor for the priority setting. */
1248                         priorDescs[i].key = priorityAugs[i].priorKey;
1249                         priorDescs[i].priority = priorityAugs[i].priorValue;
1250                 }
1251         }
1252
1253         /* Assign priorities into the machine. */
1254         assignPriorities( rtnVal, priorOrd );
1255
1256         /* Assign epsilon transitions. */
1257         for ( int e = 0; e < epsilonLinks.length(); e++ ) {
1258                 /* Get the name, which may not exist. If it doesn't then silently
1259                  * ignore it because an error has already been reported. */
1260                 NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++];
1261                 if ( epTarg != 0 ) {
1262                         /* Make the epsilon transitions. */
1263                         rtnVal->epsilonTrans( epTarg->id );
1264
1265                         /* Note that we have made a link to the name. */
1266                         pd->localNameScope->referencedNames.append( epTarg );
1267                 }
1268         }
1269
1270         /* Set entry points for labels. */
1271         if ( labels.length() > 0 ) {
1272                 /* Pop the names. */
1273                 pd->resetNameScope( nameFrame );
1274
1275                 /* Make labels that are referenced into entry points. */
1276                 for ( int i = 0; i < labels.length(); i++ ) {
1277                         pd->enterNameScope( false, 1 );
1278
1279                         /* Will always be found. */
1280                         NameInst *name = pd->curNameInst;
1281
1282                         /* If the name is referenced then set the entry point. */
1283                         if ( name->numRefs > 0 )
1284                                 rtnVal->setEntry( name->id, rtnVal->startState );
1285                 }
1286
1287                 pd->popNameScope( nameFrame );
1288         }
1289
1290         if ( priorOrd != 0 )
1291                 delete[] priorOrd;
1292         if ( actionOrd != 0 )
1293                 delete[] actionOrd;
1294         return rtnVal;
1295 }
1296
1297 void FactorWithAug::makeNameTree( ParseData *pd )
1298 {
1299         /* Add the labels to the tree of instantiated names. Each label
1300          * makes a new scope. */
1301         NameInst *prevNameInst = pd->curNameInst;
1302         for ( int i = 0; i < labels.length(); i++ )
1303                 pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data, true );
1304
1305         /* Recurse, then pop the names. */
1306         factorWithRep->makeNameTree( pd );
1307         pd->curNameInst = prevNameInst;
1308 }
1309
1310
1311 void FactorWithAug::resolveNameRefs( ParseData *pd )
1312 {
1313         /* Enter into the name scope created by any labels. */
1314         NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1315
1316         /* Note action references. */
1317         for ( int i = 0; i < actions.length(); i++ )
1318                 actions[i].action->actionRefs.append( pd->localNameScope );
1319
1320         /* Recurse first. IMPORTANT: we must do the exact same traversal as when
1321          * the tree is constructed. */
1322         factorWithRep->resolveNameRefs( pd );
1323
1324         /* Resolve epsilon transitions. */
1325         for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) {
1326                 /* Get the link. */
1327                 EpsilonLink &link = epsilonLinks[ep];
1328                 NameInst *resolvedName = 0;
1329
1330                 if ( link.target.length() == 1 && strcmp( link.target.data[0], "final" ) == 0 ) {
1331                         /* Epsilon drawn to an implicit final state. An implicit final is
1332                          * only available in join operations. */
1333                         resolvedName = pd->localNameScope->final;
1334                 }
1335                 else {
1336                         /* Do an search for the name. */
1337                         NameSet resolved;
1338                         pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 );
1339                         if ( resolved.length() > 0 ) {
1340                                 /* Take the first one. */
1341                                 resolvedName = resolved[0];
1342                                 if ( resolved.length() > 1 ) {
1343                                         /* Complain about the multiple references. */
1344                                         error(link.loc) << "state reference " << link.target <<
1345                                                         " resolves to multiple entry points" << endl;
1346                                         errorStateLabels( resolved );
1347                                 }
1348                         }
1349                 }
1350
1351                 /* This is tricky, we stuff resolved epsilon transitions into one long
1352                  * vector in the parse data structure. Since the name resolution and
1353                  * graph generation both do identical walks of the parse tree we
1354                  * should always find the link resolutions in the right place.  */
1355                 pd->epsilonResolvedLinks.append( resolvedName );
1356
1357                 if ( resolvedName != 0 ) {
1358                         /* Found the name, bump of the reference count on it. */
1359                         resolvedName->numRefs += 1;
1360                 }
1361                 else {
1362                         /* Complain, no recovery action, the epsilon op will ignore any
1363                          * epsilon transitions whose names did not resolve. */
1364                         error(link.loc) << "could not resolve label " << link.target << endl;
1365                 }
1366         }
1367
1368         if ( labels.length() > 0 )
1369                 pd->popNameScope( nameFrame );
1370 }
1371
1372
1373 /* Clean up after a factor with repetition node. */
1374 FactorWithRep::~FactorWithRep()
1375 {
1376         switch ( type ) {
1377                 case StarType: case StarStarType: case OptionalType: case PlusType:
1378                 case ExactType: case MaxType: case MinType: case RangeType:
1379                         delete factorWithRep;
1380                         break;
1381                 case FactorWithNegType:
1382                         delete factorWithNeg;
1383                         break;
1384         }
1385 }
1386
1387 /* Evaluate a factor with repetition node. */
1388 FsmAp *FactorWithRep::walk( ParseData *pd )
1389 {
1390         FsmAp *retFsm = 0;
1391
1392         switch ( type ) {
1393         case StarType: {
1394                 /* Evaluate the FactorWithRep. */
1395                 retFsm = factorWithRep->walk( pd );
1396                 if ( retFsm->startState->isFinState() ) {
1397                         warning(loc) << "applying kleene star to a machine that "
1398                                         "accepts zero length word" << endl;
1399                         retFsm->unsetFinState( retFsm->startState );
1400                 }
1401
1402                 /* Shift over the start action orders then do the kleene star. */
1403                 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1404                 retFsm->starOp( );
1405                 afterOpMinimize( retFsm );
1406                 break;
1407         }
1408         case StarStarType: {
1409                 /* Evaluate the FactorWithRep. */
1410                 retFsm = factorWithRep->walk( pd );
1411                 if ( retFsm->startState->isFinState() ) {
1412                         warning(loc) << "applying kleene star to a machine that "
1413                                         "accepts zero length word" << endl;
1414                 }
1415
1416                 /* Set up the prior descs. All gets priority one, whereas leaving gets
1417                  * priority zero. Make a unique key so that these priorities don't
1418                  * interfere with any priorities set by the user. */
1419                 priorDescs[0].key = pd->nextPriorKey++;
1420                 priorDescs[0].priority = 1;
1421                 retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
1422
1423                 /* Leaveing gets priority 0. Use same unique key. */
1424                 priorDescs[1].key = priorDescs[0].key;
1425                 priorDescs[1].priority = 0;
1426                 retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
1427
1428                 /* Shift over the start action orders then do the kleene star. */
1429                 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1430                 retFsm->starOp( );
1431                 afterOpMinimize( retFsm );
1432                 break;
1433         }
1434         case OptionalType: {
1435                 /* Make the null fsm. */
1436                 FsmAp *nu = new FsmAp();
1437                 nu->lambdaFsm( );
1438
1439                 /* Evaluate the FactorWithRep. */
1440                 retFsm = factorWithRep->walk( pd );
1441
1442                 /* Perform the question operator. */
1443                 retFsm->unionOp( nu );
1444                 afterOpMinimize( retFsm );
1445                 break;
1446         }
1447         case PlusType: {
1448                 /* Evaluate the FactorWithRep. */
1449                 retFsm = factorWithRep->walk( pd );
1450                 if ( retFsm->startState->isFinState() ) {
1451                         warning(loc) << "applying plus operator to a machine that "
1452                                         "accepts zero length word" << endl;
1453                 }
1454
1455                 /* Need a duplicated for the star end. */
1456                 FsmAp *dup = new FsmAp( *retFsm );
1457
1458                 /* The start func orders need to be shifted before doing the star. */
1459                 pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd );
1460
1461                 /* Star the duplicate. */
1462                 dup->starOp( );
1463                 afterOpMinimize( dup );
1464
1465                 retFsm->concatOp( dup );
1466                 afterOpMinimize( retFsm );
1467                 break;
1468         }
1469         case ExactType: {
1470                 /* Get an int from the repetition amount. */
1471                 if ( lowerRep == 0 ) {
1472                         /* No copies. Don't need to evaluate the factorWithRep.
1473                          * This Defeats the purpose so give a warning. */
1474                         warning(loc) << "exactly zero repetitions results "
1475                                         "in the null machine" << endl;
1476
1477                         retFsm = new FsmAp();
1478                         retFsm->lambdaFsm();
1479                 }
1480                 else {
1481                         /* Evaluate the first FactorWithRep. */
1482                         retFsm = factorWithRep->walk( pd );
1483                         if ( retFsm->startState->isFinState() ) {
1484                                 warning(loc) << "applying repetition to a machine that "
1485                                                 "accepts zero length word" << endl;
1486                         }
1487
1488                         /* The start func orders need to be shifted before doing the
1489                          * repetition. */
1490                         pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1491
1492                         /* Do the repetition on the machine. Already guarded against n == 0 */
1493                         retFsm->repeatOp( lowerRep );
1494                         afterOpMinimize( retFsm );
1495                 }
1496                 break;
1497         }
1498         case MaxType: {
1499                 /* Get an int from the repetition amount. */
1500                 if ( upperRep == 0 ) {
1501                         /* No copies. Don't need to evaluate the factorWithRep.
1502                          * This Defeats the purpose so give a warning. */
1503                         warning(loc) << "max zero repetitions results "
1504                                         "in the null machine" << endl;
1505
1506                         retFsm = new FsmAp();
1507                         retFsm->lambdaFsm();
1508                 }
1509                 else {
1510                         /* Evaluate the first FactorWithRep. */
1511                         retFsm = factorWithRep->walk( pd );
1512                         if ( retFsm->startState->isFinState() ) {
1513                                 warning(loc) << "applying max repetition to a machine that "
1514                                                 "accepts zero length word" << endl;
1515                         }
1516
1517                         /* The start func orders need to be shifted before doing the
1518                          * repetition. */
1519                         pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1520
1521                         /* Do the repetition on the machine. Already guarded against n == 0 */
1522                         retFsm->optionalRepeatOp( upperRep );
1523                         afterOpMinimize( retFsm );
1524                 }
1525                 break;
1526         }
1527         case MinType: {
1528                 /* Evaluate the repeated machine. */
1529                 retFsm = factorWithRep->walk( pd );
1530                 if ( retFsm->startState->isFinState() ) {
1531                         warning(loc) << "applying min repetition to a machine that "
1532                                         "accepts zero length word" << endl;
1533                 }
1534
1535                 /* The start func orders need to be shifted before doing the repetition
1536                  * and the kleene star. */
1537                 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1538
1539                 if ( lowerRep == 0 ) {
1540                         /* Acts just like a star op on the machine to return. */
1541                         retFsm->starOp( );
1542                         afterOpMinimize( retFsm );
1543                 }
1544                 else {
1545                         /* Take a duplicate for the plus. */
1546                         FsmAp *dup = new FsmAp( *retFsm );
1547
1548                         /* Do repetition on the first half. */
1549                         retFsm->repeatOp( lowerRep );
1550                         afterOpMinimize( retFsm );
1551
1552                         /* Star the duplicate. */
1553                         dup->starOp( );
1554                         afterOpMinimize( dup );
1555
1556                         /* Tak on the kleene star. */
1557                         retFsm->concatOp( dup );
1558                         afterOpMinimize( retFsm );
1559                 }
1560                 break;
1561         }
1562         case RangeType: {
1563                 /* Check for bogus range. */
1564                 if ( upperRep - lowerRep < 0 ) {
1565                         error(loc) << "invalid range repetition" << endl;
1566
1567                         /* Return null machine as recovery. */
1568                         retFsm = new FsmAp();
1569                         retFsm->lambdaFsm();
1570                 }
1571                 else if ( lowerRep == 0 && upperRep == 0 ) {
1572                         /* No copies. Don't need to evaluate the factorWithRep.  This
1573                          * defeats the purpose so give a warning. */
1574                         warning(loc) << "zero to zero repetitions results "
1575                                         "in the null machine" << endl;
1576
1577                         retFsm = new FsmAp();
1578                         retFsm->lambdaFsm();
1579                 }
1580                 else {
1581                         /* Now need to evaluate the repeated machine. */
1582                         retFsm = factorWithRep->walk( pd );
1583                         if ( retFsm->startState->isFinState() ) {
1584                                 warning(loc) << "applying range repetition to a machine that "
1585                                                 "accepts zero length word" << endl;
1586                         }
1587
1588                         /* The start func orders need to be shifted before doing both kinds
1589                          * of repetition. */
1590                         pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1591
1592                         if ( lowerRep == 0 ) {
1593                                 /* Just doing max repetition. Already guarded against n == 0. */
1594                                 retFsm->optionalRepeatOp( upperRep );
1595                                 afterOpMinimize( retFsm );
1596                         }
1597                         else if ( lowerRep == upperRep ) {
1598                                 /* Just doing exact repetition. Already guarded against n == 0. */
1599                                 retFsm->repeatOp( lowerRep );
1600                                 afterOpMinimize( retFsm );
1601                         }
1602                         else {
1603                                 /* This is the case that 0 < lowerRep < upperRep. Take a
1604                                  * duplicate for the optional repeat. */
1605                                 FsmAp *dup = new FsmAp( *retFsm );
1606
1607                                 /* Do repetition on the first half. */
1608                                 retFsm->repeatOp( lowerRep );
1609                                 afterOpMinimize( retFsm );
1610
1611                                 /* Do optional repetition on the second half. */
1612                                 dup->optionalRepeatOp( upperRep - lowerRep );
1613                                 afterOpMinimize( dup );
1614
1615                                 /* Tak on the duplicate machine. */
1616                                 retFsm->concatOp( dup );
1617                                 afterOpMinimize( retFsm );
1618                         }
1619                 }
1620                 break;
1621         }
1622         case FactorWithNegType: {
1623                 /* Evaluate the Factor. Pass it up. */
1624                 retFsm = factorWithNeg->walk( pd );
1625                 break;
1626         }}
1627         return retFsm;
1628 }
1629
1630 void FactorWithRep::makeNameTree( ParseData *pd )
1631 {
1632         switch ( type ) {
1633         case StarType:
1634         case StarStarType:
1635         case OptionalType:
1636         case PlusType:
1637         case ExactType:
1638         case MaxType:
1639         case MinType:
1640         case RangeType:
1641                 factorWithRep->makeNameTree( pd );
1642                 break;
1643         case FactorWithNegType:
1644                 factorWithNeg->makeNameTree( pd );
1645                 break;
1646         }
1647 }
1648
1649 void FactorWithRep::resolveNameRefs( ParseData *pd )
1650 {
1651         switch ( type ) {
1652         case StarType:
1653         case StarStarType:
1654         case OptionalType:
1655         case PlusType:
1656         case ExactType:
1657         case MaxType:
1658         case MinType:
1659         case RangeType:
1660                 factorWithRep->resolveNameRefs( pd );
1661                 break;
1662         case FactorWithNegType:
1663                 factorWithNeg->resolveNameRefs( pd );
1664                 break;
1665         }
1666 }
1667
1668 /* Clean up after a factor with negation node. */
1669 FactorWithNeg::~FactorWithNeg()
1670 {
1671         switch ( type ) {
1672                 case NegateType:
1673                 case CharNegateType:
1674                         delete factorWithNeg;
1675                         break;
1676                 case FactorType:
1677                         delete factor;
1678                         break;
1679         }
1680 }
1681
1682 /* Evaluate a factor with negation node. */
1683 FsmAp *FactorWithNeg::walk( ParseData *pd )
1684 {
1685         FsmAp *retFsm = 0;
1686
1687         switch ( type ) {
1688         case NegateType: {
1689                 /* Evaluate the factorWithNeg. */
1690                 FsmAp *toNegate = factorWithNeg->walk( pd );
1691
1692                 /* Negation is subtract from dot-star. */
1693                 retFsm = dotStarFsm( pd );
1694                 retFsm->subtractOp( toNegate );
1695                 afterOpMinimize( retFsm );
1696                 break;
1697         }
1698         case CharNegateType: {
1699                 /* Evaluate the factorWithNeg. */
1700                 FsmAp *toNegate = factorWithNeg->walk( pd );
1701
1702                 /* CharNegation is subtract from dot. */
1703                 retFsm = dotFsm( pd );
1704                 retFsm->subtractOp( toNegate );
1705                 afterOpMinimize( retFsm );
1706                 break;
1707         }
1708         case FactorType: {
1709                 /* Evaluate the Factor. Pass it up. */
1710                 retFsm = factor->walk( pd );
1711                 break;
1712         }}
1713         return retFsm;
1714 }
1715
1716 void FactorWithNeg::makeNameTree( ParseData *pd )
1717 {
1718         switch ( type ) {
1719         case NegateType:
1720         case CharNegateType:
1721                 factorWithNeg->makeNameTree( pd );
1722                 break;
1723         case FactorType:
1724                 factor->makeNameTree( pd );
1725                 break;
1726         }
1727 }
1728
1729 void FactorWithNeg::resolveNameRefs( ParseData *pd )
1730 {
1731         switch ( type ) {
1732         case NegateType:
1733         case CharNegateType:
1734                 factorWithNeg->resolveNameRefs( pd );
1735                 break;
1736         case FactorType:
1737                 factor->resolveNameRefs( pd );
1738                 break;
1739         }
1740 }
1741
1742 /* Clean up after a factor node. */
1743 Factor::~Factor()
1744 {
1745         switch ( type ) {
1746                 case LiteralType:
1747                         delete literal;
1748                         break;
1749                 case RangeType:
1750                         delete range;
1751                         break;
1752                 case OrExprType:
1753                         delete reItem;
1754                         break;
1755                 case RegExprType:
1756                         delete regExpr;
1757                         break;
1758                 case ReferenceType:
1759                         break;
1760                 case ParenType:
1761                         delete join;
1762                         break;
1763                 case LongestMatchType:
1764                         delete longestMatch;
1765                         break;
1766         }
1767 }
1768
1769 /* Evaluate a factor node. */
1770 FsmAp *Factor::walk( ParseData *pd )
1771 {
1772         FsmAp *rtnVal = 0;
1773         switch ( type ) {
1774         case LiteralType:
1775                 rtnVal = literal->walk( pd );
1776                 break;
1777         case RangeType:
1778                 rtnVal = range->walk( pd );
1779                 break;
1780         case OrExprType:
1781                 rtnVal = reItem->walk( pd, 0 );
1782                 break;
1783         case RegExprType:
1784                 rtnVal = regExpr->walk( pd, 0 );
1785                 break;
1786         case ReferenceType:
1787                 rtnVal = varDef->walk( pd );
1788                 break;
1789         case ParenType:
1790                 rtnVal = join->walk( pd );
1791                 break;
1792         case LongestMatchType:
1793                 rtnVal = longestMatch->walk( pd );
1794                 break;
1795         }
1796
1797         return rtnVal;
1798 }
1799
1800 void Factor::makeNameTree( ParseData *pd )
1801 {
1802         switch ( type ) {
1803         case LiteralType:
1804         case RangeType:
1805         case OrExprType:
1806         case RegExprType:
1807                 break;
1808         case ReferenceType:
1809                 varDef->makeNameTree( loc, pd );
1810                 break;
1811         case ParenType:
1812                 join->makeNameTree( pd );
1813                 break;
1814         case LongestMatchType:
1815                 longestMatch->makeNameTree( pd );
1816                 break;
1817         }
1818 }
1819
1820 void Factor::resolveNameRefs( ParseData *pd )
1821 {
1822         switch ( type ) {
1823         case LiteralType:
1824         case RangeType:
1825         case OrExprType:
1826         case RegExprType:
1827                 break;
1828         case ReferenceType:
1829                 varDef->resolveNameRefs( pd );
1830                 break;
1831         case ParenType:
1832                 join->resolveNameRefs( pd );
1833                 break;
1834         case LongestMatchType:
1835                 longestMatch->resolveNameRefs( pd );
1836                 break;
1837         }
1838 }
1839
1840 /* Clean up a range object. Must delete the two literals. */
1841 Range::~Range()
1842 {
1843         delete lowerLit;
1844         delete upperLit;
1845 }
1846
1847 /* Evaluate a range. Gets the lower an upper key and makes an fsm range. */
1848 FsmAp *Range::walk( ParseData *pd )
1849 {
1850         /* Construct and verify the suitability of the lower end of the range. */
1851         FsmAp *lowerFsm = lowerLit->walk( pd );
1852         if ( !lowerFsm->checkSingleCharMachine() ) {
1853                 error(lowerLit->token.loc) <<
1854                         "bad range lower end, must be a single character" << endl;
1855         }
1856
1857         /* Construct and verify the upper end. */
1858         FsmAp *upperFsm = upperLit->walk( pd );
1859         if ( !upperFsm->checkSingleCharMachine() ) {
1860                 error(upperLit->token.loc) <<
1861                         "bad range upper end, must be a single character" << endl;
1862         }
1863
1864         /* Grab the keys from the machines, then delete them. */
1865         Key lowKey = lowerFsm->startState->outList.head->lowKey;
1866         Key highKey = upperFsm->startState->outList.head->lowKey;
1867         delete lowerFsm;
1868         delete upperFsm;
1869
1870         /* Validate the range. */
1871         if ( lowKey > highKey ) {
1872                 /* Recover by setting upper to lower; */
1873                 error(lowerLit->token.loc) << "lower end of range is greater then upper end" << endl;
1874                 highKey = lowKey;
1875         }
1876
1877         /* Return the range now that it is validated. */
1878         FsmAp *retFsm = new FsmAp();
1879         retFsm->rangeFsm( lowKey, highKey );
1880         return retFsm;
1881 }
1882
1883 /* Evaluate a literal object. */
1884 FsmAp *Literal::walk( ParseData *pd )
1885 {
1886         /* FsmAp to return, is the alphabet signed. */
1887         FsmAp *rtnVal = 0;
1888
1889         switch ( type ) {
1890         case Number: {
1891                 /* Make the fsm key in int format. */
1892                 Key fsmKey = makeFsmKeyNum( token.data, token.loc, pd );
1893                 /* Make the new machine. */
1894                 rtnVal = new FsmAp();
1895                 rtnVal->concatFsm( fsmKey );
1896                 break;
1897         }
1898         case LitString: {
1899                 /* Make the array of keys in int format. */
1900                 long length;
1901                 bool caseInsensitive;
1902                 char *data = prepareLitString( token.loc, token.data, token.length,
1903                                 length, caseInsensitive );
1904                 Key *arr = new Key[length];
1905                 makeFsmKeyArray( arr, data, length, pd );
1906
1907                 /* Make the new machine. */
1908                 rtnVal = new FsmAp();
1909                 if ( caseInsensitive )
1910                         rtnVal->concatFsmCI( arr, length );
1911                 else
1912                         rtnVal->concatFsm( arr, length );
1913                 delete[] data;
1914                 delete[] arr;
1915                 break;
1916         }}
1917         return rtnVal;
1918 }
1919
1920 /* Clean up after a regular expression object. */
1921 RegExpr::~RegExpr()
1922 {
1923         switch ( type ) {
1924                 case RecurseItem:
1925                         delete regExpr;
1926                         delete item;
1927                         break;
1928                 case Empty:
1929                         break;
1930         }
1931 }
1932
1933 /* Evaluate a regular expression object. */
1934 FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex )
1935 {
1936         /* This is the root regex, pass down a pointer to this. */
1937         if ( rootRegex == 0 )
1938                 rootRegex = this;
1939
1940         FsmAp *rtnVal = 0;
1941         switch ( type ) {
1942                 case RecurseItem: {
1943                         /* Walk both items. */
1944                         rtnVal = regExpr->walk( pd, rootRegex );
1945                         FsmAp *fsm2 = item->walk( pd, rootRegex );
1946                         rtnVal->concatOp( fsm2 );
1947                         break;
1948                 }
1949                 case Empty: {
1950                         rtnVal = new FsmAp();
1951                         rtnVal->lambdaFsm();
1952                         break;
1953                 }
1954         }
1955         return rtnVal;
1956 }
1957
1958 /* Clean up after an item in a regular expression. */
1959 ReItem::~ReItem()
1960 {
1961         switch ( type ) {
1962                 case Data:
1963                 case Dot:
1964                         break;
1965                 case OrBlock:
1966                 case NegOrBlock:
1967                         delete orBlock;
1968                         break;
1969         }
1970 }
1971
1972 /* Evaluate a regular expression object. */
1973 FsmAp *ReItem::walk( ParseData *pd, RegExpr *rootRegex )
1974 {
1975         /* The fsm to return, is the alphabet signed? */
1976         FsmAp *rtnVal = 0;
1977
1978         switch ( type ) {
1979                 case Data: {
1980                         /* Move the data into an integer array and make a concat fsm. */
1981                         Key *arr = new Key[token.length];
1982                         makeFsmKeyArray( arr, token.data, token.length, pd );
1983
1984                         /* Make the concat fsm. */
1985                         rtnVal = new FsmAp();
1986                         if ( rootRegex != 0 && rootRegex->caseInsensitive )
1987                                 rtnVal->concatFsmCI( arr, token.length );
1988                         else
1989                                 rtnVal->concatFsm( arr, token.length );
1990                         delete[] arr;
1991                         break;
1992                 }
1993                 case Dot: {
1994                         /* Make the dot fsm. */
1995                         rtnVal = dotFsm( pd );
1996                         break;
1997                 }
1998                 case OrBlock: {
1999                         /* Get the or block and minmize it. */
2000                         rtnVal = orBlock->walk( pd, rootRegex );
2001                         if ( rtnVal == 0 ) {
2002                                 rtnVal = new FsmAp();
2003                                 rtnVal->lambdaFsm();
2004                         }
2005                         rtnVal->minimizePartition2();
2006                         break;
2007                 }
2008                 case NegOrBlock: {
2009                         /* Get the or block and minimize it. */
2010                         FsmAp *fsm = orBlock->walk( pd, rootRegex );
2011                         fsm->minimizePartition2();
2012
2013                         /* Make a dot fsm and subtract from it. */
2014                         rtnVal = dotFsm( pd );
2015                         rtnVal->subtractOp( fsm );
2016                         rtnVal->minimizePartition2();
2017                         break;
2018                 }
2019         }
2020
2021         /* If the item is followed by a star, then apply the star op. */
2022         if ( star ) {
2023                 if ( rtnVal->startState->isFinState() ) {
2024                         warning(loc) << "applying kleene star to a machine that "
2025                                         "accepts zero length word" << endl;
2026                 }
2027
2028                 rtnVal->starOp();
2029                 rtnVal->minimizePartition2();
2030         }
2031         return rtnVal;
2032 }
2033
2034 /* Clean up after an or block of a regular expression. */
2035 ReOrBlock::~ReOrBlock()
2036 {
2037         switch ( type ) {
2038                 case RecurseItem:
2039                         delete orBlock;
2040                         delete item;
2041                         break;
2042                 case Empty:
2043                         break;
2044         }
2045 }
2046
2047
2048 /* Evaluate an or block of a regular expression. */
2049 FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex )
2050 {
2051         FsmAp *rtnVal = 0;
2052         switch ( type ) {
2053                 case RecurseItem: {
2054                         /* Evaluate the two fsm. */
2055                         FsmAp *fsm1 = orBlock->walk( pd, rootRegex );
2056                         FsmAp *fsm2 = item->walk( pd, rootRegex );
2057                         if ( fsm1 == 0 )
2058                                 rtnVal = fsm2;
2059                         else {
2060                                 fsm1->unionOp( fsm2 );
2061                                 rtnVal = fsm1;
2062                         }
2063                         break;
2064                 }
2065                 case Empty: {
2066                         rtnVal = 0;
2067                         break;
2068                 }
2069         }
2070         return rtnVal;;
2071 }
2072
2073 /* Evaluate an or block item of a regular expression. */
2074 FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex )
2075 {
2076         /* The return value, is the alphabet signed? */
2077         FsmAp *rtnVal = 0;
2078         switch ( type ) {
2079         case Data: {
2080                 /* Make the or machine. */
2081                 rtnVal = new FsmAp();
2082
2083                 /* Put the or data into an array of ints. Note that we find unique
2084                  * keys. Duplicates are silently ignored. The alternative would be to
2085                  * issue warning or an error but since we can't with [a0-9a] or 'a' |
2086                  * 'a' don't bother here. */
2087                 KeySet keySet;
2088                 makeFsmUniqueKeyArray( keySet, token.data, token.length,
2089                         rootRegex != 0 ? rootRegex->caseInsensitive : false, pd );
2090
2091                 /* Run the or operator. */
2092                 rtnVal->orFsm( keySet.data, keySet.length() );
2093                 break;
2094         }
2095         case Range: {
2096                 /* Make the upper and lower keys. */
2097                 Key lowKey = makeFsmKeyChar( lower, pd );
2098                 Key highKey = makeFsmKeyChar( upper, pd );
2099
2100                 /* Validate the range. */
2101                 if ( lowKey > highKey ) {
2102                         /* Recover by setting upper to lower; */
2103                         error(loc) << "lower end of range is greater then upper end" << endl;
2104                         highKey = lowKey;
2105                 }
2106
2107                 /* Make the range machine. */
2108                 rtnVal = new FsmAp();
2109                 rtnVal->rangeFsm( lowKey, highKey );
2110
2111                 if ( rootRegex != 0 && rootRegex->caseInsensitive ) {
2112                         if ( lowKey <= 'Z' && 'A' <= highKey ) {
2113                                 Key otherLow = lowKey < 'A' ? Key('A') : lowKey;
2114                                 Key otherHigh = 'Z' < highKey ? Key('Z') : highKey;
2115
2116                                 otherLow = 'a' + ( otherLow - 'A' );
2117                                 otherHigh = 'a' + ( otherHigh - 'A' );
2118
2119                                 FsmAp *otherRange = new FsmAp();
2120                                 otherRange->rangeFsm( otherLow, otherHigh );
2121                                 rtnVal->unionOp( otherRange );
2122                                 rtnVal->minimizePartition2();
2123                         }
2124                         else if ( lowKey <= 'z' && 'a' <= highKey ) {
2125                                 Key otherLow = lowKey < 'a' ? Key('a') : lowKey;
2126                                 Key otherHigh = 'z' < highKey ? Key('z') : highKey;
2127
2128                                 otherLow = 'A' + ( otherLow - 'a' );
2129                                 otherHigh = 'A' + ( otherHigh - 'a' );
2130
2131                                 FsmAp *otherRange = new FsmAp();
2132                                 otherRange->rangeFsm( otherLow, otherHigh );
2133                                 rtnVal->unionOp( otherRange );
2134                                 rtnVal->minimizePartition2();
2135                         }
2136                 }
2137
2138                 break;
2139         }}
2140         return rtnVal;
2141 }