/*
- * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * Copyright 2001-2006 Adrian Thurston <thurston@complang.org>
*/
/* This file is part of Ragel.
/* Convert the literal string which comes in from the scanner into an array of
* characters with escapes and options interpreted. Also null terminates the
* string. Though this null termination should not be relied on for
- * interpreting literals in the parser because the string may contain a
- * literal string with \0 */
-void Token::prepareLitString( Token &result, bool &caseInsensitive )
+ * interpreting literals in the parser because the string may contain \0 */
+char *prepareLitString( const InputLoc &loc, const char *data, long length,
+ long &resLen, bool &caseInsensitive )
{
- result.data = new char[this->length+1];
+ char *resData = new char[length+1];
caseInsensitive = false;
- char *src = this->data + 1;
- char *end = this->data + this->length - 1;
+ const char *src = data + 1;
+ const char *end = data + length - 1;
while ( *end != '\'' && *end != '\"' ) {
if ( *end == 'i' )
caseInsensitive = true;
else {
- error( this->loc ) << "literal string '" << *end <<
+ error( loc ) << "literal string '" << *end <<
"' option not supported" << endl;
}
end -= 1;
}
- char *dest = result.data;
- int len = 0;
+ char *dest = resData;
+ long len = 0;
while ( src != end ) {
if ( *src == '\\' ) {
switch ( src[1] ) {
dest[len++] = *src++;
}
}
- result.length = len;
- result.data[result.length] = 0;
-}
+ resLen = len;
+ resData[resLen] = 0;
+ return resData;
+}
FsmAp *VarDef::walk( ParseData *pd )
{
NameFrame nameFrame = pd->enterNameScope( true, 1 );
/* Recurse on the expression. */
- FsmAp *rtnVal = joinOrLm->walk( pd );
+ FsmAp *rtnVal = machineDef->walk( pd );
/* Do the tranfer of local error actions. */
LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name );
/* If the expression below is a join operation with multiple expressions
* then it just had epsilon transisions resolved. If it is a join
* with only a single expression then run the epsilon op now. */
- if ( joinOrLm->type == JoinOrLm::JoinType && joinOrLm->join->exprList.length() == 1 )
+ if ( machineDef->type == MachineDef::JoinType && machineDef->join->exprList.length() == 1 )
rtnVal->epsilonOp();
/* We can now unset entry points that are not longer used. */
NameInst *prevNameInst = pd->curNameInst;
pd->curNameInst = pd->addNameInst( loc, name, false );
- if ( joinOrLm->type == JoinOrLm::LongestMatchType )
+ if ( machineDef->type == MachineDef::LongestMatchType )
pd->curNameInst->isLongestMatch = true;
/* Recurse. */
- joinOrLm->makeNameTree( pd );
+ machineDef->makeNameTree( pd );
/* The name scope ends, pop the name instantiation. */
pd->curNameInst = prevNameInst;
NameFrame nameFrame = pd->enterNameScope( true, 1 );
/* Recurse. */
- joinOrLm->resolveNameRefs( pd );
+ machineDef->resolveNameRefs( pd );
/* The name scope ends, pop the name instantiation. */
pd->popNameScope( nameFrame );
*/
Action *LongestMatch::newAction( ParseData *pd, const InputLoc &loc,
- char *name, InlineList *inlineList )
+ const char *name, InlineList *inlineList )
{
Action *action = new Action( loc, name, inlineList, pd->nextCondId++ );
action->actionRefs.append( pd->curNameInst );
/* For each part create actions for setting the match type. We need
* to do this so that the actions will go into the actionIndex. */
InlineList *inlineList = new InlineList;
- inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, InlineItem::LmSetActId ) );
+ inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
+ InlineItem::LmSetActId ) );
char *actName = new char[50];
sprintf( actName, "store%i", lmi->longestMatchId );
lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList );
}
- /* Make actions that execute the user action and restart on the last character. */
+ /* Make actions that execute the user action and restart on the last
+ * character. */
for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
/* For each part create actions for setting the match type. We need
* to do this so that the actions will go into the actionIndex. */
inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
InlineItem::LmOnLast ) );
char *actName = new char[50];
- sprintf( actName, "imm%i", lmi->longestMatchId );
+ sprintf( actName, "last%i", lmi->longestMatchId );
lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList );
}
inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
InlineItem::LmOnNext ) );
char *actName = new char[50];
- sprintf( actName, "lagh%i", lmi->longestMatchId );
+ sprintf( actName, "next%i", lmi->longestMatchId );
lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList );
}
InputLoc loc;
loc.line = 1;
loc.col = 1;
+ loc.fileName = "NONE";
/* Create the error action. */
InlineList *il6 = new InlineList;
il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) );
- lmActSelect = newAction( pd, loc, "lagsel", il6 );
+ lmActSelect = newAction( pd, loc, "switch", il6 );
}
void LongestMatch::findName( ParseData *pd )
graph->attachTrans( fromState, graph->startState, trans );
}
-void LongestMatch::runLonestMatch( ParseData *pd, FsmAp *graph )
+void LongestMatch::runLongestMatch( ParseData *pd, FsmAp *graph )
{
graph->markReachableFromHereStopFinal( graph->startState );
for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
- if ( ms->stateBits & SB_ISMARKED ) {
+ if ( ms->stateBits & STB_ISMARKED ) {
ms->lmItemSet.insert( 0 );
- ms->stateBits &= ~ SB_ISMARKED;
+ ms->stateBits &= ~ STB_ISMARKED;
}
}
StateAp *toState = trans->toState;
assert( toState );
- /* Check if there are transitions out, this may be a very
- * close approximation? Out transitions going nowhere?
- * FIXME: Check. */
+ /* Can only optimize this if there are no transitions out.
+ * Note there can be out transitions going nowhere with
+ * actions and they too must inhibit this optimization. */
if ( toState->outList.length() > 0 ) {
/* Fill the item sets. */
graph->markReachableFromHereStopFinal( toState );
for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
- if ( ms->stateBits & SB_ISMARKED ) {
+ if ( ms->stateBits & STB_ISMARKED ) {
ms->lmItemSet.insert( lmAct->value );
- ms->stateBits &= ~ SB_ISMARKED;
+ ms->stateBits &= ~ STB_ISMARKED;
}
}
}
int maxItemSetLength = 0;
graph->markReachableFromHereStopFinal( graph->startState );
for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
- if ( ms->stateBits & SB_ISMARKED ) {
+ if ( ms->stateBits & STB_ISMARKED ) {
if ( ms->lmItemSet.length() > maxItemSetLength )
maxItemSetLength = ms->lmItemSet.length();
- ms->stateBits &= ~ SB_ISMARKED;
+ ms->stateBits &= ~ STB_ISMARKED;
}
}
StateAp *toState = trans->toState;
assert( toState );
- /* Check if there are transitions out, this may be a very
- * close approximation? Out transitions going nowhere?
- * FIXME: Check. */
+ /* Can only optimize this if there are no transitions out.
+ * Note there can be out transitions going nowhere with
+ * actions and they too must inhibit this optimization. */
if ( toState->outList.length() == 0 ) {
/* Can execute the immediate action for the longest match
- * part. Redirect the action to the start state. */
+ * part. Redirect the action to the start state.
+ *
+ * NOTE: When we need to inhibit on_last due to leaving
+ * actions the above test suffices. If the state has out
+ * actions then it will fail because the out action will
+ * have been transferred to an error transition, which
+ * makes the outlist non-empty. */
trans->actionTable.setAction( lmAct->key,
lmAct->value->actOnLast );
restartTrans.append( trans );
maxItemSetLength = 0;
graph->markReachableFromHereStopFinal( toState );
for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
- if ( ms->stateBits & SB_ISMARKED ) {
+ if ( ms->stateBits & STB_ISMARKED ) {
if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
nonFinalNonEmptyItemSet = true;
if ( ms->lmItemSet.length() > maxItemSetLength )
maxItemSetLength = ms->lmItemSet.length();
- ms->stateBits &= ~ SB_ISMARKED;
+ ms->stateBits &= ~ STB_ISMARKED;
}
}
* the last character of the token was one back and restart. */
graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
&st->lmItemSet[0]->actOnNext, 1 );
+ st->eofActionTable.setAction( lmErrActionOrd,
+ st->lmItemSet[0]->actOnNext );
+ st->eofTarget = graph->startState;
}
else {
graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
&st->lmItemSet[0]->actLagBehind, 1 );
+ st->eofActionTable.setAction( lmErrActionOrd,
+ st->lmItemSet[0]->actLagBehind );
+ st->eofTarget = graph->startState;
}
}
else if ( st->lmItemSet.length() > 1 ) {
- /* Need to use the select. Take note of the which items the select
+ /* Need to use the select. Take note of which items the select
* is needed for so only the necessary actions are included. */
for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
if ( *plmi != 0 )
/* On error, execute the action select and go to the start state. */
graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
&lmActSelect, 1 );
+ st->eofActionTable.setAction( lmErrActionOrd, lmActSelect );
+ st->eofTarget = graph->startState;
}
}
graph->setFinState( graph->startState );
}
+void LongestMatch::transferScannerLeavingActions( FsmAp *graph )
+{
+ for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+ if ( st->outActionTable.length() > 0 )
+ graph->setErrorActions( st, st->outActionTable );
+ }
+}
+
FsmAp *LongestMatch::walk( ParseData *pd )
{
/* The longest match has it's own name scope. */
parts[i]->longMatchAction( pd->curActionOrd++, lmi );
}
+ /* Before we union the patterns we need to deal with leaving actions. They
+ * are transfered to error transitions out of the final states (like local
+ * error actions) and to eof actions. In the scanner we need to forbid
+ * on_last for any final state that has an leaving action. */
+ for ( int i = 0; i < longestMatchList->length(); i++ )
+ transferScannerLeavingActions( parts[i] );
+
/* Union machines one and up with machine zero. The grammar dictates that
* there will always be at least one part. */
FsmAp *rtnVal = parts[0];
afterOpMinimize( rtnVal );
}
- runLonestMatch( pd, rtnVal );
+ runLongestMatch( pd, rtnVal );
/* Pop the name scope. */
pd->popNameScope( nameFrame );
return rtnVal;
}
-FsmAp *JoinOrLm::walk( ParseData *pd )
+FsmAp *MachineDef::walk( ParseData *pd )
{
FsmAp *rtnVal = 0;
switch ( type ) {
case LongestMatchType:
rtnVal = longestMatch->walk( pd );
break;
+ case LengthDefType:
+ condData->lastCondKey.increment();
+ rtnVal = new FsmAp();
+ rtnVal->concatFsm( condData->lastCondKey );
+ break;
}
return rtnVal;
}
-void JoinOrLm::makeNameTree( ParseData *pd )
+void MachineDef::makeNameTree( ParseData *pd )
{
switch ( type ) {
case JoinType:
case LongestMatchType:
longestMatch->makeNameTree( pd );
break;
+ case LengthDefType:
+ break;
}
}
-void JoinOrLm::resolveNameRefs( ParseData *pd )
+void MachineDef::resolveNameRefs( ParseData *pd )
{
switch ( type ) {
case JoinType:
case LongestMatchType:
longestMatch->resolveNameRefs( pd );
break;
+ case LengthDefType:
+ break;
}
}
pd->curNameInst->start = resolved[0];
if ( resolved.length() > 1 ) {
/* Complain about the multiple references. */
- error(loc) << "multiple start labels" << endl;
+ error(loc) << "join operation has multiple start labels" << endl;
errorStateLabels( resolved );
}
}
pd->curNameInst->start->numRefs += 1;
}
else {
- /* No start label. Complain and recover by adding a label to the
- * adding one. Recover ignoring the problem. */
- error(loc) << "no start label" << endl;
+ /* No start label. */
+ error(loc) << "join operation has no start label" << endl;
}
/* Recurse into all expressions in the list. */
priorDescs[0].priority = 0;
rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
- /* The start transitions right machine get the higher priority.
- * Use the same unique key. */
+ /* The start transitions of the right machine gets the higher
+ * priority. Use the same unique key. */
priorDescs[1].key = priorDescs[0].key;
priorDescs[1].priority = 1;
rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
priorDescs[1].priority = 1;
rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
+ /* If the right machine's start state is final we need to guard
+ * against the left machine persisting by moving through the empty
+ * string. */
+ if ( rhs->startState->isFinState() ) {
+ rhs->startState->outPriorTable.setPrior(
+ pd->curPriorOrd++, &priorDescs[1] );
+ }
+
/* Perform concatenation. */
rtnVal->concatOp( rhs );
afterOpMinimize( rtnVal, lastInSeq );
priorDescs[0].priority = 1;
rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
- /* The right machine gets the lower priority. Since
- * startTransPrior might unnecessarily increase the number of
- * states during the state machine construction process (due to
- * isolation), we use allTransPrior instead, which has the same
- * effect. */
+ /* The right machine gets the lower priority. We cannot use
+ * allTransPrior here in case the start state of the right machine
+ * is final. It would allow the right machine thread to run along
+ * with the left if just passing through the start state. Using
+ * startFsmPrior prevents this. */
priorDescs[1].key = priorDescs[0].key;
priorDescs[1].priority = 0;
- rhs->allTransPrior( pd->curPriorOrd++, &priorDescs[1] );
+ rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
/* Perform concatenation. */
rtnVal->concatOp( rhs );
if ( retFsm->startState->isFinState() ) {
warning(loc) << "applying kleene star to a machine that "
"accepts zero length word" << endl;
+ retFsm->unsetFinState( retFsm->startState );
}
/* Shift over the start action orders then do the kleene star. */
retFsm = factorWithRep->walk( pd );
if ( retFsm->startState->isFinState() ) {
warning(loc) << "applying plus operator to a machine that "
- "accpets zero length word" << endl;
+ "accepts zero length word" << endl;
}
/* Need a duplicated for the star end. */
}
case LitString: {
/* Make the array of keys in int format. */
- Token interp;
+ long length;
bool caseInsensitive;
- token.prepareLitString( interp, caseInsensitive );
- Key *arr = new Key[interp.length];
- makeFsmKeyArray( arr, interp.data, interp.length, pd );
+ char *data = prepareLitString( token.loc, token.data, token.length,
+ length, caseInsensitive );
+ Key *arr = new Key[length];
+ makeFsmKeyArray( arr, data, length, pd );
/* Make the new machine. */
rtnVal = new FsmAp();
if ( caseInsensitive )
- rtnVal->concatFsmCI( arr, interp.length );
+ rtnVal->concatFsmCI( arr, length );
else
- rtnVal->concatFsm( arr, interp.length );
- delete[] interp.data;
+ rtnVal->concatFsm( arr, length );
+ delete[] data;
delete[] arr;
break;
}}
if ( star ) {
if ( rtnVal->startState->isFinState() ) {
warning(loc) << "applying kleene star to a machine that "
- "accpets zero length word" << endl;
+ "accepts zero length word" << endl;
}
rtnVal->starOp();