/*
- * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * Copyright 2001-2008 Adrian Thurston <thurston@complang.org>
*/
/* This file is part of Ragel.
#include "mergesort.h"
#include "xmlcodegen.h"
#include "version.h"
+#include "inputdata.h"
using namespace std;
char mainMachine[] = "main";
-void Token::set( char *str, int len )
+void Token::set( const char *str, int len )
{
length = len;
data = new char[len+1];
void afterOpMinimize( FsmAp *fsm, bool lastInSeq )
{
/* Switch on the prefered minimization algorithm. */
- if ( minimizeOpt == MinimizeEveryOp || minimizeOpt == MinimizeMostOps && lastInSeq ) {
+ if ( minimizeOpt == MinimizeEveryOp || ( minimizeOpt == MinimizeMostOps && lastInSeq ) ) {
/* First clean up the graph. FsmAp operations may leave these
* lying around. There should be no dead end states. The subtract
* intersection operators are the only places where they may be
unsigned long ul = strtoul( str, 0, 16 );
- if ( errno == ERANGE || unusedBits && ul >> (size * 8) ) {
+ if ( errno == ERANGE || ( unusedBits && ul >> (size * 8) ) ) {
error(loc) << "literal " << str << " overflows the alphabet type" << endl;
ul = 1 << (size * 8);
}
if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) )
- ul |= (0xffffffff >> (size*8 ) ) << (size*8);
+ ul |= ( -1L >> (size*8) ) << (size*8);
return Key( (long)ul );
}
long long ll = strtoll( str, 0, 10 );
/* Check for underflow. */
- if ( errno == ERANGE && ll < 0 || ll < minVal) {
+ if ( ( errno == ERANGE && ll < 0 ) || ll < minVal) {
error(loc) << "literal " << str << " underflows the alphabet type" << endl;
ll = minVal;
}
/* Check for overflow. */
- else if ( errno == ERANGE && ll > 0 || ll > maxVal ) {
+ else if ( ( errno == ERANGE && ll > 0 ) || ll > maxVal ) {
error(loc) << "literal " << str << " overflows the alphabet type" << endl;
ll = maxVal;
}
/* Initialize the structure that will collect info during the parse of a
* machine. */
-ParseData::ParseData( char *fileName, char *sectionName,
+ParseData::ParseData( const char *fileName, char *sectionName,
const InputLoc &sectionLoc )
:
sectionGraph(0),
alphTypeSet(false),
getKeyExpr(0),
accessExpr(0),
+ prePushExpr(0),
+ postPopExpr(0),
pExpr(0),
peExpr(0),
+ eofExpr(0),
csExpr(0),
topExpr(0),
stackExpr(0),
exportsRootName(0),
nextEpsilonResolvedLink(0),
nextLongestMatchId(1),
- lmRequiresErrorState(false)
+ lmRequiresErrorState(false),
+ cgd(0)
{
/* Initialize the dictionary of graphs. This is our symbol table. The
* initialization needs to be done on construction which happens at the
/* Make a name id in the current name instantiation scope if it is not
* already there. */
-NameInst *ParseData::addNameInst( const InputLoc &loc, char *data, bool isLabel )
+NameInst *ParseData::addNameInst( const InputLoc &loc, const char *data, bool isLabel )
{
/* Create the name instantitaion object and insert it. */
NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel );
}
}
-NameSet ParseData::resolvePart( NameInst *refFrom, char *data, bool recLabelsOnly )
+NameSet ParseData::resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly )
{
/* Queue needed for breadth-first search, load it with the start node. */
NameInstList nameQueue;
/* Resolve, pass action for local search. */
NameInst *target = resolveStateRef( *item->nameRef, item->loc, action );
- /* Check if the target goes into a longest match. */
- NameInst *search = target->parent;
- while ( search != 0 ) {
- if ( search->isLongestMatch ) {
- error(item->loc) << "cannot enter inside a longest "
- "match construction as an entry point" << endl;
- break;
+ /* Name lookup error reporting is handled by resolveStateRef. */
+ if ( target != 0 ) {
+ /* Check if the target goes into a longest match. */
+ NameInst *search = target->parent;
+ while ( search != 0 ) {
+ if ( search->isLongestMatch ) {
+ error(item->loc) << "cannot enter inside a longest "
+ "match construction as an entry point" << endl;
+ break;
+ }
+ search = search->parent;
}
- search = search->parent;
- }
- /* Note the reference in the name. This will cause the entry
- * point to survive to the end of the graph generating walk. */
- if ( target != 0 )
+ /* Record the reference in the name. This will cause the
+ * entry point to survive to the end of the graph
+ * generating walk. */
target->numRefs += 1;
+ }
+
item->nameTarg = target;
break;
}
}
-void ParseData::createBuiltin( char *name, BuiltinMachine builtin )
+void ParseData::createBuiltin( const char *name, BuiltinMachine builtin )
{
Expression *expression = new Expression( builtin );
Join *join = new Join( expression );
- JoinOrLm *joinOrLm = new JoinOrLm( join );
- VarDef *varDef = new VarDef( name, joinOrLm );
+ MachineDef *machineDef = new MachineDef( join );
+ VarDef *varDef = new VarDef( name, machineDef );
GraphDictEl *graphDictEl = new GraphDictEl( name, varDef );
graphDict.insert( graphDictEl );
}
}
/* Set the alphabet type. If the types are not valid returns false. */
-bool ParseData::setAlphType( char *s1, char *s2 )
+bool ParseData::setAlphType( const InputLoc &loc, char *s1, char *s2 )
{
+ alphTypeLoc = loc;
userAlphType = findAlphType( s1, s2 );
alphTypeSet = true;
return userAlphType != 0;
}
/* Set the alphabet type. If the types are not valid returns false. */
-bool ParseData::setAlphType( char *s1 )
+bool ParseData::setAlphType( const InputLoc &loc, char *s1 )
{
+ alphTypeLoc = loc;
userAlphType = findAlphType( s1 );
alphTypeSet = true;
return userAlphType != 0;
pExpr = inlineList;
else if ( strcmp( var, "pe" ) == 0 )
peExpr = inlineList;
+ else if ( strcmp( var, "eof" ) == 0 )
+ eofExpr = inlineList;
else if ( strcmp( var, "cs" ) == 0 )
csExpr = inlineList;
else if ( strcmp( var, "data" ) == 0 )
stackExpr = inlineList;
else if ( strcmp( var, "act" ) == 0 )
actExpr = inlineList;
- else if ( strcmp( var, "tokstart" ) == 0 )
+ else if ( strcmp( var, "ts" ) == 0 )
tokstartExpr = inlineList;
- else if ( strcmp( var, "tokend" ) == 0 )
+ else if ( strcmp( var, "te" ) == 0 )
tokendExpr = inlineList;
else
set = false;
thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
}
- thisCondData.nextCondKey = thisKeyOps.maxKey;
- thisCondData.nextCondKey.increment();
+ thisCondData.lastCondKey = thisKeyOps.maxKey;
}
void ParseData::printNameInst( NameInst *nameInst, int level )
}
}
-Action *ParseData::newAction( char *name, InlineList *inlineList )
+Action *ParseData::newAction( const char *name, InlineList *inlineList )
{
InputLoc loc;
loc.line = 1;
loc.col = 1;
- loc.fileName = "<NONE>";
+ loc.fileName = "NONE";
Action *action = new Action( loc, name, inlineList, nextCondId++ );
action->actionRefs.append( rootName );
/* The setTokStart action sets tokstart. */
InlineList *il5 = new InlineList;
il5->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) );
- setTokStart = newAction( "tokstart", il5 );
+ setTokStart = newAction( "ts", il5 );
setTokStart->isLmAction = true;
/* The setTokEnd action sets tokend. */
InlineList *il3 = new InlineList;
il3->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) );
- setTokEnd = newAction( "tokend", il3 );
+ setTokEnd = newAction( "te", il3 );
setTokEnd->isLmAction = true;
/* The action will also need an ordering: ahead of all user action
* All state construction is now complete.
*/
+ /* Transfer actions from the out action tables to eof action tables. */
+ for ( StateSet::Iter state = graph->finStateSet; state.lte(); state++ )
+ graph->transferOutActions( *state );
+
/* Transfer global error actions. */
for ( StateList::Iter state = graph->stateList; state.lte(); state++ )
graph->transferErrorActions( state, 0 );
- removeActionDups( graph );
+ if ( ::wantDupsRemoved )
+ removeActionDups( graph );
/* Remove unreachable states. There should be no dead end states. The
* subtract and intersection operators are the only places where they may
/* Show that the name index is correct. */
for ( int ni = 0; ni < nextNameId; ni++ ) {
cerr << ni << ": ";
- char *name = nameIndex[ni]->name;
+ const char *name = nameIndex[ni]->name;
cerr << ( name != 0 ? name : "<ANON>" ) << endl;
}
}
/* EOF checks. */
if ( act->numEofRefs > 0 ) {
switch ( item->type ) {
- case InlineItem::PChar:
- error(item->loc) << "pointer to current element does not exist in "
- "EOF action code" << endl;
- break;
- case InlineItem::Char:
- error(item->loc) << "current element does not exist in "
- "EOF action code" << endl;
- break;
- case InlineItem::Hold:
- error(item->loc) << "changing the current element not possible in "
- "EOF action code" << endl;
- break;
- case InlineItem::Exec:
- error(item->loc) << "changing the current element not possible in "
- "EOF action code" << endl;
- break;
- case InlineItem::Goto: case InlineItem::Call:
- case InlineItem::Next: case InlineItem::GotoExpr:
- case InlineItem::CallExpr: case InlineItem::NextExpr:
- case InlineItem::Ret:
- error(item->loc) << "changing the current state not possible in "
- "EOF action code" << endl;
- break;
- default:
- break;
+ /* Currently no checks. */
+ default:
+ break;
}
}
}
+/* Construct the machine and catch failures which can occur during
+ * construction. The actual work is delegated to prepareMachineGenTBWrapped;
+ * an FsmConstructFail thrown from there is translated into error messages
+ * here instead of propagating to the caller.
+ *
+ * graphDictEl is passed through unchanged to prepareMachineGenTBWrapped. */
void ParseData::prepareMachineGen( GraphDictEl *graphDictEl )
{
+	try {
+		/* This machine construction can fail. */
+		prepareMachineGenTBWrapped( graphDictEl );
+	}
+	catch ( const FsmConstructFail &fail ) {
+		/* Caught by const reference so the exception object is neither
+		 * copied nor sliced; only its reason field is inspected. */
+		switch ( fail.reason ) {
+			case FsmConstructFail::CondNoKeySpace: {
+				/* Report against the alphtype location when an alphtype
+				 * was set, otherwise against the section location. */
+				InputLoc &loc = alphTypeSet ? alphTypeLoc : sectionLoc;
+				error(loc) << "sorry, no more characters are "
+						"available in the alphabet space" << endl;
+				error(loc) << "  for conditions, please use a "
+						"smaller alphtype or reduce" << endl;
+				error(loc) << "  the span of characters on which "
+						"conditions are embedded" << endl;
+				break;
+			}
+		}
+	}
+}
+
+void ParseData::prepareMachineGenTBWrapped( GraphDictEl *graphDictEl )
+{
beginProcessing();
initKeyOps();
makeRootNames();
sectionGraph->setStateNumbers( 0 );
}
-void ParseData::generateXML( ostream &out )
+void ParseData::generateReduced( InputData &inputData )
{
beginProcessing();
+ cgd = makeCodeGen( inputData.inputFileName, sectionName, *inputData.outStream );
+
/* Make the generator. */
- XMLCodeGen codeGen( sectionName, this, sectionGraph, out );
+ BackendGen backendGen( sectionName, this, sectionGraph, cgd );
/* Write out with it. */
- codeGen.writeXML();
+ backendGen.makeBackend();
if ( printStatistics ) {
cerr << "fsm name : " << sectionName << endl;
}
}
-/* Send eof to all parsers. */
-void terminateAllParsers( )
-{
- /* FIXME: a proper token is needed here. Suppose we should use the
- * location of EOF in the last file that the parser was referenced in. */
- InputLoc loc;
- loc.fileName = "<EOF>";
- loc.line = 0;
- loc.col = 0;
- for ( ParserDict::Iter pdel = parserDict; pdel.lte(); pdel++ )
- pdel->value->token( loc, _eof, 0, 0 );
-}
-
-void writeLanguage( std::ostream &out )
+void ParseData::generateXML( ostream &out )
{
- out << " lang=\"";
- switch ( hostLang->lang ) {
- case HostLang::C: out << "C"; break;
- case HostLang::D: out << "D"; break;
- case HostLang::Java: out << "Java"; break;
- case HostLang::Ruby: out << "Ruby"; break;
- }
- out << "\"";
-
-}
+ beginProcessing();
-void writeMachines( std::ostream &out, std::string hostData, char *inputFileName )
-{
- if ( machineSpec == 0 && machineName == 0 ) {
- /* No machine spec or machine name given. Generate everything. */
- for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
- ParseData *pd = parser->value->pd;
- if ( pd->instanceList.length() > 0 )
- pd->prepareMachineGen( 0 );
- }
+ /* Make the generator. */
+ XMLCodeGen codeGen( sectionName, this, sectionGraph, out );
- if ( gblErrorCount == 0 ) {
- out << "<ragel version=\"" VERSION "\" filename=\"" << inputFileName << "\"";
- writeLanguage( out );
- out << ">\n";
- for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
- ParseData *pd = parser->value->pd;
- if ( pd->instanceList.length() > 0 )
- pd->generateXML( out );
- }
- out << hostData;
- out << "</ragel>\n";
- }
- }
- else if ( parserDict.length() > 0 ) {
- /* There is either a machine spec or machine name given. */
- ParseData *parseData = 0;
- GraphDictEl *graphDictEl = 0;
-
- /* Traverse the sections, break out when we find a section/machine
- * that matches the one specified. */
- for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) {
- ParseData *checkPd = parser->value->pd;
- if ( machineSpec == 0 || strcmp( checkPd->sectionName, machineSpec ) == 0 ) {
- GraphDictEl *checkGdEl = 0;
- if ( machineName == 0 || (checkGdEl =
- checkPd->graphDict.find( machineName )) != 0 )
- {
- /* Have a machine spec and/or machine name that matches
- * the -M/-S options. */
- parseData = checkPd;
- graphDictEl = checkGdEl;
- break;
- }
- }
- }
+ /* Write out with it. */
+ codeGen.writeXML();
- if ( parseData == 0 )
- error() << "could not locate machine specified with -S and/or -M" << endl;
- else {
- /* Section/Machine to emit was found. Prepare and emit it. */
- parseData->prepareMachineGen( graphDictEl );
- if ( gblErrorCount == 0 ) {
- out << "<ragel version=\"" VERSION "\" filename=\"" << inputFileName << "\"";
- writeLanguage( out );
- out << ">\n";
- parseData->generateXML( out );
- out << hostData;
- out << "</ragel>\n";
- }
- }
+ if ( printStatistics ) {
+ cerr << "fsm name : " << sectionName << endl;
+ cerr << "num states: " << sectionGraph->stateList.length() << endl;
+ cerr << endl;
}
}
+