/*
- * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ * Copyright 2001-2007 Adrian Thurston <thurston@complang.org>
*/
/* This file is part of Ragel.
#include <sstream>
#include <unistd.h>
#include <sys/types.h>
-#include <sys/wait.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
+#ifdef _WIN32
+#include <windows.h>
+#include <psapi.h>
+#include <time.h>
+#include <io.h>
+#include <process.h>
+
+#if _MSC_VER
+#define S_IRUSR _S_IREAD
+#define S_IWUSR _S_IWRITE
+#endif
+#endif
+
/* Parsing. */
#include "ragel.h"
#include "rlscan.h"
#include "vector.h"
#include "version.h"
#include "common.h"
+#include "inputdata.h"
using std::istream;
using std::ostream;
MinimizeOpt minimizeOpt = MinimizeMostOps;
/* Graphviz dot file generation. */
-char *machineSpec = 0, *machineName = 0;
+const char *machineSpec = 0, *machineName = 0; /* Filled in by -S and -M respectively. */
bool machineSpecFound = false;
+bool wantDupsRemoved = true; /* Cleared by -d. */
bool printStatistics = false;
+bool generateXML = false; /* Set by -x. */
+bool generateDot = false; /* Set by -V. */
+
+/* Target language and output style. */
+CodeStyle codeStyle = GenTables; /* Overridden by -T, -F, -G and -P. */
-typedef Vector<char*> ArgsVector;
-ArgsVector backendArgs;
+int numSplitPartitions = 0; /* Taken from the argument of -P. */
+bool noLineDirectives = false; /* Set by -L. */
+
+bool displayPrintables = false; /* Set by -p. */
+
+/* Target ruby impl; the --rbx long option switches it to Rubinius. */
+RubyImplEnum rubyImpl = MRI;
/* Print a summary of the options. */
void usage()
" -v, --version Print version information and exit\n"
" -o <file> Write output to <file>\n"
" -s Print some statistics on stderr\n"
+" -d Do not remove duplicates from action lists\n"
+" -I <dir> Add <dir> to the list of directories to search\n"
+" for included and imported files\n"
+"error reporting format:\n"
+" --error-format=gnu file:line:column: message (default)\n"
+" --error-format=msvc file(line,column): message\n"
"fsm minimization:\n"
" -n Do not perform minimization\n"
" -m Minimize at the end of the compilation\n"
" -l Minimize after most operations (default)\n"
" -e Minimize after every operation\n"
-"machine selection:\n"
-" -S <spec> FSM specification to output (for rlgen-dot)\n"
-" -M <machine> Machine definition/instantiation to output (for rlgen-dot)\n"
+"visualization:\n"
+" -x Run the frontend only: emit XML intermediate format\n"
+" -V Generate a dot file for Graphviz\n"
+" -p Display printable characters on labels\n"
+" -S <spec> FSM specification to output (for graphviz output)\n"
+" -M <machine> Machine definition/instantiation to output (for graphviz output)\n"
"host language:\n"
" -C The host language is C, C++, Obj-C or Obj-C++ (default)\n"
" -D The host language is D\n"
" -J The host language is Java\n"
" -R The host language is Ruby\n"
+" -A The host language is C#\n"
+"line directives: (C/D/C#)\n"
+" -L Inhibit writing of #line directives\n"
+"code style: (C/D/Java/Ruby/C#)\n"
+" -T0 Table driven FSM (default)\n"
+"code style: (C/D/Ruby/C#)\n"
+" -T1 Faster table driven FSM\n"
+" -F0 Flat table driven FSM\n"
+" -F1 Faster flat table-driven FSM\n"
+"code style: (C/D/C#)\n"
+" -G0 Goto-driven FSM\n"
+" -G1 Faster goto-driven FSM\n"
+"code style: (C/D)\n"
+" -G2 Really fast goto-driven FSM\n"
+" -P<N> N-Way Split really fast goto-driven FSM\n"
;
+
+ exit(0); /* Printing the summary ends the run; usage() does not return. */
}
-/* Print version information. */
+/* Print version information and exit. VERSION and PUBDATE are pasted into the
+ * banner as adjacent string literals; the process then ends with status 0. */
void version()
{
cout << "Ragel State Machine Compiler version " VERSION << " " PUBDATE << endl <<
- "Copyright (c) 2001-2007 by Adrian Thurston" << endl;
+ "Copyright (c) 2001-2009 by Adrian Thurston" << endl;
+ exit(0); /* processArgs only does a break after calling version(), relying on this exit. */
+}
+
+/* Error reporting format. Switched by --error-format=gnu|msvc and consulted
+ * whenever an InputLoc is written to a stream. */
+ErrorFormat errorFormat = ErrorFormatGNU;
+
+/* Bundle a file name, line and column into an InputLoc value. */
+InputLoc makeInputLoc( const char *fileName, int line, int col)
+{
+    /* Aggregate initialisation, in declaration order of InputLoc's members. */
+    InputLoc where = { fileName, line, col };
+    return where;
+}
+
+/* Write an InputLoc in the active error format: "file(line,col)" when
+ * errorFormat is ErrorFormatMSVC, "file:line:col" for anything else. A zero
+ * column is left out in both layouts. */
+ostream &operator<<( ostream &out, const InputLoc &loc )
+{
+    assert( loc.fileName != 0 );
+
+    /* The two layouts differ only in their punctuation. */
+    const bool msvcStyle = ( errorFormat == ErrorFormatMSVC );
+    const char *afterName = msvcStyle ? "(" : ":";
+    const char *beforeCol = msvcStyle ? "," : ":";
+
+    out << loc.fileName << afterName << loc.line;
+    if ( loc.col != 0 )
+        out << beforeCol << loc.col;
+    if ( msvcStyle )
+        out << ")";
+    return out;
}
/* Total error count. */
/* Print the opening to a warning in the input, then return the error ostream. */
ostream &warning( const InputLoc &loc )
{
- assert( loc.fileName != 0 );
- cerr << loc.fileName << ":" << loc.line << ":" <<
- loc.col << ": warning: ";
+ cerr << loc << ": warning: "; /* operator<< for InputLoc formats the location per errorFormat and carries the fileName assert. */
return cerr;
}
ostream &error( const InputLoc &loc )
{
gblErrorCount += 1;
- assert( loc.fileName != 0 );
- cerr << loc.fileName << ":" << loc.line << ": ";
+ cerr << loc << ": "; /* Unlike the removed line, this also prints the column when it is non-zero. */
return cerr;
}
}
}
-/* If any forward slash is found in argv0 then it is assumed that the path is
- * explicit and the path to the backend executable should be derived from
- * that. If no forward slash is found it is assumed the file is being run from
- * the installed location. The PREFIX supplied during configuration is used.
- * */
-char **makePathChecks( const char *argv0, const char *progName )
+void processArgs( int argc, const char **argv, InputData &id )
{
- char **result = new char*[3];
- const char *lastSlash = strrchr( argv0, '/' );
- int numChecks = 0;
-
- if ( lastSlash != 0 ) {
- char *path = strdup( argv0 );
- int givenPathLen = (lastSlash - argv0) + 1;
- path[givenPathLen] = 0;
-
- int progNameLen = strlen(progName);
- int length = givenPathLen + progNameLen + 1;
- char *check = new char[length];
- sprintf( check, "%s%s", path, progName );
- result[numChecks++] = check;
-
- length = givenPathLen + 3 + progNameLen + 1 + progNameLen + 1;
- check = new char[length];
- sprintf( check, "%s../%s/%s", path, progName, progName );
- result[numChecks++] = check;
- }
- else {
- int prefixLen = strlen(PREFIX);
- int progNameLen = strlen(progName);
- int length = prefixLen + 5 + progNameLen + 1;
- char *check = new char[length];
-
- sprintf( check, PREFIX "/bin/%s", progName );
- result[numChecks++] = check;
- }
+ ParamCheck pc("xo:dnmleabjkS:M:I:CDJRAvHh?-:sT:F:G:P:LpV", argc, argv); /* Presumably getopt-style, ':' marking options that take an argument - confirm against ParamCheck. */
- result[numChecks] = 0;
- return result;
-}
-
-
-void execBackend( const char *argv0, costream *intermed )
-{
- /* Locate the backend program */
- const char *progName = 0;
- switch ( hostLang->lang ) {
- case HostLang::C:
- case HostLang::D:
- progName = "rlgen-cd";
- break;
- case HostLang::Java:
- progName = "rlgen-java";
- break;
- case HostLang::Ruby:
- progName = "rlgen-ruby";
- break;
- }
-
- char **pathChecks = makePathChecks( argv0, progName );
-
- backendArgs.insert( 0, "rlgen-ruby" );
- backendArgs.append( intermed->b->fileName );
- backendArgs.append( 0 );
-
- pid_t pid = fork();
- if ( pid < 0 ) {
- /* Error, no child created. */
- error() << "failed to fork backend" << endp;
- }
- else if ( pid == 0 ) {
- /* child */
- while ( *pathChecks != 0 ) {
- execv( *pathChecks, backendArgs.data );
- pathChecks += 1;
- }
- error() << "failed to exec backend" << endp;
- }
- else {
- /* parent. */
- wait( 0 );
- }
-
- unlink( intermed->b->fileName );
-}
-
-char *makeIntermedTemplate( char *baseFileName )
-{
- char *result;
- char *lastSlash = strrchr( baseFileName, '/' );
- if ( lastSlash == 0 ) {
- result = new char[13];
- strcpy( result, "ragel-XXXXXX.xml" );
- }
- else {
- int baseLen = lastSlash - baseFileName + 1;
- result = new char[baseLen + 13];
- memcpy( result, baseFileName, baseLen );
- strcpy( result+baseLen, "ragel-XXXXXX.xml" );
- }
- return result;
-};
-
-char fnChars[] = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
-
-costream *openIntermed( char *inputFileName, char *outputFileName )
-{
- srandom(time(0));
- costream *result = 0;
-
- /* Which filename do we use as the base? */
- char *baseFileName = outputFileName != 0 ? outputFileName : inputFileName;
-
- /* The template for the intermediate file name. */
- char *intermedFileName = makeIntermedTemplate( baseFileName );
-
- /* Randomize the name and try to open. */
- char *firstX = strrchr( intermedFileName, 'X' ) - 5;
- for ( int tries = 0; tries < 20; tries++ ) {
- /* Choose a random name. */
- for ( int x = 0; x < 6; x++ )
- firstX[x] = fnChars[random() % 52];
-
- /* Try to open the file. */
- int fd = ::open( intermedFileName, O_WRONLY|O_EXCL|O_CREAT, S_IRUSR|S_IWUSR );
-
- if ( fd > 0 ) {
- /* Success. */
- FILE *file = fdopen( fd, "wt" );
- if ( file == 0 )
- error() << "fdopen(...) on intermediate file failed" << endp;
-
- cfilebuf *b = new cfilebuf( intermedFileName, file );
- result = new costream( b );
- break;
- }
-
- if ( errno == EACCES ) {
- error() << "failed to open temp file " << intermedFileName <<
- ", access denied" << endp;
- }
- }
-
- if ( result == 0 )
- error() << "abnormal error: cannot find unique name for temp file" << endp;
-
- return result;
-}
-
-/* Main, process args and call yyparse to start scanning input. */
-int main(int argc, char **argv)
-{
- ParamCheck pc("o:nmleabjkS:M:CDJRvHh?-:s", argc, argv);
- char *inputFileName = 0;
- char *outputFileName = 0;
+ /* Walk the command line. Switches update the file-level option globals; the
+ * input/output file names and the -I directories are recorded on id.
+ * FIXME: Need to check code styles VS language. */
while ( pc.check() ) {
switch ( pc.state ) {
case ParamCheck::match:
switch ( pc.parameter ) {
+ case 'V':
+ generateDot = true;
+ break;
+
+ case 'x':
+ generateXML = true;
+ break;
+
/* Output. */
case 'o':
- if ( *pc.parameterArg == 0 )
+ if ( *pc.paramArg == 0 )
error() << "a zero length output file name was given" << endl;
- else if ( outputFileName != 0 )
+ else if ( id.outputFileName != 0 )
error() << "more than one output file name was given" << endl;
else {
/* Ok, remember the output file name. */
- outputFileName = pc.parameterArg;
- backendArgs.append( "-o" );
- backendArgs.append( pc.parameterArg );
+ id.outputFileName = pc.paramArg;
}
break;
+ /* Flag for turning off duplicate action removal. */
+ case 'd':
+ wantDupsRemoved = false;
+ break;
+
/* Minimization, mostly hidden options. */
case 'n':
minimizeOpt = MinimizeNone;
/* Machine spec. */
case 'S':
- if ( *pc.parameterArg == 0 )
+ if ( *pc.paramArg == 0 )
error() << "please specify an argument to -S" << endl;
else if ( machineSpec != 0 )
error() << "more than one -S argument was given" << endl;
else {
/* Ok, remember the path to the machine to generate. */
- machineSpec = pc.parameterArg;
+ machineSpec = pc.paramArg;
}
break;
/* Machine path. */
case 'M':
- if ( *pc.parameterArg == 0 )
+ if ( *pc.paramArg == 0 )
error() << "please specify an argument to -M" << endl;
else if ( machineName != 0 )
error() << "more than one -M argument was given" << endl;
else {
/* Ok, remember the machine name to generate. */
- machineName = pc.parameterArg;
+ machineName = pc.paramArg;
+ }
+ break;
+
+ case 'I': /* Include search directory; repeats are accepted and appended in order. */
+ if ( *pc.paramArg == 0 )
+ error() << "please specify an argument to -I" << endl;
+ else {
+ id.includePaths.append( pc.paramArg );
}
break;
case 'R':
hostLang = &hostLangRuby;
break;
+ case 'A': /* C# host language. */
+ hostLang = &hostLangCSharp;
+ break;
/* Version and help. */
case 'v':
version();
- exit(0);
+ break; /* Not reached: version() exits. */
case 'H': case 'h': case '?':
usage();
- exit(0);
+ break; /* Not reached: usage() exits. */
case 's':
printStatistics = true;
break;
- case '-':
- if ( strcasecmp(pc.parameterArg, "help") == 0 ) {
+ case '-': { /* Long options: split "name=value" on a private copy of the argument. */
+ char *arg = strdup( pc.paramArg );
+ char *eq = strchr( arg, '=' );
+
+ if ( eq != 0 )
+ *eq++ = 0; /* Terminate the name; eq now points at the value. */
+
+ if ( strcmp( arg, "help" ) == 0 ) /* Case-sensitive now; the removed code used strcasecmp. */
usage();
- exit(0);
- }
- else if ( strcasecmp(pc.parameterArg, "version") == 0 ) {
+ else if ( strcmp( arg, "version" ) == 0 )
version();
- exit(0);
+ else if ( strcmp( arg, "error-format" ) == 0 ) {
+ if ( eq == 0 )
+ error() << "expecting '=value' for error-format" << endl;
+ else if ( strcmp( eq, "gnu" ) == 0 )
+ errorFormat = ErrorFormatGNU;
+ else if ( strcmp( eq, "msvc" ) == 0 )
+ errorFormat = ErrorFormatMSVC;
+ else
+ error() << "invalid value for error-format" << endl;
}
+ else if ( strcmp( arg, "rbx" ) == 0 )
+ rubyImpl = Rubinius;
else {
- error() << "--" << pc.parameterArg <<
+ error() << "--" << pc.paramArg <<
" is an invalid argument" << endl;
}
+ free( arg );
+ break;
+ }
+
+ /* Code style arguments; each one sets codeStyle directly. */
+ case 'T':
+ if ( pc.paramArg[0] == '0' )
+ codeStyle = GenTables;
+ else if ( pc.paramArg[0] == '1' )
+ codeStyle = GenFTables;
+ else {
+ error() << "-T" << pc.paramArg[0] <<
+ " is an invalid argument" << endl;
+ exit(1); /* Unlike most argument errors here, this one stops immediately. */
+ }
+ break;
+ case 'F':
+ if ( pc.paramArg[0] == '0' )
+ codeStyle = GenFlat;
+ else if ( pc.paramArg[0] == '1' )
+ codeStyle = GenFFlat;
+ else {
+ error() << "-F" << pc.paramArg[0] <<
+ " is an invalid argument" << endl;
+ exit(1);
+ }
+ break;
+ case 'G':
+ if ( pc.paramArg[0] == '0' )
+ codeStyle = GenGoto;
+ else if ( pc.paramArg[0] == '1' )
+ codeStyle = GenFGoto;
+ else if ( pc.paramArg[0] == '2' )
+ codeStyle = GenIpGoto;
+ else {
+ error() << "-G" << pc.paramArg[0] <<
+ " is an invalid argument" << endl;
+ exit(1);
+ }
+ break;
+ case 'P':
+ codeStyle = GenSplit;
+ numSplitPartitions = atoi( pc.paramArg ); /* NOTE(review): atoi yields 0 for non-numeric input and nothing here rejects it. */
+ break;
+
+ case 'p':
+ displayPrintables = true;
+ break;
+
+ case 'L':
+ noLineDirectives = true;
+ break;
+ }
break;
/* It is interpreted as an input file. */
if ( *pc.curArg == 0 )
error() << "a zero length input file name was given" << endl;
- else if ( inputFileName != 0 )
+ else if ( id.inputFileName != 0 )
error() << "more than one input file name was given" << endl;
else {
/* OK, Remember the filename. */
- inputFileName = pc.curArg;
+ id.inputFileName = pc.curArg;
}
break;
}
}
+}
- /* Bail on above errors. */
- if ( gblErrorCount > 0 )
- exit(1);
-
- /* Make sure we are not writing to the same file as the input file. */
- if ( inputFileName != 0 && outputFileName != 0 &&
- strcmp( inputFileName, outputFileName ) == 0 )
- {
- error() << "output file \"" << outputFileName <<
- "\" is the same as the input file" << endp;
- }
-
+void process( InputData &id )
+{ /* Drive one run: scan id.inputFileName, send EOF to the parsers, prepare
+ * and (unless -x set generateXML) reduce the machines, then write the output.
+ * Exits with status 1 as soon as gblErrorCount is non-zero after a stage. */
/* Open the input file for reading. */
- istream *inStream;
- if ( inputFileName != 0 ) {
- /* Open the input file for reading. */
- ifstream *inFile = new ifstream( inputFileName );
- inStream = inFile;
- if ( ! inFile->is_open() )
- error() << "could not open " << inputFileName << " for reading" << endp;
- }
- else {
- inputFileName = "<stdin>";
- inStream = &cin;
- }
+ assert( id.inputFileName != 0 ); /* main() reports a missing input file and bails before calling process(). */
+ ifstream *inFile = new ifstream( id.inputFileName );
+ if ( ! inFile->is_open() )
+ error() << "could not open " << id.inputFileName << " for reading" << endp;
/* Used for just a few things. */
std::ostringstream hostData;
- if ( machineSpec == 0 && machineName == 0 )
- hostData << "<host line=\"1\" col=\"1\">";
+ /* Make the first input item: host data located at line 1, column 1 of the
+ * input file. */
+ InputItem *firstInputItem = new InputItem;
+ firstInputItem->type = InputItem::HostData;
+ firstInputItem->loc.fileName = id.inputFileName;
+ firstInputItem->loc.line = 1;
+ firstInputItem->loc.col = 1;
+ id.inputItems.append( firstInputItem );
- Scanner scanner( inputFileName, *inStream, hostData, 0, 0, 0, false );
+ Scanner scanner( id, id.inputFileName, *inFile, 0, 0, 0, false );
scanner.do_scan();
/* Finished, final check for errors.. */
if ( gblErrorCount > 0 )
- return 1;
-
+ exit(1);
+
/* Now send EOF to all parsers. */
- terminateAllParsers();
+ id.terminateAllParsers();
+
+ /* Bail on above error. */
+ if ( gblErrorCount > 0 )
+ exit(1);
+
+ /* No backend program is launched here; the remaining stages are calls on id. */
+ /* Compiles machines. */
+ id.prepareMachineGen();
- /* Finished, final check for errors.. */
if ( gblErrorCount > 0 )
- return 1;
+ exit(1);
+
+ id.makeOutputStream();
+
+ /* Generates the reduced machine, which we use to write output. Skipped when
+ * generateXML is set. */
+ if ( !generateXML ) {
+ id.generateReduced();
- if ( machineSpec == 0 && machineName == 0 )
- hostData << "</host>\n";
+ if ( gblErrorCount > 0 )
+ exit(1);
+ }
+ id.verifyWritesHaveData();
if ( gblErrorCount > 0 )
- return 1;
-
- costream *intermed = openIntermed( inputFileName, outputFileName );
+ exit(1);
+
+ /*
+ * From this point on we should not be reporting any errors.
+ */
+
+ id.openOutput();
+ id.writeOutput();
+
+ /* Close the input file. */
+ delete inFile;
+
+ /* If writing to a file, delete the ostream, causing it to flush.
+ * Standard out is flushed automatically. */
+ if ( id.outputFileName != 0 ) {
+ delete id.outStream;
+ delete id.outFilter;
+ }
+
+ assert( gblErrorCount == 0 ); /* Guards the "no errors from this point on" expectation stated above. */
+}
+
+/* Build the template for an intermediate file name: "ragel-XXXXXX.xml" placed
+ * in the same directory as baseFileName (everything up to and including its
+ * last '/'), or a bare "ragel-XXXXXX.xml" when baseFileName contains no '/'.
+ * baseFileName must not be null. The result is allocated with new[]; the
+ * caller owns it and must release it with delete[]. */
+char *makeIntermedTemplate( const char *baseFileName )
+{
+    const char *templ = "ragel-XXXXXX.xml";
+    const size_t templLen = strlen( templ );
+
+    /* Length of the directory prefix including its trailing slash; zero when
+     * there is no directory part. Kept in size_t so the pointer difference is
+     * never narrowed to int. */
+    const char *lastSlash = strrchr( baseFileName, '/' );
+    const size_t dirLen = lastSlash == 0 ? 0 :
+            static_cast<size_t>( lastSlash - baseFileName ) + 1;
+
+    char *result = new char[dirLen + templLen + 1];
+    memcpy( result, baseFileName, dirLen );
+    /* The +1 copies the template's terminating NUL as well. */
+    memcpy( result + dirLen, templ, templLen + 1 );
+    return result;
+}
+
+/* Main: parse the arguments into an InputData, validate the input and output
+ * file names, then hand the InputData to process(). */
+int main( int argc, const char **argv )
+{
+ InputData id;
+
+ processArgs( argc, argv, id );
- /* Write the machines, then the surrounding code. */
- writeMachines( *intermed, hostData.str(), inputFileName );
+ /* Require an input file. If we use standard in then we won't have a file
+ * name on which to base the output. */
+ if ( id.inputFileName == 0 )
+ error() << "no input file given" << endl;
+
+ /* Bail on argument processing errors. */
+ if ( gblErrorCount > 0 )
+ exit(1);
- /* Close the intermediate file. */
- intermed->fclose();
+ /* Make sure we are not writing to the same file as the input file. */
+ if ( id.inputFileName != 0 && id.outputFileName != 0 &&
+ strcmp( id.inputFileName, id.outputFileName ) == 0 )
+ {
+ error() << "output file \"" << id.outputFileName <<
+ "\" is the same as the input file" << endp;
+ }
- /* Run the backend process. */
- execBackend( argv[0], intermed );
+ process( id ); /* process() calls exit(1) on any error, so reaching the return below means success. */
return 0;
}