/*
- * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ * Copyright 2001-2007 Adrian Thurston <thurston@complang.org>
*/
/* This file is part of Ragel.
#include <fstream>
#include <unistd.h>
#include <sstream>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#include <psapi.h>
+#include <time.h>
+#include <io.h>
+#include <process.h>
+
+#if _MSC_VER
+#define S_IRUSR _S_IREAD
+#define S_IWUSR _S_IWRITE
+#endif
+#endif
/* Parsing. */
#include "ragel.h"
+#include "rlscan.h"
/* Parameters and output. */
#include "pcheck.h"
#include "vector.h"
#include "version.h"
#include "common.h"
+#include "inputdata.h"
using std::istream;
using std::ostream;
using std::cout;
using std::cerr;
using std::endl;
+using std::ios;
+using std::streamsize;
/* Controls minimization. The -n option sets minimizeOpt to MinimizeNone. */
MinimizeLevel minimizeLevel = MinimizePartition2;
MinimizeOpt minimizeOpt = MinimizeMostOps;
/* Graphviz dot file generation. */
-char *machineSpec = 0, *machineName = 0;
+const char *machineSpec = 0, *machineName = 0; /* Set by -S and -M. */
bool machineSpecFound = false;
+bool wantDupsRemoved = true; /* Cleared by -d. */
bool printStatistics = false; /* Set by -s. */
+bool generateXML = false; /* Set by -x. */
+bool generateDot = false; /* Set by -V. */
+
+/* Target language and output style. */
+CodeStyle codeStyle = GenTables; /* Changed by -T, -F, -G and -P. */
+
+int numSplitPartitions = 0; /* Parsed from the argument of -P. */
+bool noLineDirectives = false; /* Set by -L. */
+
+bool displayPrintables = false; /* Set by -p. */
+
+/* Target ruby impl. Switched to Rubinius by the --rbx long option. */
+RubyImplEnum rubyImpl = MRI;
/* Print a summary of the options. The added exit(0) at the end means this
 * function terminates the process, so callers need no exit of their own. */
void usage()
" -v, --version Print version information and exit\n"
" -o <file> Write output to <file>\n"
" -s Print some statistics on stderr\n"
+" -d Do not remove duplicates from action lists\n"
+" -I <dir> Add <dir> to the list of directories to search\n"
+" for included and imported files\n"
+"error reporting format:\n"
+" --error-format=gnu file:line:column: message (default)\n"
+" --error-format=msvc file(line,column): message\n"
"fsm minimization:\n"
" -n Do not perform minimization\n"
" -m Minimize at the end of the compilation\n"
" -l Minimize after most operations (default)\n"
" -e Minimize after every operation\n"
-"machine selection:\n"
-" -S <spec> FSM specification to output for -V\n"
-" -M <machine> Machine definition/instantiation to output for -V\n"
+"visualization:\n"
+" -x Run the frontend only: emit XML intermediate format\n"
+" -V Generate a dot file for Graphviz\n"
+" -p Display printable characters on labels\n"
+" -S <spec> FSM specification to output (for graphviz output)\n"
+" -M <machine> Machine definition/instantiation to output (for graphviz output)\n"
"host language:\n"
" -C The host language is C, C++, Obj-C or Obj-C++ (default)\n"
" -D The host language is D\n"
" -J The host language is Java\n"
" -R The host language is Ruby\n"
+" -A The host language is C#\n"
+"line directives: (C/D/C#)\n"
+" -L Inhibit writing of #line directives\n"
+"code style: (C/D/Java/Ruby/C#)\n"
+" -T0 Table driven FSM (default)\n"
+"code style: (C/D/Ruby/C#)\n"
+" -T1 Faster table driven FSM\n"
+" -F0 Flat table driven FSM\n"
+" -F1 Faster flat table-driven FSM\n"
+"code style: (C/D/C#)\n"
+" -G0 Goto-driven FSM\n"
+" -G1 Faster goto-driven FSM\n"
+"code style: (C/D)\n"
+" -G2 Really fast goto-driven FSM\n"
+" -P<N> N-Way Split really fast goto-driven FSM\n"
;
+
+ exit(0);
}
-/* Print version information. */
+/* Print version information and exit. The banner (VERSION, PUBDATE and the
+ * copyright line) goes to cout, then the process ends with status 0, so
+ * callers need no exit of their own. */
void version()
{
cout << "Ragel State Machine Compiler version " VERSION << " " PUBDATE << endl <<
- "Copyright (c) 2001-2006 by Adrian Thurston" << endl;
+ "Copyright (c) 2001-2009 by Adrian Thurston" << endl;
+ exit(0);
+}
+
+/* Error reporting format. Defaults to the GNU style; the --error-format
+ * long option switches between ErrorFormatGNU and ErrorFormatMSVC, and the
+ * InputLoc stream operator consults it when printing a location. */
+ErrorFormat errorFormat = ErrorFormatGNU;
+
+InputLoc makeInputLoc( const char *fileName, int line, int col)
+{ /* Bundle a file name, line and column into an InputLoc returned by value. */
+ InputLoc loc = { fileName, line, col }; /* NOTE(review): brace initialization assumes InputLoc declares its members in this order -- confirm. */
+ return loc;
+}
+
+ostream &operator<<( ostream &out, const InputLoc &loc )
+{ /* Write loc to out in the style selected by errorFormat and return out. */
+ assert( loc.fileName != 0 ); /* A location without a file name is a programming error. */
+ switch ( errorFormat ) {
+ case ErrorFormatMSVC: /* file(line,col) -- the column is left out when it is zero. */
+ out << loc.fileName << "(" << loc.line;
+ if ( loc.col )
+ out << "," << loc.col;
+ out << ")";
+ break;
+
+ default: /* Any other value, including the GNU default: file:line:col, column left out when zero. */
+ out << loc.fileName << ":" << loc.line;
+ if ( loc.col )
+ out << ":" << loc.col;
+ break;
+ }
+ return out;
}
/* Total error count. */
/* Print the opening to a warning in the input, then return the error ostream.
 * The location prefix is written with the InputLoc stream operator, so it
 * follows the selected error format and omits a zero column. The caller
 * appends the message text to the returned stream. Unlike error(), this does
 * not change gblErrorCount. */
ostream &warning( const InputLoc &loc )
{
- assert( loc.fileName != 0 );
- cerr << loc.fileName << ":" << loc.line << ":" <<
- loc.col << ": warning: ";
+ cerr << loc << ": warning: ";
return cerr;
}
/* Count an error at the given input location, print the location prefix to
 * cerr and return cerr so the caller can append the message text. */
ostream &error( const InputLoc &loc )
{
gblErrorCount += 1; /* Callers test this counter later to decide whether to bail out. */
- assert( loc.fileName != 0 );
- cerr << loc.fileName << ":" << loc.line << ": ";
+ cerr << loc << ": "; /* The stream operator asserts the file name and applies the error format. */
return cerr;
}
}
}
-/* Main, process args and call yyparse to start scanning input. */
-int main(int argc, char **argv)
+void processArgs( int argc, const char **argv, InputData &id )
{
- ParamCheck pc("o:nmleabjkS:M:CDJRvHh?-:s", argc, argv);
- char *inputFileName = 0;
- char *outputFileName = 0;
+ ParamCheck pc("xo:dnmleabjkS:M:I:CDJRAvHh?-:sT:F:G:P:LpV", argc, argv);
+
+ /* Walk the command line. Option flags update the file-level settings;
+ * the input file name, the output file name and the -I include paths are
+ * stored in id. Most problems are reported through error() and parsing
+ * continues, but an invalid -T, -F or -G value exits at once with status 1.
+ * FIXME: Need to check code styles VS language. */
while ( pc.check() ) {
switch ( pc.state ) {
case ParamCheck::match:
switch ( pc.parameter ) {
+ case 'V': /* Graphviz dot output. */
+ generateDot = true;
+ break;
+
+ case 'x': /* XML intermediate format output. */
+ generateXML = true;
+ break;
+
/* Output. */
case 'o':
- if ( *pc.parameterArg == 0 )
+ if ( *pc.paramArg == 0 )
error() << "a zero length output file name was given" << endl;
- else if ( outputFileName != 0 )
+ else if ( id.outputFileName != 0 )
error() << "more than one output file name was given" << endl;
else {
/* Ok, remember the output file name. */
- outputFileName = pc.parameterArg;
+ id.outputFileName = pc.paramArg;
}
break;
+ /* Flag for turning off duplicate action removal. */
+ case 'd':
+ wantDupsRemoved = false;
+ break;
+
/* Minimization, mostly hidden options. */
case 'n':
minimizeOpt = MinimizeNone;
/* Machine spec. */
case 'S':
- if ( *pc.parameterArg == 0 )
+ if ( *pc.paramArg == 0 )
error() << "please specify an argument to -S" << endl;
else if ( machineSpec != 0 )
error() << "more than one -S argument was given" << endl;
else {
/* Ok, remember the path to the machine to generate. */
- machineSpec = pc.parameterArg;
+ machineSpec = pc.paramArg;
}
break;
/* Machine path. */
case 'M':
- if ( *pc.parameterArg == 0 )
+ if ( *pc.paramArg == 0 )
error() << "please specify an argument to -M" << endl;
else if ( machineName != 0 )
error() << "more than one -M argument was given" << endl;
else {
/* Ok, remember the machine name to generate. */
- machineName = pc.parameterArg;
+ machineName = pc.paramArg;
+ }
+ break;
+
+ case 'I': /* Include search path; may be given more than once. */
+ if ( *pc.paramArg == 0 )
+ error() << "please specify an argument to -I" << endl;
+ else {
+ id.includePaths.append( pc.paramArg );
}
break;
/* Host language types. */
case 'C':
- hostLangType = CCode;
hostLang = &hostLangC;
break;
case 'D':
- hostLangType = DCode;
hostLang = &hostLangD;
break;
case 'J':
- hostLangType = JavaCode;
hostLang = &hostLangJava;
break;
case 'R':
- hostLangType = RubyCode;
hostLang = &hostLangRuby;
break;
+ case 'A':
+ hostLang = &hostLangCSharp;
+ break;
/* Version and help. */
case 'v':
version();
- exit(0);
+ break; /* Not reached: version() exits the process itself. */
case 'H': case 'h': case '?':
usage();
- exit(0);
+ break; /* Not reached: usage() exits the process itself. */
case 's':
printStatistics = true;
break;
- case '-':
- if ( strcasecmp(pc.parameterArg, "help") == 0 ) {
+ case '-': { /* Long options of the form --name or --name=value. */
+ char *arg = strdup( pc.paramArg ); /* Private copy so the equals sign can be overwritten. */
+ char *eq = strchr( arg, '=' );
+
+ if ( eq != 0 )
+ *eq++ = 0; /* Split in place: arg is now the name, eq points at the value. */
+
+ if ( strcmp( arg, "help" ) == 0 ) /* NOTE(review): strcmp makes long options case-sensitive, the removed code used strcasecmp. */
usage();
- exit(0);
- }
- else if ( strcasecmp(pc.parameterArg, "version") == 0 ) {
+ else if ( strcmp( arg, "version" ) == 0 )
version();
- exit(0);
+ else if ( strcmp( arg, "error-format" ) == 0 ) {
+ if ( eq == 0 )
+ error() << "expecting '=value' for error-format" << endl;
+ else if ( strcmp( eq, "gnu" ) == 0 )
+ errorFormat = ErrorFormatGNU;
+ else if ( strcmp( eq, "msvc" ) == 0 )
+ errorFormat = ErrorFormatMSVC;
+ else
+ error() << "invalid value for error-format" << endl;
}
+ else if ( strcmp( arg, "rbx" ) == 0 )
+ rubyImpl = Rubinius;
else {
- error() << "--" << pc.parameterArg <<
+ error() << "--" << pc.paramArg <<
" is an invalid argument" << endl;
}
+ free( arg ); /* NOTE(review): skipped on the help and version paths because those exit first. */
+ break;
+ }
+
+ /* Passthrough args. */
+ case 'T': /* Only the first character of the argument is examined. */
+ if ( pc.paramArg[0] == '0' )
+ codeStyle = GenTables;
+ else if ( pc.paramArg[0] == '1' )
+ codeStyle = GenFTables;
+ else {
+ error() << "-T" << pc.paramArg[0] <<
+ " is an invalid argument" << endl;
+ exit(1);
+ }
+ break;
+ case 'F':
+ if ( pc.paramArg[0] == '0' )
+ codeStyle = GenFlat;
+ else if ( pc.paramArg[0] == '1' )
+ codeStyle = GenFFlat;
+ else {
+ error() << "-F" << pc.paramArg[0] <<
+ " is an invalid argument" << endl;
+ exit(1);
+ }
+ break;
+ case 'G':
+ if ( pc.paramArg[0] == '0' )
+ codeStyle = GenGoto;
+ else if ( pc.paramArg[0] == '1' )
+ codeStyle = GenFGoto;
+ else if ( pc.paramArg[0] == '2' )
+ codeStyle = GenIpGoto;
+ else {
+ error() << "-G" << pc.paramArg[0] <<
+ " is an invalid argument" << endl;
+ exit(1);
+ }
+ break;
+ case 'P':
+ codeStyle = GenSplit;
+ numSplitPartitions = atoi( pc.paramArg ); /* NOTE(review): no validation here; atoi yields 0 for non-numeric text -- confirm it is checked downstream. */
+ break;
+
+ case 'p':
+ displayPrintables = true;
+ break;
+
+ case 'L':
+ noLineDirectives = true;
+ break;
}
break;
/* It is interpreted as an input file. */
if ( *pc.curArg == 0 )
error() << "a zero length input file name was given" << endl;
- else if ( inputFileName != 0 )
+ else if ( id.inputFileName != 0 )
error() << "more than one input file name was given" << endl;
else {
/* OK, Remember the filename. */
- inputFileName = pc.curArg;
+ id.inputFileName = pc.curArg;
}
break;
}
}
+}
- /* Bail on above errors. */
+void process( InputData &id )
+{ /* Run the compile for one input file: scan it, send EOF to the parsers,
+ * prepare the machines, generate the reduced machine unless XML output was
+ * requested, then open and write the output. After each stage the function
+ * exits with status 1 when gblErrorCount is nonzero. */
+ /* Open the input file for reading. NOTE(review): endp is presumably a
+ * terminating manipulator; otherwise the scanner below would run on a
+ * stream that failed to open -- confirm. */
+ assert( id.inputFileName != 0 );
+ ifstream *inFile = new ifstream( id.inputFileName );
+ if ( ! inFile->is_open() )
+ error() << "could not open " << id.inputFileName << " for reading" << endp;
+
+ /* Used for just a few things. NOTE(review): not referenced again in the
+ * visible body of this function. */
+ std::ostringstream hostData;
+
+ /* Make the first input item: a HostData item located at line 1,
+ * column 1 of the input file, appended to id.inputItems. */
+ InputItem *firstInputItem = new InputItem;
+ firstInputItem->type = InputItem::HostData;
+ firstInputItem->loc.fileName = id.inputFileName;
+ firstInputItem->loc.line = 1;
+ firstInputItem->loc.col = 1;
+ id.inputItems.append( firstInputItem );
+
+ Scanner scanner( id, id.inputFileName, *inFile, 0, 0, 0, false );
+ scanner.do_scan();
+
+ /* Finished, final check for errors.. */
if ( gblErrorCount > 0 )
exit(1);
- /* Make sure we are not writing to the same file as the input file. */
- if ( inputFileName != 0 && outputFileName != 0 &&
- strcmp( inputFileName, outputFileName ) == 0 )
- {
- error() << "output file \"" << outputFileName <<
- "\" is the same as the input file" << endl;
- }
+ /* Now send EOF to all parsers. */
+ id.terminateAllParsers();
- /* Open the input file for reading. */
- istream *inStream;
- if ( inputFileName != 0 ) {
- /* Open the input file for reading. */
- ifstream *inFile = new ifstream( inputFileName );
- inStream = inFile;
- if ( ! inFile->is_open() )
- error() << "could not open " << inputFileName << " for reading" << endl;
- }
- else {
- inputFileName = "<stdin>";
- inStream = &cin;
- }
+ /* Bail on above error. */
+ if ( gblErrorCount > 0 )
+ exit(1);
+ /* Locate the backend program. NOTE(review): no backend lookup happens in
+ * the visible code; this comment looks stale. */
+ /* Compiles machines. */
+ id.prepareMachineGen();
- /* Bail on above errors. */
if ( gblErrorCount > 0 )
exit(1);
- std::ostringstream outputBuffer;
+ id.makeOutputStream();
- if ( machineSpec == 0 && machineName == 0 )
- outputBuffer << "<host line=\"1\" col=\"1\">";
+ /* Generates the reduced machine, which we use to write output. Skipped
+ * when the -x option asked for XML output. */
+ if ( !generateXML ) {
+ id.generateReduced();
- scan( inputFileName, *inStream, outputBuffer );
+ if ( gblErrorCount > 0 )
+ exit(1);
+ }
- /* Finished, final check for errors.. */
+ id.verifyWritesHaveData();
if ( gblErrorCount > 0 )
- return 1;
-
- /* Now send EOF to all parsers. */
- terminateAllParsers();
+ exit(1);
- /* Finished, final check for errors.. */
- if ( gblErrorCount > 0 )
- return 1;
+ /*
+ * From this point on we should not be reporting any errors.
+ */
+
+ id.openOutput();
+ id.writeOutput();
+
+ /* Close the input and the intermediate file. Only the input stream is
+ * released here; it is not released on the exit paths above. */
+ delete inFile;
+
+ /* If writing to a file, delete the ostream, causing it to flush.
+ * Standard out is flushed automatically. */
+ if ( id.outputFileName != 0 ) {
+ delete id.outStream;
+ delete id.outFilter;
+ }
+
+ assert( gblErrorCount == 0 ); /* Holds if nothing after the last check reported an error. */
+}
+
+char *makeIntermedTemplate( const char *baseFileName )
+{ /* Build the name template ragel-XXXXXX.xml, prefixed with the directory
+ * part of baseFileName when it has one. The result is allocated with
+ * new[] and returned; nothing in this function releases it, so the
+ * caller owns it. */
+ char *result = 0;
+ const char *templ = "ragel-XXXXXX.xml";
+ const char *lastSlash = strrchr( baseFileName, '/' ); /* NOTE(review): only a forward slash counts as a separator here. */
+ if ( lastSlash == 0 ) { /* No directory part: the template alone. */
+ result = new char[strlen(templ)+1];
+ strcpy( result, templ );
+ }
+ else {
+ int baseLen = lastSlash - baseFileName + 1; /* Length of the directory prefix including the slash. */
+ result = new char[baseLen + strlen(templ) + 1];
+ memcpy( result, baseFileName, baseLen );
+ strcpy( result+baseLen, templ ); /* Appends the template and the terminating null. */
+ }
+ return result;
+}; /* NOTE(review): the semicolon after the closing brace is redundant. */
+
+/* Main: parse the arguments into an InputData, require an input file,
+ * refuse an output file that has the same name as the input, then hand
+ * over to process(). Returns 0 on success; the failure paths exit with
+ * status 1. */
+int main( int argc, const char **argv )
+{
+ InputData id;
- if ( machineSpec == 0 && machineName == 0 )
- outputBuffer << "</host>\n";
+ processArgs( argc, argv, id );
- checkMachines();
+ /* Require an input file. If we use standard in then we won't have a file
+ * name on which to base the output. */
+ if ( id.inputFileName == 0 )
+ error() << "no input file given" << endl;
+ /* Bail on argument processing errors. */
if ( gblErrorCount > 0 )
- return 1;
-
- ostream *outputFile = 0;
- if ( outputFileName != 0 )
- outputFile = new ofstream( outputFileName );
- else
- outputFile = &cout;
-
- /* Write the machines, then the surrounding code. */
- writeMachines( *outputFile, outputBuffer.str(), inputFileName );
-
- if ( outputFileName != 0 )
- delete outputFile;
+ exit(1);
+
+ /* Make sure we are not writing to the same file as the input file. The
+ * comparison is on the two names as given, not on resolved paths.
+ * NOTE(review): endp presumably terminates the program; if it did not,
+ * process() would still run -- confirm. */
+ if ( id.inputFileName != 0 && id.outputFileName != 0 &&
+ strcmp( id.inputFileName, id.outputFileName ) == 0 )
+ {
+ error() << "output file \"" << id.outputFileName <<
+ "\" is the same as the input file" << endp;
+ }
+
+ process( id );
return 0;
}