A bit more cleanup.
[external/ragel.git] / ragel / main.cpp
index 11b9659..120fae7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- *  Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ *  Copyright 2001-2007 Adrian Thurston <thurston@complang.org>
  */
 
 /*  This file is part of Ragel.
 #include <fstream>
 #include <unistd.h>
 #include <sstream>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#include <psapi.h>
+#include <time.h>
+#include <io.h>
+#include <process.h>
+
+#if _MSC_VER
+#define S_IRUSR _S_IREAD
+#define S_IWUSR _S_IWRITE
+#endif
+#endif
 
 /* Parsing. */
 #include "ragel.h"
@@ -36,6 +54,8 @@
 #include "vector.h"
 #include "version.h"
 #include "common.h"
+#include "xmlparse.h"
+#include "inputdata.h"
 
 using std::istream;
 using std::ostream;
@@ -45,16 +65,40 @@ using std::cin;
 using std::cout;
 using std::cerr;
 using std::endl;
+using std::ios;
+using std::streamsize;
 
 /* Controls minimization. */
 MinimizeLevel minimizeLevel = MinimizePartition2;
 MinimizeOpt minimizeOpt = MinimizeMostOps;
 
 /* Graphviz dot file generation. */
-char *machineSpec = 0, *machineName = 0;
+const char *machineSpec = 0, *machineName = 0;
 bool machineSpecFound = false;
+bool wantDupsRemoved = true;
 
 bool printStatistics = false;
+bool frontendOnly = false;
+bool generateDot = false;
+
+/* Target language and output style. */
+CodeStyleEnum codeStyle = GenTables;
+
+int numSplitPartitions = 0;
+bool noLineDirectives = false;
+
+bool displayPrintables = false;
+bool graphvizDone = false;
+
+/* Target ruby impl */
+RubyImplEnum rubyImpl = MRI;
+
+ArgsVector includePaths;
+
+istream *inStream = 0;
+ostream *outStream = 0;
+output_filter *outFilter = 0;
+const char *outputFileName = 0;
 
 /* Print a summary of the options. */
 void usage()
@@ -66,27 +110,82 @@ void usage()
 "   -v, --version        Print version information and exit\n"
 "   -o <file>            Write output to <file>\n"
 "   -s                   Print some statistics on stderr\n"
+"   -d                   Do not remove duplicates from action lists\n"
+"   -I <dir>             Add <dir> to the list of directories to search\n"
+"                        for included and imported files\n"
+"error reporting format:\n"
+"   --error-format=gnu   file:line:column: message (default)\n"
+"   --error-format=msvc  file(line,column): message\n"
 "fsm minimization:\n"
 "   -n                   Do not perform minimization\n"
 "   -m                   Minimize at the end of the compilation\n"
 "   -l                   Minimize after most operations (default)\n"
 "   -e                   Minimize after every operation\n"
-"machine selection:\n"
-"   -S <spec>            FSM specification to output (for rlgen-dot)\n"
-"   -M <machine>         Machine definition/instantiation to output (for rlgen-dot)\n"
+"visualization:\n"
+"   -x                   Run the frontend only: emit XML intermediate format\n"
+"   -V                   Generate a dot file for Graphviz\n"
+"   -p                   Display printable characters on labels\n"
+"   -S <spec>            FSM specification to output (for graphviz output)\n"
+"   -M <machine>         Machine definition/instantiation to output (for graphviz output)\n"
 "host language:\n"
 "   -C                   The host language is C, C++, Obj-C or Obj-C++ (default)\n"
 "   -D                   The host language is D\n"
 "   -J                   The host language is Java\n"
 "   -R                   The host language is Ruby\n"
+"   -A                   The host language is C#\n"
+"line directives: (C/D/C# only)\n"
+"   -L                   Inhibit writing of #line directives\n"
+"code style: (C/Ruby/C# only)\n"
+"   -T0                  Table driven FSM (default)\n"
+"   -T1                  Faster table driven FSM\n"
+"   -F0                  Flat table driven FSM\n"
+"   -F1                  Faster flat table-driven FSM\n"
+"code style: (C/C# only)\n"
+"   -G0                  Goto-driven FSM\n"
+"   -G1                  Faster goto-driven FSM\n"
+"code style: (C only)\n"
+"   -G2                  Really fast goto-driven FSM\n"
+"   -P<N>                N-Way Split really fast goto-driven FSM\n"
        ;       
+
+       exit(0);
 }
 
-/* Print version information. */
+/* Print version information and exit. */
 void version()
 {
        cout << "Ragel State Machine Compiler version " VERSION << " " PUBDATE << endl <<
-                       "Copyright (c) 2001-2007 by Adrian Thurston" << endl;
+                       "Copyright (c) 2001-2008 by Adrian Thurston" << endl;
+       exit(0);
+}
+
+/* Error reporting format. */
+ErrorFormat errorFormat = ErrorFormatGNU;
+
+/* Bundle a file name, a line number and a column number into an InputLoc.
+ * The file name pointer is stored as given, it is not copied. */
+InputLoc makeInputLoc( const char *fileName, int line, int col)
+{
+       InputLoc where = { fileName, line, col };
+       return where;
+}
+
+/* Write an input location to a stream using the layout selected by the
+ * global errorFormat. A column of zero is left out of the output. */
+ostream &operator<<( ostream &out, const InputLoc &loc )
+{
+       assert( loc.fileName != 0 );
+
+       if ( errorFormat == ErrorFormatMSVC ) {
+               /* file(line,col) */
+               out << loc.fileName << "(" << loc.line;
+               if ( loc.col != 0 )
+                       out << "," << loc.col;
+               out << ")";
+       }
+       else {
+               /* file:line:col, used for every format other than MSVC. */
+               out << loc.fileName << ":" << loc.line;
+               if ( loc.col != 0 )
+                       out << ":" << loc.col;
+       }
+
+       return out;
+}
 
 /* Total error count. */
@@ -95,9 +194,7 @@ int gblErrorCount = 0;
 /* Print the opening to a warning in the input, then return the error ostream. */
 ostream &warning( const InputLoc &loc )
 {
-       assert( loc.fileName != 0 );
-       cerr << loc.fileName << ":" << loc.line << ":" << 
-                       loc.col << ": warning: ";
+       cerr << loc << ": warning: ";
        return cerr;
 }
 
@@ -112,8 +209,7 @@ ostream &error()
 ostream &error( const InputLoc &loc )
 {
        gblErrorCount += 1;
-       assert( loc.fileName != 0 );
-       cerr << loc.fileName << ":" << loc.line << ": ";
+       cerr << loc << ": ";
        return cerr;
 }
 
@@ -127,94 +223,41 @@ void escapeLineDirectivePath( std::ostream &out, char *path )
        }
 }
 
-/* If any forward slash is found in argv0 then it is assumed that the path is
- * explicit and the path to the backend executable should be derived from
- * that. If no forward slash is found it is assumed the file is being run from
- * the installed location. The PREFIX supplied during configuration is used.
- * */
-char **makePathChecks( const char *argv0, const char *progName )
+void processArgs( int argc, const char **argv, const char *&inputFileName )
 {
-       char **result = new char*[3];
-       const char *lastSlash = strrchr( argv0, '/' );
-       int numChecks = 0;
-
-       if ( lastSlash != 0 ) {
-               char *path = strdup( argv0 );
-               int givenPathLen = (lastSlash - argv0) + 1;
-               path[givenPathLen] = 0;
-
-               int progNameLen = strlen(progName);
-               int length = givenPathLen + progNameLen + 1;
-               char *check = new char[length];
-               sprintf( check, "%s%s", path, progName );
-               result[numChecks++] = check;
-
-               length = givenPathLen + 3 + progNameLen + 1 + progNameLen + 1;
-               check = new char[length];
-               sprintf( check, "%s../%s/%s", path, progName, progName );
-               result[numChecks++] = check;
-       }
-       else {
-               int prefixLen = strlen(PREFIX);
-               int progNameLen = strlen(progName);
-               int length = prefixLen + 5 + progNameLen + 1;
-               char *check = new char[length];
-
-               sprintf( check, PREFIX "/bin/%s", progName );
-               result[numChecks++] = check;
-       }
-
-       result[numChecks] = 0;
-       return result;
-}
-
+       ParamCheck pc("xo:dnmleabjkS:M:I:CDJRAvHh?-:sT:F:G:P:LpV", argc, argv);
 
-void execBackend( const char *argv0 )
-{
-       /* Locate the backend program */
-       const char *progName = 0;
-       switch ( hostLang->lang ) {
-               case HostLang::C:
-               case HostLang::D:
-                       progName = "rlgen-cd";
-                       break;
-               case HostLang::Java:
-                       progName = "rlgen-java";
-                       break;
-               case HostLang::Ruby:
-                       progName = "rlgen-ruby";
-                       break;
-       }
-
-       char **pathChecks = makePathChecks( argv0, progName );
-       while ( *pathChecks != 0 ) {
-               pathChecks += 1;
-       }
-}
-
-/* Main, process args and call yyparse to start scanning input. */
-int main(int argc, char **argv)
-{
-       ParamCheck pc("o:nmleabjkS:M:CDJRvHh?-:s", argc, argv);
-       char *inputFileName = 0;
-       char *outputFileName = 0;
+       /* FIXME: Need to check code styles VS language. */
 
        while ( pc.check() ) {
                switch ( pc.state ) {
                case ParamCheck::match:
                        switch ( pc.parameter ) {
+                       case 'V':
+                               generateDot = true;
+                               break;
+
+                       case 'x':
+                               frontendOnly = true;
+                               break;
+
                        /* Output. */
                        case 'o':
-                               if ( *pc.parameterArg == 0 )
+                               if ( *pc.paramArg == 0 )
                                        error() << "a zero length output file name was given" << endl;
                                else if ( outputFileName != 0 )
                                        error() << "more than one output file name was given" << endl;
                                else {
                                        /* Ok, remember the output file name. */
-                                       outputFileName = pc.parameterArg;
+                                       outputFileName = pc.paramArg;
                                }
                                break;
 
+                       /* Flag for turning off duplicate action removal. */
+                       case 'd':
+                               wantDupsRemoved = false;
+                               break;
+
                        /* Minimization, mostly hidden options. */
                        case 'n':
                                minimizeOpt = MinimizeNone;
@@ -243,25 +286,33 @@ int main(int argc, char **argv)
 
                        /* Machine spec. */
                        case 'S':
-                               if ( *pc.parameterArg == 0 )
+                               if ( *pc.paramArg == 0 )
                                        error() << "please specify an argument to -S" << endl;
                                else if ( machineSpec != 0 )
                                        error() << "more than one -S argument was given" << endl;
                                else {
                                        /* Ok, remember the path to the machine to generate. */
-                                       machineSpec = pc.parameterArg;
+                                       machineSpec = pc.paramArg;
                                }
                                break;
 
                        /* Machine path. */
                        case 'M':
-                               if ( *pc.parameterArg == 0 )
+                               if ( *pc.paramArg == 0 )
                                        error() << "please specify an argument to -M" << endl;
                                else if ( machineName != 0 )
                                        error() << "more than one -M argument was given" << endl;
                                else {
                                        /* Ok, remember the machine name to generate. */
-                                       machineName = pc.parameterArg;
+                                       machineName = pc.paramArg;
+                               }
+                               break;
+
+                       case 'I':
+                               if ( *pc.paramArg == 0 )
+                                       error() << "please specify an argument to -I" << endl;
+                               else {
+                                       includePaths.append( pc.paramArg );
                                }
                                break;
 
@@ -278,30 +329,97 @@ int main(int argc, char **argv)
                        case 'R':
                                hostLang = &hostLangRuby;
                                break;
+                       case 'A':
+                               hostLang = &hostLangCSharp;
+                               break;
 
                        /* Version and help. */
                        case 'v':
                                version();
-                               exit(0);
+                               break;
                        case 'H': case 'h': case '?':
                                usage();
-                               exit(0);
+                               break;
                        case 's':
                                printStatistics = true;
                                break;
-                       case '-':
-                               if ( strcasecmp(pc.parameterArg, "help") == 0 ) {
+                       case '-': {
+                               char *eq = strchr( pc.paramArg, '=' );
+
+                               if ( eq != 0 )
+                                       *eq++ = 0;
+
+                               if ( strcmp( pc.paramArg, "help" ) == 0 )
                                        usage();
-                                       exit(0);
-                               }
-                               else if ( strcasecmp(pc.parameterArg, "version") == 0 ) {
+                               else if ( strcmp( pc.paramArg, "version" ) == 0 )
                                        version();
-                                       exit(0);
+                               else if ( strcmp( pc.paramArg, "error-format" ) == 0 ) {
+                                       if ( eq == 0 )
+                                               error() << "expecting '=value' for error-format" << endl;
+                                       else if ( strcmp( eq, "gnu" ) == 0 )
+                                               errorFormat = ErrorFormatGNU;
+                                       else if ( strcmp( eq, "msvc" ) == 0 )
+                                               errorFormat = ErrorFormatMSVC;
+                                       else
+                                               error() << "invalid value for error-format" << endl;
+                               }
+                               else if ( strcmp( pc.paramArg, "rbx" ) == 0 )
+                                       rubyImpl = Rubinius;
+                               else {
+                                       error() << "--" << pc.paramArg << 
+                                                       " is an invalid argument" << endl;
+                               }
+                               break;
+                       }
+
+                       /* Passthrough args. */
+                       case 'T': 
+                               if ( pc.paramArg[0] == '0' )
+                                       codeStyle = GenTables;
+                               else if ( pc.paramArg[0] == '1' )
+                                       codeStyle = GenFTables;
+                               else {
+                                       error() << "-T" << pc.paramArg[0] << 
+                                                       " is an invalid argument" << endl;
+                                       exit(1);
                                }
+                               break;
+                       case 'F': 
+                               if ( pc.paramArg[0] == '0' )
+                                       codeStyle = GenFlat;
+                               else if ( pc.paramArg[0] == '1' )
+                                       codeStyle = GenFFlat;
                                else {
-                                       error() << "--" << pc.parameterArg << 
+                                       error() << "-F" << pc.paramArg[0] << 
                                                        " is an invalid argument" << endl;
+                                       exit(1);
                                }
+                               break;
+                       case 'G': 
+                               if ( pc.paramArg[0] == '0' )
+                                       codeStyle = GenGoto;
+                               else if ( pc.paramArg[0] == '1' )
+                                       codeStyle = GenFGoto;
+                               else if ( pc.paramArg[0] == '2' )
+                                       codeStyle = GenIpGoto;
+                               else {
+                                       error() << "-G" << pc.paramArg[0] << 
+                                                       " is an invalid argument" << endl;
+                                       exit(1);
+                               }
+                               break;
+                       case 'P':
+                               codeStyle = GenSplit;
+                               numSplitPartitions = atoi( pc.paramArg );
+                               break;
+
+                       case 'p':
+                               displayPrintables = true;
+                               break;
+
+                       case 'L':
+                               noLineDirectives = true;
+                               break;
                        }
                        break;
 
@@ -322,76 +440,192 @@ int main(int argc, char **argv)
                        break;
                }
        }
+}
 
-       /* Bail on above errors. */
+void process( const char *inputFileName )
+{
+       bool wantComplete = true;
+       bool outputActive = true;
+
+       /* Open the input file for reading. */
+       assert( inputFileName != 0 );
+       ifstream *inFile = new ifstream( inputFileName );
+       if ( ! inFile->is_open() )
+               error() << "could not open " << inputFileName << " for reading" << endp;
+
+       /* Used for just a few things. */
+       std::ostringstream hostData;
+
+       /* Make the first input item. */
+       InputItem *firstInputItem = new InputItem;
+       firstInputItem->type = InputItem::HostData;
+       firstInputItem->loc.line = 1;
+       firstInputItem->loc.col = 1;
+       inputItems.append( firstInputItem );
+
+       Scanner scanner( inputFileName, *inFile, 0, 0, 0, false );
+       scanner.do_scan();
+
+       /* Finished, final check for errors.. */
        if ( gblErrorCount > 0 )
                exit(1);
+       
+       /* Now send EOF to all parsers. */
+       terminateAllParsers();
 
-       /* Make sure we are not writing to the same file as the input file. */
-       if ( inputFileName != 0 && outputFileName != 0 && 
-                       strcmp( inputFileName, outputFileName  ) == 0 )
-       {
-               error() << "output file \"" << outputFileName  << 
-                               "\" is the same as the input file" << endl;
-       }
+       /* Finished, final check for errors.. */
+       if ( gblErrorCount > 0 )
+               exit(1);
 
-       /* Open the input file for reading. */
-       istream *inStream;
-       if ( inputFileName != 0 ) {
-               /* Open the input file for reading. */
-               ifstream *inFile = new ifstream( inputFileName );
-               inStream = inFile;
-               if ( ! inFile->is_open() )
-                       error() << "could not open " << inputFileName << " for reading" << endl;
-       }
-       else {
-               inputFileName = "<stdin>";
-               inStream = &cin;
+       /* Bail on above error. */
+       if ( gblErrorCount > 0 )
+               exit(1);
+
+       /* Locate the backend program */
+       if ( generateDot ) {
+               wantComplete = false;
+               outputActive = false;
        }
 
+       InputData inputData( inputFileName, outputActive, wantComplete );
+
+       /* Compiles machines. */
+       inputData.prepareMachineGen();
 
-       /* Bail on above errors. */
        if ( gblErrorCount > 0 )
                exit(1);
 
-       std::ostringstream outputBuffer;
+       inputData.openOutput();
 
-       if ( machineSpec == 0 && machineName == 0 )
-               outputBuffer << "<host line=\"1\" col=\"1\">";
+       /* Generates the reduced machine, which we use to write output. */
+       inputData.generateReduced();
 
-       Scanner scanner( inputFileName, *inStream, outputBuffer, 0, 0, 0, false );
-       scanner.do_scan();
+       if ( gblErrorCount > 0 )
+               exit(1);
 
-       /* Finished, final check for errors.. */
+       inputData.openOutput2();
+       inputData.writeOutput();
+
+       /* Close the input and the intermediate file. */
+       delete inFile;
+
+       /* Bail on above error. */
        if ( gblErrorCount > 0 )
-               return 1;
-       
-       /* Now send EOF to all parsers. */
-       terminateAllParsers();
+               exit(1);
+
+       /* If writing to a file, delete the ostream, causing it to flush.
+        * Standard out is flushed automatically. */
+       if ( outputFileName != 0 ) {
+               delete outStream;
+               delete outFilter;
+       }
 
        /* Finished, final check for errors.. */
-       if ( gblErrorCount > 0 )
-               return 1;
+       if ( gblErrorCount > 0 ) {
+               /* If we opened an output file, remove it. */
+               if ( outputFileName != 0 )
+                       unlink( outputFileName );
+               exit(1);
+       }
+}
 
-       if ( machineSpec == 0 && machineName == 0 )
-               outputBuffer << "</host>\n";
+/* Build the name template for the intermediate file. The template is
+ * "ragel-XXXXXX.xml", placed in the same directory as baseFileName: everything
+ * up to and including the last '/' of baseFileName is kept as a prefix. When
+ * baseFileName has no '/', the bare template is returned.
+ *
+ * The result is allocated with new[] and is owned by the caller. */
+char *makeIntermedTemplate( const char *baseFileName )
+{
+       static const char templ[] = "ragel-XXXXXX.xml";
+
+       /* The argument is const, so the pointer into it must be const as well. */
+       const char *lastSlash = strrchr( baseFileName, '/' );
+
+       /* Length of the directory part, including the trailing slash. */
+       size_t dirLen = 0;
+       if ( lastSlash != 0 )
+               dirLen = static_cast<size_t>( lastSlash - baseFileName ) + 1;
+
+       /* sizeof(templ) already counts the terminating null. */
+       char *result = new char[dirLen + sizeof(templ)];
+       memcpy( result, baseFileName, dirLen );
+       memcpy( result + dirLen, templ, sizeof(templ) );
+       return result;
+}
 
-       if ( gblErrorCount > 0 )
-               return 1;
+/* Create an empty, uniquely named intermediate file and return its name.
+ *
+ * The file is placed next to outputFileName, or next to inputFileName when no
+ * output file name was given. Up to 20 random names are tried. On success the
+ * returned name is a buffer allocated with new[] that the caller owns. If no
+ * name could be created an error is reported and, should the error reporting
+ * return, the result is 0.
+ *
+ * NOTE(review): endp is assumed to terminate the program after printing the
+ * message -- confirm against its definition. */
+const char *openIntermed( const char *inputFileName, const char *outputFileName )
+{
+       srand(time(0));
+
+       /* Which filename do we use as the base? */
+       const char *baseFileName = outputFileName != 0 ? outputFileName : inputFileName;
+
+       /* The template for the intermediate file name. Held through a non-const
+        * pointer because the X placeholders are overwritten in place. */
+       char *intermedFileName = makeIntermedTemplate( baseFileName );
+
+       /* Alphabet for the random part of the name. The count is derived from
+        * the array so that every listed character can be chosen. */
+       static const char fnChars[] =
+               "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+       const int numFnChars = sizeof(fnChars) - 1;
+
+       /* The template ends in six X characters followed by ".xml", so the last
+        * X found is the sixth one. Step back to the first. */
+       char *firstX = strrchr( intermedFileName, 'X' ) - 5;
+
+       const char *result = 0;
+       for ( int tries = 0; tries < 20; tries++ ) {
+               /* Choose a random name. */
+               for ( int x = 0; x < 6; x++ )
+                       firstX[x] = fnChars[rand() % numFnChars];
+
+               /* Try to create the file. O_EXCL makes this fail if the name is
+                * already taken. */
+               int fd = ::open( intermedFileName, O_WRONLY|O_EXCL|O_CREAT, S_IRUSR|S_IWUSR );
+
+               /* Zero is a valid descriptor, only a negative value is a failure. */
+               if ( fd >= 0 ) {
+                       /* Success. Close the file immediately and return the name for use
+                        * by the child processes. */
+                       ::close( fd );
+                       result = intermedFileName;
+                       break;
+               }
+
+               if ( errno == EACCES ) {
+                       error() << "failed to open temp file " << intermedFileName <<
+                                       ", access denied" << endp;
+               }
+       }
+
+       if ( result == 0 ) {
+               /* Nothing will be handed back to the caller, release the buffer. */
+               delete[] intermedFileName;
+               error() << "abnormal error: cannot find unique name for temp file" << endp;
+       }
+
+       return result;
+}
+
+
+
+/* Main, process args and call yyparse to start scanning input. */
+int main( int argc, const char **argv )
+{
+       const char *inputFileName = 0;
+       processArgs( argc, argv, inputFileName );
        
-       ostream *outputFile = 0;
-       if ( outputFileName != 0 )
-               outputFile = new ofstream( outputFileName );
-       else
-               outputFile = &cout;
 
-       /* Write the machines, then the surrounding code. */
-       writeMachines( *outputFile, outputBuffer.str(), inputFileName );
+       /* If -M or -S are given and we're not generating a dot file then invoke
+        * the frontend. These options are not useful with code generators. */
+       if ( machineName != 0 || machineSpec != 0 ) {
+               if ( !generateDot )
+                       frontendOnly = true;
+       }
+
+       /* Require an input file. If we use standard in then we won't have a file
+        * name on which to base the output. */
+       if ( inputFileName == 0 )
+               error() << "no input file given" << endl;
+
+       /* Bail on argument processing errors. */
+       if ( gblErrorCount > 0 )
+               exit(1);
+
+       /* Make sure we are not writing to the same file as the input file. */
+       if ( inputFileName != 0 && outputFileName != 0 && 
+                       strcmp( inputFileName, outputFileName  ) == 0 )
+       {
+               error() << "output file \"" << outputFileName  << 
+                               "\" is the same as the input file" << endp;
+       }
 
-       if ( outputFileName != 0 )
-               delete outputFile;
+       process( inputFileName );
 
-       execBackend( argv[0] );
+       /* Clean up the intermediate. */
+       exit( 0 );
 
        return 0;
 }