A bit more cleanup.
[external/ragel.git] / ragel / main.cpp
index b396e75..120fae7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- *  Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ *  Copyright 2001-2007 Adrian Thurston <thurston@complang.org>
  */
 
 /*  This file is part of Ragel.
@@ -32,9 +32,7 @@
 #include <fcntl.h>
 #include <errno.h>
 
-#ifndef _WIN32
-#include <sys/wait.h>
-#else
+#ifdef _WIN32
 #include <windows.h>
 #include <psapi.h>
 #include <time.h>
@@ -56,6 +54,8 @@
 #include "vector.h"
 #include "version.h"
 #include "common.h"
+#include "xmlparse.h"
+#include "inputdata.h"
 
 using std::istream;
 using std::ostream;
@@ -73,7 +73,7 @@ MinimizeLevel minimizeLevel = MinimizePartition2;
 MinimizeOpt minimizeOpt = MinimizeMostOps;
 
 /* Graphviz dot file generation. */
-char *machineSpec = 0, *machineName = 0;
+const char *machineSpec = 0, *machineName = 0;
 bool machineSpecFound = false;
 bool wantDupsRemoved = true;
 
@@ -81,10 +81,25 @@ bool printStatistics = false;
 bool frontendOnly = false;
 bool generateDot = false;
 
-ArgsVector frontendArgs;
-ArgsVector backendArgs;
+/* Target language and output style. */
+CodeStyleEnum codeStyle = GenTables;
+
+int numSplitPartitions = 0;
+bool noLineDirectives = false;
+
+bool displayPrintables = false;
+bool graphvizDone = false;
+
+/* Target ruby impl */
+RubyImplEnum rubyImpl = MRI;
+
 ArgsVector includePaths;
 
+istream *inStream = 0;
+ostream *outStream = 0;
+output_filter *outFilter = 0;
+const char *outputFileName = 0;
+
 /* Print a summary of the options. */
 void usage()
 {
@@ -110,8 +125,8 @@ void usage()
 "   -x                   Run the frontend only: emit XML intermediate format\n"
 "   -V                   Generate a dot file for Graphviz\n"
 "   -p                   Display printable characters on labels\n"
-"   -S <spec>            FSM specification to output (for rlgen-dot)\n"
-"   -M <machine>         Machine definition/instantiation to output (for rlgen-dot)\n"
+"   -S <spec>            FSM specification to output (for graphviz output)\n"
+"   -M <machine>         Machine definition/instantiation to output (for graphviz output)\n"
 "host language:\n"
 "   -C                   The host language is C, C++, Obj-C or Obj-C++ (default)\n"
 "   -D                   The host language is D\n"
@@ -140,7 +155,7 @@ void usage()
 void version()
 {
        cout << "Ragel State Machine Compiler version " VERSION << " " PUBDATE << endl <<
-                       "Copyright (c) 2001-2007 by Adrian Thurston" << endl;
+                       "Copyright (c) 2001-2008 by Adrian Thurston" << endl;
        exit(0);
 }
 
@@ -208,10 +223,12 @@ void escapeLineDirectivePath( std::ostream &out, char *path )
        }
 }
 
-void processArgs( int argc, char **argv, char *&inputFileName, char *&outputFileName )
+void processArgs( int argc, const char **argv, const char *&inputFileName )
 {
        ParamCheck pc("xo:dnmleabjkS:M:I:CDJRAvHh?-:sT:F:G:P:LpV", argc, argv);
 
+       /* FIXME: Need to check code styles VS language. */
+
        while ( pc.check() ) {
                switch ( pc.state ) {
                case ParamCheck::match:
@@ -236,44 +253,35 @@ void processArgs( int argc, char **argv, char *&inputFileName, char *&outputFile
                                }
                                break;
 
-                       /* Minimization, mostly hidden options. */
+                       /* Flag for turning off duplicate action removal. */
                        case 'd':
                                wantDupsRemoved = false;
-                               frontendArgs.append( "-d" );
                                break;
 
                        /* Minimization, mostly hidden options. */
                        case 'n':
                                minimizeOpt = MinimizeNone;
-                               frontendArgs.append( "-n" );
                                break;
                        case 'm':
                                minimizeOpt = MinimizeEnd;
-                               frontendArgs.append( "-m" );
                                break;
                        case 'l':
                                minimizeOpt = MinimizeMostOps;
-                               frontendArgs.append( "-l" );
                                break;
                        case 'e':
                                minimizeOpt = MinimizeEveryOp;
-                               frontendArgs.append( "-e" );
                                break;
                        case 'a':
                                minimizeLevel = MinimizeApprox;
-                               frontendArgs.append( "-a" );
                                break;
                        case 'b':
                                minimizeLevel = MinimizeStable;
-                               frontendArgs.append( "-b" );
                                break;
                        case 'j':
                                minimizeLevel = MinimizePartition1;
-                               frontendArgs.append( "-j" );
                                break;
                        case 'k':
                                minimizeLevel = MinimizePartition2;
-                               frontendArgs.append( "-k" );
                                break;
 
                        /* Machine spec. */
@@ -285,8 +293,6 @@ void processArgs( int argc, char **argv, char *&inputFileName, char *&outputFile
                                else {
                                        /* Ok, remember the path to the machine to generate. */
                                        machineSpec = pc.paramArg;
-                                       frontendArgs.append( "-S" );
-                                       frontendArgs.append( pc.paramArg );
                                }
                                break;
 
@@ -299,8 +305,6 @@ void processArgs( int argc, char **argv, char *&inputFileName, char *&outputFile
                                else {
                                        /* Ok, remember the machine name to generate. */
                                        machineName = pc.paramArg;
-                                       frontendArgs.append( "-M" );
-                                       frontendArgs.append( pc.paramArg );
                                }
                                break;
 
@@ -309,45 +313,24 @@ void processArgs( int argc, char **argv, char *&inputFileName, char *&outputFile
                                        error() << "please specify an argument to -I" << endl;
                                else {
                                        includePaths.append( pc.paramArg );
-                                       frontendArgs.append( "-I" );
-                                       frontendArgs.append( pc.paramArg );
                                }
                                break;
 
-                       /* Error reporting format. */
-                       case 'E':
-                               if ( pc.paramArg[0] == '0' )
-                                       errorFormat = ErrorFormatGNU;
-                               else if ( pc.paramArg[0] == '1' )
-                                       errorFormat = ErrorFormatMSVC;
-                               else {
-                                       error() << "-E" << pc.paramArg[0] << 
-                                                       " is an invalid argument" << endl;
-                               }
-                               frontendArgs.append( "-E" );
-                               frontendArgs.append( pc.paramArg );
-                               break;
-
                        /* Host language types. */
                        case 'C':
                                hostLang = &hostLangC;
-                               frontendArgs.append( "-C" );
                                break;
                        case 'D':
                                hostLang = &hostLangD;
-                               frontendArgs.append( "-D" );
                                break;
                        case 'J':
                                hostLang = &hostLangJava;
-                               frontendArgs.append( "-J" );
                                break;
                        case 'R':
                                hostLang = &hostLangRuby;
-                               frontendArgs.append( "-R" );
                                break;
                        case 'A':
                                hostLang = &hostLangCSharp;
-                               frontendArgs.append( "-A" );
                                break;
 
                        /* Version and help. */
@@ -359,7 +342,6 @@ void processArgs( int argc, char **argv, char *&inputFileName, char *&outputFile
                                break;
                        case 's':
                                printStatistics = true;
-                               frontendArgs.append( "-s" );
                                break;
                        case '-': {
                                char *eq = strchr( pc.paramArg, '=' );
@@ -374,18 +356,15 @@ void processArgs( int argc, char **argv, char *&inputFileName, char *&outputFile
                                else if ( strcmp( pc.paramArg, "error-format" ) == 0 ) {
                                        if ( eq == 0 )
                                                error() << "expecting '=value' for error-format" << endl;
-                                       else if ( strcmp( eq, "gnu" ) == 0 ) {
+                                       else if ( strcmp( eq, "gnu" ) == 0 )
                                                errorFormat = ErrorFormatGNU;
-                                               frontendArgs.append( "--error-format=gnu" );
-                                       }
-                                       else if ( strcmp( eq, "msvc" ) == 0 ) {
+                                       else if ( strcmp( eq, "msvc" ) == 0 )
                                                errorFormat = ErrorFormatMSVC;
-                                               frontendArgs.append( "--error-format=msvc" );
-                                       }
-                                       else {
+                                       else
                                                error() << "invalid value for error-format" << endl;
-                                       }
                                }
+                               else if ( strcmp( pc.paramArg, "rbx" ) == 0 )
+                                       rubyImpl = Rubinius;
                                else {
                                        error() << "--" << pc.paramArg << 
                                                        " is an invalid argument" << endl;
@@ -395,26 +374,51 @@ void processArgs( int argc, char **argv, char *&inputFileName, char *&outputFile
 
                        /* Passthrough args. */
                        case 'T': 
-                               backendArgs.append( "-T" );
-                               backendArgs.append( pc.paramArg );
+                               if ( pc.paramArg[0] == '0' )
+                                       codeStyle = GenTables;
+                               else if ( pc.paramArg[0] == '1' )
+                                       codeStyle = GenFTables;
+                               else {
+                                       error() << "-T" << pc.paramArg[0] << 
+                                                       " is an invalid argument" << endl;
+                                       exit(1);
+                               }
                                break;
                        case 'F': 
-                               backendArgs.append( "-F" );
-                               backendArgs.append( pc.paramArg );
+                               if ( pc.paramArg[0] == '0' )
+                                       codeStyle = GenFlat;
+                               else if ( pc.paramArg[0] == '1' )
+                                       codeStyle = GenFFlat;
+                               else {
+                                       error() << "-F" << pc.paramArg[0] << 
+                                                       " is an invalid argument" << endl;
+                                       exit(1);
+                               }
                                break;
                        case 'G': 
-                               backendArgs.append( "-G" );
-                               backendArgs.append( pc.paramArg );
+                               if ( pc.paramArg[0] == '0' )
+                                       codeStyle = GenGoto;
+                               else if ( pc.paramArg[0] == '1' )
+                                       codeStyle = GenFGoto;
+                               else if ( pc.paramArg[0] == '2' )
+                                       codeStyle = GenIpGoto;
+                               else {
+                                       error() << "-G" << pc.paramArg[0] << 
+                                                       " is an invalid argument" << endl;
+                                       exit(1);
+                               }
                                break;
                        case 'P':
-                               backendArgs.append( "-P" );
-                               backendArgs.append( pc.paramArg );
+                               codeStyle = GenSplit;
+                               numSplitPartitions = atoi( pc.paramArg );
                                break;
+
                        case 'p':
-                               backendArgs.append( "-p" );
+                               displayPrintables = true;
                                break;
+
                        case 'L':
-                               backendArgs.append( "-L" );
+                               noLineDirectives = true;
                                break;
                        }
                        break;
@@ -438,58 +442,94 @@ void processArgs( int argc, char **argv, char *&inputFileName, char *&outputFile
        }
 }
 
-int frontend( char *inputFileName, char *outputFileName )
+void process( const char *inputFileName )
 {
+       bool wantComplete = true;
+       bool outputActive = true;
+
        /* Open the input file for reading. */
        assert( inputFileName != 0 );
        ifstream *inFile = new ifstream( inputFileName );
-       istream *inStream = inFile;
        if ( ! inFile->is_open() )
                error() << "could not open " << inputFileName << " for reading" << endp;
 
        /* Used for just a few things. */
        std::ostringstream hostData;
 
-       if ( machineSpec == 0 && machineName == 0 )
-               hostData << "<host line=\"1\" col=\"1\">";
+       /* Make the first input item. */
+       InputItem *firstInputItem = new InputItem;
+       firstInputItem->type = InputItem::HostData;
+       firstInputItem->loc.line = 1;
+       firstInputItem->loc.col = 1;
+       inputItems.append( firstInputItem );
 
-       Scanner scanner( inputFileName, *inStream, hostData, 0, 0, 0, false );
+       Scanner scanner( inputFileName, *inFile, 0, 0, 0, false );
        scanner.do_scan();
 
        /* Finished, final check for errors.. */
        if ( gblErrorCount > 0 )
-               return 1;
+               exit(1);
        
        /* Now send EOF to all parsers. */
        terminateAllParsers();
 
        /* Finished, final check for errors.. */
        if ( gblErrorCount > 0 )
-               return 1;
+               exit(1);
 
-       if ( machineSpec == 0 && machineName == 0 )
-               hostData << "</host>\n";
+       /* Bail on above error. */
+       if ( gblErrorCount > 0 )
+               exit(1);
+
+       /* When generating a dot file, turn off wantComplete and outputActive. */
+       if ( generateDot ) {
+               wantComplete = false;
+               outputActive = false;
+       }
+
+       InputData inputData( inputFileName, outputActive, wantComplete );
+
+       /* Compiles machines. */
+       inputData.prepareMachineGen();
 
        if ( gblErrorCount > 0 )
-               return 1;
-       
-       ostream *outputFile = 0;
-       if ( outputFileName != 0 )
-               outputFile = new ofstream( outputFileName );
-       else
-               outputFile = &cout;
+               exit(1);
+
+       inputData.openOutput();
+
+       /* Generates the reduced machine, which we use to write output. */
+       inputData.generateReduced();
 
-       /* Write the machines, then the surrounding code. */
-       writeMachines( *outputFile, hostData.str(), inputFileName );
+       if ( gblErrorCount > 0 )
+               exit(1);
+
+       inputData.openOutput2();
+       inputData.writeOutput();
+
+       /* Close the input file. */
+       delete inFile;
 
-       /* Close the intermediate file. */
-       if ( outputFileName != 0 )
-               delete outputFile;
+       /* Bail on above error. */
+       if ( gblErrorCount > 0 )
+               exit(1);
 
-       return gblErrorCount > 0;
+       /* If writing to a file, delete the ostream, causing it to flush.
+        * Standard out is flushed automatically. */
+       if ( outputFileName != 0 ) {
+               delete outStream;
+               delete outFilter;
+       }
+
+       /* Finished, final check for errors.. */
+       if ( gblErrorCount > 0 ) {
+               /* If we opened an output file, remove it. */
+               if ( outputFileName != 0 )
+                       unlink( outputFileName );
+               exit(1);
+       }
 }
 
-char *makeIntermedTemplate( char *baseFileName )
+char *makeIntermedTemplate( const char *baseFileName )
 {
        char *result = 0;
        const char *templ = "ragel-XXXXXX.xml";
@@ -507,16 +547,16 @@ char *makeIntermedTemplate( char *baseFileName )
        return result;
 };
 
-char *openIntermed( char *inputFileName, char *outputFileName )
+const char *openIntermed( const char *inputFileName, const char *outputFileName )
 {
        srand(time(0));
-       char *result = 0;
+       const char *result = 0;
 
        /* Which filename do we use as the base? */
-       char *baseFileName = outputFileName != 0 ? outputFileName : inputFileName;
+       const char *baseFileName = outputFileName != 0 ? outputFileName : inputFileName;
 
        /* The template for the intermediate file name. */
-       char *intermedFileName = makeIntermedTemplate( baseFileName );
+       const char *intermedFileName = makeIntermedTemplate( baseFileName );
 
        /* Randomize the name and try to open. */
        char fnChars[] = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
@@ -550,222 +590,13 @@ char *openIntermed( char *inputFileName, char *outputFileName )
 }
 
 
-void cleanExit( char *intermed, int status )
-{
-       unlink( intermed );
-       exit( status );
-}
-
-#ifndef _WIN32
-
-/* If any forward slash is found in argv0 then it is assumed that the path is
- * explicit and the path to the backend executable should be derived from
- * that. Whe check that location and also go up one then inside a directory of
- * the same name in case we are executing from the source tree. If no forward
- * slash is found it is assumed the file is being run from the installed
- * location. The PREFIX supplied during configuration is used. */
-char **makePathChecksUnix( const char *argv0, const char *progName )
-{
-       char **result = new char*[3];
-       const char *lastSlash = strrchr( argv0, '/' );
-       int numChecks = 0;
-
-       if ( lastSlash != 0 ) {
-               char *path = strdup( argv0 );
-               int givenPathLen = (lastSlash - argv0) + 1;
-               path[givenPathLen] = 0;
-
-               int progNameLen = strlen(progName);
-               int length = givenPathLen + progNameLen + 1;
-               char *check = new char[length];
-               sprintf( check, "%s%s", path, progName );
-               result[numChecks++] = check;
-
-               length = givenPathLen + 3 + progNameLen + 1 + progNameLen + 1;
-               check = new char[length];
-               sprintf( check, "%s../%s/%s", path, progName, progName );
-               result[numChecks++] = check;
-       }
-       else {
-               int prefixLen = strlen(PREFIX);
-               int progNameLen = strlen(progName);
-               int length = prefixLen + 5 + progNameLen + 1;
-               char *check = new char[length];
-
-               sprintf( check, PREFIX "/bin/%s", progName );
-               result[numChecks++] = check;
-       }
-
-       result[numChecks] = 0;
-       return result;
-}
-
-
-void forkAndExec( const char *progName, char **pathChecks, 
-               ArgsVector &args, char *intermed )
-{
-       pid_t pid = fork();
-       if ( pid < 0 ) {
-               /* Error, no child created. */
-               error() << "failed to fork for " << progName << endl;
-               cleanExit( intermed, 1 );
-       }
-       else if ( pid == 0 ) {
-               /* child */
-               while ( *pathChecks != 0 ) {
-                       /* Execv does not modify argv, it just uses the const form that is
-                        * compatible with the most code. Ours not included. */
-                       execv( *pathChecks, (char *const*) args.data );
-                       pathChecks += 1;
-               }
-               error() << "failed to exec " << progName << endl;
-               cleanExit( intermed, 1 );
-       }
-
-       /* Parent process, wait for the child. */
-       int status;
-       wait( &status );
-
-       /* What happened with the child. */
-       if ( ! WIFEXITED( status ) ) {
-               error() << progName << " did not exit normally" << endl;
-               cleanExit( intermed, 1 );
-       }
-       
-       if ( WEXITSTATUS(status) != 0 )
-               cleanExit( intermed, WEXITSTATUS(status) );
-}
-
-#else
-
-/* GetModuleFileNameEx is used to find out where the the current process's
- * binary is. That location is searched first. If that fails then we go up one
- * directory and look for the executable inside a directory of the same name
- * in case we are executing from the source tree.
- * */
-char **makePathChecksWin( const char *progName )
-{
-       int len = 1024;
-       char *imageFileName = new char[len];
-       HANDLE h = GetCurrentProcess();
-       len = GetModuleFileNameEx( h, NULL, imageFileName, len );
-       imageFileName[len] = 0;
-
-       char **result = new char*[3];
-       const char *lastSlash = strrchr( imageFileName, '\\' );
-       int numChecks = 0;
-
-       assert( lastSlash != 0 );
-       char *path = strdup( imageFileName );
-       int givenPathLen = (lastSlash - imageFileName) + 1;
-       path[givenPathLen] = 0;
-
-       int progNameLen = strlen(progName);
-       int length = givenPathLen + progNameLen + 1;
-       char *check = new char[length];
-       sprintf( check, "%s%s", path, progName );
-       result[numChecks++] = check;
-
-       length = givenPathLen + 3 + progNameLen + 1 + progNameLen + 1;
-       check = new char[length];
-       sprintf( check, "%s..\\%s\\%s", path, progName, progName );
-       result[numChecks++] = check;
-
-       result[numChecks] = 0;
-       return result;
-}
-
-void spawn( const char *progName, char **pathChecks, 
-               ArgsVector &args, char *intermed )
-{
-       int result = 0;
-       while ( *pathChecks != 0 ) {
-               //cerr << "trying to execute " << *pathChecks << endl;
-               result = _spawnv( _P_WAIT, *pathChecks, args.data );
-               if ( result >= 0 || errno != ENOENT )
-                       break;
-               pathChecks += 1;
-       }
-
-       if ( result < 0 ) {
-               error() << "failed to spawn " << progName << endl;
-               cleanExit( intermed, 1 );
-       }
-
-       if ( result > 0 )
-               cleanExit( intermed, 1 );
-}
-
-#endif
-
-void execFrontend( const char *argv0, char *inputFileName, char *intermed )
-{
-       /* The frontend program name. */
-       const char *progName = "ragel";
-
-       frontendArgs.insert( 0, progName );
-       frontendArgs.insert( 1, "-x" );
-       frontendArgs.append( "-o" );
-       frontendArgs.append( intermed );
-       frontendArgs.append( inputFileName );
-       frontendArgs.append( 0 );
-
-#ifndef _WIN32
-       char **pathChecks = makePathChecksUnix( argv0, progName );
-       forkAndExec( progName, pathChecks, frontendArgs, intermed );
-#else
-       char **pathChecks = makePathChecksWin( progName );
-       spawn( progName, pathChecks, frontendArgs, intermed );
-#endif
-}
-
-void execBackend( const char *argv0, char *intermed, char *outputFileName )
-{
-       /* Locate the backend program */
-       const char *progName = 0;
-       if ( generateDot )
-               progName = "rlgen-dot";
-       else {
-               switch ( hostLang->lang ) {
-                       case HostLang::C:
-                       case HostLang::D:
-                               progName = "rlgen-cd";
-                               break;
-                       case HostLang::Java:
-                               progName = "rlgen-java";
-                               break;
-                       case HostLang::Ruby:
-                               progName = "rlgen-ruby";
-                               break;
-                       case HostLang::CSharp:
-                               progName = "rlgen-csharp";
-               }
-       }
-
-       backendArgs.insert( 0, progName );
-       if ( outputFileName != 0 ) {
-               backendArgs.append( "-o" );
-               backendArgs.append( outputFileName );
-       }
-       backendArgs.append( intermed );
-       backendArgs.append( 0 );
-
-#ifndef _WIN32
-       char **pathChecks = makePathChecksUnix( argv0, progName );
-       forkAndExec( progName, pathChecks, backendArgs, intermed );
-#else
-       char **pathChecks = makePathChecksWin( progName );
-       spawn( progName, pathChecks, backendArgs, intermed );
-#endif
-}
 
 /* Main, process args and call yyparse to start scanning input. */
-int main(int argc, char **argv)
+int main( int argc, const char **argv )
 {
-       char *inputFileName = 0;
-       char *outputFileName = 0;
-
-       processArgs( argc, argv, inputFileName, outputFileName );
+       const char *inputFileName = 0;
+       processArgs( argc, argv, inputFileName );
+       
 
        /* If -M or -S are given and we're not generating a dot file then invoke
         * the frontend. These options are not useful with code generators. */
@@ -791,19 +622,10 @@ int main(int argc, char **argv)
                                "\" is the same as the input file" << endp;
        }
 
-       if ( frontendOnly )
-               return frontend( inputFileName, outputFileName );
-
-       char *intermed = openIntermed( inputFileName, outputFileName );
-
-       /* From here on in the cleanExit function should be used to exit. */
-
-       /* Run the frontend, then the backend processes. */
-       execFrontend( argv[0], inputFileName, intermed );
-       execBackend( argv[0], intermed, outputFileName );
+       process( inputFileName );
 
        /* Clean up the intermediate. */
-       cleanExit( intermed, 0 );
+       exit( 0 );
 
        return 0;
 }