tizen 2.3.1 release
[external/ragel.git] / ragel / main.cpp
index 328b671..2b54b51 100644 (file)
@@ -1,5 +1,5 @@
 /*
- *  Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ *  Copyright 2001-2007 Adrian Thurston <thurston@complang.org>
  */
 
 /*  This file is part of Ragel.
 #include <sstream>
 #include <unistd.h>
 #include <sys/types.h>
-#include <sys/wait.h>
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <errno.h>
 
+#ifdef _WIN32
+#include <windows.h>
+#include <psapi.h>
+#include <time.h>
+#include <io.h>
+#include <process.h>
+
+#if _MSC_VER
+#define S_IRUSR _S_IREAD
+#define S_IWUSR _S_IWRITE
+#endif
+#endif
+
 /* Parsing. */
 #include "ragel.h"
 #include "rlscan.h"
@@ -42,6 +54,7 @@
 #include "vector.h"
 #include "version.h"
 #include "common.h"
+#include "inputdata.h"
 
 using std::istream;
 using std::ostream;
@@ -59,13 +72,24 @@ MinimizeLevel minimizeLevel = MinimizePartition2;
 MinimizeOpt minimizeOpt = MinimizeMostOps;
 
 /* Graphviz dot file generation. */
-char *machineSpec = 0, *machineName = 0;
+const char *machineSpec = 0, *machineName = 0;
 bool machineSpecFound = false;
+bool wantDupsRemoved = true;
 
 bool printStatistics = false;
+bool generateXML = false;
+bool generateDot = false;
+
+/* Target language and output style. */
+CodeStyle codeStyle = GenTables;
 
-typedef Vector<char*> ArgsVector;
-ArgsVector backendArgs;
+int numSplitPartitions = 0;
+bool noLineDirectives = false;
+
+bool displayPrintables = false;
+
+/* Target ruby impl */
+RubyImplEnum rubyImpl = MRI;
 
 /* Print a summary of the options. */
 void usage()
@@ -77,27 +101,83 @@ void usage()
 "   -v, --version        Print version information and exit\n"
 "   -o <file>            Write output to <file>\n"
 "   -s                   Print some statistics on stderr\n"
+"   -d                   Do not remove duplicates from action lists\n"
+"   -I <dir>             Add <dir> to the list of directories to search\n"
+"                        for included an imported files\n"
+"error reporting format:\n"
+"   --error-format=gnu   file:line:column: message (default)\n"
+"   --error-format=msvc  file(line,column): message\n"
 "fsm minimization:\n"
 "   -n                   Do not perform minimization\n"
 "   -m                   Minimize at the end of the compilation\n"
 "   -l                   Minimize after most operations (default)\n"
 "   -e                   Minimize after every operation\n"
-"machine selection:\n"
-"   -S <spec>            FSM specification to output (for rlgen-dot)\n"
-"   -M <machine>         Machine definition/instantiation to output (for rlgen-dot)\n"
+"visualization:\n"
+"   -x                   Run the frontend only: emit XML intermediate format\n"
+"   -V                   Generate a dot file for Graphviz\n"
+"   -p                   Display printable characters on labels\n"
+"   -S <spec>            FSM specification to output (for graphviz output)\n"
+"   -M <machine>         Machine definition/instantiation to output (for graphviz output)\n"
 "host language:\n"
 "   -C                   The host language is C, C++, Obj-C or Obj-C++ (default)\n"
 "   -D                   The host language is D\n"
 "   -J                   The host language is Java\n"
 "   -R                   The host language is Ruby\n"
+"   -A                   The host language is C#\n"
+"line direcives: (C/D/C#)\n"
+"   -L                   Inhibit writing of #line directives\n"
+"code style: (C/D/Java/Ruby/C#)\n"
+"   -T0                  Table driven FSM (default)\n"
+"code style: (C/D/Ruby/C#)\n"
+"   -T1                  Faster table driven FSM\n"
+"   -F0                  Flat table driven FSM\n"
+"   -F1                  Faster flat table-driven FSM\n"
+"code style: (C/D/C#)\n"
+"   -G0                  Goto-driven FSM\n"
+"   -G1                  Faster goto-driven FSM\n"
+"code style: (C/D)\n"
+"   -G2                  Really fast goto-driven FSM\n"
+"   -P<N>                N-Way Split really fast goto-driven FSM\n"
        ;       
+
+       exit(0);
 }
 
-/* Print version information. */
+/* Print version information and exit. */
 void version()
 {
        cout << "Ragel State Machine Compiler version " VERSION << " " PUBDATE << endl <<
-                       "Copyright (c) 2001-2007 by Adrian Thurston" << endl;
+                       "Copyright (c) 2001-2009 by Adrian Thurston" << endl;
+       exit(0);
+}
+
+/* Error reporting format. */
+ErrorFormat errorFormat = ErrorFormatGNU;
+
+InputLoc makeInputLoc( const char *fileName, int line, int col)
+{
+       InputLoc loc = { fileName, line, col };
+       return loc;
+}
+
+ostream &operator<<( ostream &out, const InputLoc &loc )
+{
+       assert( loc.fileName != 0 );
+       switch ( errorFormat ) {
+       case ErrorFormatMSVC:
+               out << loc.fileName << "(" << loc.line;
+               if ( loc.col )
+                       out << "," << loc.col;
+               out << ")";
+               break;
+
+       default:
+               out << loc.fileName << ":" << loc.line;
+               if ( loc.col )
+                       out << ":" << loc.col;
+               break;
+       }
+       return out;
 }
 
 /* Total error count. */
@@ -106,9 +186,7 @@ int gblErrorCount = 0;
 /* Print the opening to a warning in the input, then return the error ostream. */
 ostream &warning( const InputLoc &loc )
 {
-       assert( loc.fileName != 0 );
-       cerr << loc.fileName << ":" << loc.line << ":" << 
-                       loc.col << ": warning: ";
+       cerr << loc << ": warning: ";
        return cerr;
 }
 
@@ -123,8 +201,7 @@ ostream &error()
 ostream &error( const InputLoc &loc )
 {
        gblErrorCount += 1;
-       assert( loc.fileName != 0 );
-       cerr << loc.fileName << ":" << loc.line << ": ";
+       cerr << loc << ": ";
        return cerr;
 }
 
@@ -138,180 +215,41 @@ void escapeLineDirectivePath( std::ostream &out, char *path )
        }
 }
 
-/* If any forward slash is found in argv0 then it is assumed that the path is
- * explicit and the path to the backend executable should be derived from
- * that. If no forward slash is found it is assumed the file is being run from
- * the installed location. The PREFIX supplied during configuration is used.
- * */
-char **makePathChecks( const char *argv0, const char *progName )
+void processArgs( int argc, const char **argv, InputData &id )
 {
-       char **result = new char*[3];
-       const char *lastSlash = strrchr( argv0, '/' );
-       int numChecks = 0;
-
-       if ( lastSlash != 0 ) {
-               char *path = strdup( argv0 );
-               int givenPathLen = (lastSlash - argv0) + 1;
-               path[givenPathLen] = 0;
-
-               int progNameLen = strlen(progName);
-               int length = givenPathLen + progNameLen + 1;
-               char *check = new char[length];
-               sprintf( check, "%s%s", path, progName );
-               result[numChecks++] = check;
-
-               length = givenPathLen + 3 + progNameLen + 1 + progNameLen + 1;
-               check = new char[length];
-               sprintf( check, "%s../%s/%s", path, progName, progName );
-               result[numChecks++] = check;
-       }
-       else {
-               int prefixLen = strlen(PREFIX);
-               int progNameLen = strlen(progName);
-               int length = prefixLen + 5 + progNameLen + 1;
-               char *check = new char[length];
-
-               sprintf( check, PREFIX "/bin/%s", progName );
-               result[numChecks++] = check;
-       }
+       ParamCheck pc("xo:dnmleabjkS:M:I:CDJRAvHh?-:sT:F:G:P:LpV", argc, argv);
 
-       result[numChecks] = 0;
-       return result;
-}
-
-
-void execBackend( const char *argv0, costream *intermed )
-{
-       /* Locate the backend program */
-       const char *progName = 0;
-       switch ( hostLang->lang ) {
-               case HostLang::C:
-               case HostLang::D:
-                       progName = "rlgen-cd";
-                       break;
-               case HostLang::Java:
-                       progName = "rlgen-java";
-                       break;
-               case HostLang::Ruby:
-                       progName = "rlgen-ruby";
-                       break;
-       }
-
-       char **pathChecks = makePathChecks( argv0, progName );
-
-       backendArgs.insert( 0, "rlgen-ruby" );
-       backendArgs.append( intermed->b->fileName );
-       backendArgs.append( 0 );
-
-       pid_t pid = fork();
-       if ( pid < 0 ) {
-               /* Error, no child created. */
-               error() << "failed to fork backend" << endp;
-       }
-       else if ( pid == 0 ) {
-               /* child */
-               while ( *pathChecks != 0 ) {
-                       execv( *pathChecks, backendArgs.data );
-                       pathChecks += 1;
-               }
-               error() << "failed to exec backend" << endp;
-       }
-       else {
-               /* parent. */
-               wait( 0 );
-       }
-
-       unlink( intermed->b->fileName );
-}
-
-char *makeIntermedTemplate( char *baseFileName )
-{
-       char *result;
-       char *lastSlash = strrchr( baseFileName, '/' );
-       if ( lastSlash == 0 ) {
-               result = new char[13];
-               strcpy( result, "ragel-XXXXXX.xml" );
-       }
-       else {
-               int baseLen = lastSlash - baseFileName + 1;
-               result = new char[baseLen + 13];
-               memcpy( result, baseFileName, baseLen );
-               strcpy( result+baseLen, "ragel-XXXXXX.xml" );
-       }
-       return result;
-};
-
-char fnChars[] = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
-
-costream *openIntermed( char *inputFileName, char *outputFileName )
-{
-       srandom(time(0));
-       costream *result = 0;
-
-       /* Which filename do we use as the base? */
-       char *baseFileName = outputFileName != 0 ? outputFileName : inputFileName;
-
-       /* The template for the intermediate file name. */
-       char *intermedFileName = makeIntermedTemplate( baseFileName );
-
-       /* Randomize the name and try to open. */
-       char *firstX = strrchr( intermedFileName, 'X' ) - 5;
-       for ( int tries = 0; tries < 20; tries++ ) {
-               /* Choose a random name. */
-               for ( int x = 0; x < 6; x++ )
-                       firstX[x] = fnChars[random() % 52];
-
-               /* Try to open the file. */
-               int fd = ::open( intermedFileName, O_WRONLY|O_EXCL|O_CREAT, S_IRUSR|S_IWUSR );
-
-               if ( fd > 0 ) {
-                       /* Success. */
-                       FILE *file = fdopen( fd, "wt" );
-                       if ( file == 0 )
-                               error() << "fdopen(...) on intermediate file failed" << endp;
-
-                       cfilebuf *b = new cfilebuf( intermedFileName, file );
-                       result = new costream( b );
-                       break;
-               }
-
-               if ( errno == EACCES ) {
-                       error() << "failed to open temp file " << intermedFileName << 
-                                       ", access denied" << endp;
-               }
-       }
-
-       if ( result == 0 )
-               error() << "abnormal error: cannot find unique name for temp file" << endp;
-
-       return result;
-}
-
-/* Main, process args and call yyparse to start scanning input. */
-int main(int argc, char **argv)
-{
-       ParamCheck pc("o:nmleabjkS:M:CDJRvHh?-:s", argc, argv);
-       char *inputFileName = 0;
-       char *outputFileName = 0;
+       /* FIXME: Need to check code styles VS language. */
 
        while ( pc.check() ) {
                switch ( pc.state ) {
                case ParamCheck::match:
                        switch ( pc.parameter ) {
+                       case 'V':
+                               generateDot = true;
+                               break;
+
+                       case 'x':
+                               generateXML = true;
+                               break;
+
                        /* Output. */
                        case 'o':
-                               if ( *pc.parameterArg == 0 )
+                               if ( *pc.paramArg == 0 )
                                        error() << "a zero length output file name was given" << endl;
-                               else if ( outputFileName != 0 )
+                               else if ( id.outputFileName != 0 )
                                        error() << "more than one output file name was given" << endl;
                                else {
                                        /* Ok, remember the output file name. */
-                                       outputFileName = pc.parameterArg;
-                                       backendArgs.append( "-o" );
-                                       backendArgs.append( pc.parameterArg );
+                                       id.outputFileName = pc.paramArg;
                                }
                                break;
 
+                       /* Flag for turning off duplicate action removal. */
+                       case 'd':
+                               wantDupsRemoved = false;
+                               break;
+
                        /* Minimization, mostly hidden options. */
                        case 'n':
                                minimizeOpt = MinimizeNone;
@@ -340,25 +278,33 @@ int main(int argc, char **argv)
 
                        /* Machine spec. */
                        case 'S':
-                               if ( *pc.parameterArg == 0 )
+                               if ( *pc.paramArg == 0 )
                                        error() << "please specify an argument to -S" << endl;
                                else if ( machineSpec != 0 )
                                        error() << "more than one -S argument was given" << endl;
                                else {
                                        /* Ok, remember the path to the machine to generate. */
-                                       machineSpec = pc.parameterArg;
+                                       machineSpec = pc.paramArg;
                                }
                                break;
 
                        /* Machine path. */
                        case 'M':
-                               if ( *pc.parameterArg == 0 )
+                               if ( *pc.paramArg == 0 )
                                        error() << "please specify an argument to -M" << endl;
                                else if ( machineName != 0 )
                                        error() << "more than one -M argument was given" << endl;
                                else {
                                        /* Ok, remember the machine name to generate. */
-                                       machineName = pc.parameterArg;
+                                       machineName = pc.paramArg;
+                               }
+                               break;
+
+                       case 'I':
+                               if ( *pc.paramArg == 0 )
+                                       error() << "please specify an argument to -I" << endl;
+                               else {
+                                       id.includePaths.append( pc.paramArg );
                                }
                                break;
 
@@ -375,30 +321,99 @@ int main(int argc, char **argv)
                        case 'R':
                                hostLang = &hostLangRuby;
                                break;
+                       case 'A':
+                               hostLang = &hostLangCSharp;
+                               break;
 
                        /* Version and help. */
                        case 'v':
                                version();
-                               exit(0);
+                               break;
                        case 'H': case 'h': case '?':
                                usage();
-                               exit(0);
+                               break;
                        case 's':
                                printStatistics = true;
                                break;
-                       case '-':
-                               if ( strcasecmp(pc.parameterArg, "help") == 0 ) {
+                       case '-': {
+                               char *arg = strdup( pc.paramArg );
+                               char *eq = strchr( arg, '=' );
+
+                               if ( eq != 0 )
+                                       *eq++ = 0;
+
+                               if ( strcmp( arg, "help" ) == 0 )
                                        usage();
-                                       exit(0);
-                               }
-                               else if ( strcasecmp(pc.parameterArg, "version") == 0 ) {
+                               else if ( strcmp( arg, "version" ) == 0 )
                                        version();
-                                       exit(0);
+                               else if ( strcmp( arg, "error-format" ) == 0 ) {
+                                       if ( eq == 0 )
+                                               error() << "expecting '=value' for error-format" << endl;
+                                       else if ( strcmp( eq, "gnu" ) == 0 )
+                                               errorFormat = ErrorFormatGNU;
+                                       else if ( strcmp( eq, "msvc" ) == 0 )
+                                               errorFormat = ErrorFormatMSVC;
+                                       else
+                                               error() << "invalid value for error-format" << endl;
                                }
+                               else if ( strcmp( arg, "rbx" ) == 0 )
+                                       rubyImpl = Rubinius;
                                else {
-                                       error() << "--" << pc.parameterArg << 
+                                       error() << "--" << pc.paramArg << 
                                                        " is an invalid argument" << endl;
                                }
+                               free( arg );
+                               break;
+                       }
+
+                       /* Passthrough args. */
+                       case 'T': 
+                               if ( pc.paramArg[0] == '0' )
+                                       codeStyle = GenTables;
+                               else if ( pc.paramArg[0] == '1' )
+                                       codeStyle = GenFTables;
+                               else {
+                                       error() << "-T" << pc.paramArg[0] << 
+                                                       " is an invalid argument" << endl;
+                                       exit(1);
+                               }
+                               break;
+                       case 'F': 
+                               if ( pc.paramArg[0] == '0' )
+                                       codeStyle = GenFlat;
+                               else if ( pc.paramArg[0] == '1' )
+                                       codeStyle = GenFFlat;
+                               else {
+                                       error() << "-F" << pc.paramArg[0] << 
+                                                       " is an invalid argument" << endl;
+                                       exit(1);
+                               }
+                               break;
+                       case 'G': 
+                               if ( pc.paramArg[0] == '0' )
+                                       codeStyle = GenGoto;
+                               else if ( pc.paramArg[0] == '1' )
+                                       codeStyle = GenFGoto;
+                               else if ( pc.paramArg[0] == '2' )
+                                       codeStyle = GenIpGoto;
+                               else {
+                                       error() << "-G" << pc.paramArg[0] << 
+                                                       " is an invalid argument" << endl;
+                                       exit(1);
+                               }
+                               break;
+                       case 'P':
+                               codeStyle = GenSplit;
+                               numSplitPartitions = atoi( pc.paramArg );
+                               break;
+
+                       case 'p':
+                               displayPrintables = true;
+                               break;
+
+                       case 'L':
+                               noLineDirectives = true;
+                               break;
                        }
                        break;
 
@@ -410,78 +425,134 @@ int main(int argc, char **argv)
                        /* It is interpreted as an input file. */
                        if ( *pc.curArg == 0 )
                                error() << "a zero length input file name was given" << endl;
-                       else if ( inputFileName != 0 )
+                       else if ( id.inputFileName != 0 )
                                error() << "more than one input file name was given" << endl;
                        else {
                                /* OK, Remember the filename. */
-                               inputFileName = pc.curArg;
+                               id.inputFileName = pc.curArg;
                        }
                        break;
                }
        }
+}
 
-       /* Bail on above errors. */
-       if ( gblErrorCount > 0 )
-               exit(1);
-
-       /* Make sure we are not writing to the same file as the input file. */
-       if ( inputFileName != 0 && outputFileName != 0 && 
-                       strcmp( inputFileName, outputFileName  ) == 0 )
-       {
-               error() << "output file \"" << outputFileName  << 
-                               "\" is the same as the input file" << endp;
-       }
-
+void process( InputData &id )
+{
        /* Open the input file for reading. */
-       istream *inStream;
-       if ( inputFileName != 0 ) {
-               /* Open the input file for reading. */
-               ifstream *inFile = new ifstream( inputFileName );
-               inStream = inFile;
-               if ( ! inFile->is_open() )
-                       error() << "could not open " << inputFileName << " for reading" << endp;
-       }
-       else {
-               inputFileName = "<stdin>";
-               inStream = &cin;
-       }
+       assert( id.inputFileName != 0 );
+       ifstream *inFile = new ifstream( id.inputFileName );
+       if ( ! inFile->is_open() )
+               error() << "could not open " << id.inputFileName << " for reading" << endp;
 
        /* Used for just a few things. */
        std::ostringstream hostData;
 
-       if ( machineSpec == 0 && machineName == 0 )
-               hostData << "<host line=\"1\" col=\"1\">";
+       /* Make the first input item. */
+       InputItem *firstInputItem = new InputItem;
+       firstInputItem->type = InputItem::HostData;
+       firstInputItem->loc.fileName = id.inputFileName;
+       firstInputItem->loc.line = 1;
+       firstInputItem->loc.col = 1;
+       id.inputItems.append( firstInputItem );
 
-       Scanner scanner( inputFileName, *inStream, hostData, 0, 0, 0, false );
+       Scanner scanner( id, id.inputFileName, *inFile, 0, 0, 0, false );
        scanner.do_scan();
 
        /* Finished, final check for errors.. */
        if ( gblErrorCount > 0 )
-               return 1;
-       
+               exit(1);
+
        /* Now send EOF to all parsers. */
-       terminateAllParsers();
+       id.terminateAllParsers();
+
+       /* Bail on above error. */
+       if ( gblErrorCount > 0 )
+               exit(1);
+
+       /* Compiles machines. There is no longer a separate backend program
+        * to locate. */
+       id.prepareMachineGen();
 
-       /* Finished, final check for errors.. */
        if ( gblErrorCount > 0 )
-               return 1;
+               exit(1);
+
+       id.makeOutputStream();
+
+       /* Generates the reduced machine, which we use to write output. */
+       if ( !generateXML ) {
+               id.generateReduced();
 
-       if ( machineSpec == 0 && machineName == 0 )
-               hostData << "</host>\n";
+               if ( gblErrorCount > 0 )
+                       exit(1);
+       }
 
+       id.verifyWritesHaveData();
        if ( gblErrorCount > 0 )
-               return 1;
-       
-       costream *intermed = openIntermed( inputFileName, outputFileName );
+               exit(1);
+
+       /*
+        * From this point on we should not be reporting any errors.
+        */
+
+       id.openOutput();
+       id.writeOutput();
+
+       /* Close the input file. */
+       delete inFile;
+
+       /* If writing to a file, delete the ostream, causing it to flush.
+        * Standard out is flushed automatically. */
+       if ( id.outputFileName != 0 ) {
+               delete id.outStream;
+               delete id.outFilter;
+       }
+
+       assert( gblErrorCount == 0 );
+}
+
+char *makeIntermedTemplate( const char *baseFileName )
+{
+       char *result = 0;
+       const char *templ = "ragel-XXXXXX.xml";
+       const char *lastSlash = strrchr( baseFileName, '/' );
+       if ( lastSlash == 0 ) {
+               result = new char[strlen(templ)+1];
+               strcpy( result, templ );
+       }
+       else {
+               int baseLen = lastSlash - baseFileName + 1;
+               result = new char[baseLen + strlen(templ) + 1];
+               memcpy( result, baseFileName, baseLen );
+               strcpy( result+baseLen, templ );
+       }
+       return result;
+};
+
+/* Main, process args and call yyparse to start scanning input. */
+int main( int argc, const char **argv )
+{
+       InputData id;
+
+       processArgs( argc, argv, id );
 
-       /* Write the machines, then the surrounding code. */
-       writeMachines( *intermed, hostData.str(), inputFileName );
+       /* Require an input file. If we use standard in then we won't have a file
+        * name on which to base the output. */
+       if ( id.inputFileName == 0 )
+               error() << "no input file given" << endl;
+
+       /* Bail on argument processing errors. */
+       if ( gblErrorCount > 0 )
+               exit(1);
 
-       /* Close the intermediate file. */
-       intermed->fclose();
+       /* Make sure we are not writing to the same file as the input file. */
+       if ( id.inputFileName != 0 && id.outputFileName != 0 && 
+                       strcmp( id.inputFileName, id.outputFileName  ) == 0 )
+       {
+               error() << "output file \"" << id.outputFileName  << 
+                               "\" is the same as the input file" << endp;
+       }
 
-       /* Run the backend process. */
-       execBackend( argv[0], intermed );
+       process( id );
 
        return 0;
 }