2 * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 #include <sys/types.h>
45 #define S_IRUSR _S_IREAD
46 #define S_IWUSR _S_IWRITE
54 /* Parameters and output. */
69 using std::streamsize;
71 /* Controls minimization. minimizeOpt selects when minimization runs (see the
 * -n/-m/-l/-e entries in the usage text). minimizeLevel presumably selects the
 * algorithm -- confirm. Both are overwritten by processArgs. */
72 MinimizeLevel minimizeLevel = MinimizePartition2;
73 MinimizeOpt minimizeOpt = MinimizeMostOps;
75 /* Graphviz dot file generation. machineSpec and machineName hold the
 * arguments given to -S and -M; they stay null when the option is absent. */
76 const char *machineSpec = 0, *machineName = 0;
77 bool machineSpecFound = false;
/* Cleared by processArgs when the "-d" option is forwarded to the frontend. */
78 bool wantDupsRemoved = true;
/* Set by processArgs when the "-s" option is forwarded to the frontend. */
80 bool printStatistics = false;
81 bool frontendOnly = false;
82 bool generateDot = false;
/* Arguments collected by processArgs for the frontend and backend programs,
 * plus the directories given with -I. */
84 ArgsVector frontendArgs;
85 ArgsVector backendArgs;
86 ArgsVector includePaths;
88 /* Print a summary of the options. */
/* NOTE(review): this listing omits the function header and some section
 * titles. The emitted text contains two typos that are left untouched here
 * because they are runtime strings: "included an imported" (should read
 * "included and imported") and "line direcives" (should read "directives"). */
92 "usage: ragel [options] file\n"
94 " -h, -H, -?, --help Print this usage and exit\n"
95 " -v, --version Print version information and exit\n"
96 " -o <file> Write output to <file>\n"
97 " -s Print some statistics on stderr\n"
98 " -d Do not remove duplicates from action lists\n"
99 " -I <dir> Add <dir> to the list of directories to search\n"
100 " for included an imported files\n"
101 "error reporting format:\n"
102 " --error-format=gnu file:line:column: message (default)\n"
103 " --error-format=msvc file(line,column): message\n"
104 "fsm minimization:\n"
105 " -n Do not perform minimization\n"
106 " -m Minimize at the end of the compilation\n"
107 " -l Minimize after most operations (default)\n"
108 " -e Minimize after every operation\n"
110 " -x Run the frontend only: emit XML intermediate format\n"
111 " -V Generate a dot file for Graphviz\n"
112 " -p Display printable characters on labels\n"
113 " -S <spec> FSM specification to output (for rlgen-dot)\n"
114 " -M <machine> Machine definition/instantiation to output (for rlgen-dot)\n"
116 " -C The host language is C, C++, Obj-C or Obj-C++ (default)\n"
117 " -D The host language is D\n"
118 " -J The host language is Java\n"
119 " -R The host language is Ruby\n"
120 " -A The host language is C#\n"
121 "line direcives: (C/D/C# only)\n"
122 " -L Inhibit writing of #line directives\n"
123 "code style: (C/Ruby/C# only)\n"
124 " -T0 Table driven FSM (default)\n"
125 " -T1 Faster table driven FSM\n"
126 " -F0 Flat table driven FSM\n"
127 " -F1 Faster flat table-driven FSM\n"
128 "code style: (C/C# only)\n"
129 " -G0 Goto-driven FSM\n"
130 " -G1 Faster goto-driven FSM\n"
131 "code style: (C only)\n"
132 " -G2 Really fast goto-driven FSM\n"
133 " -P<N> N-Way Split really fast goto-driven FSM\n"
139 /* Print version information and exit. */
/* The banner is written to cout and is built from the VERSION and PUBDATE
 * macros, followed by the copyright line.
 * NOTE(review): the function header and the exit call are not visible in
 * this listing. */
142 cout << "Ragel State Machine Compiler version " VERSION << " " PUBDATE << endl <<
143 "Copyright (c) 2001-2007 by Adrian Thurston" << endl;
147 /* Error reporting format. Defaults to the GNU style; processArgs switches it
 * according to --error-format=gnu|msvc, and the InputLoc stream operator
 * reads it to decide how a location is printed. */
148 ErrorFormat errorFormat = ErrorFormatGNU;
/* Build an InputLoc from a file name, a line number and a column number.
 * The file name pointer is stored as given, not copied. */
150 InputLoc makeInputLoc( const char *fileName, int line, int col)
152 InputLoc loc = { fileName, line, col };
/* Write an input location to a stream in the format chosen by errorFormat.
 * The MSVC branch starts with "file(line" and appends ",col"; the other
 * visible branch writes "file:line" and appends ":col". The location must
 * carry a non-null file name.
 * NOTE(review): the lines guarding the column output, closing the MSVC
 * parenthesis and returning the stream are not visible in this listing. */
156 ostream &operator<<( ostream &out, const InputLoc &loc )
158 assert( loc.fileName != 0 );
159 switch ( errorFormat ) {
160 case ErrorFormatMSVC:
161 out << loc.fileName << "(" << loc.line;
163 out << "," << loc.col;
168 out << loc.fileName << ":" << loc.line;
170 out << ":" << loc.col;
176 /* Total error count. frontend() and main() compare it against zero to decide
 * whether processing should stop. */
177 int gblErrorCount = 0;
179 /* Print the opening to a warning in the input, then return the error ostream. */
/* The opening has the form "<loc>: warning: " and goes to cerr, where <loc>
 * is formatted by the InputLoc stream operator. */
180 ostream &warning( const InputLoc &loc )
182 cerr << loc << ": warning: ";
186 /* Print the opening to a program error, then return the error stream. */
/* The opening is the PROGNAME macro followed by ": ", written to cerr.
 * NOTE(review): the function header and any error-count update are not
 * visible in this listing. */
190 cerr << PROGNAME ": ";
/* Overload of error() that takes the input location the error refers to.
 * NOTE(review): the body is not visible in this listing. */
194 ostream &error( const InputLoc &loc )
/* Walk the NUL-terminated path one character at a time, writing to out.
 * NOTE(review): the loop body is not visible in this listing; presumably it
 * escapes characters that are special inside a #line directive -- confirm. */
201 void escapeLineDirectivePath( std::ostream &out, char *path )
203 for ( char *pc = path; *pc != 0; pc++ ) {
/* Parse the command line.
 *
 * Updates the global option variables, collects the arguments to forward to
 * the frontend (frontendArgs) and to the backend (backendArgs), records -I
 * directories in includePaths, and stores the input and output file names
 * through the two reference parameters. Problems are reported with error().
 *
 * NOTE(review): the case labels and break statements are not visible in this
 * listing; which option triggers which branch can only be read off the
 * argument string each branch forwards. */
211 void processArgs( int argc, const char **argv,
212 const char *&inputFileName, const char *&outputFileName )
214 ParamCheck pc("xo:dnmleabjkS:M:I:CDJRAvHh?-:sT:F:G:P:LpV", argc, argv);
216 while ( pc.check() ) {
217 switch ( pc.state ) {
218 case ParamCheck::match:
219 switch ( pc.parameter ) {
/* Output file name: must be non-empty and may be given only once. */
230 if ( *pc.paramArg == 0 )
231 error() << "a zero length output file name was given" << endl;
232 else if ( outputFileName != 0 )
233 error() << "more than one output file name was given" << endl;
235 /* Ok, remember the output file name. */
236 outputFileName = pc.paramArg;
240 /* Do not remove duplicates from action lists; forwarded as -d. */
242 wantDupsRemoved = false;
243 frontendArgs.append( "-d" );
246 /* Minimization, mostly hidden options. */
248 minimizeOpt = MinimizeNone;
249 frontendArgs.append( "-n" );
252 minimizeOpt = MinimizeEnd;
253 frontendArgs.append( "-m" );
256 minimizeOpt = MinimizeMostOps;
257 frontendArgs.append( "-l" );
260 minimizeOpt = MinimizeEveryOp;
261 frontendArgs.append( "-e" );
264 minimizeLevel = MinimizeApprox;
265 frontendArgs.append( "-a" );
268 minimizeLevel = MinimizeStable;
269 frontendArgs.append( "-b" );
272 minimizeLevel = MinimizePartition1;
273 frontendArgs.append( "-j" );
276 minimizeLevel = MinimizePartition2;
277 frontendArgs.append( "-k" );
/* -S: non-empty, at most once; remembered and forwarded to the frontend. */
282 if ( *pc.paramArg == 0 )
283 error() << "please specify an argument to -S" << endl;
284 else if ( machineSpec != 0 )
285 error() << "more than one -S argument was given" << endl;
287 /* Ok, remember the path to the machine to generate. */
288 machineSpec = pc.paramArg;
289 frontendArgs.append( "-S" );
290 frontendArgs.append( pc.paramArg );
/* -M: non-empty, at most once; remembered and forwarded to the frontend. */
296 if ( *pc.paramArg == 0 )
297 error() << "please specify an argument to -M" << endl;
298 else if ( machineName != 0 )
299 error() << "more than one -M argument was given" << endl;
301 /* Ok, remember the machine name to generate. */
302 machineName = pc.paramArg;
303 frontendArgs.append( "-M" );
304 frontendArgs.append( pc.paramArg );
/* -I: may repeat; each directory is recorded and forwarded. */
309 if ( *pc.paramArg == 0 )
310 error() << "please specify an argument to -I" << endl;
312 includePaths.append( pc.paramArg );
313 frontendArgs.append( "-I" );
314 frontendArgs.append( pc.paramArg );
318 /* Host language types. */
320 hostLang = &hostLangC;
321 frontendArgs.append( "-C" );
324 hostLang = &hostLangD;
325 frontendArgs.append( "-D" );
328 hostLang = &hostLangJava;
329 frontendArgs.append( "-J" );
332 hostLang = &hostLangRuby;
333 frontendArgs.append( "-R" );
336 hostLang = &hostLangCSharp;
337 frontendArgs.append( "-A" );
340 /* Version and help. */
344 case 'H': case 'h': case '?':
348 printStatistics = true;
349 frontendArgs.append( "-s" );
/* Long options: the text after "--" is in pc.paramArg; eq locates an
 * optional "=value" part.
 * NOTE(review): the lines that split the name from the value and that
 * test eq for null are not visible in this listing. */
352 char *eq = strchr( pc.paramArg, '=' );
357 if ( strcmp( pc.paramArg, "help" ) == 0 )
359 else if ( strcmp( pc.paramArg, "version" ) == 0 )
361 else if ( strcmp( pc.paramArg, "error-format" ) == 0 ) {
363 error() << "expecting '=value' for error-format" << endl;
364 else if ( strcmp( eq, "gnu" ) == 0 ) {
365 errorFormat = ErrorFormatGNU;
366 frontendArgs.append( "--error-format=gnu" );
368 else if ( strcmp( eq, "msvc" ) == 0 ) {
369 errorFormat = ErrorFormatMSVC;
370 frontendArgs.append( "--error-format=msvc" );
373 error() << "invalid value for error-format" << endl;
376 else if ( strcmp( pc.paramArg, "rbx" ) == 0 )
377 backendArgs.append( "--rbx" );
379 error() << "--" << pc.paramArg <<
380 " is an invalid argument" << endl;
385 /* Passthrough args. */
387 backendArgs.append( "-T" );
388 backendArgs.append( pc.paramArg );
391 backendArgs.append( "-F" );
392 backendArgs.append( pc.paramArg );
395 backendArgs.append( "-G" );
396 backendArgs.append( pc.paramArg );
399 backendArgs.append( "-P" );
400 backendArgs.append( pc.paramArg );
403 backendArgs.append( "-p" );
406 backendArgs.append( "-L" );
411 case ParamCheck::invalid:
412 error() << "-" << pc.parameter << " is an invalid argument" << endl;
415 case ParamCheck::noparam:
416 /* It is interpreted as an input file. */
417 if ( *pc.curArg == 0 )
418 error() << "a zero length input file name was given" << endl;
419 else if ( inputFileName != 0 )
420 error() << "more than one input file name was given" << endl;
422 /* OK, Remember the filename. */
423 inputFileName = pc.curArg;
/* Run the frontend on one input file.
 *
 * Opens inputFileName (which must be non-null), builds a Scanner over it,
 * sends EOF to all parsers, and then writes the machines together with the
 * collected host data. When outputFileName is non-null the output goes to a
 * newly opened ofstream of that name. The gblErrorCount checks between the
 * stages stop processing once an error was counted.
 *
 * Returns nonzero when gblErrorCount is greater than zero.
 *
 * NOTE(review): the scan call, the statements taken after each error check,
 * the branch used when outputFileName is null and the stream cleanup are not
 * visible in this listing. */
430 int frontend( const char *inputFileName, const char *outputFileName )
432 /* Open the input file for reading. */
433 assert( inputFileName != 0 );
434 ifstream *inFile = new ifstream( inputFileName );
435 istream *inStream = inFile;
436 if ( ! inFile->is_open() )
437 error() << "could not open " << inputFileName << " for reading" << endp;
439 /* Used for just a few things. */
440 std::ostringstream hostData;
/* The <host> wrapper is only emitted when neither -S nor -M was given. */
442 if ( machineSpec == 0 && machineName == 0 )
443 hostData << "<host line=\"1\" col=\"1\">";
445 Scanner scanner( inputFileName, *inStream, hostData, 0, 0, 0, false );
448 /* Finished, final check for errors.. */
449 if ( gblErrorCount > 0 )
452 /* Now send EOF to all parsers. */
453 terminateAllParsers();
455 /* Finished, final check for errors.. */
456 if ( gblErrorCount > 0 )
459 if ( machineSpec == 0 && machineName == 0 )
460 hostData << "</host>\n";
462 if ( gblErrorCount > 0 )
465 ostream *outputFile = 0;
466 if ( outputFileName != 0 )
467 outputFile = new ofstream( outputFileName );
471 /* Write the machines, then the surrounding code. */
472 writeMachines( *outputFile, hostData.str(), inputFileName );
474 /* Close the intermediate file. */
475 if ( outputFileName != 0 )
478 return gblErrorCount > 0;
/* Build the name template for the intermediate file.
 *
 * The result is "ragel-XXXXXX.xml", prefixed with the directory part of
 * baseFileName (everything up to and including its last '/') when there is
 * one. The buffer is allocated with new[].
 *
 * NOTE(review): the declaration of result and the return statement are not
 * visible in this listing; presumably the buffer is returned and the caller
 * owns it -- confirm. */
481 char *makeIntermedTemplate( const char *baseFileName )
484 const char *templ = "ragel-XXXXXX.xml";
485 char *lastSlash = strrchr( baseFileName, '/' );
486 if ( lastSlash == 0 ) {
487 result = new char[strlen(templ)+1];
488 strcpy( result, templ );
/* baseLen counts the directory part including the trailing slash. */
491 int baseLen = lastSlash - baseFileName + 1;
492 result = new char[baseLen + strlen(templ) + 1];
493 memcpy( result, baseFileName, baseLen );
494 strcpy( result+baseLen, templ );
/* Create a uniquely named intermediate file and hand back its name.
 *
 * The name is derived from the output file name when one was given, otherwise
 * from the input file name. Up to 20 random names are tried; each attempt
 * opens the file with O_CREAT|O_EXCL so an existing file is never reused.
 * An EACCES failure and running out of attempts are both reported with
 * error() ... endp.
 *
 * NOTE(review): the success test on fd, the close call, the loop exit and the
 * return statement are not visible in this listing. */
499 const char *openIntermed( const char *inputFileName, const char *outputFileName )
502 const char *result = 0;
504 /* Which filename do we use as the base? */
505 const char *baseFileName = outputFileName != 0 ? outputFileName : inputFileName;
507 /* The template for the intermediate file name. */
508 const char *intermedFileName = makeIntermedTemplate( baseFileName );
510 /* Randomize the name and try to open. */
511 char fnChars[] = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
/* The last 'X' minus five is the first of the six 'X' placeholders. */
512 char *firstX = strrchr( intermedFileName, 'X' ) - 5;
513 for ( int tries = 0; tries < 20; tries++ ) {
514 /* Choose a random name. */
/* NOTE(review): fnChars holds 62 characters but the index is taken
 * modulo 52, so the last ten upper-case letters are never chosen. */
515 for ( int x = 0; x < 6; x++ )
516 firstX[x] = fnChars[rand() % 52];
518 /* Try to open the file. */
519 int fd = ::open( intermedFileName, O_WRONLY|O_EXCL|O_CREAT, S_IRUSR|S_IWUSR );
522 /* Success. Close the file immediately and return the name for use
523 * by the child processes. */
525 result = intermedFileName;
529 if ( errno == EACCES ) {
530 error() << "failed to open temp file " << intermedFileName <<
531 ", access denied" << endp;
536 error() << "abnormal error: cannot find unique name for temp file" << endp;
/* Exit helper that receives the intermediate file name and an exit status.
 * NOTE(review): the body is not visible in this listing; presumably it removes
 * the intermediate file before exiting with status -- confirm. */
542 void cleanExit( const char *intermed, int status )
550 /* If any forward slash is found in argv0 then it is assumed that the path is
551 * explicit and the path to the backend executable should be derived from
552 * that. We check that location and also go up one then inside a directory of
553 * the same name in case we are executing from the source tree. If no forward
554 * slash is found it is assumed the file is being run from the installed
555 * location. The PREFIX supplied during configuration is used. */
/* Returns a null-terminated array of candidate paths for progName. The array
 * and each candidate are allocated with new[].
 * NOTE(review): the numChecks declaration, the else line and the return are
 * not visible in this listing; the strdup'd path is not freed in the visible
 * lines. */
556 char **makePathChecksUnix( const char *argv0, const char *progName )
558 char **result = new char*[3];
559 const char *lastSlash = strrchr( argv0, '/' );
562 if ( lastSlash != 0 ) {
/* Truncate a copy of argv0 just after its last slash. */
563 char *path = strdup( argv0 );
564 int givenPathLen = (lastSlash - argv0) + 1;
565 path[givenPathLen] = 0;
/* Candidate 1: <dir>/<progName>. */
567 int progNameLen = strlen(progName);
568 int length = givenPathLen + progNameLen + 1;
569 char *check = new char[length];
570 sprintf( check, "%s%s", path, progName );
571 result[numChecks++] = check;
/* Candidate 2: <dir>/../<progName>/<progName>. */
573 length = givenPathLen + 3 + progNameLen + 1 + progNameLen + 1;
574 check = new char[length];
575 sprintf( check, "%s../%s/%s", path, progName, progName );
576 result[numChecks++] = check;
/* Installed location: PREFIX/bin/<progName>; "/bin/" accounts for the 5. */
579 int prefixLen = strlen(PREFIX);
580 int progNameLen = strlen(progName);
581 int length = prefixLen + 5 + progNameLen + 1;
582 char *check = new char[length];
584 sprintf( check, PREFIX "/bin/%s", progName );
585 result[numChecks++] = check;
588 result[numChecks] = 0;
/* Entry points of the frontend and of the individual backends. forkAndExec
 * selects one of them by comparing the program name against "ragel",
 * "rlgen-cd", "rlgen-java", "rlgen-ruby", "rlgen-csharp" and "rlgen-dot". */
592 int main(int argc, const char **argv);
593 int cd_main(int argc, const char **argv);
594 int java_main(int argc, const char **argv);
595 int ruby_main(int argc, const char **argv);
596 int csharp_main(int argc, const char **argv);
597 int dot_main(int argc, const char **argv);
/* Run progName as a child process and wait for it.
 *
 * A failed fork, a failed exec, an abnormal child exit and a nonzero child
 * exit status all end in cleanExit( intermed, ... ); a nonzero child status is
 * passed on as the exit status.
 *
 * progName    name used in messages and for the in-process dispatch
 * pathChecks  null-terminated list of executable paths to try with execv
 * args        argument vector; its data is passed as argv
 * intermed    intermediate file name handed to cleanExit
 *
 * NOTE(review): the fork call, the advance over pathChecks, the wait call and
 * any preprocessor conditionals selecting between execv and the direct
 * *_main calls are not visible in this listing. */
600 void forkAndExec( const char *progName, char **pathChecks,
601 ArgsVector &args, const char *intermed )
606 /* Error, no child created. */
607 error() << "failed to fork for " << progName << endl;
608 cleanExit( intermed, 1 );
610 else if ( pid == 0 ) {
612 while ( *pathChecks != 0 ) {
613 /* Execv does not modify argv, it just uses the const form that is
614 * compatible with the most code. Ours not included. */
615 execv( *pathChecks, (char *const*) args.data );
618 error() << "failed to exec " << progName << endl;
619 cleanExit( intermed, 1 );
/* In-process dispatch: the argument count excludes the trailing null
 * entry of args. */
623 if ( strcmp( progName, "ragel" ) == 0 )
624 main( args.length()-1, args.data );
625 else if ( strcmp( progName, "rlgen-cd" ) == 0 )
626 cd_main( args.length()-1, args.data );
627 else if ( strcmp( progName, "rlgen-java" ) == 0 )
628 java_main( args.length()-1, args.data );
629 else if ( strcmp( progName, "rlgen-ruby" ) == 0 )
630 ruby_main( args.length()-1, args.data );
631 else if ( strcmp( progName, "rlgen-csharp" ) == 0 )
632 csharp_main( args.length()-1, args.data );
633 else if ( strcmp( progName, "rlgen-dot" ) == 0 )
634 dot_main( args.length()-1, args.data );
637 /* Parent process, wait for the child. */
641 /* What happened with the child. */
642 if ( ! WIFEXITED( status ) ) {
643 error() << progName << " did not exit normally" << endl;
644 cleanExit( intermed, 1 );
647 if ( WEXITSTATUS(status) != 0 )
648 cleanExit( intermed, WEXITSTATUS(status) );
654 /* GetModuleFileNameEx is used to find out where the current process's
655 * binary is. That location is searched first. If that fails then we go up one
656 * directory and look for the executable inside a directory of the same name
657 * in case we are executing from the source tree.
/* Returns a null-terminated array of candidate paths for progName, derived
 * from the directory of the running image. The array and each candidate are
 * allocated with new[]. The image path is required to contain a backslash.
 * NOTE(review): the declarations of len and numChecks and the return are not
 * visible in this listing; the strdup'd path and imageFileName are not freed
 * in the visible lines. */
659 char **makePathChecksWin( const char *progName )
662 char *imageFileName = new char[len];
663 HANDLE h = GetCurrentProcess();
664 len = GetModuleFileNameEx( h, NULL, imageFileName, len );
665 imageFileName[len] = 0;
667 char **result = new char*[3];
668 const char *lastSlash = strrchr( imageFileName, '\\' );
671 assert( lastSlash != 0 );
/* Truncate a copy of the image path just after its last backslash. */
672 char *path = strdup( imageFileName );
673 int givenPathLen = (lastSlash - imageFileName) + 1;
674 path[givenPathLen] = 0;
/* Candidate 1: <dir>\<progName>. */
676 int progNameLen = strlen(progName);
677 int length = givenPathLen + progNameLen + 1;
678 char *check = new char[length];
679 sprintf( check, "%s%s", path, progName );
680 result[numChecks++] = check;
/* Candidate 2: <dir>\..\<progName>\<progName>. */
682 length = givenPathLen + 3 + progNameLen + 1 + progNameLen + 1;
683 check = new char[length];
684 sprintf( check, "%s..\\%s\\%s", path, progName, progName );
685 result[numChecks++] = check;
687 result[numChecks] = 0;
/* Run progName with _spawnv in wait mode, trying each path in pathChecks.
 *
 * The search only continues past a candidate when the spawn failed with
 * ENOENT. Failure to spawn is reported and ends in cleanExit( intermed, 1 ).
 *
 * NOTE(review): the declaration of result, the loop exit, the advance over
 * pathChecks and the conditions around the two cleanExit calls are not
 * visible in this listing. */
691 void spawn( const char *progName, char **pathChecks,
692 ArgsVector &args, char *intermed )
695 while ( *pathChecks != 0 ) {
696 //cerr << "trying to execute " << *pathChecks << endl;
697 result = _spawnv( _P_WAIT, *pathChecks, args.data );
698 if ( result >= 0 || errno != ENOENT )
704 error() << "failed to spawn " << progName << endl;
705 cleanExit( intermed, 1 );
709 cleanExit( intermed, 1 );
/* Run the frontend program as a separate process.
 *
 * Completes frontendArgs into the vector
 *   ragel -x <collected options> -o <intermed> <inputFileName> <null>
 * and launches it through forkAndExec or spawn.
 *
 * NOTE(review): the preprocessor conditionals choosing between the Unix and
 * Windows launch paths are not visible in this listing. */
714 void execFrontend( const char *argv0, const char *inputFileName, const char *intermed )
716 /* The frontend program name. */
717 const char *progName = "ragel";
719 frontendArgs.insert( 0, progName );
720 frontendArgs.insert( 1, "-x" );
721 frontendArgs.append( "-o" );
722 frontendArgs.append( intermed );
723 frontendArgs.append( inputFileName );
/* Null entry terminates the argument vector. */
724 frontendArgs.append( 0 );
727 char **pathChecks = makePathChecksUnix( argv0, progName );
728 forkAndExec( progName, pathChecks, frontendArgs, intermed );
730 char **pathChecks = makePathChecksWin( progName );
731 spawn( progName, pathChecks, frontendArgs, intermed );
/* Run the backend program that matches the requested output.
 *
 * Picks the program name, completes backendArgs into the vector
 *   <progName> <collected options> [-o <outputFileName>] <intermed> <null>
 * and launches it through forkAndExec or spawn. The -o pair is only added
 * when outputFileName is non-null.
 *
 * NOTE(review): the condition selecting "rlgen-dot", most case labels of the
 * host-language switch and the preprocessor conditionals choosing between the
 * Unix and Windows launch paths are not visible in this listing. */
735 void execBackend( const char *argv0, const char *intermed, const char *outputFileName )
737 /* Locate the backend program */
738 const char *progName = 0;
740 progName = "rlgen-dot";
742 switch ( hostLang->lang ) {
745 progName = "rlgen-cd";
748 progName = "rlgen-java";
751 progName = "rlgen-ruby";
753 case HostLang::CSharp:
754 progName = "rlgen-csharp";
758 backendArgs.insert( 0, progName );
759 if ( outputFileName != 0 ) {
760 backendArgs.append( "-o" );
761 backendArgs.append( outputFileName );
763 backendArgs.append( intermed );
/* Null entry terminates the argument vector. */
764 backendArgs.append( 0 );
767 char **pathChecks = makePathChecksUnix( argv0, progName );
768 forkAndExec( progName, pathChecks, backendArgs, intermed );
770 char **pathChecks = makePathChecksWin( progName );
771 spawn( progName, pathChecks, backendArgs, intermed );
775 /* Main: process the arguments, run the frontend into an intermediate file,
 * run the backend on that file, then clean up the intermediate file.
 * NOTE(review): the statements inside the -M/-S check, the exits taken after
 * the error checks and any frontend-only path are not visible in this
 * listing. */
776 int main(int argc, const char **argv)
778 const char *inputFileName = 0;
779 const char *outputFileName = 0;
781 processArgs( argc, argv, inputFileName, outputFileName );
783 /* If -M or -S are given and we're not generating a dot file then invoke
784 * the frontend. These options are not useful with code generators. */
785 if ( machineName != 0 || machineSpec != 0 ) {
790 /* Require an input file. If we use standard in then we won't have a file
791 * name on which to base the output. */
792 if ( inputFileName == 0 )
793 error() << "no input file given" << endl;
795 /* Bail on argument processing errors. */
796 if ( gblErrorCount > 0 )
799 /* Make sure we are not writing to the same file as the input file. */
/* The comparison is textual: two different spellings of the same path are
 * not detected. */
800 if ( inputFileName != 0 && outputFileName != 0 &&
801 strcmp( inputFileName, outputFileName ) == 0 )
803 error() << "output file \"" << outputFileName <<
804 "\" is the same as the input file" << endp;
/* The frontend writes its output to the intermediate file, which the
 * backend then reads. */
807 const char *intermed = openIntermed( inputFileName, outputFileName );
808 frontend( inputFileName, intermed );
809 execBackend( argv[0], intermed, outputFileName );
811 /* Clean up the intermediate. */
812 cleanExit( intermed, 0 );