2 * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 #include <sys/types.h>
46 /* Parameters and output. */
61 using std::streamsize;
63 /* Controls minimization. minimizeLevel is changed by the branches that
 * forward -a, -b, -j and -k; minimizeOpt by the branches that forward -n, -m,
 * -l and -e (see processArgs). */
64 MinimizeLevel minimizeLevel = MinimizePartition2;
65 MinimizeOpt minimizeOpt = MinimizeMostOps;
67 /* Graphviz dot file generation. machineSpec and machineName hold the
 * arguments of -S and -M and stay null until the option is given. */
68 char *machineSpec = 0, *machineName = 0;
69 bool machineSpecFound = false;
/* Cleared by -d (do not remove duplicates from action lists). */
70 bool wantDupsRemoved = true;
/* Set by -s. */
72 bool printStatistics = false;
/* NOTE(review): presumably set by -x and -V respectively; the assignments are
 * not visible in this excerpt -- confirm. */
73 bool frontendOnly = false;
74 bool generateDot = false;
/* Filled while parsing the command line. frontendArgs and backendArgs are
 * later completed and used as the argument vectors of the frontend and
 * backend programs; includePaths receives each -I directory. */
76 ArgsVector frontendArgs;
77 ArgsVector backendArgs;
78 ArgsVector includePaths;
80 /* Print a summary of the options.
 * The text is a single run of adjacent string literals, one per output line.
 * NOTE(review): two typos below end up in the program output and should be
 * corrected in the literals: "for included an imported files" (should read
 * "and") and "line direcives" (should read "directives").
 * NOTE(review): -a, -b, -j and -k are accepted by the option string in
 * processArgs but are not listed here. */
84 "usage: ragel [options] file\n"
86 " -h, -H, -?, --help Print this usage and exit\n"
87 " -v, --version Print version information and exit\n"
88 " -o <file> Write output to <file>\n"
89 " -s Print some statistics on stderr\n"
90 " -d Do not remove duplicates from action lists\n"
91 " -I <dir> Add <dir> to the list of directories to search\n"
92 " for included an imported files\n"
93 "error reporting format:\n"
94 " --error-format=gnu file:line:column: message (default)\n"
95 " --error-format=msvc file(line,column): message\n"
97 " -n Do not perform minimization\n"
98 " -m Minimize at the end of the compilation\n"
99 " -l Minimize after most operations (default)\n"
100 " -e Minimize after every operation\n"
102 " -x Run the frontend only: emit XML intermediate format\n"
103 " -V Generate a dot file for Graphviz\n"
104 " -p Display printable characters on labels\n"
105 " -S <spec> FSM specification to output (for rlgen-dot)\n"
106 " -M <machine> Machine definition/instantiation to output (for rlgen-dot)\n"
108 " -C The host language is C, C++, Obj-C or Obj-C++ (default)\n"
109 " -D The host language is D\n"
110 " -J The host language is Java\n"
111 " -R The host language is Ruby\n"
112 " -A The host language is C#\n"
113 "line direcives: (C/D/C# only)\n"
114 " -L Inhibit writing of #line directives\n"
115 "code style: (C/Ruby/C# only)\n"
116 " -T0 Table driven FSM (default)\n"
117 " -T1 Faster table driven FSM\n"
118 " -F0 Flat table driven FSM\n"
119 " -F1 Faster flat table-driven FSM\n"
120 "code style: (C/C# only)\n"
121 " -G0 Goto-driven FSM\n"
122 " -G1 Faster goto-driven FSM\n"
123 "code style: (C only)\n"
124 " -G2 Really fast goto-driven FSM\n"
125 " -P<N> N-Way Split really fast goto-driven FSM\n"
131 /* Print version information and exit.
 * The banner is written to cout. VERSION and PUBDATE are string-literal
 * macros that are pasted into the adjacent literals at compile time. */
134 cout << "Ragel State Machine Compiler version " VERSION << " " PUBDATE << endl <<
135 "Copyright (c) 2001-2007 by Adrian Thurston" << endl;
139 /* Error reporting format. Read when an InputLoc is written to a stream and
 * changed by the error-format handling in processArgs; GNU is the default. */
140 ErrorFormat errorFormat = ErrorFormatGNU;
/* Bundle a file name, a line and a column into an InputLoc. */
142 InputLoc makeInputLoc( const char *fileName, int line, int col)
/* Aggregate initialization, in the order file name, line, column. */
144 InputLoc loc = { fileName, line, col };
/* Write an input location to out in the layout selected by the global
 * errorFormat. The file name of the location must not be null. */
148 ostream &operator<<( ostream &out, const InputLoc &loc )
150 assert( loc.fileName != 0 );
151 switch ( errorFormat ) {
152 case ErrorFormatMSVC:
/* MSVC layout: file(line,column */
153 out << loc.fileName << "(" << loc.line;
155 out << "," << loc.col;
/* NOTE(review): presumably the remaining (GNU) branch, whose label is not
 * visible in this excerpt. Layout: file:line:column */
160 out << loc.fileName << ":" << loc.line;
162 out << ":" << loc.col;
168 /* Total error count. A value above zero makes frontend() and main() stop
 * early.
 * NOTE(review): presumably incremented by the error() functions; the
 * increment is not visible in this excerpt -- confirm. */
169 int gblErrorCount = 0;
171 /* Print the opening to a warning in the input, then return the error ostream. */
172 ostream &warning( const InputLoc &loc )
/* The opening is the location followed by ": warning: ", written to cerr. */
174 cerr << loc << ": warning: ";
178 /* Print the opening to a program error, then return the error stream.
 * The opening is the PROGNAME string literal followed by ": ", written to
 * cerr.
 * NOTE(review): the function header is not visible in this excerpt. */
182 cerr << PROGNAME ": ";
/* Form of error() that takes an input location.
 * NOTE(review): the body is not visible in this excerpt; presumably it mirrors
 * warning() without the "warning" tag -- confirm. */
186 ostream &error( const InputLoc &loc )
/* Write path to out, visiting it one character at a time up to the
 * terminating NUL.
 * NOTE(review): the per-character handling is not visible in this excerpt;
 * going by the name it escapes characters that are special in a line
 * directive -- confirm. */
193 void escapeLineDirectivePath( std::ostream &out, char *path )
195 for ( char *pc = path; *pc != 0; pc++ ) {
/* Parse the command line.
 *
 * Options are recognized by ParamCheck from the option string below. Settings
 * are stored in the file-level globals, and most options are also recorded in
 * frontendArgs or backendArgs so they can be handed on to the frontend and
 * backend programs. An argument that is not an option is taken as the input
 * file name. Problems are reported through error().
 *
 * inputFileName: receives the input file argument; at most one is accepted.
 * outputFileName: receives the -o argument; at most one is accepted. */
203 void processArgs( int argc, char **argv, char *&inputFileName, char *&outputFileName )
/* The letters followed by ':' are the ones whose branches read pc.paramArg. */
205 ParamCheck pc("xo:dnmleabjkS:M:I:CDJRAvHh?-:sT:F:G:P:LpV", argc, argv);
207 while ( pc.check() ) {
208 switch ( pc.state ) {
209 case ParamCheck::match:
210 switch ( pc.parameter ) {
/* Output file name: must be non-empty and may be given only once. */
221 if ( *pc.paramArg == 0 )
222 error() << "a zero length output file name was given" << endl;
223 else if ( outputFileName != 0 )
224 error() << "more than one output file name was given" << endl;
226 /* Ok, remember the output file name. */
227 outputFileName = pc.paramArg;
231 /* Keep duplicates in action lists; forwarded to the frontend as -d. */
233 wantDupsRemoved = false;
234 frontendArgs.append( "-d" );
237 /* Minimization, mostly hidden options. */
239 minimizeOpt = MinimizeNone;
240 frontendArgs.append( "-n" );
243 minimizeOpt = MinimizeEnd;
244 frontendArgs.append( "-m" );
247 minimizeOpt = MinimizeMostOps;
248 frontendArgs.append( "-l" );
251 minimizeOpt = MinimizeEveryOp;
252 frontendArgs.append( "-e" );
255 minimizeLevel = MinimizeApprox;
256 frontendArgs.append( "-a" );
259 minimizeLevel = MinimizeStable;
260 frontendArgs.append( "-b" );
263 minimizeLevel = MinimizePartition1;
264 frontendArgs.append( "-j" );
267 minimizeLevel = MinimizePartition2;
268 frontendArgs.append( "-k" );
/* -S: must be non-empty and may be given only once. */
273 if ( *pc.paramArg == 0 )
274 error() << "please specify an argument to -S" << endl;
275 else if ( machineSpec != 0 )
276 error() << "more than one -S argument was given" << endl;
278 /* Ok, remember the path to the machine to generate. */
279 machineSpec = pc.paramArg;
280 frontendArgs.append( "-S" );
281 frontendArgs.append( pc.paramArg );
/* -M: must be non-empty and may be given only once. */
287 if ( *pc.paramArg == 0 )
288 error() << "please specify an argument to -M" << endl;
289 else if ( machineName != 0 )
290 error() << "more than one -M argument was given" << endl;
292 /* Ok, remember the machine name to generate. */
293 machineName = pc.paramArg;
294 frontendArgs.append( "-M" );
295 frontendArgs.append( pc.paramArg );
/* -I: may be repeated; every directory is kept and forwarded. */
300 if ( *pc.paramArg == 0 )
301 error() << "please specify an argument to -I" << endl;
303 includePaths.append( pc.paramArg );
304 frontendArgs.append( "-I" );
305 frontendArgs.append( pc.paramArg );
309 /* Error reporting format.
 * NOTE(review): the option string given to ParamCheck above has no 'E'
 * entry, so this -E handling looks unreachable -- confirm. */
311 if ( pc.paramArg[0] == '0' )
312 errorFormat = ErrorFormatGNU;
313 else if ( pc.paramArg[0] == '1' )
314 errorFormat = ErrorFormatMSVC;
316 error() << "-E" << pc.paramArg[0] <<
317 " is an invalid argument" << endl;
319 frontendArgs.append( "-E" );
320 frontendArgs.append( pc.paramArg );
323 /* Host language types. */
325 hostLang = &hostLangC;
326 frontendArgs.append( "-C" );
329 hostLang = &hostLangD;
330 frontendArgs.append( "-D" );
333 hostLang = &hostLangJava;
334 frontendArgs.append( "-J" );
337 hostLang = &hostLangRuby;
338 frontendArgs.append( "-R" );
341 hostLang = &hostLangCSharp;
342 frontendArgs.append( "-A" );
345 /* Version and help. */
349 case 'H': case 'h': case '?':
353 printStatistics = true;
354 frontendArgs.append( "-s" );
/* Long options: the text after "--" arrives as the argument of '-'.
 * NOTE(review): eq is compared below as the bare value ("gnu", "msvc") and
 * the name is compared without the "=value" part, so the lines not visible
 * here presumably cut the argument at '=' and advance eq past it -- confirm. */
357 char *eq = strchr( pc.paramArg, '=' );
362 if ( strcmp( pc.paramArg, "help" ) == 0 )
364 else if ( strcmp( pc.paramArg, "version" ) == 0 )
366 else if ( strcmp( pc.paramArg, "error-format" ) == 0 ) {
368 error() << "expecting '=value' for error-format" << endl;
369 else if ( strcmp( eq, "gnu" ) == 0 ) {
370 errorFormat = ErrorFormatGNU;
371 frontendArgs.append( "--error-format=gnu" );
373 else if ( strcmp( eq, "msvc" ) == 0 ) {
374 errorFormat = ErrorFormatMSVC;
375 frontendArgs.append( "--error-format=msvc" );
378 error() << "invalid value for error-format" << endl;
382 error() << "--" << pc.paramArg <<
383 " is an invalid argument" << endl;
387 /* Passthrough args. These are not interpreted here, only recorded for the
 * backend. */
389 backendArgs.append( "-T" );
390 backendArgs.append( pc.paramArg );
393 backendArgs.append( "-F" );
394 backendArgs.append( pc.paramArg );
397 backendArgs.append( "-G" );
398 backendArgs.append( pc.paramArg );
401 backendArgs.append( "-P" );
402 backendArgs.append( pc.paramArg );
405 backendArgs.append( "-p" );
408 backendArgs.append( "-L" );
413 case ParamCheck::invalid:
414 error() << "-" << pc.parameter << " is an invalid argument" << endl;
417 case ParamCheck::noparam:
418 /* It is interpreted as an input file. */
419 if ( *pc.curArg == 0 )
420 error() << "a zero length input file name was given" << endl;
421 else if ( inputFileName != 0 )
422 error() << "more than one input file name was given" << endl;
424 /* OK, Remember the filename. */
425 inputFileName = pc.curArg;
/* Run the frontend on inputFileName.
 *
 * The input file is opened and handed to a Scanner, EOF is then sent to all
 * parsers, and finally the machines are written with writeMachines. hostData
 * is passed to the Scanner and later, as a string, to writeMachines; when
 * neither -S nor -M was given it is wrapped in a <host> element. The error
 * count is checked after each stage.
 *
 * inputFileName: file to read; must not be null.
 * outputFileName: when not null, an ofstream is opened on it for the output.
 * Returns nonzero if any errors were counted, zero otherwise. */
432 int frontend( char *inputFileName, char *outputFileName )
434 /* Open the input file for reading. */
435 assert( inputFileName != 0 );
/* NOTE(review): allocated with new; no matching delete is visible in this
 * excerpt. */
436 ifstream *inFile = new ifstream( inputFileName );
437 istream *inStream = inFile;
438 if ( ! inFile->is_open() )
439 error() << "could not open " << inputFileName << " for reading" << endp;
441 /* Used for just a few things. */
442 std::ostringstream hostData;
/* Open the <host> wrapper; it is closed again after the parsers finish. */
444 if ( machineSpec == 0 && machineName == 0 )
445 hostData << "<host line=\"1\" col=\"1\">";
447 Scanner scanner( inputFileName, *inStream, hostData, 0, 0, 0, false );
450 /* Finished, final check for errors.. */
451 if ( gblErrorCount > 0 )
454 /* Now send EOF to all parsers. */
455 terminateAllParsers();
457 /* Finished, final check for errors.. */
458 if ( gblErrorCount > 0 )
461 if ( machineSpec == 0 && machineName == 0 )
462 hostData << "</host>\n";
464 if ( gblErrorCount > 0 )
467 ostream *outputFile = 0;
468 if ( outputFileName != 0 )
469 outputFile = new ofstream( outputFileName );
473 /* Write the machines, then the surrounding code. */
474 writeMachines( *outputFile, hostData.str(), inputFileName );
476 /* Close the intermediate file. */
477 if ( outputFileName != 0 )
480 return gblErrorCount > 0;
/* Build the name template for the intermediate file. The fixed template
 * "ragel-XXXXXX.xml" is placed in the directory of baseFileName: everything up
 * to and including the last forward slash is kept in front of it. Without a
 * slash the result is the bare template. The result is allocated with new[]. */
char *makeIntermedTemplate( char *baseFileName )
{
	const char *templ = "ragel-XXXXXX.xml";
	const char *slash = strrchr( baseFileName, '/' );

	/* Number of leading characters of baseFileName to keep: the directory
	 * part with its trailing slash, or nothing when there is no slash. */
	int dirLen = 0;
	if ( slash != 0 )
		dirLen = slash - baseFileName + 1;

	char *result = new char[dirLen + strlen(templ) + 1];
	if ( dirLen > 0 )
		memcpy( result, baseFileName, dirLen );
	strcpy( result + dirLen, templ );
	return result;
}
/* Create the intermediate file and return its name.
 *
 * The name comes from makeIntermedTemplate() applied to the output file name
 * if one was given, otherwise to the input file name. The six X characters of
 * the template are overwritten with random characters and the file is created
 * with O_CREAT|O_EXCL, readable and writable by the owner only. Up to 20
 * names are tried. */
501 char *openIntermed( char *inputFileName, char *outputFileName )
506 /* Which filename do we use as the base? */
507 char *baseFileName = outputFileName != 0 ? outputFileName : inputFileName;
509 /* The template for the intermediate file name. */
510 char *intermedFileName = makeIntermedTemplate( baseFileName );
512 /* Randomize the name and try to open. */
513 char fnChars[] = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
/* The last X of the template minus five is the first of its six X's. */
514 char *firstX = strrchr( intermedFileName, 'X' ) - 5;
515 for ( int tries = 0; tries < 20; tries++ ) {
516 /* Choose a random name.
 * NOTE(review): fnChars holds 62 characters but the modulus is 52, so the
 * last ten (Q-Z) are never chosen -- confirm whether 62 was intended. */
517 for ( int x = 0; x < 6; x++ )
518 firstX[x] = fnChars[rand() % 52];
520 /* Try to open the file. O_EXCL makes the call fail if the name is
 * already taken. */
521 int fd = ::open( intermedFileName, O_WRONLY|O_EXCL|O_CREAT, S_IRUSR|S_IWUSR );
524 /* Success. Close the file immediately and return the name for use
525 * by the child processes. */
527 result = intermedFileName;
/* A permission problem is reported instead of being retried. */
531 if ( errno == EACCES ) {
532 error() << "failed to open temp file " << intermedFileName <<
533 ", access denied" << endp;
538 error() << "abnormal error: cannot find unique name for temp file" << endp;
/* Exit path used once the intermediate file exists (see main).
 * intermed: name of the intermediate file.
 * status: exit status.
 * NOTE(review): the body is not visible in this excerpt; presumably it removes
 * the intermediate file and exits with status -- confirm. */
544 void cleanExit( char *intermed, int status )
552 /* If any forward slash is found in argv0 then it is assumed that the path is
553 * explicit and the path to the backend executable should be derived from
554 * that. Whe check that location and also go up one then inside a directory of
555 * the same name in case we are executing from the source tree. If no forward
556 * slash is found it is assumed the file is being run from the installed
557 * location. The PREFIX supplied during configuration is used. */
558 char **makePathChecksUnix( const char *argv0, const char *progName )
560 char **result = new char*[3];
561 const char *lastSlash = strrchr( argv0, '/' );
564 if ( lastSlash != 0 ) {
565 char *path = strdup( argv0 );
566 int givenPathLen = (lastSlash - argv0) + 1;
567 path[givenPathLen] = 0;
569 int progNameLen = strlen(progName);
570 int length = givenPathLen + progNameLen + 1;
571 char *check = new char[length];
572 sprintf( check, "%s%s", path, progName );
573 result[numChecks++] = check;
575 length = givenPathLen + 3 + progNameLen + 1 + progNameLen + 1;
576 check = new char[length];
577 sprintf( check, "%s../%s/%s", path, progName, progName );
578 result[numChecks++] = check;
581 int prefixLen = strlen(PREFIX);
582 int progNameLen = strlen(progName);
583 int length = prefixLen + 5 + progNameLen + 1;
584 char *check = new char[length];
586 sprintf( check, PREFIX "/bin/%s", progName );
587 result[numChecks++] = check;
590 result[numChecks] = 0;
/* Run progName as a child process and wait for it.
 *
 * progName: name used in the error messages.
 * pathChecks: null-terminated list of executable paths to try.
 * args: argument vector for the child; args.data is handed to execv.
 * intermed: intermediate file name, passed on to cleanExit.
 *
 * A failure to fork or to exec, a child that does not exit normally, and a
 * child that exits with a nonzero status all end in cleanExit; in the last
 * case the child's status is used as the exit status. */
595 void forkAndExec( const char *progName, char **pathChecks,
596 ArgsVector &args, char *intermed )
600 /* Error, no child created. */
601 error() << "failed to fork for " << progName << endl;
602 cleanExit( intermed, 1 );
604 else if ( pid == 0 ) {
/* Child process. execv only returns on failure, so reaching the next
 * statement means this candidate could not be executed. */
606 while ( *pathChecks != 0 ) {
607 /* Execv does not modify argv, it just uses the const form that is
608 * compatible with the most code. Ours not included. */
609 execv( *pathChecks, (char *const*) args.data );
612 error() << "failed to exec " << progName << endl;
613 cleanExit( intermed, 1 );
616 /* Parent process, wait for the child. */
620 /* What happened with the child. */
621 if ( ! WIFEXITED( status ) ) {
622 error() << progName << " did not exit normally" << endl;
623 cleanExit( intermed, 1 );
/* Pass a failing exit status of the child on as our own. */
626 if ( WEXITSTATUS(status) != 0 )
627 cleanExit( intermed, WEXITSTATUS(status) );
632 /* GetModuleFileNameEx is used to find out where the current process's
633 * binary is. That location is searched first. If that fails then we go up one
634 * directory and look for the executable inside a directory of the same name
635 * in case we are executing from the source tree.
// progName: file name of the executable to locate.
// The result is an array allocated with new[] holding two strings, each
// allocated with new[], followed by a null entry.
637 char **makePathChecksWin( const char *progName )
640 char *imageFileName = new char[len];
641 HANDLE h = GetCurrentProcess();
642 len = GetModuleFileNameEx( h, NULL, imageFileName, len );
// NOTE(review): if the call can return the full buffer size (truncated name)
// this index is one past the end of imageFileName -- confirm against the
// documented return value.
643 imageFileName[len] = 0;
// Room for two candidates plus the terminating null entry.
645 char **result = new char*[3];
646 const char *lastSlash = strrchr( imageFileName, '\\' );
649 assert( lastSlash != 0 );
// Copy of the image path, cut right after the last backslash.
// NOTE(review): no matching free() for this strdup is visible here.
650 char *path = strdup( imageFileName );
651 int givenPathLen = (lastSlash - imageFileName) + 1;
652 path[givenPathLen] = 0;
// First candidate: <dir>\<progName>
654 int progNameLen = strlen(progName);
655 int length = givenPathLen + progNameLen + 1;
656 char *check = new char[length];
657 sprintf( check, "%s%s", path, progName );
658 result[numChecks++] = check;
// Second candidate: <dir>\..\<progName>\<progName>
660 length = givenPathLen + 3 + progNameLen + 1 + progNameLen + 1;
661 check = new char[length];
662 sprintf( check, "%s..\\%s\\%s", path, progName, progName );
663 result[numChecks++] = check;
665 result[numChecks] = 0;
// Run progName with _spawnv in _P_WAIT mode, trying the candidate paths.
//
// progName: name used in the error message.
// pathChecks: null-terminated list of executable paths to try.
// args: argument vector for the child; args.data is handed to _spawnv.
// intermed: intermediate file name, passed on to cleanExit.
669 void spawn( const char *progName, char **pathChecks,
670 ArgsVector &args, char *intermed )
673 while ( *pathChecks != 0 ) {
674 //cerr << "trying to execute " << *pathChecks << endl;
675 result = _spawnv( _P_WAIT, *pathChecks, args.data );
// True once the spawn worked, or failed for a reason other than the file
// not being found.
676 if ( result >= 0 || errno != ENOENT )
682 error() << "failed to spawn " << progName << endl;
683 cleanExit( intermed, 1 );
687 cleanExit( intermed, 1 );
// Complete the frontend argument vector and run the frontend program.
//
// argv0: the name this program was invoked with, used to locate the program.
// inputFileName: the file the frontend is to read.
// intermed: the intermediate file the frontend is to write.
//
// The resulting vector is: "ragel" "-x" <options collected by processArgs>
// "-o" <intermed> <inputFileName>, followed by a null terminator.
692 void execFrontend( const char *argv0, char *inputFileName, char *intermed )
694 /* The frontend program name. */
695 const char *progName = "ragel";
697 frontendArgs.insert( 0, progName );
698 frontendArgs.insert( 1, "-x" );
699 frontendArgs.append( "-o" );
700 frontendArgs.append( intermed );
701 frontendArgs.append( inputFileName );
702 frontendArgs.append( 0 );
// NOTE(review): the two launch paths below both declare pathChecks, so they
// are presumably selected by preprocessor conditionals that are not visible
// in this excerpt -- confirm.
705 char **pathChecks = makePathChecksUnix( argv0, progName );
706 forkAndExec( progName, pathChecks, frontendArgs, intermed );
708 char **pathChecks = makePathChecksWin( progName );
709 spawn( progName, pathChecks, frontendArgs, intermed );
/* Choose the backend program, complete its argument vector and run it.
 *
 * argv0: the name this program was invoked with, used to locate the program.
 * intermed: the intermediate file the backend is to read.
 * outputFileName: passed to the backend with -o when not null.
 *
 * The resulting vector is: <progName> <options collected by processArgs>
 * ["-o" <outputFileName>] <intermed>, followed by a null terminator. */
713 void execBackend( const char *argv0, char *intermed, char *outputFileName )
715 /* Locate the backend program */
716 const char *progName = 0;
/* NOTE(review): the conditions and case labels choosing between these names
 * are mostly not visible in this excerpt; presumably rlgen-dot is used for
 * dot output and the others follow the host language -- confirm. */
718 progName = "rlgen-dot";
720 switch ( hostLang->lang ) {
723 progName = "rlgen-cd";
726 progName = "rlgen-java";
729 progName = "rlgen-ruby";
731 case HostLang::CSharp:
732 progName = "rlgen-csharp";
736 backendArgs.insert( 0, progName );
737 if ( outputFileName != 0 ) {
738 backendArgs.append( "-o" );
739 backendArgs.append( outputFileName );
741 backendArgs.append( intermed );
742 backendArgs.append( 0 );
/* NOTE(review): as both paths below declare pathChecks, they are presumably
 * selected by preprocessor conditionals not visible in this excerpt. */
745 char **pathChecks = makePathChecksUnix( argv0, progName );
746 forkAndExec( progName, pathChecks, backendArgs, intermed );
748 char **pathChecks = makePathChecksWin( progName );
749 spawn( progName, pathChecks, backendArgs, intermed );
753 /* Main: process the arguments and check them, then either run the frontend
 * in this process or create an intermediate file and run the frontend and
 * backend programs on it. */
754 int main(int argc, char **argv)
756 char *inputFileName = 0;
757 char *outputFileName = 0;
759 processArgs( argc, argv, inputFileName, outputFileName );
761 /* If -M or -S are given and we're not generating a dot file then invoke
762 * the frontend. These options are not useful with code generators. */
763 if ( machineName != 0 || machineSpec != 0 ) {
768 /* Require an input file. If we use standard in then we won't have a file
769 * name on which to base the output. */
770 if ( inputFileName == 0 )
771 error() << "no input file given" << endl;
773 /* Bail on argument processing errors. */
774 if ( gblErrorCount > 0 )
777 /* Make sure we are not writing to the same file as the input file. This
 * compares the names as given, character by character. */
778 if ( inputFileName != 0 && outputFileName != 0 &&
779 strcmp( inputFileName, outputFileName ) == 0 )
781 error() << "output file \"" << outputFileName <<
782 "\" is the same as the input file" << endp;
/* NOTE(review): presumably taken only when the frontend alone is wanted; the
 * condition is not visible in this excerpt -- confirm. */
786 return frontend( inputFileName, outputFileName );
788 char *intermed = openIntermed( inputFileName, outputFileName );
790 /* From here on in the cleanExit function should be used to exit. */
792 /* Run the frontend, then the backend processes. */
793 execFrontend( argv[0], inputFileName, intermed );
794 execBackend( argv[0], intermed, outputFileName );
796 /* Clean up the intermediate. */
797 cleanExit( intermed, 0 );