2 * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 #include <sys/types.h>
46 /* Parameters and output. */
61 using std::streamsize;
63 /* Controls minimization. */
/* minimizeLevel is switched between MinimizeApprox, MinimizeStable,
 * MinimizePartition1 and MinimizePartition2 by the -a, -b, -j and -k options;
 * minimizeOpt records when minimization runs and is changed by -n, -m, -l
 * and -e (see processArgs). */
64 MinimizeLevel minimizeLevel = MinimizePartition2;
65 MinimizeOpt minimizeOpt = MinimizeMostOps;
67 /* Graphviz dot file generation. */
/* machineSpec and machineName hold the arguments of -S and -M; both stay
 * null when the corresponding option is not given. */
68 char *machineSpec = 0, *machineName = 0;
69 bool machineSpecFound = false;
/* Cleared by -d, which asks for duplicates to be kept in action lists. */
70 bool wantDupsRemoved = true;
/* Set by -s. */
72 bool printStatistics = false;
/* NOTE(review): presumably set by -x and -V respectively; the assignments
 * are not visible here -- confirm in processArgs. */
73 bool frontendOnly = false;
74 bool generateDot = false;
/* Option lists filled in by processArgs. frontendArgs and backendArgs are
 * completed and handed to the child programs by execFrontend and execBackend;
 * includePaths collects the -I directories. */
76 ArgsVector frontendArgs;
77 ArgsVector backendArgs;
78 ArgsVector includePaths;
80 /* Print a summary of the options. The help text is built from adjacent
 * string literals, one option per line. Each entry describes a switch that
 * processArgs accepts; keep the two in step when adding or removing options. */
84 "usage: ragel [options] file\n"
86 " -h, -H, -?, --help Print this usage and exit\n"
87 " -v, --version Print version information and exit\n"
88 " -o <file> Write output to <file>\n"
89 " -s Print some statistics on stderr\n"
90 " -d Do not remove duplicates from action lists\n"
91 " -I <dir> Add <dir> to the list of directories to search\n"
92 " for included and imported files\n"
94 " -n Do not perform minimization\n"
95 " -m Minimize at the end of the compilation\n"
96 " -l Minimize after most operations (default)\n"
97 " -e Minimize after every operation\n"
99 " -x Run the frontend only: emit XML intermediate format\n"
100 " -V Generate a dot file for Graphviz\n"
101 " -p Display printable characters on labels\n"
102 " -S <spec> FSM specification to output (for rlgen-dot)\n"
103 " -M <machine> Machine definition/instantiation to output (for rlgen-dot)\n"
105 " -C The host language is C, C++, Obj-C or Obj-C++ (default)\n"
106 " -D The host language is D\n"
107 " -J The host language is Java\n"
108 " -R The host language is Ruby\n"
109 " -A The host language is C#\n"
110 "line directives: (C/D/C# only)\n"
111 " -L Inhibit writing of #line directives\n"
112 "code style: (C/Ruby/C# only)\n"
113 " -T0 Table driven FSM (default)\n"
114 " -T1 Faster table driven FSM\n"
115 " -F0 Flat table driven FSM\n"
116 " -F1 Faster flat table-driven FSM\n"
117 "code style: (C/C# only)\n"
118 " -G0 Goto-driven FSM\n"
119 " -G1 Faster goto-driven FSM\n"
120 "code style: (C only)\n"
121 " -G2 Really fast goto-driven FSM\n"
122 " -P<N> N-Way Split really fast goto-driven FSM\n"
126 /* Print version information. */
/* Writes two lines to cout: the program banner with version and publication
 * date, then the copyright notice. VERSION and PUBDATE are written directly
 * next to string literals, so they are expected to expand to string literals
 * themselves (their definitions are not in this file's visible part). */
129 cout << "Ragel State Machine Compiler version " VERSION << " " PUBDATE << endl <<
130 "Copyright (c) 2001-2007 by Adrian Thurston" << endl;
133 /* Total error count. */
/* Read by main() after argument processing and by frontend() after scanning
 * and after terminating the parsers; a value above zero stops further work
 * and makes frontend() return non-zero. */
134 int gblErrorCount = 0;
136 /* Print the opening to a warning in the input, then return the error ostream. */
/* The prefix written to cerr has the form "file:line:col: warning: ".
 * loc.fileName must not be null (checked with assert). */
137 ostream &warning( const InputLoc &loc )
139 assert( loc.fileName != 0 );
140 cerr << loc.fileName << ":" << loc.line << ":" <<
141 loc.col << ": warning: ";
145 /* Print the opening to a program error, then return the error stream. */
/* The prefix is the program name followed by ": ". PROGNAME is pasted onto
 * the string literal, so it is expected to be a string-literal macro. */
149 cerr << PROGNAME ": ";
/* Print the opening to an error located in the input. The prefix written to
 * cerr has the form "file:line: "; unlike warning(), the column is not
 * printed. loc.fileName must not be null (checked with assert). */
153 ostream &error( const InputLoc &loc )
156 assert( loc.fileName != 0 );
157 cerr << loc.fileName << ":" << loc.line << ": ";
/* Walk `path` one character at a time up to its terminating NUL.
 * NOTE(review): judging by the name and the `out` parameter, each character
 * is presumably written to `out` in a form that is safe inside a #line
 * directive -- confirm against the loop body. */
161 void escapeLineDirectivePath( std::ostream &out, char *path )
163 for ( char *pc = path; *pc != 0; pc++ ) {
/* Parse the command line.
 *
 * argc, argv: the arguments as received by main.
 * inputFileName, outputFileName: out-parameters (references to the caller's
 * pointers); they receive the input file argument and the -o argument.
 *
 * Options handled here update the global settings. Most are also recorded in
 * frontendArgs or backendArgs so that they can be forwarded to the frontend
 * and backend programs. Problems are reported through error(). */
171 void processArgs( int argc, char **argv, char *&inputFileName, char *&outputFileName )
/* The string lists the accepted option letters. NOTE(review): a ':' after a
 * letter looks like the getopt-style marker for "takes an argument" --
 * confirm against ParamCheck. */
173 ParamCheck pc("xo:dnmleabjkS:M:I:CDJRAvHh?-:sT:F:G:P:LpV", argc, argv);
175 while ( pc.check() ) {
176 switch ( pc.state ) {
177 case ParamCheck::match:
178 switch ( pc.parameter ) {
/* Output file: must be non-empty and may be given only once. */
189 if ( *pc.parameterArg == 0 )
190 error() << "a zero length output file name was given" << endl;
191 else if ( outputFileName != 0 )
192 error() << "more than one output file name was given" << endl;
194 /* Ok, remember the output file name. */
195 outputFileName = pc.parameterArg;
199 /* Duplicate removal. */
201 wantDupsRemoved = false;
202 frontendArgs.append( "-d" );
205 /* Minimization, mostly hidden options. */
207 minimizeOpt = MinimizeNone;
208 frontendArgs.append( "-n" );
211 minimizeOpt = MinimizeEnd;
212 frontendArgs.append( "-m" );
215 minimizeOpt = MinimizeMostOps;
216 frontendArgs.append( "-l" );
219 minimizeOpt = MinimizeEveryOp;
220 frontendArgs.append( "-e" );
/* Minimization level; these switches are not listed in the usage text. */
223 minimizeLevel = MinimizeApprox;
224 frontendArgs.append( "-a" );
227 minimizeLevel = MinimizeStable;
228 frontendArgs.append( "-b" );
231 minimizeLevel = MinimizePartition1;
232 frontendArgs.append( "-j" );
235 minimizeLevel = MinimizePartition2;
236 frontendArgs.append( "-k" );
/* -S: must be non-empty and may be given only once; forwarded to the
 * frontend together with its argument. */
241 if ( *pc.parameterArg == 0 )
242 error() << "please specify an argument to -S" << endl;
243 else if ( machineSpec != 0 )
244 error() << "more than one -S argument was given" << endl;
246 /* Ok, remember the path to the machine to generate. */
247 machineSpec = pc.parameterArg;
248 frontendArgs.append( "-S" );
249 frontendArgs.append( pc.parameterArg );
/* -M: same rules as -S. */
255 if ( *pc.parameterArg == 0 )
256 error() << "please specify an argument to -M" << endl;
257 else if ( machineName != 0 )
258 error() << "more than one -M argument was given" << endl;
260 /* Ok, remember the machine name to generate. */
261 machineName = pc.parameterArg;
262 frontendArgs.append( "-M" );
263 frontendArgs.append( pc.parameterArg );
/* -I: may be repeated; each directory is remembered locally and also
 * forwarded to the frontend. */
268 if ( *pc.parameterArg == 0 )
269 error() << "please specify an argument to -I" << endl;
271 includePaths.append( pc.parameterArg );
272 frontendArgs.append( "-I" );
273 frontendArgs.append( pc.parameterArg );
277 /* Host language types. */
279 hostLang = &hostLangC;
280 frontendArgs.append( "-C" );
283 hostLang = &hostLangD;
284 frontendArgs.append( "-D" );
287 hostLang = &hostLangJava;
288 frontendArgs.append( "-J" );
291 hostLang = &hostLangRuby;
292 frontendArgs.append( "-R" );
295 hostLang = &hostLangCSharp;
296 frontendArgs.append( "-A" );
299 /* Version and help. */
303 case 'H': case 'h': case '?':
307 printStatistics = true;
308 frontendArgs.append( "-s" );
/* Long options: "help" and "version" are compared without regard to case;
 * anything else is rejected. */
311 if ( strcasecmp(pc.parameterArg, "help") == 0 ) {
315 else if ( strcasecmp(pc.parameterArg, "version") == 0 ) {
320 error() << "--" << pc.parameterArg <<
321 " is an invalid argument" << endl;
324 /* Passthrough args. */
/* These are not interpreted here; the switch and its argument (if any) are
 * only appended to backendArgs. */
326 backendArgs.append( "-T" );
327 backendArgs.append( pc.parameterArg );
330 backendArgs.append( "-F" );
331 backendArgs.append( pc.parameterArg );
334 backendArgs.append( "-G" );
335 backendArgs.append( pc.parameterArg );
338 backendArgs.append( "-P" );
339 backendArgs.append( pc.parameterArg );
342 backendArgs.append( "-p" );
345 backendArgs.append( "-L" );
350 case ParamCheck::invalid:
351 error() << "-" << pc.parameter << " is an invalid argument" << endl;
354 case ParamCheck::noparam:
355 /* It is interpreted as an input file. */
/* Same rules as the output file: non-empty and given only once. */
356 if ( *pc.curArg == 0 )
357 error() << "a zero length input file name was given" << endl;
358 else if ( inputFileName != 0 )
359 error() << "more than one input file name was given" << endl;
361 /* OK, Remember the filename. */
362 inputFileName = pc.curArg;
/* Run the frontend in this process: scan inputFileName, finish all parsers
 * and write the resulting machines.
 *
 * inputFileName: file to read; must not be null (checked with assert).
 * outputFileName: file to write, or null. NOTE(review): the handling of the
 * null case is not visible here -- confirm which stream is used then.
 *
 * Returns non-zero when gblErrorCount is above zero at the end. */
369 int frontend( char *inputFileName, char *outputFileName )
371 /* Open the input file for reading. */
372 assert( inputFileName != 0 );
/* NOTE(review): the stream is allocated with new and no matching delete is
 * visible in this function. */
373 ifstream *inFile = new ifstream( inputFileName );
374 istream *inStream = inFile;
375 if ( ! inFile->is_open() )
376 error() << "could not open " << inputFileName << " for reading" << endp;
378 /* Used for just a few things. */
379 std::ostringstream hostData;
/* The host text is wrapped in a <host> element only when neither -S nor -M
 * was given; the matching close tag is written further down under the same
 * condition. */
381 if ( machineSpec == 0 && machineName == 0 )
382 hostData << "<host line=\"1\" col=\"1\">";
384 Scanner scanner( inputFileName, *inStream, hostData, 0, 0, 0, false );
387 /* Finished, final check for errors.. */
388 if ( gblErrorCount > 0 )
391 /* Now send EOF to all parsers. */
392 terminateAllParsers();
394 /* Finished, final check for errors.. */
395 if ( gblErrorCount > 0 )
398 if ( machineSpec == 0 && machineName == 0 )
399 hostData << "</host>\n";
401 if ( gblErrorCount > 0 )
/* Open the output file when a name was given. */
404 ostream *outputFile = 0;
405 if ( outputFileName != 0 )
406 outputFile = new ofstream( outputFileName );
410 /* Write the machines, then the surrounding code. */
411 writeMachines( *outputFile, hostData.str(), inputFileName );
413 /* Close the intermediate file. */
414 if ( outputFileName != 0 )
417 return gblErrorCount > 0;
/* Build the name template for the intermediate file: "ragel-XXXXXX.xml"
 * placed in the same directory as baseFileName.
 *
 * When baseFileName contains no '/', the template alone is used. Otherwise
 * everything up to and including the last '/' is copied in front of it. The
 * buffer is allocated with new[]. NOTE(review): `result` is presumably
 * returned to the caller, who would then own it -- the return statement is
 * not visible here. */
420 char *makeIntermedTemplate( char *baseFileName )
423 const char *templ = "ragel-XXXXXX.xml";
424 char *lastSlash = strrchr( baseFileName, '/' );
425 if ( lastSlash == 0 ) {
426 result = new char[strlen(templ)+1];
427 strcpy( result, templ );
/* baseLen counts the directory part including its trailing slash. */
430 int baseLen = lastSlash - baseFileName + 1;
431 result = new char[baseLen + strlen(templ) + 1];
432 memcpy( result, baseFileName, baseLen );
433 strcpy( result+baseLen, templ );
/* Pick a unique name for the intermediate file and create the file.
 *
 * The name is based on the directory of outputFileName, or of inputFileName
 * when no output file was given. The six 'X' characters of the template are
 * replaced with random characters and the file is created exclusively; up to
 * 20 names are tried. Failures are reported through error() with endp. */
438 char *openIntermed( char *inputFileName, char *outputFileName )
443 /* Which filename do we use as the base? */
444 char *baseFileName = outputFileName != 0 ? outputFileName : inputFileName;
446 /* The template for the intermediate file name. */
447 char *intermedFileName = makeIntermedTemplate( baseFileName );
449 /* Randomize the name and try to open. */
450 char fnChars[] = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
/* strrchr finds the last 'X' of the template; stepping back five characters
 * gives the first of the six. */
451 char *firstX = strrchr( intermedFileName, 'X' ) - 5;
452 for ( int tries = 0; tries < 20; tries++ ) {
453 /* Choose a random name. */
/* NOTE(review): the modulus is 52 although fnChars holds 62 characters, so
 * 'Q' through 'Z' are never picked. */
454 for ( int x = 0; x < 6; x++ )
455 firstX[x] = fnChars[rand() % 52];
457 /* Try to open the file. */
/* O_CREAT together with O_EXCL makes the call fail when the name already
 * exists; the file is created readable and writable by the owner only. */
458 int fd = ::open( intermedFileName, O_WRONLY|O_EXCL|O_CREAT, S_IRUSR|S_IWUSR );
461 /* Success. Close the file immediately and return the name for use
462 * by the child processes. */
464 result = intermedFileName;
/* A permission problem will not go away with another name, so it is
 * reported straight away. */
468 if ( errno == EACCES ) {
469 error() << "failed to open temp file " << intermedFileName <<
470 ", access denied" << endp;
475 error() << "abnormal error: cannot find unique name for temp file" << endp;
/* Exit path used once the intermediate file exists. Callers pass the
 * intermediate file name and the exit status.
 * NOTE(review): the body is not visible here; presumably it removes
 * `intermed` and exits with `status` -- confirm. */
481 void cleanExit( char *intermed, int status )
489 /* If any forward slash is found in argv0 then it is assumed that the path is
490 * explicit and the path to the backend executable should be derived from
491 * that. We check that location and also go up one then inside a directory of
492 * the same name in case we are executing from the source tree. If no forward
493 * slash is found it is assumed the file is being run from the installed
494 * location. The PREFIX supplied during configuration is used. */
/* Returns a null-terminated array of candidate paths (room for two entries
 * plus the terminator). The array and each entry are allocated with new[]. */
495 char **makePathChecksUnix( const char *argv0, const char *progName )
497 char **result = new char*[3];
498 const char *lastSlash = strrchr( argv0, '/' );
501 if ( lastSlash != 0 ) {
/* Keep only the directory part of argv0, including the trailing slash.
 * NOTE(review): no free() for this strdup copy is visible here. */
502 char *path = strdup( argv0 );
503 int givenPathLen = (lastSlash - argv0) + 1;
504 path[givenPathLen] = 0;
/* First candidate: <dir>/<progName>. */
506 int progNameLen = strlen(progName);
507 int length = givenPathLen + progNameLen + 1;
508 char *check = new char[length];
509 sprintf( check, "%s%s", path, progName );
510 result[numChecks++] = check;
/* Second candidate: <dir>/../<progName>/<progName>. The constants 3 and 1
 * in the length account for "../" and the '/' between the two names. */
512 length = givenPathLen + 3 + progNameLen + 1 + progNameLen + 1;
513 check = new char[length];
514 sprintf( check, "%s../%s/%s", path, progName, progName );
515 result[numChecks++] = check;
/* Installed location: PREFIX "/bin/" <progName>; the constant 5 is the
 * length of "/bin/". */
518 int prefixLen = strlen(PREFIX);
519 int progNameLen = strlen(progName);
520 int length = prefixLen + 5 + progNameLen + 1;
521 char *check = new char[length];
523 sprintf( check, PREFIX "/bin/%s", progName );
524 result[numChecks++] = check;
/* Terminate the list. */
527 result[numChecks] = 0;
/* Run a helper program as a child process and wait for it.
 *
 * progName: name used in error messages.
 * pathChecks: null-terminated list of candidate executable paths.
 * args: argument vector handed to execv through args.data.
 * intermed: intermediate file name, passed on to cleanExit on failure.
 *
 * Every failure -- fork error, no candidate could be executed, abnormal
 * termination or a non-zero exit status of the child -- goes through
 * cleanExit. */
532 void forkAndExec( const char *progName, char **pathChecks,
533 ArgsVector &args, char *intermed )
537 /* Error, no child created. */
538 error() << "failed to fork for " << progName << endl;
539 cleanExit( intermed, 1 );
541 else if ( pid == 0 ) {
/* Child process: try the candidate paths. */
543 while ( *pathChecks != 0 ) {
544 /* Execv does not modify argv, it just uses the const form that is
545 * compatible with the most code. Ours not included. */
546 execv( *pathChecks, (char *const*) args.data );
/* execv only returns on failure, so this point is reached only when the
 * program could not be started. */
549 error() << "failed to exec " << progName << endl;
550 cleanExit( intermed, 1 );
553 /* Parent process, wait for the child. */
557 /* What happened with the child. */
558 if ( ! WIFEXITED( status ) ) {
559 error() << progName << " did not exit normally" << endl;
560 cleanExit( intermed, 1 );
/* Propagate a non-zero exit status of the child. */
563 if ( WEXITSTATUS(status) != 0 )
564 cleanExit( intermed, WEXITSTATUS(status) );
569 /* GetModuleFileNameEx is used to find out where the current process's
570 * binary is. That location is searched first. If that fails then we go up one
571 * directory and look for the executable inside a directory of the same name
572 * in case we are executing from the source tree.
/* Windows counterpart of the path search: returns a null-terminated array
 * with two candidate paths for progName, both derived from the directory of
 * the running executable. The array and its entries are allocated with
 * new[]. */
574 char **makePathChecksWin( const char *progName )
577 char *imageFileName = new char[len];
578 HANDLE h = GetCurrentProcess();
/* NOTE(review): if the returned length can equal the buffer size, the
 * terminator below is written one past the end of the buffer -- confirm
 * against the declaration of len and the API documentation. */
579 len = GetModuleFileNameEx( h, NULL, imageFileName, len );
580 imageFileName[len] = 0;
582 char **result = new char*[3];
583 const char *lastSlash = strrchr( imageFileName, '\\' );
/* The image file name is required to contain a backslash. */
586 assert( lastSlash != 0 );
/* Keep only the directory part, including the trailing backslash.
 * NOTE(review): no release of this strdup copy or of imageFileName is
 * visible here. */
587 char *path = strdup( imageFileName );
588 int givenPathLen = (lastSlash - imageFileName) + 1;
589 path[givenPathLen] = 0;
/* First candidate: <dir>\<progName>. */
591 int progNameLen = strlen(progName);
592 int length = givenPathLen + progNameLen + 1;
593 char *check = new char[length];
594 sprintf( check, "%s%s", path, progName );
595 result[numChecks++] = check;
/* Second candidate: <dir>\..\<progName>\<progName>. */
597 length = givenPathLen + 3 + progNameLen + 1 + progNameLen + 1;
598 check = new char[length];
599 sprintf( check, "%s..\\%s\\%s", path, progName, progName );
600 result[numChecks++] = check;
/* Terminate the list. */
602 result[numChecks] = 0;
/* Windows counterpart of forkAndExec: run a helper program synchronously.
 *
 * progName: name used in error messages.
 * pathChecks: null-terminated list of candidate executable paths.
 * args: argument vector handed to _spawnv through args.data.
 * intermed: intermediate file name, passed on to cleanExit on failure. */
606 void spawn( const char *progName, char **pathChecks,
607 ArgsVector &args, char *intermed )
610 while ( *pathChecks != 0 ) {
611 //cerr << "trying to execute " << *pathChecks << endl;
/* _P_WAIT makes the call block until the child has finished. */
612 result = _spawnv( _P_WAIT, *pathChecks, args.data );
/* Only a "not found" failure (ENOENT) is worth trying the next candidate
 * for. NOTE(review): the statement governed by this test is not visible
 * here; presumably it leaves the loop. */
613 if ( result >= 0 || errno != ENOENT )
619 error() << "failed to spawn " << progName << endl;
620 cleanExit( intermed, 1 );
624 cleanExit( intermed, 1 );
/* Launch the frontend program on inputFileName, directing its output to the
 * intermediate file.
 *
 * The final argument list is: "ragel", "-x", the options collected in
 * frontendArgs by processArgs, "-o", intermed, inputFileName and a
 * terminating null. */
629 void execFrontend( const char *argv0, char *inputFileName, char *intermed )
631 /* The frontend program name. */
632 const char *progName = "ragel";
634 frontendArgs.insert( 0, progName );
635 frontendArgs.insert( 1, "-x" );
636 frontendArgs.append( "-o" );
637 frontendArgs.append( intermed );
638 frontendArgs.append( inputFileName );
/* The argument vector is null terminated. */
639 frontendArgs.append( 0 );
/* NOTE(review): the Unix and Windows launch sequences below are presumably
 * selected by preprocessor conditionals that are not visible here. */
642 char **pathChecks = makePathChecksUnix( argv0, progName );
643 forkAndExec( progName, pathChecks, frontendArgs, intermed );
645 char **pathChecks = makePathChecksWin( progName );
646 spawn( progName, pathChecks, frontendArgs, intermed );
/* Launch the backend program that turns the intermediate file into the
 * final output.
 *
 * The backend is "rlgen-dot" or one of the per-language generators chosen
 * from hostLang->lang. The final argument list is: the program name, the
 * options collected in backendArgs by processArgs, "-o" and outputFileName
 * when an output file was given, intermed and a terminating null. */
650 void execBackend( const char *argv0, char *intermed, char *outputFileName )
652 /* Locate the backend program */
653 const char *progName = 0;
/* NOTE(review): the condition selecting rlgen-dot is not visible here;
 * presumably it is the generateDot flag. */
655 progName = "rlgen-dot";
657 switch ( hostLang->lang ) {
660 progName = "rlgen-cd";
663 progName = "rlgen-java";
666 progName = "rlgen-ruby";
668 case HostLang::CSharp:
669 progName = "rlgen-csharp";
673 backendArgs.insert( 0, progName );
674 if ( outputFileName != 0 ) {
675 backendArgs.append( "-o" );
676 backendArgs.append( outputFileName );
678 backendArgs.append( intermed );
/* The argument vector is null terminated. */
679 backendArgs.append( 0 );
/* NOTE(review): the Unix and Windows launch sequences below are presumably
 * selected by preprocessor conditionals that are not visible here. */
682 char **pathChecks = makePathChecksUnix( argv0, progName );
683 forkAndExec( progName, pathChecks, backendArgs, intermed );
685 char **pathChecks = makePathChecksWin( progName );
686 spawn( progName, pathChecks, backendArgs, intermed );
690 /* Program entry point. Processes the arguments and validates them, then
 * either runs the frontend in this process or creates an intermediate file
 * and runs the frontend and backend programs on it, one after the other. */
691 int main(int argc, char **argv)
693 char *inputFileName = 0;
694 char *outputFileName = 0;
696 processArgs( argc, argv, inputFileName, outputFileName );
698 /* If -M or -S are given and we're not generating a dot file then invoke
699 * the frontend. These options are not useful with code generators. */
700 if ( machineName != 0 || machineSpec != 0 ) {
705 /* Require an input file. If we use standard in then we won't have a file
706 * name on which to base the output. */
707 if ( inputFileName == 0 )
708 error() << "no input file given" << endl;
710 /* Bail on argument processing errors. */
711 if ( gblErrorCount > 0 )
714 /* Make sure we are not writing to the same file as the input file. */
/* The comparison is on the names as given, character by character. */
715 if ( inputFileName != 0 && outputFileName != 0 &&
716 strcmp( inputFileName, outputFileName ) == 0 )
718 error() << "output file \"" << outputFileName <<
719 "\" is the same as the input file" << endp;
/* NOTE(review): the condition guarding this in-process frontend run is not
 * visible here; presumably it is the frontendOnly flag. */
723 return frontend( inputFileName, outputFileName );
725 char *intermed = openIntermed( inputFileName, outputFileName );
727 /* From here on in the cleanExit function should be used to exit. */
729 /* Run the frontend, then the backend processes. */
730 execFrontend( argv[0], inputFileName, intermed );
731 execBackend( argv[0], intermed, outputFileName );
733 /* Clean up the intermediate. */
734 cleanExit( intermed, 0 );