2 * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 #include <sys/types.h>
46 /* Parameters and output. */
61 using std::streamsize;
63 /* Controls minimization. */
64 MinimizeLevel minimizeLevel = MinimizePartition2;
65 MinimizeOpt minimizeOpt = MinimizeMostOps;
67 /* Graphviz dot file generation. */
68 char *machineSpec = 0, *machineName = 0;
69 bool machineSpecFound = false;
70 bool wantDupsRemoved = true;
72 bool printStatistics = false;
73 bool frontendOnly = false;
74 bool generateDot = false;
76 typedef Vector<const char *> ArgsVector;
77 ArgsVector frontendArgs;
78 ArgsVector backendArgs;
/* Print a summary of the command line options on standard output. */
void usage()
{
	cout <<
"usage: ragel [options] file\n"
" -h, -H, -?, --help Print this usage and exit\n"
" -v, --version Print version information and exit\n"
" -o <file> Write output to <file>\n"
" -s Print some statistics on stderr\n"
" -d Do not remove duplicates from action lists\n"
" -n Do not perform minimization\n"
" -m Minimize at the end of the compilation\n"
" -l Minimize after most operations (default)\n"
" -e Minimize after every operation\n"
" -x Run the frontend only: emit XML intermediate format\n"
" -V Generate a dot file for Graphviz\n"
" -p Display printable characters on labels\n"
" -S <spec> FSM specification to output (for rlgen-dot)\n"
" -M <machine> Machine definition/instantiation to output (for rlgen-dot)\n"
" -C The host language is C, C++, Obj-C or Obj-C++ (default)\n"
" -D The host language is D\n"
" -J The host language is Java\n"
" -R The host language is Ruby\n"
"line directives: (C/D only)\n"
" -L Inhibit writing of #line directives\n"
"code style: (C/Ruby only)\n"
" -T0 Table driven FSM (default)\n"
" -T1 Faster table driven FSM\n"
" -F0 Flat table driven FSM\n"
" -F1 Faster flat table-driven FSM\n"
"code style: (C only)\n"
" -G0 Goto-driven FSM\n"
" -G1 Faster goto-driven FSM\n"
" -G2 Really fast goto-driven FSM\n"
" -P<N> N-Way Split really fast goto-driven FSM\n"
	;
}
122 /* Print version information. */
125 cout << "Ragel State Machine Compiler version " VERSION << " " PUBDATE << endl <<
126 "Copyright (c) 2001-2007 by Adrian Thurston" << endl;
/* Total number of errors reported so far. Code in this file compares it
 * against zero to decide whether to stop early. */
int gblErrorCount = 0;
132 /* Print the opening to a warning in the input, then return the error ostream. */
133 ostream &warning( const InputLoc &loc )
135 assert( loc.fileName != 0 );
136 cerr << loc.fileName << ":" << loc.line << ":" <<
137 loc.col << ": warning: ";
141 /* Print the opening to a program error, then return the error stream. */
145 cerr << PROGNAME ": ";
149 ostream &error( const InputLoc &loc )
152 assert( loc.fileName != 0 );
153 cerr << loc.fileName << ":" << loc.line << ": ";
/* Write path to out for use inside a quoted line directive: every backslash
 * is doubled and all other characters are copied through unchanged. */
void escapeLineDirectivePath( std::ostream &out, char *path )
{
	const char *cur = path;
	while ( *cur != 0 ) {
		const char ch = *cur++;
		if ( ch != '\\' )
			out << ch;
		else
			out << "\\\\";
	}
}
167 void processArgs( int argc, char **argv, char *&inputFileName, char *&outputFileName )
169 ParamCheck pc("xo:dnmleabjkS:M:CDJRvHh?-:sT:F:G:P:LpV", argc, argv);
171 while ( pc.check() ) {
172 switch ( pc.state ) {
173 case ParamCheck::match:
174 switch ( pc.parameter ) {
185 if ( *pc.parameterArg == 0 )
186 error() << "a zero length output file name was given" << endl;
187 else if ( outputFileName != 0 )
188 error() << "more than one output file name was given" << endl;
190 /* Ok, remember the output file name. */
191 outputFileName = pc.parameterArg;
195 /* Minimization, mostly hidden options. */
197 wantDupsRemoved = false;
198 frontendArgs.append( "-d" );
201 /* Minimization, mostly hidden options. */
203 minimizeOpt = MinimizeNone;
204 frontendArgs.append( "-n" );
207 minimizeOpt = MinimizeEnd;
208 frontendArgs.append( "-m" );
211 minimizeOpt = MinimizeMostOps;
212 frontendArgs.append( "-l" );
215 minimizeOpt = MinimizeEveryOp;
216 frontendArgs.append( "-e" );
219 minimizeLevel = MinimizeApprox;
220 frontendArgs.append( "-a" );
223 minimizeLevel = MinimizeStable;
224 frontendArgs.append( "-b" );
227 minimizeLevel = MinimizePartition1;
228 frontendArgs.append( "-j" );
231 minimizeLevel = MinimizePartition2;
232 frontendArgs.append( "-k" );
237 if ( *pc.parameterArg == 0 )
238 error() << "please specify an argument to -S" << endl;
239 else if ( machineSpec != 0 )
240 error() << "more than one -S argument was given" << endl;
242 /* Ok, remember the path to the machine to generate. */
243 machineSpec = pc.parameterArg;
244 frontendArgs.append( "-S" );
245 frontendArgs.append( pc.parameterArg );
251 if ( *pc.parameterArg == 0 )
252 error() << "please specify an argument to -M" << endl;
253 else if ( machineName != 0 )
254 error() << "more than one -M argument was given" << endl;
256 /* Ok, remember the machine name to generate. */
257 machineName = pc.parameterArg;
258 frontendArgs.append( "-M" );
259 frontendArgs.append( pc.parameterArg );
263 /* Host language types. */
265 hostLang = &hostLangC;
266 frontendArgs.append( "-C" );
269 hostLang = &hostLangD;
270 frontendArgs.append( "-D" );
273 hostLang = &hostLangJava;
274 frontendArgs.append( "-J" );
277 hostLang = &hostLangRuby;
278 frontendArgs.append( "-R" );
281 /* Version and help. */
285 case 'H': case 'h': case '?':
289 printStatistics = true;
290 frontendArgs.append( "-s" );
293 if ( strcasecmp(pc.parameterArg, "help") == 0 ) {
297 else if ( strcasecmp(pc.parameterArg, "version") == 0 ) {
302 error() << "--" << pc.parameterArg <<
303 " is an invalid argument" << endl;
306 /* Passthrough args. */
308 backendArgs.append( "-T" );
309 backendArgs.append( pc.parameterArg );
312 backendArgs.append( "-F" );
313 backendArgs.append( pc.parameterArg );
316 backendArgs.append( "-G" );
317 backendArgs.append( pc.parameterArg );
320 backendArgs.append( "-P" );
321 backendArgs.append( pc.parameterArg );
324 backendArgs.append( "-p" );
327 backendArgs.append( "-L" );
332 case ParamCheck::invalid:
333 error() << "-" << pc.parameter << " is an invalid argument" << endl;
336 case ParamCheck::noparam:
337 /* It is interpreted as an input file. */
338 if ( *pc.curArg == 0 )
339 error() << "a zero length input file name was given" << endl;
340 else if ( inputFileName != 0 )
341 error() << "more than one input file name was given" << endl;
343 /* OK, Remember the filename. */
344 inputFileName = pc.curArg;
351 int frontend( char *inputFileName, char *outputFileName )
353 /* Open the input file for reading. */
354 assert( inputFileName != 0 );
355 ifstream *inFile = new ifstream( inputFileName );
356 istream *inStream = inFile;
357 if ( ! inFile->is_open() )
358 error() << "could not open " << inputFileName << " for reading" << endp;
360 /* Used for just a few things. */
361 std::ostringstream hostData;
363 if ( machineSpec == 0 && machineName == 0 )
364 hostData << "<host line=\"1\" col=\"1\">";
366 Scanner scanner( inputFileName, *inStream, hostData, 0, 0, 0, false );
369 /* Finished, final check for errors.. */
370 if ( gblErrorCount > 0 )
373 /* Now send EOF to all parsers. */
374 terminateAllParsers();
376 /* Finished, final check for errors.. */
377 if ( gblErrorCount > 0 )
380 if ( machineSpec == 0 && machineName == 0 )
381 hostData << "</host>\n";
383 if ( gblErrorCount > 0 )
386 ostream *outputFile = 0;
387 if ( outputFileName != 0 )
388 outputFile = new ofstream( outputFileName );
392 /* Write the machines, then the surrounding code. */
393 writeMachines( *outputFile, hostData.str(), inputFileName );
395 /* Close the intermediate file. */
396 if ( outputFileName != 0 )
399 return gblErrorCount > 0;
/* Build the file name template for the intermediate file.
 *
 * The result is the directory part of baseFileName (everything up to and
 * including the last path separator, or nothing if there is none) followed
 * by "ragel-XXXXXX.xml". On WIN32 builds a backslash counts as a separator
 * as well as a forward slash, matching the path handling used for the
 * Windows executable lookup in this file.
 *
 * Returns a new[]-allocated string owned by the caller. */
char *makeIntermedTemplate( char *baseFileName )
{
	static const char templ[] = "ragel-XXXXXX.xml";

	const char *lastSep = strrchr( baseFileName, '/' );
#ifdef WIN32
	const char *lastBack = strrchr( baseFileName, '\\' );
	if ( lastBack != 0 && ( lastSep == 0 || lastBack > lastSep ) )
		lastSep = lastBack;
#endif

	/* Length of the directory prefix, separator included. */
	const size_t dirLen = lastSep == 0 ? 0 : (size_t)( lastSep - baseFileName ) + 1;

	/* sizeof(templ) already accounts for the terminating null. */
	char *result = new char[dirLen + sizeof(templ)];
	memcpy( result, baseFileName, dirLen );
	memcpy( result + dirLen, templ, sizeof(templ) );
	return result;
}
420 char *openIntermed( char *inputFileName, char *outputFileName )
425 /* Which filename do we use as the base? */
426 char *baseFileName = outputFileName != 0 ? outputFileName : inputFileName;
428 /* The template for the intermediate file name. */
429 char *intermedFileName = makeIntermedTemplate( baseFileName );
431 /* Randomize the name and try to open. */
432 char fnChars[] = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
433 char *firstX = strrchr( intermedFileName, 'X' ) - 5;
434 for ( int tries = 0; tries < 20; tries++ ) {
435 /* Choose a random name. */
436 for ( int x = 0; x < 6; x++ )
437 firstX[x] = fnChars[rand() % 52];
439 /* Try to open the file. */
440 int fd = ::open( intermedFileName, O_WRONLY|O_EXCL|O_CREAT, S_IRUSR|S_IWUSR );
443 /* Success. Close the file immediately and return the name for use
444 * by the child processes. */
446 result = intermedFileName;
450 if ( errno == EACCES ) {
451 error() << "failed to open temp file " << intermedFileName <<
452 ", access denied" << endp;
457 error() << "abnormal error: cannot find unique name for temp file" << endp;
/* Remove the intermediate file, then end the process with the given exit
 * status. Does not return. */
void cleanExit( char *intermed, int status )
{
	unlink( intermed );
	exit( status );
}
471 /* If any forward slash is found in argv0 then it is assumed that the path is
472 * explicit and the path to the backend executable should be derived from
473 * that. Whe check that location and also go up one then inside a directory of
474 * the same name in case we are executing from the source tree. If no forward
475 * slash is found it is assumed the file is being run from the installed
476 * location. The PREFIX supplied during configuration is used. */
477 char **makePathChecksUnix( const char *argv0, const char *progName )
479 char **result = new char*[3];
480 const char *lastSlash = strrchr( argv0, '/' );
483 if ( lastSlash != 0 ) {
484 char *path = strdup( argv0 );
485 int givenPathLen = (lastSlash - argv0) + 1;
486 path[givenPathLen] = 0;
488 int progNameLen = strlen(progName);
489 int length = givenPathLen + progNameLen + 1;
490 char *check = new char[length];
491 sprintf( check, "%s%s", path, progName );
492 result[numChecks++] = check;
494 length = givenPathLen + 3 + progNameLen + 1 + progNameLen + 1;
495 check = new char[length];
496 sprintf( check, "%s../%s/%s", path, progName, progName );
497 result[numChecks++] = check;
500 int prefixLen = strlen(PREFIX);
501 int progNameLen = strlen(progName);
502 int length = prefixLen + 5 + progNameLen + 1;
503 char *check = new char[length];
505 sprintf( check, PREFIX "/bin/%s", progName );
506 result[numChecks++] = check;
509 result[numChecks] = 0;
514 void forkAndExec( const char *progName, char **pathChecks,
515 ArgsVector &args, char *intermed )
519 /* Error, no child created. */
520 error() << "failed to fork for " << progName << endl;
521 cleanExit( intermed, 1 );
523 else if ( pid == 0 ) {
525 while ( *pathChecks != 0 ) {
526 /* Execv does not modify argv, it just uses the const form that is
527 * compatible with the most code. Ours not included. */
528 execv( *pathChecks, (char *const*) args.data );
531 error() << "failed to exec " << progName << endl;
532 cleanExit( intermed, 1 );
535 /* Parent process, wait for the child. */
539 /* What happened with the child. */
540 if ( ! WIFEXITED( status ) ) {
541 error() << progName << " did not exit normally" << endl;
542 cleanExit( intermed, 1 );
545 if ( WEXITSTATUS(status) != 0 )
546 cleanExit( intermed, WEXITSTATUS(status) );
551 /* GetModuleFileNameEx is used to find out where the the current process's
552 * binary is. That location is searched first. If that fails then we go up one
553 * directory and look for the executable inside a directory of the same name
554 * in case we are executing from the source tree.
556 char **makePathChecksWin( const char *progName )
559 char *imageFileName = new char[len];
560 HANDLE h = GetCurrentProcess();
561 len = GetModuleFileNameEx( h, NULL, imageFileName, len );
562 imageFileName[len] = 0;
564 char **result = new char*[3];
565 const char *lastSlash = strrchr( imageFileName, '\\' );
568 assert( lastSlash != 0 );
569 char *path = strdup( imageFileName );
570 int givenPathLen = (lastSlash - imageFileName) + 1;
571 path[givenPathLen] = 0;
573 int progNameLen = strlen(progName);
574 int length = givenPathLen + progNameLen + 1;
575 char *check = new char[length];
576 sprintf( check, "%s%s", path, progName );
577 result[numChecks++] = check;
579 length = givenPathLen + 3 + progNameLen + 1 + progNameLen + 1;
580 check = new char[length];
581 sprintf( check, "%s..\\%s\\%s", path, progName, progName );
582 result[numChecks++] = check;
584 result[numChecks] = 0;
588 void spawn( const char *progName, char **pathChecks,
589 ArgsVector &args, char *intermed )
592 while ( *pathChecks != 0 ) {
593 //cerr << "trying to execute " << *pathChecks << endl;
594 result = _spawnv( _P_WAIT, *pathChecks, args.data );
595 if ( result >= 0 || errno != ENOENT )
601 error() << "failed to spawn " << progName << endl;
602 cleanExit( intermed, 1 );
606 cleanExit( intermed, 1 );
611 void execFrontend( const char *argv0, char *inputFileName, char *intermed )
613 /* The frontend program name. */
614 const char *progName = "ragel";
616 frontendArgs.insert( 0, progName );
617 frontendArgs.insert( 1, "-x" );
618 frontendArgs.append( "-o" );
619 frontendArgs.append( intermed );
620 frontendArgs.append( inputFileName );
621 frontendArgs.append( 0 );
624 char **pathChecks = makePathChecksUnix( argv0, progName );
625 forkAndExec( progName, pathChecks, frontendArgs, intermed );
627 char **pathChecks = makePathChecksWin( progName );
628 spawn( progName, pathChecks, frontendArgs, intermed );
632 void execBackend( const char *argv0, char *intermed, char *outputFileName )
634 /* Locate the backend program */
635 const char *progName = 0;
637 progName = "rlgen-dot";
639 switch ( hostLang->lang ) {
642 progName = "rlgen-cd";
645 progName = "rlgen-java";
648 progName = "rlgen-ruby";
653 backendArgs.insert( 0, progName );
654 if ( outputFileName != 0 ) {
655 backendArgs.append( "-o" );
656 backendArgs.append( outputFileName );
658 backendArgs.append( intermed );
659 backendArgs.append( 0 );
662 char **pathChecks = makePathChecksUnix( argv0, progName );
663 forkAndExec( progName, pathChecks, backendArgs, intermed );
665 char **pathChecks = makePathChecksWin( progName );
666 spawn( progName, pathChecks, backendArgs, intermed );
670 /* Main, process args and call yyparse to start scanning input. */
671 int main(int argc, char **argv)
673 char *inputFileName = 0;
674 char *outputFileName = 0;
676 processArgs( argc, argv, inputFileName, outputFileName );
678 /* If -M or -S are given and we're not generating a dot file then invoke
679 * the frontend. These options are not useful with code generators. */
680 if ( machineName != 0 || machineSpec != 0 ) {
685 /* Require an input file. If we use standard in then we won't have a file
686 * name on which to base the output. */
687 if ( inputFileName == 0 )
688 error() << "no input file given" << endl;
690 /* Bail on argument processing errors. */
691 if ( gblErrorCount > 0 )
694 /* Make sure we are not writing to the same file as the input file. */
695 if ( inputFileName != 0 && outputFileName != 0 &&
696 strcmp( inputFileName, outputFileName ) == 0 )
698 error() << "output file \"" << outputFileName <<
699 "\" is the same as the input file" << endp;
703 return frontend( inputFileName, outputFileName );
705 char *intermed = openIntermed( inputFileName, outputFileName );
707 /* From here on in the cleanExit function should be used to exit. */
709 /* Run the frontend, then the backend processes. */
710 execFrontend( argv[0], inputFileName, intermed );
711 execBackend( argv[0], intermed, outputFileName );
713 /* Clean up the intermediate. */
714 cleanExit( intermed, 0 );