2 * Copyright 2001-2006 Adrian Thurston <thurston@complang.org>
3 * 2004 Erich Ocean <eric.ocean@ampede.com>
4 * 2005 Alan West <alan@alanz.com>
7 /* This file is part of Ragel.
9 * Ragel is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * Ragel is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Ragel; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #include "cdcodegen.h"
32 /* Code generators. */
43 using std::ostringstream;
56 /* Target language and output style. */
57 extern CodeStyleEnum codeStyle;
60 extern istream *inStream;
61 extern ostream *outStream;
62 extern output_filter *outFilter;
63 extern const char *outputFileName;
65 /* Graphviz dot file generation. */
66 extern bool graphvizDone;
68 extern int numSplitPartitions;
69 extern bool noLineDirectives;
71 /* Invoked by the parser when the root element is opened. */
72 ostream *cdOpenOutput( const char *inputFile )
74 if ( hostLang->lang != HostLang::C && hostLang->lang != HostLang::D ) {
75 error() << "this code generator is for C and D only" << endl;
79 /* If the output format is code and no output file name is given, then
81 if ( outputFileName == 0 ) {
82 const char *ext = findFileExtension( inputFile );
83 if ( ext != 0 && strcmp( ext, ".rh" ) == 0 )
84 outputFileName = fileNameFromStem( inputFile, ".h" );
86 const char *defExtension = 0;
87 switch ( hostLang->lang ) {
88 case HostLang::C: defExtension = ".c"; break;
89 case HostLang::D: defExtension = ".d"; break;
92 outputFileName = fileNameFromStem( inputFile, defExtension );
96 /* Make sure we are not writing to the same file as the input file. */
97 if ( outputFileName != 0 && strcmp( inputFile, outputFileName ) == 0 ) {
98 error() << "output file \"" << outputFileName <<
99 "\" is the same as the input file" << endl;
102 if ( outputFileName != 0 ) {
103 /* Create the filter on the output and open it. */
104 outFilter = new output_filter( outputFileName );
105 outFilter->open( outputFileName, ios::out|ios::trunc );
106 if ( !outFilter->is_open() ) {
107 error() << "error opening " << outputFileName << " for writing" << endl;
111 /* Open the output stream, attaching it to the filter. */
112 outStream = new ostream( outFilter );
/* Writing out to std out. */
121 /* Invoked by the parser when a ragel definition is opened. */
122 CodeGenData *cdMakeCodeGen( const char *sourceFileName, const char *fsmName,
123 ostream &out, bool wantComplete )
125 CodeGenData *codeGen = 0;
126 switch ( hostLang->lang ) {
128 switch ( codeStyle ) {
130 codeGen = new CTabCodeGen(out);
133 codeGen = new CFTabCodeGen(out);
136 codeGen = new CFlatCodeGen(out);
139 codeGen = new CFFlatCodeGen(out);
142 codeGen = new CGotoCodeGen(out);
145 codeGen = new CFGotoCodeGen(out);
148 codeGen = new CIpGotoCodeGen(out);
151 codeGen = new CSplitCodeGen(out);
157 switch ( codeStyle ) {
159 codeGen = new DTabCodeGen(out);
162 codeGen = new DFTabCodeGen(out);
165 codeGen = new DFlatCodeGen(out);
168 codeGen = new DFFlatCodeGen(out);
171 codeGen = new DGotoCodeGen(out);
174 codeGen = new DFGotoCodeGen(out);
177 codeGen = new DIpGotoCodeGen(out);
180 codeGen = new DSplitCodeGen(out);
188 codeGen->sourceFileName = sourceFileName;
189 codeGen->fsmName = fsmName;
190 codeGen->wantComplete = wantComplete;
196 void cdLineDirective( ostream &out, const char *fileName, int line )
198 if ( noLineDirectives )
/* Write the preprocessor line info for the input file. */
202 out << "#line " << line << " \"";
203 for ( const char *pc = fileName; *pc != 0; pc++ ) {
211 if ( noLineDirectives )
217 void FsmCodeGen::genLineDirective( ostream &out )
219 std::streambuf *sbuf = out.rdbuf();
220 output_filter *filter = static_cast<output_filter*>(sbuf);
221 cdLineDirective( out, filter->fileName, filter->line + 1 );
225 /* Init code gen with in parameters. */
226 FsmCodeGen::FsmCodeGen( ostream &out )
232 unsigned int FsmCodeGen::arrayTypeSize( unsigned long maxVal )
234 long long maxValLL = (long long) maxVal;
235 HostType *arrayType = keyOps->typeSubsumes( maxValLL );
236 assert( arrayType != 0 );
237 return arrayType->size;
240 string FsmCodeGen::ARRAY_TYPE( unsigned long maxVal )
242 long long maxValLL = (long long) maxVal;
243 HostType *arrayType = keyOps->typeSubsumes( maxValLL );
244 assert( arrayType != 0 );
246 string ret = arrayType->data1;
247 if ( arrayType->data2 != 0 ) {
249 ret += arrayType->data2;
255 /* Write out the fsm name. */
256 string FsmCodeGen::FSM_NAME()
261 /* Emit the offset of the start state as a decimal integer. */
262 string FsmCodeGen::START_STATE_ID()
265 ret << redFsm->startState->id;
269 /* Write out the array of actions. */
270 std::ostream &FsmCodeGen::ACTIONS_ARRAY()
273 int totalActions = 1;
274 for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) {
275 /* Write out the length, which will never be the last character. */
276 out << act->key.length() << ", ";
277 /* Put in a line break every 8 */
278 if ( totalActions++ % 8 == 7 )
281 for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) {
282 out << item->value->actionId;
283 if ( ! (act.last() && item.last()) )
286 /* Put in a line break every 8 */
287 if ( totalActions++ % 8 == 7 )
296 string FsmCodeGen::ACCESS()
299 if ( accessExpr != 0 )
300 INLINE_LIST( ret, accessExpr, 0, false, false );
305 string FsmCodeGen::P()
312 INLINE_LIST( ret, pExpr, 0, false, false );
318 string FsmCodeGen::PE()
325 INLINE_LIST( ret, peExpr, 0, false, false );
331 string FsmCodeGen::EOFV()
338 INLINE_LIST( ret, eofExpr, 0, false, false );
344 string FsmCodeGen::CS()
348 ret << ACCESS() << "cs";
/* Emit the user supplied cs expression in place of the default. */
352 INLINE_LIST( ret, csExpr, 0, false, false );
358 string FsmCodeGen::TOP()
362 ret << ACCESS() + "top";
365 INLINE_LIST( ret, topExpr, 0, false, false );
371 string FsmCodeGen::STACK()
374 if ( stackExpr == 0 )
375 ret << ACCESS() + "stack";
378 INLINE_LIST( ret, stackExpr, 0, false, false );
384 string FsmCodeGen::ACT()
388 ret << ACCESS() + "act";
391 INLINE_LIST( ret, actExpr, 0, false, false );
397 string FsmCodeGen::TOKSTART()
400 if ( tokstartExpr == 0 )
401 ret << ACCESS() + "ts";
404 INLINE_LIST( ret, tokstartExpr, 0, false, false );
410 string FsmCodeGen::TOKEND()
413 if ( tokendExpr == 0 )
414 ret << ACCESS() + "te";
417 INLINE_LIST( ret, tokendExpr, 0, false, false );
423 string FsmCodeGen::GET_WIDE_KEY()
425 if ( redFsm->anyConditions() )
431 string FsmCodeGen::GET_WIDE_KEY( RedStateAp *state )
433 if ( state->stateCondList.length() > 0 )
439 string FsmCodeGen::GET_KEY()
442 if ( getKeyExpr != 0 ) {
443 /* Emit the user supplied method of retrieving the key. */
445 INLINE_LIST( ret, getKeyExpr, 0, false, false );
449 /* Expression for retrieving the key, use simple dereference. */
450 ret << "(*" << P() << ")";
455 /* Write out level number of tabs. Makes the nested binary search nice
457 string FsmCodeGen::TABS( int level )
460 while ( level-- > 0 )
/* Write out a key from the fsm code gen. Depends on whether or not the key is
467 string FsmCodeGen::KEY( Key key )
470 if ( keyOps->isSigned || !hostLang->explicitUnsigned )
473 ret << (unsigned long) key.getVal() << 'u';
477 void FsmCodeGen::EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish )
479 /* The parser gives fexec two children. The double brackets are for D
480 * code. If the inline list is a single word it will get interpreted as a
481 * C-style cast by the D compiler. */
482 ret << "{" << P() << " = ((";
483 INLINE_LIST( ret, item->children, targState, inFinish, false );
487 void FsmCodeGen::LM_SWITCH( ostream &ret, GenInlineItem *item,
488 int targState, int inFinish, bool csForced )
491 " switch( " << ACT() << " ) {\n";
493 bool haveDefault = false;
494 for ( GenInlineList::Iter lma = *item->children; lma.lte(); lma++ ) {
495 /* Write the case label, the action and the case break. */
496 if ( lma->lmId < 0 ) {
497 ret << " default:\n";
501 ret << " case " << lma->lmId << ":\n";
503 /* Write the block and close it off. */
505 INLINE_LIST( ret, lma->children, targState, inFinish, csForced );
511 if ( hostLang->lang == HostLang::D && !haveDefault )
512 ret << " default: break;";
519 void FsmCodeGen::SET_ACT( ostream &ret, GenInlineItem *item )
521 ret << ACT() << " = " << item->lmId << ";";
524 void FsmCodeGen::SET_TOKEND( ostream &ret, GenInlineItem *item )
526 /* The tokend action sets tokend. */
527 ret << TOKEND() << " = " << P();
528 if ( item->offset != 0 )
529 out << "+" << item->offset;
533 void FsmCodeGen::GET_TOKEND( ostream &ret, GenInlineItem *item )
538 void FsmCodeGen::INIT_TOKSTART( ostream &ret, GenInlineItem *item )
540 ret << TOKSTART() << " = " << NULL_ITEM() << ";";
543 void FsmCodeGen::INIT_ACT( ostream &ret, GenInlineItem *item )
545 ret << ACT() << " = 0;";
548 void FsmCodeGen::SET_TOKSTART( ostream &ret, GenInlineItem *item )
550 ret << TOKSTART() << " = " << P() << ";";
553 void FsmCodeGen::SUB_ACTION( ostream &ret, GenInlineItem *item,
554 int targState, bool inFinish, bool csForced )
556 if ( item->children->length() > 0 ) {
557 /* Write the block and close it off. */
559 INLINE_LIST( ret, item->children, targState, inFinish, csForced );
565 /* Write out an inline tree structure. Walks the list and possibly calls out
 * to virtual functions that handle language specific items in the tree. */
567 void FsmCodeGen::INLINE_LIST( ostream &ret, GenInlineList *inlineList,
568 int targState, bool inFinish, bool csForced )
570 for ( GenInlineList::Iter item = *inlineList; item.lte(); item++ ) {
571 switch ( item->type ) {
572 case GenInlineItem::Text:
575 case GenInlineItem::Goto:
576 GOTO( ret, item->targState->id, inFinish );
578 case GenInlineItem::Call:
579 CALL( ret, item->targState->id, targState, inFinish );
581 case GenInlineItem::Next:
582 NEXT( ret, item->targState->id, inFinish );
584 case GenInlineItem::Ret:
585 RET( ret, inFinish );
587 case GenInlineItem::PChar:
590 case GenInlineItem::Char:
593 case GenInlineItem::Hold:
596 case GenInlineItem::Exec:
597 EXEC( ret, item, targState, inFinish );
599 case GenInlineItem::Curs:
600 CURS( ret, inFinish );
602 case GenInlineItem::Targs:
603 TARGS( ret, inFinish, targState );
605 case GenInlineItem::Entry:
606 ret << item->targState->id;
608 case GenInlineItem::GotoExpr:
609 GOTO_EXPR( ret, item, inFinish );
611 case GenInlineItem::CallExpr:
612 CALL_EXPR( ret, item, targState, inFinish );
614 case GenInlineItem::NextExpr:
615 NEXT_EXPR( ret, item, inFinish );
617 case GenInlineItem::LmSwitch:
618 LM_SWITCH( ret, item, targState, inFinish, csForced );
620 case GenInlineItem::LmSetActId:
621 SET_ACT( ret, item );
623 case GenInlineItem::LmSetTokEnd:
624 SET_TOKEND( ret, item );
626 case GenInlineItem::LmGetTokEnd:
627 GET_TOKEND( ret, item );
629 case GenInlineItem::LmInitTokStart:
630 INIT_TOKSTART( ret, item );
632 case GenInlineItem::LmInitAct:
633 INIT_ACT( ret, item );
635 case GenInlineItem::LmSetTokStart:
636 SET_TOKSTART( ret, item );
638 case GenInlineItem::SubAction:
639 SUB_ACTION( ret, item, targState, inFinish, csForced );
641 case GenInlineItem::Break:
642 BREAK( ret, targState, csForced );
647 /* Write out paths in line directives. Escapes any special characters. */
648 string FsmCodeGen::LDIR_PATH( char *path )
651 for ( char *pc = path; *pc != 0; pc++ ) {
660 void FsmCodeGen::ACTION( ostream &ret, GenAction *action, int targState,
661 bool inFinish, bool csForced )
663 /* Write the preprocessor line info for going into the source file. */
664 cdLineDirective( ret, sourceFileName, action->loc.line );
666 /* Write the block and close it off. */
668 INLINE_LIST( ret, action->inlineList, targState, inFinish, csForced );
672 void FsmCodeGen::CONDITION( ostream &ret, GenAction *condition )
675 cdLineDirective( ret, sourceFileName, condition->loc.line );
676 INLINE_LIST( ret, condition->inlineList, 0, false, false );
679 string FsmCodeGen::ERROR_STATE()
682 if ( redFsm->errState != 0 )
683 ret << redFsm->errState->id;
689 string FsmCodeGen::FIRST_FINAL_STATE()
692 if ( redFsm->firstFinState != 0 )
693 ret << redFsm->firstFinState->id;
695 ret << redFsm->nextStateId;
699 void FsmCodeGen::writeInit()
704 out << "\t" << CS() << " = " << START() << ";\n";
706 /* If there are any calls, then the stack top needs initialization. */
707 if ( redFsm->anyActionCalls() || redFsm->anyActionRets() )
708 out << "\t" << TOP() << " = 0;\n";
710 if ( hasLongestMatch ) {
712 " " << TOKSTART() << " = " << NULL_ITEM() << ";\n"
713 " " << TOKEND() << " = " << NULL_ITEM() << ";\n"
714 " " << ACT() << " = 0;\n";
719 string FsmCodeGen::DATA_PREFIX()
722 return FSM_NAME() + "_";
726 /* Emit the alphabet data type. */
727 string FsmCodeGen::ALPH_TYPE()
729 string ret = keyOps->alphType->data1;
730 if ( keyOps->alphType->data2 != 0 ) {
732 ret += + keyOps->alphType->data2;
/* Emit the wide alphabet data type. */
738 string FsmCodeGen::WIDE_ALPH_TYPE()
741 if ( redFsm->maxKey <= keyOps->maxKey )
744 long long maxKeyVal = redFsm->maxKey.getLongLong();
745 HostType *wideType = keyOps->typeSubsumes( keyOps->isSigned, maxKeyVal );
746 assert( wideType != 0 );
748 ret = wideType->data1;
749 if ( wideType->data2 != 0 ) {
751 ret += wideType->data2;
757 void FsmCodeGen::STATE_IDS()
759 if ( redFsm->startState != 0 )
760 STATIC_VAR( "int", START() ) << " = " << START_STATE_ID() << ";\n";
763 STATIC_VAR( "int" , FIRST_FINAL() ) << " = " << FIRST_FINAL_STATE() << ";\n";
766 STATIC_VAR( "int", ERROR() ) << " = " << ERROR_STATE() << ";\n";
770 if ( entryPointNames.length() > 0 ) {
771 for ( EntryNameVect::Iter en = entryPointNames; en.lte(); en++ ) {
772 STATIC_VAR( "int", DATA_PREFIX() + "en_" + *en ) <<
773 " = " << entryPointIds[en.pos()] << ";\n";
779 void FsmCodeGen::writeStart()
781 out << START_STATE_ID();
784 void FsmCodeGen::writeFirstFinal()
786 out << FIRST_FINAL_STATE();
789 void FsmCodeGen::writeError()
791 out << ERROR_STATE();
795 * Language specific, but style independent code generators functions.
798 string CCodeGen::PTR_CONST()
803 std::ostream &CCodeGen::OPEN_ARRAY( string type, string name )
805 out << "static const " << type << " " << name << "[] = {\n";
809 std::ostream &CCodeGen::CLOSE_ARRAY()
811 return out << "};\n";
814 std::ostream &CCodeGen::STATIC_VAR( string type, string name )
816 out << "static const " << type << " " << name;
820 string CCodeGen::UINT( )
822 return "unsigned int";
825 string CCodeGen::ARR_OFF( string ptr, string offset )
827 return ptr + " + " + offset;
830 string CCodeGen::CAST( string type )
832 return "(" + type + ")";
835 string CCodeGen::NULL_ITEM()
840 string CCodeGen::POINTER()
845 std::ostream &CCodeGen::SWITCH_DEFAULT()
850 string CCodeGen::CTRL_FLOW()
855 void CCodeGen::writeExports()
857 if ( exportList.length() > 0 ) {
858 for ( ExportList::Iter ex = exportList; ex.lte(); ex++ ) {
859 out << "#define " << DATA_PREFIX() << "ex_" << ex->name << " " <<
860 KEY(ex->key) << "\n";
870 string DCodeGen::NULL_ITEM()
875 string DCodeGen::POINTER()
// multiple items separated by commas can also be pointer types.
881 string DCodeGen::PTR_CONST()
886 std::ostream &DCodeGen::OPEN_ARRAY( string type, string name )
888 out << "static const " << type << "[] " << name << " = [\n";
892 std::ostream &DCodeGen::CLOSE_ARRAY()
894 return out << "];\n";
897 std::ostream &DCodeGen::STATIC_VAR( string type, string name )
899 out << "static const " << type << " " << name;
903 string DCodeGen::ARR_OFF( string ptr, string offset )
905 return "&" + ptr + "[" + offset + "]";
908 string DCodeGen::CAST( string type )
910 return "cast(" + type + ")";
913 string DCodeGen::UINT( )
918 std::ostream &DCodeGen::SWITCH_DEFAULT()
920 out << " default: break;\n";
924 string DCodeGen::CTRL_FLOW()
929 void DCodeGen::writeExports()
931 if ( exportList.length() > 0 ) {
932 for ( ExportList::Iter ex = exportList; ex.lte(); ex++ ) {
933 out << "static const " << ALPH_TYPE() << " " << DATA_PREFIX() <<
934 "ex_" << ex->name << " = " << KEY(ex->key) << ";\n";
941 * End D-specific code.
944 void FsmCodeGen::finishRagelDef()
946 if ( codeStyle == GenGoto || codeStyle == GenFGoto ||
947 codeStyle == GenIpGoto || codeStyle == GenSplit )
949 /* For directly executable machines there is no required state
950 * ordering. Choose a depth-first ordering to increase the
951 * potential for fall-throughs. */
952 redFsm->depthFirstOrdering();
955 /* The frontend will do this for us, but it may be a good idea to
956 * force it if the intermediate file is edited. */
957 redFsm->sortByStateId();
960 /* Choose default transitions and the single transition. */
961 redFsm->chooseDefaultSpan();
963 /* Maybe do flat expand, otherwise choose single. */
964 if ( codeStyle == GenFlat || codeStyle == GenFFlat )
967 redFsm->chooseSingle();
/* If any errors have occurred in the input file then don't write anything. */
970 if ( gblErrorCount > 0 )
973 if ( codeStyle == GenSplit )
974 redFsm->partitionFsm( numSplitPartitions );
976 if ( codeStyle == GenIpGoto || codeStyle == GenSplit )
977 redFsm->setInTrans();
/* Analyze Machine will find the final action reference counts, among
980 * other things. We will use these in reporting the usage
981 * of fsm directives in action code. */
/* Determine if we should use indices. */
988 ostream &FsmCodeGen::source_warning( const InputLoc &loc )
990 cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": warning: ";
994 ostream &FsmCodeGen::source_error( const InputLoc &loc )
997 assert( sourceFileName != 0 );
998 cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": ";