2 * Copyright 2001-2006 Adrian Thurston <thurston@complang.org>
3 * 2004 Erich Ocean <eric.ocean@ampede.com>
4 * 2005 Alan West <alan@alanz.com>
7 /* This file is part of Ragel.
9 * Ragel is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * Ragel is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Ragel; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #include "cdcodegen.h"
32 /* Code generators. */
43 using std::ostringstream;
56 /* Target language and output style. */
57 extern CodeStyleEnum codeStyle;
60 extern istream *inStream;
61 extern ostream *outStream;
62 extern output_filter *outFilter;
63 extern const char *outputFileName;
65 /* Graphviz dot file generation. */
66 extern bool graphvizDone;
68 extern int numSplitPartitions;
69 extern bool noLineDirectives;
71 /* Invoked by the parser when a ragel definition is opened. */
72 CodeGenData *cdMakeCodeGen( const char *sourceFileName, const char *fsmName,
73 ostream &out, bool wantComplete )
75 CodeGenData *codeGen = 0;
76 switch ( hostLang->lang ) {
78 switch ( codeStyle ) {
80 codeGen = new CTabCodeGen(out);
83 codeGen = new CFTabCodeGen(out);
86 codeGen = new CFlatCodeGen(out);
89 codeGen = new CFFlatCodeGen(out);
92 codeGen = new CGotoCodeGen(out);
95 codeGen = new CFGotoCodeGen(out);
98 codeGen = new CIpGotoCodeGen(out);
101 codeGen = new CSplitCodeGen(out);
107 switch ( codeStyle ) {
109 codeGen = new DTabCodeGen(out);
112 codeGen = new DFTabCodeGen(out);
115 codeGen = new DFlatCodeGen(out);
118 codeGen = new DFFlatCodeGen(out);
121 codeGen = new DGotoCodeGen(out);
124 codeGen = new DFGotoCodeGen(out);
127 codeGen = new DIpGotoCodeGen(out);
130 codeGen = new DSplitCodeGen(out);
138 codeGen->sourceFileName = sourceFileName;
139 codeGen->fsmName = fsmName;
140 codeGen->wantComplete = wantComplete;
146 void cdLineDirective( ostream &out, const char *fileName, int line )
148 if ( noLineDirectives )
151 /* Write the preprocessor line info for the input file. */
152 out << "#line " << line << " \"";
153 for ( const char *pc = fileName; *pc != 0; pc++ ) {
161 if ( noLineDirectives )
167 void FsmCodeGen::genLineDirective( ostream &out )
169 std::streambuf *sbuf = out.rdbuf();
170 output_filter *filter = static_cast<output_filter*>(sbuf);
171 cdLineDirective( out, filter->fileName, filter->line + 1 );
175 /* Init code gen with in parameters. */
176 FsmCodeGen::FsmCodeGen( ostream &out )
182 unsigned int FsmCodeGen::arrayTypeSize( unsigned long maxVal )
184 long long maxValLL = (long long) maxVal;
185 HostType *arrayType = keyOps->typeSubsumes( maxValLL );
186 assert( arrayType != 0 );
187 return arrayType->size;
190 string FsmCodeGen::ARRAY_TYPE( unsigned long maxVal )
192 long long maxValLL = (long long) maxVal;
193 HostType *arrayType = keyOps->typeSubsumes( maxValLL );
194 assert( arrayType != 0 );
196 string ret = arrayType->data1;
197 if ( arrayType->data2 != 0 ) {
199 ret += arrayType->data2;
205 /* Write out the fsm name. */
206 string FsmCodeGen::FSM_NAME()
211 /* Emit the offset of the start state as a decimal integer. */
212 string FsmCodeGen::START_STATE_ID()
215 ret << redFsm->startState->id;
219 /* Write out the array of actions. */
220 std::ostream &FsmCodeGen::ACTIONS_ARRAY()
223 int totalActions = 1;
224 for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) {
225 /* Write out the length, which will never be the last character. */
226 out << act->key.length() << ", ";
227 /* Put in a line break every 8 */
228 if ( totalActions++ % 8 == 7 )
231 for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) {
232 out << item->value->actionId;
233 if ( ! (act.last() && item.last()) )
236 /* Put in a line break every 8 */
237 if ( totalActions++ % 8 == 7 )
246 string FsmCodeGen::ACCESS()
249 if ( accessExpr != 0 )
250 INLINE_LIST( ret, accessExpr, 0, false, false );
255 string FsmCodeGen::P()
262 INLINE_LIST( ret, pExpr, 0, false, false );
268 string FsmCodeGen::PE()
275 INLINE_LIST( ret, peExpr, 0, false, false );
281 string FsmCodeGen::EOFV()
288 INLINE_LIST( ret, eofExpr, 0, false, false );
294 string FsmCodeGen::CS()
298 ret << ACCESS() << "cs";
300 /* Emit the user supplied cs expression. */
302 INLINE_LIST( ret, csExpr, 0, false, false );
308 string FsmCodeGen::TOP()
312 ret << ACCESS() + "top";
315 INLINE_LIST( ret, topExpr, 0, false, false );
321 string FsmCodeGen::STACK()
324 if ( stackExpr == 0 )
325 ret << ACCESS() + "stack";
328 INLINE_LIST( ret, stackExpr, 0, false, false );
334 string FsmCodeGen::ACT()
338 ret << ACCESS() + "act";
341 INLINE_LIST( ret, actExpr, 0, false, false );
347 string FsmCodeGen::TOKSTART()
350 if ( tokstartExpr == 0 )
351 ret << ACCESS() + "ts";
354 INLINE_LIST( ret, tokstartExpr, 0, false, false );
360 string FsmCodeGen::TOKEND()
363 if ( tokendExpr == 0 )
364 ret << ACCESS() + "te";
367 INLINE_LIST( ret, tokendExpr, 0, false, false );
373 string FsmCodeGen::GET_WIDE_KEY()
375 if ( redFsm->anyConditions() )
381 string FsmCodeGen::GET_WIDE_KEY( RedStateAp *state )
383 if ( state->stateCondList.length() > 0 )
389 string FsmCodeGen::GET_KEY()
392 if ( getKeyExpr != 0 ) {
393 /* Emit the user supplied method of retrieving the key. */
395 INLINE_LIST( ret, getKeyExpr, 0, false, false );
399 /* Expression for retrieving the key, use simple dereference. */
400 ret << "(*" << P() << ")";
405 /* Write out level number of tabs. Makes the nested binary search nice
407 string FsmCodeGen::TABS( int level )
410 while ( level-- > 0 )
415 /* Write out a key from the fsm code gen. Depends on whether or not the key is
417 string FsmCodeGen::KEY( Key key )
420 if ( keyOps->isSigned || !hostLang->explicitUnsigned )
423 ret << (unsigned long) key.getVal() << 'u';
427 void FsmCodeGen::EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish )
429 /* The parser gives fexec two children. The double brackets are for D
430 * code. If the inline list is a single word it will get interpreted as a
431 * C-style cast by the D compiler. */
432 ret << "{" << P() << " = ((";
433 INLINE_LIST( ret, item->children, targState, inFinish, false );
437 void FsmCodeGen::LM_SWITCH( ostream &ret, GenInlineItem *item,
438 int targState, int inFinish, bool csForced )
441 " switch( " << ACT() << " ) {\n";
443 bool haveDefault = false;
444 for ( GenInlineList::Iter lma = *item->children; lma.lte(); lma++ ) {
445 /* Write the case label, the action and the case break. */
446 if ( lma->lmId < 0 ) {
447 ret << " default:\n";
451 ret << " case " << lma->lmId << ":\n";
453 /* Write the block and close it off. */
455 INLINE_LIST( ret, lma->children, targState, inFinish, csForced );
461 if ( hostLang->lang == HostLang::D && !haveDefault )
462 ret << " default: break;";
469 void FsmCodeGen::SET_ACT( ostream &ret, GenInlineItem *item )
471 ret << ACT() << " = " << item->lmId << ";";
474 void FsmCodeGen::SET_TOKEND( ostream &ret, GenInlineItem *item )
476 /* The tokend action sets tokend: emits "<TOKEND> = <P>" into ret, followed by "+<offset>" when item->offset is non-zero. The offset goes to ret (not the member stream out) so the whole assignment lands in the stream the caller passed in. */
477 ret << TOKEND() << " = " << P();
478 if ( item->offset != 0 )
479 ret << "+" << item->offset;
483 void FsmCodeGen::GET_TOKEND( ostream &ret, GenInlineItem *item )
488 void FsmCodeGen::INIT_TOKSTART( ostream &ret, GenInlineItem *item )
490 ret << TOKSTART() << " = " << NULL_ITEM() << ";";
493 void FsmCodeGen::INIT_ACT( ostream &ret, GenInlineItem *item )
495 ret << ACT() << " = 0;";
498 void FsmCodeGen::SET_TOKSTART( ostream &ret, GenInlineItem *item )
500 ret << TOKSTART() << " = " << P() << ";";
503 void FsmCodeGen::SUB_ACTION( ostream &ret, GenInlineItem *item,
504 int targState, bool inFinish, bool csForced )
506 if ( item->children->length() > 0 ) {
507 /* Write the block and close it off. */
509 INLINE_LIST( ret, item->children, targState, inFinish, csForced );
515 /* Write out an inline tree structure. Walks the list and possibly calls out
516 * to virtual functions that handle language-specific items in the tree. */
517 void FsmCodeGen::INLINE_LIST( ostream &ret, GenInlineList *inlineList,
518 int targState, bool inFinish, bool csForced )
520 for ( GenInlineList::Iter item = *inlineList; item.lte(); item++ ) {
521 switch ( item->type ) {
522 case GenInlineItem::Text:
525 case GenInlineItem::Goto:
526 GOTO( ret, item->targState->id, inFinish );
528 case GenInlineItem::Call:
529 CALL( ret, item->targState->id, targState, inFinish );
531 case GenInlineItem::Next:
532 NEXT( ret, item->targState->id, inFinish );
534 case GenInlineItem::Ret:
535 RET( ret, inFinish );
537 case GenInlineItem::PChar:
540 case GenInlineItem::Char:
543 case GenInlineItem::Hold:
546 case GenInlineItem::Exec:
547 EXEC( ret, item, targState, inFinish );
549 case GenInlineItem::Curs:
550 CURS( ret, inFinish );
552 case GenInlineItem::Targs:
553 TARGS( ret, inFinish, targState );
555 case GenInlineItem::Entry:
556 ret << item->targState->id;
558 case GenInlineItem::GotoExpr:
559 GOTO_EXPR( ret, item, inFinish );
561 case GenInlineItem::CallExpr:
562 CALL_EXPR( ret, item, targState, inFinish );
564 case GenInlineItem::NextExpr:
565 NEXT_EXPR( ret, item, inFinish );
567 case GenInlineItem::LmSwitch:
568 LM_SWITCH( ret, item, targState, inFinish, csForced );
570 case GenInlineItem::LmSetActId:
571 SET_ACT( ret, item );
573 case GenInlineItem::LmSetTokEnd:
574 SET_TOKEND( ret, item );
576 case GenInlineItem::LmGetTokEnd:
577 GET_TOKEND( ret, item );
579 case GenInlineItem::LmInitTokStart:
580 INIT_TOKSTART( ret, item );
582 case GenInlineItem::LmInitAct:
583 INIT_ACT( ret, item );
585 case GenInlineItem::LmSetTokStart:
586 SET_TOKSTART( ret, item );
588 case GenInlineItem::SubAction:
589 SUB_ACTION( ret, item, targState, inFinish, csForced );
591 case GenInlineItem::Break:
592 BREAK( ret, targState, csForced );
597 /* Write out paths in line directives. Escapes any special characters. */
598 string FsmCodeGen::LDIR_PATH( char *path )
601 for ( char *pc = path; *pc != 0; pc++ ) {
610 void FsmCodeGen::ACTION( ostream &ret, GenAction *action, int targState,
611 bool inFinish, bool csForced )
613 /* Write the preprocessor line info for going into the source file. */
614 cdLineDirective( ret, sourceFileName, action->loc.line );
616 /* Write the block and close it off. */
618 INLINE_LIST( ret, action->inlineList, targState, inFinish, csForced );
622 void FsmCodeGen::CONDITION( ostream &ret, GenAction *condition )
625 cdLineDirective( ret, sourceFileName, condition->loc.line );
626 INLINE_LIST( ret, condition->inlineList, 0, false, false );
629 string FsmCodeGen::ERROR_STATE()
632 if ( redFsm->errState != 0 )
633 ret << redFsm->errState->id;
639 string FsmCodeGen::FIRST_FINAL_STATE()
642 if ( redFsm->firstFinState != 0 )
643 ret << redFsm->firstFinState->id;
645 ret << redFsm->nextStateId;
649 void FsmCodeGen::writeInit()
654 out << "\t" << CS() << " = " << START() << ";\n";
656 /* If there are any calls, then the stack top needs initialization. */
657 if ( redFsm->anyActionCalls() || redFsm->anyActionRets() )
658 out << "\t" << TOP() << " = 0;\n";
660 if ( hasLongestMatch ) {
662 " " << TOKSTART() << " = " << NULL_ITEM() << ";\n"
663 " " << TOKEND() << " = " << NULL_ITEM() << ";\n"
664 " " << ACT() << " = 0;\n";
669 string FsmCodeGen::DATA_PREFIX()
672 return FSM_NAME() + "_";
676 /* Emit the alphabet data type: keyOps->alphType->data1, with keyOps->alphType->data2 appended when it is non-null. */
677 string FsmCodeGen::ALPH_TYPE()
679 string ret = keyOps->alphType->data1;
680 if ( keyOps->alphType->data2 != 0 ) {
682 ret += keyOps->alphType->data2;
687 /* Emit the wide alphabet data type. */
688 string FsmCodeGen::WIDE_ALPH_TYPE()
691 if ( redFsm->maxKey <= keyOps->maxKey )
694 long long maxKeyVal = redFsm->maxKey.getLongLong();
695 HostType *wideType = keyOps->typeSubsumes( keyOps->isSigned, maxKeyVal );
696 assert( wideType != 0 );
698 ret = wideType->data1;
699 if ( wideType->data2 != 0 ) {
701 ret += wideType->data2;
707 void FsmCodeGen::STATE_IDS()
709 if ( redFsm->startState != 0 )
710 STATIC_VAR( "int", START() ) << " = " << START_STATE_ID() << ";\n";
713 STATIC_VAR( "int" , FIRST_FINAL() ) << " = " << FIRST_FINAL_STATE() << ";\n";
716 STATIC_VAR( "int", ERROR() ) << " = " << ERROR_STATE() << ";\n";
720 if ( entryPointNames.length() > 0 ) {
721 for ( EntryNameVect::Iter en = entryPointNames; en.lte(); en++ ) {
722 STATIC_VAR( "int", DATA_PREFIX() + "en_" + *en ) <<
723 " = " << entryPointIds[en.pos()] << ";\n";
729 void FsmCodeGen::writeStart()
731 out << START_STATE_ID();
734 void FsmCodeGen::writeFirstFinal()
736 out << FIRST_FINAL_STATE();
739 void FsmCodeGen::writeError()
741 out << ERROR_STATE();
745 * Language-specific, but style-independent code generator functions.
748 string CCodeGen::PTR_CONST()
753 std::ostream &CCodeGen::OPEN_ARRAY( string type, string name )
755 out << "static const " << type << " " << name << "[] = {\n";
759 std::ostream &CCodeGen::CLOSE_ARRAY()
761 return out << "};\n";
764 std::ostream &CCodeGen::STATIC_VAR( string type, string name )
766 out << "static const " << type << " " << name;
770 string CCodeGen::UINT( )
772 return "unsigned int";
775 string CCodeGen::ARR_OFF( string ptr, string offset )
777 return ptr + " + " + offset;
780 string CCodeGen::CAST( string type )
782 return "(" + type + ")";
785 string CCodeGen::NULL_ITEM()
790 string CCodeGen::POINTER()
795 std::ostream &CCodeGen::SWITCH_DEFAULT()
800 string CCodeGen::CTRL_FLOW()
805 void CCodeGen::writeExports()
807 if ( exportList.length() > 0 ) {
808 for ( ExportList::Iter ex = exportList; ex.lte(); ex++ ) {
809 out << "#define " << DATA_PREFIX() << "ex_" << ex->name << " " <<
810 KEY(ex->key) << "\n";
820 string DCodeGen::NULL_ITEM()
825 string DCodeGen::POINTER()
827 // multiple items separated by commas can also be pointer types.
831 string DCodeGen::PTR_CONST()
836 std::ostream &DCodeGen::OPEN_ARRAY( string type, string name )
838 out << "static const " << type << "[] " << name << " = [\n";
842 std::ostream &DCodeGen::CLOSE_ARRAY()
844 return out << "];\n";
847 std::ostream &DCodeGen::STATIC_VAR( string type, string name )
849 out << "static const " << type << " " << name;
853 string DCodeGen::ARR_OFF( string ptr, string offset )
855 return "&" + ptr + "[" + offset + "]";
858 string DCodeGen::CAST( string type )
860 return "cast(" + type + ")";
863 string DCodeGen::UINT( )
868 std::ostream &DCodeGen::SWITCH_DEFAULT()
870 out << " default: break;\n";
874 string DCodeGen::CTRL_FLOW()
879 void DCodeGen::writeExports()
881 if ( exportList.length() > 0 ) {
882 for ( ExportList::Iter ex = exportList; ex.lte(); ex++ ) {
883 out << "static const " << ALPH_TYPE() << " " << DATA_PREFIX() <<
884 "ex_" << ex->name << " = " << KEY(ex->key) << ";\n";
891 * End D-specific code.
894 void FsmCodeGen::finishRagelDef()
896 if ( codeStyle == GenGoto || codeStyle == GenFGoto ||
897 codeStyle == GenIpGoto || codeStyle == GenSplit )
899 /* For directly executable machines there is no required state
900 * ordering. Choose a depth-first ordering to increase the
901 * potential for fall-throughs. */
902 redFsm->depthFirstOrdering();
905 /* The frontend will do this for us, but it may be a good idea to
906 * force it if the intermediate file is edited. */
907 redFsm->sortByStateId();
910 /* Choose default transitions and the single transition. */
911 redFsm->chooseDefaultSpan();
913 /* Maybe do flat expand, otherwise choose single. */
914 if ( codeStyle == GenFlat || codeStyle == GenFFlat )
917 redFsm->chooseSingle();
919 /* If any errors have occurred in the input file then don't write anything. */
920 if ( gblErrorCount > 0 )
923 if ( codeStyle == GenSplit )
924 redFsm->partitionFsm( numSplitPartitions );
926 if ( codeStyle == GenIpGoto || codeStyle == GenSplit )
927 redFsm->setInTrans();
929 /* Analyze Machine will find the final action reference counts, among
930 * other things. We will use these in reporting the usage
931 * of fsm directives in action code. */
934 /* Determine if we should use indices. */
938 ostream &FsmCodeGen::source_warning( const InputLoc &loc )
940 cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": warning: ";
944 ostream &FsmCodeGen::source_error( const InputLoc &loc )
947 assert( sourceFileName != 0 );
948 cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": ";