2 * Copyright 2001-2006 Adrian Thurston <thurston@complang.org>
3 * 2004 Erich Ocean <eric.ocean@ampede.com>
4 * 2005 Alan West <alan@alanz.com>
7 /* This file is part of Ragel.
9 * Ragel is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * Ragel is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Ragel; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #include "cdcodegen.h"
34 using std::ostringstream;
47 /* Target language and output style. */
50 extern istream *inStream;
51 extern ostream *outStream;
52 extern output_filter *outFilter;
53 extern const char *outputFileName;
55 /* Graphviz dot file generation. */
56 extern bool graphvizDone;
58 extern int numSplitPartitions;
59 extern bool noLineDirectives;
61 void cdLineDirective( ostream &out, const char *fileName, int line )
63 if ( noLineDirectives )
66 /* Write the preprocessor line info that refers to the input file. */
67 out << "#line " << line << " \"";
68 for ( const char *pc = fileName; *pc != 0; pc++ ) {
76 if ( noLineDirectives )
82 void FsmCodeGen::genLineDirective( ostream &out )
84 std::streambuf *sbuf = out.rdbuf();
85 output_filter *filter = static_cast<output_filter*>(sbuf);
86 cdLineDirective( out, filter->fileName, filter->line + 1 );
90 /* Init code gen with in parameters. */
91 FsmCodeGen::FsmCodeGen( ostream &out )
97 unsigned int FsmCodeGen::arrayTypeSize( unsigned long maxVal )
99 long long maxValLL = (long long) maxVal;
100 HostType *arrayType = keyOps->typeSubsumes( maxValLL );
101 assert( arrayType != 0 );
102 return arrayType->size;
105 string FsmCodeGen::ARRAY_TYPE( unsigned long maxVal )
107 long long maxValLL = (long long) maxVal;
108 HostType *arrayType = keyOps->typeSubsumes( maxValLL );
109 assert( arrayType != 0 );
111 string ret = arrayType->data1;
112 if ( arrayType->data2 != 0 ) {
114 ret += arrayType->data2;
120 /* Write out the fsm name. */
121 string FsmCodeGen::FSM_NAME()
126 /* Emit the offset of the start state as a decimal integer. */
127 string FsmCodeGen::START_STATE_ID()
130 ret << redFsm->startState->id;
134 /* Write out the array of actions. */
135 std::ostream &FsmCodeGen::ACTIONS_ARRAY()
138 int totalActions = 1;
139 for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) {
140 /* Write out the length, which will never be the last character. */
141 out << act->key.length() << ", ";
142 /* Put in a line break every 8 */
143 if ( totalActions++ % 8 == 7 )
146 for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) {
147 out << item->value->actionId;
148 if ( ! (act.last() && item.last()) )
151 /* Put in a line break every 8 */
152 if ( totalActions++ % 8 == 7 )
161 string FsmCodeGen::ACCESS()
164 if ( accessExpr != 0 )
165 INLINE_LIST( ret, accessExpr, 0, false, false );
170 string FsmCodeGen::P()
177 INLINE_LIST( ret, pExpr, 0, false, false );
183 string FsmCodeGen::PE()
190 INLINE_LIST( ret, peExpr, 0, false, false );
196 string FsmCodeGen::EOFV()
203 INLINE_LIST( ret, eofExpr, 0, false, false );
209 string FsmCodeGen::CS()
213 ret << ACCESS() << "cs";
215 /* Emit the user supplied method of retrieving the key. */
217 INLINE_LIST( ret, csExpr, 0, false, false );
223 string FsmCodeGen::TOP()
227 ret << ACCESS() + "top";
230 INLINE_LIST( ret, topExpr, 0, false, false );
236 string FsmCodeGen::STACK()
239 if ( stackExpr == 0 )
240 ret << ACCESS() + "stack";
243 INLINE_LIST( ret, stackExpr, 0, false, false );
249 string FsmCodeGen::ACT()
253 ret << ACCESS() + "act";
256 INLINE_LIST( ret, actExpr, 0, false, false );
262 string FsmCodeGen::TOKSTART()
265 if ( tokstartExpr == 0 )
266 ret << ACCESS() + "ts";
269 INLINE_LIST( ret, tokstartExpr, 0, false, false );
275 string FsmCodeGen::TOKEND()
278 if ( tokendExpr == 0 )
279 ret << ACCESS() + "te";
282 INLINE_LIST( ret, tokendExpr, 0, false, false );
288 string FsmCodeGen::GET_WIDE_KEY()
290 if ( redFsm->anyConditions() )
296 string FsmCodeGen::GET_WIDE_KEY( RedStateAp *state )
298 if ( state->stateCondList.length() > 0 )
304 string FsmCodeGen::GET_KEY()
307 if ( getKeyExpr != 0 ) {
308 /* Emit the user supplied method of retrieving the key. */
310 INLINE_LIST( ret, getKeyExpr, 0, false, false );
314 /* Expression for retrieving the key, use simple dereference. */
315 ret << "(*" << P() << ")";
320 /* Write out level number of tabs. Makes the nested binary search nice
322 string FsmCodeGen::TABS( int level )
325 while ( level-- > 0 )
330 /* Write out a key from the fsm code gen. Depends on whether or not the key is
332 string FsmCodeGen::KEY( Key key )
335 if ( keyOps->isSigned || !hostLang->explicitUnsigned )
338 ret << (unsigned long) key.getVal() << 'u';
342 void FsmCodeGen::EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish )
344 /* The parser gives fexec two children. The double brackets are for D
345 * code. If the inline list is a single word it will get interpreted as a
346 * C-style cast by the D compiler. */
347 ret << "{" << P() << " = ((";
348 INLINE_LIST( ret, item->children, targState, inFinish, false );
352 void FsmCodeGen::LM_SWITCH( ostream &ret, GenInlineItem *item,
353 int targState, int inFinish, bool csForced )
356 " switch( " << ACT() << " ) {\n";
358 bool haveDefault = false;
359 for ( GenInlineList::Iter lma = *item->children; lma.lte(); lma++ ) {
360 /* Write the case label, the action and the case break. */
361 if ( lma->lmId < 0 ) {
362 ret << " default:\n";
366 ret << " case " << lma->lmId << ":\n";
368 /* Write the block and close it off. */
370 INLINE_LIST( ret, lma->children, targState, inFinish, csForced );
376 if ( hostLang->lang == HostLang::D && !haveDefault )
377 ret << " default: break;";
384 void FsmCodeGen::SET_ACT( ostream &ret, GenInlineItem *item )
386 ret << ACT() << " = " << item->lmId << ";";
389 void FsmCodeGen::SET_TOKEND( ostream &ret, GenInlineItem *item )
391 /* The tokend action sets tokend. */
392 ret << TOKEND() << " = " << P();
393 if ( item->offset != 0 )
394 out << "+" << item->offset;
398 void FsmCodeGen::GET_TOKEND( ostream &ret, GenInlineItem *item )
403 void FsmCodeGen::INIT_TOKSTART( ostream &ret, GenInlineItem *item )
405 ret << TOKSTART() << " = " << NULL_ITEM() << ";";
408 void FsmCodeGen::INIT_ACT( ostream &ret, GenInlineItem *item )
410 ret << ACT() << " = 0;";
413 void FsmCodeGen::SET_TOKSTART( ostream &ret, GenInlineItem *item )
415 ret << TOKSTART() << " = " << P() << ";";
418 void FsmCodeGen::SUB_ACTION( ostream &ret, GenInlineItem *item,
419 int targState, bool inFinish, bool csForced )
421 if ( item->children->length() > 0 ) {
422 /* Write the block and close it off. */
424 INLINE_LIST( ret, item->children, targState, inFinish, csForced );
430 /* Write out an inline tree structure. Walks the list and possibly calls out
431 * to virtual functions that handle language-specific items in the tree. */
432 void FsmCodeGen::INLINE_LIST( ostream &ret, GenInlineList *inlineList,
433 int targState, bool inFinish, bool csForced )
435 for ( GenInlineList::Iter item = *inlineList; item.lte(); item++ ) {
436 switch ( item->type ) {
437 case GenInlineItem::Text:
440 case GenInlineItem::Goto:
441 GOTO( ret, item->targState->id, inFinish );
443 case GenInlineItem::Call:
444 CALL( ret, item->targState->id, targState, inFinish );
446 case GenInlineItem::Next:
447 NEXT( ret, item->targState->id, inFinish );
449 case GenInlineItem::Ret:
450 RET( ret, inFinish );
452 case GenInlineItem::PChar:
455 case GenInlineItem::Char:
458 case GenInlineItem::Hold:
461 case GenInlineItem::Exec:
462 EXEC( ret, item, targState, inFinish );
464 case GenInlineItem::Curs:
465 CURS( ret, inFinish );
467 case GenInlineItem::Targs:
468 TARGS( ret, inFinish, targState );
470 case GenInlineItem::Entry:
471 ret << item->targState->id;
473 case GenInlineItem::GotoExpr:
474 GOTO_EXPR( ret, item, inFinish );
476 case GenInlineItem::CallExpr:
477 CALL_EXPR( ret, item, targState, inFinish );
479 case GenInlineItem::NextExpr:
480 NEXT_EXPR( ret, item, inFinish );
482 case GenInlineItem::LmSwitch:
483 LM_SWITCH( ret, item, targState, inFinish, csForced );
485 case GenInlineItem::LmSetActId:
486 SET_ACT( ret, item );
488 case GenInlineItem::LmSetTokEnd:
489 SET_TOKEND( ret, item );
491 case GenInlineItem::LmGetTokEnd:
492 GET_TOKEND( ret, item );
494 case GenInlineItem::LmInitTokStart:
495 INIT_TOKSTART( ret, item );
497 case GenInlineItem::LmInitAct:
498 INIT_ACT( ret, item );
500 case GenInlineItem::LmSetTokStart:
501 SET_TOKSTART( ret, item );
503 case GenInlineItem::SubAction:
504 SUB_ACTION( ret, item, targState, inFinish, csForced );
506 case GenInlineItem::Break:
507 BREAK( ret, targState, csForced );
512 /* Write out paths in line directives. Escapes any special characters. */
513 string FsmCodeGen::LDIR_PATH( char *path )
516 for ( char *pc = path; *pc != 0; pc++ ) {
525 void FsmCodeGen::ACTION( ostream &ret, GenAction *action, int targState,
526 bool inFinish, bool csForced )
528 /* Write the preprocessor line info for going into the source file. */
529 cdLineDirective( ret, sourceFileName, action->loc.line );
531 /* Write the block and close it off. */
533 INLINE_LIST( ret, action->inlineList, targState, inFinish, csForced );
537 void FsmCodeGen::CONDITION( ostream &ret, GenAction *condition )
540 cdLineDirective( ret, sourceFileName, condition->loc.line );
541 INLINE_LIST( ret, condition->inlineList, 0, false, false );
544 string FsmCodeGen::ERROR_STATE()
547 if ( redFsm->errState != 0 )
548 ret << redFsm->errState->id;
554 string FsmCodeGen::FIRST_FINAL_STATE()
557 if ( redFsm->firstFinState != 0 )
558 ret << redFsm->firstFinState->id;
560 ret << redFsm->nextStateId;
564 void FsmCodeGen::writeInit()
569 out << "\t" << CS() << " = " << START() << ";\n";
571 /* If there are any calls, then the stack top needs initialization. */
572 if ( redFsm->anyActionCalls() || redFsm->anyActionRets() )
573 out << "\t" << TOP() << " = 0;\n";
575 if ( hasLongestMatch ) {
577 " " << TOKSTART() << " = " << NULL_ITEM() << ";\n"
578 " " << TOKEND() << " = " << NULL_ITEM() << ";\n"
579 " " << ACT() << " = 0;\n";
584 string FsmCodeGen::DATA_PREFIX()
587 return FSM_NAME() + "_";
591 /* Emit the alphabet data type. */
592 string FsmCodeGen::ALPH_TYPE()
594 string ret = keyOps->alphType->data1;
595 if ( keyOps->alphType->data2 != 0 ) {
597 ret += + keyOps->alphType->data2;
602 /* Emit the wide alphabet data type. */
603 string FsmCodeGen::WIDE_ALPH_TYPE()
606 if ( redFsm->maxKey <= keyOps->maxKey )
609 long long maxKeyVal = redFsm->maxKey.getLongLong();
610 HostType *wideType = keyOps->typeSubsumes( keyOps->isSigned, maxKeyVal );
611 assert( wideType != 0 );
613 ret = wideType->data1;
614 if ( wideType->data2 != 0 ) {
616 ret += wideType->data2;
622 void FsmCodeGen::STATE_IDS()
624 if ( redFsm->startState != 0 )
625 STATIC_VAR( "int", START() ) << " = " << START_STATE_ID() << ";\n";
628 STATIC_VAR( "int" , FIRST_FINAL() ) << " = " << FIRST_FINAL_STATE() << ";\n";
631 STATIC_VAR( "int", ERROR() ) << " = " << ERROR_STATE() << ";\n";
635 if ( entryPointNames.length() > 0 ) {
636 for ( EntryNameVect::Iter en = entryPointNames; en.lte(); en++ ) {
637 STATIC_VAR( "int", DATA_PREFIX() + "en_" + *en ) <<
638 " = " << entryPointIds[en.pos()] << ";\n";
644 void FsmCodeGen::writeStart()
646 out << START_STATE_ID();
649 void FsmCodeGen::writeFirstFinal()
651 out << FIRST_FINAL_STATE();
654 void FsmCodeGen::writeError()
656 out << ERROR_STATE();
660 * Language specific, but style independent code generators functions.
663 string CCodeGen::PTR_CONST()
668 std::ostream &CCodeGen::OPEN_ARRAY( string type, string name )
670 out << "static const " << type << " " << name << "[] = {\n";
674 std::ostream &CCodeGen::CLOSE_ARRAY()
676 return out << "};\n";
679 std::ostream &CCodeGen::STATIC_VAR( string type, string name )
681 out << "static const " << type << " " << name;
685 string CCodeGen::UINT( )
687 return "unsigned int";
690 string CCodeGen::ARR_OFF( string ptr, string offset )
692 return ptr + " + " + offset;
695 string CCodeGen::CAST( string type )
697 return "(" + type + ")";
700 string CCodeGen::NULL_ITEM()
705 string CCodeGen::POINTER()
710 std::ostream &CCodeGen::SWITCH_DEFAULT()
715 string CCodeGen::CTRL_FLOW()
720 void CCodeGen::writeExports()
722 if ( exportList.length() > 0 ) {
723 for ( ExportList::Iter ex = exportList; ex.lte(); ex++ ) {
724 out << "#define " << DATA_PREFIX() << "ex_" << ex->name << " " <<
725 KEY(ex->key) << "\n";
735 string DCodeGen::NULL_ITEM()
740 string DCodeGen::POINTER()
742 // multiple items separated by commas can also be pointer types.
746 string DCodeGen::PTR_CONST()
751 std::ostream &DCodeGen::OPEN_ARRAY( string type, string name )
753 out << "static const " << type << "[] " << name << " = [\n";
757 std::ostream &DCodeGen::CLOSE_ARRAY()
759 return out << "];\n";
762 std::ostream &DCodeGen::STATIC_VAR( string type, string name )
764 out << "static const " << type << " " << name;
768 string DCodeGen::ARR_OFF( string ptr, string offset )
770 return "&" + ptr + "[" + offset + "]";
773 string DCodeGen::CAST( string type )
775 return "cast(" + type + ")";
778 string DCodeGen::UINT( )
783 std::ostream &DCodeGen::SWITCH_DEFAULT()
785 out << " default: break;\n";
789 string DCodeGen::CTRL_FLOW()
794 void DCodeGen::writeExports()
796 if ( exportList.length() > 0 ) {
797 for ( ExportList::Iter ex = exportList; ex.lte(); ex++ ) {
798 out << "static const " << ALPH_TYPE() << " " << DATA_PREFIX() <<
799 "ex_" << ex->name << " = " << KEY(ex->key) << ";\n";
806 * End D-specific code.
809 void FsmCodeGen::finishRagelDef()
811 if ( codeStyle == GenGoto || codeStyle == GenFGoto ||
812 codeStyle == GenIpGoto || codeStyle == GenSplit )
814 /* For directly executable machines there is no required state
815 * ordering. Choose a depth-first ordering to increase the
816 * potential for fall-throughs. */
817 redFsm->depthFirstOrdering();
820 /* The frontend will do this for us, but it may be a good idea to
821 * force it if the intermediate file is edited. */
822 redFsm->sortByStateId();
825 /* Choose default transitions and the single transition. */
826 redFsm->chooseDefaultSpan();
828 /* Maybe do flat expand, otherwise choose single. */
829 if ( codeStyle == GenFlat || codeStyle == GenFFlat )
832 redFsm->chooseSingle();
834 /* If any errors have occurred in the input file then don't write anything. */
835 if ( gblErrorCount > 0 )
838 if ( codeStyle == GenSplit )
839 redFsm->partitionFsm( numSplitPartitions );
841 if ( codeStyle == GenIpGoto || codeStyle == GenSplit )
842 redFsm->setInTrans();
844 /* Analyze Machine will find the final action reference counts, among
845 * other things. We will use these in reporting the usage
846 * of fsm directives in action code. */
849 /* Determine if we should use indices. */
853 ostream &FsmCodeGen::source_warning( const InputLoc &loc )
855 cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": warning: ";
859 ostream &FsmCodeGen::source_error( const InputLoc &loc )
862 assert( sourceFileName != 0 );
863 cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": ";