2 * Copyright 2001-2006 Adrian Thurston <thurston@complang.org>
3 * 2004 Erich Ocean <eric.ocean@ampede.com>
4 * 2005 Alan West <alan@alanz.com>
7 /* This file is part of Ragel.
9 * Ragel is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * Ragel is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Ragel; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #include "cdcodegen.h"
34 using std::ostringstream;
48 extern int numSplitPartitions;
49 extern bool noLineDirectives;
/* Write a #line directive naming fileName and line to out. The
 * noLineDirectives flag is tested both before and after the directive text. */
51 void cdLineDirective( ostream &out, const char *fileName, int line )
53 if ( noLineDirectives )
56 /* Write the preprocessor line info for the input file. */
57 out << "#line " << line << " \"";
/* The file name is walked one character at a time up to its terminating NUL. */
58 for ( const char *pc = fileName; *pc != 0; pc++ ) {
66 if ( noLineDirectives )
/* Emit a line directive that refers to the output stream itself, using the
 * file name and line recorded in the stream's output_filter buffer.
 * NOTE(review): the stream buffer is downcast with an unchecked static_cast;
 * this presumably relies on out always being backed by an output_filter --
 * confirm against callers. */
72 void FsmCodeGen::genLineDirective( ostream &out )
74 std::streambuf *sbuf = out.rdbuf();
75 output_filter *filter = static_cast<output_filter*>(sbuf);
/* The directive names the line after the filter's recorded line. */
76 cdLineDirective( out, filter->fileName, filter->line + 1 );
80 /* Init code gen with in parameters. */
81 FsmCodeGen::FsmCodeGen( ostream &out )
/* Return the size field of the host type that keyOps->typeSubsumes selects
 * for maxVal. A matching type must exist; this is asserted. */
87 unsigned int FsmCodeGen::arrayTypeSize( unsigned long maxVal )
89 long long maxValLL = (long long) maxVal;
90 HostType *arrayType = keyOps->typeSubsumes( maxValLL );
91 assert( arrayType != 0 );
92 return arrayType->size;
/* Build the name of the host type that keyOps->typeSubsumes selects for
 * maxVal. The name starts with the type's data1 component and has data2
 * appended when data2 is non-null. A matching type must exist (asserted). */
95 string FsmCodeGen::ARRAY_TYPE( unsigned long maxVal )
97 long long maxValLL = (long long) maxVal;
98 HostType *arrayType = keyOps->typeSubsumes( maxValLL );
99 assert( arrayType != 0 );
101 string ret = arrayType->data1;
102 if ( arrayType->data2 != 0 ) {
104 ret += arrayType->data2;
110 /* Write out the fsm name. */
111 string FsmCodeGen::FSM_NAME()
116 /* Emit the id of the start state (redFsm->startState->id), formatted
 * into the returned string. */
117 string FsmCodeGen::START_STATE_ID()
120 ret << redFsm->startState->id;
124 /* Write out the array of actions. For every action table in
 * redFsm->actionMap this writes the table length followed by the action id
 * of each item in the table. */
125 std::ostream &FsmCodeGen::ACTIONS_ARRAY()
/* Counts the values written so far (lengths and ids); drives line breaks. */
128 int totalActions = 1;
129 for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) {
130 /* Write out the length, which will never be the last character. */
131 out << act->key.length() << ", ";
132 /* Put in a line break every 8 */
133 if ( totalActions++ % 8 == 7 )
136 for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) {
137 out << item->value->actionId;
/* True for every id except the last id of the last table; presumably
 * guards the separator -- the guarded statement is not shown here. */
138 if ( ! (act.last() && item.last()) )
141 /* Put in a line break every 8 */
142 if ( totalActions++ % 8 == 7 )
/* Build the access expression. When accessExpr is set, it is expanded
 * through INLINE_LIST into the result. */
151 string FsmCodeGen::ACCESS()
154 if ( accessExpr != 0 )
155 INLINE_LIST( ret, accessExpr, 0, false, false );
/* Build the expression for p. This path expands pExpr through INLINE_LIST;
 * presumably it is only taken when pExpr is set -- confirm. */
160 string FsmCodeGen::P()
167 INLINE_LIST( ret, pExpr, 0, false, false );
/* Build the expression for pe. This path expands peExpr through INLINE_LIST;
 * presumably it is only taken when peExpr is set -- confirm. */
173 string FsmCodeGen::PE()
180 INLINE_LIST( ret, peExpr, 0, false, false );
/* Build the expression for eof. This path expands eofExpr through
 * INLINE_LIST; presumably it is only taken when eofExpr is set -- confirm. */
186 string FsmCodeGen::vEOF()
193 INLINE_LIST( ret, eofExpr, 0, false, false );
/* Build the expression for the current state variable: either ACCESS()
 * followed by "cs", or the expansion of csExpr. */
199 string FsmCodeGen::vCS()
203 ret << ACCESS() << "cs";
205 /* Emit the user supplied cs expression. */
207 INLINE_LIST( ret, csExpr, 0, false, false );
/* Build the expression for the stack top variable: either ACCESS() followed
 * by "top", or the expansion of topExpr. */
213 string FsmCodeGen::TOP()
217 ret << ACCESS() + "top";
220 INLINE_LIST( ret, topExpr, 0, false, false );
/* Build the expression for the stack variable: ACCESS() followed by "stack"
 * when stackExpr is not set, otherwise the expansion of stackExpr. */
226 string FsmCodeGen::STACK()
229 if ( stackExpr == 0 )
230 ret << ACCESS() + "stack";
233 INLINE_LIST( ret, stackExpr, 0, false, false );
/* Build the expression for the act variable: either ACCESS() followed by
 * "act", or the expansion of actExpr. */
239 string FsmCodeGen::ACT()
243 ret << ACCESS() + "act";
246 INLINE_LIST( ret, actExpr, 0, false, false );
/* Build the expression for the token start variable: ACCESS() followed by
 * "ts" when tokstartExpr is not set, otherwise the expansion of
 * tokstartExpr. */
252 string FsmCodeGen::TOKSTART()
255 if ( tokstartExpr == 0 )
256 ret << ACCESS() + "ts";
259 INLINE_LIST( ret, tokstartExpr, 0, false, false );
/* Build the expression for the token end variable: ACCESS() followed by
 * "te" when tokendExpr is not set, otherwise the expansion of tokendExpr. */
265 string FsmCodeGen::TOKEND()
268 if ( tokendExpr == 0 )
269 ret << ACCESS() + "te";
272 INLINE_LIST( ret, tokendExpr, 0, false, false );
/* Return the key expression for the whole machine, chosen according to
 * whether the machine has any conditions (redFsm->anyConditions()). */
278 string FsmCodeGen::GET_WIDE_KEY()
280 if ( redFsm->anyConditions() )
/* Per-state variant: the key expression is chosen according to whether the
 * given state has a non-empty stateCondList. */
286 string FsmCodeGen::GET_WIDE_KEY( RedStateAp *state )
288 if ( state->stateCondList.length() > 0 )
/* Build the expression that reads the current key: the expansion of
 * getKeyExpr when one is set, otherwise a dereference of P(). */
294 string FsmCodeGen::GET_KEY()
297 if ( getKeyExpr != 0 ) {
298 /* Emit the user supplied method of retrieving the key. */
300 INLINE_LIST( ret, getKeyExpr, 0, false, false );
304 /* Expression for retrieving the key, use simple dereference. */
305 ret << "(*" << P() << ")";
310 /* Write out level number of tabs. Makes the nested binary search nicer to read. */
/* Build indentation for the given nesting level; the loop runs once per
 * level and does nothing when level is zero or negative. */
312 string FsmCodeGen::TABS( int level )
315 while ( level-- > 0 )
320 /* Write out a key from the fsm code gen. Depends on whether or not the key is signed. */
/* Format a key as a literal. The first form is used when keys are signed or
 * the host language has no explicit unsigned marker; otherwise the value is
 * written as an unsigned long with a 'u' suffix. */
322 string FsmCodeGen::KEY( Key key )
325 if ( keyOps->isSigned || !hostLang->explicitUnsigned )
328 ret << (unsigned long) key.getVal() << 'u';
/* Emit the code for an fexec item: opens a block that assigns to P() the
 * expression produced by expanding the item's children.
 * NOTE(review): inFinish is declared int here but bool in INLINE_LIST. */
332 void FsmCodeGen::EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish )
334 /* The parser gives fexec two children. The double brackets are for D
335 * code. If the inline list is a single word it will get interpreted as a
336 * C-style cast by the D compiler. */
337 ret << "{" << P() << " = ((";
338 INLINE_LIST( ret, item->children, targState, inFinish, false );
/* Emit a switch on ACT() with one arm per child of item. A child whose lmId
 * is negative becomes the default arm; every other child becomes a case
 * labelled with its lmId. Each arm contains the expansion of that child's
 * own children.
 * NOTE(review): inFinish is declared int here but bool in INLINE_LIST. */
342 void FsmCodeGen::LM_SWITCH( ostream &ret, GenInlineItem *item,
343 int targState, int inFinish, bool csForced )
346 " switch( " << ACT() << " ) {\n";
/* Consulted after the loop to decide whether D needs a default arm. */
348 bool haveDefault = false;
349 for ( GenInlineList::Iter lma = *item->children; lma.lte(); lma++ ) {
350 /* Write the case label, the action and the case break. */
351 if ( lma->lmId < 0 ) {
352 ret << " default:\n";
356 ret << " case " << lma->lmId << ":\n";
358 /* Write the block and close it off. */
360 INLINE_LIST( ret, lma->children, targState, inFinish, csForced );
/* For D output, add an empty default arm when no child supplied one. */
366 if ( hostLang->lang == HostLang::D && !haveDefault )
367 ret << " default: break;";
/* Emit an assignment of item->lmId to the act variable. */
374 void FsmCodeGen::SET_ACT( ostream &ret, GenInlineItem *item )
376 ret << ACT() << " = " << item->lmId << ";";
/* Emit the assignment that sets the token end variable to P(), plus
 * item->offset when the offset is non-zero. All text is written to ret, the
 * stream supplied by the caller, so the statement stays in one piece even
 * when ret is not the generator's own out stream. */
379 void FsmCodeGen::SET_TOKEND( ostream &ret, GenInlineItem *item )
381 /* The tokend action sets tokend. */
382 ret << TOKEND() << " = " << P();
383 if ( item->offset != 0 )
/* Append the offset to the same stream as the assignment it belongs to. */
384 ret << "+" << item->offset;
388 void FsmCodeGen::GET_TOKEND( ostream &ret, GenInlineItem *item )
/* Emit an assignment of NULL_ITEM() to the token start variable. The item
 * argument is not used by the visible statement. */
393 void FsmCodeGen::INIT_TOKSTART( ostream &ret, GenInlineItem *item )
395 ret << TOKSTART() << " = " << NULL_ITEM() << ";";
/* Emit an assignment of 0 to the act variable. The item argument is not
 * used by the visible statement. */
398 void FsmCodeGen::INIT_ACT( ostream &ret, GenInlineItem *item )
400 ret << ACT() << " = 0;";
/* Emit an assignment of P() to the token start variable. The item argument
 * is not used by the visible statement. */
403 void FsmCodeGen::SET_TOKSTART( ostream &ret, GenInlineItem *item )
405 ret << TOKSTART() << " = " << P() << ";";
/* Emit a nested action block holding the expansion of item's children.
 * Nothing is expanded when the item has no children. */
408 void FsmCodeGen::SUB_ACTION( ostream &ret, GenInlineItem *item,
409 int targState, bool inFinish, bool csForced )
411 if ( item->children->length() > 0 ) {
412 /* Write the block and close it off. */
414 INLINE_LIST( ret, item->children, targState, inFinish, csForced );
420 /* Write out an inline tree structure. Walks the list and possibly calls out
421 * to virtual functions that handle language specific items in the tree.
 * Each item is dispatched on its type; targState, inFinish and csForced are
 * forwarded to the handlers that take them. */
422 void FsmCodeGen::INLINE_LIST( ostream &ret, GenInlineList *inlineList,
423 int targState, bool inFinish, bool csForced )
425 for ( GenInlineList::Iter item = *inlineList; item.lte(); item++ ) {
426 switch ( item->type ) {
427 case GenInlineItem::Text:
430 case GenInlineItem::Goto:
431 GOTO( ret, item->targState->id, inFinish );
433 case GenInlineItem::Call:
434 CALL( ret, item->targState->id, targState, inFinish );
436 case GenInlineItem::Next:
437 NEXT( ret, item->targState->id, inFinish );
439 case GenInlineItem::Ret:
440 RET( ret, inFinish );
442 case GenInlineItem::PChar:
445 case GenInlineItem::Char:
448 case GenInlineItem::Hold:
451 case GenInlineItem::Exec:
452 EXEC( ret, item, targState, inFinish );
454 case GenInlineItem::Curs:
455 CURS( ret, inFinish );
457 case GenInlineItem::Targs:
458 TARGS( ret, inFinish, targState );
460 case GenInlineItem::Entry:
/* An entry item is written as the numeric id of its target state. */
461 ret << item->targState->id;
463 case GenInlineItem::GotoExpr:
464 GOTO_EXPR( ret, item, inFinish );
466 case GenInlineItem::CallExpr:
467 CALL_EXPR( ret, item, targState, inFinish );
469 case GenInlineItem::NextExpr:
470 NEXT_EXPR( ret, item, inFinish );
472 case GenInlineItem::LmSwitch:
473 LM_SWITCH( ret, item, targState, inFinish, csForced );
475 case GenInlineItem::LmSetActId:
476 SET_ACT( ret, item );
478 case GenInlineItem::LmSetTokEnd:
479 SET_TOKEND( ret, item );
481 case GenInlineItem::LmGetTokEnd:
482 GET_TOKEND( ret, item );
484 case GenInlineItem::LmInitTokStart:
485 INIT_TOKSTART( ret, item );
487 case GenInlineItem::LmInitAct:
488 INIT_ACT( ret, item );
490 case GenInlineItem::LmSetTokStart:
491 SET_TOKSTART( ret, item );
493 case GenInlineItem::SubAction:
494 SUB_ACTION( ret, item, targState, inFinish, csForced );
496 case GenInlineItem::Break:
497 BREAK( ret, targState, csForced );
502 /* Write out paths in line directives. Escapes any special characters.
 * The path is walked one character at a time up to its terminating NUL. */
503 string FsmCodeGen::LDIR_PATH( char *path )
506 for ( char *pc = path; *pc != 0; pc++ ) {
/* Emit the code of a user action: a line directive pointing at the action's
 * source location, followed by the expansion of the action's inline list. */
515 void FsmCodeGen::ACTION( ostream &ret, GenAction *action, int targState,
516 bool inFinish, bool csForced )
518 /* Write the preprocessor line info for going into the source file. */
519 cdLineDirective( ret, action->loc.fileName, action->loc.line );
521 /* Write the block and close it off. */
523 INLINE_LIST( ret, action->inlineList, targState, inFinish, csForced );
/* Emit the code of a condition: a line directive pointing at the condition's
 * source location, followed by the expansion of its inline list with target
 * state 0 and both flags false. */
527 void FsmCodeGen::CONDITION( ostream &ret, GenAction *condition )
530 cdLineDirective( ret, condition->loc.fileName, condition->loc.line );
531 INLINE_LIST( ret, condition->inlineList, 0, false, false );
/* Format the id of the error state. When the machine has an error state
 * (redFsm->errState is non-null), its id is written. */
534 string FsmCodeGen::ERROR_STATE()
537 if ( redFsm->errState != 0 )
538 ret << redFsm->errState->id;
/* Format the id of the first final state. When redFsm->firstFinState is
 * non-null its id is written; redFsm->nextStateId is presumably the fallback
 * for machines without one -- confirm. */
544 string FsmCodeGen::FIRST_FINAL_STATE()
547 if ( redFsm->firstFinState != 0 )
548 ret << redFsm->firstFinState->id;
550 ret << redFsm->nextStateId;
/* Write the initialisation code for the machine's variables to out: the
 * current state is set to START(), the stack top is zeroed when it is needed,
 * and the token start, token end and act variables are reset when the
 * machine uses longest-match. */
554 void FsmCodeGen::writeInit()
559 out << "\t" << vCS() << " = " << START() << ";\n";
561 /* If there are any calls, then the stack top needs initialization. */
562 if ( redFsm->anyActionCalls() || redFsm->anyActionRets() )
563 out << "\t" << TOP() << " = 0;\n";
565 if ( hasLongestMatch ) {
567 " " << TOKSTART() << " = " << NULL_ITEM() << ";\n"
568 " " << TOKEND() << " = " << NULL_ITEM() << ";\n"
569 " " << ACT() << " = 0;\n";
/* Return the prefix for generated data names: FSM_NAME() followed by an
 * underscore on this path. */
574 string FsmCodeGen::DATA_PREFIX()
577 return FSM_NAME() + "_";
581 /* Emit the alphabet data type. The name starts with the data1 component
 * of keyOps->alphType and has data2 appended when data2 is non-null. */
582 string FsmCodeGen::ALPH_TYPE()
584 string ret = keyOps->alphType->data1;
585 if ( keyOps->alphType->data2 != 0 ) {
587 ret += keyOps->alphType->data2;
592 /* Emit the wide alphabet data type. The machine's maximum key is compared
 * with keyOps->maxKey; on the path shown below the compare, a host type
 * that subsumes the machine's maximum key is looked up (asserted to exist)
 * and its name is built from data1 plus the optional data2. */
593 string FsmCodeGen::WIDE_ALPH_TYPE()
596 if ( redFsm->maxKey <= keyOps->maxKey )
599 long long maxKeyVal = redFsm->maxKey.getLongLong();
600 HostType *wideType = keyOps->typeSubsumes( keyOps->isSigned, maxKeyVal );
601 assert( wideType != 0 );
603 ret = wideType->data1;
604 if ( wideType->data2 != 0 ) {
606 ret += wideType->data2;
/* Write the static int variables that name special states: the start state
 * (when the machine has one), the first final state, the error state, and
 * one "en_" variable per entry point name. */
612 void FsmCodeGen::STATE_IDS()
614 if ( redFsm->startState != 0 )
615 STATIC_VAR( "int", START() ) << " = " << START_STATE_ID() << ";\n";
618 STATIC_VAR( "int" , FIRST_FINAL() ) << " = " << FIRST_FINAL_STATE() << ";\n";
621 STATIC_VAR( "int", ERROR() ) << " = " << ERROR_STATE() << ";\n";
625 if ( entryPointNames.length() > 0 ) {
626 for ( EntryNameVect::Iter en = entryPointNames; en.lte(); en++ ) {
/* entryPointIds is indexed by the position of the name in the list. */
627 STATIC_VAR( "int", DATA_PREFIX() + "en_" + *en ) <<
628 " = " << entryPointIds[en.pos()] << ";\n";
/* Write the start state id to out. */
634 void FsmCodeGen::writeStart()
636 out << START_STATE_ID();
/* Write the first final state id to out. */
639 void FsmCodeGen::writeFirstFinal()
641 out << FIRST_FINAL_STATE();
/* Write the error state id to out. */
644 void FsmCodeGen::writeError()
646 out << ERROR_STATE();
650 * Language specific, but style independent code generator functions.
653 string CCodeGen::PTR_CONST()
/* Write the opening of a static const array definition with the given
 * element type and name, up to and including the opening brace. */
658 std::ostream &CCodeGen::OPEN_ARRAY( string type, string name )
660 out << "static const " << type << " " << name << "[] = {\n";
/* Write the closing brace and semicolon of an array definition and return
 * the output stream. */
664 std::ostream &CCodeGen::CLOSE_ARRAY()
666 return out << "};\n";
/* Write the declaration part of a static const variable (type and name, no
 * initialiser), so the caller can append the initialiser. */
669 std::ostream &CCodeGen::STATIC_VAR( string type, string name )
671 out << "static const " << type << " " << name;
/* Return the spelling of the unsigned integer type used by this generator. */
675 string CCodeGen::UINT( )
677 return "unsigned int";
/* Return an expression for the element at offset within ptr, written as
 * pointer addition. */
680 string CCodeGen::ARR_OFF( string ptr, string offset )
682 return ptr + " + " + offset;
/* Return a cast to type, written as the type name in parentheses. */
685 string CCodeGen::CAST( string type )
687 return "(" + type + ")";
690 string CCodeGen::NULL_ITEM()
695 string CCodeGen::POINTER()
700 std::ostream &CCodeGen::SWITCH_DEFAULT()
705 string CCodeGen::CTRL_FLOW()
/* Write one #define per exported key. Each macro is named DATA_PREFIX()
 * followed by "ex_" and the export name, and expands to the key literal.
 * Nothing is written when the export list is empty. */
710 void CCodeGen::writeExports()
712 if ( exportList.length() > 0 ) {
713 for ( ExportList::Iter ex = exportList; ex.lte(); ex++ ) {
714 out << "#define " << DATA_PREFIX() << "ex_" << ex->name << " " <<
715 KEY(ex->key) << "\n";
725 string DCodeGen::NULL_ITEM()
730 string DCodeGen::POINTER()
732 // multiple items separated by commas can also be pointer types.
736 string DCodeGen::PTR_CONST()
/* Write the opening of a static const array definition in D syntax: the
 * brackets follow the element type and the initialiser opens with '['. */
741 std::ostream &DCodeGen::OPEN_ARRAY( string type, string name )
743 out << "static const " << type << "[] " << name << " = [\n";
/* Write the closing bracket and semicolon of an array definition and return
 * the output stream. */
747 std::ostream &DCodeGen::CLOSE_ARRAY()
749 return out << "];\n";
/* Write the declaration part of a static const variable (type and name, no
 * initialiser), so the caller can append the initialiser. */
752 std::ostream &DCodeGen::STATIC_VAR( string type, string name )
754 out << "static const " << type << " " << name;
/* Return an expression for the element at offset within ptr, written as the
 * address of an indexed element. */
758 string DCodeGen::ARR_OFF( string ptr, string offset )
760 return "&" + ptr + "[" + offset + "]";
/* Return a cast to type, written in the cast(type) form. */
763 string DCodeGen::CAST( string type )
765 return "cast(" + type + ")";
768 string DCodeGen::UINT( )
/* Write an empty default arm ("default: break;") for a generated switch. */
773 std::ostream &DCodeGen::SWITCH_DEFAULT()
775 out << " default: break;\n";
779 string DCodeGen::CTRL_FLOW()
/* Write one static const definition per exported key. Each constant has the
 * alphabet type, is named DATA_PREFIX() followed by "ex_" and the export
 * name, and is initialised with the key literal. Nothing is written when the
 * export list is empty. */
784 void DCodeGen::writeExports()
786 if ( exportList.length() > 0 ) {
787 for ( ExportList::Iter ex = exportList; ex.lte(); ex++ ) {
788 out << "static const " << ALPH_TYPE() << " " << DATA_PREFIX() <<
789 "ex_" << ex->name << " = " << KEY(ex->key) << ";\n";
796 * End D-specific code.
/* Prepare the reduced machine for code generation according to codeStyle:
 * order the states, choose default and single transitions, and run the
 * extra passes needed by the split and in-place goto styles. */
799 void FsmCodeGen::finishRagelDef()
801 if ( codeStyle == GenGoto || codeStyle == GenFGoto ||
802 codeStyle == GenIpGoto || codeStyle == GenSplit )
804 /* For directly executable machines there is no required state
805 * ordering. Choose a depth-first ordering to increase the
806 * potential for fall-throughs. */
807 redFsm->depthFirstOrdering();
810 /* The frontend will do this for us, but it may be a good idea to
811 * force it if the intermediate file is edited. */
812 redFsm->sortByStateId();
815 /* Choose default transitions and the single transition. */
816 redFsm->chooseDefaultSpan();
818 /* Maybe do flat expand, otherwise choose single. */
819 if ( codeStyle == GenFlat || codeStyle == GenFFlat )
822 redFsm->chooseSingle();
824 /* If any errors have occurred in the input file then don't write anything. */
825 if ( gblErrorCount > 0 )
/* The split style partitions the machine into numSplitPartitions parts. */
828 if ( codeStyle == GenSplit )
829 redFsm->partitionFsm( numSplitPartitions );
831 if ( codeStyle == GenIpGoto || codeStyle == GenSplit )
832 redFsm->setInTrans();
834 /* Analyze Machine will find the final action reference counts, among
835 * other things. We will use these in reporting the usage
836 * of fsm directives in action code. */
839 /* Determine if we should use indices. */
/* Start a warning message on cerr, prefixed with the source file name and
 * the line and column of loc. NOTE(review): unlike source_error, there is no
 * visible assertion that sourceFileName is non-null. */
843 ostream &FsmCodeGen::source_warning( const InputLoc &loc )
845 cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": warning: ";
/* Start an error message on cerr, prefixed with the source file name and the
 * line and column of loc. sourceFileName must be non-null (asserted). */
849 ostream &FsmCodeGen::source_error( const InputLoc &loc )
852 assert( sourceFileName != 0 );
853 cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": ";