2 * Copyright 2001-2006 Adrian Thurston <thurston@complang.org>
3 * 2004 Erich Ocean <eric.ocean@ampede.com>
4 * 2005 Alan West <alan@alanz.com>
7 /* This file is part of Ragel.
9 * Ragel is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * Ragel is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Ragel; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include "cscodegen.h"
33 /* Code generators. */
44 using std::ostringstream;
/*
 * Factory for the C# code generators. One of the CSharp*CodeGen classes is
 * instantiated on `out` inside the switch on codeStyle, then the source file
 * name, machine name and wantComplete flag are stored on the new object.
 *
 * NOTE(review): codeGen starts as 0 and is dereferenced after the switch;
 * presumably every codeStyle value reaches one of the allocations -- confirm.
 */
58 /* Invoked by the parser when a ragel definition is opened. */
59 CodeGenData *csharpMakeCodeGen( const char *sourceFileName, const char *fsmName,
60 ostream &out, bool wantComplete )
62 CodeGenData *codeGen = 0;
64 switch ( codeStyle ) {
66 codeGen = new CSharpTabCodeGen(out);
69 codeGen = new CSharpFTabCodeGen(out);
72 codeGen = new CSharpFlatCodeGen(out);
75 codeGen = new CSharpFFlatCodeGen(out);
78 codeGen = new CSharpGotoCodeGen(out);
81 codeGen = new CSharpFGotoCodeGen(out);
84 codeGen = new CSharpIpGotoCodeGen(out);
87 codeGen = new CSharpSplitCodeGen(out);
91 codeGen->sourceFileName = sourceFileName;
92 codeGen->fsmName = fsmName;
93 codeGen->wantComplete = wantComplete;
/*
 * Writes a #line directive to out: the text "#line ", the line number, and
 * the opening quote of the file name, after which fileName is walked one
 * character at a time up to its terminating NUL. The noLineDirectives flag
 * is consulted both before and after the directive text.
 */
98 void csharpLineDirective( ostream &out, const char *fileName, int line )
100 if ( noLineDirectives )
103 /* Write the preprocessor line info for the input file. */
104 out << "#line " << line << " \"";
105 for ( const char *pc = fileName; *pc != 0; pc++ ) {
113 if ( noLineDirectives )
119 void CSharpFsmCodeGen::genLineDirective( ostream &out )
121 std::streambuf *sbuf = out.rdbuf();
122 output_filter *filter = static_cast<output_filter*>(sbuf);
123 csharpLineDirective( out, filter->fileName, filter->line + 1 );
127 /* Init code gen with in parameters. */
128 CSharpFsmCodeGen::CSharpFsmCodeGen( ostream &out )
134 unsigned int CSharpFsmCodeGen::arrayTypeSize( unsigned long maxVal )
136 long long maxValLL = (long long) maxVal;
137 HostType *arrayType = keyOps->typeSubsumes( maxValLL );
138 assert( arrayType != 0 );
139 return arrayType->size;
142 string CSharpFsmCodeGen::ARRAY_TYPE( unsigned long maxVal )
144 return ARRAY_TYPE( maxVal, false );
/*
 * Returns the name of a host type selected for maxVal. The HostType comes
 * from keyOps->typeSubsumes(): one call passes true as an extra first
 * argument, the other passes only the value. The name starts with data1 and
 * has data2 appended when data2 is non-null. Asserts that a type was found.
 *
 * NOTE(review): forceSigned presumably chooses between the two lookups --
 * confirm.
 */
147 string CSharpFsmCodeGen::ARRAY_TYPE( unsigned long maxVal, bool forceSigned )
149 long long maxValLL = (long long) maxVal;
152 arrayType = keyOps->typeSubsumes(true, maxValLL);
154 arrayType = keyOps->typeSubsumes( maxValLL );
155 assert( arrayType != 0 );
157 string ret = arrayType->data1;
158 if ( arrayType->data2 != 0 ) {
160 ret += arrayType->data2;
165 /* Write out the fsm name. */
166 string CSharpFsmCodeGen::FSM_NAME()
171 /* Emit the offset of the start state as a decimal integer. */
172 string CSharpFsmCodeGen::START_STATE_ID()
175 ret << redFsm->startState->id;
/*
 * Streams the contents of the action table to out. For every entry of
 * redFsm->actionMap the length of its key is written followed by ", ", then
 * the actionId of each item in that key. totalActions counts the values
 * written so far and drives the "line break every 8" checks.
 */
179 /* Write out the array of actions. */
180 std::ostream &CSharpFsmCodeGen::ACTIONS_ARRAY()
183 int totalActions = 1;
184 for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) {
185 /* Write out the length, which will never be the last character. */
186 out << act->key.length() << ", ";
187 /* Put in a line break every 8 */
188 if ( totalActions++ % 8 == 7 )
191 for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) {
192 out << item->value->actionId;
193 if ( ! (act.last() && item.last()) )
196 /* Put in a line break every 8 */
197 if ( totalActions++ % 8 == 7 )
206 string CSharpFsmCodeGen::ACCESS()
209 if ( accessExpr != 0 )
210 INLINE_LIST( ret, accessExpr, 0, false );
/* Returns the text of the p expression; pExpr is rendered through
 * INLINE_LIST with target state 0 and inFinish false. */
215 string CSharpFsmCodeGen::P()
222 INLINE_LIST( ret, pExpr, 0, false );
/* Returns the text of the pe expression; peExpr is rendered through
 * INLINE_LIST with target state 0 and inFinish false. */
228 string CSharpFsmCodeGen::PE()
235 INLINE_LIST( ret, peExpr, 0, false );
/* Returns the text of the eof expression; eofExpr is rendered through
 * INLINE_LIST with target state 0 and inFinish false. */
241 string CSharpFsmCodeGen::EOFV()
248 INLINE_LIST( ret, eofExpr, 0, false );
/*
 * Returns the text naming the cs variable: either ACCESS() followed by "cs",
 * or csExpr rendered through INLINE_LIST.
 * NOTE(review): presumably the first form is used when csExpr is unset --
 * confirm.
 */
254 string CSharpFsmCodeGen::CS()
258 ret << ACCESS() << "cs";
260 /* Emit the user supplied cs expression. */
262 INLINE_LIST( ret, csExpr, 0, false );
/*
 * Returns the text naming the top variable: either ACCESS() followed by
 * "top", or topExpr rendered through INLINE_LIST.
 * NOTE(review): presumably the first form is used when topExpr is unset --
 * confirm.
 */
268 string CSharpFsmCodeGen::TOP()
272 ret << ACCESS() + "top";
275 INLINE_LIST( ret, topExpr, 0, false );
/*
 * Returns the text naming the stack variable. With no stackExpr set this is
 * ACCESS() followed by "stack"; stackExpr, when used, is rendered through
 * INLINE_LIST.
 */
281 string CSharpFsmCodeGen::STACK()
284 if ( stackExpr == 0 )
285 ret << ACCESS() + "stack";
288 INLINE_LIST( ret, stackExpr, 0, false );
/*
 * Returns the text naming the act variable: either ACCESS() followed by
 * "act", or actExpr rendered through INLINE_LIST.
 * NOTE(review): presumably the first form is used when actExpr is unset --
 * confirm.
 */
294 string CSharpFsmCodeGen::ACT()
298 ret << ACCESS() + "act";
301 INLINE_LIST( ret, actExpr, 0, false );
/*
 * Returns the text naming the token-start variable. With no tokstartExpr set
 * this is ACCESS() followed by "ts"; tokstartExpr, when used, is rendered
 * through INLINE_LIST.
 */
307 string CSharpFsmCodeGen::TOKSTART()
310 if ( tokstartExpr == 0 )
311 ret << ACCESS() + "ts";
314 INLINE_LIST( ret, tokstartExpr, 0, false );
/*
 * Returns the text naming the token-end variable. With no tokendExpr set
 * this is ACCESS() followed by "te"; tokendExpr, when used, is rendered
 * through INLINE_LIST.
 */
320 string CSharpFsmCodeGen::TOKEND()
323 if ( tokendExpr == 0 )
324 ret << ACCESS() + "te";
327 INLINE_LIST( ret, tokendExpr, 0, false );
/* Returns a key expression; the choice depends on whether the machine has
 * any conditions (redFsm->anyConditions()). */
333 string CSharpFsmCodeGen::GET_WIDE_KEY()
335 if ( redFsm->anyConditions() )
/* Per-state variant: the choice of key expression depends on whether the
 * given state has a non-empty stateCondList. */
341 string CSharpFsmCodeGen::GET_WIDE_KEY( RedStateAp *state )
343 if ( state->stateCondList.length() > 0 )
/*
 * Returns the expression that reads the current key. A getKeyExpr, when set,
 * is rendered through INLINE_LIST; the other form is a dereference of P(),
 * written as "(*" P() ")".
 */
349 string CSharpFsmCodeGen::GET_KEY()
352 if ( getKeyExpr != 0 ) {
353 /* Emit the user supplied method of retrieving the key. */
355 INLINE_LIST( ret, getKeyExpr, 0, false );
359 /* Expression for retrieving the key, use simple dereference. */
360 ret << "(*" << P() << ")";
365 /* Write out level number of tabs. Makes the nested binary search nicely indented. */
/* Builds and returns a string; the loop body runs once per unit of `level`
 * (zero times when level is not positive). */
367 string CSharpFsmCodeGen::TABS( int level )
370 while ( level-- > 0 )
375 /* Write out a key from the fsm code gen. Depends on whether or not the key is signed. */
/*
 * Formats a key value as text. The condition tests whether the alphabet is
 * signed or the host language has no explicit unsigned literals; the
 * unsigned form casts the value to unsigned long and appends a 'u' suffix.
 * NOTE(review): presumably the 'u' form is the else branch -- confirm.
 */
377 string CSharpFsmCodeGen::KEY( Key key )
380 if ( keyOps->isSigned || !hostLang->explicitUnsigned )
383 ret << (unsigned long) key.getVal() << 'u';
/*
 * Formats a key as character-literal text. Values above 0xFFFF are tested
 * for explicitly; the escape form starts with '\u and uses hexadecimal
 * output padded with '0' to a width of 4.
 */
387 string CSharpFsmCodeGen::ALPHA_KEY( Key key )
390 if (key.getVal() > 0xFFFF) {
393 ret << "'\\u" << std::hex << std::setw(4) << std::setfill('0') <<
396 //ret << "(char) " << key.getVal();
/*
 * Emits the code for an fexec item: opens a block that assigns to P() the
 * expression rendered from item->children, wrapped in double parentheses.
 *
 * NOTE(review): inFinish is declared int here but is passed on to
 * INLINE_LIST, which takes bool.
 */
400 void CSharpFsmCodeGen::EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish )
402 /* The parser gives fexec two children. The double brackets are for D
403 * code. If the inline list is a single word it will get interpreted as a
404 * C-style cast by the D compiler. */
405 ret << "{" << P() << " = ((";
406 INLINE_LIST( ret, item->children, targState, inFinish );
/*
 * Emits a switch statement over ACT(). Each child of item contributes one
 * label -- either "default:" or "case <lmId>:" -- followed by that child's
 * own inline list rendered through INLINE_LIST.
 */
410 void CSharpFsmCodeGen::LM_SWITCH( ostream &ret, GenInlineItem *item,
411 int targState, int inFinish )
414 " switch( " << ACT() << " ) {\n";
416 for ( GenInlineList::Iter lma = *item->children; lma.lte(); lma++ ) {
417 /* Write the case label, the action and the case break. */
419 ret << " default:\n";
421 ret << " case " << lma->lmId << ":\n";
423 /* Write the block and close it off. */
425 INLINE_LIST( ret, lma->children, targState, inFinish );
436 void CSharpFsmCodeGen::SET_ACT( ostream &ret, GenInlineItem *item )
438 ret << ACT() << " = " << item->lmId << ";";
441 void CSharpFsmCodeGen::SET_TOKEND( ostream &ret, GenInlineItem *item )
443 /* The tokend action sets tokend. */
444 ret << TOKEND() << " = " << P();
445 if ( item->offset != 0 )
446 out << "+" << item->offset;
450 void CSharpFsmCodeGen::GET_TOKEND( ostream &ret, GenInlineItem *item )
455 void CSharpFsmCodeGen::INIT_TOKSTART( ostream &ret, GenInlineItem *item )
457 ret << TOKSTART() << " = " << NULL_ITEM() << ";";
460 void CSharpFsmCodeGen::INIT_ACT( ostream &ret, GenInlineItem *item )
462 ret << ACT() << " = 0;";
465 void CSharpFsmCodeGen::SET_TOKSTART( ostream &ret, GenInlineItem *item )
467 ret << TOKSTART() << " = " << P() << ";";
/* Renders the children of item through INLINE_LIST as a block; nothing is
 * rendered when the child list is empty. */
470 void CSharpFsmCodeGen::SUB_ACTION( ostream &ret, GenInlineItem *item,
471 int targState, bool inFinish )
473 if ( item->children->length() > 0 ) {
474 /* Write the block and close it off. */
476 INLINE_LIST( ret, item->children, targState, inFinish );
/*
 * ret:        stream receiving the generated code.
 * inlineList: items to render, visited in list order.
 * targState:  passed on to the handlers that take a target state.
 * inFinish:   passed on to the handlers that take it.
 *
 * Each item is dispatched on item->type to the matching emitter below.
 */
482 /* Write out an inline tree structure. Walks the list and possibly calls out
483 * to virtual functions that handle language specific items in the tree. */
484 void CSharpFsmCodeGen::INLINE_LIST( ostream &ret, GenInlineList *inlineList,
485 int targState, bool inFinish )
487 for ( GenInlineList::Iter item = *inlineList; item.lte(); item++ ) {
488 switch ( item->type ) {
489 case GenInlineItem::Text:
492 case GenInlineItem::Goto:
493 GOTO( ret, item->targState->id, inFinish );
495 case GenInlineItem::Call:
496 CALL( ret, item->targState->id, targState, inFinish );
498 case GenInlineItem::Next:
499 NEXT( ret, item->targState->id, inFinish );
501 case GenInlineItem::Ret:
502 RET( ret, inFinish );
504 case GenInlineItem::PChar:
507 case GenInlineItem::Char:
510 case GenInlineItem::Hold:
513 case GenInlineItem::Exec:
514 EXEC( ret, item, targState, inFinish );
516 case GenInlineItem::Curs:
517 CURS( ret, inFinish );
519 case GenInlineItem::Targs:
520 TARGS( ret, inFinish, targState );
522 case GenInlineItem::Entry:
523 ret << item->targState->id;
525 case GenInlineItem::GotoExpr:
526 GOTO_EXPR( ret, item, inFinish );
528 case GenInlineItem::CallExpr:
529 CALL_EXPR( ret, item, targState, inFinish );
531 case GenInlineItem::NextExpr:
532 NEXT_EXPR( ret, item, inFinish );
534 case GenInlineItem::LmSwitch:
535 LM_SWITCH( ret, item, targState, inFinish );
537 case GenInlineItem::LmSetActId:
538 SET_ACT( ret, item );
540 case GenInlineItem::LmSetTokEnd:
541 SET_TOKEND( ret, item );
543 case GenInlineItem::LmGetTokEnd:
544 GET_TOKEND( ret, item );
546 case GenInlineItem::LmInitTokStart:
547 INIT_TOKSTART( ret, item );
549 case GenInlineItem::LmInitAct:
550 INIT_ACT( ret, item );
552 case GenInlineItem::LmSetTokStart:
553 SET_TOKSTART( ret, item );
555 case GenInlineItem::SubAction:
556 SUB_ACTION( ret, item, targState, inFinish );
558 case GenInlineItem::Break:
559 BREAK( ret, targState );
564 /* Write out paths in line directives. Escapes any special characters.
    * The path is scanned one character at a time up to its terminating NUL. */
565 string CSharpFsmCodeGen::LDIR_PATH( char *path )
568 for ( char *pc = path; *pc != 0; pc++ ) {
/*
 * Emits the code for one action: a line directive pointing at the action's
 * line in sourceFileName, followed by the action's inline list rendered
 * through INLINE_LIST with the given targState and inFinish.
 */
577 void CSharpFsmCodeGen::ACTION( ostream &ret, GenAction *action, int targState, bool inFinish )
579 /* Write the preprocessor line info for going into the source file. */
580 csharpLineDirective( ret, sourceFileName, action->loc.line );
582 /* Write the block and close it off. */
584 INLINE_LIST( ret, action->inlineList, targState, inFinish );
/*
 * Emits the code for one condition: a line directive pointing at the
 * condition's line in sourceFileName, followed by its inline list rendered
 * through INLINE_LIST with target state 0 and inFinish false.
 */
588 void CSharpFsmCodeGen::CONDITION( ostream &ret, GenAction *condition )
591 csharpLineDirective( ret, sourceFileName, condition->loc.line );
592 INLINE_LIST( ret, condition->inlineList, 0, false );
/* Returns text identifying the error state; when redFsm->errState is set
 * its id is written. */
595 string CSharpFsmCodeGen::ERROR_STATE()
598 if ( redFsm->errState != 0 )
599 ret << redFsm->errState->id;
605 string CSharpFsmCodeGen::FIRST_FINAL_STATE()
608 if ( redFsm->firstFinState != 0 )
609 ret << redFsm->firstFinState->id;
611 ret << redFsm->nextStateId;
/*
 * Writes the machine's initialization statements to out:
 *  - CS() is assigned START();
 *  - TOP() is set to 0 when the machine has any action calls or returns;
 *  - when hasLongestMatch is set, TOKSTART() and TOKEND() are assigned
 *    NULL_ITEM() and ACT() is set to 0.
 */
615 void CSharpFsmCodeGen::writeInit()
620 out << "\t" << CS() << " = " << START() << ";\n";
622 /* If there are any calls, then the stack top needs initialization. */
623 if ( redFsm->anyActionCalls() || redFsm->anyActionRets() )
624 out << "\t" << TOP() << " = 0;\n";
626 if ( hasLongestMatch ) {
628 " " << TOKSTART() << " = " << NULL_ITEM() << ";\n"
629 " " << TOKEND() << " = " << NULL_ITEM() << ";\n"
630 " " << ACT() << " = 0;\n";
/* Returns the prefix placed in front of generated identifiers: FSM_NAME()
 * followed by "_". */
635 string CSharpFsmCodeGen::DATA_PREFIX()
638 return FSM_NAME() + "_";
642 /* Emit the alphabet data type. The name is built from
    * keyOps->alphType->data1, with data2 appended when data2 is non-null.
    * NOTE(review): the unary '+' in front of data2 below is redundant. */
643 string CSharpFsmCodeGen::ALPH_TYPE()
645 string ret = keyOps->alphType->data1;
646 if ( keyOps->alphType->data2 != 0 ) {
648 ret += + keyOps->alphType->data2;
653 /* Emit the wide alphabet data type. redFsm->maxKey is compared against
    * keyOps->maxKey; the lookup path asks keyOps->typeSubsumes() for a host
    * type with the alphabet's signedness that holds redFsm->maxKey, asserts
    * one was found, and builds the name from data1 plus data2 when data2 is
    * non-null.
    * NOTE(review): presumably the lookup only runs when maxKey exceeds
    * keyOps->maxKey -- confirm. */
654 string CSharpFsmCodeGen::WIDE_ALPH_TYPE()
657 if ( redFsm->maxKey <= keyOps->maxKey )
660 long long maxKeyVal = redFsm->maxKey.getLongLong();
661 HostType *wideType = keyOps->typeSubsumes( keyOps->isSigned, maxKeyVal );
662 assert( wideType != 0 );
664 ret = wideType->data1;
665 if ( wideType->data2 != 0 ) {
667 ret += wideType->data2;
/*
 * Emits the state-id constants through STATIC_VAR, all of type "int":
 *  - START() = START_STATE_ID(), only when the machine has a start state;
 *  - FIRST_FINAL() = FIRST_FINAL_STATE();
 *  - ERROR() = ERROR_STATE();
 *  - one constant named DATA_PREFIX() + "en_" + <entry name> per entry
 *    point, valued from entryPointIds at the same position.
 */
673 void CSharpFsmCodeGen::STATE_IDS()
675 if ( redFsm->startState != 0 )
676 STATIC_VAR( "int", START() ) << " = " << START_STATE_ID() << ";\n";
679 STATIC_VAR( "int" , FIRST_FINAL() ) << " = " << FIRST_FINAL_STATE() << ";\n";
682 STATIC_VAR( "int", ERROR() ) << " = " << ERROR_STATE() << ";\n";
686 if ( entryPointNames.length() > 0 ) {
687 for ( EntryNameVect::Iter en = entryPointNames; en.lte(); en++ ) {
688 STATIC_VAR( "int", DATA_PREFIX() + "en_" + *en ) <<
689 " = " << entryPointIds[en.pos()] << ";\n";
696 void CSharpFsmCodeGen::writeStart()
698 out << START_STATE_ID();
701 void CSharpFsmCodeGen::writeFirstFinal()
703 out << FIRST_FINAL_STATE();
706 void CSharpFsmCodeGen::writeError()
708 out << ERROR_STATE();
/*
 * C# version of the key accessor. A getKeyExpr, when set, is rendered
 * through INLINE_LIST; the other form indexes the input as "data[" P() "]".
 */
714 string CSharpCodeGen::GET_KEY()
717 if ( getKeyExpr != 0 ) {
718 /* Emit the user supplied method of retrieving the key. */
720 INLINE_LIST( ret, getKeyExpr, 0, false );
724 /* Expression for retrieving the key, use array indexing. */
725 ret << "data[" << P() << "]";
729 string CSharpCodeGen::NULL_ITEM()
734 string CSharpCodeGen::POINTER()
736 // XXX C# has no pointers
737 // multiple items separated by commas can also be pointer types.
741 string CSharpCodeGen::PTR_CONST()
/*
 * Writes the opening of an array definition to out: the declaration
 * "static readonly <type>[] <name> = " followed by one of two initializer
 * openings, either "Encoding.ASCII.Get" or "new <type> [] {" and a newline.
 */
746 std::ostream &CSharpCodeGen::OPEN_ARRAY( string type, string name )
748 out << "static readonly " << type << "[] " << name << " = ";
751 out << "Encoding.ASCII.Get";
753 out << "new " << type << " [] {\n";
757 std::ostream &CSharpCodeGen::CLOSE_ARRAY()
759 return out << "};\n";
762 std::ostream &CSharpCodeGen::STATIC_VAR( string type, string name )
764 out << "const " << type << " " << name;
768 string CSharpCodeGen::ARR_OFF( string ptr, string offset )
770 // XXX C# can't do pointer arithmetic
771 return "&" + ptr + "[" + offset + "]";
774 string CSharpCodeGen::CAST( string type )
776 return "(" + type + ")";
779 string CSharpCodeGen::UINT( )
784 std::ostream &CSharpCodeGen::SWITCH_DEFAULT()
786 out << " default: break;\n";
790 string CSharpCodeGen::CTRL_FLOW()
/*
 * Writes one constant per entry of exportList to out, in the form
 * "const <ALPH_TYPE()> <DATA_PREFIX()>ex_<name> = <KEY(key)>;". The loop is
 * skipped when the export list is empty.
 */
795 void CSharpCodeGen::writeExports()
797 if ( exportList.length() > 0 ) {
798 for ( ExportList::Iter ex = exportList; ex.lte(); ex++ ) {
799 out << "const " << ALPH_TYPE() << " " << DATA_PREFIX() <<
800 "ex_" << ex->name << " = " << KEY(ex->key) << ";\n";
807 /* End C#-specific code. */
/*
 * Prepares the reduced machine (redFsm) for code generation once a ragel
 * definition is finished: picks a state ordering based on codeStyle, chooses
 * default and single transitions, stops early when gblErrorCount is
 * non-zero, and then runs the steps specific to the split and ip-goto
 * styles.
 */
810 void CSharpFsmCodeGen::finishRagelDef()
812 if ( codeStyle == GenGoto || codeStyle == GenFGoto ||
813 codeStyle == GenIpGoto || codeStyle == GenSplit )
815 /* For directly executable machines there is no required state
816 * ordering. Choose a depth-first ordering to increase the
817 * potential for fall-throughs. */
818 redFsm->depthFirstOrdering();
821 /* The frontend will do this for us, but it may be a good idea to
822 * force it if the intermediate file is edited. */
823 redFsm->sortByStateId();
826 /* Choose default transitions and the single transition. */
827 redFsm->chooseDefaultSpan();
829 /* Maybe do flat expand, otherwise choose single. */
830 if ( codeStyle == GenFlat || codeStyle == GenFFlat )
833 redFsm->chooseSingle();
835 /* If any errors have occurred in the input file then don't write anything. */
836 if ( gblErrorCount > 0 )
839 if ( codeStyle == GenSplit )
840 redFsm->partitionFsm( numSplitPartitions );
842 if ( codeStyle == GenIpGoto || codeStyle == GenSplit )
843 redFsm->setInTrans();
845 /* Analyze Machine will find the final action reference counts, among
846 * other things. We will use these in reporting the usage
847 * of fsm directives in action code. */
850 /* Determine if we should use indices. */
854 ostream &CSharpFsmCodeGen::source_warning( const InputLoc &loc )
856 cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": warning: ";
/*
 * Begins an error message on cerr, prefixed with "<source file>:<line>:<col>: ".
 * Asserts that sourceFileName is set before it is streamed.
 */
860 ostream &CSharpFsmCodeGen::source_error( const InputLoc &loc )
863 assert( sourceFileName != 0 );
864 cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": ";