2 * Copyright 2001-2006 Adrian Thurston <thurston@complang.org>
3 * 2004 Erich Ocean <eric.ocean@ampede.com>
4 * 2005 Alan West <alan@alanz.com>
7 /* This file is part of Ragel.
9 * Ragel is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * Ragel is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Ragel; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include "cscodegen.h"
33 /* Code generators. */
44 using std::ostringstream;
58 /* Invoked by the parser when the root element is opened. */
// Picks the output file name when none was given, refuses to write over the
// input file, and opens the output stream on top of an output_filter.
// inputFile: path of the input file; its extension selects the default
// output extension.
59 ostream *csharpOpenOutput( const char *inputFile )
// This generator only accepts C# as the host language.
61 if ( hostLang->lang != HostLang::CSharp ) {
62 error() << "this code generator is for C# only" << endl;
// Default naming: a .rh input yields a .h output; the .cs stem is presumably
// the fallback branch. NOTE(review): the .rh to .h rule looks inherited from
// the C generator - confirm it is wanted for C#.
66 /* If the output format is code and no output file name is given, then
68 if ( outputFileName == 0 ) {
69 const char *ext = findFileExtension( inputFile );
70 if ( ext != 0 && strcmp( ext, ".rh" ) == 0 )
71 outputFileName = fileNameFromStem( inputFile, ".h" );
73 outputFileName = fileNameFromStem( inputFile, ".cs" );
76 /* Make sure we are not writing to the same file as the input file. */
77 if ( outputFileName != 0 && strcmp( inputFile, outputFileName ) == 0 ) {
78 error() << "output file \"" << outputFileName <<
79 "\" is the same as the input file" << endl;
82 if ( outputFileName != 0 ) {
83 /* Create the filter on the output and open it. */
84 outFilter = new output_filter( outputFileName );
// The file is truncated on open.
85 outFilter->open( outputFileName, ios::out|ios::trunc );
86 if ( !outFilter->is_open() ) {
87 error() << "error opening " << outputFileName << " for writing" << endl;
91 /* Open the output stream, attaching it to the filter. */
92 outStream = new ostream( outFilter );
95 /* Writing out to std out. */
101 /* Invoked by the parser when a ragel definition is opened. */
// Creates the code generator selected by the global codeStyle and records
// the source file name, machine name and wantComplete flag on it.
// out: stream handed to the constructor of the chosen generator.
102 CodeGenData *csharpMakeCodeGen( const char *sourceFileName, const char *fsmName,
103 ostream &out, bool wantComplete )
105 CodeGenData *codeGen = 0;
// One CSharp generator class per code style.
107 switch ( codeStyle ) {
109 codeGen = new CSharpTabCodeGen(out);
112 codeGen = new CSharpFTabCodeGen(out);
115 codeGen = new CSharpFlatCodeGen(out);
118 codeGen = new CSharpFFlatCodeGen(out);
121 codeGen = new CSharpGotoCodeGen(out);
124 codeGen = new CSharpFGotoCodeGen(out);
127 codeGen = new CSharpIpGotoCodeGen(out);
130 codeGen = new CSharpSplitCodeGen(out);
// NOTE(review): codeGen is dereferenced below; assumes every codeStyle value
// is handled by the switch - confirm.
134 codeGen->sourceFileName = sourceFileName;
135 codeGen->fsmName = fsmName;
136 codeGen->wantComplete = wantComplete;
// Writes a #line directive naming fileName and line to out. The global
// noLineDirectives flag is consulted both before and after the directive.
141 void csharpLineDirective( ostream &out, const char *fileName, int line )
143 if ( noLineDirectives )
146 /* Write the preprocessor line info pointing to the input file. */
147 out << "#line " << line << " \"";
// The file name is copied one character at a time up to the terminating NUL.
148 for ( const char *pc = fileName; *pc != 0; pc++ ) {
156 if ( noLineDirectives )
// Emits a line directive that refers to the output itself, using the file
// name and line counter kept by the output_filter behind out.
162 void CSharpFsmCodeGen::genLineDirective( ostream &out )
164 std::streambuf *sbuf = out.rdbuf();
// NOTE(review): unchecked downcast; assumes out is always backed by an
// output_filter - confirm against callers.
165 output_filter *filter = static_cast<output_filter*>(sbuf);
// filter->line + 1 is the line number passed to the directive.
166 csharpLineDirective( out, filter->fileName, filter->line + 1 );
170 /* Init code gen with in parameters. */
171 CSharpFsmCodeGen::CSharpFsmCodeGen( ostream &out )
// Returns the size field of the host type that keyOps->typeSubsumes selects
// for maxVal. Asserts that such a type exists.
177 unsigned int CSharpFsmCodeGen::arrayTypeSize( unsigned long maxVal )
179 long long maxValLL = (long long) maxVal;
180 HostType *arrayType = keyOps->typeSubsumes( maxValLL );
181 assert( arrayType != 0 );
182 return arrayType->size;
// Convenience overload: forwards to the two argument form with forceSigned
// set to false.
185 string CSharpFsmCodeGen::ARRAY_TYPE( unsigned long maxVal )
187 return ARRAY_TYPE( maxVal, false );
// Builds the name of the host type that subsumes maxVal from its data1
// field, extended with data2 when data2 is non-null.
// forceSigned: presumably selects the typeSubsumes( true, ... ) lookup below;
// the guarding condition is assumed - verify.
190 string CSharpFsmCodeGen::ARRAY_TYPE( unsigned long maxVal, bool forceSigned )
192 long long maxValLL = (long long) maxVal;
195 arrayType = keyOps->typeSubsumes(true, maxValLL);
197 arrayType = keyOps->typeSubsumes( maxValLL );
198 assert( arrayType != 0 );
200 string ret = arrayType->data1;
201 if ( arrayType->data2 != 0 ) {
203 ret += arrayType->data2;
208 /* Write out the fsm name. */
209 string CSharpFsmCodeGen::FSM_NAME()
214 /* Emit the offset of the start state as a decimal integer. */
// Formats the id of redFsm->startState into the result stream.
215 string CSharpFsmCodeGen::START_STATE_ID()
218 ret << redFsm->startState->id;
222 /* Write out the array of actions. */
// Writes, for every action table in redFsm->actionMap, the table length
// followed by the actionId of each of its items.
223 std::ostream &CSharpFsmCodeGen::ACTIONS_ARRAY()
// Counts the values written so far; starts at 1 and is post-incremented in
// each line break test below.
226 int totalActions = 1;
227 for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) {
228 /* Write out the length, which will never be the last character. */
229 out << act->key.length() << ", ";
230 /* Put in a line break every 8 */
231 if ( totalActions++ % 8 == 7 )
234 for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) {
235 out << item->value->actionId;
// True for every value except the last item of the last table.
236 if ( ! (act.last() && item.last()) )
239 /* Put in a line break every 8 */
240 if ( totalActions++ % 8 == 7 )
// Expands the user supplied access expression, when there is one, through
// INLINE_LIST with target state 0 and inFinish false.
249 string CSharpFsmCodeGen::ACCESS()
252 if ( accessExpr != 0 )
253 INLINE_LIST( ret, accessExpr, 0, false );
// Expands pExpr through INLINE_LIST with target state 0 and inFinish false.
258 string CSharpFsmCodeGen::P()
265 INLINE_LIST( ret, pExpr, 0, false );
// Expands peExpr through INLINE_LIST with target state 0 and inFinish false.
271 string CSharpFsmCodeGen::PE()
278 INLINE_LIST( ret, peExpr, 0, false );
// Expands eofExpr through INLINE_LIST with target state 0 and inFinish false.
284 string CSharpFsmCodeGen::EOFV()
291 INLINE_LIST( ret, eofExpr, 0, false );
// Builds the expression naming the cs variable: either ACCESS() followed by
// cs, or the expansion of the user supplied csExpr.
297 string CSharpFsmCodeGen::CS()
301 ret << ACCESS() << "cs";
303 /* Emit the user supplied cs expression. */
305 INLINE_LIST( ret, csExpr, 0, false );
// Builds the expression naming the top variable: either ACCESS() followed by
// top, or the expansion of the user supplied topExpr.
311 string CSharpFsmCodeGen::TOP()
315 ret << ACCESS() + "top";
318 INLINE_LIST( ret, topExpr, 0, false );
// Builds the expression naming the stack: ACCESS() followed by stack when no
// stackExpr was supplied; the stackExpr expansion is presumably the other
// branch.
324 string CSharpFsmCodeGen::STACK()
327 if ( stackExpr == 0 )
328 ret << ACCESS() + "stack";
331 INLINE_LIST( ret, stackExpr, 0, false );
// Builds the expression naming the act variable: either ACCESS() followed by
// act, or the expansion of the user supplied actExpr.
337 string CSharpFsmCodeGen::ACT()
341 ret << ACCESS() + "act";
344 INLINE_LIST( ret, actExpr, 0, false );
// Builds the expression naming the token start: ACCESS() followed by ts when
// no tokstartExpr was supplied; the tokstartExpr expansion is presumably the
// other branch.
350 string CSharpFsmCodeGen::TOKSTART()
353 if ( tokstartExpr == 0 )
354 ret << ACCESS() + "ts";
357 INLINE_LIST( ret, tokstartExpr, 0, false );
// Builds the expression naming the token end: ACCESS() followed by te when
// no tokendExpr was supplied; the tokendExpr expansion is presumably the
// other branch.
363 string CSharpFsmCodeGen::TOKEND()
366 if ( tokendExpr == 0 )
367 ret << ACCESS() + "te";
370 INLINE_LIST( ret, tokendExpr, 0, false );
// Machine wide form: the result depends on whether redFsm reports any
// conditions.
376 string CSharpFsmCodeGen::GET_WIDE_KEY()
378 if ( redFsm->anyConditions() )
// Per state form: the result depends on whether the state has a non-empty
// stateCondList.
384 string CSharpFsmCodeGen::GET_WIDE_KEY( RedStateAp *state )
386 if ( state->stateCondList.length() > 0 )
// Builds the expression that reads the current key: the expansion of
// getKeyExpr when the user supplied one, a dereference of P() otherwise.
392 string CSharpFsmCodeGen::GET_KEY()
395 if ( getKeyExpr != 0 ) {
396 /* Emit the user supplied method of retrieving the key. */
398 INLINE_LIST( ret, getKeyExpr, 0, false );
402 /* Expression for retrieving the key, use simple dereference. */
403 ret << "(*" << P() << ")";
408 /* Write out level number of tabs. Makes the nested binary search nice
// level: number of loop iterations; zero or negative values skip the loop.
410 string CSharpFsmCodeGen::TABS( int level )
413 while ( level-- > 0 )
418 /* Write out a key from the fsm code gen. Depends on whether or not the key is
// Formats key as a literal. Signed alphabets, and host languages without
// explicit unsigned, take the first branch; the unsigned form writes the
// value as unsigned long followed by a u suffix.
420 string CSharpFsmCodeGen::KEY( Key key )
423 if ( keyOps->isSigned || !hostLang->explicitUnsigned )
426 ret << (unsigned long) key.getVal() << 'u';
// Formats key for use as an alphabet value. Values above 0xFFFF get their
// own branch; the other visible form is a quoted backslash-u escape with four
// zero padded hex digits.
430 string CSharpFsmCodeGen::ALPHA_KEY( Key key )
433 if (key.getVal() > 0xFFFF) {
436 ret << "'\\u" << std::hex << std::setw(4) << std::setfill('0') <<
439 //ret << "(char) " << key.getVal();
// Emits the fexec statement: assigns the expansion of item->children to P(),
// wrapped in double parentheses.
443 void CSharpFsmCodeGen::EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish )
// NOTE(review): the remark about D below looks inherited from the C/D
// generator - confirm it still applies to C# output.
445 /* The parser gives fexec two children. The double brackets are for D
446 * code. If the inline list is a single word it will get interpreted as a
447 * C-style cast by the D compiler. */
448 ret << "{" << P() << " = ((";
449 INLINE_LIST( ret, item->children, targState, inFinish );
// Emits a switch on ACT(). Each child of item contributes either a default
// label or a case label carrying its lmId, followed by the expansion of its
// own children.
453 void CSharpFsmCodeGen::LM_SWITCH( ostream &ret, GenInlineItem *item,
454 int targState, int inFinish )
457 " switch( " << ACT() << " ) {\n";
459 for ( GenInlineList::Iter lma = *item->children; lma.lte(); lma++ ) {
460 /* Write the case label, the action and the case break. */
462 ret << " default:\n";
464 ret << " case " << lma->lmId << ":\n";
466 /* Write the block and close it off. */
468 INLINE_LIST( ret, lma->children, targState, inFinish );
// Emits the statement assigning item->lmId to ACT().
479 void CSharpFsmCodeGen::SET_ACT( ostream &ret, GenInlineItem *item )
481 ret << ACT() << " = " << item->lmId << ";";
// Emits the assignment that records the token end: TOKEND() = P(), plus
// item->offset when that offset is non-zero.
// ret:  stream receiving the generated statement.
// item: inline item whose offset field is added to P().
484 void CSharpFsmCodeGen::SET_TOKEND( ostream &ret, GenInlineItem *item )
486 /* The tokend action sets tokend. */
487 ret << TOKEND() << " = " << P();
488 if ( item->offset != 0 )
// The offset is part of the statement being built in ret. Writing it to the
// member stream out would split the expression whenever ret is a different
// stream than out.
489 ret << "+" << item->offset;
493 void CSharpFsmCodeGen::GET_TOKEND( ostream &ret, GenInlineItem *item )
// Emits the statement assigning NULL_ITEM() to TOKSTART(). item is not used
// by the visible statement.
498 void CSharpFsmCodeGen::INIT_TOKSTART( ostream &ret, GenInlineItem *item )
500 ret << TOKSTART() << " = " << NULL_ITEM() << ";";
// Emits the statement assigning 0 to ACT(). item is not used by the visible
// statement.
503 void CSharpFsmCodeGen::INIT_ACT( ostream &ret, GenInlineItem *item )
505 ret << ACT() << " = 0;";
// Emits the statement assigning P() to TOKSTART(). item is not used by the
// visible statement.
508 void CSharpFsmCodeGen::SET_TOKSTART( ostream &ret, GenInlineItem *item )
510 ret << TOKSTART() << " = " << P() << ";";
// Expands the children of item as a nested block. Nothing is expanded when
// the child list is empty.
513 void CSharpFsmCodeGen::SUB_ACTION( ostream &ret, GenInlineItem *item,
514 int targState, bool inFinish )
516 if ( item->children->length() > 0 ) {
517 /* Write the block and close it off. */
519 INLINE_LIST( ret, item->children, targState, inFinish );
525 /* Write out an inline tree structure. Walks the list and possibly calls out
526 * to virtual functions that handle language specific items in the tree. */
// ret:        stream receiving the generated code.
// inlineList: items to expand, visited in list order.
// targState, inFinish: forwarded unchanged to the handlers that accept them.
527 void CSharpFsmCodeGen::INLINE_LIST( ostream &ret, GenInlineList *inlineList,
528 int targState, bool inFinish )
530 for ( GenInlineList::Iter item = *inlineList; item.lte(); item++ ) {
// One handler per item type.
531 switch ( item->type ) {
532 case GenInlineItem::Text:
535 case GenInlineItem::Goto:
536 GOTO( ret, item->targState->id, inFinish );
538 case GenInlineItem::Call:
539 CALL( ret, item->targState->id, targState, inFinish );
541 case GenInlineItem::Next:
542 NEXT( ret, item->targState->id, inFinish );
544 case GenInlineItem::Ret:
545 RET( ret, inFinish );
547 case GenInlineItem::PChar:
550 case GenInlineItem::Char:
553 case GenInlineItem::Hold:
556 case GenInlineItem::Exec:
557 EXEC( ret, item, targState, inFinish );
559 case GenInlineItem::Curs:
560 CURS( ret, inFinish );
562 case GenInlineItem::Targs:
563 TARGS( ret, inFinish, targState );
// Entry items expand to the numeric id of their target state.
565 case GenInlineItem::Entry:
566 ret << item->targState->id;
568 case GenInlineItem::GotoExpr:
569 GOTO_EXPR( ret, item, inFinish );
571 case GenInlineItem::CallExpr:
572 CALL_EXPR( ret, item, targState, inFinish );
574 case GenInlineItem::NextExpr:
575 NEXT_EXPR( ret, item, inFinish );
577 case GenInlineItem::LmSwitch:
578 LM_SWITCH( ret, item, targState, inFinish );
580 case GenInlineItem::LmSetActId:
581 SET_ACT( ret, item );
583 case GenInlineItem::LmSetTokEnd:
584 SET_TOKEND( ret, item );
586 case GenInlineItem::LmGetTokEnd:
587 GET_TOKEND( ret, item );
589 case GenInlineItem::LmInitTokStart:
590 INIT_TOKSTART( ret, item );
592 case GenInlineItem::LmInitAct:
593 INIT_ACT( ret, item );
595 case GenInlineItem::LmSetTokStart:
596 SET_TOKSTART( ret, item );
598 case GenInlineItem::SubAction:
599 SUB_ACTION( ret, item, targState, inFinish );
601 case GenInlineItem::Break:
602 BREAK( ret, targState );
607 /* Write out paths in line directives. Escapes any special characters. */
// path: NUL terminated string, visited one character at a time.
608 string CSharpFsmCodeGen::LDIR_PATH( char *path )
611 for ( char *pc = path; *pc != 0; pc++ ) {
// Emits the code of one action: a line directive pointing at the action's
// location in the source file, then the expansion of its inline list.
620 void CSharpFsmCodeGen::ACTION( ostream &ret, GenAction *action, int targState, bool inFinish )
622 /* Write the preprocessor line info for going into the source file. */
623 csharpLineDirective( ret, sourceFileName, action->loc.line );
625 /* Write the block and close it off. */
627 INLINE_LIST( ret, action->inlineList, targState, inFinish );
// Emits the code of one condition: a line directive pointing at its source
// location, then its inline list expanded with target state 0 and inFinish
// false.
631 void CSharpFsmCodeGen::CONDITION( ostream &ret, GenAction *condition )
634 csharpLineDirective( ret, sourceFileName, condition->loc.line );
635 INLINE_LIST( ret, condition->inlineList, 0, false );
// Formats the id of the error state when the machine has one.
638 string CSharpFsmCodeGen::ERROR_STATE()
641 if ( redFsm->errState != 0 )
642 ret << redFsm->errState->id;
// Formats the id of the first final state when there is one;
// redFsm->nextStateId is presumably the fallback branch.
648 string CSharpFsmCodeGen::FIRST_FINAL_STATE()
651 if ( redFsm->firstFinState != 0 )
652 ret << redFsm->firstFinState->id;
654 ret << redFsm->nextStateId;
// Writes the initialisation statements of the generated machine to out.
658 void CSharpFsmCodeGen::writeInit()
// The current state starts at START().
663 out << "\t" << CS() << " = " << START() << ";\n";
665 /* If there are any calls, then the stack top needs initialization. */
666 if ( redFsm->anyActionCalls() || redFsm->anyActionRets() )
667 out << "\t" << TOP() << " = 0;\n";
// Longest match machines also reset token start, token end and act.
669 if ( hasLongestMatch ) {
671 " " << TOKSTART() << " = " << NULL_ITEM() << ";\n"
672 " " << TOKEND() << " = " << NULL_ITEM() << ";\n"
673 " " << ACT() << " = 0;\n";
// Returns FSM_NAME() followed by an underscore. NOTE(review): this return may
// be guarded by a condition - confirm.
678 string CSharpFsmCodeGen::DATA_PREFIX()
681 return FSM_NAME() + "_";
685 /* Emit the alphabet data type. */
// Builds the name from keyOps->alphType: data1, extended with data2 when
// data2 is non-null.
686 string CSharpFsmCodeGen::ALPH_TYPE()
688 string ret = keyOps->alphType->data1;
689 if ( keyOps->alphType->data2 != 0 ) {
// Plain append, matching ARRAY_TYPE and WIDE_ALPH_TYPE; the former unary plus
// applied to the pointer was a no-op.
691 ret += keyOps->alphType->data2;
696 /* Emit the wide alphabet data type. */
// When redFsm->maxKey exceeds keyOps->maxKey, the name is built from the host
// type that subsumes redFsm->maxKey with the alphabet's signedness: data1,
// extended with data2 when data2 is non-null.
697 string CSharpFsmCodeGen::WIDE_ALPH_TYPE()
// Keys that fit the plain alphabet take this branch.
700 if ( redFsm->maxKey <= keyOps->maxKey )
703 long long maxKeyVal = redFsm->maxKey.getLongLong();
704 HostType *wideType = keyOps->typeSubsumes( keyOps->isSigned, maxKeyVal );
705 assert( wideType != 0 );
707 ret = wideType->data1;
708 if ( wideType->data2 != 0 ) {
710 ret += wideType->data2;
// Declares the state id constants through STATIC_VAR: start, first final and
// error state, plus one constant per named entry point.
716 void CSharpFsmCodeGen::STATE_IDS()
718 if ( redFsm->startState != 0 )
719 STATIC_VAR( "int", START() ) << " = " << START_STATE_ID() << ";\n";
722 STATIC_VAR( "int" , FIRST_FINAL() ) << " = " << FIRST_FINAL_STATE() << ";\n";
725 STATIC_VAR( "int", ERROR() ) << " = " << ERROR_STATE() << ";\n";
729 if ( entryPointNames.length() > 0 ) {
730 for ( EntryNameVect::Iter en = entryPointNames; en.lte(); en++ ) {
// Entry constants are named DATA_PREFIX() + en_ + entry name; the value is
// the id stored at the same position in entryPointIds.
731 STATIC_VAR( "int", DATA_PREFIX() + "en_" + *en ) <<
732 " = " << entryPointIds[en.pos()] << ";\n";
// Builds the expression that reads the current key: the expansion of
// getKeyExpr when the user supplied one, otherwise an index into data at P().
741 string CSharpCodeGen::GET_KEY()
744 if ( getKeyExpr != 0 ) {
745 /* Emit the user supplied method of retrieving the key. */
747 INLINE_LIST( ret, getKeyExpr, 0, false );
751 /* Expression for retrieving the key, index the data array. */
752 ret << "data[" << P() << "]";
756 string CSharpCodeGen::NULL_ITEM()
761 string CSharpCodeGen::POINTER()
763 // XXX C# has no pointers
764 // multiple items separated by commas can also be pointer types.
768 string CSharpCodeGen::PTR_CONST()
// Writes the start of a static readonly array declaration of the given
// element type and name to out. Two initialiser forms are visible: one that
// begins with Encoding.ASCII.Get and one that opens a new type [] list.
773 std::ostream &CSharpCodeGen::OPEN_ARRAY( string type, string name )
775 out << "static readonly " << type << "[] " << name << " = ";
778 out << "Encoding.ASCII.Get";
780 out << "new " << type << " [] {\n";
// Writes the closing brace and semicolon of an array initialiser and returns
// out.
784 std::ostream &CSharpCodeGen::CLOSE_ARRAY()
786 return out << "};\n";
// Writes the head of a const declaration: the const keyword, the type and
// the name, with no initialiser or terminator.
789 std::ostream &CSharpCodeGen::STATIC_VAR( string type, string name )
791 out << "const " << type << " " << name;
// Returns the text of an address-of expression on element offset of ptr.
795 string CSharpCodeGen::ARR_OFF( string ptr, string offset )
797 // XXX C# can't do pointer arithmetic
798 return "&" + ptr + "[" + offset + "]";
// Returns type wrapped in parentheses, the cast prefix form.
801 string CSharpCodeGen::CAST( string type )
803 return "(" + type + ")";
806 string CSharpCodeGen::UINT( )
// Writes a default label that only breaks out of the switch.
811 std::ostream &CSharpCodeGen::SWITCH_DEFAULT()
813 out << " default: break;\n";
817 string CSharpCodeGen::CTRL_FLOW()
// Writes one const declaration of type ALPH_TYPE() per exported key. The
// constant is named DATA_PREFIX() + ex_ + export name and holds KEY of the
// exported key.
822 void CSharpCodeGen::writeExports()
824 if ( exportList.length() > 0 ) {
825 for ( ExportList::Iter ex = exportList; ex.lte(); ex++ ) {
826 out << "const " << ALPH_TYPE() << " " << DATA_PREFIX() <<
827 "ex_" << ex->name << " = " << KEY(ex->key) << ";\n";
834 * End C#-specific code.
// Prepares the reduced machine for code generation: orders the states,
// chooses default and single transitions, and runs the extra passes that
// only some code styles need.
837 void CSharpFsmCodeGen::finishRagelDef()
839 if ( codeStyle == GenGoto || codeStyle == GenFGoto ||
840 codeStyle == GenIpGoto || codeStyle == GenSplit )
842 /* For directly executable machines there is no required state
843 * ordering. Choose a depth-first ordering to increase the
844 * potential for fall-throughs. */
845 redFsm->depthFirstOrdering();
848 /* The frontend will do this for us, but it may be a good idea to
849 * force it if the intermediate file is edited. */
850 redFsm->sortByStateId();
853 /* Choose default transitions and the single transition. */
854 redFsm->chooseDefaultSpan();
856 /* Maybe do flat expand, otherwise choose single. */
857 if ( codeStyle == GenFlat || codeStyle == GenFFlat )
860 redFsm->chooseSingle();
862 /* If any errors have occurred in the input file then don't write anything. */
863 if ( gblErrorCount > 0 )
// Partitioning applies to the split style only.
866 if ( codeStyle == GenSplit )
867 redFsm->partitionFsm( numSplitPartitions );
869 if ( codeStyle == GenIpGoto || codeStyle == GenSplit )
870 redFsm->setInTrans();
872 /* Analyze Machine will find the final action reference counts, among
873 * other things. We will use these in reporting the usage
874 * of fsm directives in action code. */
877 /* Determine if we should use indices. */
// Writes the warning prefix to cerr: source file name, line and column of
// loc, then the warning tag.
// NOTE(review): source_error asserts sourceFileName is non-null; no such
// check is visible here - confirm whether one is needed.
881 ostream &CSharpFsmCodeGen::source_warning( const InputLoc &loc )
883 cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": warning: ";
// Writes the error prefix to cerr: source file name, line and column of loc.
// Asserts that sourceFileName is set.
887 ostream &CSharpFsmCodeGen::source_error( const InputLoc &loc )
890 assert( sourceFileName != 0 );
891 cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": ";