2 * 2007 Victor Hugo Borja <vic@rubyforge.org>
3 * Copyright 2001-2007 Adrian Thurston <thurston@complang.org>
6 /* This file is part of Ragel.
8 * Ragel is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * Ragel is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with Ragel; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 #include "rubycodegen.h"
36 #include "rubytable.h"
37 #include "rubyftable.h"
39 #include "rubyfflat.h"
43 using std::ostringstream;
56 /* Target ruby impl */
57 extern RubyImplEnum rubyImpl;
59 /* Target language and output style. */
60 extern CodeStyleEnum codeStyle;
63 extern istream *inStream;
64 extern ostream *outStream;
65 extern output_filter *outFilter;
66 extern const char *outputFileName;
68 /* Graphviz dot file generation. */
69 extern bool graphvizDone;
71 extern int numSplitPartitions;
74 * Callbacks invoked by the XML data parser.
77 /* Invoked by the parser when the root element is opened. Reports an error
 * when the host language is not Ruby, chooses an output file name from the
 * input file name when none was supplied, reports an error if that name is
 * the input file itself and, when a name is available, creates the global
 * outFilter and the outStream attached to it. */
78 ostream *rubyOpenOutput( const char *inputFile )
80 if ( hostLang->lang != HostLang::Ruby ) {
81 error() << "this code generator is for Ruby only" << endl;
85 /* If the output format is code and no output file name is given, then
87 if ( outputFileName == 0 ) {
88 const char *ext = findFileExtension( inputFile );
89 if ( ext != 0 && strcmp( ext, ".rh" ) == 0 )
90 outputFileName = fileNameFromStem( inputFile, ".h" );
92 outputFileName = fileNameFromStem( inputFile, ".rb" );
95 /* Make sure we are not writing to the same file as the input file. */
96 if ( outputFileName != 0 && strcmp( inputFile, outputFileName ) == 0 ) {
97 error() << "output file \"" << outputFileName <<
98 "\" is the same as the input file" << endl;
101 if ( outputFileName != 0 ) {
102 /* Create the filter on the output and open it. */
103 outFilter = new output_filter( outputFileName );
105 /* Open the output stream, attaching it to the filter. */
106 outStream = new ostream( outFilter );
109 /* Writing out to std out. */
115 /* Invoked by the parser when a ragel definition is opened. Creates the code
 * generator selected by the global codeStyle, writing to out, and records
 * sourceFileName, fsmName and wantComplete on it. */
116 CodeGenData *rubyMakeCodeGen( const char *sourceFileName, const char *fsmName,
117 ostream &out, bool wantComplete )
119 CodeGenData *codeGen = 0;
120 switch ( codeStyle ) {
122 codeGen = new RubyTabCodeGen(out);
125 codeGen = new RubyFTabCodeGen(out);
128 codeGen = new RubyFlatCodeGen(out);
131 codeGen = new RubyFFlatCodeGen(out);
/* The goto-style generator is only created for the Rubinius implementation. */
134 if ( rubyImpl == Rubinius ) {
135 codeGen = new RbxGotoCodeGen(out);
/* Notice (on cout) pointing the user at the --rbx flag. */
137 cout << "Goto style is still _very_ experimental "
138 "and only supported using Rubinius.\n"
139 "You may want to enable the --rbx flag "
140 " to give it a try.\n";
145 cout << "Invalid code style\n";
/* Hand the definition's identifying data to the chosen generator. */
149 codeGen->sourceFileName = sourceFileName;
150 codeGen->fsmName = fsmName;
151 codeGen->wantComplete = wantComplete;
/* Writes source position information to out as a Ruby comment: "# line <line>"
 * followed by an opening double quote, after which fileName is walked one
 * character at a time. */
157 void rubyLineDirective( ostream &out, const char *fileName, int line )
159 /* Write a comment containing line info. */
160 out << "# line " << line << " \"";
161 for ( const char *pc = fileName; *pc != 0; pc++ ) {
/* Emits a line directive built from the file name and the line counter (plus
 * one) stored in out's stream buffer.
 * NOTE(review): the static_cast assumes that buffer really is an
 * output_filter; nothing here verifies it. */
170 void RubyCodeGen::genLineDirective( ostream &out )
172 std::streambuf *sbuf = out.rdbuf();
173 output_filter *filter = static_cast<output_filter*>(sbuf);
174 rubyLineDirective( out, filter->fileName, filter->line + 1 );
/* Returns the prefix placed in front of generated data names; the return
 * statement below yields the fsm name followed by an underscore. */
177 string RubyCodeGen::DATA_PREFIX()
180 return FSM_NAME() + "_";
/* Starts the declaration of a generated variable; the output includes an
 * attr_accessor for name. The returned stream lets the caller append the
 * initializer, as STATE_IDS() does with " = <value>". */
184 std::ostream &RubyCodeGen::STATIC_VAR( string type, string name )
188 " attr_accessor :" << name << "\n"
/* Opens a generated array: emits an attr_accessor for name, makes both the
 * reader and the writer private, and begins the literal with
 * "self.<name> = [". */
195 std::ostream &RubyCodeGen::OPEN_ARRAY( string type, string name )
199 " attr_accessor :" << name << "\n"
200 " private :" << name << ", :" << name << "=\n"
202 "self." << name << " = [\n";
/* Counterpart of OPEN_ARRAY(); presumably emits the text that ends the array
 * literal that OPEN_ARRAY() started -- TODO confirm. */
206 std::ostream &RubyCodeGen::CLOSE_ARRAY()
/* Returns the index expression "<ptr>[<offset>]" as text. */
213 string RubyCodeGen::ARR_OFF( string ptr, string offset )
215 return ptr + "[" + offset + "]";
/* Returns the text used for an unset item; INIT_TOKSTART() and writeInit()
 * assign it to the token start/end variables. */
218 string RubyCodeGen::NULL_ITEM()
/* Returns the expression naming the current position; the path below renders
 * pExpr through INLINE_LIST() (target state 0, not in finish). */
224 string RubyCodeGen::P()
231 INLINE_LIST( ret, pExpr, 0, false );
/* Returns the expression naming the end position; the path below renders
 * peExpr through INLINE_LIST() (target state 0, not in finish). */
237 string RubyCodeGen::PE()
244 INLINE_LIST( ret, peExpr, 0, false );
/* Returns the expression naming the eof variable; the path below renders
 * eofExpr through INLINE_LIST() (target state 0, not in finish). */
250 string RubyCodeGen::EOFV()
257 INLINE_LIST( ret, eofExpr, 0, false );
/* Returns the expression naming the current state: either the ACCESS()
 * prefix followed by "cs", or the rendering of csExpr. */
263 string RubyCodeGen::CS()
267 ret << ACCESS() << "cs";
270 INLINE_LIST( ret, csExpr, 0, false );
/* Returns the expression naming the stack top: either the ACCESS() prefix
 * followed by "top", or the rendering of topExpr. */
276 string RubyCodeGen::TOP()
280 ret << ACCESS() + "top";
283 INLINE_LIST( ret, topExpr, 0, false );
/* Returns the expression naming the call stack: ACCESS() + "stack" when no
 * stackExpr is set; the INLINE_LIST() call renders stackExpr itself. */
289 string RubyCodeGen::STACK()
292 if ( stackExpr == 0 )
293 ret << ACCESS() + "stack";
296 INLINE_LIST( ret, stackExpr, 0, false );
/* Returns the expression naming the act variable: either the ACCESS() prefix
 * followed by "act", or the rendering of actExpr. */
302 string RubyCodeGen::ACT()
306 ret << ACCESS() + "act";
309 INLINE_LIST( ret, actExpr, 0, false );
/* Returns the expression naming the token start: ACCESS() + "ts" when no
 * tokstartExpr is set; the INLINE_LIST() call renders tokstartExpr itself. */
315 string RubyCodeGen::TOKSTART()
318 if ( tokstartExpr == 0 )
319 ret << ACCESS() + "ts";
322 INLINE_LIST( ret, tokstartExpr, 0, false );
/* Returns the expression naming the token end: ACCESS() + "te" when no
 * tokendExpr is set; the INLINE_LIST() call renders tokendExpr itself. */
328 string RubyCodeGen::TOKEND()
331 if ( tokendExpr == 0 )
332 ret << ACCESS() + "te";
335 INLINE_LIST( ret, tokendExpr, 0, false );
/* Returns the expression naming the input data: either the ACCESS() prefix
 * followed by "data", or the rendering of dataExpr. */
341 string RubyCodeGen::DATA()
345 ret << ACCESS() + "data";
348 INLINE_LIST( ret, dataExpr, 0, false );
354 /* Write out the fsm name. The result is returned as a string; DATA_PREFIX() appends "_" to it. */
355 string RubyCodeGen::FSM_NAME()
/* Emits the code of one user action into ret: a line directive for the
 * action's source line, the action's inline list (rendered with targState
 * and inFinish), and a second line directive carrying the same source line. */
361 void RubyCodeGen::ACTION( ostream &ret, GenAction *action, int targState, bool inFinish )
363 /* Write the preprocessor line info for going into the source file. */
364 rubyLineDirective( ret, sourceFileName, action->loc.line );
366 /* Write the block and close it off. */
368 INLINE_LIST( ret, action->inlineList, targState, inFinish );
370 rubyLineDirective( ret, sourceFileName, action->loc.line );
/* Returns the key expression for the whole machine; which form is used
 * depends on whether the machine has any conditions. */
375 string RubyCodeGen::GET_WIDE_KEY()
377 if ( redFsm->anyConditions() )
/* Per-state variant: which key expression is returned depends on whether
 * this state's condition list is non-empty. */
383 string RubyCodeGen::GET_WIDE_KEY( RedStateAp *state )
385 if ( state->stateCondList.length() > 0 )
/* Returns the expression that reads the current input key: the rendering of
 * getKeyExpr when one was supplied, otherwise "<DATA()>[<P()>]". */
391 string RubyCodeGen::GET_KEY()
394 if ( getKeyExpr != 0 ) {
395 /* Emit the user supplied method of retrieving the key. */
397 INLINE_LIST( ret, getKeyExpr, 0, false );
401 /* Expression for retrieving the key, use simple dereference. */
402 ret << DATA() << "[" << P() << "]";
/* Formats a key value as text. The test separates signed alphabets (or host
 * languages without explicit unsigned types) from the remaining case; the
 * unsigned long cast presumably belongs to that remaining case -- TODO
 * confirm. */
407 string RubyCodeGen::KEY( Key key )
410 if ( keyOps->isSigned || !hostLang->explicitUnsigned )
413 ret << (unsigned long) key.getVal();
418 /* Write out level number of tabs. Makes the nested binary search nice
// Builds the indentation string for the given nesting level by looping
// level times.
420 string RubyCodeGen::TABS( int level )
423 while ( level-- > 0 )
// Returns a string for the integer i (presumably its decimal text -- TODO
// confirm); ACTIONS_ARRAY() passes the result to ARRAY_ITEM().
428 string RubyCodeGen::INT( int i )
// Emits a condition into ret: a line directive for the condition's source
// line, then its inline list rendered with target state 0 and inFinish false.
435 void RubyCodeGen::CONDITION( ostream &ret, GenAction *condition )
438 rubyLineDirective( ret, sourceFileName, condition->loc.line );
439 INLINE_LIST( ret, condition->inlineList, 0, false );
442 /* Emit the alphabet data type: data1 of keyOps->alphType, extended with data2 when data2 is non-null. */
443 string RubyCodeGen::ALPH_TYPE()
445 string ret = keyOps->alphType->data1;
446 if ( keyOps->alphType->data2 != 0 ) {
// NOTE(review): the unary '+' in front of keyOps looks like a leftover; it
// has no effect if data2 is a pointer.
448 ret += + keyOps->alphType->data2;
453 /* Emit the wide alphabet data type. The result depends on whether redFsm->maxKey fits within keyOps->maxKey; the wide path asks keyOps for a host type that subsumes maxKey and builds the name from its data1/data2. */
454 string RubyCodeGen::WIDE_ALPH_TYPE()
457 if ( redFsm->maxKey <= keyOps->maxKey )
460 long long maxKeyVal = redFsm->maxKey.getLongLong();
461 HostType *wideType = keyOps->typeSubsumes( keyOps->isSigned, maxKeyVal );
/* A subsuming host type is required; absence is treated as a program bug. */
462 assert( wideType != 0 );
464 ret = wideType->data1;
465 if ( wideType->data2 != 0 ) {
467 ret += wideType->data2;
/* Returns the name of a host type able to hold maxVal: asks keyOps for a
 * subsuming type (asserting that one exists) and builds the name from its
 * data1, extended with data2 when data2 is non-null. */
474 string RubyCodeGen::ARRAY_TYPE( unsigned long maxVal )
476 long long maxValLL = (long long) maxVal;
477 HostType *arrayType = keyOps->typeSubsumes( maxValLL );
478 assert( arrayType != 0 );
480 string ret = arrayType->data1;
481 if ( arrayType->data2 != 0 ) {
483 ret += arrayType->data2;
488 /* Write out the array of actions. A leading 0 item comes first; then, for every entry of redFsm->actionMap, the length of its action table followed by the actionId of each item. totalActions is the running item count handed to ARRAY_ITEM(). */
489 std::ostream &RubyCodeGen::ACTIONS_ARRAY()
492 int totalActions = 0;
493 ARRAY_ITEM( INT(0), ++totalActions, false );
494 for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) {
495 /* Write out the length, which will never be the last character. */
496 ARRAY_ITEM( INT(act->key.length()), ++totalActions, false );
498 for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) {
/* Only the final item of the final table is flagged as last. */
499 ARRAY_ITEM( INT(item->value->actionId), ++totalActions, (act.last() && item.last()) );
/* Emits the state-id variables through STATIC_VAR(): the start state (only
 * when redFsm has one), the first final state, the error state, and one
 * "<DATA_PREFIX()>en_<name>" variable per entry point name, each assigned
 * with " = <id>;". */
506 void RubyCodeGen::STATE_IDS()
508 if ( redFsm->startState != 0 )
509 STATIC_VAR( "int", START() ) << " = " << START_STATE_ID() << ";\n";
512 STATIC_VAR( "int" , FIRST_FINAL() ) << " = " << FIRST_FINAL_STATE() << ";\n";
515 STATIC_VAR( "int", ERROR() ) << " = " << ERROR_STATE() << ";\n";
519 if ( entryPointNames.length() > 0 ) {
520 for ( EntryNameVect::Iter en = entryPointNames; en.lte(); en++ ) {
/* entryPointIds is indexed by the position of the name in entryPointNames. */
521 STATIC_VAR( "int", DATA_PREFIX() + "en_" + *en ) <<
522 " = " << entryPointIds[en.pos()] << ";\n";
/* Presumably emits whatever begins a line of array items -- TODO confirm. */
528 std::ostream &RubyCodeGen::START_ARRAY_LINE()
/* Emits one array element. count is the running number of items emitted and
 * last flags the final item; every IALL-th item (count % IALL == 0) gets
 * extra handling, presumably a line break -- TODO confirm. */
534 std::ostream &RubyCodeGen::ARRAY_ITEM( string item, int count, bool last )
540 if ( count % IALL == 0 )
/* Presumably emits whatever ends a line of array items -- TODO confirm. */
549 std::ostream &RubyCodeGen::END_ARRAY_LINE()
555 /* Emit the offset of the start state as a decimal integer. */
/* NOTE(review): redFsm->startState is dereferenced without a null check
 * here; STATE_IDS() guards its call, writeStart() does not. */
556 string RubyCodeGen::START_STATE_ID()
559 ret << redFsm->startState->id;
/* Returns the error state as text: the id of redFsm->errState when the
 * machine has an error state. */
563 string RubyCodeGen::ERROR_STATE()
566 if ( redFsm->errState != 0 )
567 ret << redFsm->errState->id;
/* Returns the first final state as text: the id of redFsm->firstFinState
 * when there is one; redFsm->nextStateId is the other value written. */
573 string RubyCodeGen::FIRST_FINAL_STATE()
576 if ( redFsm->firstFinState != 0 )
577 ret << redFsm->firstFinState->id;
579 ret << redFsm->nextStateId;
/* Returns the prefix put in front of machine variables such as "cs" and
 * "top": the rendering of accessExpr when one is set. */
583 string RubyCodeGen::ACCESS()
586 if ( accessExpr != 0 )
587 INLINE_LIST( ret, accessExpr, 0, false );
591 /* Write out an inline tree structure. Walks the list and possibly calls out
592 * to virtual functions that handle language specific items in the tree.
 * ret receives the generated text; targState and inFinish are forwarded to
 * the handlers that take them. */
593 void RubyCodeGen::INLINE_LIST( ostream &ret, GenInlineList *inlineList,
594 int targState, bool inFinish )
596 for ( GenInlineList::Iter item = *inlineList; item.lte(); item++ ) {
597 switch ( item->type ) {
598 case GenInlineItem::Text:
601 case GenInlineItem::Goto:
602 GOTO( ret, item->targState->id, inFinish );
604 case GenInlineItem::Call:
605 CALL( ret, item->targState->id, targState, inFinish );
607 case GenInlineItem::Next:
608 NEXT( ret, item->targState->id, inFinish );
610 case GenInlineItem::Ret:
611 RET( ret, inFinish );
613 case GenInlineItem::PChar:
616 case GenInlineItem::Char:
619 case GenInlineItem::Hold:
/* Step the position expression back by one. */
620 ret << P() << " = " << P() << " - 1;";
622 case GenInlineItem::Exec:
623 EXEC( ret, item, targState, inFinish );
625 case GenInlineItem::Curs:
628 case GenInlineItem::Targs:
629 ret << "(" << CS() << ")";
631 case GenInlineItem::Entry:
/* An entry reference is emitted as the numeric id of its target state. */
632 ret << item->targState->id;
634 case GenInlineItem::GotoExpr:
635 GOTO_EXPR( ret, item, inFinish );
637 case GenInlineItem::CallExpr:
638 CALL_EXPR( ret, item, targState, inFinish );
640 case GenInlineItem::NextExpr:
641 NEXT_EXPR( ret, item, inFinish );
643 case GenInlineItem::LmSwitch:
644 LM_SWITCH( ret, item, targState, inFinish );
646 case GenInlineItem::LmSetActId:
647 SET_ACT( ret, item );
649 case GenInlineItem::LmSetTokEnd:
650 SET_TOKEND( ret, item );
652 case GenInlineItem::LmGetTokEnd:
653 GET_TOKEND( ret, item );
655 case GenInlineItem::LmInitTokStart:
656 INIT_TOKSTART( ret, item );
658 case GenInlineItem::LmInitAct:
659 INIT_ACT( ret, item );
661 case GenInlineItem::LmSetTokStart:
662 SET_TOKSTART( ret, item );
664 case GenInlineItem::SubAction:
665 SUB_ACTION( ret, item, targState, inFinish );
667 case GenInlineItem::Break:
668 BREAK( ret, targState );
/* Emits the code for an exec item: a "begin ... end" statement that assigns
 * the position expression the value of the item's children, minus one.
 * NOTE(review): the rationale below talks about D, but the text emitted here
 * is Ruby. */
675 void RubyCodeGen::EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish )
677 /* The parser gives fexec two children. The double brackets are for D
678 * code. If the inline list is a single word it will get interpreted as a
679 * C-style cast by the D compiler. */
680 ret << " begin " << P() << " = ((";
681 INLINE_LIST( ret, item->children, targState, inFinish );
682 ret << "))-1; end\n";
/* Emits a Ruby "case" over the act variable with one "when <lmId> then"
 * branch per child of item; each branch body is the child's own inline
 * list. */
685 void RubyCodeGen::LM_SWITCH( ostream &ret, GenInlineItem *item,
686 int targState, int inFinish )
689 " case " << ACT() << "\n";
691 for ( GenInlineList::Iter lma = *item->children; lma.lte(); lma++ ) {
692 /* Write the case label, the action and the case break. */
696 ret << " when " << lma->lmId << " then\n";
699 /* Write the block and close it off. */
701 INLINE_LIST( ret, lma->children, targState, inFinish );
/* Emits "<act> = <lmId>;", storing the item's longest-match id in the act
 * variable. */
708 void RubyCodeGen::SET_ACT( ostream &ret, GenInlineItem *item )
710 ret << ACT() << " = " << item->lmId << ";";
/* Emits "<tokstart> = <NULL_ITEM()>;", resetting the token start. item is
 * not consulted. */
713 void RubyCodeGen::INIT_TOKSTART( ostream &ret, GenInlineItem *item )
715 ret << TOKSTART() << " = " << NULL_ITEM() << ";";
/* Emits "<act> = 0" followed by a newline. item is not consulted. */
718 void RubyCodeGen::INIT_ACT( ostream &ret, GenInlineItem *item )
720 ret << ACT() << " = 0\n";
/* Emits "<tokstart> = <p>" followed by a newline, recording the current
 * position as the token start. item is not consulted. */
723 void RubyCodeGen::SET_TOKSTART( ostream &ret, GenInlineItem *item )
725 ret << TOKSTART() << " = " << P() << "\n";
/* Emits the assignment that sets the token end to the current position,
 * plus item->offset when that offset is non-zero. All text goes to ret so
 * the offset lands directly after the position expression. */
728 void RubyCodeGen::SET_TOKEND( ostream &ret, GenInlineItem *item )
730 /* The tokend action sets tokend. */
731 ret << TOKEND() << " = " << P();
732 if ( item->offset != 0 )
/* This used to write to the member stream `out`, which detached the offset
 * from the assignment whenever ret was a different stream. */
733 ret << "+" << item->offset;
/* Handler for LmGetTokEnd items dispatched by INLINE_LIST(); presumably
 * emits the token end expression -- TODO confirm. */
737 void RubyCodeGen::GET_TOKEND( ostream &ret, GenInlineItem *item )
/* Emits a nested action: when item has children, their inline list is
 * rendered with the same targState and inFinish; an empty child list
 * produces nothing from the code below. */
742 void RubyCodeGen::SUB_ACTION( ostream &ret, GenInlineItem *item,
743 int targState, bool inFinish )
745 if ( item->children->length() > 0 ) {
746 /* Write the block and close it off. */
748 INLINE_LIST( ret, item->children, targState, inFinish );
/* Returns the action value for a transition: the action's location plus
 * one when the transition carries an action. */
753 int RubyCodeGen::TRANS_ACTION( RedTransAp *trans )
755 /* If there are actions, emit them. Otherwise emit zero. */
757 if ( trans->action != 0 )
758 act = trans->action->location+1;
/* Starts a warning message on cerr with the prefix
 * "<sourceFileName>:<line>:<col>: warning: ". Unlike source_error(), there
 * is no assert on sourceFileName here. */
762 ostream &RubyCodeGen::source_warning( const InputLoc &loc )
764 cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": warning: ";
/* Starts an error message on cerr with the prefix
 * "<sourceFileName>:<line>:<col>: ", after asserting that sourceFileName is
 * set. */
768 ostream &RubyCodeGen::source_error( const InputLoc &loc )
771 assert( sourceFileName != 0 );
772 cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": ";
/* Post-processes the reduced machine once a ragel definition has been read:
 * chooses a state ordering, picks default and single transitions, applies
 * the split/in-transition steps that certain code styles need, and checks
 * gblErrorCount along the way. */
776 void RubyCodeGen::finishRagelDef()
779 if ( codeStyle == GenGoto || codeStyle == GenFGoto ||
780 codeStyle == GenIpGoto || codeStyle == GenSplit )
782 /* For directly executable machines there is no required state
783 * ordering. Choose a depth-first ordering to increase the
784 * potential for fall-throughs. */
785 redFsm->depthFirstOrdering();
788 /* The frontend will do this for us, but it may be a good idea to
789 * force it if the intermediate file is edited. */
790 redFsm->sortByStateId();
793 /* Choose default transitions and the single transition. */
794 redFsm->chooseDefaultSpan();
796 /* Maybe do flat expand, otherwise choose single. */
797 if ( codeStyle == GenFlat || codeStyle == GenFFlat )
800 redFsm->chooseSingle();
802 /* If any errors have occurred in the input file then don't write anything. */
803 if ( gblErrorCount > 0 )
806 if ( codeStyle == GenSplit )
807 redFsm->partitionFsm( numSplitPartitions );
809 if ( codeStyle == GenIpGoto || codeStyle == GenSplit )
810 redFsm->setInTrans();
812 /* Analyze Machine will find the final action reference counts, among
813 * other things. We will use these in reporting the usage
814 * of fsm directives in action code. */
817 /* Determine if we should use indices. */
822 /* Determine if we should use indices or not. Two table sizes are estimated from arrayTypeSize(): one where each state's transitions are stored as indices into a shared transition set, and one where each state stores targets (and actions) directly. useIndicies is set when the indexed layout is strictly smaller. */
823 void RubyCodeGen::calcIndexSize()
825 int sizeWithInds = 0, sizeWithoutInds = 0;
827 /* Calculate cost of using with indices. */
828 for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
/* Per-state slot count: singles, ranges and an optional default. */
829 int totalIndex = st->outSingle.length() + st->outRange.length() +
830 (st->defTrans == 0 ? 0 : 1);
831 sizeWithInds += arrayTypeSize(redFsm->maxIndex) * totalIndex;
/* The shared transition set is stored once: targets, plus actions if any. */
833 sizeWithInds += arrayTypeSize(redFsm->maxState) * redFsm->transSet.length();
834 if ( redFsm->anyActions() )
835 sizeWithInds += arrayTypeSize(redFsm->maxActionLoc) * redFsm->transSet.length();
837 /* Calculate the cost of not using indices. */
838 for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
839 int totalIndex = st->outSingle.length() + st->outRange.length() +
840 (st->defTrans == 0 ? 0 : 1);
841 sizeWithoutInds += arrayTypeSize(redFsm->maxState) * totalIndex;
842 if ( redFsm->anyActions() )
843 sizeWithoutInds += arrayTypeSize(redFsm->maxActionLoc) * totalIndex;
846 /* If using indices reduces the size, use them. */
847 useIndicies = sizeWithInds < sizeWithoutInds;
/* Returns the size field of the host type that keyOps reports as able to
 * hold maxVal; asserts that such a type exists. */
850 unsigned int RubyCodeGen::arrayTypeSize( unsigned long maxVal )
852 long long maxValLL = (long long) maxVal;
853 HostType *arrayType = keyOps->typeSubsumes( maxValLL );
854 assert( arrayType != 0 );
855 return arrayType->size;
/* Writes the Ruby initialization code to out. The statements below give
 * p and pe defaults with "||=" (0 and the data length), set cs to the start
 * state, zero the stack top when the machine has action calls or returns,
 * and reset the token start/end and act when hasLongestMatch is set. */
859 void RubyCodeGen::writeInit()
863 out << " " << P() << " ||= 0\n";
866 out << " " << PE() << " ||= " << DATA() << ".length\n";
869 out << " " << CS() << " = " << START() << "\n";
871 /* If there are any calls, then the stack top needs initialization. */
872 if ( redFsm->anyActionCalls() || redFsm->anyActionRets() )
873 out << " " << TOP() << " = 0\n";
875 if ( hasLongestMatch ) {
877 " " << TOKSTART() << " = " << NULL_ITEM() << "\n"
878 " " << TOKEND() << " = " << NULL_ITEM() << "\n"
879 " " << ACT() << " = 0\n";
/* Emits one variable per exported key, named "<DATA_PREFIX()>ex_<name>" and
 * assigned the KEY() text of the export's key. Nothing is emitted when the
 * export list is empty. */
885 void RubyCodeGen::writeExports()
887 if ( exportList.length() > 0 ) {
888 for ( ExportList::Iter ex = exportList; ex.lte(); ex++ ) {
889 STATIC_VAR( ALPH_TYPE(), DATA_PREFIX() + "ex_" + ex->name )
890 << " = " << KEY(ex->key) << "\n";
/* Writes the start state id to out. */
896 void RubyCodeGen::writeStart()
898 out << START_STATE_ID();
/* Writes the first final state id to out. */
901 void RubyCodeGen::writeFirstFinal()
903 out << FIRST_FINAL_STATE();
/* Writes the error state id to out. */
906 void RubyCodeGen::writeError()
908 out << ERROR_STATE();
915 * indent-tabs-mode: 1
916 * c-file-style: "bsd"