2 * Copyright 2006-2007 Adrian Thurston <thurston@cs.queensu.ca>
3 * 2007 Victor Hugo Borja <vhborja@gmail.com>
6 /* This file is part of Ragel.
8 * Ragel is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * Ragel is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with Ragel; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #include "rlgen-ruby.h"
24 #include "rubycodegen.h"
30 /* Integer array line length. */
34 using std::ostringstream;
/* Emit a line directive that refers to the generated output itself. The
 * stream's buffer is cast to output_filter to obtain the current file name
 * and line counter; the directive names the line after the current one.
 * NOTE(review): the static_cast assumes out's streambuf really is an
 * output_filter -- confirm every caller passes such a stream. */
39 void genLineDirective( ostream &out )
41 std::streambuf *sbuf = out.rdbuf();
42 output_filter *filter = static_cast<output_filter*>(sbuf);
43 lineDirective( out, filter->fileName, filter->line + 1 );
/* Emit Ruby for a jump to a fixed state: a begin/end block that assigns
 * gotoDest to the current-state expression and then invokes the _again
 * continuation (followed by whatever CTRL_FLOW() returns). Note that the
 * stream parameter is named `out` and therefore shadows the member stream. */
46 void RubyCodeGen::GOTO( ostream &out, int gotoDest, bool inFinish )
48 out << INDENT_U() << "begin"
49 << INDENT_S() << CS() << " = " << gotoDest
50 << INDENT_S() << "_again.call " << CTRL_FLOW()
51 << INDENT_D() << "end";
/* Emit Ruby for a jump to a computed state. Same shape as GOTO, except the
 * value assigned to the current-state expression is produced by expanding
 * the children of ilItem inside an opening parenthesis. */
54 void RubyCodeGen::GOTO_EXPR( ostream &out, InlineItem *ilItem, bool inFinish )
56 out << INDENT_U() << "begin"
57 << INDENT_S() << CS() << " = (";
/* The target expression is expanded with targState 0. */
58 INLINE_LIST( out, ilItem->children, 0, inFinish );
60 << INDENT_S() << "_again.call " << CTRL_FLOW()
61 << INDENT_D() << "end";
/* Emit Ruby for a call to a fixed state: increment the stack top, store the
 * current state in the slot just below the new top (stack[top-1]), assign
 * callDest to the current state and invoke the _again continuation.
 * The emitted increment is spelled " += 1", matching CALL_EXPR. */
64 void RubyCodeGen::CALL( ostream &out, int callDest, int targState, bool inFinish )
66 out << INDENT_U() << "begin"
67 << INDENT_S() << TOP() << " += 1"
68 << INDENT_S() << STACK() << "[" << TOP() << "-1] = " << CS()
69 << INDENT_S() << CS() << " = " << callDest
70 << INDENT_S() << "_again.call " << CTRL_FLOW()
71 << INDENT_D() << "end";
/* Emit Ruby for a call to a computed state. The stack top is incremented and
 * the current state is saved in stack[top-1]; the new state is the value of
 * the expression expanded from ilItem's children, after which the _again
 * continuation is invoked. */
74 void RubyCodeGen::CALL_EXPR(ostream &out, InlineItem *ilItem, int targState, bool inFinish )
76 out << INDENT_U() << "begin"
77 << INDENT_S() << TOP() << " += 1"
78 << INDENT_S() << STACK() << "[" << TOP() << "-1] = " << CS()
79 << INDENT_S() << CS() << " = (";
80 INLINE_LIST( out, ilItem->children, targState, inFinish );
82 << INDENT_S() << "_again.call " << CTRL_FLOW()
83 << INDENT_D() << "end";
/* Emit Ruby for a return from a call: decrement the stack top and restore
 * the current state from the stack, then invoke the _again continuation.
 *
 * CALL and CALL_EXPR increment top first and then save the state in
 * stack[top-1], i.e. in the slot indexed by the old top. After the decrement
 * here, top equals that old value again, so the saved state lives in
 * stack[top] (not stack[top+1], which is the slot above it). */
86 void RubyCodeGen::RET( ostream &out, bool inFinish )
88 out << INDENT_U() << "begin"
89 << INDENT_S() << TOP() << " -= 1"
90 << INDENT_S() << CS() << " = " << STACK() << "[" << TOP() << "]"
91 << INDENT_S() << "_again.call " << CTRL_FLOW()
92 << INDENT_D() << "end";
/* Emit a call of the _resume continuation followed by CTRL_FLOW().
 * targState is not referenced in the statement shown. */
95 void RubyCodeGen::BREAK( ostream &out, int targState )
97 out << "_resume.call " << CTRL_FLOW();
/* Emit the Ruby code that computes _widec, the key widened by condition
 * results. The generated code starts from the plain key, then binary
 * searches the current state's condition key ranges. Ranges are stored as
 * low/high pairs, hence the steps of two and the `& ~1` that keeps _mid on a
 * pair boundary. On a hit it switches on the condition space found for that
 * range and rebuilds _widec from the space's base key plus one offset per
 * condition that evaluates true. */
100 void RubyCodeGen::COND_TRANSLATE()
102 out << INDENT_S() << "_widec = " << GET_KEY()
103 << INDENT_S() << "_keys = " << CO() << "[" << CS() << "]*2"
104 << INDENT_S() << "_klen = " << CL() << "[" << CS() << "]"
105 << INDENT_U() << "if _klen > 0"
106 << INDENT_S() << "_lower = _keys"
107 << INDENT_S() << "_upper = _keys + (_klen<<1) - 2"
108 << INDENT_U() << "loop do"
109 << INDENT_S() << "break if _upper < _lower"
110 << INDENT_S() << "_mid = _lower + (((_upper-_lower) >> 1) & ~1)"
111 << INDENT_U() << "if " << GET_WIDE_KEY() << " < " << CK() << "[_mid]"
112 << INDENT_O() << "_upper = _mid - 2"
113 << INDENT_U() << "elsif " << GET_WIDE_KEY() << " > " << CK() << "[_mid]"
114 << INDENT_O() << "_lower = _mid + 2"
115 << INDENT_U() << "else"
116 << INDENT_U() << "case " << C() << "[" << CO() << "[" << CS() << "]"
117 << " + ((_mid - _keys)>>1)]"
/* One `when` arm per condition space. */
120 for ( CondSpaceList::Iter csi = condSpaceList; csi.lte(); csi++ ) {
121 CondSpace *condSpace = csi;
122 out << INDENT_U() << "when " << condSpace->condSpaceId << ":" ;
123 out << INDENT_S() << "_widec = " << KEY(condSpace->baseKey)
124 << "+ (" << GET_KEY() << " - " << KEY(keyOps->minKey) << ")" ;
/* The inner iterator reuses the name csi and shadows the outer one; the
 * offset for the condition at position pos is (1 << pos) * alphabet size. */
126 for ( CondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) {
127 Size condValOffset = ((1 << csi.pos()) * keyOps->alphSize());
128 out << INDENT_S() << "_widec += " << condValOffset << " if ( ";
129 CONDITION( out, *csi );
134 out << INDENT_D() << "end # case"
135 << INDENT_D() << "end"
136 << INDENT_D() << "end # loop"
137 << INDENT_D() << "end"
/* Emit the Ruby code that locates the transition for the current key. The
 * generated code runs inside `callcc do |_match|`; calling _match leaves the
 * block as soon as a transition is found. It first binary searches the
 * state's single keys, then its range keys (stored as low/high pairs, so the
 * search moves in steps of two). When nothing matches, _trans has been
 * advanced past all single and range entries.
 * NOTE(review): that final position is presumably the state's default
 * transition slot -- confirm against INDEX_OFFSETS/INDICIES. */
141 void RubyCodeGen::LOCATE_TRANS()
143 out << INDENT_S() << "_keys = " << KO() << "[" << CS() << "]"
144 << INDENT_S() << "_trans = " << IO() << "[" << CS() << "]"
145 << INDENT_S() << "_klen = " << SL() << "[" << CS() << "]"
147 << INDENT_U() << "callcc do |_match|"
/* Search over the single keys. */
148 << INDENT_U() << "if _klen > 0"
149 << INDENT_S() << "_lower = _keys"
150 << INDENT_S() << "_upper = _keys + _klen - 1"
152 << INDENT_U() << "loop do"
153 << INDENT_S() << "break if _upper < _lower"
154 << INDENT_S() << "_mid = _lower + ( (_upper - _lower) >> 1 )"
156 << INDENT_U() << "if " << GET_WIDE_KEY() << " < " << K() << "[_mid]"
157 << INDENT_O() << "_upper = _mid - 1"
158 << INDENT_U() << "elsif " << GET_WIDE_KEY() << " > " << K() << "[_mid]"
159 << INDENT_O() << "_lower = _mid + 1"
160 << INDENT_U() << "else"
161 << INDENT_S() << "_trans += (_mid - _keys)"
162 << INDENT_S() << "_match.call"
163 << INDENT_D() << "end"
164 << INDENT_D() << "end # loop"
/* No single key matched: skip over the singles in both tables. */
165 << INDENT_S() << "_keys += _klen"
166 << INDENT_S() << "_trans += _klen"
167 << INDENT_D() << "end"
/* Search over the range keys; each range occupies two key slots. */
169 << INDENT_S() << "_klen = " << RL() << "[" << CS() << "]"
170 << INDENT_U() << "if _klen > 0"
171 << INDENT_S() << "_lower = _keys"
172 << INDENT_S() << "_upper = _keys + (_klen << 1) - 2"
173 << INDENT_U() << "loop do"
174 << INDENT_S() << "break if _upper < _lower"
175 << INDENT_S() << "_mid = _lower + (((_upper-_lower) >> 1) & ~1)"
176 << INDENT_U() << "if " << GET_WIDE_KEY() << " < " << K() << "[_mid]"
177 << INDENT_O() << "_upper = _mid - 2"
178 << INDENT_U() << "elsif " << GET_WIDE_KEY() << " > " << K() << "[_mid]"
179 << INDENT_O() << "_lower = _mid + 2"
180 << INDENT_U() << "else"
181 << INDENT_S() << "_trans += ((_mid - _keys) >> 1)"
182 << INDENT_S() << "_match.call"
183 << INDENT_D() << "end"
184 << INDENT_D() << "end # loop"
185 << INDENT_S() << "_trans += _klen"
186 << INDENT_D() << "end"
187 << INDENT_D() << "end # cc _match" ;
/* Emit the Ruby code of the machine's execution loop. The generated code is
 * wrapped in `callcc do |_out|` (calling _out leaves the loop) and stores a
 * second continuation in _resume, which is called to process the next
 * character. Within one iteration it runs from-state actions, locates the
 * transition, updates the current state, runs transition actions and
 * to-state actions and finally advances the data pointer.
 *
 * The emitted action loops index the actions array through the local _acts;
 * every reference uses that exact name, since a bare `acts` is not a
 * variable the generated code ever defines. The final resume test uses P()
 * and PE(), like the entry test at the top of the loop. */
190 void RubyCodeGen::writeOutExec()
192 out << INDENT_U() << "callcc do |_out|"
193 << INDENT_S() << "_klen, _trans, _keys";
/* Additional locals are declared only when the machine needs them. */
195 if ( redFsm->anyRegCurStateRef() )
197 if ( redFsm->anyConditions() )
199 if ( redFsm->anyToStateActions() || redFsm->anyRegActions()
200 || redFsm->anyFromStateActions() )
201 out << ", _acts, _nacts";
/* Nothing to do when the data pointer is already at the end. */
206 out << INDENT_S() << "_out.call if " << P() << " == " << PE() ;
208 out << INDENT_S() << "_resume = nil"
209 << INDENT_S() << "callcc { |_cc| _resume = _cc }" ;
/* Leave the loop once the machine is in the error state. */
211 if ( redFsm->errState != 0)
212 out << INDENT_S() << "_out.call if " << CS() << " == " << redFsm->errState->id ;
/* The _again continuation is opened here and closed further down under the
 * same condition. */
214 if ( redFsm->anyRegActions() || redFsm->anyActionGotos() ||
215 redFsm->anyActionCalls() || redFsm->anyActionRets() )
216 out << INDENT_U() << "callcc do |_again|" ;
218 if ( redFsm->anyFromStateActions() ) {
219 out << INDENT_S() << "_acts = " << FSA() << "[" << CS() << "]"
220 << INDENT_S() << "_nacts = " << A() << "[_acts]"
221 << INDENT_S() << "_acts += 1"
222 << INDENT_U() << "while _nacts > 0"
223 << INDENT_S() << "_nacts -= 1"
224 << INDENT_S() << "_acts += 1"
225 << INDENT_U() << "case " << A() << "[_acts - 1]" ;
226 FROM_STATE_ACTION_SWITCH();
228 << INDENT_D() << "end # from state action switch"
229 << INDENT_D() << "end"
233 if ( redFsm->anyConditions() )
/* Remember the state being left when actions refer to it. */
238 if ( redFsm->anyRegCurStateRef() )
239 out << INDENT_S() << "_ps = " << CS() ;
242 out << INDENT_S() << "_trans = " << I() << "[_trans]" ;
244 out << INDENT_S() << CS() << " = " << TT() << "[_trans]" ;
246 if ( redFsm->anyRegActions() ) {
247 out << INDENT_S() << "_again.call if " << TA() << "[_trans] == 0"
249 << INDENT_S() << "_acts = " << TA() << "[_trans]"
250 << INDENT_S() << "_nacts = " << A() << "[_acts]"
251 << INDENT_S() << "_acts += 1"
252 << INDENT_U() << "while _nacts > 0"
253 << INDENT_S() << "_nacts -= 1"
254 << INDENT_S() << "_acts += 1"
255 << INDENT_U() << "case " << A() << "[_acts - 1]" ;
258 << INDENT_D() << "end # action switch"
259 << INDENT_D() << "end"
263 if ( redFsm->anyRegActions() || redFsm->anyActionGotos() ||
264 redFsm->anyActionCalls() || redFsm->anyActionRets() )
265 out << INDENT_D() << "end # cc _again";
267 if ( redFsm->anyToStateActions() ) {
268 out << INDENT_S() << "_acts = " << TSA() << "[" << CS() << "]"
269 << INDENT_S() << "_nacts = " << A() << "[_acts]"
270 << INDENT_S() << "_acts += 1"
271 << INDENT_U() << "while _nacts > 0"
272 << INDENT_S() << "_nacts -= 1"
273 << INDENT_S() << "_acts += 1"
274 << INDENT_U() << "case " << A() << "[_acts - 1]" ;
275 TO_STATE_ACTION_SWITCH();
277 << INDENT_D() << "end # to state action switch"
278 << INDENT_D() << "end"
282 out << INDENT_S() << P() << " += 1" ;
/* Go around again unless the end of the data has been reached. */
285 out << INDENT_S() << "_resume.call if " << P() << " != " << PE();
287 out << INDENT_D() << "end # cc _out" ;
/* Emit the Ruby code that runs the EOF actions of the current state, if the
 * machine has any. The emitted loop has the same shape as the action loops
 * in writeOutExec: _acts indexes the actions array, _nacts counts the
 * actions still to run.
 *
 * Both `while` and `case` raise the indent level (INDENT_U) so that the two
 * closing INDENT_D calls bring indent_level back to where it started. */
290 void RubyCodeGen::writeOutEOF()
292 if ( redFsm->anyEofActions() ) {
293 out << INDENT_S() << "_acts = " << EA() << "[" << CS() << "]"
294 << INDENT_S() << "_nacts = " << A() << "[_acts]"
295 << INDENT_S() << "_acts += 1"
296 << INDENT_U() << "while _nacts > 0"
297 << INDENT_S() << "_nacts -= 1"
298 << INDENT_S() << "_acts += 1"
299 << INDENT_U() << "case " << A() << "[_acts - 1]" ;
302 << INDENT_D() << "end # eof action switch"
303 << INDENT_D() << "end"
/* Emit one `when <actionId>:` arm, followed by the action's code, for every
 * action that is referenced as a from-state action. A line directive for
 * the generated output is written afterwards. */
308 std::ostream &RubyCodeGen::FROM_STATE_ACTION_SWITCH()
310 /* Walk the list of functions, printing the cases. */
311 for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
312 /* Write out referenced actions. */
313 if ( act->numFromStateRefs > 0 ) {
314 /* Write the case label, the action */
315 out << INDENT_S() << "when " << act->actionId << ":" ;
316 ACTION( out, act, 0, false );
320 genLineDirective( out );
/* Emit one `when <actionId>:` arm, followed by the action's code, for every
 * action that is referenced as a to-state action. A line directive for the
 * generated output is written afterwards. */
325 std::ostream &RubyCodeGen::TO_STATE_ACTION_SWITCH()
327 /* Walk the list of functions, printing the cases. */
328 for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
329 /* Write out referenced actions. */
330 if ( act->numToStateRefs > 0 ) {
331 /* Write the case label and the action. */
332 out << INDENT_S() << "when " << act->actionId << ":" ;
333 ACTION( out, act, 0, false );
337 genLineDirective( out );
/* Emit one `when <actionId>:` arm, followed by the action's code, for every
 * action that is referenced as an EOF action. These actions are expanded
 * with inFinish set to true. A line directive for the generated output is
 * written afterwards. */
341 std::ostream &RubyCodeGen::EOF_ACTION_SWITCH()
343 /* Walk the list of functions, printing the cases. */
344 for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
345 /* Write out referenced actions. */
346 if ( act->numEofRefs > 0 ) {
347 /* Write the case label and the action. */
348 out << INDENT_S() << "when " << act->actionId << ":" ;
349 ACTION( out, act, 0, true );
353 genLineDirective( out );
/* Emit one `when <actionId>:` arm, followed by the action's code, for every
 * action that is referenced from a transition. A line directive for the
 * generated output is written afterwards. */
357 std::ostream &RubyCodeGen::ACTION_SWITCH()
359 /* Walk the list of functions, printing the cases. */
360 for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
361 /* Write out referenced actions. */
362 if ( act->numTransRefs > 0 ) {
363 /* Write the case label and the action. */
364 out << INDENT_S() << "when " << act->actionId << ":" ;
365 ACTION( out, act, 0, false );
369 genLineDirective( out );
/* Emit the Ruby initialisation code as a begin/end block: the current state
 * is set to START(); the stack top is zeroed when the machine uses calls or
 * returns; tokstart, tokend and act are reset when hasLongestMatch is set. */
374 void RubyCodeGen::writeOutInit()
376 out << INDENT_U() << "begin"
377 << INDENT_S() << CS() << " = " << START();
379 /* If there are any calls, then the stack top needs initialization. */
380 if ( redFsm->anyActionCalls() || redFsm->anyActionRets() )
381 out << INDENT_S() << TOP() << " = 0";
383 if ( hasLongestMatch ) {
384 out << INDENT_S() << TOKSTART() << " = " << NULL_ITEM()
385 << INDENT_S() << TOKEND() << " = " << NULL_ITEM()
386 << INDENT_S() << ACT() << " = 0"
389 out << INDENT_D() << "end";
/* Pointer-constness qualifier hook of the code generator interface. */
392 string RubyCodeGen::PTR_CONST()
394 /* Not used in Ruby code. */
/* Begin the definition of a data table in the generated Ruby. Opens the
 * singleton class (`class << self`), declares an attr_accessor called `name`
 * and makes both the reader and the writer private, then starts the array
 * literal with `self.<name> = [`. The `type` argument is not used in the
 * statements shown. */
399 std::ostream &RubyCodeGen::OPEN_ARRAY( string type, string name )
401 out << "class << self" << endl
402 << INDENT(1) << "attr_accessor :" << name << endl
403 << INDENT(1) << "private :" << name << ", :" << name << "=" << endl
405 << "self." << name << " = [" << endl;
/* Close the array literal started by OPEN_ARRAY and return the output
 * stream. */
409 std::ostream &RubyCodeGen::CLOSE_ARRAY()
411 return out << "]" << endl;
/* Begin the definition of a class-level value in the generated Ruby: opens
 * the singleton class and declares an attr_accessor called `name`. Callers
 * append the value to the returned stream (see writeOutData). */
414 std::ostream &RubyCodeGen::STATIC_VAR( string type, string name )
416 out << "class << self" << endl
417 << INDENT(1) << "attr_accessor :" << name << endl
/* NOTE(review): presumably the name of the unsigned integer type for this
 * backend -- inferred from the function name only, confirm against the
 * body. */
423 string RubyCodeGen::UINT( )
/* Build the expression text "<ptr> + <offset>". */
430 string RubyCodeGen::ARR_OFF( string ptr, string offset )
432 return ptr + " + " + offset;
/* Cast hook of the code generator interface; `type` names the target type. */
435 string RubyCodeGen::CAST( string type )
437 /* No casts in Ruby. */
/* Expression text for an unset item. writeOutInit and INIT_TOKSTART assign
 * it to tokstart/tokend in the generated code. */
442 string RubyCodeGen::NULL_ITEM()
/* NOTE(review): presumably the pointer-declarator text for this backend --
 * inferred from the function name only, confirm against the body. */
447 string RubyCodeGen::POINTER()
/* NOTE(review): presumably emits the default arm of a generated switch --
 * inferred from the function name only, confirm against the body. */
454 std::ostream &RubyCodeGen::SWITCH_DEFAULT()
/* Build the expression that reads the current input key. A user-supplied
 * getKeyExpr is expanded through INLINE_LIST when present; otherwise the
 * expression indexes `data` with the data pointer P(). */
459 string RubyCodeGen::GET_KEY()
462 if ( getKeyExpr != 0 ) {
463 /* Emit the user supplied method of retrieving the key. */
465 INLINE_LIST( ret, getKeyExpr, 0, false );
469 /* Expression for retrieving the key, use simple dereference. */
470 ret << "data[" << P() << "]";
/* Text appended after every emitted `_again.call ` and `_resume.call `
 * (see GOTO, CALL, RET and BREAK). */
475 string RubyCodeGen::CTRL_FLOW()
/* Emit the code of one user action into ret: a line directive pointing at
 * the action's location in the source file, a `begin` followed by the
 * expanded inline list, and a second line directive.
 * NOTE(review): the second directive also names action->loc.line in the
 * source file rather than a position in the generated output -- confirm
 * this is intended. */
480 void RubyCodeGen::ACTION( ostream &ret, Action *action, int targState, bool inFinish )
482 /* Write the preprocessor line info for going into the source file. */
483 lineDirective( ret, sourceFileName, action->loc.line );
485 /* Write the block and close it off. */
486 ret << "begin" << endl << INDENT(1);
487 INLINE_LIST( ret, action->inlineList, targState, inFinish );
489 lineDirective( ret, sourceFileName, action->loc.line );
/* Build a line break followed by `level` indentation units. A level of zero
 * or less yields just the newline. */
493 string RubyCodeGen::INDENT(int level)
495 string result = "\n";
496 while ( level-- > 0 )
497 result += " "; /* The convention in ruby is 2 spaces per level */
/* Indentation helpers operating on the member indent_level:
 *   INDENT_S - new line at the current level, level unchanged;
 *   INDENT_U - raise the level first, then new line at the new level;
 *   INDENT_D - lower the level first, then new line at the new level;
 *   INDENT_O - new line at the current level, then lower the level. */
500 inline string RubyCodeGen::INDENT_S() { return INDENT(indent_level); }
501 inline string RubyCodeGen::INDENT_U() { return INDENT(++indent_level); }
502 inline string RubyCodeGen::INDENT_D() { return INDENT(--indent_level); }
503 inline string RubyCodeGen::INDENT_O() { return INDENT(indent_level--); }
/* Emit an assignment of nextDest to the current-state expression. No
 * control transfer is emitted. inFinish is unused here. */
506 void RubyCodeGen::NEXT( ostream &ret, int nextDest, bool inFinish )
508 ret << CS() << " = " << nextDest << ";";
/* Emit an assignment of a computed value to the current-state expression;
 * the value is expanded from the children of ilItem after an opening
 * parenthesis. */
511 void RubyCodeGen::NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish )
513 ret << CS() << " = (";
514 INLINE_LIST( ret, ilItem->children, 0, inFinish );
/* Emit an assignment of the expanded child expression to the data pointer
 * P().
 * NOTE(review): the emitted text opens with "{" and double parentheses, a
 * shape the comment below attributes to D code -- confirm it is valid in
 * the generated Ruby. */
518 void RubyCodeGen::EXEC( ostream &ret, InlineItem *item, int targState, int inFinish )
520 /* The parser gives fexec two children. The double brackets are for D
521 * code. If the inline list is a single word it will get interpreted as a
522 * C-style cast by the D compiler. */
523 ret << "{" << P() << " = ((";
524 INLINE_LIST( ret, item->children, targState, inFinish );
/* Emit an assignment of the expanded child expression to TOKEND() instead
 * of the data pointer.
 * NOTE(review): as in EXEC, the emitted text opens with "{" and double
 * parentheses -- confirm it is valid in the generated Ruby. */
528 void RubyCodeGen::EXECTE( ostream &ret, InlineItem *item, int targState, int inFinish )
530 /* Tokend version of exec. */
532 /* The parser gives fexec two children. The double brackets are for D
533 * code. If the inline list is a single word it will get interpreted as a
534 * C-style cast by the D compiler. */
535 ret << "{" << TOKEND() << " = ((";
536 INLINE_LIST( ret, item->children, targState, inFinish );
540 /* Write out an inline tree structure. Walks the list and possibly calls out
541 * to virtual functions that handle language specific items in the tree.
 *
 * ret receives the generated text, targState and inFinish are passed on to
 * the handlers that take them. Each item type is dispatched to the matching
 * emitter. The HoldTE item emits a decrement of tokend spelled " -= 1",
 * because Ruby has no "--" operator. */
542 void RubyCodeGen::INLINE_LIST( ostream &ret, InlineList *inlineList,
543 int targState, bool inFinish )
545 for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
546 switch ( item->type ) {
547 case InlineItem::Text:
550 case InlineItem::Goto:
551 GOTO( ret, item->targState->id, inFinish );
553 case InlineItem::Call:
554 CALL( ret, item->targState->id, targState, inFinish );
556 case InlineItem::Next:
557 NEXT( ret, item->targState->id, inFinish );
559 case InlineItem::Ret:
560 RET( ret, inFinish );
562 case InlineItem::PChar:
565 case InlineItem::Char:
568 case InlineItem::Hold:
571 case InlineItem::Exec:
572 EXEC( ret, item, targState, inFinish );
574 case InlineItem::HoldTE:
575 ret << TOKEND() << " -= 1;";
577 case InlineItem::ExecTE:
578 EXECTE( ret, item, targState, inFinish );
580 case InlineItem::Curs:
583 case InlineItem::Targs:
584 ret << "(" << CS() << ")";
586 case InlineItem::Entry:
587 ret << item->targState->id;
589 case InlineItem::GotoExpr:
590 GOTO_EXPR( ret, item, inFinish );
592 case InlineItem::CallExpr:
593 CALL_EXPR( ret, item, targState, inFinish );
595 case InlineItem::NextExpr:
596 NEXT_EXPR( ret, item, inFinish );
598 case InlineItem::LmSwitch:
599 LM_SWITCH( ret, item, targState, inFinish );
601 case InlineItem::LmSetActId:
602 SET_ACT( ret, item );
604 case InlineItem::LmSetTokEnd:
605 SET_TOKEND( ret, item );
607 case InlineItem::LmGetTokEnd:
608 GET_TOKEND( ret, item );
610 case InlineItem::LmInitTokStart:
611 INIT_TOKSTART( ret, item );
613 case InlineItem::LmInitAct:
614 INIT_ACT( ret, item );
616 case InlineItem::LmSetTokStart:
617 SET_TOKSTART( ret, item );
619 case InlineItem::SubAction:
620 SUB_ACTION( ret, item, targState, inFinish );
622 case InlineItem::Break:
623 BREAK( ret, targState );
/* Prefix for the names of generated data: the machine name followed by an
 * underscore. */
629 string RubyCodeGen::DATA_PREFIX()
632 return FSM_NAME() + "_";
636 /* Emit the alphabet data type. The name starts with the host type's data1
 * and has data2 appended when the host type defines a second word. The
 * append is a plain `+=`, as in WIDE_ALPH_TYPE and ARRAY_TYPE. */
637 string RubyCodeGen::ALPH_TYPE()
639 string ret = keyOps->alphType->data1;
640 if ( keyOps->alphType->data2 != 0 ) {
642 ret += keyOps->alphType->data2;
647 /* Emit the wide alphabet data type, i.e. a type able to hold keys up to
 * redFsm->maxKey. When maxKey exceeds the alphabet's own maximum, a host
 * type that subsumes it is looked up; the lookup is asserted to succeed. */
648 string RubyCodeGen::WIDE_ALPH_TYPE()
651 if ( redFsm->maxKey <= keyOps->maxKey )
654 long long maxKeyVal = redFsm->maxKey.getLongLong();
655 HostType *wideType = keyOps->typeSubsumes( keyOps->isSigned, maxKeyVal );
656 assert( wideType != 0 );
658 ret = wideType->data1;
659 if ( wideType->data2 != 0 ) {
661 ret += wideType->data2;
667 /* Determine if we should use indices or not. Two table layouts are costed
 * with arrayTypeSize(): one with an index table plus per-transition target
 * and action tables, one with per-state-entry target and action tables.
 * useIndicies is set when the indexed layout is strictly smaller. */
668 void RubyCodeGen::calcIndexSize()
670 int sizeWithInds = 0, sizeWithoutInds = 0;
672 /* Calculate cost of using with indices. */
673 for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
/* Entries per state: singles, ranges and one for a default transition. */
674 int totalIndex = st->outSingle.length() + st->outRange.length() +
675 (st->defTrans == 0 ? 0 : 1);
676 sizeWithInds += arrayTypeSize(redFsm->maxIndex) * totalIndex;
678 sizeWithInds += arrayTypeSize(redFsm->maxState) * redFsm->transSet.length();
679 if ( redFsm->anyActions() )
680 sizeWithInds += arrayTypeSize(redFsm->maxActionLoc) * redFsm->transSet.length();
682 /* Calculate the cost of not using indices. */
683 for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
684 int totalIndex = st->outSingle.length() + st->outRange.length() +
685 (st->defTrans == 0 ? 0 : 1);
686 sizeWithoutInds += arrayTypeSize(redFsm->maxState) * totalIndex;
687 if ( redFsm->anyActions() )
688 sizeWithoutInds += arrayTypeSize(redFsm->maxActionLoc) * totalIndex;
691 /* If using indices reduces the size, use them. */
692 useIndicies = sizeWithInds < sizeWithoutInds;
/* Value stored in the to-state actions table for `state`: the action's
 * location plus one when the state has a to-state action. */
695 int RubyCodeGen::TO_STATE_ACTION( RedStateAp *state )
698 if ( state->toStateAction != 0 )
699 act = state->toStateAction->location+1;
/* Value stored in the from-state actions table for `state`: the action's
 * location plus one when the state has a from-state action. */
703 int RubyCodeGen::FROM_STATE_ACTION( RedStateAp *state )
706 if ( state->fromStateAction != 0 )
707 act = state->fromStateAction->location+1;
/* Value stored in the EOF actions table for `state`: the action's location
 * plus one when the state has an EOF action. */
711 int RubyCodeGen::EOF_ACTION( RedStateAp *state )
714 if ( state->eofAction != 0 )
715 act = state->eofAction->location+1;
/* Value stored in the transition actions table for `trans`: the action's
 * location plus one when the transition carries an action. */
720 int RubyCodeGen::TRANS_ACTION( RedTransAp *trans )
722 /* If there are actions, emit them. Otherwise emit zero. */
724 if ( trans->action != 0 )
725 act = trans->action->location+1;
/* Emit, for each state in list order, the running offset of its first
 * entry in the condition tables. The offset advances by the length of the
 * state's condition list. */
729 std::ostream &RubyCodeGen::COND_OFFSETS()
732 int totalStateNum = 0, curKeyOffset = 0;
733 for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
734 /* Write the key offset. */
735 ARRAY_ITEM( curKeyOffset, ++totalStateNum, st.last() );
737 /* Move the key offset ahead. */
738 curKeyOffset += st->stateCondList.length();
/* Emit, for each state in list order, the running offset of its first key
 * in the keys table. A state contributes one key per single transition and
 * two keys (low and high) per range transition. */
744 std::ostream &RubyCodeGen::KEY_OFFSETS()
747 int totalStateNum = 0, curKeyOffset = 0;
748 for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
749 /* Write the key offset. */
750 ARRAY_ITEM( curKeyOffset, ++totalStateNum, st.last() );
752 /* Move the key offset ahead. */
753 curKeyOffset += st->outSingle.length() + st->outRange.length()*2;
/* Emit, for each state in list order, the running offset of its first
 * entry in the index table. A state contributes one entry per single and
 * per range transition; states with a default transition are handled by the
 * trailing condition. */
760 std::ostream &RubyCodeGen::INDEX_OFFSETS()
763 int totalStateNum = 0, curIndOffset = 0;
764 for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
765 /* Write the index offset. */
766 ARRAY_ITEM( curIndOffset, ++totalStateNum, st.last() );
768 /* Move the index offset ahead. */
769 curIndOffset += st->outSingle.length() + st->outRange.length();
770 if ( st->defTrans != 0 )
/* Emit, for each state in list order, the length of its condition list. */
777 std::ostream &RubyCodeGen::COND_LENS()
780 int totalStateNum = 0;
781 for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
782 /* Write the condition list length. */
783 ARRAY_ITEM( st->stateCondList.length(), ++totalStateNum, st.last() );
/* Emit, for each state in list order, the number of its single
 * transitions. */
790 std::ostream &RubyCodeGen::SINGLE_LENS()
793 int totalStateNum = 0;
794 for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
795 /* Write singles length. */
796 ARRAY_ITEM( st->outSingle.length(), ++totalStateNum, st.last() );
/* Emit, for each state in list order, the number of its range
 * transitions. */
802 std::ostream &RubyCodeGen::RANGE_LENS()
805 int totalStateNum = 0;
806 for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
807 /* Emit length of range index. */
808 ARRAY_ITEM( st->outRange.length(), ++totalStateNum, st.last() );
/* Emit, for each state in list order, its to-state action entry as computed
 * by TO_STATE_ACTION. */
814 std::ostream &RubyCodeGen::TO_STATE_ACTIONS()
817 int totalStateNum = 0;
818 for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
819 /* Write any to-state action. */
820 ARRAY_ITEM( TO_STATE_ACTION(st), ++totalStateNum, st.last() );
/* Emit, for each state in list order, its from-state action entry as
 * computed by FROM_STATE_ACTION. */
826 std::ostream &RubyCodeGen::FROM_STATE_ACTIONS()
829 int totalStateNum = 0;
830 for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
831 /* Write any from-state action. */
832 ARRAY_ITEM( FROM_STATE_ACTION(st), ++totalStateNum, st.last() );
/* Emit, for each state in list order, its EOF action entry as computed by
 * EOF_ACTION. */
838 std::ostream &RubyCodeGen::EOF_ACTIONS()
841 int totalStateNum = 0;
842 for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
843 /* Write any eof action. */
844 ARRAY_ITEM( EOF_ACTION(st), ++totalStateNum, st.last() );
/* Emit the condition keys table: for every state, the low and high key of
 * each entry in its condition list, followed by one terminating zero. */
850 std::ostream &RubyCodeGen::COND_KEYS()
854 for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
855 /* Loop the state's condition ranges. */
856 for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) {
858 ARRAY_ITEM( KEY( sc->lowKey ), ++totalTrans, false );
859 ARRAY_ITEM( KEY( sc->highKey ), ++totalTrans, false );
863 /* Output one last number so we don't have to figure out when the last
864 * entry is and avoid writing a comma. */
865 ARRAY_ITEM( 0, ++totalTrans, true );
/* Emit the condition spaces table: for every state, the condition space id
 * of each entry in its condition list, followed by one terminating zero. */
870 std::ostream &RubyCodeGen::COND_SPACES()
874 for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
875 /* Loop the state's condition ranges. */
876 for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) {
878 ARRAY_ITEM( KEY( sc->condSpace->condSpaceId ), ++totalTrans, false );
882 /* Output one last number so we don't have to figure out when the last
883 * entry is and avoid writing a comma. */
884 ARRAY_ITEM( 0, ++totalTrans, true );
/* Emit the keys table: for every state, the key of each single transition
 * and then the low and high key of each range transition, followed by one
 * terminating zero. */
889 std::ostream &RubyCodeGen::KEYS()
893 for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
894 /* Loop the singles. */
895 for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) {
896 ARRAY_ITEM( KEY( stel->lowKey ), ++totalTrans, false );
899 /* Loop the state's range transitions. */
900 for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) {
902 ARRAY_ITEM( KEY( rtel->lowKey ), ++totalTrans, false );
905 ARRAY_ITEM( KEY( rtel->highKey ), ++totalTrans, false );
909 /* Output one last number so we don't have to figure out when the last
910 * entry is and avoid writing a comma. */
911 ARRAY_ITEM( 0, ++totalTrans, true );
/* Emit the index table: for every state, the transition id of each single
 * transition, each range transition and the default transition (when
 * present), followed by one terminating zero. */
916 std::ostream &RubyCodeGen::INDICIES()
920 for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
921 /* Walk the singles. */
922 for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) {
923 ARRAY_ITEM( KEY( stel->value->id ), ++totalTrans, false );
926 /* Walk the ranges. */
927 for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) {
928 ARRAY_ITEM( KEY( rtel->value->id ), ++totalTrans, false );
931 /* The state's default index goes next. */
932 if ( st->defTrans != 0 ) {
933 ARRAY_ITEM( KEY( st->defTrans->id ), ++totalTrans, false );
937 /* Output one last number so we don't have to figure out when the last
938 * entry is and avoid writing a comma. */
939 ARRAY_ITEM( 0, ++totalTrans, true );
/* Emit the transition targets table for the layout without indices: for
 * every state, the target state id of each single transition, each range
 * transition and the default transition (when present), followed by one
 * terminating zero. */
944 std::ostream &RubyCodeGen::TRANS_TARGS()
948 for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
949 /* Walk the singles. */
950 for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) {
951 RedTransAp *trans = stel->value;
952 ARRAY_ITEM( KEY( trans->targ->id ), ++totalTrans, false );
955 /* Walk the ranges. */
956 for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) {
957 RedTransAp *trans = rtel->value;
958 ARRAY_ITEM( KEY( trans->targ->id ), ++totalTrans, false );
961 /* The state's default target state. */
962 if ( st->defTrans != 0 ) {
963 RedTransAp *trans = st->defTrans;
964 ARRAY_ITEM( KEY( trans->targ->id ), ++totalTrans, false );
968 /* Output one last number so we don't have to figure out when the last
969 * entry is and avoid writing a comma. */
970 ARRAY_ITEM( 0, ++totalTrans, true );
/* Emit the transition actions table for the layout without indices: for
 * every state, the TRANS_ACTION value of each single transition, each range
 * transition and the default transition (when present), followed by one
 * terminating zero. */
976 std::ostream &RubyCodeGen::TRANS_ACTIONS()
980 for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
981 /* Walk the singles. */
982 for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) {
983 RedTransAp *trans = stel->value;
984 ARRAY_ITEM( TRANS_ACTION( trans ), ++totalTrans, false );
987 /* Walk the ranges. */
988 for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) {
989 RedTransAp *trans = rtel->value;
990 ARRAY_ITEM( TRANS_ACTION( trans ), ++totalTrans, false );
993 /* The state's default transition goes next. */
994 if ( st->defTrans != 0 ) {
995 RedTransAp *trans = st->defTrans;
996 ARRAY_ITEM( TRANS_ACTION( trans ), ++totalTrans, false );
1000 /* Output one last number so we don't have to figure out when the last
1001 * entry is and avoid writing a comma. */
1002 ARRAY_ITEM( 0, ++totalTrans, true );
/* Emit the transition targets table for the indexed layout: one target
 * state id per transition in redFsm->transSet, written in order of
 * transition id. The last entry is flagged as such to ARRAY_ITEM.
 * NOTE(review): transPtrs is allocated with new[]; confirm it is released
 * with delete[] before the function returns. */
1007 std::ostream &RubyCodeGen::TRANS_TARGS_WI()
1009 /* Transitions must be written ordered by their id. */
1010 RedTransAp **transPtrs = new RedTransAp*[redFsm->transSet.length()];
1011 for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ )
1012 transPtrs[trans->id] = trans;
1014 /* Keep a count of the num of items in the array written. */
1016 int totalStates = 0;
1017 for ( int t = 0; t < redFsm->transSet.length(); t++ ) {
1018 /* Write out the target state. */
1019 RedTransAp *trans = transPtrs[t];
1020 ARRAY_ITEM( trans->targ->id, ++totalStates, ( t >= redFsm->transSet.length()-1 ) );
/* Emit the transition actions table for the indexed layout: one
 * TRANS_ACTION value per transition in redFsm->transSet, written in order
 * of transition id. The last entry is flagged as such to ARRAY_ITEM.
 * NOTE(review): transPtrs is allocated with new[]; confirm it is released
 * with delete[] before the function returns. */
1028 std::ostream &RubyCodeGen::TRANS_ACTIONS_WI()
1030 /* Transitions must be written ordered by their id. */
1031 RedTransAp **transPtrs = new RedTransAp*[redFsm->transSet.length()];
1032 for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ )
1033 transPtrs[trans->id] = trans;
1035 /* Keep a count of the num of items in the array written. */
1038 for ( int t = 0; t < redFsm->transSet.length(); t++ ) {
1039 /* Write the function for the transition. */
1040 RedTransAp *trans = transPtrs[t];
1041 ARRAY_ITEM( TRANS_ACTION( trans ), ++totalAct, ( t >= redFsm->transSet.length()-1 ) );
/* Emit the static data of the machine: the actions array, the condition
 * tables (only when the machine has conditions), the key, length and offset
 * tables, the transition tables in either the indexed or the direct layout
 * (chosen by useIndicies), the per-state action tables that are needed, and
 * finally the start, first-final and error state values.
 * NOTE(review): the string literals at the top spell out Java helper
 * methods (unpack_byte/unpack_short) -- confirm whether they are disabled
 * or a leftover from the Java generator that must not reach Ruby output. */
1049 void RubyCodeGen::writeOutData()
1052 " private static byte[] unpack_byte(String packed)\n"
1054 " byte[] ret = new byte[packed.length()];\n"
1055 " for (int i = 0; i < packed.length(); i++)\n"
1057 " int value = packed.charAt(i);\n"
1059 " ret[i] = (byte) value;\n"
1063 " private static short[] unpack_short(String packed)\n"
1065 " short[] ret = new short[packed.length()];\n"
1066 " for (int i = 0; i < packed.length(); i++)\n"
1068 " int value = packed.charAt(i);\n"
1070 " ret[i] = (short) value;\n"
1075 /* If there are any transition functions then output the array. If there
1076 * are none, don't bother emitting an empty array that won't be used. */
1077 if ( redFsm->anyActions() ) {
1078 OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActArrItem), A() );
/* Condition offsets, lengths, keys and spaces. */
1084 if ( redFsm->anyConditions() ) {
1085 OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondOffset), CO() );
1090 OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondLen), CL() );
1095 OPEN_ARRAY( WIDE_ALPH_TYPE(), CK() );
1100 OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondSpaceId), C() );
/* Key offsets, keys, single/range lengths and index offsets. */
1106 OPEN_ARRAY( ARRAY_TYPE(redFsm->maxKeyOffset), KO() );
1111 OPEN_ARRAY( WIDE_ALPH_TYPE(), K() );
1116 OPEN_ARRAY( ARRAY_TYPE(redFsm->maxSingleLen), SL() );
1121 OPEN_ARRAY( ARRAY_TYPE(redFsm->maxRangeLen), RL() );
1126 OPEN_ARRAY( ARRAY_TYPE(redFsm->maxIndexOffset), IO() );
/* Indexed layout: index table plus per-transition targets and actions. */
1131 if ( useIndicies ) {
1132 OPEN_ARRAY( ARRAY_TYPE(redFsm->maxIndex), I() );
1137 OPEN_ARRAY( ARRAY_TYPE(redFsm->maxState), TT() );
1142 if ( redFsm->anyActions() ) {
1143 OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), TA() );
1150 OPEN_ARRAY( ARRAY_TYPE(redFsm->maxState), TT() );
1155 if ( redFsm->anyActions() ) {
1156 OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), TA() );
1163 if ( redFsm->anyToStateActions() ) {
1164 OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), TSA() );
1170 if ( redFsm->anyFromStateActions() ) {
1171 OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), FSA() );
1172 FROM_STATE_ACTIONS();
1177 if ( redFsm->anyEofActions() ) {
1178 OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), EA() );
1184 STATIC_VAR( "int", START() ) << " = " << START_STATE_ID() << ";\n"
/* The first-final value is only written on request. */
1187 if ( writeFirstFinal ) {
1188 STATIC_VAR( "int" , FIRST_FINAL() ) << " = " << FIRST_FINAL_STATE() << ";\n"
1193 STATIC_VAR( "int", ERROR() ) << " = " << ERROR_STATE() << ";\n"
/* NOTE(review): presumably emits whatever starts a line of array items --
 * inferred from the function name only, confirm against the body. */
1199 std::ostream &RubyCodeGen::START_ARRAY_LINE()
/* Write one element of a data table. `item` is the value, `count` the
 * one-based position of the element (used to break lines every IALL items)
 * and `last` marks the final element of the table.
 * NOTE(review): the comments and the octal/hex escape output below describe
 * packing values into a Java string -- confirm which of these statements
 * are live for the Ruby array literal opened by OPEN_ARRAY. */
1205 std::ostream &RubyCodeGen::ARRAY_ITEM( int item, int count, bool last )
1207 // 0 codes in 2 bytes in the Java class file and is common,
1208 // so we increment all values by one when packing
/* The stream's format flags are saved here and restored after the
 * formatted output. */
1211 std::ios_base::fmtflags originalFlags=out.flags();
1214 out << std::oct << "\\" << item;
1218 out << std::hex << "\\u" << std::setfill('0') << std::setw(4) << item;
1220 out.flags(originalFlags);
1224 if ( count % IALL == 0 )
/* NOTE(review): presumably emits whatever ends a line of array items --
 * inferred from the function name only, confirm against the body. */
1233 std::ostream &RubyCodeGen::END_ARRAY_LINE()
/* Size of the host type that keyOps->typeSubsumes() selects for maxVal.
 * The lookup is asserted to succeed. Used by calcIndexSize to cost table
 * layouts. */
1240 unsigned int RubyCodeGen::arrayTypeSize( unsigned long maxVal )
1242 long long maxValLL = (long long) maxVal;
1243 HostType *arrayType = keyOps->typeSubsumes( maxValLL );
1244 assert( arrayType != 0 );
1245 return arrayType->size;
/* Name of the host type that keyOps->typeSubsumes() selects for maxVal: the
 * type's data1, with data2 appended when the type defines a second word.
 * The lookup is asserted to succeed. */
1248 string RubyCodeGen::ARRAY_TYPE( unsigned long maxVal )
1250 long long maxValLL = (long long) maxVal;
1251 HostType *arrayType = keyOps->typeSubsumes( maxValLL );
1252 assert( arrayType != 0 );
1254 string ret = arrayType->data1;
1255 if ( arrayType->data2 != 0 ) {
1257 ret += arrayType->data2;
1263 /* Write out the fsm name. DATA_PREFIX builds the data name prefix from
 * it. */
1264 string RubyCodeGen::FSM_NAME()
1269 /* Emit the id of the start state (redFsm->startState->id) as a decimal
 * integer. */
1270 string RubyCodeGen::START_STATE_ID()
1273 ret << redFsm->startState->id;
1277 /* Write out the array of actions. The array starts with a zero entry;
 * each action table in redFsm->actionMap then contributes its length
 * followed by the ids of its actions. The very last action id written is
 * flagged as the final element. */
1278 std::ostream &RubyCodeGen::ACTIONS_ARRAY()
1281 int totalActions = 0;
1282 ARRAY_ITEM( 0, ++totalActions, false );
1283 for ( ActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) {
1284 /* Write out the length, which will never be the last character. */
1285 ARRAY_ITEM( act->key.length(), ++totalActions, false );
1287 for ( ActionTable::Iter item = act->key; item.lte(); item++ ) {
1288 ARRAY_ITEM( item->value->actionId, ++totalActions, (act.last() && item.last()) );
/* Build the expression that names the current state. A user-supplied
 * curStateExpr is expanded through INLINE_LIST when present; otherwise the
 * expression is ACCESS() followed by "cs". */
1296 string RubyCodeGen::CS()
1299 if ( curStateExpr != 0 ) {
1300 /* Emit the user supplied expression for the current state. */
1302 INLINE_LIST( ret, curStateExpr, 0, false );
1306 /* Default expression for the current state: the access prefix and "cs". */
1307 ret << ACCESS() << "cs";
/* Build the access prefix put in front of machine variables (see CS). It is
 * expanded from the user-supplied accessExpr when one is set. */
1312 string RubyCodeGen::ACCESS()
1315 if ( accessExpr != 0 )
1316 INLINE_LIST( ret, accessExpr, 0, false );
/* Expression for the key that transitions are matched against; the choice
 * depends on whether the machine has any conditions.
 * NOTE(review): presumably the widened key (_widec, see COND_TRANSLATE)
 * with conditions and the plain key otherwise -- confirm against the
 * body. */
1320 string RubyCodeGen::GET_WIDE_KEY()
1322 if ( redFsm->anyConditions() )
/* Per-state variant of GET_WIDE_KEY: the choice depends on whether `state`
 * has a non-empty condition list.
 * NOTE(review): presumably the widened key for such states and the plain
 * key otherwise -- confirm against the body. */
1328 string RubyCodeGen::GET_WIDE_KEY( RedStateAp *state )
1330 if ( state->stateCondList.length() > 0 )
1336 /* Write out level number of tabs. Makes the nested binary search nice
1338 string RubyCodeGen::TABS( int level )
1341 while ( level-- > 0 )
/* Convert a Key to the integer that is written into the data tables. */
1346 int RubyCodeGen::KEY( Key key )
1348 return key.getVal();
/* Emit the longest-match switch on the `act` variable: an optional case 0
 * that resets tokend to tokstart and jumps to the error state, followed by
 * one case per child of `item`, each containing that child's expanded
 * inline list.
 * NOTE(review): the emitted text uses C-style `switch`/`case N:`/`default:
 * break;` syntax -- confirm it is valid for the generated Ruby, which
 * elsewhere in this generator uses `case`/`when`. */
1352 void RubyCodeGen::LM_SWITCH( ostream &ret, InlineItem *item,
1353 int targState, int inFinish )
1356 " switch( act ) {\n";
1358 /* If the switch handles error then we also forced the error state. It
1360 if ( item->handlesError ) {
1361 ret << " case 0: " << TOKEND() << " = " << TOKSTART() << "; ";
1362 GOTO( ret, redFsm->errState->id, inFinish );
1366 for ( InlineList::Iter lma = *item->children; lma.lte(); lma++ ) {
1367 /* Write the case label, the action and the case break. */
1368 ret << " case " << lma->lmId << ":\n";
1370 /* Write the block and close it off. */
1372 INLINE_LIST( ret, lma->children, targState, inFinish );
1377 /* Default required for D code. */
1379 " default: break;\n"
1384 void RubyCodeGen::SET_ACT( ostream &ret, InlineItem *item )
1386 ret << ACT() << " = " << item->lmId << ";";
1389 void RubyCodeGen::SET_TOKEND( ostream &ret, InlineItem *item )
1391 /* The tokend action sets tokend. */
1392 ret << TOKEND() << " = " << P();
1393 if ( item->offset != 0 )
1394 out << "+" << item->offset;
/* NOTE(review): presumably writes the token-end expression to ret, mirroring
 * SET_TOKEND -- confirm against the function body. */
1398 void RubyCodeGen::GET_TOKEND( ostream &ret, InlineItem *item )
1403 void RubyCodeGen::INIT_TOKSTART( ostream &ret, InlineItem *item )
1405 ret << TOKSTART() << " = " << NULL_ITEM() << ";";
1408 void RubyCodeGen::INIT_ACT( ostream &ret, InlineItem *item )
1410 ret << ACT() << " = 0;";
1413 void RubyCodeGen::SET_TOKSTART( ostream &ret, InlineItem *item )
1415 ret << TOKSTART() << " = " << P() << ";";
/* Emit the code for a nested action. Nothing is generated when the item has
 * no children; otherwise the children are expanded through INLINE_LIST().
 *
 * ret:       stream receiving the generated code.
 * item:      inline item whose children form the sub-action.
 * targState: forwarded unchanged to INLINE_LIST().
 * inFinish:  forwarded unchanged to INLINE_LIST(). */
1418 void RubyCodeGen::SUB_ACTION( ostream &ret, InlineItem *item,
1419 int targState, bool inFinish )
1421 if ( item->children->length() > 0 ) {
1422 /* Write the block and close it off. */
1424 INLINE_LIST( ret, item->children, targState, inFinish );
/* Emit the code of a condition action: a line directive pointing at the
 * condition's line in sourceFileName, then the condition's inline list.
 *
 * ret:       stream receiving the generated code.
 * condition: action whose location and inlineList are used. */
1429 void RubyCodeGen::CONDITION( ostream &ret, Action *condition )
1432 lineDirective( ret, sourceFileName, condition->loc.line );
/* Target state 0 and inFinish false are passed for conditions. */
1433 INLINE_LIST( ret, condition->inlineList, 0, false );
/* Render the id of the error state as text. The id is only written when the
 * machine actually has an error state (redFsm->errState is non-null). */
1436 string RubyCodeGen::ERROR_STATE()
1439 if ( redFsm->errState != 0 )
1440 ret << redFsm->errState->id;
1446 string RubyCodeGen::FIRST_FINAL_STATE()
1449 if ( redFsm->firstFinState != 0 )
1450 ret << redFsm->firstFinState->id;
1452 ret << redFsm->nextStateId;
1456 /* Generate the code for an fsm. Assumes parseData is set up properly. Called
1457 * by parser code. The hasBeenPrepared flag is checked on entry and set
 * right after, so the preparation steps below are guarded against repeats. */
1458 void RubyCodeGen::prepareMachine()
1460 if ( hasBeenPrepared )
1462 hasBeenPrepared = true;
1464 /* Do this before distributing transitions out to singles and defaults
1465 * makes life easier. */
1466 redFsm->maxKey = findMaxKey();
1468 redFsm->assignActionLocs();
1470 /* The frontend will do this for us, but it may be a good idea to force it
1471 * if the intermediate file is edited. */
1472 redFsm->sortByStateId();
1474 /* Find the first final state. */
1476 redFsm->findFirstFinState();
1478 /* Choose default transitions and the single transition. */
1479 redFsm->chooseDefaultSpan();
1481 /* Maybe do flat expand, otherwise choose single. */
1482 redFsm->chooseSingle();
1484 /* If any errors have occurred in the input file then don't write anything. */
1485 if ( gblErrorCount > 0 )
1488 /* Analyze Machine will find the final action reference counts, among
1489 * other things. We will use these in reporting the usage
1490 * of fsm directives in action code. */
1493 /* Determine if we should use indices. */
/* NOTE(review): presumably the hook run once a ragel definition has been
 * fully read -- confirm against the function body and its caller. */
1497 void RubyCodeGen::finishRagelDef()
/* Handle one write statement. args[0] names what to write ("data", "init",
 * "exec" or "eof") and args[1..nargs-1] are its options. An option that is
 * not recognized for the given statement produces a warning through
 * source_warning() and is otherwise ignored.
 *
 * loc:   source location used when reporting warnings.
 * nargs: number of entries in args.
 * args:  statement name followed by its options. */
1502 void RubyCodeGen::writeStatement( InputLoc &loc, int nargs, char **args )
1504 /* Force a newline. */
1506 genLineDirective( out );
/* "data" accepts the options noerror, noprefix and nofinal. */
1508 if ( strcmp( args[0], "data" ) == 0 ) {
1509 for ( int i = 1; i < nargs; i++ ) {
1510 if ( strcmp( args[i], "noerror" ) == 0 )
1512 else if ( strcmp( args[i], "noprefix" ) == 0 )
1514 else if ( strcmp( args[i], "nofinal" ) == 0 )
1515 writeFirstFinal = false;
1517 source_warning(loc) << "unrecognized write option \"" <<
1518 args[i] << "\"" << endl;
/* "init" accepts no options; every one given is reported. */
1523 else if ( strcmp( args[0], "init" ) == 0 ) {
1524 for ( int i = 1; i < nargs; i++ ) {
1525 source_warning(loc) << "unrecognized write option \"" <<
1526 args[i] << "\"" << endl;
/* "exec" accepts the option noend. */
1530 else if ( strcmp( args[0], "exec" ) == 0 ) {
1531 for ( int i = 1; i < nargs; i++ ) {
1532 if ( strcmp( args[i], "noend" ) == 0 )
1535 source_warning(loc) << "unrecognized write option \"" <<
1536 args[i] << "\"" << endl;
1540 /* Must set labels immediately before writing because we may depend
1541 * on the noend write option. */
/* "eof" accepts no options; every one given is reported. */
1545 else if ( strcmp( args[0], "eof" ) == 0 ) {
1546 for ( int i = 1; i < nargs; i++ ) {
1547 source_warning(loc) << "unrecognized write option \"" <<
1548 args[i] << "\"" << endl;
1553 /* EMIT An error here. */
1557 ostream &RubyCodeGen::source_warning( const InputLoc &loc )
1559 cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": warning: ";
1563 ostream &RubyCodeGen::source_error( const InputLoc &loc )
1566 assert( sourceFileName != 0 );
1567 cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": ";