2 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
3 * 2004 Eric Ocean <eric.ocean@ampede.com>
4 * 2005 Alan West <alan@alanz.com>
7 /* This file is part of Ragel.
9 * Ragel is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * Ragel is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Ragel; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #include "rlcodegen.h"
25 #include "fsmcodegen.h"
33 using std::ostringstream;
39 /* Determine if a string is only whitespace. Code blocks that are only
40 * whitespace need not be output. */
41 bool onlyWhitespace( char *str )
44 if ( *str != ' ' && *str != '\t' && *str != '\n' &&
45 *str != '\v' && *str != '\f' && *str != '\r' )
52 /* Init code gen with in parameters. */
53 FsmCodeGen::FsmCodeGen( ostream &out )
59 bAnyToStateActions(false),
60 bAnyFromStateActions(false),
61 bAnyRegActions(false),
62 bAnyEofActions(false),
63 bAnyActionGotos(false),
64 bAnyActionCalls(false),
65 bAnyActionRets(false),
66 bAnyRegActionRets(false),
67 bAnyRegActionByValControl(false),
68 bAnyRegNextStmt(false),
69 bAnyRegCurStateRef(false),
71 bAnyLmSwitchError(false),
76 /* Does the machine have any actions. */
77 bool FsmCodeGen::anyActions()
79 return redFsm->actionMap.length() > 0;
82 /* Assign ids to referenced actions. */
83 void FsmCodeGen::assignActionIds()
86 for ( ActionList::Iter act = cgd->actionList; act.lte(); act++ ) {
87 /* Only ever interested in referenced actions. */
88 if ( act->numRefs() > 0 )
89 act->actionId = nextActionId++;
93 void FsmCodeGen::setValueLimits()
104 maxFlatIndexOffset = 0;
108 maxCondIndexOffset = 0;
110 /* In both of these cases the 0 index is reserved for no value, so the max
111 * is one more than it would be if they started at 0. */
112 maxIndex = redFsm->transSet.length();
113 maxCond = cgd->condSpaceList.length();
115 /* The nextStateId - 1 is the last state id assigned. */
116 maxState = redFsm->nextStateId - 1;
118 for ( CondSpaceList::Iter csi = cgd->condSpaceList; csi.lte(); csi++ ) {
119 if ( csi->condSpaceId > maxCondSpaceId )
120 maxCondSpaceId = csi->condSpaceId;
123 for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
124 /* Maximum cond length. */
125 if ( st->stateCondList.length() > maxCondLen )
126 maxCondLen = st->stateCondList.length();
128 /* Maximum single length. */
129 if ( st->outSingle.length() > maxSingleLen )
130 maxSingleLen = st->outSingle.length();
132 /* Maximum range length. */
133 if ( st->outRange.length() > maxRangeLen )
134 maxRangeLen = st->outRange.length();
136 /* The key offset index offset for the state after last is not used, skip it.. */
138 maxCondOffset += st->stateCondList.length();
139 maxKeyOffset += st->outSingle.length() + st->outRange.length()*2;
140 maxIndexOffset += st->outSingle.length() + st->outRange.length() + 1;
144 if ( st->condList != 0 ) {
145 unsigned long long span = keyOps->span( st->condLowKey, st->condHighKey );
146 if ( span > maxCondSpan )
151 if ( st->transList != 0 ) {
152 unsigned long long span = keyOps->span( st->lowKey, st->highKey );
153 if ( span > maxSpan )
157 /* Max cond index offset. */
159 if ( st->condList != 0 )
160 maxCondIndexOffset += keyOps->span( st->condLowKey, st->condHighKey );
163 /* Max flat index offset. */
165 if ( st->transList != 0 )
166 maxFlatIndexOffset += keyOps->span( st->lowKey, st->highKey );
167 maxFlatIndexOffset += 1;
171 for ( ActionTableMap::Iter at = redFsm->actionMap; at.lte(); at++ ) {
172 /* Maximum id of action lists. */
173 if ( at->actListId+1 > maxActListId )
174 maxActListId = at->actListId+1;
176 /* Maximum location of items in action array. */
177 if ( at->location+1 > maxActionLoc )
178 maxActionLoc = at->location+1;
180 /* Maximum values going into the action array. */
181 if ( at->key.length() > maxActArrItem )
182 maxActArrItem = at->key.length();
183 for ( ActionTable::Iter item = at->key; item.lte(); item++ ) {
184 if ( item->value->actionId > maxActArrItem )
185 maxActArrItem = item->value->actionId;
191 unsigned int FsmCodeGen::arrayTypeSize( unsigned long maxVal )
193 long long maxValLL = (long long) maxVal;
194 HostType *arrayType = keyOps->typeSubsumes( maxValLL );
195 assert( arrayType != 0 );
196 return arrayType->size;
199 string FsmCodeGen::ARRAY_TYPE( unsigned long maxVal )
201 long long maxValLL = (long long) maxVal;
202 HostType *arrayType = keyOps->typeSubsumes( maxValLL );
203 assert( arrayType != 0 );
205 string ret = arrayType->data1;
206 if ( arrayType->data2 != 0 ) {
208 ret += arrayType->data2;
214 /* Write out the fsm name. */
215 string FsmCodeGen::FSM_NAME()
220 /* Emit the offset of the start state as a decimal integer. */
221 string FsmCodeGen::START_STATE_ID()
224 ret << redFsm->startState->id;
228 /* Write out the array of actions. */
229 std::ostream &FsmCodeGen::ACTIONS_ARRAY()
232 int totalActions = 1;
233 for ( ActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) {
234 /* Write out the length, which will never be the last character. */
235 out << act->key.length() << ", ";
236 /* Put in a line break every 8 */
237 if ( totalActions++ % 8 == 7 )
240 for ( ActionTable::Iter item = act->key; item.lte(); item++ ) {
241 out << item->value->actionId;
242 if ( ! (act.last() && item.last()) )
245 /* Put in a line break every 8 */
246 if ( totalActions++ % 8 == 7 )
255 string FsmCodeGen::CS()
258 if ( cgd->curStateExpr != 0 ) {
259 /* Emit the user supplied method of retrieving the key. */
261 INLINE_LIST( ret, cgd->curStateExpr, 0, false );
265 /* Expression for retrieving the key, use simple dereference. */
266 ret << ACCESS() << "cs";
271 string FsmCodeGen::ACCESS()
274 if ( cgd->accessExpr != 0 )
275 INLINE_LIST( ret, cgd->accessExpr, 0, false );
279 string FsmCodeGen::GET_WIDE_KEY()
281 if ( anyConditions() )
287 string FsmCodeGen::GET_WIDE_KEY( RedStateAp *state )
289 if ( state->stateCondList.length() > 0 )
295 string FsmCodeGen::GET_KEY()
298 if ( cgd->getKeyExpr != 0 ) {
299 /* Emit the user supplied method of retrieving the key. */
301 INLINE_LIST( ret, cgd->getKeyExpr, 0, false );
305 /* Expression for retrieving the key, use simple dereference. */
306 ret << "(*" << P() << ")";
311 /* Write out level number of tabs. Makes the nested binary search nice
313 string FsmCodeGen::TABS( int level )
316 while ( level-- > 0 )
321 /* Write out a key from the fsm code gen. Depends on wether or not the key is
323 string FsmCodeGen::KEY( Key key )
326 if ( keyOps->isSigned || !hostLang->explicitUnsigned )
329 ret << (unsigned long) key.getVal() << 'u';
333 void FsmCodeGen::EXEC( ostream &ret, InlineItem *item, int targState, int inFinish )
335 /* The parser gives fexec two children. The double brackets are for D
336 * code. If the inline list is a single word it will get interpreted as a
337 * C-style cast by the D compiler. */
338 ret << "{" << P() << " = ((";
339 INLINE_LIST( ret, item->children, targState, inFinish );
343 void FsmCodeGen::EXECTE( ostream &ret, InlineItem *item, int targState, int inFinish )
345 /* Tokend version of exec. */
347 /* The parser gives fexec two children. The double brackets are for D
348 * code. If the inline list is a single word it will get interpreted as a
349 * C-style cast by the D compiler. */
350 ret << "{" << TOKEND() << " = ((";
351 INLINE_LIST( ret, item->children, targState, inFinish );
356 void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item,
357 int targState, int inFinish )
360 " switch( act ) {\n";
362 /* If the switch handles error then we also forced the error state. It
364 if ( item->handlesError ) {
365 ret << " case 0: " << TOKEND() << " = " << TOKSTART() << "; ";
366 GOTO( ret, redFsm->errState->id, inFinish );
370 for ( InlineList::Iter lma = *item->children; lma.lte(); lma++ ) {
371 /* Write the case label, the action and the case break. */
372 ret << " case " << lma->lmId << ":\n";
374 /* Write the block and close it off. */
376 INLINE_LIST( ret, lma->children, targState, inFinish );
381 /* Default required for D code. */
388 void FsmCodeGen::SET_ACT( ostream &ret, InlineItem *item )
390 ret << ACT() << " = " << item->lmId << ";";
393 void FsmCodeGen::SET_TOKEND( ostream &ret, InlineItem *item )
395 /* The tokend action sets tokend. */
396 ret << TOKEND() << " = " << P();
397 if ( item->offset != 0 )
398 out << "+" << item->offset;
402 void FsmCodeGen::GET_TOKEND( ostream &ret, InlineItem *item )
407 void FsmCodeGen::INIT_TOKSTART( ostream &ret, InlineItem *item )
409 ret << TOKSTART() << " = " << NULL_ITEM() << ";";
412 void FsmCodeGen::INIT_ACT( ostream &ret, InlineItem *item )
414 ret << ACT() << " = 0;";
417 void FsmCodeGen::SET_TOKSTART( ostream &ret, InlineItem *item )
419 ret << TOKSTART() << " = " << P() << ";";
422 void FsmCodeGen::SUB_ACTION( ostream &ret, InlineItem *item,
423 int targState, bool inFinish )
425 if ( item->children->length() > 0 ) {
426 /* Write the block and close it off. */
428 INLINE_LIST( ret, item->children, targState, inFinish );
434 /* Write out an inline tree structure. Walks the list and possibly calls out
435 * to virtual functions than handle language specific items in the tree. */
436 void FsmCodeGen::INLINE_LIST( ostream &ret, InlineList *inlineList,
437 int targState, bool inFinish )
439 for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
440 switch ( item->type ) {
441 case InlineItem::Text:
444 case InlineItem::Goto:
445 GOTO( ret, item->targState->id, inFinish );
447 case InlineItem::Call:
448 CALL( ret, item->targState->id, targState, inFinish );
450 case InlineItem::Next:
451 NEXT( ret, item->targState->id, inFinish );
453 case InlineItem::Ret:
454 RET( ret, inFinish );
456 case InlineItem::PChar:
459 case InlineItem::Char:
462 case InlineItem::Hold:
465 case InlineItem::Exec:
466 EXEC( ret, item, targState, inFinish );
468 case InlineItem::HoldTE:
469 ret << TOKEND() << "--;";
471 case InlineItem::ExecTE:
472 EXECTE( ret, item, targState, inFinish );
474 case InlineItem::Curs:
475 CURS( ret, inFinish );
477 case InlineItem::Targs:
478 TARGS( ret, inFinish, targState );
480 case InlineItem::Entry:
481 ret << item->targState->id;
483 case InlineItem::GotoExpr:
484 GOTO_EXPR( ret, item, inFinish );
486 case InlineItem::CallExpr:
487 CALL_EXPR( ret, item, targState, inFinish );
489 case InlineItem::NextExpr:
490 NEXT_EXPR( ret, item, inFinish );
492 case InlineItem::LmSwitch:
493 LM_SWITCH( ret, item, targState, inFinish );
495 case InlineItem::LmSetActId:
496 SET_ACT( ret, item );
498 case InlineItem::LmSetTokEnd:
499 SET_TOKEND( ret, item );
501 case InlineItem::LmGetTokEnd:
502 GET_TOKEND( ret, item );
504 case InlineItem::LmInitTokStart:
505 INIT_TOKSTART( ret, item );
507 case InlineItem::LmInitAct:
508 INIT_ACT( ret, item );
510 case InlineItem::LmSetTokStart:
511 SET_TOKSTART( ret, item );
513 case InlineItem::SubAction:
514 SUB_ACTION( ret, item, targState, inFinish );
516 case InlineItem::Break:
517 BREAK( ret, targState );
522 /* Write out paths in line directives. Escapes any special characters. */
523 string FsmCodeGen::LDIR_PATH( char *path )
526 for ( char *pc = path; *pc != 0; pc++ ) {
535 void FsmCodeGen::ACTION( ostream &ret, Action *action, int targState, bool inFinish )
537 /* Write the preprocessor line info for going into the source file. */
538 lineDirective( ret, cgd->fileName, action->loc.line );
540 /* Write the block and close it off. */
542 INLINE_LIST( ret, action->inlineList, targState, inFinish );
546 void FsmCodeGen::CONDITION( ostream &ret, Action *condition )
549 lineDirective( ret, cgd->fileName, condition->loc.line );
550 INLINE_LIST( ret, condition->inlineList, 0, false );
553 string FsmCodeGen::ERROR_STATE()
556 if ( redFsm->errState != 0 )
557 ret << redFsm->errState->id;
563 string FsmCodeGen::FIRST_FINAL_STATE()
566 if ( redFsm->firstFinState != 0 )
567 ret << redFsm->firstFinState->id;
569 ret << redFsm->nextStateId;
573 void FsmCodeGen::writeOutInit()
576 out << "\t" << CS() << " = " << START() << ";\n";
578 /* If there are any calls, then the stack top needs initialization. */
579 if ( anyActionCalls() || anyActionRets() )
580 out << "\t" << TOP() << " = 0;\n";
582 if ( cgd->hasLongestMatch ) {
584 " " << TOKSTART() << " = " << NULL_ITEM() << ";\n"
585 " " << TOKEND() << " = " << NULL_ITEM() << ";\n"
586 " " << ACT() << " = 0;\n";
591 string FsmCodeGen::DATA_PREFIX()
593 if ( cgd->dataPrefix )
594 return FSM_NAME() + "_";
598 /* Emit the alphabet data type. */
599 string FsmCodeGen::ALPH_TYPE()
601 string ret = keyOps->alphType->data1;
602 if ( keyOps->alphType->data2 != 0 ) {
604 ret += + keyOps->alphType->data2;
609 /* Emit the alphabet data type. */
610 string FsmCodeGen::WIDE_ALPH_TYPE()
613 if ( maxKey <= keyOps->maxKey )
616 long long maxKeyVal = maxKey.getLongLong();
617 HostType *wideType = keyOps->typeSubsumes( keyOps->isSigned, maxKeyVal );
618 assert( wideType != 0 );
620 ret = wideType->data1;
621 if ( wideType->data2 != 0 ) {
623 ret += wideType->data2;
631 * Language specific, but style independent code generators functions.
634 string CCodeGen::PTR_CONST()
639 std::ostream &CCodeGen::OPEN_ARRAY( string type, string name )
641 out << "static const " << type << " " << name << "[] = {\n";
645 std::ostream &CCodeGen::CLOSE_ARRAY()
647 return out << "};\n";
650 std::ostream &CCodeGen::STATIC_VAR( string type, string name )
652 out << "static const " << type << " " << name;
656 string CCodeGen::UINT( )
658 return "unsigned int";
661 string CCodeGen::ARR_OFF( string ptr, string offset )
663 return ptr + " + " + offset;
666 string CCodeGen::CAST( string type )
668 return "(" + type + ")";
671 string CCodeGen::NULL_ITEM()
676 string CCodeGen::POINTER()
681 std::ostream &CCodeGen::SWITCH_DEFAULT()
686 string CCodeGen::CTRL_FLOW()
695 string DCodeGen::NULL_ITEM()
700 string DCodeGen::POINTER()
702 // multiple items seperated by commas can also be pointer types.
706 string DCodeGen::PTR_CONST()
711 std::ostream &DCodeGen::OPEN_ARRAY( string type, string name )
713 out << "static const " << type << "[] " << name << " = [\n";
717 std::ostream &DCodeGen::CLOSE_ARRAY()
719 return out << "];\n";
722 std::ostream &DCodeGen::STATIC_VAR( string type, string name )
724 out << "static const " << type << " " << name;
728 string DCodeGen::ARR_OFF( string ptr, string offset )
730 return "&" + ptr + "[" + offset + "]";
733 string DCodeGen::CAST( string type )
735 return "cast(" + type + ")";
738 string DCodeGen::UINT( )
743 std::ostream &DCodeGen::SWITCH_DEFAULT()
745 out << " default: break;\n";
749 string DCodeGen::CTRL_FLOW()
759 string JavaCodeGen::PTR_CONST()
761 /* Not used in Java code. */
766 std::ostream &JavaCodeGen::OPEN_ARRAY( string type, string name )
768 out << "static final " << type << "[] " << name << " = {\n";
772 std::ostream &JavaCodeGen::CLOSE_ARRAY()
774 return out << "};\n";
777 std::ostream &JavaCodeGen::STATIC_VAR( string type, string name )
779 out << "static final " << type << " " << name;
783 string JavaCodeGen::UINT( )
790 string JavaCodeGen::ARR_OFF( string ptr, string offset )
792 return ptr + " + " + offset;
795 string JavaCodeGen::CAST( string type )
797 return "(" + type + ")";
800 string JavaCodeGen::NULL_ITEM()
802 /* In java we use integers instead of pointers. */
806 string JavaCodeGen::POINTER()
813 std::ostream &JavaCodeGen::SWITCH_DEFAULT()
818 string JavaCodeGen::GET_KEY()
821 if ( cgd->getKeyExpr != 0 ) {
822 /* Emit the user supplied method of retrieving the key. */
824 INLINE_LIST( ret, cgd->getKeyExpr, 0, false );
828 /* Expression for retrieving the key, use simple dereference. */
829 ret << "data[" << P() << "]";
834 string JavaCodeGen::CTRL_FLOW()