2 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
3 * 2004 Eric Ocean <eric.ocean@ampede.com>
4 * 2005 Alan West <alan@alanz.com>
7 /* This file is part of Ragel.
9 * Ragel is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * Ragel is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Ragel; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #include "rlcodegen.h"
25 #include "fsmcodegen.h"
33 using std::ostringstream;
/* Determine if a string is only whitespace. Code blocks that are only
 * whitespace need not be output. A null pointer is treated like an empty
 * string and reports true instead of being dereferenced. */
bool onlyWhitespace( char *str )
{
	/* Nothing to scan, so nothing other than whitespace can be present. */
	if ( str == 0 )
		return true;

	for ( ; *str != 0; str++ ) {
		/* Any character outside the six whitespace characters settles it. */
		if ( *str != ' ' && *str != '\t' && *str != '\n' &&
				*str != '\v' && *str != '\f' && *str != '\r' )
			return false;
	}
	return true;
}
52 /* Init code gen with in parameters. */
53 FsmCodeGen::FsmCodeGen( ostream &out )
58 bAnyToStateActions(false),
59 bAnyFromStateActions(false),
60 bAnyRegActions(false),
61 bAnyEofActions(false),
62 bAnyActionGotos(false),
63 bAnyActionCalls(false),
64 bAnyActionRets(false),
65 bAnyRegActionRets(false),
66 bAnyRegActionByValControl(false),
67 bAnyRegNextStmt(false),
68 bAnyRegCurStateRef(false),
70 bAnyLmSwitchError(false),
75 /* Does the machine have any actions. */
76 bool FsmCodeGen::anyActions()
78 return redFsm->actionMap.length() > 0;
81 unsigned int FsmCodeGen::arrayTypeSize( unsigned long maxVal )
83 long long maxValLL = (long long) maxVal;
84 HostType *arrayType = keyOps->typeSubsumes( maxValLL );
85 assert( arrayType != 0 );
86 return arrayType->size;
89 string FsmCodeGen::ARRAY_TYPE( unsigned long maxVal )
91 long long maxValLL = (long long) maxVal;
92 HostType *arrayType = keyOps->typeSubsumes( maxValLL );
93 assert( arrayType != 0 );
95 string ret = arrayType->data1;
96 if ( arrayType->data2 != 0 ) {
98 ret += arrayType->data2;
104 /* Write out the fsm name. */
105 string FsmCodeGen::FSM_NAME()
110 /* Emit the offset of the start state as a decimal integer. */
111 string FsmCodeGen::START_STATE_ID()
114 ret << redFsm->startState->id;
118 /* Write out the array of actions. */
119 std::ostream &FsmCodeGen::ACTIONS_ARRAY()
122 int totalActions = 1;
123 for ( ActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) {
124 /* Write out the length, which will never be the last character. */
125 out << act->key.length() << ", ";
126 /* Put in a line break every 8 */
127 if ( totalActions++ % 8 == 7 )
130 for ( ActionTable::Iter item = act->key; item.lte(); item++ ) {
131 out << item->value->actionId;
132 if ( ! (act.last() && item.last()) )
135 /* Put in a line break every 8 */
136 if ( totalActions++ % 8 == 7 )
145 string FsmCodeGen::CS()
148 if ( cgd->curStateExpr != 0 ) {
149 /* Emit the user supplied method of retrieving the key. */
151 INLINE_LIST( ret, cgd->curStateExpr, 0, false );
155 /* Expression for retrieving the key, use simple dereference. */
156 ret << ACCESS() << "cs";
161 string FsmCodeGen::ACCESS()
164 if ( cgd->accessExpr != 0 )
165 INLINE_LIST( ret, cgd->accessExpr, 0, false );
169 string FsmCodeGen::GET_WIDE_KEY()
171 if ( anyConditions() )
177 string FsmCodeGen::GET_WIDE_KEY( RedStateAp *state )
179 if ( state->stateCondList.length() > 0 )
185 string FsmCodeGen::GET_KEY()
188 if ( cgd->getKeyExpr != 0 ) {
189 /* Emit the user supplied method of retrieving the key. */
191 INLINE_LIST( ret, cgd->getKeyExpr, 0, false );
195 /* Expression for retrieving the key, use simple dereference. */
196 ret << "(*" << P() << ")";
201 /* Write out level number of tabs. Makes the nested binary search nice
203 string FsmCodeGen::TABS( int level )
206 while ( level-- > 0 )
/* Write out a key from the fsm code gen. Depends on whether or not the key is
213 string FsmCodeGen::KEY( Key key )
216 if ( keyOps->isSigned || !hostLang->explicitUnsigned )
219 ret << (unsigned long) key.getVal() << 'u';
223 void FsmCodeGen::EXEC( ostream &ret, InlineItem *item, int targState, int inFinish )
225 /* The parser gives fexec two children. The double brackets are for D
226 * code. If the inline list is a single word it will get interpreted as a
227 * C-style cast by the D compiler. */
228 ret << "{" << P() << " = ((";
229 INLINE_LIST( ret, item->children, targState, inFinish );
233 void FsmCodeGen::EXECTE( ostream &ret, InlineItem *item, int targState, int inFinish )
235 /* Tokend version of exec. */
237 /* The parser gives fexec two children. The double brackets are for D
238 * code. If the inline list is a single word it will get interpreted as a
239 * C-style cast by the D compiler. */
240 ret << "{" << TOKEND() << " = ((";
241 INLINE_LIST( ret, item->children, targState, inFinish );
246 void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item,
247 int targState, int inFinish )
250 " switch( act ) {\n";
252 /* If the switch handles error then we also forced the error state. It
254 if ( item->handlesError ) {
255 ret << " case 0: " << TOKEND() << " = " << TOKSTART() << "; ";
256 GOTO( ret, redFsm->errState->id, inFinish );
260 for ( InlineList::Iter lma = *item->children; lma.lte(); lma++ ) {
261 /* Write the case label, the action and the case break. */
262 ret << " case " << lma->lmId << ":\n";
264 /* Write the block and close it off. */
266 INLINE_LIST( ret, lma->children, targState, inFinish );
271 /* Default required for D code. */
278 void FsmCodeGen::SET_ACT( ostream &ret, InlineItem *item )
280 ret << ACT() << " = " << item->lmId << ";";
283 void FsmCodeGen::SET_TOKEND( ostream &ret, InlineItem *item )
285 /* The tokend action sets tokend. */
286 ret << TOKEND() << " = " << P();
287 if ( item->offset != 0 )
288 out << "+" << item->offset;
292 void FsmCodeGen::GET_TOKEND( ostream &ret, InlineItem *item )
297 void FsmCodeGen::INIT_TOKSTART( ostream &ret, InlineItem *item )
299 ret << TOKSTART() << " = " << NULL_ITEM() << ";";
302 void FsmCodeGen::INIT_ACT( ostream &ret, InlineItem *item )
304 ret << ACT() << " = 0;";
307 void FsmCodeGen::SET_TOKSTART( ostream &ret, InlineItem *item )
309 ret << TOKSTART() << " = " << P() << ";";
312 void FsmCodeGen::SUB_ACTION( ostream &ret, InlineItem *item,
313 int targState, bool inFinish )
315 if ( item->children->length() > 0 ) {
316 /* Write the block and close it off. */
318 INLINE_LIST( ret, item->children, targState, inFinish );
/* Write out an inline tree structure. Walks the list and possibly calls out
 * to virtual functions that handle language specific items in the tree. */
326 void FsmCodeGen::INLINE_LIST( ostream &ret, InlineList *inlineList,
327 int targState, bool inFinish )
329 for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
330 switch ( item->type ) {
331 case InlineItem::Text:
334 case InlineItem::Goto:
335 GOTO( ret, item->targState->id, inFinish );
337 case InlineItem::Call:
338 CALL( ret, item->targState->id, targState, inFinish );
340 case InlineItem::Next:
341 NEXT( ret, item->targState->id, inFinish );
343 case InlineItem::Ret:
344 RET( ret, inFinish );
346 case InlineItem::PChar:
349 case InlineItem::Char:
352 case InlineItem::Hold:
355 case InlineItem::Exec:
356 EXEC( ret, item, targState, inFinish );
358 case InlineItem::HoldTE:
359 ret << TOKEND() << "--;";
361 case InlineItem::ExecTE:
362 EXECTE( ret, item, targState, inFinish );
364 case InlineItem::Curs:
365 CURS( ret, inFinish );
367 case InlineItem::Targs:
368 TARGS( ret, inFinish, targState );
370 case InlineItem::Entry:
371 ret << item->targState->id;
373 case InlineItem::GotoExpr:
374 GOTO_EXPR( ret, item, inFinish );
376 case InlineItem::CallExpr:
377 CALL_EXPR( ret, item, targState, inFinish );
379 case InlineItem::NextExpr:
380 NEXT_EXPR( ret, item, inFinish );
382 case InlineItem::LmSwitch:
383 LM_SWITCH( ret, item, targState, inFinish );
385 case InlineItem::LmSetActId:
386 SET_ACT( ret, item );
388 case InlineItem::LmSetTokEnd:
389 SET_TOKEND( ret, item );
391 case InlineItem::LmGetTokEnd:
392 GET_TOKEND( ret, item );
394 case InlineItem::LmInitTokStart:
395 INIT_TOKSTART( ret, item );
397 case InlineItem::LmInitAct:
398 INIT_ACT( ret, item );
400 case InlineItem::LmSetTokStart:
401 SET_TOKSTART( ret, item );
403 case InlineItem::SubAction:
404 SUB_ACTION( ret, item, targState, inFinish );
406 case InlineItem::Break:
407 BREAK( ret, targState );
412 /* Write out paths in line directives. Escapes any special characters. */
413 string FsmCodeGen::LDIR_PATH( char *path )
416 for ( char *pc = path; *pc != 0; pc++ ) {
425 void FsmCodeGen::ACTION( ostream &ret, Action *action, int targState, bool inFinish )
427 /* Write the preprocessor line info for going into the source file. */
428 lineDirective( ret, cgd->fileName, action->loc.line );
430 /* Write the block and close it off. */
432 INLINE_LIST( ret, action->inlineList, targState, inFinish );
436 void FsmCodeGen::CONDITION( ostream &ret, Action *condition )
439 lineDirective( ret, cgd->fileName, condition->loc.line );
440 INLINE_LIST( ret, condition->inlineList, 0, false );
443 string FsmCodeGen::ERROR_STATE()
446 if ( redFsm->errState != 0 )
447 ret << redFsm->errState->id;
453 string FsmCodeGen::FIRST_FINAL_STATE()
456 if ( redFsm->firstFinState != 0 )
457 ret << redFsm->firstFinState->id;
459 ret << redFsm->nextStateId;
463 void FsmCodeGen::writeOutInit()
466 out << "\t" << CS() << " = " << START() << ";\n";
468 /* If there are any calls, then the stack top needs initialization. */
469 if ( anyActionCalls() || anyActionRets() )
470 out << "\t" << TOP() << " = 0;\n";
472 if ( cgd->hasLongestMatch ) {
474 " " << TOKSTART() << " = " << NULL_ITEM() << ";\n"
475 " " << TOKEND() << " = " << NULL_ITEM() << ";\n"
476 " " << ACT() << " = 0;\n";
481 string FsmCodeGen::DATA_PREFIX()
483 if ( cgd->dataPrefix )
484 return FSM_NAME() + "_";
488 /* Emit the alphabet data type. */
489 string FsmCodeGen::ALPH_TYPE()
491 string ret = keyOps->alphType->data1;
492 if ( keyOps->alphType->data2 != 0 ) {
494 ret += + keyOps->alphType->data2;
/* Emit the wide alphabet data type. */
500 string FsmCodeGen::WIDE_ALPH_TYPE()
503 if ( maxKey <= keyOps->maxKey )
506 long long maxKeyVal = maxKey.getLongLong();
507 HostType *wideType = keyOps->typeSubsumes( keyOps->isSigned, maxKeyVal );
508 assert( wideType != 0 );
510 ret = wideType->data1;
511 if ( wideType->data2 != 0 ) {
513 ret += wideType->data2;
521 * Language specific, but style independent code generators functions.
524 string CCodeGen::PTR_CONST()
529 std::ostream &CCodeGen::OPEN_ARRAY( string type, string name )
531 out << "static const " << type << " " << name << "[] = {\n";
535 std::ostream &CCodeGen::CLOSE_ARRAY()
537 return out << "};\n";
540 std::ostream &CCodeGen::STATIC_VAR( string type, string name )
542 out << "static const " << type << " " << name;
546 string CCodeGen::UINT( )
548 return "unsigned int";
551 string CCodeGen::ARR_OFF( string ptr, string offset )
553 return ptr + " + " + offset;
556 string CCodeGen::CAST( string type )
558 return "(" + type + ")";
561 string CCodeGen::NULL_ITEM()
566 string CCodeGen::POINTER()
571 std::ostream &CCodeGen::SWITCH_DEFAULT()
576 string CCodeGen::CTRL_FLOW()
585 string DCodeGen::NULL_ITEM()
590 string DCodeGen::POINTER()
	// multiple items separated by commas can also be pointer types.
596 string DCodeGen::PTR_CONST()
601 std::ostream &DCodeGen::OPEN_ARRAY( string type, string name )
603 out << "static const " << type << "[] " << name << " = [\n";
607 std::ostream &DCodeGen::CLOSE_ARRAY()
609 return out << "];\n";
612 std::ostream &DCodeGen::STATIC_VAR( string type, string name )
614 out << "static const " << type << " " << name;
618 string DCodeGen::ARR_OFF( string ptr, string offset )
620 return "&" + ptr + "[" + offset + "]";
623 string DCodeGen::CAST( string type )
625 return "cast(" + type + ")";
628 string DCodeGen::UINT( )
633 std::ostream &DCodeGen::SWITCH_DEFAULT()
635 out << " default: break;\n";
639 string DCodeGen::CTRL_FLOW()
649 string JavaCodeGen::PTR_CONST()
651 /* Not used in Java code. */
656 std::ostream &JavaCodeGen::OPEN_ARRAY( string type, string name )
658 out << "static final " << type << "[] " << name << " = {\n";
662 std::ostream &JavaCodeGen::CLOSE_ARRAY()
664 return out << "};\n";
667 std::ostream &JavaCodeGen::STATIC_VAR( string type, string name )
669 out << "static final " << type << " " << name;
673 string JavaCodeGen::UINT( )
680 string JavaCodeGen::ARR_OFF( string ptr, string offset )
682 return ptr + " + " + offset;
685 string JavaCodeGen::CAST( string type )
687 return "(" + type + ")";
690 string JavaCodeGen::NULL_ITEM()
692 /* In java we use integers instead of pointers. */
696 string JavaCodeGen::POINTER()
703 std::ostream &JavaCodeGen::SWITCH_DEFAULT()
708 string JavaCodeGen::GET_KEY()
711 if ( cgd->getKeyExpr != 0 ) {
712 /* Emit the user supplied method of retrieving the key. */
714 INLINE_LIST( ret, cgd->getKeyExpr, 0, false );
718 /* Expression for retrieving the key, use simple dereference. */
719 ret << "data[" << P() << "]";
724 string JavaCodeGen::CTRL_FLOW()