Don't allow the left machine of <: to escape through the right machine via the
[external/ragel.git] / ragel / xmlcodegen.cpp
1 /*
2  *  Copyright 2005-2007 Adrian Thurston <thurston@cs.queensu.ca>
3  */
4
5 /*  This file is part of Ragel.
6  *
7  *  Ragel is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  * 
12  *  Ragel is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU General Public License for more details.
16  * 
17  *  You should have received a copy of the GNU General Public License
18  *  along with Ragel; if not, write to the Free Software
19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
20  */
21
22
23 #include "ragel.h"
24 #include "xmlcodegen.h"
25 #include "parsedata.h"
26 #include "fsmgraph.h"
27 #include <string.h>
28
29 using namespace std;
30
31 XMLCodeGen::XMLCodeGen( char *fsmName, ParseData *pd, FsmAp *fsm, 
32                 std::ostream &out )
33 :
34         fsmName(fsmName),
35         pd(pd),
36         fsm(fsm),
37         out(out),
38         nextActionTableId(0)
39 {
40 }
41
42
43 void XMLCodeGen::writeActionList()
44 {
45         /* Determine which actions to write. */
46         int nextActionId = 0;
47         for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) {
48                 if ( act->numRefs() > 0 || act->numCondRefs > 0 )
49                         act->actionId = nextActionId++;
50         }
51
52         /* Write the list. */
53         out << "    <action_list length=\"" << nextActionId << "\">\n";
54         for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) {
55                 if ( act->actionId >= 0 )
56                         writeAction( act );
57         }
58         out << "    </action_list>\n";
59 }
60
61 void XMLCodeGen::writeActionTableList()
62 {
63         /* Must first order the action tables based on their id. */
64         int numTables = nextActionTableId;
65         RedActionTable **tables = new RedActionTable*[numTables];
66         for ( ActionTableMap::Iter at = actionTableMap; at.lte(); at++ )
67                 tables[at->id] = at;
68
69         out << "    <action_table_list length=\"" << numTables << "\">\n";
70         for ( int t = 0; t < numTables; t++ ) {
71                 out << "      <action_table id=\"" << t << "\" length=\"" << 
72                                 tables[t]->key.length() << "\">";
73                 for ( ActionTable::Iter atel = tables[t]->key; atel.lte(); atel++ ) {
74                         out << atel->value->actionId;
75                         if ( ! atel.last() )
76                                 out << " ";
77                 }
78                 out << "</action_table>\n";
79         }
80         out << "    </action_table_list>\n";
81
82         delete[] tables;
83 }
84
85 void XMLCodeGen::reduceActionTables()
86 {
87         /* Reduce the actions tables to a set. */
88         for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) {
89                 RedActionTable *actionTable = 0;
90
91                 /* Reduce To State Actions. */
92                 if ( st->toStateActionTable.length() > 0 ) {
93                         if ( actionTableMap.insert( st->toStateActionTable, &actionTable ) )
94                                 actionTable->id = nextActionTableId++;
95                 }
96
97                 /* Reduce From State Actions. */
98                 if ( st->fromStateActionTable.length() > 0 ) {
99                         if ( actionTableMap.insert( st->fromStateActionTable, &actionTable ) )
100                                 actionTable->id = nextActionTableId++;
101                 }
102
103                 /* Reduce EOF actions. */
104                 if ( st->eofActionTable.length() > 0 ) {
105                         if ( actionTableMap.insert( st->eofActionTable, &actionTable ) )
106                                 actionTable->id = nextActionTableId++;
107                 }
108
109                 /* Loop the transitions and reduce their actions. */
110                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
111                         if ( trans->actionTable.length() > 0 ) {
112                                 if ( actionTableMap.insert( trans->actionTable, &actionTable ) )
113                                         actionTable->id = nextActionTableId++;
114                         }
115                 }
116         }
117 }
118
119 void XMLCodeGen::appendTrans( TransListVect &outList, Key lowKey, 
120                 Key highKey, TransAp *trans )
121 {
122         if ( trans->toState != 0 || trans->actionTable.length() > 0 )
123                 outList.append( TransEl( lowKey, highKey, trans ) );
124 }
125
126 void XMLCodeGen::writeKey( Key key )
127 {
128         if ( keyOps->isSigned )
129                 out << key.getVal();
130         else
131                 out << (unsigned long) key.getVal();
132 }
133
134 void XMLCodeGen::writeTrans( Key lowKey, Key highKey, TransAp *trans )
135 {
136         /* First reduce the action. */
137         RedActionTable *actionTable = 0;
138         if ( trans->actionTable.length() > 0 )
139                 actionTable = actionTableMap.find( trans->actionTable );
140
141         /* Write the transition. */
142         out << "        <t>";
143         writeKey( lowKey );
144         out << " ";
145         writeKey( highKey );
146
147         if ( trans->toState != 0 )
148                 out << " " << trans->toState->alg.stateNum;
149         else
150                 out << " x";
151
152         if ( actionTable != 0 )
153                 out << " " << actionTable->id;
154         else
155                 out << " x";
156         out << "</t>\n";
157 }
158
159 void XMLCodeGen::writeTransList( StateAp *state )
160 {
161         TransListVect outList;
162
163         /* If there is only are no ranges the task is simple. */
164         if ( state->outList.length() > 0 ) {
165                 /* Loop each source range. */
166                 for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
167                         /* Reduce the transition. If it reduced to anything then add it. */
168                         appendTrans( outList, trans->lowKey, trans->highKey, trans );
169                 }
170         }
171
172         out << "      <trans_list length=\"" << outList.length() << "\">\n";
173         for ( TransListVect::Iter tvi = outList; tvi.lte(); tvi++ )
174                 writeTrans( tvi->lowKey, tvi->highKey, tvi->value );
175         out << "      </trans_list>\n";
176 }
177
178 void XMLCodeGen::writeEofTrans( StateAp *state )
179 {
180         RedActionTable *eofActions = 0;
181         if ( state->eofActionTable.length() > 0 )
182                 eofActions = actionTableMap.find( state->eofActionTable );
183         
184         /* The <eof_t> is used when there is an eof target, otherwise the eof
185          * action goes into state actions. */
186         if ( state->eofTarget != 0 ) {
187                 out << "      <eof_t>" << state->eofTarget->alg.stateNum;
188
189                 if ( eofActions != 0 )
190                         out << " " << eofActions->id;
191                 else
192                         out << " x"; 
193
194                 out << "</eof_t>" << endl;
195         }
196 }
197
198 void XMLCodeGen::writeText( InlineItem *item )
199 {
200         if ( item->prev == 0 || item->prev->type != InlineItem::Text )
201                 out << "<text>";
202         xmlEscapeHost( out, item->data, strlen(item->data) );
203         if ( item->next == 0 || item->next->type != InlineItem::Text )
204                 out << "</text>";
205 }
206
207 void XMLCodeGen::writeGoto( InlineItem *item )
208 {
209         if ( pd->generatingSectionSubset )
210                 out << "<goto>-1</goto>";
211         else {
212                 EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id );
213                 out << "<goto>" << targ->value->alg.stateNum << "</goto>";
214         }
215 }
216
217 void XMLCodeGen::writeCall( InlineItem *item )
218 {
219         if ( pd->generatingSectionSubset )
220                 out << "<call>-1</call>";
221         else {
222                 EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id );
223                 out << "<call>" << targ->value->alg.stateNum << "</call>";
224         }
225 }
226
227 void XMLCodeGen::writeNext( InlineItem *item )
228 {
229         if ( pd->generatingSectionSubset )
230                 out << "<next>-1</next>";
231         else {
232                 EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id );
233                 out << "<next>" << targ->value->alg.stateNum << "</next>";
234         }
235 }
236
237 void XMLCodeGen::writeGotoExpr( InlineItem *item )
238 {
239         out << "<goto_expr>";
240         writeInlineList( item->children );
241         out << "</goto_expr>";
242 }
243
244 void XMLCodeGen::writeCallExpr( InlineItem *item )
245 {
246         out << "<call_expr>";
247         writeInlineList( item->children );
248         out << "</call_expr>";
249 }
250
251 void XMLCodeGen::writeNextExpr( InlineItem *item )
252 {
253         out << "<next_expr>";
254         writeInlineList( item->children );
255         out << "</next_expr>";
256 }
257
258 void XMLCodeGen::writeEntry( InlineItem *item )
259 {
260         if ( pd->generatingSectionSubset )
261                 out << "<entry>-1</entry>";
262         else {
263                 EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id );
264                 out << "<entry>" << targ->value->alg.stateNum << "</entry>";
265         }
266 }
267
268 void XMLCodeGen::writeActionExec( InlineItem *item )
269 {
270         out << "<exec>";
271         writeInlineList( item->children );
272         out << "</exec>";
273 }
274
275 void XMLCodeGen::writeLmOnLast( InlineItem *item )
276 {
277         out << "<set_tokend>1</set_tokend>";
278
279         if ( item->longestMatchPart->action != 0 ) {
280                 out << "<sub_action>";
281                 writeInlineList( item->longestMatchPart->action->inlineList );
282                 out << "</sub_action>";
283         }
284 }
285
286 void XMLCodeGen::writeLmOnNext( InlineItem *item )
287 {
288         out << "<set_tokend>0</set_tokend>";
289         out << "<hold></hold>";
290
291         if ( item->longestMatchPart->action != 0 ) {
292                 out << "<sub_action>";
293                 writeInlineList( item->longestMatchPart->action->inlineList );
294                 out << "</sub_action>";
295         }
296 }
297
298 void XMLCodeGen::writeLmOnLagBehind( InlineItem *item )
299 {
300         out << "<exec><get_tokend></get_tokend></exec>";
301
302         if ( item->longestMatchPart->action != 0 ) {
303                 out << "<sub_action>";
304                 writeInlineList( item->longestMatchPart->action->inlineList );
305                 out << "</sub_action>";
306         }
307 }
308
309 void XMLCodeGen::writeLmSwitch( InlineItem *item )
310 {
311         LongestMatch *longestMatch = item->longestMatch;
312         out << "<lm_switch>\n";
313
314         if ( longestMatch->lmSwitchHandlesError ) {
315                 /* If the switch handles error then we should have also forced the
316                  * error state. */
317                 assert( fsm->errState != 0 );
318
319                 out << "        <sub_action id=\"0\">";
320                 out << "<goto>" << fsm->errState->alg.stateNum << "</goto>";
321                 out << "</sub_action>\n";
322         }
323         
324         for ( LmPartList::Iter lmi = *longestMatch->longestMatchList; lmi.lte(); lmi++ ) {
325                 if ( lmi->inLmSelect && lmi->action != 0 ) {
326                         /* Open the action. Write it with the context that sets up _p 
327                          * when doing control flow changes from inside the machine. */
328                         out << "        <sub_action id=\"" << lmi->longestMatchId << "\">";
329                         out << "<exec><get_tokend></get_tokend></exec>";
330                         writeInlineList( lmi->action->inlineList );
331                         out << "</sub_action>\n";
332                 }
333         }
334
335         out << "    </lm_switch>";
336 }
337
338 void XMLCodeGen::writeInlineList( InlineList *inlineList )
339 {
340         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
341                 switch ( item->type ) {
342                 case InlineItem::Text:
343                         writeText( item );
344                         break;
345                 case InlineItem::Goto:
346                         writeGoto( item );
347                         break;
348                 case InlineItem::GotoExpr:
349                         writeGotoExpr( item );
350                         break;
351                 case InlineItem::Call:
352                         writeCall( item );
353                         break;
354                 case InlineItem::CallExpr:
355                         writeCallExpr( item );
356                         break;
357                 case InlineItem::Next:
358                         writeNext( item );
359                         break;
360                 case InlineItem::NextExpr:
361                         writeNextExpr( item );
362                         break;
363                 case InlineItem::Break:
364                         out << "<break></break>";
365                         break;
366                 case InlineItem::Ret: 
367                         out << "<ret></ret>";
368                         break;
369                 case InlineItem::PChar:
370                         out << "<pchar></pchar>";
371                         break;
372                 case InlineItem::Char: 
373                         out << "<char></char>";
374                         break;
375                 case InlineItem::Curs: 
376                         out << "<curs></curs>";
377                         break;
378                 case InlineItem::Targs: 
379                         out << "<targs></targs>";
380                         break;
381                 case InlineItem::Entry:
382                         writeEntry( item );
383                         break;
384
385                 case InlineItem::Hold:
386                         out << "<hold></hold>";
387                         break;
388                 case InlineItem::Exec:
389                         writeActionExec( item );
390                         break;
391
392                 case InlineItem::LmSetActId:
393                         out << "<set_act>" << 
394                                         item->longestMatchPart->longestMatchId << 
395                                         "</set_act>";
396                         break;
397                 case InlineItem::LmSetTokEnd:
398                         out << "<set_tokend>1</set_tokend>";
399                         break;
400
401                 case InlineItem::LmOnLast:
402                         writeLmOnLast( item );
403                         break;
404                 case InlineItem::LmOnNext:
405                         writeLmOnNext( item );
406                         break;
407                 case InlineItem::LmOnLagBehind:
408                         writeLmOnLagBehind( item );
409                         break;
410                 case InlineItem::LmSwitch: 
411                         writeLmSwitch( item );
412                         break;
413
414                 case InlineItem::LmInitAct:
415                         out << "<init_act></init_act>";
416                         break;
417                 case InlineItem::LmInitTokStart:
418                         out << "<init_tokstart></init_tokstart>";
419                         break;
420                 case InlineItem::LmSetTokStart:
421                         out << "<set_tokstart></set_tokstart>";
422                         break;
423                 }
424         }
425 }
426
427 void XMLCodeGen::writeAction( Action *action )
428 {
429         out << "      <action id=\"" << action->actionId << "\"";
430         if ( action->name != 0 ) 
431                 out << " name=\"" << action->name << "\"";
432         out << " line=\"" << action->loc.line << "\" col=\"" << action->loc.col << "\">";
433         writeInlineList( action->inlineList );
434         out << "</action>\n";
435 }
436
/*
 * Copy len characters starting at data to out, replacing '<', '>' and '&'
 * with &lt;, &gt; and &amp;. Every other character is copied unchanged.
 */
void xmlEscapeHost( std::ostream &out, char *data, long len )
{
        for ( char *cur = data, *stop = data + len; cur != stop; ++cur ) {
                const char c = *cur;
                if ( c == '<' )
                        out << "&lt;";
                else if ( c == '>' )
                        out << "&gt;";
                else if ( c == '&' )
                        out << "&amp;";
                else
                        out << c;
        }
}
450
451 void XMLCodeGen::writeStateActions( StateAp *state )
452 {
453         RedActionTable *toStateActions = 0;
454         if ( state->toStateActionTable.length() > 0 )
455                 toStateActions = actionTableMap.find( state->toStateActionTable );
456
457         RedActionTable *fromStateActions = 0;
458         if ( state->fromStateActionTable.length() > 0 )
459                 fromStateActions = actionTableMap.find( state->fromStateActionTable );
460
461         /* EOF actions go out here only if the state has no eof target. If it has
462          * an eof target then an eof transition will be used instead. */
463         RedActionTable *eofActions = 0;
464         if ( state->eofTarget == 0 && state->eofActionTable.length() > 0 )
465                 eofActions = actionTableMap.find( state->eofActionTable );
466         
467         if ( toStateActions != 0 || fromStateActions != 0 || eofActions != 0 ) {
468                 out << "      <state_actions>";
469                 if ( toStateActions != 0 )
470                         out << toStateActions->id;
471                 else
472                         out << "x";
473
474                 if ( fromStateActions != 0 )
475                         out << " " << fromStateActions->id;
476                 else
477                         out << " x";
478
479                 if ( eofActions != 0 )
480                         out << " " << eofActions->id;
481                 else
482                         out << " x";
483
484                 out << "</state_actions>\n";
485         }
486 }
487
488 void XMLCodeGen::writeStateConditions( StateAp *state )
489 {
490         if ( state->stateCondList.length() > 0 ) {
491                 out << "      <cond_list length=\"" << state->stateCondList.length() << "\">\n";
492                 for ( StateCondList::Iter scdi = state->stateCondList; scdi.lte(); scdi++ ) {
493                         out << "        <c>";
494                         writeKey( scdi->lowKey );
495                         out << " ";
496                         writeKey( scdi->highKey );
497                         out << " ";
498                         out << scdi->condSpace->condSpaceId;
499                         out << "</c>\n";
500                 }
501                 out << "      </cond_list>\n";
502         }
503 }
504
505 void XMLCodeGen::writeStateList()
506 {
507         /* Write the list of states. */
508         out << "    <state_list length=\"" << fsm->stateList.length() << "\">\n";
509         for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) {
510                 out << "      <state id=\"" << st->alg.stateNum << "\"";
511                 if ( st->isFinState() )
512                         out << " final=\"t\"";
513                 out << ">\n";
514
515                 writeStateActions( st );
516                 writeEofTrans( st );
517                 writeStateConditions( st );
518                 writeTransList( st );
519
520                 out << "      </state>\n";
521
522                 if ( !st.last() )
523                         out << "\n";
524         }
525         out << "    </state_list>\n";
526 }
527
528 bool XMLCodeGen::writeNameInst( NameInst *nameInst )
529 {
530         bool written = false;
531         if ( nameInst->parent != 0 )
532                 written = writeNameInst( nameInst->parent );
533         
534         if ( nameInst->name != 0 ) {
535                 if ( written )
536                         out << '_';
537                 out << nameInst->name;
538                 written = true;
539         }
540
541         return written;
542 }
543
544 void XMLCodeGen::writeEntryPoints()
545 {
546         /* List of entry points other than start state. */
547         if ( fsm->entryPoints.length() > 0 || pd->lmRequiresErrorState ) {
548                 out << "    <entry_points";
549                 if ( pd->lmRequiresErrorState )
550                         out << " error=\"t\"";
551                 out << ">\n";
552                 for ( EntryMap::Iter en = fsm->entryPoints; en.lte(); en++ ) {
553                         /* Get the name instantiation from nameIndex. */
554                         NameInst *nameInst = pd->nameIndex[en->key];
555                         StateAp *state = en->value;
556                         out << "      <entry name=\"";
557                         writeNameInst( nameInst );
558                         out << "\">" << state->alg.stateNum << "</entry>\n";
559                 }
560                 out << "    </entry_points>\n";
561         }
562 }
563
564 void XMLCodeGen::writeMachine()
565 {
566         /* Open the machine. */
567         out << "  <machine>\n"; 
568         
569         /* Action tables. */
570         reduceActionTables();
571
572         writeActionList();
573         writeActionTableList();
574         writeConditions();
575
576         /* Start state. */
577         out << "    <start_state>" << fsm->startState->alg.stateNum << 
578                         "</start_state>\n";
579         
580         /* Error state. */
581         if ( fsm->errState != 0 ) {
582                 out << "    <error_state>" << fsm->errState->alg.stateNum << 
583                         "</error_state>\n";
584         }
585
586         writeEntryPoints();
587         writeStateList();
588
589         out << "  </machine>\n";
590 }
591
592
593 void XMLCodeGen::writeConditions()
594 {
595         if ( condData->condSpaceMap.length() > 0 ) {
596                 long nextCondSpaceId = 0;
597                 for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ )
598                         cs->condSpaceId = nextCondSpaceId++;
599
600                 out << "    <cond_space_list length=\"" << condData->condSpaceMap.length() << "\">\n";
601                 for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ ) {
602                         out << "      <cond_space id=\"" << cs->condSpaceId << 
603                                 "\" length=\"" << cs->condSet.length() << "\">";
604                         writeKey( cs->baseKey );
605                         for ( CondSet::Iter csi = cs->condSet; csi.lte(); csi++ )
606                                 out << " " << (*csi)->actionId;
607                         out << "</cond_space>\n";
608                 }
609                 out << "    </cond_space_list>\n";
610         }
611 }
612
613 void XMLCodeGen::writeExports()
614 {
615         if ( pd->exportList.length() > 0 ) {
616                 out << "  <exports>\n";
617                 for ( ExportList::Iter exp = pd->exportList; exp.lte(); exp++ ) {
618                         out << "    <ex name=\"" << exp->name << "\">";
619                         writeKey( exp->key );
620                         out << "</ex>\n";
621                 }
622                 out << "  </exports>\n";
623         }
624 }
625
626 void XMLCodeGen::writeXML()
627 {
628         /* Open the definition. */
629         out << "<ragel_def name=\"" << fsmName << "\">\n";
630
631         /* Alphabet type. */
632         out << "  <alphtype>" << keyOps->alphType->internalName << "</alphtype>\n";
633         
634         /* Getkey expression. */
635         if ( pd->getKeyExpr != 0 ) {
636                 out << "  <getkey>";
637                 writeInlineList( pd->getKeyExpr );
638                 out << "</getkey>\n";
639         }
640
641         /* Access expression. */
642         if ( pd->accessExpr != 0 ) {
643                 out << "  <access>";
644                 writeInlineList( pd->accessExpr );
645                 out << "</access>\n";
646         }
647
648         /* PrePush expression. */
649         if ( pd->prePushExpr != 0 ) {
650                 out << "  <prepush>";
651                 writeInlineList( pd->prePushExpr );
652                 out << "</prepush>\n";
653         }
654
655         /* PostPop expression. */
656         if ( pd->postPopExpr != 0 ) {
657                 out << "  <postpop>";
658                 writeInlineList( pd->postPopExpr );
659                 out << "</postpop>\n";
660         }
661
662         /*
663          * Variable expressions.
664          */
665
666         if ( pd->pExpr != 0 ) {
667                 out << "  <p_expr>";
668                 writeInlineList( pd->pExpr );
669                 out << "</p_expr>\n";
670         }
671         
672         if ( pd->peExpr != 0 ) {
673                 out << "  <pe_expr>";
674                 writeInlineList( pd->peExpr );
675                 out << "</pe_expr>\n";
676         }
677
678         if ( pd->eofExpr != 0 ) {
679                 out << "  <eof_expr>";
680                 writeInlineList( pd->eofExpr );
681                 out << "</eof_expr>\n";
682         }
683         
684         if ( pd->csExpr != 0 ) {
685                 out << "  <cs_expr>";
686                 writeInlineList( pd->csExpr );
687                 out << "</cs_expr>\n";
688         }
689         
690         if ( pd->topExpr != 0 ) {
691                 out << "  <top_expr>";
692                 writeInlineList( pd->topExpr );
693                 out << "</top_expr>\n";
694         }
695         
696         if ( pd->stackExpr != 0 ) {
697                 out << "  <stack_expr>";
698                 writeInlineList( pd->stackExpr );
699                 out << "</stack_expr>\n";
700         }
701         
702         if ( pd->actExpr != 0 ) {
703                 out << "  <act_expr>";
704                 writeInlineList( pd->actExpr );
705                 out << "</act_expr>\n";
706         }
707         
708         if ( pd->tokstartExpr != 0 ) {
709                 out << "  <tokstart_expr>";
710                 writeInlineList( pd->tokstartExpr );
711                 out << "</tokstart_expr>\n";
712         }
713         
714         if ( pd->tokendExpr != 0 ) {
715                 out << "  <tokend_expr>";
716                 writeInlineList( pd->tokendExpr );
717                 out << "</tokend_expr>\n";
718         }
719         
720         if ( pd->dataExpr != 0 ) {
721                 out << "  <data_expr>";
722                 writeInlineList( pd->dataExpr );
723                 out << "</data_expr>\n";
724         }
725         
726         writeExports();
727         
728         writeMachine();
729
730         out <<
731                 "</ragel_def>\n";
732 }
733