Can clean up the XML code generation and remove the holdte and execte tags
[external/ragel.git] / ragel / xmlcodegen.cpp
1 /*
2  *  Copyright 2005-2007 Adrian Thurston <thurston@cs.queensu.ca>
3  */
4
5 /*  This file is part of Ragel.
6  *
7  *  Ragel is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  * 
12  *  Ragel is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU General Public License for more details.
16  * 
17  *  You should have received a copy of the GNU General Public License
18  *  along with Ragel; if not, write to the Free Software
19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
20  */
21
22
#include "ragel.h"
#include "xmlcodegen.h"
#include "parsedata.h"
#include "fsmgraph.h"
#include <string.h>
#include <vector>
28
29 using namespace std;
30
31 XMLCodeGen::XMLCodeGen( char *fsmName, ParseData *pd, FsmAp *fsm, 
32                 std::ostream &out )
33 :
34         fsmName(fsmName),
35         pd(pd),
36         fsm(fsm),
37         out(out),
38         nextActionTableId(0)
39 {
40 }
41
42
43 void XMLCodeGen::writeActionList()
44 {
45         /* Determine which actions to write. */
46         int nextActionId = 0;
47         for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) {
48                 if ( act->numRefs() > 0 || act->numCondRefs > 0 )
49                         act->actionId = nextActionId++;
50         }
51
52         /* Write the list. */
53         out << "    <action_list length=\"" << nextActionId << "\">\n";
54         for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) {
55                 if ( act->actionId >= 0 )
56                         writeAction( act );
57         }
58         out << "    </action_list>\n";
59 }
60
61 void XMLCodeGen::writeActionTableList()
62 {
63         /* Must first order the action tables based on their id. */
64         int numTables = nextActionTableId;
65         RedActionTable **tables = new RedActionTable*[numTables];
66         for ( ActionTableMap::Iter at = actionTableMap; at.lte(); at++ )
67                 tables[at->id] = at;
68
69         out << "    <action_table_list length=\"" << numTables << "\">\n";
70         for ( int t = 0; t < numTables; t++ ) {
71                 out << "      <action_table id=\"" << t << "\" length=\"" << 
72                                 tables[t]->key.length() << "\">";
73                 for ( ActionTable::Iter atel = tables[t]->key; atel.lte(); atel++ ) {
74                         out << atel->value->actionId;
75                         if ( ! atel.last() )
76                                 out << " ";
77                 }
78                 out << "</action_table>\n";
79         }
80         out << "    </action_table_list>\n";
81
82         delete[] tables;
83 }
84
85 void XMLCodeGen::reduceActionTables()
86 {
87         /* Reduce the actions tables to a set. */
88         for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) {
89                 RedActionTable *actionTable = 0;
90
91                 /* Reduce To State Actions. */
92                 if ( st->toStateActionTable.length() > 0 ) {
93                         if ( actionTableMap.insert( st->toStateActionTable, &actionTable ) )
94                                 actionTable->id = nextActionTableId++;
95                 }
96
97                 /* Reduce From State Actions. */
98                 if ( st->fromStateActionTable.length() > 0 ) {
99                         if ( actionTableMap.insert( st->fromStateActionTable, &actionTable ) )
100                                 actionTable->id = nextActionTableId++;
101                 }
102
103                 /* Reduce EOF actions. */
104                 if ( st->eofActionTable.length() > 0 ) {
105                         if ( actionTableMap.insert( st->eofActionTable, &actionTable ) )
106                                 actionTable->id = nextActionTableId++;
107                 }
108
109                 /* Loop the transitions and reduce their actions. */
110                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
111                         if ( trans->actionTable.length() > 0 ) {
112                                 if ( actionTableMap.insert( trans->actionTable, &actionTable ) )
113                                         actionTable->id = nextActionTableId++;
114                         }
115                 }
116         }
117 }
118
119 void XMLCodeGen::appendTrans( TransListVect &outList, Key lowKey, 
120                 Key highKey, TransAp *trans )
121 {
122         if ( trans->toState != 0 || trans->actionTable.length() > 0 )
123                 outList.append( TransEl( lowKey, highKey, trans ) );
124 }
125
126 void XMLCodeGen::writeKey( Key key )
127 {
128         if ( keyOps->isSigned )
129                 out << key.getVal();
130         else
131                 out << (unsigned long) key.getVal();
132 }
133
134 void XMLCodeGen::writeTrans( Key lowKey, Key highKey, TransAp *trans )
135 {
136         /* First reduce the action. */
137         RedActionTable *actionTable = 0;
138         if ( trans->actionTable.length() > 0 )
139                 actionTable = actionTableMap.find( trans->actionTable );
140
141         /* Write the transition. */
142         out << "        <t>";
143         writeKey( lowKey );
144         out << " ";
145         writeKey( highKey );
146
147         if ( trans->toState != 0 )
148                 out << " " << trans->toState->alg.stateNum;
149         else
150                 out << " x";
151
152         if ( actionTable != 0 )
153                 out << " " << actionTable->id;
154         else
155                 out << " x";
156         out << "</t>\n";
157 }
158
159 void XMLCodeGen::writeTransList( StateAp *state )
160 {
161         TransListVect outList;
162
163         /* If there is only are no ranges the task is simple. */
164         if ( state->outList.length() > 0 ) {
165                 /* Loop each source range. */
166                 for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
167                         /* Reduce the transition. If it reduced to anything then add it. */
168                         appendTrans( outList, trans->lowKey, trans->highKey, trans );
169                 }
170         }
171
172         out << "      <trans_list length=\"" << outList.length() << "\">\n";
173         for ( TransListVect::Iter tvi = outList; tvi.lte(); tvi++ )
174                 writeTrans( tvi->lowKey, tvi->highKey, tvi->value );
175         out << "      </trans_list>\n";
176 }
177
178 void XMLCodeGen::writeText( InlineItem *item )
179 {
180         if ( item->prev == 0 || item->prev->type != InlineItem::Text )
181                 out << "<text>";
182         xmlEscapeHost( out, item->data, strlen(item->data) );
183         if ( item->next == 0 || item->next->type != InlineItem::Text )
184                 out << "</text>";
185 }
186
187 void XMLCodeGen::writeGoto( InlineItem *item )
188 {
189         if ( pd->generatingSectionSubset )
190                 out << "<goto>-1</goto>";
191         else {
192                 EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id );
193                 out << "<goto>" << targ->value->alg.stateNum << "</goto>";
194         }
195 }
196
197 void XMLCodeGen::writeCall( InlineItem *item )
198 {
199         if ( pd->generatingSectionSubset )
200                 out << "<call>-1</call>";
201         else {
202                 EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id );
203                 out << "<call>" << targ->value->alg.stateNum << "</call>";
204         }
205 }
206
207 void XMLCodeGen::writeNext( InlineItem *item )
208 {
209         if ( pd->generatingSectionSubset )
210                 out << "<next>-1</next>";
211         else {
212                 EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id );
213                 out << "<next>" << targ->value->alg.stateNum << "</next>";
214         }
215 }
216
217 void XMLCodeGen::writeGotoExpr( InlineItem *item )
218 {
219         out << "<goto_expr>";
220         writeInlineList( item->children );
221         out << "</goto_expr>";
222 }
223
224 void XMLCodeGen::writeCallExpr( InlineItem *item )
225 {
226         out << "<call_expr>";
227         writeInlineList( item->children );
228         out << "</call_expr>";
229 }
230
231 void XMLCodeGen::writeNextExpr( InlineItem *item )
232 {
233         out << "<next_expr>";
234         writeInlineList( item->children );
235         out << "</next_expr>";
236 }
237
238 void XMLCodeGen::writeEntry( InlineItem *item )
239 {
240         if ( pd->generatingSectionSubset )
241                 out << "<entry>-1</entry>";
242         else {
243                 EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id );
244                 out << "<entry>" << targ->value->alg.stateNum << "</entry>";
245         }
246 }
247
248 void XMLCodeGen::writeActionExec( InlineItem *item )
249 {
250         out << "<exec>";
251         writeInlineList( item->children );
252         out << "</exec>";
253 }
254
255 void XMLCodeGen::writeLmOnLast( InlineItem *item )
256 {
257         out << "<set_tokend>1</set_tokend>";
258
259         if ( item->longestMatchPart->action != 0 ) {
260                 out << "<sub_action>";
261                 writeInlineList( item->longestMatchPart->action->inlineList );
262                 out << "</sub_action>";
263         }
264 }
265
266 void XMLCodeGen::writeLmOnNext( InlineItem *item )
267 {
268         out << "<set_tokend>0</set_tokend>";
269         out << "<hold></hold>";
270
271         if ( item->longestMatchPart->action != 0 ) {
272                 out << "<sub_action>";
273                 writeInlineList( item->longestMatchPart->action->inlineList );
274                 out << "</sub_action>";
275         }
276 }
277
278 void XMLCodeGen::writeLmOnLagBehind( InlineItem *item )
279 {
280         out << "<exec><get_tokend></get_tokend></exec>";
281
282         if ( item->longestMatchPart->action != 0 ) {
283                 out << "<sub_action>";
284                 writeInlineList( item->longestMatchPart->action->inlineList );
285                 out << "</sub_action>";
286         }
287 }
288
289 void XMLCodeGen::writeLmSwitch( InlineItem *item )
290 {
291
292         LongestMatch *longestMatch = item->longestMatch;
293         out << "<lm_switch>\n";
294
295         if ( longestMatch->lmSwitchHandlesError ) {
296                 /* If the switch handles error then we should have also forced the
297                  * error state. */
298                 assert( fsm->errState != 0 );
299
300                 out << "      <sub_action id=\"0\">";
301                 out << "<goto>" << fsm->errState->alg.stateNum << "</goto>";
302                 out << "</sub_action>\n";
303         }
304         
305         for ( LmPartList::Iter lmi = *longestMatch->longestMatchList; lmi.lte(); lmi++ ) {
306                 if ( lmi->inLmSelect && lmi->action != 0 ) {
307                         /* Open the action. Write it with the context that sets up _p 
308                          * when doing control flow changes from inside the machine. */
309                         out << "      <sub_action id=\"" << lmi->longestMatchId << "\">";
310                         out << "<exec><get_tokend></get_tokend></exec>";
311                         writeInlineList( lmi->action->inlineList );
312                         out << "</sub_action>\n";
313                 }
314         }
315
316         out << "    </lm_switch>";
317 }
318
319 void XMLCodeGen::writeInlineList( InlineList *inlineList )
320 {
321         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
322                 switch ( item->type ) {
323                 case InlineItem::Text:
324                         writeText( item );
325                         break;
326                 case InlineItem::Goto:
327                         writeGoto( item );
328                         break;
329                 case InlineItem::GotoExpr:
330                         writeGotoExpr( item );
331                         break;
332                 case InlineItem::Call:
333                         writeCall( item );
334                         break;
335                 case InlineItem::CallExpr:
336                         writeCallExpr( item );
337                         break;
338                 case InlineItem::Next:
339                         writeNext( item );
340                         break;
341                 case InlineItem::NextExpr:
342                         writeNextExpr( item );
343                         break;
344                 case InlineItem::Break:
345                         out << "<break></break>";
346                         break;
347                 case InlineItem::Ret: 
348                         out << "<ret></ret>";
349                         break;
350                 case InlineItem::PChar:
351                         out << "<pchar></pchar>";
352                         break;
353                 case InlineItem::Char: 
354                         out << "<char></char>";
355                         break;
356                 case InlineItem::Curs: 
357                         out << "<curs></curs>";
358                         break;
359                 case InlineItem::Targs: 
360                         out << "<targs></targs>";
361                         break;
362                 case InlineItem::Entry:
363                         writeEntry( item );
364                         break;
365
366                 case InlineItem::Hold:
367                         out << "<hold></hold>";
368                         break;
369                 case InlineItem::Exec:
370                         writeActionExec( item );
371                         break;
372
373                 case InlineItem::LmSetActId:
374                         out << "<set_act>" << 
375                                         item->longestMatchPart->longestMatchId << 
376                                         "</set_act>";
377                         break;
378                 case InlineItem::LmSetTokEnd:
379                         out << "<set_tokend>1</set_tokend>";
380                         break;
381
382                 case InlineItem::LmOnLast:
383                         writeLmOnLast( item );
384                         break;
385                 case InlineItem::LmOnNext:
386                         writeLmOnNext( item );
387                         break;
388                 case InlineItem::LmOnLagBehind:
389                         writeLmOnLagBehind( item );
390                         break;
391                 case InlineItem::LmSwitch: 
392                         writeLmSwitch( item );
393                         break;
394
395                 case InlineItem::LmInitAct:
396                         out << "<init_act></init_act>";
397                         break;
398                 case InlineItem::LmInitTokStart:
399                         out << "<init_tokstart></init_tokstart>";
400                         break;
401                 case InlineItem::LmSetTokStart:
402                         out << "<set_tokstart></set_tokstart>";
403                         break;
404                 }
405         }
406 }
407
408 void XMLCodeGen::writeAction( Action *action )
409 {
410         out << "      <action id=\"" << action->actionId << "\"";
411         if ( action->name != 0 ) 
412                 out << " name=\"" << action->name << "\"";
413         out << " line=\"" << action->loc.line << "\" col=\"" << action->loc.col << "\">";
414         writeInlineList( action->inlineList );
415         out << "</action>\n";
416 }
417
/* Write len characters starting at data to out, replacing '<', '>' and '&'
 * with the entities &lt;, &gt; and &amp;. All other characters, quotes
 * included, are copied through unchanged.
 *
 * Nothing is written when data is null or len is not positive. Without this
 * check a negative length would never reach the end pointer and the loop
 * would run past the buffer. */
void xmlEscapeHost( std::ostream &out, char *data, long len )
{
	if ( data == 0 || len <= 0 )
		return;

	const char *end = data + len;
	for ( const char *p = data; p != end; ++p ) {
		switch ( *p ) {
		case '<': out << "&lt;"; break;
		case '>': out << "&gt;"; break;
		case '&': out << "&amp;"; break;
		default: out << *p; break;
		}
	}
}
431
432 void XMLCodeGen::writeStateActions( StateAp *state )
433 {
434         RedActionTable *toStateActions = 0;
435         if ( state->toStateActionTable.length() > 0 )
436                 toStateActions = actionTableMap.find( state->toStateActionTable );
437
438         RedActionTable *fromStateActions = 0;
439         if ( state->fromStateActionTable.length() > 0 )
440                 fromStateActions = actionTableMap.find( state->fromStateActionTable );
441
442         RedActionTable *eofActions = 0;
443         if ( state->eofActionTable.length() > 0 )
444                 eofActions = actionTableMap.find( state->eofActionTable );
445         
446         if ( toStateActions != 0 || fromStateActions != 0 || eofActions != 0 ) {
447                 out << "      <state_actions>";
448                 if ( toStateActions != 0 )
449                         out << toStateActions->id;
450                 else
451                         out << "x";
452
453                 if ( fromStateActions != 0 )
454                         out << " " << fromStateActions->id;
455                 else
456                         out << " x";
457
458                 if ( eofActions != 0 )
459                         out << " " << eofActions->id;
460                 else
461                         out << " x"; out << "</state_actions>\n";
462         }
463 }
464
465 void XMLCodeGen::writeStateConditions( StateAp *state )
466 {
467         if ( state->stateCondList.length() > 0 ) {
468                 out << "      <cond_list length=\"" << state->stateCondList.length() << "\">\n";
469                 for ( StateCondList::Iter scdi = state->stateCondList; scdi.lte(); scdi++ ) {
470                         out << "        <c>";
471                         writeKey( scdi->lowKey );
472                         out << " ";
473                         writeKey( scdi->highKey );
474                         out << " ";
475                         out << scdi->condSpace->condSpaceId;
476                         out << "</c>\n";
477                 }
478                 out << "      </cond_list>\n";
479         }
480 }
481
482 void XMLCodeGen::writeStateList()
483 {
484         /* Write the list of states. */
485         out << "    <state_list length=\"" << fsm->stateList.length() << "\">\n";
486         for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) {
487                 out << "      <state id=\"" << st->alg.stateNum << "\"";
488                 if ( st->isFinState() )
489                         out << " final=\"t\"";
490                 out << ">\n";
491
492                 writeStateActions( st );
493                 writeStateConditions( st );
494                 writeTransList( st );
495
496                 out << "      </state>\n";
497
498                 if ( !st.last() )
499                         out << "\n";
500         }
501         out << "    </state_list>\n";
502 }
503
504 bool XMLCodeGen::writeNameInst( NameInst *nameInst )
505 {
506         bool written = false;
507         if ( nameInst->parent != 0 )
508                 written = writeNameInst( nameInst->parent );
509         
510         if ( nameInst->name != 0 ) {
511                 if ( written )
512                         out << '_';
513                 out << nameInst->name;
514                 written = true;
515         }
516
517         return written;
518 }
519
520 void XMLCodeGen::writeEntryPoints()
521 {
522         /* List of entry points other than start state. */
523         if ( fsm->entryPoints.length() > 0 || pd->lmRequiresErrorState ) {
524                 out << "    <entry_points";
525                 if ( pd->lmRequiresErrorState )
526                         out << " error=\"t\"";
527                 out << ">\n";
528                 for ( EntryMap::Iter en = fsm->entryPoints; en.lte(); en++ ) {
529                         /* Get the name instantiation from nameIndex. */
530                         NameInst *nameInst = pd->nameIndex[en->key];
531                         StateAp *state = en->value;
532                         out << "      <entry name=\"";
533                         writeNameInst( nameInst );
534                         out << "\">" << state->alg.stateNum << "</entry>\n";
535                 }
536                 out << "    </entry_points>\n";
537         }
538 }
539
540 void XMLCodeGen::writeMachine()
541 {
542         /* Open the machine. */
543         out << "  <machine>\n"; 
544         
545         /* Action tables. */
546         reduceActionTables();
547
548         writeActionList();
549         writeActionTableList();
550         writeConditions();
551
552         /* Start state. */
553         out << "    <start_state>" << fsm->startState->alg.stateNum << 
554                         "</start_state>\n";
555         
556         /* Error state. */
557         if ( fsm->errState != 0 ) {
558                 out << "    <error_state>" << fsm->errState->alg.stateNum << 
559                         "</error_state>\n";
560         }
561
562         writeEntryPoints();
563         writeStateList();
564
565         out << "  </machine>\n";
566 }
567
568
569 void XMLCodeGen::writeConditions()
570 {
571         if ( condData->condSpaceMap.length() > 0 ) {
572                 long nextCondSpaceId = 0;
573                 for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ )
574                         cs->condSpaceId = nextCondSpaceId++;
575
576                 out << "    <cond_space_list length=\"" << condData->condSpaceMap.length() << "\">\n";
577                 for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ ) {
578                         out << "      <cond_space id=\"" << cs->condSpaceId << 
579                                 "\" length=\"" << cs->condSet.length() << "\">";
580                         writeKey( cs->baseKey );
581                         for ( CondSet::Iter csi = cs->condSet; csi.lte(); csi++ )
582                                 out << " " << (*csi)->actionId;
583                         out << "</cond_space>\n";
584                 }
585                 out << "    </cond_space_list>\n";
586         }
587 }
588
589 void XMLCodeGen::writeExports()
590 {
591         if ( pd->exportList.length() > 0 ) {
592                 out << "  <exports>\n";
593                 for ( ExportList::Iter exp = pd->exportList; exp.lte(); exp++ ) {
594                         out << "    <ex name=\"" << exp->name << "\">";
595                         writeKey( exp->key );
596                         out << "</ex>\n";
597                 }
598                 out << "  </exports>\n";
599         }
600 }
601
602 void XMLCodeGen::writeXML()
603 {
604         /* Open the definition. */
605         out << "<ragel_def name=\"" << fsmName << "\">\n";
606
607         /* Alphabet type. */
608         out << "  <alphtype>" << keyOps->alphType->internalName << "</alphtype>\n";
609         
610         /* Getkey expression. */
611         if ( pd->getKeyExpr != 0 ) {
612                 out << "  <getkey>";
613                 writeInlineList( pd->getKeyExpr );
614                 out << "</getkey>\n";
615         }
616
617         /* Access expression. */
618         if ( pd->accessExpr != 0 ) {
619                 out << "  <access>";
620                 writeInlineList( pd->accessExpr );
621                 out << "</access>\n";
622         }
623
624         /*
625          * Variable expressions.
626          */
627
628         if ( pd->pExpr != 0 ) {
629                 out << "  <p_expr>";
630                 writeInlineList( pd->pExpr );
631                 out << "</p_expr>\n";
632         }
633         
634         if ( pd->peExpr != 0 ) {
635                 out << "  <pe_expr>";
636                 writeInlineList( pd->peExpr );
637                 out << "</pe_expr>\n";
638         }
639         
640         if ( pd->csExpr != 0 ) {
641                 out << "  <cs_expr>";
642                 writeInlineList( pd->csExpr );
643                 out << "</cs_expr>\n";
644         }
645         
646         if ( pd->topExpr != 0 ) {
647                 out << "  <top_expr>";
648                 writeInlineList( pd->topExpr );
649                 out << "</top_expr>\n";
650         }
651         
652         if ( pd->stackExpr != 0 ) {
653                 out << "  <stack_expr>";
654                 writeInlineList( pd->stackExpr );
655                 out << "</stack_expr>\n";
656         }
657         
658         if ( pd->actExpr != 0 ) {
659                 out << "  <act_expr>";
660                 writeInlineList( pd->actExpr );
661                 out << "</act_expr>\n";
662         }
663         
664         if ( pd->tokstartExpr != 0 ) {
665                 out << "  <tokstart_expr>";
666                 writeInlineList( pd->tokstartExpr );
667                 out << "</tokstart_expr>\n";
668         }
669         
670         if ( pd->tokendExpr != 0 ) {
671                 out << "  <tokend_expr>";
672                 writeInlineList( pd->tokendExpr );
673                 out << "</tokend_expr>\n";
674         }
675         
676         writeExports();
677         
678         writeMachine();
679
680         out <<
681                 "</ragel_def>\n";
682 }
683