If the condition embedding code runs out of available characters in the
[external/ragel.git] / ragel / xmlcodegen.cpp
1 /*
2  *  Copyright 2005-2007 Adrian Thurston <thurston@cs.queensu.ca>
3  */
4
5 /*  This file is part of Ragel.
6  *
7  *  Ragel is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  * 
12  *  Ragel is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU General Public License for more details.
16  * 
17  *  You should have received a copy of the GNU General Public License
18  *  along with Ragel; if not, write to the Free Software
19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
20  */
21
22
23 #include "ragel.h"
24 #include "xmlcodegen.h"
25 #include "parsedata.h"
26 #include "fsmgraph.h"
27 #include <string.h>
28
29 using namespace std;
30
31 XMLCodeGen::XMLCodeGen( char *fsmName, ParseData *pd, FsmAp *fsm, 
32                 std::ostream &out )
33 :
34         fsmName(fsmName),
35         pd(pd),
36         fsm(fsm),
37         out(out),
38         nextActionTableId(0)
39 {
40 }
41
42
43 void XMLCodeGen::writeActionList()
44 {
45         /* Determine which actions to write. */
46         int nextActionId = 0;
47         for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) {
48                 if ( act->numRefs() > 0 || act->numCondRefs > 0 )
49                         act->actionId = nextActionId++;
50         }
51
52         /* Write the list. */
53         out << "    <action_list length=\"" << nextActionId << "\">\n";
54         for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) {
55                 if ( act->actionId >= 0 )
56                         writeAction( act );
57         }
58         out << "    </action_list>\n";
59 }
60
61 void XMLCodeGen::writeActionTableList()
62 {
63         /* Must first order the action tables based on their id. */
64         int numTables = nextActionTableId;
65         RedActionTable **tables = new RedActionTable*[numTables];
66         for ( ActionTableMap::Iter at = actionTableMap; at.lte(); at++ )
67                 tables[at->id] = at;
68
69         out << "    <action_table_list length=\"" << numTables << "\">\n";
70         for ( int t = 0; t < numTables; t++ ) {
71                 out << "      <action_table id=\"" << t << "\" length=\"" << 
72                                 tables[t]->key.length() << "\">";
73                 for ( ActionTable::Iter atel = tables[t]->key; atel.lte(); atel++ ) {
74                         out << atel->value->actionId;
75                         if ( ! atel.last() )
76                                 out << " ";
77                 }
78                 out << "</action_table>\n";
79         }
80         out << "    </action_table_list>\n";
81
82         delete[] tables;
83 }
84
85 void XMLCodeGen::reduceActionTables()
86 {
87         /* Reduce the actions tables to a set. */
88         for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) {
89                 RedActionTable *actionTable = 0;
90
91                 /* Reduce To State Actions. */
92                 if ( st->toStateActionTable.length() > 0 ) {
93                         if ( actionTableMap.insert( st->toStateActionTable, &actionTable ) )
94                                 actionTable->id = nextActionTableId++;
95                 }
96
97                 /* Reduce From State Actions. */
98                 if ( st->fromStateActionTable.length() > 0 ) {
99                         if ( actionTableMap.insert( st->fromStateActionTable, &actionTable ) )
100                                 actionTable->id = nextActionTableId++;
101                 }
102
103                 /* Reduce EOF actions. */
104                 if ( st->eofActionTable.length() > 0 ) {
105                         if ( actionTableMap.insert( st->eofActionTable, &actionTable ) )
106                                 actionTable->id = nextActionTableId++;
107                 }
108
109                 /* Loop the transitions and reduce their actions. */
110                 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
111                         if ( trans->actionTable.length() > 0 ) {
112                                 if ( actionTableMap.insert( trans->actionTable, &actionTable ) )
113                                         actionTable->id = nextActionTableId++;
114                         }
115                 }
116         }
117 }
118
119 void XMLCodeGen::appendTrans( TransListVect &outList, Key lowKey, 
120                 Key highKey, TransAp *trans )
121 {
122         if ( trans->toState != 0 || trans->actionTable.length() > 0 )
123                 outList.append( TransEl( lowKey, highKey, trans ) );
124 }
125
126 void XMLCodeGen::writeKey( Key key )
127 {
128         if ( keyOps->isSigned )
129                 out << key.getVal();
130         else
131                 out << (unsigned long) key.getVal();
132 }
133
134 void XMLCodeGen::writeTrans( Key lowKey, Key highKey, TransAp *trans )
135 {
136         /* First reduce the action. */
137         RedActionTable *actionTable = 0;
138         if ( trans->actionTable.length() > 0 )
139                 actionTable = actionTableMap.find( trans->actionTable );
140
141         /* Write the transition. */
142         out << "        <t>";
143         writeKey( lowKey );
144         out << " ";
145         writeKey( highKey );
146
147         if ( trans->toState != 0 )
148                 out << " " << trans->toState->alg.stateNum;
149         else
150                 out << " x";
151
152         if ( actionTable != 0 )
153                 out << " " << actionTable->id;
154         else
155                 out << " x";
156         out << "</t>\n";
157 }
158
159 void XMLCodeGen::writeTransList( StateAp *state )
160 {
161         TransListVect outList;
162
163         /* If there is only are no ranges the task is simple. */
164         if ( state->outList.length() > 0 ) {
165                 /* Loop each source range. */
166                 for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
167                         /* Reduce the transition. If it reduced to anything then add it. */
168                         appendTrans( outList, trans->lowKey, trans->highKey, trans );
169                 }
170         }
171
172         out << "      <trans_list length=\"" << outList.length() << "\">\n";
173         for ( TransListVect::Iter tvi = outList; tvi.lte(); tvi++ )
174                 writeTrans( tvi->lowKey, tvi->highKey, tvi->value );
175         out << "      </trans_list>\n";
176 }
177
178 void XMLCodeGen::writeText( InlineItem *item )
179 {
180         if ( item->prev == 0 || item->prev->type != InlineItem::Text )
181                 out << "<text>";
182         xmlEscapeHost( out, item->data, strlen(item->data) );
183         if ( item->next == 0 || item->next->type != InlineItem::Text )
184                 out << "</text>";
185 }
186
187 void XMLCodeGen::writeGoto( InlineItem *item )
188 {
189         if ( pd->generatingSectionSubset )
190                 out << "<goto>-1</goto>";
191         else {
192                 EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id );
193                 out << "<goto>" << targ->value->alg.stateNum << "</goto>";
194         }
195 }
196
197 void XMLCodeGen::writeCall( InlineItem *item )
198 {
199         if ( pd->generatingSectionSubset )
200                 out << "<call>-1</call>";
201         else {
202                 EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id );
203                 out << "<call>" << targ->value->alg.stateNum << "</call>";
204         }
205 }
206
207 void XMLCodeGen::writeNext( InlineItem *item )
208 {
209         if ( pd->generatingSectionSubset )
210                 out << "<next>-1</next>";
211         else {
212                 EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id );
213                 out << "<next>" << targ->value->alg.stateNum << "</next>";
214         }
215 }
216
217 void XMLCodeGen::writeGotoExpr( InlineItem *item )
218 {
219         out << "<goto_expr>";
220         writeInlineList( item->children );
221         out << "</goto_expr>";
222 }
223
224 void XMLCodeGen::writeCallExpr( InlineItem *item )
225 {
226         out << "<call_expr>";
227         writeInlineList( item->children );
228         out << "</call_expr>";
229 }
230
231 void XMLCodeGen::writeNextExpr( InlineItem *item )
232 {
233         out << "<next_expr>";
234         writeInlineList( item->children );
235         out << "</next_expr>";
236 }
237
238 void XMLCodeGen::writeEntry( InlineItem *item )
239 {
240         if ( pd->generatingSectionSubset )
241                 out << "<entry>-1</entry>";
242         else {
243                 EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id );
244                 out << "<entry>" << targ->value->alg.stateNum << "</entry>";
245         }
246 }
247
248 void XMLCodeGen::writeActionExec( InlineItem *item )
249 {
250         out << "<exec>";
251         writeInlineList( item->children );
252         out << "</exec>";
253 }
254
255 void XMLCodeGen::writeLmOnLast( InlineItem *item )
256 {
257         out << "<set_tokend>1</set_tokend>";
258
259         if ( item->longestMatchPart->action != 0 ) {
260                 out << "<sub_action>";
261                 writeInlineList( item->longestMatchPart->action->inlineList );
262                 out << "</sub_action>";
263         }
264 }
265
266 void XMLCodeGen::writeLmOnNext( InlineItem *item )
267 {
268         out << "<set_tokend>0</set_tokend>";
269         out << "<hold></hold>";
270
271         if ( item->longestMatchPart->action != 0 ) {
272                 out << "<sub_action>";
273                 writeInlineList( item->longestMatchPart->action->inlineList );
274                 out << "</sub_action>";
275         }
276 }
277
278 void XMLCodeGen::writeLmOnLagBehind( InlineItem *item )
279 {
280         out << "<exec><get_tokend></get_tokend></exec>";
281
282         if ( item->longestMatchPart->action != 0 ) {
283                 out << "<sub_action>";
284                 writeInlineList( item->longestMatchPart->action->inlineList );
285                 out << "</sub_action>";
286         }
287 }
288
289 void XMLCodeGen::writeLmSwitch( InlineItem *item )
290 {
291         LongestMatch *longestMatch = item->longestMatch;
292         out << "<lm_switch>\n";
293
294         if ( longestMatch->lmSwitchHandlesError ) {
295                 /* If the switch handles error then we should have also forced the
296                  * error state. */
297                 assert( fsm->errState != 0 );
298
299                 out << "        <sub_action id=\"0\">";
300                 out << "<goto>" << fsm->errState->alg.stateNum << "</goto>";
301                 out << "</sub_action>\n";
302         }
303         
304         for ( LmPartList::Iter lmi = *longestMatch->longestMatchList; lmi.lte(); lmi++ ) {
305                 if ( lmi->inLmSelect && lmi->action != 0 ) {
306                         /* Open the action. Write it with the context that sets up _p 
307                          * when doing control flow changes from inside the machine. */
308                         out << "        <sub_action id=\"" << lmi->longestMatchId << "\">";
309                         out << "<exec><get_tokend></get_tokend></exec>";
310                         writeInlineList( lmi->action->inlineList );
311                         out << "</sub_action>\n";
312                 }
313         }
314
315         out << "    </lm_switch>";
316 }
317
318 void XMLCodeGen::writeInlineList( InlineList *inlineList )
319 {
320         for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
321                 switch ( item->type ) {
322                 case InlineItem::Text:
323                         writeText( item );
324                         break;
325                 case InlineItem::Goto:
326                         writeGoto( item );
327                         break;
328                 case InlineItem::GotoExpr:
329                         writeGotoExpr( item );
330                         break;
331                 case InlineItem::Call:
332                         writeCall( item );
333                         break;
334                 case InlineItem::CallExpr:
335                         writeCallExpr( item );
336                         break;
337                 case InlineItem::Next:
338                         writeNext( item );
339                         break;
340                 case InlineItem::NextExpr:
341                         writeNextExpr( item );
342                         break;
343                 case InlineItem::Break:
344                         out << "<break></break>";
345                         break;
346                 case InlineItem::Ret: 
347                         out << "<ret></ret>";
348                         break;
349                 case InlineItem::PChar:
350                         out << "<pchar></pchar>";
351                         break;
352                 case InlineItem::Char: 
353                         out << "<char></char>";
354                         break;
355                 case InlineItem::Curs: 
356                         out << "<curs></curs>";
357                         break;
358                 case InlineItem::Targs: 
359                         out << "<targs></targs>";
360                         break;
361                 case InlineItem::Entry:
362                         writeEntry( item );
363                         break;
364
365                 case InlineItem::Hold:
366                         out << "<hold></hold>";
367                         break;
368                 case InlineItem::Exec:
369                         writeActionExec( item );
370                         break;
371
372                 case InlineItem::LmSetActId:
373                         out << "<set_act>" << 
374                                         item->longestMatchPart->longestMatchId << 
375                                         "</set_act>";
376                         break;
377                 case InlineItem::LmSetTokEnd:
378                         out << "<set_tokend>1</set_tokend>";
379                         break;
380
381                 case InlineItem::LmOnLast:
382                         writeLmOnLast( item );
383                         break;
384                 case InlineItem::LmOnNext:
385                         writeLmOnNext( item );
386                         break;
387                 case InlineItem::LmOnLagBehind:
388                         writeLmOnLagBehind( item );
389                         break;
390                 case InlineItem::LmSwitch: 
391                         writeLmSwitch( item );
392                         break;
393
394                 case InlineItem::LmInitAct:
395                         out << "<init_act></init_act>";
396                         break;
397                 case InlineItem::LmInitTokStart:
398                         out << "<init_tokstart></init_tokstart>";
399                         break;
400                 case InlineItem::LmSetTokStart:
401                         out << "<set_tokstart></set_tokstart>";
402                         break;
403                 }
404         }
405 }
406
407 void XMLCodeGen::writeAction( Action *action )
408 {
409         out << "      <action id=\"" << action->actionId << "\"";
410         if ( action->name != 0 ) 
411                 out << " name=\"" << action->name << "\"";
412         out << " line=\"" << action->loc.line << "\" col=\"" << action->loc.col << "\">";
413         writeInlineList( action->inlineList );
414         out << "</action>\n";
415 }
416
/* Copy len characters starting at data to out, replacing '<', '>' and '&'
 * with "&lt;", "&gt;" and "&amp;". All other characters pass through
 * unchanged. */
void xmlEscapeHost( std::ostream &out, char *data, long len )
{
	for ( char *stop = data + len; data != stop; data++ ) {
		const char c = *data;
		if ( c == '<' )
			out << "&lt;";
		else if ( c == '>' )
			out << "&gt;";
		else if ( c == '&' )
			out << "&amp;";
		else
			out << c;
	}
}
430
431 void XMLCodeGen::writeStateActions( StateAp *state )
432 {
433         RedActionTable *toStateActions = 0;
434         if ( state->toStateActionTable.length() > 0 )
435                 toStateActions = actionTableMap.find( state->toStateActionTable );
436
437         RedActionTable *fromStateActions = 0;
438         if ( state->fromStateActionTable.length() > 0 )
439                 fromStateActions = actionTableMap.find( state->fromStateActionTable );
440
441         RedActionTable *eofActions = 0;
442         if ( state->eofActionTable.length() > 0 )
443                 eofActions = actionTableMap.find( state->eofActionTable );
444         
445         if ( toStateActions != 0 || fromStateActions != 0 || eofActions != 0 ) {
446                 out << "      <state_actions>";
447                 if ( toStateActions != 0 )
448                         out << toStateActions->id;
449                 else
450                         out << "x";
451
452                 if ( fromStateActions != 0 )
453                         out << " " << fromStateActions->id;
454                 else
455                         out << " x";
456
457                 if ( eofActions != 0 )
458                         out << " " << eofActions->id;
459                 else
460                         out << " x"; out << "</state_actions>\n";
461         }
462 }
463
464 void XMLCodeGen::writeStateConditions( StateAp *state )
465 {
466         if ( state->stateCondList.length() > 0 ) {
467                 out << "      <cond_list length=\"" << state->stateCondList.length() << "\">\n";
468                 for ( StateCondList::Iter scdi = state->stateCondList; scdi.lte(); scdi++ ) {
469                         out << "        <c>";
470                         writeKey( scdi->lowKey );
471                         out << " ";
472                         writeKey( scdi->highKey );
473                         out << " ";
474                         out << scdi->condSpace->condSpaceId;
475                         out << "</c>\n";
476                 }
477                 out << "      </cond_list>\n";
478         }
479 }
480
481 void XMLCodeGen::writeStateList()
482 {
483         /* Write the list of states. */
484         out << "    <state_list length=\"" << fsm->stateList.length() << "\">\n";
485         for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) {
486                 out << "      <state id=\"" << st->alg.stateNum << "\"";
487                 if ( st->isFinState() )
488                         out << " final=\"t\"";
489                 out << ">\n";
490
491                 writeStateActions( st );
492                 writeStateConditions( st );
493                 writeTransList( st );
494
495                 out << "      </state>\n";
496
497                 if ( !st.last() )
498                         out << "\n";
499         }
500         out << "    </state_list>\n";
501 }
502
503 bool XMLCodeGen::writeNameInst( NameInst *nameInst )
504 {
505         bool written = false;
506         if ( nameInst->parent != 0 )
507                 written = writeNameInst( nameInst->parent );
508         
509         if ( nameInst->name != 0 ) {
510                 if ( written )
511                         out << '_';
512                 out << nameInst->name;
513                 written = true;
514         }
515
516         return written;
517 }
518
519 void XMLCodeGen::writeEntryPoints()
520 {
521         /* List of entry points other than start state. */
522         if ( fsm->entryPoints.length() > 0 || pd->lmRequiresErrorState ) {
523                 out << "    <entry_points";
524                 if ( pd->lmRequiresErrorState )
525                         out << " error=\"t\"";
526                 out << ">\n";
527                 for ( EntryMap::Iter en = fsm->entryPoints; en.lte(); en++ ) {
528                         /* Get the name instantiation from nameIndex. */
529                         NameInst *nameInst = pd->nameIndex[en->key];
530                         StateAp *state = en->value;
531                         out << "      <entry name=\"";
532                         writeNameInst( nameInst );
533                         out << "\">" << state->alg.stateNum << "</entry>\n";
534                 }
535                 out << "    </entry_points>\n";
536         }
537 }
538
539 void XMLCodeGen::writeMachine()
540 {
541         /* Open the machine. */
542         out << "  <machine>\n"; 
543         
544         /* Action tables. */
545         reduceActionTables();
546
547         writeActionList();
548         writeActionTableList();
549         writeConditions();
550
551         /* Start state. */
552         out << "    <start_state>" << fsm->startState->alg.stateNum << 
553                         "</start_state>\n";
554         
555         /* Error state. */
556         if ( fsm->errState != 0 ) {
557                 out << "    <error_state>" << fsm->errState->alg.stateNum << 
558                         "</error_state>\n";
559         }
560
561         writeEntryPoints();
562         writeStateList();
563
564         out << "  </machine>\n";
565 }
566
567
568 void XMLCodeGen::writeConditions()
569 {
570         if ( condData->condSpaceMap.length() > 0 ) {
571                 long nextCondSpaceId = 0;
572                 for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ )
573                         cs->condSpaceId = nextCondSpaceId++;
574
575                 out << "    <cond_space_list length=\"" << condData->condSpaceMap.length() << "\">\n";
576                 for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ ) {
577                         out << "      <cond_space id=\"" << cs->condSpaceId << 
578                                 "\" length=\"" << cs->condSet.length() << "\">";
579                         writeKey( cs->baseKey );
580                         for ( CondSet::Iter csi = cs->condSet; csi.lte(); csi++ )
581                                 out << " " << (*csi)->actionId;
582                         out << "</cond_space>\n";
583                 }
584                 out << "    </cond_space_list>\n";
585         }
586 }
587
588 void XMLCodeGen::writeExports()
589 {
590         if ( pd->exportList.length() > 0 ) {
591                 out << "  <exports>\n";
592                 for ( ExportList::Iter exp = pd->exportList; exp.lte(); exp++ ) {
593                         out << "    <ex name=\"" << exp->name << "\">";
594                         writeKey( exp->key );
595                         out << "</ex>\n";
596                 }
597                 out << "  </exports>\n";
598         }
599 }
600
601 void XMLCodeGen::writeXML()
602 {
603         /* Open the definition. */
604         out << "<ragel_def name=\"" << fsmName << "\">\n";
605
606         /* Alphabet type. */
607         out << "  <alphtype>" << keyOps->alphType->internalName << "</alphtype>\n";
608         
609         /* Getkey expression. */
610         if ( pd->getKeyExpr != 0 ) {
611                 out << "  <getkey>";
612                 writeInlineList( pd->getKeyExpr );
613                 out << "</getkey>\n";
614         }
615
616         /* Access expression. */
617         if ( pd->accessExpr != 0 ) {
618                 out << "  <access>";
619                 writeInlineList( pd->accessExpr );
620                 out << "</access>\n";
621         }
622
623         /*
624          * Variable expressions.
625          */
626
627         if ( pd->pExpr != 0 ) {
628                 out << "  <p_expr>";
629                 writeInlineList( pd->pExpr );
630                 out << "</p_expr>\n";
631         }
632         
633         if ( pd->peExpr != 0 ) {
634                 out << "  <pe_expr>";
635                 writeInlineList( pd->peExpr );
636                 out << "</pe_expr>\n";
637         }
638         
639         if ( pd->csExpr != 0 ) {
640                 out << "  <cs_expr>";
641                 writeInlineList( pd->csExpr );
642                 out << "</cs_expr>\n";
643         }
644         
645         if ( pd->topExpr != 0 ) {
646                 out << "  <top_expr>";
647                 writeInlineList( pd->topExpr );
648                 out << "</top_expr>\n";
649         }
650         
651         if ( pd->stackExpr != 0 ) {
652                 out << "  <stack_expr>";
653                 writeInlineList( pd->stackExpr );
654                 out << "</stack_expr>\n";
655         }
656         
657         if ( pd->actExpr != 0 ) {
658                 out << "  <act_expr>";
659                 writeInlineList( pd->actExpr );
660                 out << "</act_expr>\n";
661         }
662         
663         if ( pd->tokstartExpr != 0 ) {
664                 out << "  <tokstart_expr>";
665                 writeInlineList( pd->tokstartExpr );
666                 out << "</tokstart_expr>\n";
667         }
668         
669         if ( pd->tokendExpr != 0 ) {
670                 out << "  <tokend_expr>";
671                 writeInlineList( pd->tokendExpr );
672                 out << "</tokend_expr>\n";
673         }
674         
675         if ( pd->dataExpr != 0 ) {
676                 out << "  <data_expr>";
677                 writeInlineList( pd->dataExpr );
678                 out << "</data_expr>\n";
679         }
680         
681         writeExports();
682         
683         writeMachine();
684
685         out <<
686                 "</ragel_def>\n";
687 }
688