Initialize Tizen 2.3
[external/ragel.git] / test / rlscan.rl
1 /*
2  * Lexes Ragel input files.
3  *
4  * @LANG: c++
5  *
6  * Test works with split code gen.
7  */
8
9 #include <iostream>
10 #include <stdlib.h>
11 #include <stdio.h>
12 #include <string.h>
13
14 using namespace std;
15
/*
 * Writes the NUL-terminated string data to std::cout. The characters
 * '<', '>' and '&' are replaced by the entities &lt;, &gt; and &amp;;
 * every other character is copied through unchanged.
 */
void escapeXML( const char *data )
{
        for ( const char *cur = data; *cur != '\0'; ++cur ) {
                const char ch = *cur;

                if ( ch == '<' )
                        std::cout << "&lt;";
                else if ( ch == '>' )
                        std::cout << "&gt;";
                else if ( ch == '&' )
                        std::cout << "&amp;";
                else
                        std::cout << ch;
        }
}
28
/*
 * Writes the single character c to std::cout, substituting the entity
 * &lt;, &gt; or &amp; when c is '<', '>' or '&' respectively.
 */
void escapeXML( char c )
{
        /* Stays null when c needs no escaping. */
        const char *entity = 0;

        if ( c == '<' )
                entity = "&lt;";
        else if ( c == '>' )
                entity = "&gt;";
        else if ( c == '&' )
                entity = "&amp;";

        if ( entity != 0 )
                std::cout << entity;
        else
                std::cout << c;
}
38
/*
 * Writes len characters starting at data to std::cout. The characters
 * '<', '>' and '&' are replaced by the entities &lt;, &gt; and &amp;;
 * every other character is copied through unchanged. The range is
 * bounded by len only; no terminator is looked for.
 *
 * A zero or negative len writes nothing.
 */
void escapeXML( const char *data, int len )
{
        /* The loop below stops only when data reaches end exactly. With a
         * negative length end would lie before data and that could never
         * happen, so reject such lengths up front. */
        if ( len <= 0 )
                return;

        for ( const char *end = data + len; data != end; data++ ) {
                switch ( *data ) {
                        case '<': std::cout << "&lt;"; break;
                        case '>': std::cout << "&gt;"; break;
                        case '&': std::cout << "&amp;"; break;
                        default: std::cout << *data; break;
                }
        }
}
50
/*
 * Sends the NUL-terminated string data to std::cout verbatim, with no
 * XML escaping applied.
 */
inline void write( const char *data )
{
        std::ostream &out = std::cout;
        out << data;
}
55
/*
 * Sends the single character c to std::cout verbatim, with no XML
 * escaping applied.
 */
inline void write( char c )
{
        std::ostream &out = std::cout;
        out << c;
}
60
/*
 * Sends len characters starting at data to std::cout verbatim, with no
 * XML escaping applied. The count is taken from len; no terminator is
 * looked for.
 */
inline void write( const char *data, int len )
{
        const std::streamsize count = len;
        std::cout.write( data, count );
}
65
66
%%{
        # State machine that tokenizes a Ragel input file. Three scanners hand
        # control to one another: main (host code outside any section),
        # rlscan (inside a Ragel section) and ilscan (inside an inline action
        # block). All output goes through write() and escapeXML().
        machine RagelScan;

        # Token shapes shared by the scanners below.
        word = [a-zA-Z_][a-zA-Z_0-9]*;
        integer = [0-9]+;
        hex = '0x' [0-9a-fA-F] [0-9a-fA-F]*;

        # default is any character except NUL; EOF is the NUL character.
        default = ^0;
        EOF = 0;

        # Handles comments in outside code and inline blocks.
        # Entered with fcall after the opening delimiter has been consumed.
        # Each character taken, including the closing delimiter, is passed to
        # escapeXML; the transition that completes the closing delimiter
        # executes fret, returning to the scanner that made the call.
        c_comment := 
                ( default* :>> '*/' )
                ${ escapeXML( fc ); }
                @{ fret; };

        # Prints the text of the current token (ts up to te) XML-escaped.
        action emit {
                escapeXML( ts, te-ts );
        }

        #
        # Inline action code
        #
        # Copies the contents of an inline block to the output while tracking
        # brace nesting in inline_depth, so that the brace closing the block
        # can be told apart from braces nested inside it.
        #

        ilscan := |*

                # Quoted literals are matched as whole tokens, so braces and
                # comment delimiters inside them are not acted upon.
                "'" ( [^'\\] | /\\./ )* "'" => emit;
                '"' ( [^"\\] | /\\./ )* '"' => emit;
                # Block comment: print the opener, then let c_comment print
                # the rest.
                '/*' {
                        write( "/*" );
                        fcall c_comment;
                };
                # Line comment, up to and including the newline.
                '//' [^\n]* '\n' => emit;

                # A nested opening brace goes one level deeper.
                '{' {
                        write( '{' );
                        inline_depth += 1; 
                };

                '}' {
                        write( '}' );
                        /* If dropping down to the last } then return 
                         * to ragel code. */
                        if ( --inline_depth == 0 ) {
                                write( "</inline>\n" );
                                fgoto rlscan;
                        }
                };

                # Any other single character is printed escaped.
                default => { escapeXML( *ts ); };
        *|;

        #
        # Ragel Tokens
        #
        # Scans the inside of a Ragel section and prints one XML element per
        # token. single_line records whether the section was opened in the
        # single-line form, which ends at a newline, or in the multi-line
        # form, which ends at the closing section marker.
        #

        rlscan := |*
                # Closing marker of a multi-line section. In a single-line
                # section the token is matched and produces no output.
                '}%%' {
                        if ( !single_line ) {
                                write( "</section>\n" );
                                fgoto main;
                        }
                };

                # A newline ends a single-line section. In a multi-line
                # section it is matched and produces no output.
                '\n' {
                        if ( single_line ) {
                                write( "</section>\n" );
                                fgoto main;
                        }
                };

                # Word
                word {
                        write( "<word>" );
                        write( ts, te-ts );
                        write( "</word>\n" );
                };

                # Decimal integer.
                integer {
                        write( "<int>" );
                        write( ts, te-ts );
                        write( "</int>\n" );
                };

                # Hexadecimal integer.
                hex {
                        write( "<hex>" );
                        write( ts, te-ts );
                        write( "</hex>\n" );
                };

                # Consume comments.
                '#' [^\n]* '\n';

                # Single literal string.
                "'" ( [^'\\] | /\\./ )* "'" {
                        write( "<single_lit>" );
                        escapeXML( ts, te-ts );
                        write( "</single_lit>\n" );
                };

                # Double literal string.
                '"' ( [^"\\] | /\\./ )* '"' {
                        write( "<double_lit>" );
                        escapeXML( ts, te-ts );
                        write( "</double_lit>\n" );
                };

                # Or literal.
                '[' ( [^\]\\] | /\\./ )* ']' {
                        write( "<or_lit>" );
                        escapeXML( ts, te-ts );
                        write( "</or_lit>\n" );
                };

                # Regex Literal.
                '/' ( [^/\\] | /\\./ ) * '/' {
                        write( "<re_lit>" );
                        escapeXML( ts, te-ts );
                        write( "</re_lit>\n" );
                };

                # Open an inline block
                # The opening brace is printed as part of the <inline> tag
                # and counts as nesting level 1.
                '{' {
                        inline_depth = 1;
                        write( "<inline>{" );
                        fgoto ilscan;
                };

                # A single punctuation character becomes a symbol element.
                punct {
                        write( "<symbol>" );
                        escapeXML( fc );
                        write( "</symbol>\n" );
                };
                
                # Any other non-NUL character is consumed with no output.
                default;
        *|;

        #
        # Outside code.
        #
        # Copies host-language text to the output XML-escaped until a section
        # opener is seen, then switches to rlscan.
        #

        main := |*

                # Quoted literals are matched as whole tokens, so a section
                # opener inside one is not acted upon.
                "'" ( [^'\\] | /\\./ )* "'" => emit;
                '"' ( [^"\\] | /\\./ )* '"' => emit;

                # Block comment: print the opener, then let c_comment print
                # the rest.
                '/*' {
                        escapeXML( ts, te-ts );
                        fcall c_comment;
                };

                # Line comment, up to and including the newline.
                '//' [^\n]* '\n' => emit;

                # Opener of a multi-line section.
                '%%{' { 
                        write( "<section>\n" );
                        single_line = false;
                        fgoto rlscan;
                };

                # Opener of a single-line section.
                '%%' {
                        write( "<section>\n" ); 
                        single_line = true; 
                        fgoto rlscan;
                };

                # Any other single character is printed escaped.
                default { 
                        escapeXML( *ts );
                };

                # EOF.
                # The NUL character is consumed with no output.
                EOF;
        *|;
}%%
242
/* Emit the static data of the RagelScan machine at this point in the file.
 * NOTE(review): the nofinal option presumably leaves out the first-final
 * state constant, which nothing visible in this file reads -- confirm
 * against the Ragel guide. */
%% write data nofinal;
244
/*
 * Runs the RagelScan machine once over the NUL-terminated string data.
 * The scanner actions produce the output on cout through write() and
 * escapeXML(). If the machine finishes in its error state, "PARSE ERROR"
 * is printed on cerr and the process exits with status 1.
 */
void test( const char *data )
{
        std::ios::sync_with_stdio(false);

        /* Scanner state. The embedded actions use ts and te by name; cs is
         * checked below. NOTE(review): act is presumably used by the
         * generated scanner code -- confirm against the Ragel guide. */
        int cs, act;
        const char *ts, *te;
        /* Call stack for fcall/fret. One slot is enough here: c_comment is
         * the only fcall target and it never issues an fcall itself. */
        int stack[1], top;

        /* State read and written by the rlscan and ilscan actions. */
        bool single_line = false;
        int inline_depth = 0;

        %% write init;

        /* The whole input is handed over as one buffer, so the end of the
         * buffer (pe) is also the end of input (eof). */
        const char *p = data;
        const char *pe = data + strlen( data );
        const char *eof = pe;
        %% write exec;

        if ( cs == RagelScan_error ) {
                /* Machine failed before finding a token. */
                cerr << "PARSE ERROR" << endl;
                exit(1);
        }
}
270
/* NOTE(review): BUFSIZE is not referenced anywhere in the visible part of
 * this file; confirm it is unused before removing it. */
#define BUFSIZE 2048
272
273 int main()
274 {
275         std::ios::sync_with_stdio(false);
276
277         test("hi %%{ /'}%%'/ { /*{*/ {} } + '\\'' }%%there\n");
278
279         return 0;
280 }
281 #ifdef _____OUTPUT_____
282 hi <section>
283 <re_lit>/'}%%'/</re_lit>
284 <inline>{ /*{*/ {} }</inline>
285 <symbol>+</symbol>
286 <single_lit>'\''</single_lit>
287 </section>
288 there
289 #endif