http2 able to send test.html to nghttp2
[platform/upstream/libwebsockets.git] / lib / minilex.c
1 /*
2  * minilex.c
3  *
4  * High efficiency lexical state parser
5  *
6  * Copyright (C)2011-2014 Andy Green <andy@warmcat.com>
7  *
8  * Licensed under LGPL2
9  *
10  * Usage: gcc minilex.c -o minilex && ./minilex > lextable.h
11  *
12  * Run it twice to test parsing on the generated table on stderr
13  */
14
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18
19 #include "lextable-strings.h"
20
21 /*
22  * b7 = 0 = 1-byte seq
23  *          0x08 = fail
24  *          2-byte seq
25  *          0x00 - 0x07, then terminal as given in 2nd byte
26             3-byte seq
27  *          no match: go fwd 3 byte, match: jump fwd by amt in +1/+2 bytes
28  *    = 1 = 1-byte seq
29  *          no match: die, match go fwd 1 byte
30  */
31
32 unsigned char lextable[] = {
33         #include "lextable.h"
34 };
35
36 #define PARALLEL 30
37
38 struct state {
39         char c[PARALLEL];
40         int state[PARALLEL];
41         int count;
42         int bytepos;
43
44         int real_pos;
45 };
46
47 struct state state[1000];
48 int next = 1;
49
50 #define FAIL_CHAR 0x08
51
52 int lextable_decode(int pos, char c)
53 {
54
55         while (1) {
56                 if (lextable[pos] & (1 << 7)) { /* 1-byte, fail on mismatch */
57                         if ((lextable[pos] & 0x7f) != c)
58                                 return -1;
59                         /* fall thru */
60                         pos++;
61                         if (lextable[pos] == FAIL_CHAR)
62                                 return -1;
63                         return pos;
64                 } else { /* b7 = 0, end or 3-byte */
65                         if (lextable[pos] < FAIL_CHAR) /* terminal marker */
66                                 return pos;
67
68                         if (lextable[pos] == c) /* goto */
69                                 return pos + (lextable[pos + 1]) +
70                                                 (lextable[pos + 2] << 8);
71                         /* fall thru goto */
72                         pos += 3;
73                         /* continue */
74                 }
75         }
76 }
77
78 int main(void)
79 {
80         int n = 0;
81         int m = 0;
82         int prev;
83         char c;
84         int walk;
85         int saw;
86         int y;
87         int j;
88         int pos = 0;
89
90         while (n < sizeof(set) / sizeof(set[0])) {
91
92                 m = 0;
93                 walk = 0;
94                 prev = 0;
95
96                 if (set[n][0] == '\0') {
97                         n++;
98                         continue;
99                 }
100
101                 while (set[n][m]) {
102
103                         saw = 0;
104                         for (y = 0; y < state[walk].count; y++)
105                                 if (state[walk].c[y] == set[n][m]) {
106                                         /* exists -- go forward */
107                                         walk = state[walk].state[y];
108                                         saw = 1;
109                                         break;
110                                 }
111
112                         if (saw)
113                                 goto again;
114
115                         /* something we didn't see before */
116
117                         state[walk].c[state[walk].count] = set[n][m];
118
119                         state[walk].state[state[walk].count] = next;
120                         state[walk].count++;
121                         walk = next++;
122 again:
123                         m++;
124                 }
125
126                 state[walk].c[0] = n++;
127                 state[walk].state[0] = 0; /* terminal marker */
128                 state[walk].count = 1;
129         }
130
131         walk = 0;
132         for (n = 0; n < next; n++) {
133                 state[n].bytepos = walk;
134                 walk += (2 * state[n].count);
135         }
136
137         /* compute everyone's position first */
138
139         pos = 0;
140         walk = 0;
141         for (n = 0; n < next; n++) {
142
143                 state[n].real_pos = pos;
144
145                 for (m = 0; m < state[n].count; m++) {
146
147                         if (state[n].state[m] == 0)
148                                 pos += 2; /* terminal marker */
149                         else { /* c is a character */
150                                 if ((state[state[n].state[m]].bytepos -
151                                                                 walk) == 2)
152                                         pos++;
153                                 else {
154                                         pos += 3;
155                                         if (m == state[n].count - 1)
156                                                 pos++; /* fail */
157                                 }
158                         }
159                         walk += 2;
160                 }
161         }
162
163         walk = 0;
164         pos = 0;
165         for (n = 0; n < next; n++) {
166                 for (m = 0; m < state[n].count; m++) {
167
168                         if (!m)
169                                 fprintf(stdout, "/* pos %04x: %3d */ ",
170                                                           state[n].real_pos, n);
171                         else
172                                 fprintf(stdout, "                    ");
173
174                         y = state[n].c[m];
175                         saw = state[n].state[m];
176
177                         if (saw == 0) { // c is a terminal then
178
179                                 if (y > 0x7ff) {
180                                         fprintf(stderr, "terminal too big\n");
181                                         return 2;
182                                 }
183
184                                 fprintf(stdout, "   0x%02X, 0x%02X           "
185                                         "       "
186                                         "/* - terminal marker %2d - */,\n",
187                                                     y >> 8, y & 0xff, y & 0x7f);
188                                 pos += 2;
189                                 walk += 2;
190                                 continue;
191                         }
192
193                         /* c is a character */
194
195                         prev = y &0x7f;
196                         if (prev < 32 || prev > 126)
197                                 prev = '.';
198
199
200                         if ((state[saw].bytepos - walk) == 2) {
201                                 fprintf(stdout, "   0x%02X /* '%c' -> */,\n",
202                                                 y | 0x80, prev);
203                                 pos++;
204                                 walk += 2;
205                                 continue;
206                         }
207
208                         j = state[saw].real_pos - pos;
209
210                         if (j > 0xffff) {
211                                 fprintf(stderr,
212                                   "Jump > 64K bytes ahead (%d to %d)\n",
213                                         state[n].real_pos, state[saw].real_pos);
214                                 return 1;
215                         }
216                         fprintf(stdout, "   0x%02X /* '%c' */, 0x%02X, 0x%02X  "
217                                 "/* (to 0x%04X state %3d) */,\n",
218                                 y, prev,
219                                 j & 0xff, j >> 8,
220                                 state[saw].real_pos, saw);
221                         pos += 3;
222
223                         if (m == state[n].count - 1) {
224                                 fprintf(stdout,
225                                   "                       0x%02X, /* fail */\n",
226                                                                 FAIL_CHAR);
227                                 pos++; /* fail */
228                         }
229
230                         walk += 2;
231                 }
232         }
233
234         fprintf(stdout, "/* total size %d bytes */\n", pos);
235
236         /*
237          * Try to parse every legal input string
238          */
239
240         for (n = 0; n < sizeof(set) / sizeof(set[0]); n++) {
241                 walk = 0;
242                 m = 0;
243                 y = -1;
244
245                 if (set[n][0] == '\0')
246                         continue;
247
248                 fprintf(stderr, "  trying '%s'\n", set[n]);
249
250                 while (set[n][m]) {
251                         walk = lextable_decode(walk, set[n][m]);
252                         if (walk < 0) {
253                                 fprintf(stderr, "failed\n");
254                                 return 3;
255                         }
256
257                         if (lextable[walk] < FAIL_CHAR) {
258                                 y = (lextable[walk] << 8) + lextable[walk + 1];
259                                 break;
260                         }
261                         m++;
262                 }
263
264                 if (y != n) {
265                         fprintf(stderr, "decode failed %d\n", y);
266                         return 4;
267                 }
268         }
269
270         fprintf(stderr, "All decode OK\n");
271
272         return 0;
273 }