upgrade and improve storage efficiency of minilex
[platform/upstream/libwebsockets.git] / lib / minilex.c
1 /*
2  * minilex.c
3  *
4  * High efficiency lexical state parser
5  *
6  * Copyright (C)2011-2014 Andy Green <andy@warmcat.com>
7  *
8  * Licensed under LGPL2
9  *
10  * Usage: gcc minilex.c -o minilex && ./minilex > lextable.h
11  *
12  * Run it twice to test parsing on the generated table on stderr
13  */
14
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18
19 /* set of parsable strings -- ALL LOWER CASE */
20
21 const char *set[] = {
22         "get ",
23         "post ",
24         "host:",
25         "connection:",
26         "sec-websocket-key1:",
27         "sec-websocket-key2:",
28         "sec-websocket-protocol:",
29         "upgrade:",
30         "origin:",
31         "sec-websocket-draft:",
32         "\x0d\x0a",
33
34         "sec-websocket-key:",
35         "sec-websocket-version:",
36         "sec-websocket-origin:",
37
38         "sec-websocket-extensions:",
39
40         "sec-websocket-accept:",
41         "sec-websocket-nonce:",
42         "http/1.1 ",
43
44         "accept:",
45         "if-modified-since:",
46         "accept-encoding:",
47         "accept-language:",
48         "pragma:",
49         "cache-control:",
50         "authorization:",
51         "cookie:",
52         "content-length:",
53         "content-type:",
54         "date:",
55         "range:",
56         "referer:",
57         "", /* not matchable */
58
59 };
60
61 /*
62  * b7 = 0 = 1-byte seq
63  *          0x08 = fail
64  *          2-byte seq
65  *          0x00 - 0x07, then terminal as given in 2nd byte
66             3-byte seq
67  *          no match: go fwd 3 byte, match: jump fwd by amt in +1/+2 bytes
68  *    = 1 = 1-byte seq
69  *          no match: die, match go fwd 1 byte
70  */
71
72 unsigned char lextable[] = {
73         #include "lextable.h"
74 };
75
76 #define PARALLEL 30
77
78 struct state {
79         char c[PARALLEL];
80         int state[PARALLEL];
81         int count;
82         int bytepos;
83
84         int real_pos;
85 };
86
87 struct state state[1000];
88 int next = 1;
89
90 #define FAIL_CHAR 0x08
91
92 int lextable_decode(int pos, char c)
93 {
94
95         while (1) {
96                 if (lextable[pos] & (1 << 7)) { /* 1-byte, fail on mismatch */
97                         if ((lextable[pos] & 0x7f) != c)
98                                 return -1;
99                         /* fall thru */
100                         pos++;
101                         if (lextable[pos] == FAIL_CHAR)
102                                 return -1;
103                         return pos;
104                 } else { /* b7 = 0, end or 3-byte */
105                         if (lextable[pos] < FAIL_CHAR) /* terminal marker */
106                                 return pos;
107
108                         if (lextable[pos] == c) /* goto */
109                                 return pos + (lextable[pos + 1]) +
110                                                 (lextable[pos + 2] << 8);
111                         /* fall thru goto */
112                         pos += 3;
113                         /* continue */
114                 }
115         }
116 }
117
118 int main(void)
119 {
120         int n = 0;
121         int m = 0;
122         int prev;
123         char c;
124         int walk;
125         int saw;
126         int y;
127         int j;
128         int pos = 0;
129
130         while (n < sizeof(set) / sizeof(set[0])) {
131
132                 m = 0;
133                 walk = 0;
134                 prev = 0;
135
136                 if (set[n][0] == '\0') {
137                         n++;
138                         continue;
139                 }
140
141                 while (set[n][m]) {
142
143                         saw = 0;
144                         for (y = 0; y < state[walk].count; y++)
145                                 if (state[walk].c[y] == set[n][m]) {
146                                         /* exists -- go forward */
147                                         walk = state[walk].state[y];
148                                         saw = 1;
149                                         break;
150                                 }
151
152                         if (saw)
153                                 goto again;
154
155                         /* something we didn't see before */
156
157                         state[walk].c[state[walk].count] = set[n][m];
158
159                         state[walk].state[state[walk].count] = next;
160                         state[walk].count++;
161                         walk = next++;
162 again:
163                         m++;
164                 }
165
166                 state[walk].c[0] = n++;
167                 state[walk].state[0] = 0; /* terminal marker */
168                 state[walk].count = 1;
169         }
170
171         walk = 0;
172         for (n = 0; n < next; n++) {
173                 state[n].bytepos = walk;
174                 walk += (2 * state[n].count);
175         }
176
177         /* compute everyone's position first */
178
179         pos = 0;
180         walk = 0;
181         for (n = 0; n < next; n++) {
182
183                 state[n].real_pos = pos;
184
185                 for (m = 0; m < state[n].count; m++) {
186
187                         if (state[n].state[m] == 0)
188                                 pos += 2; /* terminal marker */
189                         else { /* c is a character */
190                                 if ((state[state[n].state[m]].bytepos -
191                                                                 walk) == 2)
192                                         pos++;
193                                 else {
194                                         pos += 3;
195                                         if (m == state[n].count - 1)
196                                                 pos++; /* fail */
197                                 }
198                         }
199                         walk += 2;
200                 }
201         }
202
203         walk = 0;
204         pos = 0;
205         for (n = 0; n < next; n++) {
206                 for (m = 0; m < state[n].count; m++) {
207
208                         if (!m)
209                                 fprintf(stdout, "/* pos %04x: %3d */ ",
210                                                           state[n].real_pos, n);
211                         else
212                                 fprintf(stdout, "                    ");
213
214                         y = state[n].c[m];
215                         saw = state[n].state[m];
216
217                         if (saw == 0) { // c is a terminal then
218
219                                 if (y > 0x7ff) {
220                                         fprintf(stderr, "terminal too big\n");
221                                         return 2;
222                                 }
223
224                                 fprintf(stdout, "   0x%02X, 0x%02X           "
225                                         "       "
226                                         "/* - terminal marker %2d - */,\n",
227                                                     y >> 8, y & 0xff, y & 0x7f);
228                                 pos += 2;
229                                 walk += 2;
230                                 continue;
231                         }
232
233                         /* c is a character */
234
235                         prev = y &0x7f;
236                         if (prev < 32 || prev > 126)
237                                 prev = '.';
238
239
240                         if ((state[saw].bytepos - walk) == 2) {
241                                 fprintf(stdout, "   0x%02X /* '%c' -> */,\n",
242                                                 y | 0x80, prev);
243                                 pos++;
244                                 walk += 2;
245                                 continue;
246                         }
247
248                         j = state[saw].real_pos - pos;
249
250                         if (j > 0xffff) {
251                                 fprintf(stderr,
252                                   "Jump > 64K bytes ahead (%d to %d)\n",
253                                         state[n].real_pos, state[saw].real_pos);
254                                 return 1;
255                         }
256                         fprintf(stdout, "   0x%02X /* '%c' */, 0x%02X, 0x%02X  "
257                                 "/* (to 0x%04X state %3d) */,\n",
258                                 y, prev,
259                                 j & 0xff, j >> 8,
260                                 state[saw].real_pos, saw);
261                         pos += 3;
262
263                         if (m == state[n].count - 1) {
264                                 fprintf(stdout,
265                                   "                       0x%02X, /* fail */\n",
266                                                                 FAIL_CHAR);
267                                 pos++; /* fail */
268                         }
269
270                         walk += 2;
271                 }
272         }
273
274         fprintf(stdout, "/* total size %d bytes */\n", pos);
275
276         /*
277          * Try to parse every legal input string
278          */
279
280         for (n = 0; n < sizeof(set) / sizeof(set[0]); n++) {
281                 walk = 0;
282                 m = 0;
283                 y = -1;
284
285                 if (set[n][0] == '\0')
286                         continue;
287
288                 fprintf(stderr, "  trying '%s'\n", set[n]);
289
290                 while (set[n][m]) {
291                         walk = lextable_decode(walk, set[n][m]);
292                         if (walk < 0) {
293                                 fprintf(stderr, "failed\n");
294                                 return 3;
295                         }
296
297                         if (lextable[walk] < FAIL_CHAR) {
298                                 y = (lextable[walk] << 8) + lextable[walk + 1];
299                                 break;
300                         }
301                         m++;
302                 }
303
304                 if (y != n) {
305                         fprintf(stderr, "decode failed %d\n", y);
306                         return 4;
307                 }
308         }
309
310         fprintf(stderr, "All decode OK\n");
311
312         return 0;
313 }