http2 add hpack decode support
[platform/upstream/libwebsockets.git] / lib / minilex.c
1 /*
2  * minilex.c
3  *
4  * High efficiency lexical state parser
5  *
6  * Copyright (C)2011-2014 Andy Green <andy@warmcat.com>
7  *
8  * Licensed under LGPL2
9  *
10  * Usage: gcc minilex.c -o minilex && ./minilex > lextable.h
11  *
12  * Run it twice to test parsing on the generated table on stderr
13  */
14
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18
19 /* set of parsable strings -- ALL LOWER CASE */
20
21 const char *set[] = {
22         "get ",
23         "post ",
24         "options ",
25         "host:",
26         "connection:",
27         "upgrade:",
28         "origin:",
29         "sec-websocket-draft:",
30         "\x0d\x0a",
31
32         "sec-websocket-extensions:",
33         "sec-websocket-key1:",
34         "sec-websocket-key2:",
35         "sec-websocket-protocol:",
36
37         "sec-websocket-accept:",
38         "sec-websocket-nonce:",
39         "http/1.1 ",
40         "http2-settings:",
41
42         "accept:",
43         "access-control-request-headers:",
44         "if-modified-since:",
45         "if-none-match:",
46         "accept-encoding:",
47         "accept-language:",
48         "pragma:",
49         "cache-control:",
50         "authorization:",
51         "cookie:",
52         "content-length:",
53         "content-type:",
54         "date:",
55         "range:",
56         "referer:",
57         "sec-websocket-key:",
58         "sec-websocket-version:",
59         "sec-websocket-origin:",
60         
61         ":authority:",
62         ":method:",
63         ":path:",
64         ":scheme:",
65         ":status:",
66         
67         "accept-charset:",
68         "accept-ranges:",
69         "access-control-allow-origin:",
70         "age:",
71         "allow:",
72         "content-disposition:",
73         "content-language:",
74         "content-location:",
75         "content-range:",
76         "etag:",
77         "expect:",
78         "expires:",
79         "from:",
80         "if-match:",
81         "if-range:",
82         "if-unmodified-since:",
83         "last-modified:",
84         "link:",
85         "location:",
86         "max-forwards:",
87         "proxy-authenticate:",
88         "proxy-authorization:",
89         "refresh:",
90         "retry-after:",
91         "server:",
92         "set-cookie:",
93         "strict-transport-security:",
94         "transfer-encoding:",
95         "user-agent:",
96         "vary:",
97         "via:",
98         "www-authenticate:",
99
100         "", /* not matchable */
101
102 };
103
104 /*
105  * b7 = 0 = 1-byte seq
106  *          0x08 = fail
107  *          2-byte seq
108  *          0x00 - 0x07, then terminal as given in 2nd byte
109             3-byte seq
110  *          no match: go fwd 3 byte, match: jump fwd by amt in +1/+2 bytes
111  *    = 1 = 1-byte seq
112  *          no match: die, match go fwd 1 byte
113  */
114
115 unsigned char lextable[] = {
116         #include "lextable.h"
117 };
118
119 #define PARALLEL 30
120
121 struct state {
122         char c[PARALLEL];
123         int state[PARALLEL];
124         int count;
125         int bytepos;
126
127         int real_pos;
128 };
129
130 struct state state[1000];
131 int next = 1;
132
133 #define FAIL_CHAR 0x08
134
135 int lextable_decode(int pos, char c)
136 {
137
138         while (1) {
139                 if (lextable[pos] & (1 << 7)) { /* 1-byte, fail on mismatch */
140                         if ((lextable[pos] & 0x7f) != c)
141                                 return -1;
142                         /* fall thru */
143                         pos++;
144                         if (lextable[pos] == FAIL_CHAR)
145                                 return -1;
146                         return pos;
147                 } else { /* b7 = 0, end or 3-byte */
148                         if (lextable[pos] < FAIL_CHAR) /* terminal marker */
149                                 return pos;
150
151                         if (lextable[pos] == c) /* goto */
152                                 return pos + (lextable[pos + 1]) +
153                                                 (lextable[pos + 2] << 8);
154                         /* fall thru goto */
155                         pos += 3;
156                         /* continue */
157                 }
158         }
159 }
160
161 int main(void)
162 {
163         int n = 0;
164         int m = 0;
165         int prev;
166         char c;
167         int walk;
168         int saw;
169         int y;
170         int j;
171         int pos = 0;
172
173         while (n < sizeof(set) / sizeof(set[0])) {
174
175                 m = 0;
176                 walk = 0;
177                 prev = 0;
178
179                 if (set[n][0] == '\0') {
180                         n++;
181                         continue;
182                 }
183
184                 while (set[n][m]) {
185
186                         saw = 0;
187                         for (y = 0; y < state[walk].count; y++)
188                                 if (state[walk].c[y] == set[n][m]) {
189                                         /* exists -- go forward */
190                                         walk = state[walk].state[y];
191                                         saw = 1;
192                                         break;
193                                 }
194
195                         if (saw)
196                                 goto again;
197
198                         /* something we didn't see before */
199
200                         state[walk].c[state[walk].count] = set[n][m];
201
202                         state[walk].state[state[walk].count] = next;
203                         state[walk].count++;
204                         walk = next++;
205 again:
206                         m++;
207                 }
208
209                 state[walk].c[0] = n++;
210                 state[walk].state[0] = 0; /* terminal marker */
211                 state[walk].count = 1;
212         }
213
214         walk = 0;
215         for (n = 0; n < next; n++) {
216                 state[n].bytepos = walk;
217                 walk += (2 * state[n].count);
218         }
219
220         /* compute everyone's position first */
221
222         pos = 0;
223         walk = 0;
224         for (n = 0; n < next; n++) {
225
226                 state[n].real_pos = pos;
227
228                 for (m = 0; m < state[n].count; m++) {
229
230                         if (state[n].state[m] == 0)
231                                 pos += 2; /* terminal marker */
232                         else { /* c is a character */
233                                 if ((state[state[n].state[m]].bytepos -
234                                                                 walk) == 2)
235                                         pos++;
236                                 else {
237                                         pos += 3;
238                                         if (m == state[n].count - 1)
239                                                 pos++; /* fail */
240                                 }
241                         }
242                         walk += 2;
243                 }
244         }
245
246         walk = 0;
247         pos = 0;
248         for (n = 0; n < next; n++) {
249                 for (m = 0; m < state[n].count; m++) {
250
251                         if (!m)
252                                 fprintf(stdout, "/* pos %04x: %3d */ ",
253                                                           state[n].real_pos, n);
254                         else
255                                 fprintf(stdout, "                    ");
256
257                         y = state[n].c[m];
258                         saw = state[n].state[m];
259
260                         if (saw == 0) { // c is a terminal then
261
262                                 if (y > 0x7ff) {
263                                         fprintf(stderr, "terminal too big\n");
264                                         return 2;
265                                 }
266
267                                 fprintf(stdout, "   0x%02X, 0x%02X           "
268                                         "       "
269                                         "/* - terminal marker %2d - */,\n",
270                                                     y >> 8, y & 0xff, y & 0x7f);
271                                 pos += 2;
272                                 walk += 2;
273                                 continue;
274                         }
275
276                         /* c is a character */
277
278                         prev = y &0x7f;
279                         if (prev < 32 || prev > 126)
280                                 prev = '.';
281
282
283                         if ((state[saw].bytepos - walk) == 2) {
284                                 fprintf(stdout, "   0x%02X /* '%c' -> */,\n",
285                                                 y | 0x80, prev);
286                                 pos++;
287                                 walk += 2;
288                                 continue;
289                         }
290
291                         j = state[saw].real_pos - pos;
292
293                         if (j > 0xffff) {
294                                 fprintf(stderr,
295                                   "Jump > 64K bytes ahead (%d to %d)\n",
296                                         state[n].real_pos, state[saw].real_pos);
297                                 return 1;
298                         }
299                         fprintf(stdout, "   0x%02X /* '%c' */, 0x%02X, 0x%02X  "
300                                 "/* (to 0x%04X state %3d) */,\n",
301                                 y, prev,
302                                 j & 0xff, j >> 8,
303                                 state[saw].real_pos, saw);
304                         pos += 3;
305
306                         if (m == state[n].count - 1) {
307                                 fprintf(stdout,
308                                   "                       0x%02X, /* fail */\n",
309                                                                 FAIL_CHAR);
310                                 pos++; /* fail */
311                         }
312
313                         walk += 2;
314                 }
315         }
316
317         fprintf(stdout, "/* total size %d bytes */\n", pos);
318
319         /*
320          * Try to parse every legal input string
321          */
322
323         for (n = 0; n < sizeof(set) / sizeof(set[0]); n++) {
324                 walk = 0;
325                 m = 0;
326                 y = -1;
327
328                 if (set[n][0] == '\0')
329                         continue;
330
331                 fprintf(stderr, "  trying '%s'\n", set[n]);
332
333                 while (set[n][m]) {
334                         walk = lextable_decode(walk, set[n][m]);
335                         if (walk < 0) {
336                                 fprintf(stderr, "failed\n");
337                                 return 3;
338                         }
339
340                         if (lextable[walk] < FAIL_CHAR) {
341                                 y = (lextable[walk] << 8) + lextable[walk + 1];
342                                 break;
343                         }
344                         m++;
345                 }
346
347                 if (y != n) {
348                         fprintf(stderr, "decode failed %d\n", y);
349                         return 4;
350                 }
351         }
352
353         fprintf(stderr, "All decode OK\n");
354
355         return 0;
356 }