Added new HTTP headers and WSI tokens:
[platform/upstream/libwebsockets.git] / lib / minilex.c
1 /*
2  * minilex.c
3  *
4  * High efficiency lexical state parser
5  *
6  * Copyright (C)2011-2014 Andy Green <andy@warmcat.com>
7  *
8  * Licensed under LGPL2
9  *
10  * Usage: gcc minilex.c -o minilex && ./minilex > lextable.h
11  *
12  * Run it twice to test parsing on the generated table on stderr
13  */
14
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18
19 /* set of parsable strings -- ALL LOWER CASE */
20
21 const char *set[] = {
22         "get ",
23         "post ",
24         "host:",
25         "connection:",
26         "sec-websocket-key1:",
27         "sec-websocket-key2:",
28         "sec-websocket-protocol:",
29         "upgrade:",
30         "origin:",
31         "sec-websocket-draft:",
32         "\x0d\x0a",
33
34         "sec-websocket-key:",
35         "sec-websocket-version:",
36         "sec-websocket-origin:",
37
38         "sec-websocket-extensions:",
39
40         "sec-websocket-accept:",
41         "sec-websocket-nonce:",
42         "http/1.1 ",
43
44         "accept:",
45         "access-control-request-headers:",
46         "if-modified-since:",
47         "if-none-match:"
48         "accept-encoding:",
49         "accept-language:",
50         "pragma:",
51         "cache-control:",
52         "authorization:",
53         "cookie:",
54         "content-length:",
55         "content-type:",
56         "date:",
57         "range:",
58         "referer:",
59         "", /* not matchable */
60
61 };
62
63 /*
64  * b7 = 0 = 1-byte seq
65  *          0x08 = fail
66  *          2-byte seq
67  *          0x00 - 0x07, then terminal as given in 2nd byte
68             3-byte seq
69  *          no match: go fwd 3 byte, match: jump fwd by amt in +1/+2 bytes
70  *    = 1 = 1-byte seq
71  *          no match: die, match go fwd 1 byte
72  */
73
74 unsigned char lextable[] = {
75         #include "lextable.h"
76 };
77
78 #define PARALLEL 30
79
80 struct state {
81         char c[PARALLEL];
82         int state[PARALLEL];
83         int count;
84         int bytepos;
85
86         int real_pos;
87 };
88
89 struct state state[1000];
90 int next = 1;
91
92 #define FAIL_CHAR 0x08
93
94 int lextable_decode(int pos, char c)
95 {
96
97         while (1) {
98                 if (lextable[pos] & (1 << 7)) { /* 1-byte, fail on mismatch */
99                         if ((lextable[pos] & 0x7f) != c)
100                                 return -1;
101                         /* fall thru */
102                         pos++;
103                         if (lextable[pos] == FAIL_CHAR)
104                                 return -1;
105                         return pos;
106                 } else { /* b7 = 0, end or 3-byte */
107                         if (lextable[pos] < FAIL_CHAR) /* terminal marker */
108                                 return pos;
109
110                         if (lextable[pos] == c) /* goto */
111                                 return pos + (lextable[pos + 1]) +
112                                                 (lextable[pos + 2] << 8);
113                         /* fall thru goto */
114                         pos += 3;
115                         /* continue */
116                 }
117         }
118 }
119
120 int main(void)
121 {
122         int n = 0;
123         int m = 0;
124         int prev;
125         char c;
126         int walk;
127         int saw;
128         int y;
129         int j;
130         int pos = 0;
131
132         while (n < sizeof(set) / sizeof(set[0])) {
133
134                 m = 0;
135                 walk = 0;
136                 prev = 0;
137
138                 if (set[n][0] == '\0') {
139                         n++;
140                         continue;
141                 }
142
143                 while (set[n][m]) {
144
145                         saw = 0;
146                         for (y = 0; y < state[walk].count; y++)
147                                 if (state[walk].c[y] == set[n][m]) {
148                                         /* exists -- go forward */
149                                         walk = state[walk].state[y];
150                                         saw = 1;
151                                         break;
152                                 }
153
154                         if (saw)
155                                 goto again;
156
157                         /* something we didn't see before */
158
159                         state[walk].c[state[walk].count] = set[n][m];
160
161                         state[walk].state[state[walk].count] = next;
162                         state[walk].count++;
163                         walk = next++;
164 again:
165                         m++;
166                 }
167
168                 state[walk].c[0] = n++;
169                 state[walk].state[0] = 0; /* terminal marker */
170                 state[walk].count = 1;
171         }
172
173         walk = 0;
174         for (n = 0; n < next; n++) {
175                 state[n].bytepos = walk;
176                 walk += (2 * state[n].count);
177         }
178
179         /* compute everyone's position first */
180
181         pos = 0;
182         walk = 0;
183         for (n = 0; n < next; n++) {
184
185                 state[n].real_pos = pos;
186
187                 for (m = 0; m < state[n].count; m++) {
188
189                         if (state[n].state[m] == 0)
190                                 pos += 2; /* terminal marker */
191                         else { /* c is a character */
192                                 if ((state[state[n].state[m]].bytepos -
193                                                                 walk) == 2)
194                                         pos++;
195                                 else {
196                                         pos += 3;
197                                         if (m == state[n].count - 1)
198                                                 pos++; /* fail */
199                                 }
200                         }
201                         walk += 2;
202                 }
203         }
204
205         walk = 0;
206         pos = 0;
207         for (n = 0; n < next; n++) {
208                 for (m = 0; m < state[n].count; m++) {
209
210                         if (!m)
211                                 fprintf(stdout, "/* pos %04x: %3d */ ",
212                                                           state[n].real_pos, n);
213                         else
214                                 fprintf(stdout, "                    ");
215
216                         y = state[n].c[m];
217                         saw = state[n].state[m];
218
219                         if (saw == 0) { // c is a terminal then
220
221                                 if (y > 0x7ff) {
222                                         fprintf(stderr, "terminal too big\n");
223                                         return 2;
224                                 }
225
226                                 fprintf(stdout, "   0x%02X, 0x%02X           "
227                                         "       "
228                                         "/* - terminal marker %2d - */,\n",
229                                                     y >> 8, y & 0xff, y & 0x7f);
230                                 pos += 2;
231                                 walk += 2;
232                                 continue;
233                         }
234
235                         /* c is a character */
236
237                         prev = y &0x7f;
238                         if (prev < 32 || prev > 126)
239                                 prev = '.';
240
241
242                         if ((state[saw].bytepos - walk) == 2) {
243                                 fprintf(stdout, "   0x%02X /* '%c' -> */,\n",
244                                                 y | 0x80, prev);
245                                 pos++;
246                                 walk += 2;
247                                 continue;
248                         }
249
250                         j = state[saw].real_pos - pos;
251
252                         if (j > 0xffff) {
253                                 fprintf(stderr,
254                                   "Jump > 64K bytes ahead (%d to %d)\n",
255                                         state[n].real_pos, state[saw].real_pos);
256                                 return 1;
257                         }
258                         fprintf(stdout, "   0x%02X /* '%c' */, 0x%02X, 0x%02X  "
259                                 "/* (to 0x%04X state %3d) */,\n",
260                                 y, prev,
261                                 j & 0xff, j >> 8,
262                                 state[saw].real_pos, saw);
263                         pos += 3;
264
265                         if (m == state[n].count - 1) {
266                                 fprintf(stdout,
267                                   "                       0x%02X, /* fail */\n",
268                                                                 FAIL_CHAR);
269                                 pos++; /* fail */
270                         }
271
272                         walk += 2;
273                 }
274         }
275
276         fprintf(stdout, "/* total size %d bytes */\n", pos);
277
278         /*
279          * Try to parse every legal input string
280          */
281
282         for (n = 0; n < sizeof(set) / sizeof(set[0]); n++) {
283                 walk = 0;
284                 m = 0;
285                 y = -1;
286
287                 if (set[n][0] == '\0')
288                         continue;
289
290                 fprintf(stderr, "  trying '%s'\n", set[n]);
291
292                 while (set[n][m]) {
293                         walk = lextable_decode(walk, set[n][m]);
294                         if (walk < 0) {
295                                 fprintf(stderr, "failed\n");
296                                 return 3;
297                         }
298
299                         if (lextable[walk] < FAIL_CHAR) {
300                                 y = (lextable[walk] << 8) + lextable[walk + 1];
301                                 break;
302                         }
303                         m++;
304                 }
305
306                 if (y != n) {
307                         fprintf(stderr, "decode failed %d\n", y);
308                         return 4;
309                 }
310         }
311
312         fprintf(stderr, "All decode OK\n");
313
314         return 0;
315 }