4 * High efficiency lexical state parser
6 * Copyright (C)2011-2013 Andy Green <andy@warmcat.com>
10 * Usage: gcc minilex.c -o minilex && ./minilex > lextable.h
12 * Run it twice to test parsing on the generated table on stderr
19 /* set of parsable strings -- ALL LOWER CASE */
25 "sec-websocket-key1:",
26 "sec-websocket-key2:",
27 "sec-websocket-protocol:",
30 "sec-websocket-draft:",
34 "sec-websocket-version:",
35 "sec-websocket-origin:",
37 "sec-websocket-extensions:",
39 "sec-websocket-accept:",
40 "sec-websocket-nonce:",
55 "", /* not matchable */
59 unsigned char lextable[] = {
72 struct state state[1000];
76 int lextable_decode(int pos, char c)
79 if (!lextable[pos + 1]) /* terminal marker */
82 if ((lextable[pos] & 0x7f) == c) /* goto */
83 return pos + (lextable[pos + 1] << 1);
85 if (lextable[pos] & 0x80) /* fail */
103 while (n < sizeof(set) / sizeof(set[0])) {
109 if (set[n][0] == '\0') {
117 for (y = 0; y < state[walk].count; y++)
118 if (state[walk].c[y] == set[n][m]) {
119 /* exists -- go forward */
120 walk = state[walk].state[y];
128 /* something we didn't see before */
130 state[walk].c[state[walk].count] = set[n][m];
132 state[walk].state[state[walk].count] = next;
139 state[walk].c[0] = n++;
140 state[walk].state[0] = 0; /* terminal marker */
141 state[walk].count = 1;
145 for (n = 0; n < next; n++) {
146 state[n].bytepos = walk;
147 walk += (2 * state[n].count);
151 for (n = 0; n < next; n++) {
152 for (m = 0; m < state[n].count; m++) {
155 fprintf(stdout, "/* pos %3d: state %3d */ ",
158 fprintf(stdout, " ");
161 saw = state[n].state[m];
163 if (m == state[n].count - 1)
164 y |= 0x80; /* last option */
166 if (saw == 0) // c is a terminal then
167 fprintf(stdout, " 0x%02X, 0x00 "
168 "/* - terminal marker %2d - */, \n",
170 else { /* c is a character and we need a byte delta */
171 if ((state[saw].bytepos - walk) / 2 > 0xff) {
173 "Tried to jump > 510 bytes ahead\n");
177 if (prev < 32 || prev > 126)
179 fprintf(stdout, " 0x%02X /* '%c' */, 0x%02X "
180 "/* (to pos %3d state %3d) */,\n",
182 (state[saw].bytepos - walk) / 2,
183 state[saw].bytepos, saw);
189 fprintf(stdout, "/* total size %d bytes */\n", walk);
192 * Test parser... real parser code is the same
195 for (n = 0; n < sizeof(set) / sizeof(set[0]); n++) {
199 if (set[n][0] == '\0')
202 fprintf(stderr, "Trying '%s'\n", set[n]);
205 walk = lextable_decode(walk, set[n][m]);
207 fprintf(stderr, "failed\n");
210 if (lextable[walk + 1] == 0) {
211 fprintf(stderr, "decode: %d\n",
212 lextable[walk] & 0x7f);