4 * High efficiency lexical state parser
6 * Copyright (C)2011-2013 Andy Green <andy@warmcat.com>
10 * Usage: gcc minilex.c -o minilex && ./minilex > lextable.h
12 * Run it twice to test parsing on the generated table on stderr
19 /* set of parsable strings -- ALL LOWER CASE */
26 "sec-websocket-key1:",
27 "sec-websocket-key2:",
28 "sec-websocket-protocol:",
31 "sec-websocket-draft:",
35 "sec-websocket-version:",
36 "sec-websocket-origin:",
38 "sec-websocket-extensions:",
40 "sec-websocket-accept:",
41 "sec-websocket-nonce:",
57 "", /* not matchable */
61 unsigned char lextable[] = {
74 struct state state[1000];
78 int lextable_decode(int pos, char c)
81 if (!lextable[pos + 1]) /* terminal marker */
84 if ((lextable[pos] & 0x7f) == c) /* goto */
85 return pos + (lextable[pos + 1] << 1);
87 if (lextable[pos] & 0x80) /* fail */
105 while (n < sizeof(set) / sizeof(set[0])) {
111 if (set[n][0] == '\0') {
119 for (y = 0; y < state[walk].count; y++)
120 if (state[walk].c[y] == set[n][m]) {
121 /* exists -- go forward */
122 walk = state[walk].state[y];
130 /* something we didn't see before */
132 state[walk].c[state[walk].count] = set[n][m];
134 state[walk].state[state[walk].count] = next;
141 state[walk].c[0] = n++;
142 state[walk].state[0] = 0; /* terminal marker */
143 state[walk].count = 1;
147 for (n = 0; n < next; n++) {
148 state[n].bytepos = walk;
149 walk += (2 * state[n].count);
153 for (n = 0; n < next; n++) {
154 for (m = 0; m < state[n].count; m++) {
157 fprintf(stdout, "/* pos %3d: state %3d */ ",
160 fprintf(stdout, " ");
163 saw = state[n].state[m];
165 if (m == state[n].count - 1)
166 y |= 0x80; /* last option */
168 if (saw == 0) // c is a terminal then
169 fprintf(stdout, " 0x%02X, 0x00 "
170 "/* - terminal marker %2d - */, \n",
172 else { /* c is a character and we need a byte delta */
173 if ((state[saw].bytepos - walk) / 2 > 0xff) {
175 "Tried to jump > 510 bytes ahead\n");
179 if (prev < 32 || prev > 126)
181 fprintf(stdout, " 0x%02X /* '%c' */, 0x%02X "
182 "/* (to pos %3d state %3d) */,\n",
184 (state[saw].bytepos - walk) / 2,
185 state[saw].bytepos, saw);
191 fprintf(stdout, "/* total size %d bytes */\n", walk);
194 * Test parser... real parser code is the same
197 for (n = 0; n < sizeof(set) / sizeof(set[0]); n++) {
201 if (set[n][0] == '\0')
204 fprintf(stderr, "Trying '%s'\n", set[n]);
207 walk = lextable_decode(walk, set[n][m]);
209 fprintf(stderr, "failed\n");
212 if (lextable[walk + 1] == 0) {
213 fprintf(stderr, "decode: %d\n",
214 lextable[walk] & 0x7f);