*
* High efficiency lexical state parser
*
- * Copyright (C)2011-2013 Andy Green <andy@warmcat.com>
+ * Copyright (C)2011-2014 Andy Green <andy@warmcat.com>
*
* Licensed under LGPL2
*
#include <stdlib.h>
#include <string.h>
-/* set of parsable strings -- ALL LOWER CASE */
-
-const char *set[] = {
- "get ",
- "post ",
- "host:",
- "connection:",
- "sec-websocket-key1:",
- "sec-websocket-key2:",
- "sec-websocket-protocol:",
- "upgrade:",
- "origin:",
- "sec-websocket-draft:",
- "\x0d\x0a",
-
- "sec-websocket-key:",
- "sec-websocket-version:",
- "sec-websocket-origin:",
-
- "sec-websocket-extensions:",
-
- "sec-websocket-accept:",
- "sec-websocket-nonce:",
- "http/1.1 ",
-
- "accept:",
- "if-modified-since:",
- "accept-encoding:",
- "accept-language:",
- "pragma:",
- "cache-control:",
- "authorization:",
- "cookie:",
- "content-length:",
- "content-type:",
- "date:",
- "range:",
- "referer:",
- "", /* not matchable */
+#include "lextable-strings.h"
-};
+/*
+ * b7 = 0 = 1-byte seq
+ * 0x08 = fail
+ * 2-byte seq
+ * 0x00 - 0x07, then terminal as given in 2nd byte
+ 3-byte seq
+ * no match: go fwd 3 byte, match: jump fwd by amt in +1/+2 bytes
+ * = 1 = 1-byte seq
+ * no match: die, match go fwd 1 byte
+ */
unsigned char lextable[] = {
#include "lextable.h"
int state[PARALLEL];
int count;
int bytepos;
+
+ int real_pos;
};
struct state state[1000];
int next = 1;
+#define FAIL_CHAR 0x08
int lextable_decode(int pos, char c)
{
while (1) {
- if (!lextable[pos + 1]) /* terminal marker */
+ if (lextable[pos] & (1 << 7)) { /* 1-byte, fail on mismatch */
+ if ((lextable[pos] & 0x7f) != c)
+ return -1;
+ /* fall thru */
+ pos++;
+ if (lextable[pos] == FAIL_CHAR)
+ return -1;
return pos;
-
- if ((lextable[pos] & 0x7f) == c) /* goto */
- return pos + (lextable[pos + 1] << 1);
-
- if (lextable[pos] & 0x80) /* fail */
- return -1;
-
- pos += 2;
+ } else { /* b7 = 0, end or 3-byte */
+ if (lextable[pos] < FAIL_CHAR) /* terminal marker */
+ return pos;
+
+ if (lextable[pos] == c) /* goto */
+ return pos + (lextable[pos + 1]) +
+ (lextable[pos + 2] << 8);
+ /* fall thru goto */
+ pos += 3;
+ /* continue */
+ }
}
}
-
int main(void)
{
int n = 0;
int walk;
int saw;
int y;
+ int j;
+ int pos = 0;
while (n < sizeof(set) / sizeof(set[0])) {
walk += (2 * state[n].count);
}
+ /* compute everyone's position first */
+
+ pos = 0;
walk = 0;
for (n = 0; n < next; n++) {
+
+ state[n].real_pos = pos;
+
+ for (m = 0; m < state[n].count; m++) {
+
+ if (state[n].state[m] == 0)
+ pos += 2; /* terminal marker */
+ else { /* c is a character */
+ if ((state[state[n].state[m]].bytepos -
+ walk) == 2)
+ pos++;
+ else {
+ pos += 3;
+ if (m == state[n].count - 1)
+ pos++; /* fail */
+ }
+ }
+ walk += 2;
+ }
+ }
+
+ walk = 0;
+ pos = 0;
+ for (n = 0; n < next; n++) {
for (m = 0; m < state[n].count; m++) {
if (!m)
- fprintf(stdout, "/* pos %3d: state %3d */ ",
- walk, n);
+ fprintf(stdout, "/* pos %04x: %3d */ ",
+ state[n].real_pos, n);
else
- fprintf(stdout, " ");
+ fprintf(stdout, " ");
y = state[n].c[m];
saw = state[n].state[m];
- if (m == state[n].count - 1)
- y |= 0x80; /* last option */
-
- if (saw == 0) // c is a terminal then
- fprintf(stdout, " 0x%02X, 0x00 "
- "/* - terminal marker %2d - */, \n",
- y, y - 0x80);
- else { /* c is a character and we need a byte delta */
- if ((state[saw].bytepos - walk) / 2 > 0xff) {
- fprintf(stdout,
- "Tried to jump > 510 bytes ahead\n");
- return 1;
+ if (saw == 0) { // c is a terminal then
+
+ if (y > 0x7ff) {
+ fprintf(stderr, "terminal too big\n");
+ return 2;
}
- prev = y &0x7f;
- if (prev < 32 || prev > 126)
- prev = '.';
- fprintf(stdout, " 0x%02X /* '%c' */, 0x%02X "
- "/* (to pos %3d state %3d) */,\n",
- y, prev,
- (state[saw].bytepos - walk) / 2,
- state[saw].bytepos, saw);
+
+ fprintf(stdout, " 0x%02X, 0x%02X "
+ " "
+ "/* - terminal marker %2d - */,\n",
+ y >> 8, y & 0xff, y & 0x7f);
+ pos += 2;
+ walk += 2;
+ continue;
+ }
+
+ /* c is a character */
+
+ prev = y &0x7f;
+ if (prev < 32 || prev > 126)
+ prev = '.';
+
+
+ if ((state[saw].bytepos - walk) == 2) {
+ fprintf(stdout, " 0x%02X /* '%c' -> */,\n",
+ y | 0x80, prev);
+ pos++;
+ walk += 2;
+ continue;
}
+
+ j = state[saw].real_pos - pos;
+
+ if (j > 0xffff) {
+ fprintf(stderr,
+ "Jump > 64K bytes ahead (%d to %d)\n",
+ state[n].real_pos, state[saw].real_pos);
+ return 1;
+ }
+ fprintf(stdout, " 0x%02X /* '%c' */, 0x%02X, 0x%02X "
+ "/* (to 0x%04X state %3d) */,\n",
+ y, prev,
+ j & 0xff, j >> 8,
+ state[saw].real_pos, saw);
+ pos += 3;
+
+ if (m == state[n].count - 1) {
+ fprintf(stdout,
+ " 0x%02X, /* fail */\n",
+ FAIL_CHAR);
+ pos++; /* fail */
+ }
+
walk += 2;
}
}
- fprintf(stdout, "/* total size %d bytes */\n", walk);
+ fprintf(stdout, "/* total size %d bytes */\n", pos);
/*
- * Test parser... real parser code is the same
+ * Try to parse every legal input string
*/
for (n = 0; n < sizeof(set) / sizeof(set[0]); n++) {
walk = 0;
m = 0;
+ y = -1;
if (set[n][0] == '\0')
continue;
- fprintf(stderr, "Trying '%s'\n", set[n]);
+ fprintf(stderr, " trying '%s'\n", set[n]);
while (set[n][m]) {
walk = lextable_decode(walk, set[n][m]);
if (walk < 0) {
fprintf(stderr, "failed\n");
- break;
+ return 3;
}
- if (lextable[walk + 1] == 0) {
- fprintf(stderr, "decode: %d\n",
- lextable[walk] & 0x7f);
+
+ if (lextable[walk] < FAIL_CHAR) {
+ y = (lextable[walk] << 8) + lextable[walk + 1];
break;
}
m++;
}
+
+ if (y != n) {
+ fprintf(stderr, "decode failed %d\n", y);
+ return 4;
+ }
}
+ fprintf(stderr, "All decode OK\n");
+
return 0;
}