shared: add minimal JSON tokenizer
author Lennart Poettering <lennart@poettering.net>
Mon, 15 Dec 2014 21:26:56 +0000 (22:26 +0100)
committer Lennart Poettering <lennart@poettering.net>
Mon, 15 Dec 2014 21:27:15 +0000 (22:27 +0100)
.gitignore
Makefile.am
src/shared/json.c [new file with mode: 0644]
src/shared/json.h [new file with mode: 0644]
src/shared/utf8.c
src/shared/utf8.h
src/shared/xml.h
src/test/test-json.c [new file with mode: 0644]

index dbc56bc..bd9125d 100644 (file)
 /test-journal-stream
 /test-journal-syslog
 /test-journal-verify
+/test-json
 /test-libsystemd-sym*
 /test-libudev
 /test-libudev-sym*
index 84b587d..ab07d3b 100644 (file)
@@ -868,6 +868,8 @@ libsystemd_shared_la_SOURCES = \
        src/shared/audit.h \
        src/shared/xml.c \
        src/shared/xml.h \
+       src/shared/json.c \
+       src/shared/json.h \
        src/shared/bus-label.c \
        src/shared/bus-label.h \
        src/shared/gpt.h \
@@ -1366,6 +1368,7 @@ tests += \
        test-tables \
        test-device-nodes \
        test-xml \
+       test-json \
        test-architecture \
        test-socket-util \
        test-fdset \
@@ -1686,6 +1689,13 @@ test_xml_SOURCES = \
 test_xml_LDADD = \
        libsystemd-shared.la
 
+test_json_SOURCES = \
+       src/test/test-json.c
+
+test_json_LDADD = \
+       libsystemd-shared.la \
+       -lm
+
 test_list_SOURCES = \
        src/test/test-list.c
 
diff --git a/src/shared/json.c b/src/shared/json.c
new file mode 100644 (file)
index 0000000..f1495e9
--- /dev/null
@@ -0,0 +1,409 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2014 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <sys/types.h>
+#include <math.h>
+
+#include "macro.h"
+#include "log.h"
+#include "util.h"
+#include "utf8.h"
+#include "json.h"
+
+/* Tokenizer states. json_tokenize() keeps the current one in the caller's opaque "state" pointer
+ * between calls. */
+enum {
+        STATE_NULL,       /* nothing tokenized yet: json_tokenize() resets the line counter and continues as STATE_VALUE (presumably what a NULL state pointer decodes to -- confirm against PTR_TO_INT) */
+        STATE_VALUE,      /* a value or one of '{', '}', '[', ']' is accepted next */
+        STATE_VALUE_POST, /* a value or closing bracket was just returned: ':', ',', '}' or ']' is accepted next */
+};
+
+/* Advances *line by the number of '\n' bytes found among the first n bytes of s.
+ * Does nothing when the caller is not interested in line numbers (line == NULL). */
+static void inc_lines(unsigned *line, const char *s, size_t n) {
+        const char *end, *nl;
+
+        if (!line)
+                return;
+
+        end = s + n;
+
+        /* Hop from newline to newline until the remaining window holds none */
+        while ((nl = memchr(s, '\n', end - s))) {
+                (*line)++;
+                s = nl + 1;
+        }
+}
+
+/* Decodes the four hex digits at c[0..3] into one UTF-16 code unit. Returns 0 on success, or
+ * -EINVAL as soon as a character is not a hex digit. The terminating NUL is not a hex digit
+ * either, so this never reads past the end of the string. */
+static int json_parse_hex4(const char *c, uint16_t *ret) {
+        uint16_t x = 0;
+        unsigned k;
+
+        for (k = 0; k < 4; k++) {
+                int d;
+
+                d = unhexchar(c[k]);
+                if (d < 0)
+                        return -EINVAL;
+
+                x = (uint16_t) ((x << 4) | (uint16_t) d);
+        }
+
+        *ret = x;
+        return 0;
+}
+
+/* Parses the JSON string literal starting at *p, which must point at the opening '"'.
+ *
+ * On success returns JSON_STRING, stores the unescaped, NUL-terminated string in *ret (newly
+ * allocated, ownership passes to the caller) and advances *p to the character following the
+ * closing '"'. On failure returns a negative errno-style value and leaves *p and *ret untouched:
+ * -EINVAL for malformed input (unterminated string, raw control character, unknown escape, \u0000,
+ * unpaired UTF-16 surrogate), -ENOMEM if allocation fails, or the error reported by
+ * utf8_encoded_valid_unichar() for a bad byte sequence. */
+static int json_parse_string(const char **p, char **ret) {
+        _cleanup_free_ char *s = NULL;
+        size_t n = 0, allocated = 0;
+        const char *c;
+
+        assert(p);
+        assert(*p);
+        assert(ret);
+
+        c = *p;
+
+        if (*c != '"')
+                return -EINVAL;
+
+        c++;
+
+        for (;;) {
+                int len;
+
+                /* Check for EOF */
+                if (*c == 0)
+                        return -EINVAL;
+
+                /* Check for control characters 0x00..0x1f */
+                if (*c > 0 && *c < ' ')
+                        return -EINVAL;
+
+                /* Check for control character 0x7f */
+                if (*c == 0x7f)
+                        return -EINVAL;
+
+                if (*c == '"') {
+                        /* Closing quote. An empty literal never allocated anything, so hand out a fresh
+                         * empty string; otherwise terminate what was collected (every append below
+                         * reserves room for this NUL). */
+                        if (!s) {
+                                s = strdup("");
+                                if (!s)
+                                        return -ENOMEM;
+                        } else
+                                s[n] = 0;
+
+                        *p = c + 1;
+
+                        *ret = s;
+                        s = NULL; /* ownership moved to the caller, keep the cleanup handler off it */
+                        return JSON_STRING;
+                }
+
+                if (*c == '\\') {
+                        char ch = 0;
+                        c++;
+
+                        if (*c == 0)
+                                return -EINVAL;
+
+                        if (IN_SET(*c, '"', '\\', '/'))
+                                ch = *c;
+                        else if (*c == 'b')
+                                ch = '\b';
+                        else if (*c == 'f')
+                                ch = '\f';
+                        else if (*c == 'n')
+                                ch = '\n';
+                        else if (*c == 'r')
+                                ch = '\r';
+                        else if (*c == 't')
+                                ch = '\t';
+                        else if (*c == 'u') {
+                                uint16_t x;
+
+                                if (json_parse_hex4(c + 1, &x) < 0)
+                                        return -EINVAL;
+
+                                /* U+0000 cannot be represented inside a NUL-terminated string */
+                                if (x == 0)
+                                        return -EINVAL;
+
+                                /* Skip the 'u' and its four hex digits */
+                                c += 5;
+
+                                /* Longest possible encoding (4 bytes) plus the trailing NUL */
+                                if (!GREEDY_REALLOC(s, allocated, n + 5))
+                                        return -ENOMEM;
+
+                                if (x >= 0xd800 && x <= 0xdbff) {
+                                        /* Leading surrogate: only meaningful together with a directly
+                                         * following \uXXXX trailing surrogate. Combine both into a single
+                                         * code point, as encoding the halves separately would not yield
+                                         * valid UTF-8. */
+                                        uint8_t *t = (uint8_t*) s + n;
+                                        uint32_t u;
+                                        uint16_t y;
+
+                                        if (c[0] != '\\' || c[1] != 'u')
+                                                return -EINVAL;
+
+                                        if (json_parse_hex4(c + 2, &y) < 0)
+                                                return -EINVAL;
+
+                                        if (y < 0xdc00 || y > 0xdfff)
+                                                return -EINVAL;
+
+                                        /* Skip the second escape: backslash, 'u', four hex digits */
+                                        c += 6;
+
+                                        u = 0x10000 + (((uint32_t) (x - 0xd800) << 10) | (uint32_t) (y - 0xdc00));
+
+                                        t[0] = (uint8_t) (0xf0 | (u >> 18));
+                                        t[1] = (uint8_t) (0x80 | ((u >> 12) & 0x3f));
+                                        t[2] = (uint8_t) (0x80 | ((u >> 6) & 0x3f));
+                                        t[3] = (uint8_t) (0x80 | (u & 0x3f));
+                                        n += 4;
+
+                                } else if (x >= 0xdc00 && x <= 0xdfff)
+                                        /* Trailing surrogate without a leading one */
+                                        return -EINVAL;
+                                else
+                                        n += utf8_encode_unichar(x, s + n);
+
+                                continue;
+                        } else
+                                return -EINVAL;
+
+                        /* One unescaped byte plus the trailing NUL */
+                        if (!GREEDY_REALLOC(s, allocated, n + 2))
+                                return -ENOMEM;
+
+                        s[n++] = ch;
+                        c ++;
+                        continue;
+                }
+
+                /* Anything else is copied verbatim, one validated UTF-8 sequence at a time */
+                len = utf8_encoded_valid_unichar(c);
+                if (len < 0)
+                        return len;
+
+                if (!GREEDY_REALLOC(s, allocated, n + len + 1))
+                        return -ENOMEM;
+
+                memcpy(s + n, c, len);
+                n += len;
+                c += len;
+        }
+}
+
+/* Parses the JSON number starting at *p.
+ *
+ * Accepted syntax: optional '-', then either a single '0' or a digit run not starting with '0',
+ * then an optional fraction ('.' plus at least one digit) and an optional exponent ('e' or 'E',
+ * optional sign, at least one digit).
+ *
+ * Returns JSON_INTEGER with ret->integer set when the number has neither fraction nor exponent and
+ * fits into intmax_t, otherwise JSON_REAL with ret->real set. *p is advanced to the first character
+ * that is not part of the number; what may follow a number is the caller's business. Returns
+ * -EINVAL, leaving *p and *ret untouched, if the text is not a well-formed number. */
+static int json_parse_number(const char **p, union json_value *ret) {
+        bool negative = false, exponent_negative = false, is_double = false;
+        double x = 0.0, y = 0.0, exponent = 0.0, shift = 1.0;
+        intmax_t i = 0;
+        const char *c;
+
+        assert(p);
+        assert(*p);
+        assert(ret);
+
+        c = *p;
+
+        if (*c == '-') {
+                negative = true;
+                c++;
+        }
+
+        if (*c == '0')
+                c++;
+        else {
+                /* strchr() also matches the terminating NUL, hence the explicit check */
+                if (!strchr("123456789", *c) || *c == 0)
+                        return -EINVAL;
+
+                do {
+                        int digit = *c - '0';
+
+                        if (!is_double) {
+                                /* Test before multiplying: signed overflow is undefined behaviour
+                                 * and cannot be detected reliably after the fact. Once the integer
+                                 * no longer fits, report the number as a real instead. */
+                                if (i > (INTMAX_MAX - digit) / 10)
+                                        is_double = true;
+                                else
+                                        i = 10 * i + digit;
+                        }
+
+                        /* The floating point value is accumulated in parallel in any case */
+                        x = 10.0 * x + digit;
+                        c++;
+                } while (strchr("0123456789", *c) && *c != 0);
+        }
+
+        if (*c == '.') {
+                is_double = true;
+                c++;
+
+                if (!strchr("0123456789", *c) || *c == 0)
+                        return -EINVAL;
+
+                /* Collect the fraction digits as an integer in y, and the power of ten to divide by in shift */
+                do {
+                        y = 10.0 * y + (*c - '0');
+                        shift = 10.0 * shift;
+                        c++;
+                } while (strchr("0123456789", *c) && *c != 0);
+        }
+
+        if (*c == 'e' || *c == 'E') {
+                is_double = true;
+                c++;
+
+                if (*c == '-') {
+                        exponent_negative = true;
+                        c++;
+                } else if (*c == '+')
+                        c++;
+
+                if (!strchr("0123456789", *c) || *c == 0)
+                        return -EINVAL;
+
+                do {
+                        exponent = 10.0 * exponent + (*c - '0');
+                        c++;
+                } while (strchr("0123456789", *c) && *c != 0);
+        }
+
+        /* No check on what follows: inside arrays and objects a number is followed by ',', ']', '}'
+         * or whitespace, which the tokenizer validates on its next call. */
+        *p = c;
+
+        if (is_double) {
+                ret->real = ((negative ? -1.0 : 1.0) * (x + (y / shift))) * exp10((exponent_negative ? -1.0 : 1.0) * exponent);
+                return JSON_REAL;
+        } else {
+                ret->integer = negative ? -i : i;
+                return JSON_INTEGER;
+        }
+}
+
+/* Hands back a token that carries no string, number or boolean payload: clears both output slots,
+ * moves *p to "next", records the follow-up tokenizer state and returns the token type unchanged. */
+static int json_emit(
+                const char **p,
+                const char *next,
+                char **ret_string,
+                union json_value *ret_value,
+                void **state,
+                int next_state,
+                int token) {
+
+        *ret_string = NULL;
+        *ret_value = JSON_VALUE_NULL;
+        *p = next;
+        *state = INT_TO_PTR(next_state);
+        return token;
+}
+
+/* Extracts the next token from the JSON text at *p.
+ *
+ *   p           in/out: current read position, advanced past the token on success
+ *   ret_string  out: the decoded string for JSON_STRING (allocated, owned by the caller), NULL for
+ *               every other token
+ *   ret_value   out: ->integer for JSON_INTEGER, ->real for JSON_REAL, ->boolean for JSON_BOOLEAN,
+ *               JSON_VALUE_NULL for tokens without a numeric or boolean payload
+ *   state       in/out: opaque tokenizer state, carried from call to call by the caller
+ *   line        optional in/out: set to 1 when tokenizing starts (state decodes to STATE_NULL), then
+ *               incremented for every newline skipped in front of a token
+ *
+ * Returns one of the JSON_xxx token types (all >= 0), JSON_END once only whitespace is left, or a
+ * negative errno-style value on malformed input. On JSON_END and on error *p and *state are left as
+ * they were.
+ *
+ * This is a tokenizer only: it knows whether a value or a separator may come next, but keeps no
+ * nesting stack, hence it does not verify that brackets match or that ':' and ',' appear in
+ * sensible places. */
+int json_tokenize(
+                const char **p,
+                char **ret_string,
+                union json_value *ret_value,
+                void **state,
+                unsigned *line) {
+
+        const char *c, *b;
+        int t;
+        int r;
+
+        assert(p);
+        assert(*p);
+        assert(ret_string);
+        assert(ret_value);
+        assert(state);
+
+        t = PTR_TO_INT(*state);
+        c = *p;
+
+        if (t == STATE_NULL) {
+                if (line)
+                        *line = 1;
+                t = STATE_VALUE;
+        }
+
+        /* Skip leading whitespace */
+        b = c + strspn(c, WHITESPACE);
+        if (*b == 0) {
+                /* Clear the outputs here too, so that the caller sees defined values for every
+                 * non-negative return */
+                *ret_string = NULL;
+                *ret_value = JSON_VALUE_NULL;
+                return JSON_END;
+        }
+
+        inc_lines(line, c, b - c);
+        c = b;
+
+        switch (t) {
+
+        case STATE_VALUE:
+
+                if (*c == '{')
+                        return json_emit(p, c + 1, ret_string, ret_value, state, STATE_VALUE, JSON_OBJECT_OPEN);
+
+                else if (*c == '}')
+                        return json_emit(p, c + 1, ret_string, ret_value, state, STATE_VALUE_POST, JSON_OBJECT_CLOSE);
+
+                else if (*c == '[')
+                        return json_emit(p, c + 1, ret_string, ret_value, state, STATE_VALUE, JSON_ARRAY_OPEN);
+
+                else if (*c == ']')
+                        return json_emit(p, c + 1, ret_string, ret_value, state, STATE_VALUE_POST, JSON_ARRAY_CLOSE);
+
+                else if (*c == '"') {
+                        r = json_parse_string(&c, ret_string);
+                        if (r < 0)
+                                return r;
+
+                        *ret_value = JSON_VALUE_NULL;
+                        *p = c;
+                        *state = INT_TO_PTR(STATE_VALUE_POST);
+                        return r;
+
+                } else if (strchr("-0123456789", *c)) {
+                        /* *c cannot be NUL here, so strchr() matching the terminator is not a concern */
+                        r = json_parse_number(&c, ret_value);
+                        if (r < 0)
+                                return r;
+
+                        *ret_string = NULL;
+                        *p = c;
+                        *state = INT_TO_PTR(STATE_VALUE_POST);
+                        return r;
+
+                } else if (startswith(c, "true")) {
+                        *ret_string = NULL;
+                        ret_value->boolean = true;
+                        *p = c + 4;
+                        *state = INT_TO_PTR(STATE_VALUE_POST);
+                        return JSON_BOOLEAN;
+
+                } else if (startswith(c, "false")) {
+                        *ret_string = NULL;
+                        ret_value->boolean = false;
+                        *p = c + 5;
+                        *state = INT_TO_PTR(STATE_VALUE_POST);
+                        return JSON_BOOLEAN;
+
+                } else if (startswith(c, "null"))
+                        return json_emit(p, c + 4, ret_string, ret_value, state, STATE_VALUE_POST, JSON_NULL);
+
+                else
+                        return -EINVAL;
+
+        case STATE_VALUE_POST:
+
+                if (*c == ':')
+                        return json_emit(p, c + 1, ret_string, ret_value, state, STATE_VALUE, JSON_COLON);
+                else if (*c == ',')
+                        return json_emit(p, c + 1, ret_string, ret_value, state, STATE_VALUE, JSON_COMMA);
+                else if (*c == '}')
+                        return json_emit(p, c + 1, ret_string, ret_value, state, STATE_VALUE_POST, JSON_OBJECT_CLOSE);
+                else if (*c == ']')
+                        return json_emit(p, c + 1, ret_string, ret_value, state, STATE_VALUE_POST, JSON_ARRAY_CLOSE);
+                else
+                        return -EINVAL;
+
+        default:
+                /* The state pointer holds something this tokenizer never stored: refuse rather than
+                 * loop forever */
+                return -EINVAL;
+        }
+}
diff --git a/src/shared/json.h b/src/shared/json.h
new file mode 100644 (file)
index 0000000..a845713
--- /dev/null
@@ -0,0 +1,50 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2014 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdbool.h>
+#include <inttypes.h>
+
+/* Token types returned by json_tokenize(). Negative return values are errors instead. */
+enum {
+        JSON_END,          /* nothing but whitespace left in the input */
+        JSON_COLON,        /* ':' */
+        JSON_COMMA,        /* ',' */
+        JSON_OBJECT_OPEN,  /* '{' */
+        JSON_OBJECT_CLOSE, /* '}' */
+        JSON_ARRAY_OPEN,   /* '[' */
+        JSON_ARRAY_CLOSE,  /* ']' */
+        JSON_STRING,       /* string literal, decoded text returned via ret_string */
+        JSON_REAL,         /* number returned as double in ret_value->real */
+        JSON_INTEGER,      /* number returned as intmax_t in ret_value->integer */
+        JSON_BOOLEAN,      /* "true" or "false", returned in ret_value->boolean */
+        JSON_NULL,         /* "null" */
+};
+
+/* Payload of a non-string value token; the token type returned by json_tokenize() tells which
+ * member is the valid one. */
+union json_value {
+        bool boolean;     /* valid for JSON_BOOLEAN */
+        double real;      /* valid for JSON_REAL */
+        intmax_t integer; /* valid for JSON_INTEGER */
+};
+
+/* Empty-initialized value, stored by json_tokenize() for tokens that carry no such payload */
+#define JSON_VALUE_NULL ((union json_value) {})
+
+/* Returns the next token (one of the JSON_xxx types above) found at *p and advances *p past it, or
+ * a negative errno-style value on malformed input. ret_string receives the decoded text of a
+ * JSON_STRING token, ret_value the payload of number and boolean tokens. "state" is opaque storage
+ * the caller carries from call to call (the test suite starts with it set to NULL); "line" may be
+ * NULL, otherwise it is maintained as a line counter. */
+int json_tokenize(const char **p, char **ret_string, union json_value *ret_value, void **state, unsigned *line);
index 4469a73..67f6285 100644 (file)
@@ -263,39 +263,37 @@ char *ascii_is_valid(const char *str) {
         return (char*) str;
 }
 
+/* Encodes the 16-bit code unit c as UTF-8 into the buffer at p and returns the number of bytes
+ * written (1, 2 or 3). The buffer must have room for three bytes; no NUL terminator is appended.
+ * Values are encoded as they come: UTF-16 surrogates are not combined or rejected here. */
+int utf8_encode_unichar(uint16_t c, char *p) {
+        uint8_t *t = (uint8_t*) p;
+
+        if (c < 0x80) {
+                /* 7 bits: plain ASCII */
+                t[0] = (uint8_t) c;
+                return 1;
+        } else if (c < 0x800) {
+                /* up to 11 bits: 110xxxxx 10xxxxxx */
+                t[0] = (uint8_t) (0xc0 | (c >> 6));
+                t[1] = (uint8_t) (0x80 | (c & 0x3f));
+                return 2;
+        } else {
+                /* up to 16 bits: 1110xxxx 10xxxxxx 10xxxxxx */
+                t[0] = (uint8_t) (0xe0 | (c >> 12));
+                t[1] = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
+                t[2] = (uint8_t) (0x80 | (c & 0x3f));
+                return 3;
+        }
+}
+
 char *utf16_to_utf8(const void *s, size_t length) {
-        char *r;
         const uint8_t *f;
-        uint8_t *t;
+        char *r, *t;
 
         r = new(char, (length*3+1)/2 + 1);
         if (!r)
                 return NULL;
 
-        t = (uint8_t*) r;
-
-        for (f = s; f < (const uint8_t*) s + length; f += 2) {
-                uint16_t c;
-
-                c = (f[1] << 8) | f[0];
-
-                if (c == 0) {
-                        *t = 0;
-                        return r;
-                } else if (c < 0x80) {
-                        *(t++) = (uint8_t) c;
-                } else if (c < 0x800) {
-                        *(t++) = (uint8_t) (0xc0 | (c >> 6));
-                        *(t++) = (uint8_t) (0x80 | (c & 0x3f));
-                } else {
-                        *(t++) = (uint8_t) (0xe0 | (c >> 12));
-                        *(t++) = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
-                        *(t++) = (uint8_t) (0x80 | (c & 0x3f));
-                }
-        }
+        for (f = s, t = r; f < (const uint8_t*) s + length; f += 2)
+                t += utf8_encode_unichar((f[1] << 8) | f[0], t);
 
         *t = 0;
-
         return r;
 }
 
index 59abee5..dcf8588 100644 (file)
@@ -36,6 +36,7 @@ bool utf8_is_printable_newline(const char* str, size_t length, bool newline) _pu
 char *utf8_escape_invalid(const char *s);
 char *utf8_escape_non_printable(const char *str);
 
+int utf8_encode_unichar(uint16_t c, char *p);
 char *utf16_to_utf8(const void *s, size_t length);
 
 int utf8_encoded_valid_unichar(const char *str);
index af71709..b256b0b 100644 (file)
@@ -28,7 +28,7 @@ enum {
         XML_TAG_CLOSE,
         XML_TAG_CLOSE_EMPTY,
         XML_ATTRIBUTE_NAME,
-        XML_ATTRIBUTE_VALUE
+        XML_ATTRIBUTE_VALUE,
 };
 
 int xml_tokenize(const char **p, char **name, void **state, unsigned *line);
diff --git a/src/test/test-json.c b/src/test/test-json.c
new file mode 100644 (file)
index 0000000..8777cf7
--- /dev/null
@@ -0,0 +1,101 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2014 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <math.h>
+
+#include "log.h"
+#include "util.h"
+#include "json.h"
+
+/* Tokenizes "data" and checks the resulting token stream against the variadic expectation list.
+ *
+ * The list holds, per expected token, the token type as int, directly followed by the expected
+ * payload for the types that carry one: const char* for JSON_STRING, double for JSON_REAL, intmax_t
+ * for JSON_INTEGER, int (0 or 1) for JSON_BOOLEAN. It must end with JSON_END or with the negative
+ * error code the tokenizer is expected to fail with. */
+static void test_one(const char *data, ...) {
+        void *state = NULL;
+        va_list ap;
+
+        va_start(ap, data);
+
+        for (;;) {
+                _cleanup_free_ char *str = NULL;
+                union json_value v = {};
+                int t, tt;
+
+                t = json_tokenize(&data, &str, &v, &state, NULL);
+                tt = va_arg(ap, int);
+
+                assert_se(t == tt);
+
+                if (t == JSON_END || t < 0)
+                        break;
+
+                else if (t == JSON_STRING) {
+                        const char *nn;
+
+                        nn = va_arg(ap, const char *);
+                        assert_se(streq_ptr(nn, str));
+
+                } else if (t == JSON_REAL) {
+                        double d;
+
+                        d = va_arg(ap, double);
+                        /* fabs(), not abs(): the integer variant would truncate the difference and
+                         * let any error below 1.0 pass */
+                        assert_se(fabs(d - v.real) < 0.001);
+
+                } else if (t == JSON_INTEGER) {
+                        intmax_t i;
+
+                        i = va_arg(ap, intmax_t);
+                        assert_se(i == v.integer);
+
+                } else if (t == JSON_BOOLEAN) {
+                        bool b;
+
+                        /* bool is promoted to int when passed through "..." */
+                        b = va_arg(ap, int);
+                        assert_se(b == v.boolean);
+                }
+        }
+
+        va_end(ap);
+}
+
+/* Runs the tokenizer over a fixed set of inputs; each test_one() call lists the input followed by
+ * the expected tokens and their payloads. Any mismatch trips an assert_se() inside test_one(). */
+int main(int argc, char *argv[]) {
+
+        /* Invalid and empty input */
+        test_one("x", -EINVAL);
+        test_one("", JSON_END);
+        test_one(" ", JSON_END);
+
+        /* Numbers: integers, fractions, exponents */
+        test_one("0", JSON_INTEGER, (intmax_t) 0, JSON_END);
+        test_one("1234", JSON_INTEGER, (intmax_t) 1234, JSON_END);
+        test_one("3.141", JSON_REAL, 3.141, JSON_END);
+        test_one("0.0", JSON_REAL, 0.0, JSON_END);
+        test_one("7e3", JSON_REAL, 7e3, JSON_END);
+        test_one("-7e-3", JSON_REAL, -7e-3, JSON_END);
+
+        /* Literals */
+        test_one("true", JSON_BOOLEAN, true, JSON_END);
+        test_one("false", JSON_BOOLEAN, false, JSON_END);
+        test_one("null", JSON_NULL, JSON_END);
+
+        /* Empty objects and arrays, with and without surrounding whitespace */
+        test_one("{}", JSON_OBJECT_OPEN, JSON_OBJECT_CLOSE, JSON_END);
+        test_one("\t {\n} \n", JSON_OBJECT_OPEN, JSON_OBJECT_CLOSE, JSON_END);
+        test_one("[]", JSON_ARRAY_OPEN, JSON_ARRAY_CLOSE, JSON_END);
+        test_one("\t [] \n\n", JSON_ARRAY_OPEN, JSON_ARRAY_CLOSE, JSON_END);
+
+        /* Strings, including a backslash escape */
+        test_one("\"\"", JSON_STRING, "", JSON_END);
+        test_one("\"foo\"", JSON_STRING, "foo", JSON_END);
+        test_one("\"foo\\nfoo\"", JSON_STRING, "foo\nfoo", JSON_END);
+
+        /* Nested structures */
+        test_one("{\"foo\" : \"bar\"}", JSON_OBJECT_OPEN, JSON_STRING, "foo", JSON_COLON, JSON_STRING, "bar", JSON_OBJECT_CLOSE, JSON_END);
+        test_one("{\"foo\" : [true, false]}", JSON_OBJECT_OPEN, JSON_STRING, "foo", JSON_COLON, JSON_ARRAY_OPEN, JSON_BOOLEAN, true, JSON_COMMA, JSON_BOOLEAN, false, JSON_ARRAY_CLOSE, JSON_OBJECT_CLOSE, JSON_END);
+
+        /* U+FFFD, once as raw UTF-8 bytes and once as \u escape: both must decode to the same bytes */
+        test_one("\"\xef\xbf\xbd\"", JSON_STRING, "\xef\xbf\xbd", JSON_END);
+        test_one("\"\\ufffd\"", JSON_STRING, "\xef\xbf\xbd", JSON_END);
+
+        return 0;
+}