2 * Lightweight Embedded JSON Parser
4 * Copyright (C) 2013 Andy Green <andy@warmcat.com>
5 * This code is licensed under LGPL 2.1
6 * http://www.gnu.org/licenses/lgpl-2.1.html
15 * lejp_construct - prepare a struct lejp_ctx for use
17 * \param ctx: pointer to your struct lejp_ctx
18 * \param callback: your user callback which will receive parsed tokens
19 * \param user: optional user data pointer untouched by lejp
20 * \param paths: your array of name elements you are interested in
21 * \param count_paths: ARRAY_SIZE() of @paths
23 * Prepares your context struct for use with lejp
27 lejp_construct(struct lejp_ctx *ctx,
28 char (*callback)(struct lejp_ctx *ctx, char reason), void *user,
29 const char * const *paths, unsigned char count_paths)
40 ctx->callback = callback;
43 ctx->count_paths = count_paths;
45 ctx->callback(ctx, LEJPCB_CONSTRUCTED);
49 * lejp_destruct - retire a previously constructed struct lejp_ctx
51 * \param ctx: pointer to your struct lejp_ctx
53 * lejp does not perform any allocations, but since your user code might, this
54 * provides a one-time LEJPCB_DESTRUCTED callback at destruction time where
55 * you can clean up in your callback.
59 lejp_destruct(struct lejp_ctx *ctx)
61 /* no allocations... just let callback know what is happening */
62 ctx->callback(ctx, LEJPCB_DESTRUCTED);
66 * lejp_change_callback - switch to a different callback from now on
68 * \param ctx: pointer to your struct lejp_ctx
69 * \param callback: your user callback which will receive parsed tokens
71 * This tells the old callback it was destroyed, in case you want to take any
72 * action because that callback "lost focus", then changes to the new
73 * callback and tells it first that it was constructed, and then started.
75 * Changing callback is a cheap and powerful trick to split out handlers
76 * according to information earlier in the parse. For example you may have
77 * a JSON pair "schema" whose value defines what can be expected for the rest
78 * of the JSON. Rather than having one huge callback for all cases, you can
79 * have an initial one looking for "schema" which then calls
80 * lejp_change_callback() to a handler specific for the schema.
82 * Notice that afterwards, you need to construct the context again anyway to
83 * parse another JSON object, and the callback is reset then to the main,
84 * schema-interpreting one. The construction action is very lightweight.
88 lejp_change_callback(struct lejp_ctx *ctx,
89 char (*callback)(struct lejp_ctx *ctx, char reason))
91 ctx->callback(ctx, LEJPCB_DESTRUCTED);
92 ctx->callback = callback;
93 ctx->callback(ctx, LEJPCB_CONSTRUCTED);
94 ctx->callback(ctx, LEJPCB_START);
98 lejp_check_path_match(struct lejp_ctx *ctx)
103 /* we only need to check if a match is not active */
104 for (n = 0; !ctx->path_match && n < ctx->count_paths; n++) {
116 ctx->wild[ctx->wildcount++] = p - ctx->path;
119 * if * has something after it, match to .
120 * if ends with *, eat everything.
121 * This implies match sequences must be ordered like
124 * if both options are possible
126 while (*p && (*p != '.' || !*q))
132 ctx->path_match = n + 1;
133 ctx->path_match_len = ctx->ppos;
137 if (!ctx->path_match)
142 lejp_get_wildcard(struct lejp_ctx *ctx, int wildcard, char *dest, int len)
146 if (wildcard >= ctx->wildcount || !len)
149 n = ctx->wild[wildcard];
151 while (--len && n < ctx->ppos && (n == ctx->wild[wildcard] || ctx->path[n] != '.'))
152 *dest++ = ctx->path[n++];
157 return n - ctx->wild[wildcard];
161 * lejp_parse - interpret some more incoming data incrementally
163 * \param ctx: previously constructed parsing context
164 * \param json: char buffer with the new data to interpret
165 * \param len: amount of data in the buffer
167 * Because lejp is a stream parser, it incrementally parses as new data
168 * becomes available, maintaining all state in the context struct. So an
169 * incomplete JSON is a normal situation, getting you a LEJP_CONTINUE
170 * return, signalling there's no error but to call again with more data when
171 * it comes to complete the parsing. Successful parsing completes with a
172 * 0 or positive integer indicating how much of the last input buffer was
177 lejp_parse(struct lejp_ctx *ctx, const unsigned char *json, int len)
179 unsigned char c, n, s, ret = LEJP_REJECT_UNKNOWN;
180 static const char esc_char[] = "\"\\/bfnrt";
181 static const char esc_tran[] = "\"\\/\b\f\n\r\t";
182 static const char tokens[] = "rue alse ull ";
184 if (!ctx->sp && !ctx->ppos)
185 ctx->callback(ctx, LEJPCB_START);
190 s = ctx->st[ctx->sp].s;
192 /* skip whitespace unless we should care */
193 if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '#') {
196 ctx->st[ctx->sp].s &= ~LEJP_FLAG_WS_COMMENTLINE;
198 if (!(s & LEJP_FLAG_WS_KEEP)) {
200 ctx->st[ctx->sp].s |=
201 LEJP_FLAG_WS_COMMENTLINE;
206 if (ctx->st[ctx->sp].s & LEJP_FLAG_WS_COMMENTLINE)
212 ret = LEJP_REJECT_IDLE_NO_BRACE;
215 if (ctx->callback(ctx, LEJPCB_OBJECT_START)) {
216 ret = LEJP_REJECT_CALLBACK;
219 ctx->st[ctx->sp].s = LEJP_MEMBERS;
223 ctx->st[ctx->sp].s = LEJP_IDLE;
224 ret = LEJP_REJECT_MEMBERS_NO_CLOSE;
227 ctx->st[ctx->sp].s = LEJP_M_P;
231 ret = LEJP_REJECT_MP_NO_OPEN_QUOTE;
235 ctx->st[ctx->sp].s = LEJP_MP_DELIM;
237 goto add_stack_level;
242 ret = LEJP_REJECT_MP_STRING_UNDERRUN;
245 if (ctx->st[ctx->sp - 1].s != LEJP_MP_DELIM) {
246 ctx->buf[ctx->npos] = '\0';
247 if (ctx->callback(ctx,
248 LEJPCB_VAL_STR_END) < 0) {
249 ret = LEJP_REJECT_CALLBACK;
258 ctx->st[ctx->sp].s = LEJP_MP_STRING_ESC;
261 if (c < ' ') {/* "control characters" not allowed */
262 ret = LEJP_REJECT_MP_ILLEGAL_CTRL;
265 goto emit_string_char;
267 case LEJP_MP_STRING_ESC:
269 ctx->st[ctx->sp].s = LEJP_MP_STRING_ESC_U1;
273 for (n = 0; n < sizeof(esc_char); n++) {
274 if (c != esc_char[n])
278 ctx->st[ctx->sp].s = LEJP_MP_STRING;
279 goto emit_string_char;
281 ret = LEJP_REJECT_MP_STRING_ESC_ILLEGAL_ESC;
282 /* illegal escape char */
285 case LEJP_MP_STRING_ESC_U1:
286 case LEJP_MP_STRING_ESC_U2:
287 case LEJP_MP_STRING_ESC_U3:
288 case LEJP_MP_STRING_ESC_U4:
290 if (c >= '0' && c <= '9')
293 if (c >= 'a' && c <= 'f')
294 ctx->uni = c - 'a' + 10;
296 if (c >= 'A' && c <= 'F')
297 ctx->uni = c - 'A' + 10;
299 ret = LEJP_REJECT_ILLEGAL_HEX;
302 ctx->st[ctx->sp].s++;
304 case LEJP_MP_STRING_ESC_U2:
308 * 0x08-0xff (0x0800 - 0xffff)
311 c = 0xe0 | ((ctx->uni >> 4) & 0xf);
312 goto emit_string_char;
314 case LEJP_MP_STRING_ESC_U3:
315 if (ctx->uni >= 0x080) {
317 * 0x080 - 0xfff (0x0800 - 0xffff)
321 c = 0x80 | ((ctx->uni >> 2) & 0x3f);
322 goto emit_string_char;
324 if (ctx->uni < 0x008)
327 * 0x008 - 0x7f (0x0080 - 0x07ff)
330 c = 0xc0 | (ctx->uni >> 2);
331 goto emit_string_char;
333 case LEJP_MP_STRING_ESC_U4:
334 if (ctx->uni >= 0x0080)
335 /* end of 2 or 3-byte seq */
336 c = 0x80 | (ctx->uni & 0x3f);
339 c = (unsigned char)ctx->uni;
341 ctx->st[ctx->sp].s = LEJP_MP_STRING;
342 goto emit_string_char;
350 ret = LEJP_REJECT_MP_DELIM_MISSING_COLON;
353 ctx->st[ctx->sp].s = LEJP_MP_VALUE;
354 ctx->path[ctx->ppos] = '\0';
356 lejp_check_path_match(ctx);
357 if (ctx->callback(ctx, LEJPCB_PAIR_NAME)) {
358 ret = LEJP_REJECT_CALLBACK;
364 if (c >= '0' && c <= '9') {
368 ctx->st[ctx->sp].s = LEJP_MP_VALUE_NUM_INT;
374 ctx->st[ctx->sp].s = LEJP_MP_COMMA_OR_END;
378 if (ctx->callback(ctx, LEJPCB_VAL_STR_START)) {
379 ret = LEJP_REJECT_CALLBACK;
382 goto add_stack_level;
386 ctx->st[ctx->sp].s = LEJP_MP_COMMA_OR_END;
388 lejp_check_path_match(ctx);
389 if (ctx->callback(ctx, LEJPCB_OBJECT_START)) {
390 ret = LEJP_REJECT_CALLBACK;
394 goto add_stack_level;
398 ctx->st[ctx->sp].s = LEJP_MP_ARRAY_END;
400 ctx->path[ctx->ppos++] = '[';
401 ctx->path[ctx->ppos++] = ']';
402 ctx->path[ctx->ppos] = '\0';
403 if (ctx->callback(ctx, LEJPCB_ARRAY_START)) {
404 ret = LEJP_REJECT_CALLBACK;
407 ctx->i[ctx->ipos++] = 0;
408 if (ctx->ipos > ARRAY_SIZE(ctx->i)) {
409 ret = LEJP_REJECT_MP_DELIM_ISTACK;
412 goto add_stack_level;
416 ctx->st[ctx->sp].s = LEJP_MP_VALUE_TOK;
421 ctx->st[ctx->sp].s = LEJP_MP_VALUE_TOK;
426 ctx->st[ctx->sp].s = LEJP_MP_VALUE_TOK;
429 ret = LEJP_REJECT_MP_DELIM_BAD_VALUE_START;
434 case LEJP_MP_VALUE_NUM_INT:
435 if (!ctx->npos && c == '-') {
436 ctx->f |= LEJP_SEEN_MINUS;
440 if (ctx->dcount < 10 && c >= '0' && c <= '9') {
441 if (ctx->f & LEJP_SEEN_POINT)
442 ctx->f |= LEJP_SEEN_POST_POINT;
447 if (ctx->dcount || (ctx->f & LEJP_SEEN_POINT)) {
448 ret = LEJP_REJECT_MP_VAL_NUM_FORMAT;
451 ctx->f |= LEJP_SEEN_POINT;
455 * before exponent, if we had . we must have had at
456 * least one more digit
459 (LEJP_SEEN_POINT | LEJP_SEEN_POST_POINT)) ==
461 ret = LEJP_REJECT_MP_VAL_NUM_INT_NO_FRAC;
464 if (c == 'e' || c == 'E') {
465 if (ctx->f & LEJP_SEEN_EXP) {
466 ret = LEJP_REJECT_MP_VAL_NUM_FORMAT;
469 ctx->f |= LEJP_SEEN_EXP;
470 ctx->st[ctx->sp].s = LEJP_MP_VALUE_NUM_EXP;
473 /* if none of the above, did we even have a number? */
475 ret = LEJP_REJECT_MP_VAL_NUM_FORMAT;
479 ctx->buf[ctx->npos] = '\0';
480 if (ctx->f & LEJP_SEEN_POINT) {
481 if (ctx->callback(ctx, LEJPCB_VAL_NUM_FLOAT)) {
482 ret = LEJP_REJECT_CALLBACK;
486 if (ctx->callback(ctx, LEJPCB_VAL_NUM_INT)) {
487 ret = LEJP_REJECT_CALLBACK;
492 /* then this is the post-number character, loop */
493 ctx->st[ctx->sp].s = LEJP_MP_COMMA_OR_END;
496 case LEJP_MP_VALUE_NUM_EXP:
497 ctx->st[ctx->sp].s = LEJP_MP_VALUE_NUM_INT;
498 if (c >= '0' && c <= '9')
500 if (c == '+' || c == '-')
502 ret = LEJP_REJECT_MP_VAL_NUM_EXP_BAD_EXP;
505 case LEJP_MP_VALUE_TOK: /* true, false, null */
506 if (c != tokens[ctx->uni]) {
507 ret = LEJP_REJECT_MP_VAL_TOK_UNKNOWN;
511 if (tokens[ctx->uni] != ' ')
517 if (ctx->callback(ctx, LEJPCB_VAL_TRUE)) {
518 ret = LEJP_REJECT_CALLBACK;
525 if (ctx->callback(ctx, LEJPCB_VAL_FALSE)) {
526 ret = LEJP_REJECT_CALLBACK;
532 if (ctx->callback(ctx, LEJPCB_VAL_NULL)) {
533 ret = LEJP_REJECT_CALLBACK;
538 ctx->st[ctx->sp].s = LEJP_MP_COMMA_OR_END;
541 case LEJP_MP_COMMA_OR_END:
542 ctx->path[ctx->ppos] = '\0';
544 /* increment this stack level's index */
545 ctx->st[ctx->sp].s = LEJP_M_P;
549 * since we came back to root level,
550 * no path can still match
555 ctx->ppos = ctx->st[ctx->sp - 1].p;
556 ctx->path[ctx->ppos] = '\0';
557 if (ctx->path_match &&
558 ctx->ppos <= ctx->path_match_len)
560 * we shrank the path to be
561 * smaller than the matching point
565 if (ctx->st[ctx->sp - 1].s != LEJP_MP_ARRAY_END)
567 /* top level is definitely an array... */
569 ctx->i[ctx->ipos - 1]++;
570 ctx->st[ctx->sp].s = LEJP_MP_VALUE;
575 ret = LEJP_REJECT_MP_C_OR_E_UNDERF;
580 if (ctx->st[ctx->sp].s != LEJP_MP_ARRAY_END) {
581 ret = LEJP_REJECT_MP_C_OR_E_NOTARRAY;
584 /* drop the path [n] bit */
585 ctx->ppos = ctx->st[ctx->sp - 1].p;
586 ctx->ipos = ctx->st[ctx->sp - 1].i;
587 ctx->path[ctx->ppos] = '\0';
588 if (ctx->path_match &&
589 ctx->ppos <= ctx->path_match_len)
591 * we shrank the path to be
592 * smaller than the matching point
596 /* do LEJP_MP_ARRAY_END processing */
601 lejp_check_path_match(ctx);
602 if (ctx->callback(ctx, LEJPCB_OBJECT_END)) {
603 ret = LEJP_REJECT_CALLBACK;
606 ctx->callback(ctx, LEJPCB_COMPLETE);
607 /* done, return unused amount */
612 ctx->ppos = ctx->st[ctx->sp - 1].p;
613 ctx->ipos = ctx->st[ctx->sp - 1].i;
614 ctx->path[ctx->ppos] = '\0';
615 if (ctx->path_match &&
616 ctx->ppos <= ctx->path_match_len)
618 * we shrank the path to be
619 * smaller than the matching point
622 lejp_check_path_match(ctx);
623 if (ctx->callback(ctx, LEJPCB_OBJECT_END)) {
624 ret = LEJP_REJECT_CALLBACK;
630 ret = LEJP_REJECT_MP_C_OR_E_NEITHER;
633 case LEJP_MP_ARRAY_END:
634 ctx->path[ctx->ppos] = '\0';
636 /* increment this stack level's index */
638 ctx->i[ctx->ipos - 1]++;
639 ctx->st[ctx->sp].s = LEJP_MP_VALUE;
641 ctx->ppos = ctx->st[ctx->sp - 1].p;
642 ctx->path[ctx->ppos] = '\0';
646 ret = LEJP_REJECT_MP_ARRAY_END_MISSING;
650 ctx->st[ctx->sp].s = LEJP_MP_COMMA_OR_END;
651 ctx->callback(ctx, LEJPCB_ARRAY_END);
658 if (!ctx->sp || ctx->st[ctx->sp - 1].s != LEJP_MP_DELIM) {
659 /* assemble the string value into chunks */
660 ctx->buf[ctx->npos++] = c;
661 if (ctx->npos == sizeof(ctx->buf) - 1) {
662 if (ctx->callback(ctx, LEJPCB_VAL_STR_CHUNK)) {
663 ret = LEJP_REJECT_CALLBACK;
670 /* name part of name:value pair */
671 ctx->path[ctx->ppos++] = c;
675 /* push on to the object stack */
676 if (ctx->ppos && ctx->st[ctx->sp].s != LEJP_MP_COMMA_OR_END &&
677 ctx->st[ctx->sp].s != LEJP_MP_ARRAY_END)
678 ctx->path[ctx->ppos++] = '.';
680 ctx->st[ctx->sp].p = ctx->ppos;
681 ctx->st[ctx->sp].i = ctx->ipos;
682 if (++ctx->sp == ARRAY_SIZE(ctx->st)) {
683 ret = LEJP_REJECT_STACK_OVERFLOW;
686 ctx->path[ctx->ppos] = '\0';
687 ctx->st[ctx->sp].s = c;
688 ctx->st[ctx->sp].b = 0;
692 if (ctx->npos >= sizeof(ctx->buf)) {
693 ret = LEJP_REJECT_NUM_TOO_LONG;
696 ctx->buf[ctx->npos++] = c;
704 return LEJP_CONTINUE;
707 ctx->callback(ctx, LEJPCB_FAILED);