2 * Copyright (c) 2009-2021, Google LLC
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Google LLC nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "upb/json_decode.h"
39 #include "upb/encode.h"
40 #include "upb/reflection.h"
42 /* Special header, must be included last. */
43 #include "upb/port_def.inc"
46 const char *ptr, *end;
47 upb_arena *arena; /* TODO: should we have a tmp arena for tmp data? */
48 const upb_symtab *any_pool;
53 const char *line_begin;
56 const upb_fielddef *debug_field;
/* Kinds of JSON value; these are the values that callers compare the result
 * of jsondec_peek() against. */
enum {
  JD_OBJECT,
  JD_ARRAY,
  JD_STRING,
  JD_NUMBER,
  JD_TRUE,
  JD_FALSE,
  JD_NULL
};
61 /* Forward declarations of mutually-recursive functions. */
62 static void jsondec_wellknown(jsondec *d, upb_msg *msg, const upb_msgdef *m);
63 static upb_msgval jsondec_value(jsondec *d, const upb_fielddef *f);
64 static void jsondec_wellknownvalue(jsondec *d, upb_msg *msg,
66 static void jsondec_object(jsondec *d, upb_msg *msg, const upb_msgdef *m);
68 static bool jsondec_streql(upb_strview str, const char *lit) {
69 return str.size == strlen(lit) && memcmp(str.data, lit, str.size) == 0;
72 static bool jsondec_isnullvalue(const upb_fielddef *f) {
73 return upb_fielddef_type(f) == UPB_TYPE_ENUM &&
74 strcmp(upb_enumdef_fullname(upb_fielddef_enumsubdef(f)),
75 "google.protobuf.NullValue") == 0;
78 static bool jsondec_isvalue(const upb_fielddef *f) {
79 return (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
80 upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(f)) ==
81 UPB_WELLKNOWN_VALUE) ||
82 jsondec_isnullvalue(f);
85 UPB_NORETURN static void jsondec_err(jsondec *d, const char *msg) {
86 upb_status_seterrf(d->status, "Error parsing JSON @%d:%d: %s", d->line,
87 (int)(d->ptr - d->line_begin), msg);
88 UPB_LONGJMP(d->err, 1);
92 UPB_NORETURN static void jsondec_errf(jsondec *d, const char *fmt, ...) {
94 upb_status_seterrf(d->status, "Error parsing JSON @%d:%d: ", d->line,
95 (int)(d->ptr - d->line_begin));
97 upb_status_vappenderrf(d->status, fmt, argp);
99 UPB_LONGJMP(d->err, 1);
102 static void jsondec_skipws(jsondec *d) {
103 while (d->ptr != d->end) {
107 d->line_begin = d->ptr;
118 jsondec_err(d, "Unexpected EOF");
121 static bool jsondec_tryparsech(jsondec *d, char ch) {
122 if (d->ptr == d->end || *d->ptr != ch) return false;
127 static void jsondec_parselit(jsondec *d, const char *lit) {
128 size_t avail = d->end - d->ptr;
129 size_t len = strlen(lit);
130 if (avail < len || memcmp(d->ptr, lit, len) != 0) {
131 jsondec_errf(d, "Expected: '%s'", lit);
136 static void jsondec_wsch(jsondec *d, char ch) {
138 if (!jsondec_tryparsech(d, ch)) {
139 jsondec_errf(d, "Expected: '%c'", ch);
143 static void jsondec_true(jsondec *d) { jsondec_parselit(d, "true"); }
144 static void jsondec_false(jsondec *d) { jsondec_parselit(d, "false"); }
145 static void jsondec_null(jsondec *d) { jsondec_parselit(d, "null"); }
147 static void jsondec_entrysep(jsondec *d) {
149 jsondec_parselit(d, ":");
152 static int jsondec_rawpeek(jsondec *d) {
179 jsondec_errf(d, "Unexpected character: '%c'", *d->ptr);
183 /* JSON object/array **********************************************************/
185 /* These are used like so:
187 * jsondec_objstart(d);
188 * while (jsondec_objnext(d)) {
191 * jsondec_objend(d) */
193 static int jsondec_peek(jsondec *d) {
195 return jsondec_rawpeek(d);
198 static void jsondec_push(jsondec *d) {
199 if (--d->depth < 0) {
200 jsondec_err(d, "Recursion limit exceeded");
205 static bool jsondec_seqnext(jsondec *d, char end_ch) {
206 bool is_first = d->is_first;
209 if (*d->ptr == end_ch) return false;
210 if (!is_first) jsondec_parselit(d, ",");
214 static void jsondec_arrstart(jsondec *d) {
216 jsondec_wsch(d, '[');
219 static void jsondec_arrend(jsondec *d) {
221 jsondec_wsch(d, ']');
224 static bool jsondec_arrnext(jsondec *d) {
225 return jsondec_seqnext(d, ']');
228 static void jsondec_objstart(jsondec *d) {
230 jsondec_wsch(d, '{');
233 static void jsondec_objend(jsondec *d) {
235 jsondec_wsch(d, '}');
238 static bool jsondec_objnext(jsondec *d) {
239 if (!jsondec_seqnext(d, '}')) return false;
240 if (jsondec_peek(d) != JD_STRING) {
241 jsondec_err(d, "Object must start with string");
246 /* JSON number ****************************************************************/
248 static bool jsondec_tryskipdigits(jsondec *d) {
249 const char *start = d->ptr;
251 while (d->ptr < d->end) {
252 if (*d->ptr < '0' || *d->ptr > '9') {
258 return d->ptr != start;
261 static void jsondec_skipdigits(jsondec *d) {
262 if (!jsondec_tryskipdigits(d)) {
263 jsondec_err(d, "Expected one or more digits");
267 static double jsondec_number(jsondec *d) {
268 const char *start = d->ptr;
270 assert(jsondec_rawpeek(d) == JD_NUMBER);
272 /* Skip over the syntax of a number, as specified by JSON. */
273 if (*d->ptr == '-') d->ptr++;
275 if (jsondec_tryparsech(d, '0')) {
276 if (jsondec_tryskipdigits(d)) {
277 jsondec_err(d, "number cannot have leading zero");
280 jsondec_skipdigits(d);
283 if (d->ptr == d->end) goto parse;
284 if (jsondec_tryparsech(d, '.')) {
285 jsondec_skipdigits(d);
287 if (d->ptr == d->end) goto parse;
289 if (*d->ptr == 'e' || *d->ptr == 'E') {
291 if (d->ptr == d->end) {
292 jsondec_err(d, "Unexpected EOF in number");
294 if (*d->ptr == '+' || *d->ptr == '-') {
297 jsondec_skipdigits(d);
301 /* Having verified the syntax of a JSON number, use strtod() to parse
302 * (strtod() accepts a superset of JSON syntax). */
306 double val = strtod(start, &end);
307 assert(end == d->ptr);
309 /* Currently the min/max-val conformance tests fail if we check this. Does
310 * this mean the conformance tests are wrong or strtod() is wrong, or
311 * something else? Investigate further. */
313 if (errno == ERANGE) {
314 jsondec_err(d, "Number out of range");
318 if (val > DBL_MAX || val < -DBL_MAX) {
319 jsondec_err(d, "Number out of range");
326 /* JSON string ****************************************************************/
328 static char jsondec_escape(jsondec *d) {
347 jsondec_err(d, "Invalid escape char");
351 static uint32_t jsondec_codepoint(jsondec *d) {
355 if (d->end - d->ptr < 4) {
356 jsondec_err(d, "EOF inside string");
360 while (d->ptr < end) {
362 if (ch >= '0' && ch <= '9') {
364 } else if (ch >= 'a' && ch <= 'f') {
366 } else if (ch >= 'A' && ch <= 'F') {
369 jsondec_err(d, "Invalid hex digit");
377 /* Parses a \uXXXX unicode escape (possibly a surrogate pair). */
378 static size_t jsondec_unicode(jsondec *d, char* out) {
379 uint32_t cp = jsondec_codepoint(d);
380 if (cp >= 0xd800 && cp <= 0xdbff) {
381 /* Surrogate pair: two 16-bit codepoints become a 32-bit codepoint. */
384 jsondec_parselit(d, "\\u");
385 low = jsondec_codepoint(d);
386 if (low < 0xdc00 || low > 0xdfff) {
387 jsondec_err(d, "Invalid low surrogate");
389 cp = (high & 0x3ff) << 10;
392 } else if (cp >= 0xdc00 && cp <= 0xdfff) {
393 jsondec_err(d, "Unpaired low surrogate");
400 } else if (cp <= 0x07FF) {
401 out[0] = ((cp >> 6) & 0x1F) | 0xC0;
402 out[1] = ((cp >> 0) & 0x3F) | 0x80;
404 } else if (cp <= 0xFFFF) {
405 out[0] = ((cp >> 12) & 0x0F) | 0xE0;
406 out[1] = ((cp >> 6) & 0x3F) | 0x80;
407 out[2] = ((cp >> 0) & 0x3F) | 0x80;
409 } else if (cp < 0x10FFFF) {
410 out[0] = ((cp >> 18) & 0x07) | 0xF0;
411 out[1] = ((cp >> 12) & 0x3f) | 0x80;
412 out[2] = ((cp >> 6) & 0x3f) | 0x80;
413 out[3] = ((cp >> 0) & 0x3f) | 0x80;
416 jsondec_err(d, "Invalid codepoint");
420 static void jsondec_resize(jsondec *d, char **buf, char **end, char **buf_end) {
421 size_t oldsize = *buf_end - *buf;
422 size_t len = *end - *buf;
423 size_t size = UPB_MAX(8, 2 * oldsize);
425 *buf = upb_arena_realloc(d->arena, *buf, len, size);
426 if (!*buf) jsondec_err(d, "Out of memory");
429 *buf_end = *buf + size;
432 static upb_strview jsondec_string(jsondec *d) {
435 char *buf_end = NULL;
439 if (*d->ptr++ != '"') {
440 jsondec_err(d, "Expected string");
443 while (d->ptr < d->end) {
446 if (end == buf_end) {
447 jsondec_resize(d, &buf, &end, &buf_end);
454 ret.size = end - buf;
455 *end = '\0'; /* Needed for possible strtod(). */
459 if (d->ptr == d->end) goto eof;
460 if (*d->ptr == 'u') {
462 if (buf_end - end < 4) {
463 /* Allow space for maximum-sized code point (4 bytes). */
464 jsondec_resize(d, &buf, &end, &buf_end);
466 end += jsondec_unicode(d, end);
468 *end++ = jsondec_escape(d);
472 if ((unsigned char)*d->ptr < 0x20) {
473 jsondec_err(d, "Invalid char in JSON string");
481 jsondec_err(d, "EOF inside string");
484 static void jsondec_skipval(jsondec *d) {
485 switch (jsondec_peek(d)) {
488 while (jsondec_objnext(d)) {
497 while (jsondec_arrnext(d)) {
520 /* Base64 decoding for bytes fields. ******************************************/
/* Maps one base64 character to its 6-bit value (0..63).
 *
 * Any character that is not part of the alphabet maps to -1, which becomes
 * an unsigned value with every bit set, so callers can detect junk by testing
 * the high bit of the combined result. */
static unsigned int jsondec_base64_tablelookup(const char ch) {
  /* Table includes the normal base64 chars plus the URL-safe variant.
   * One row per 16 byte values; `static` so it is not rebuilt on each call. */
  static const signed char table[256] = {
      /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                 62 /*+*/, -1, 62 /*-*/, -1, 63 /*/ */,
      /* 0x30 */ 52 /*0*/, 53, 54, 55, 56, 57, 58, 59, 60, 61 /*9*/,
                 -1, -1, -1, -1, -1, -1,
      /* 0x40 */ -1, 0 /*A*/, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
                 14 /*O*/,
      /* 0x50 */ 15 /*P*/, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 /*Z*/,
                 -1, -1, -1, -1, 63 /*_*/,
      /* 0x60 */ -1, 26 /*a*/, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
                 39, 40 /*o*/,
      /* 0x70 */ 41 /*p*/, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 /*z*/,
                 -1, -1, -1, -1, -1,
      /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xa0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xb0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xc0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xd0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xe0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xf0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};

  /* Index by byte value.  Plain `char` may be signed, and converting a
   * negative char directly to `unsigned` would yield a huge index far outside
   * the 256-entry table, so go through `unsigned char` first. */
  /* Sign-extend return value so high bit will be set on any unexpected char. */
  return table[(unsigned char)ch];
}
567 static char *jsondec_partialbase64(jsondec *d, const char *ptr, const char *end,
573 val = jsondec_base64_tablelookup(ptr[0]) << 18 |
574 jsondec_base64_tablelookup(ptr[1]) << 12;
579 val = jsondec_base64_tablelookup(ptr[0]) << 18 |
580 jsondec_base64_tablelookup(ptr[1]) << 12 |
581 jsondec_base64_tablelookup(ptr[2]) << 6;
583 out[1] = (val >> 8) & 0xff;
589 jsondec_err(d, "Corrupt base64");
595 static size_t jsondec_base64(jsondec *d, upb_strview str) {
596 /* We decode in place. This is safe because this is a new buffer (not
597 * aliasing the input) and because base64 decoding shrinks 4 bytes into 3. */
598 char *out = (char*)str.data;
599 const char *ptr = str.data;
600 const char *end = ptr + str.size;
601 const char *end4 = ptr + (str.size & -4); /* Round down to multiple of 4. */
603 for (; ptr < end4; ptr += 4, out += 3) {
604 int val = jsondec_base64_tablelookup(ptr[0]) << 18 |
605 jsondec_base64_tablelookup(ptr[1]) << 12 |
606 jsondec_base64_tablelookup(ptr[2]) << 6 |
607 jsondec_base64_tablelookup(ptr[3]) << 0;
610 /* Junk chars or padding. Remove trailing padding, if any. */
611 if (end - ptr == 4 && ptr[3] == '=') {
622 out[1] = (val >> 8) & 0xff;
627 /* Process remaining chars. We do not require padding. */
628 out = jsondec_partialbase64(d, ptr, end, out);
631 return out - str.data;
634 /* Low-level integer parsing **************************************************/
636 /* We use these hand-written routines instead of strto[u]l() because the "long
637 * long" variants aren't in c89. Also our version allows setting a ptr limit. */
639 static const char *jsondec_buftouint64(jsondec *d, const char *ptr,
640 const char *end, uint64_t *val) {
643 unsigned ch = *ptr - '0';
645 if (u64 > UINT64_MAX / 10 || u64 * 10 > UINT64_MAX - ch) {
646 jsondec_err(d, "Integer overflow");
657 static const char *jsondec_buftoint64(jsondec *d, const char *ptr,
658 const char *end, int64_t *val) {
662 if (ptr != end && *ptr == '-') {
667 ptr = jsondec_buftouint64(d, ptr, end, &u64);
668 if (u64 > (uint64_t)INT64_MAX + neg) {
669 jsondec_err(d, "Integer overflow");
672 *val = neg ? -u64 : u64;
676 static uint64_t jsondec_strtouint64(jsondec *d, upb_strview str) {
677 const char *end = str.data + str.size;
679 if (jsondec_buftouint64(d, str.data, end, &ret) != end) {
680 jsondec_err(d, "Non-number characters in quoted integer");
685 static int64_t jsondec_strtoint64(jsondec *d, upb_strview str) {
686 const char *end = str.data + str.size;
688 if (jsondec_buftoint64(d, str.data, end, &ret) != end) {
689 jsondec_err(d, "Non-number characters in quoted integer");
694 /* Primitive value types ******************************************************/
696 /* Parse INT32 or INT64 value. */
697 static upb_msgval jsondec_int(jsondec *d, const upb_fielddef *f) {
700 switch (jsondec_peek(d)) {
702 double dbl = jsondec_number(d);
703 if (dbl > 9223372036854774784.0 || dbl < -9223372036854775808.0) {
704 jsondec_err(d, "JSON number is out of range.");
706 val.int64_val = dbl; /* must be guarded, overflow here is UB */
707 if (val.int64_val != dbl) {
708 jsondec_errf(d, "JSON number was not integral (%f != %" PRId64 ")", dbl,
714 upb_strview str = jsondec_string(d);
715 val.int64_val = jsondec_strtoint64(d, str);
719 jsondec_err(d, "Expected number or string");
722 if (upb_fielddef_type(f) == UPB_TYPE_INT32) {
723 if (val.int64_val > INT32_MAX || val.int64_val < INT32_MIN) {
724 jsondec_err(d, "Integer out of range.");
726 val.int32_val = (int32_t)val.int64_val;
732 /* Parse UINT32 or UINT64 value. */
733 static upb_msgval jsondec_uint(jsondec *d, const upb_fielddef *f) {
734 upb_msgval val = {0};
736 switch (jsondec_peek(d)) {
738 double dbl = jsondec_number(d);
739 if (dbl > 18446744073709549568.0 || dbl < 0) {
740 jsondec_err(d, "JSON number is out of range.");
742 val.uint64_val = dbl; /* must be guarded, overflow here is UB */
743 if (val.uint64_val != dbl) {
744 jsondec_errf(d, "JSON number was not integral (%f != %" PRIu64 ")", dbl,
750 upb_strview str = jsondec_string(d);
751 val.uint64_val = jsondec_strtouint64(d, str);
755 jsondec_err(d, "Expected number or string");
758 if (upb_fielddef_type(f) == UPB_TYPE_UINT32) {
759 if (val.uint64_val > UINT32_MAX) {
760 jsondec_err(d, "Integer out of range.");
762 val.uint32_val = (uint32_t)val.uint64_val;
768 /* Parse DOUBLE or FLOAT value. */
769 static upb_msgval jsondec_double(jsondec *d, const upb_fielddef *f) {
771 upb_msgval val = {0};
773 switch (jsondec_peek(d)) {
775 val.double_val = jsondec_number(d);
778 str = jsondec_string(d);
779 if (jsondec_streql(str, "NaN")) {
780 val.double_val = NAN;
781 } else if (jsondec_streql(str, "Infinity")) {
782 val.double_val = INFINITY;
783 } else if (jsondec_streql(str, "-Infinity")) {
784 val.double_val = -INFINITY;
786 val.double_val = strtod(str.data, NULL);
790 jsondec_err(d, "Expected number or string");
793 if (upb_fielddef_type(f) == UPB_TYPE_FLOAT) {
794 if (val.double_val != INFINITY && val.double_val != -INFINITY &&
795 (val.double_val > FLT_MAX || val.double_val < -FLT_MAX)) {
796 jsondec_err(d, "Float out of range");
798 val.float_val = val.double_val;
804 /* Parse STRING or BYTES value. */
805 static upb_msgval jsondec_strfield(jsondec *d, const upb_fielddef *f) {
807 val.str_val = jsondec_string(d);
808 if (upb_fielddef_type(f) == UPB_TYPE_BYTES) {
809 val.str_val.size = jsondec_base64(d, val.str_val);
814 static upb_msgval jsondec_enum(jsondec *d, const upb_fielddef *f) {
815 switch (jsondec_peek(d)) {
817 const upb_enumdef *e = upb_fielddef_enumsubdef(f);
818 upb_strview str = jsondec_string(d);
820 if (!upb_enumdef_ntoi(e, str.data, str.size, &val.int32_val)) {
821 if (d->options & UPB_JSONDEC_IGNOREUNKNOWN) {
824 jsondec_errf(d, "Unknown enumerator: '" UPB_STRVIEW_FORMAT "'",
825 UPB_STRVIEW_ARGS(str));
831 if (jsondec_isnullvalue(f)) {
840 return jsondec_int(d, f);
844 static upb_msgval jsondec_bool(jsondec *d, const upb_fielddef *f) {
845 bool is_map_key = upb_fielddef_number(f) == 1 &&
846 upb_msgdef_mapentry(upb_fielddef_containingtype(f));
850 upb_strview str = jsondec_string(d);
851 if (jsondec_streql(str, "true")) {
853 } else if (jsondec_streql(str, "false")) {
854 val.bool_val = false;
856 jsondec_err(d, "Invalid boolean map key");
859 switch (jsondec_peek(d)) {
865 val.bool_val = false;
869 jsondec_err(d, "Expected true or false");
876 /* Composite types (array/message/map) ****************************************/
878 static void jsondec_array(jsondec *d, upb_msg *msg, const upb_fielddef *f) {
879 upb_array *arr = upb_msg_mutable(msg, f, d->arena).array;
882 while (jsondec_arrnext(d)) {
883 upb_msgval elem = jsondec_value(d, f);
884 upb_array_append(arr, elem, d->arena);
889 static void jsondec_map(jsondec *d, upb_msg *msg, const upb_fielddef *f) {
890 upb_map *map = upb_msg_mutable(msg, f, d->arena).map;
891 const upb_msgdef *entry = upb_fielddef_msgsubdef(f);
892 const upb_fielddef *key_f = upb_msgdef_itof(entry, 1);
893 const upb_fielddef *val_f = upb_msgdef_itof(entry, 2);
896 while (jsondec_objnext(d)) {
898 key = jsondec_value(d, key_f);
900 val = jsondec_value(d, val_f);
901 upb_map_set(map, key, val, d->arena);
906 static void jsondec_tomsg(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
907 if (upb_msgdef_wellknowntype(m) == UPB_WELLKNOWN_UNSPECIFIED) {
908 jsondec_object(d, msg, m);
910 jsondec_wellknown(d, msg, m);
914 static upb_msgval jsondec_msg(jsondec *d, const upb_fielddef *f) {
915 const upb_msgdef *m = upb_fielddef_msgsubdef(f);
916 upb_msg *msg = upb_msg_new(m, d->arena);
919 jsondec_tomsg(d, msg, m);
924 static void jsondec_field(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
926 const upb_fielddef *f;
927 const upb_fielddef *preserved;
929 name = jsondec_string(d);
931 f = upb_msgdef_lookupjsonname(m, name.data, name.size);
934 if ((d->options & UPB_JSONDEC_IGNOREUNKNOWN) == 0) {
935 jsondec_errf(d, "No such field: " UPB_STRVIEW_FORMAT,
936 UPB_STRVIEW_ARGS(name));
942 if (jsondec_peek(d) == JD_NULL && !jsondec_isvalue(f)) {
943 /* JSON "null" indicates a default value, so no need to set anything. */
948 if (upb_fielddef_realcontainingoneof(f) &&
949 upb_msg_whichoneof(msg, upb_fielddef_containingoneof(f))) {
950 jsondec_err(d, "More than one field for this oneof.");
953 preserved = d->debug_field;
956 if (upb_fielddef_ismap(f)) {
957 jsondec_map(d, msg, f);
958 } else if (upb_fielddef_isseq(f)) {
959 jsondec_array(d, msg, f);
960 } else if (upb_fielddef_issubmsg(f)) {
961 upb_msg *submsg = upb_msg_mutable(msg, f, d->arena).msg;
962 const upb_msgdef *subm = upb_fielddef_msgsubdef(f);
963 jsondec_tomsg(d, submsg, subm);
965 upb_msgval val = jsondec_value(d, f);
966 upb_msg_set(msg, f, val, d->arena);
969 d->debug_field = preserved;
972 static void jsondec_object(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
974 while (jsondec_objnext(d)) {
975 jsondec_field(d, msg, m);
980 static upb_msgval jsondec_value(jsondec *d, const upb_fielddef *f) {
981 switch (upb_fielddef_type(f)) {
983 return jsondec_bool(d, f);
985 case UPB_TYPE_DOUBLE:
986 return jsondec_double(d, f);
987 case UPB_TYPE_UINT32:
988 case UPB_TYPE_UINT64:
989 return jsondec_uint(d, f);
992 return jsondec_int(d, f);
993 case UPB_TYPE_STRING:
995 return jsondec_strfield(d, f);
997 return jsondec_enum(d, f);
998 case UPB_TYPE_MESSAGE:
999 return jsondec_msg(d, f);
1005 /* Well-known types ***********************************************************/
1007 static int jsondec_tsdigits(jsondec *d, const char **ptr, size_t digits,
1008 const char *after) {
1010 const char *p = *ptr;
1011 const char *end = p + digits;
1012 size_t after_len = after ? strlen(after) : 0;
1014 UPB_ASSERT(digits <= 9); /* int can't overflow. */
1016 if (jsondec_buftouint64(d, p, end, &val) != end ||
1017 (after_len && memcmp(end, after, after_len) != 0)) {
1018 jsondec_err(d, "Malformed timestamp");
1021 UPB_ASSERT(val < INT_MAX);
1023 *ptr = end + after_len;
1027 static int jsondec_nanos(jsondec *d, const char **ptr, const char *end) {
1029 const char *p = *ptr;
1031 if (p != end && *p == '.') {
1032 const char *nano_end = jsondec_buftouint64(d, p + 1, end, &nanos);
1033 int digits = (int)(nano_end - p - 1);
1034 int exp_lg10 = 9 - digits;
1036 jsondec_err(d, "Too many digits for partial seconds");
1038 while (exp_lg10--) nanos *= 10;
1042 UPB_ASSERT(nanos < INT_MAX);
/* Converts a calendar date to a day count relative to 1970-01-01, so that
 * jsondec_epochdays(1970, 1, 1) == 0.
 *
 * The month is shifted so that March is month 0 (January and February are
 * counted as the last months of the previous year), and the year is offset by
 * 4800 (a multiple of 400 that lies before the minimum year).  All arithmetic
 * is done in uint32_t and converted to int on return. */
int jsondec_epochdays(int y, int m, int d) {
  uint32_t shifted_month = (uint32_t)m - 3u;
  uint32_t shifted_year = (uint32_t)y + 4800u;
  uint32_t days;

  /* The subtraction wrapped around: the month precedes March. */
  if (shifted_month > (uint32_t)m) {
    shifted_month += 12u;
    shifted_year -= 1u;
  }

  days = shifted_year * 365u;
  /* Leap days accumulated up to the shifted year. */
  days += shifted_year / 4u - shifted_year / 100u + shifted_year / 400u;
  /* Days in the months preceding the shifted month. */
  days += (shifted_month * 62719u + 769u) / 2048u;
  days += (uint32_t)d - 1u;

  return (int)(days - 2472632u);
}
/* Combines a calendar date and a time of day into a count of seconds, using
 * jsondec_epochdays() for the date part (86400 seconds per day). */
static int64_t jsondec_unixtime(int y, int m, int d, int h, int min, int s) {
  const int64_t days = jsondec_epochdays(y, m, d);
  return days * 86400 + h * 3600 + min * 60 + s;
}
1063 static void jsondec_timestamp(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
1066 upb_strview str = jsondec_string(d);
1067 const char *ptr = str.data;
1068 const char *end = ptr + str.size;
1070 if (str.size < 20) goto malformed;
1073 /* 1972-01-01T01:00:00 */
1074 int year = jsondec_tsdigits(d, &ptr, 4, "-");
1075 int mon = jsondec_tsdigits(d, &ptr, 2, "-");
1076 int day = jsondec_tsdigits(d, &ptr, 2, "T");
1077 int hour = jsondec_tsdigits(d, &ptr, 2, ":");
1078 int min = jsondec_tsdigits(d, &ptr, 2, ":");
1079 int sec = jsondec_tsdigits(d, &ptr, 2, NULL);
1081 seconds.int64_val = jsondec_unixtime(year, mon, day, hour, min, sec);
1084 nanos.int32_val = jsondec_nanos(d, &ptr, end);
1087 /* [+-]08:00 or Z */
1092 if (ptr == end) goto malformed;
1099 if ((end - ptr) != 5) goto malformed;
1100 ofs_hour = jsondec_tsdigits(d, &ptr, 2, ":");
1101 ofs_min = jsondec_tsdigits(d, &ptr, 2, NULL);
1102 ofs_min = ((ofs_hour * 60) + ofs_min) * 60;
1103 seconds.int64_val += (neg ? ofs_min : -ofs_min);
1106 if (ptr != end) goto malformed;
1113 if (seconds.int64_val < -62135596800) {
1114 jsondec_err(d, "Timestamp out of range");
1117 upb_msg_set(msg, upb_msgdef_itof(m, 1), seconds, d->arena);
1118 upb_msg_set(msg, upb_msgdef_itof(m, 2), nanos, d->arena);
1122 jsondec_err(d, "Malformed timestamp");
1125 static void jsondec_duration(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
1128 upb_strview str = jsondec_string(d);
1129 const char *ptr = str.data;
1130 const char *end = ptr + str.size;
1131 const int64_t max = (uint64_t)3652500 * 86400;
1133 /* "3.000000001s", "3s", etc. */
1134 ptr = jsondec_buftoint64(d, ptr, end, &seconds.int64_val);
1135 nanos.int32_val = jsondec_nanos(d, &ptr, end);
1137 if (end - ptr != 1 || *ptr != 's') {
1138 jsondec_err(d, "Malformed duration");
1141 if (seconds.int64_val < -max || seconds.int64_val > max) {
1142 jsondec_err(d, "Duration out of range");
1145 if (seconds.int64_val < 0) {
1146 nanos.int32_val = - nanos.int32_val;
1149 upb_msg_set(msg, upb_msgdef_itof(m, 1), seconds, d->arena);
1150 upb_msg_set(msg, upb_msgdef_itof(m, 2), nanos, d->arena);
1153 static void jsondec_listvalue(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
1154 const upb_fielddef *values_f = upb_msgdef_itof(m, 1);
1155 const upb_msgdef *value_m = upb_fielddef_msgsubdef(values_f);
1156 upb_array *values = upb_msg_mutable(msg, values_f, d->arena).array;
1158 jsondec_arrstart(d);
1159 while (jsondec_arrnext(d)) {
1160 upb_msg *value_msg = upb_msg_new(value_m, d->arena);
1162 value.msg_val = value_msg;
1163 upb_array_append(values, value, d->arena);
1164 jsondec_wellknownvalue(d, value_msg, value_m);
1169 static void jsondec_struct(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
1170 const upb_fielddef *fields_f = upb_msgdef_itof(m, 1);
1171 const upb_msgdef *entry_m = upb_fielddef_msgsubdef(fields_f);
1172 const upb_fielddef *value_f = upb_msgdef_itof(entry_m, 2);
1173 const upb_msgdef *value_m = upb_fielddef_msgsubdef(value_f);
1174 upb_map *fields = upb_msg_mutable(msg, fields_f, d->arena).map;
1176 jsondec_objstart(d);
1177 while (jsondec_objnext(d)) {
1178 upb_msgval key, value;
1179 upb_msg *value_msg = upb_msg_new(value_m, d->arena);
1180 key.str_val = jsondec_string(d);
1181 value.msg_val = value_msg;
1182 upb_map_set(fields, key, value, d->arena);
1183 jsondec_entrysep(d);
1184 jsondec_wellknownvalue(d, value_msg, value_m);
1189 static void jsondec_wellknownvalue(jsondec *d, upb_msg *msg,
1190 const upb_msgdef *m) {
1192 const upb_fielddef *f;
1195 switch (jsondec_peek(d)) {
1197 /* double number_value = 2; */
1198 f = upb_msgdef_itof(m, 2);
1199 val.double_val = jsondec_number(d);
1202 /* string string_value = 3; */
1203 f = upb_msgdef_itof(m, 3);
1204 val.str_val = jsondec_string(d);
1207 /* bool bool_value = 4; */
1208 f = upb_msgdef_itof(m, 4);
1209 val.bool_val = false;
1213 /* bool bool_value = 4; */
1214 f = upb_msgdef_itof(m, 4);
1215 val.bool_val = true;
1219 /* NullValue null_value = 1; */
1220 f = upb_msgdef_itof(m, 1);
1224 /* Note: these cases return, because upb_msg_mutable() is enough. */
1226 /* Struct struct_value = 5; */
1227 f = upb_msgdef_itof(m, 5);
1228 submsg = upb_msg_mutable(msg, f, d->arena).msg;
1229 jsondec_struct(d, submsg, upb_fielddef_msgsubdef(f));
1232 /* ListValue list_value = 6; */
1233 f = upb_msgdef_itof(m, 6);
1234 submsg = upb_msg_mutable(msg, f, d->arena).msg;
1235 jsondec_listvalue(d, submsg, upb_fielddef_msgsubdef(f));
1241 upb_msg_set(msg, f, val, d->arena);
1244 static upb_strview jsondec_mask(jsondec *d, const char *buf, const char *end) {
1245 /* FieldMask fields grow due to inserted '_' characters, so we can't do the
1246 * transform in place. */
1247 const char *ptr = buf;
1251 ret.size = end - ptr;
1253 ret.size += (*ptr >= 'A' && *ptr <= 'Z');
1257 out = upb_arena_malloc(d->arena, ret.size);
1263 if (ch >= 'A' && ch <= 'Z') {
1266 } else if (ch == '_') {
1267 jsondec_err(d, "field mask may not contain '_'");
1276 static void jsondec_fieldmask(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
1277 /* repeated string paths = 1; */
1278 const upb_fielddef *paths_f = upb_msgdef_itof(m, 1);
1279 upb_array *arr = upb_msg_mutable(msg, paths_f, d->arena).array;
1280 upb_strview str = jsondec_string(d);
1281 const char *ptr = str.data;
1282 const char *end = ptr + str.size;
1286 const char *elem_end = memchr(ptr, ',', end - ptr);
1288 val.str_val = jsondec_mask(d, ptr, elem_end);
1291 val.str_val = jsondec_mask(d, ptr, end);
1294 upb_array_append(arr, val, d->arena);
1298 static void jsondec_anyfield(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
1299 if (upb_msgdef_wellknowntype(m) == UPB_WELLKNOWN_UNSPECIFIED) {
1300 /* For regular types: {"@type": "[user type]", "f1": <V1>, "f2": <V2>}
1301 * where f1, f2, etc. are the normal fields of this type. */
1302 jsondec_field(d, msg, m);
1304 /* For well-known types: {"@type": "[well-known type]", "value": <X>}
1305 * where <X> is whatever encoding the WKT normally uses. */
1306 upb_strview str = jsondec_string(d);
1307 jsondec_entrysep(d);
1308 if (!jsondec_streql(str, "value")) {
1309 jsondec_err(d, "Key for well-known type must be 'value'");
1311 jsondec_wellknown(d, msg, m);
1315 static const upb_msgdef *jsondec_typeurl(jsondec *d, upb_msg *msg,
1316 const upb_msgdef *m) {
1317 const upb_fielddef *type_url_f = upb_msgdef_itof(m, 1);
1318 const upb_msgdef *type_m;
1319 upb_strview type_url = jsondec_string(d);
1320 const char *end = type_url.data + type_url.size;
1321 const char *ptr = end;
1324 val.str_val = type_url;
1325 upb_msg_set(msg, type_url_f, val, d->arena);
1327 /* Find message name after the last '/' */
1328 while (ptr > type_url.data && *--ptr != '/') {}
1330 if (ptr == type_url.data || ptr == end) {
1331 jsondec_err(d, "Type url must have at least one '/' and non-empty host");
1335 type_m = upb_symtab_lookupmsg2(d->any_pool, ptr, end - ptr);
1338 jsondec_err(d, "Type was not found");
1344 static void jsondec_any(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
1345 /* string type_url = 1;
1346 * bytes value = 2; */
1347 const upb_fielddef *value_f = upb_msgdef_itof(m, 2);
1349 const upb_msgdef *any_m = NULL;
1350 const char *pre_type_data = NULL;
1351 const char *pre_type_end = NULL;
1354 jsondec_objstart(d);
1356 /* Scan looking for "@type", which is not necessarily first. */
1357 while (!any_m && jsondec_objnext(d)) {
1358 const char *start = d->ptr;
1359 upb_strview name = jsondec_string(d);
1360 jsondec_entrysep(d);
1361 if (jsondec_streql(name, "@type")) {
1362 any_m = jsondec_typeurl(d, msg, m);
1363 if (pre_type_data) {
1364 pre_type_end = start;
1365 while (*pre_type_end != ',') pre_type_end--;
1368 if (!pre_type_data) pre_type_data = start;
1374 jsondec_err(d, "Any object didn't contain a '@type' field");
1377 any_msg = upb_msg_new(any_m, d->arena);
1379 if (pre_type_data) {
1380 size_t len = pre_type_end - pre_type_data + 1;
1381 char *tmp = upb_arena_malloc(d->arena, len);
1382 const char *saved_ptr = d->ptr;
1383 const char *saved_end = d->end;
1384 memcpy(tmp, pre_type_data, len - 1);
1389 while (jsondec_objnext(d)) {
1390 jsondec_anyfield(d, any_msg, any_m);
1396 while (jsondec_objnext(d)) {
1397 jsondec_anyfield(d, any_msg, any_m);
1402 encoded.str_val.data = upb_encode(any_msg, upb_msgdef_layout(any_m), d->arena,
1403 &encoded.str_val.size);
1404 upb_msg_set(msg, value_f, encoded, d->arena);
1407 static void jsondec_wrapper(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
1408 const upb_fielddef *value_f = upb_msgdef_itof(m, 1);
1409 upb_msgval val = jsondec_value(d, value_f);
1410 upb_msg_set(msg, value_f, val, d->arena);
1413 static void jsondec_wellknown(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
1414 switch (upb_msgdef_wellknowntype(m)) {
1415 case UPB_WELLKNOWN_ANY:
1416 jsondec_any(d, msg, m);
1418 case UPB_WELLKNOWN_FIELDMASK:
1419 jsondec_fieldmask(d, msg, m);
1421 case UPB_WELLKNOWN_DURATION:
1422 jsondec_duration(d, msg, m);
1424 case UPB_WELLKNOWN_TIMESTAMP:
1425 jsondec_timestamp(d, msg, m);
1427 case UPB_WELLKNOWN_VALUE:
1428 jsondec_wellknownvalue(d, msg, m);
1430 case UPB_WELLKNOWN_LISTVALUE:
1431 jsondec_listvalue(d, msg, m);
1433 case UPB_WELLKNOWN_STRUCT:
1434 jsondec_struct(d, msg, m);
1436 case UPB_WELLKNOWN_DOUBLEVALUE:
1437 case UPB_WELLKNOWN_FLOATVALUE:
1438 case UPB_WELLKNOWN_INT64VALUE:
1439 case UPB_WELLKNOWN_UINT64VALUE:
1440 case UPB_WELLKNOWN_INT32VALUE:
1441 case UPB_WELLKNOWN_UINT32VALUE:
1442 case UPB_WELLKNOWN_STRINGVALUE:
1443 case UPB_WELLKNOWN_BYTESVALUE:
1444 case UPB_WELLKNOWN_BOOLVALUE:
1445 jsondec_wrapper(d, msg, m);
1452 bool upb_json_decode(const char *buf, size_t size, upb_msg *msg,
1453 const upb_msgdef *m, const upb_symtab *any_pool,
1454 int options, upb_arena *arena, upb_status *status) {
1457 if (size == 0) return true;
1462 d.any_pool = any_pool;
1464 d.options = options;
1467 d.line_begin = d.ptr;
1468 d.debug_field = NULL;
1471 if (UPB_SETJMP(d.err)) return false;
1473 jsondec_tomsg(&d, msg, m);