2 ** Internal implementation details of the decoder that are shared between
3 ** decode.c and decode_fast.c.
6 #ifndef UPB_DECODE_INT_H_
7 #define UPB_DECODE_INT_H_
12 #include "upb/upb.int.h"
15 #include "upb/port_def.inc"
/* Sentinel for upb_decstate.end_group meaning that no END_GROUP tag applies.
 * end_group is a uint32_t, so storing this value there yields UINT32_MAX.
 * The replacement list is parenthesized so the macro always expands to a
 * single expression regardless of the surrounding operators. */
#define DECODE_NOGROUP (-1)
/* Decoder state used by the inline helpers in this header. The members listed
 * here describe the end of the current input buffer, the active submessage
 * limit, the start of unknown data, and the group currently being parsed. */
19 typedef struct upb_decstate {
20 const char *end; /* Can read up to 16 bytes slop beyond this. */
21 const char *limit_ptr; /* = end + UPB_MIN(limit, 0); asserted by decode_checklimit(). */
22 upb_msg *unknown_msg; /* If non-NULL, add unknown data at buffer flip. */
23 const char *unknown; /* Start of unknown data. */
24 int limit; /* Submessage limit relative to end. */
26 uint32_t end_group; /* Field number of END_GROUP tag, else DECODE_NOGROUP. */
33 /* Error function that will abort decoding with longjmp(). We can't declare this
34 * UPB_NORETURN, even though it is appropriate, because if we do then compilers
35 * will "helpfully" refuse to tailcall to it
36 * (see: https://stackoverflow.com/a/55657013), which will defeat a major goal
37 * of our optimizations. That is also why we must declare it in a separate file,
38 * otherwise the compiler will see that it calls longjmp() and deduce that it is
 * noreturn. */
/* Takes the decoder state and is declared to return const char *, the same
 * type that fastdecode_tagdispatch() in this header returns. */
40 const char *fastdecode_err(upb_decstate *d);
/* Table indexed by a byte value. decode_verifyutf8_inl() looks up the byte at
 * the current position and uses the entry as the number of bytes in the
 * sequence that starts there; it tests for an entry of 0 explicitly. */
42 extern const uint8_t upb_utf8_offsets[];
/* UTF-8 check over the len bytes at buf. For the byte at index i it looks up a
 * sequence length in upb_utf8_offsets, tests that length against 0 and against
 * the end of the buffer, and then inspects each following byte of the
 * sequence. */
45 bool decode_verifyutf8_inl(const char *buf, int len) {
/* The cast to uint8_t keeps the table index non-negative even where plain
 * char is signed. */
51 offset = upb_utf8_offsets[(uint8_t)buf[i]];
/* offset == 0: presumably marks a byte that cannot start a sequence (confirm
 * against the table definition). i + offset > len: the sequence would extend
 * past the end of the buffer. */
52 if (offset == 0 || i + offset > len) {
/* Walk the bytes after the first one, up to the sequence length. */
55 for (j = i + 1; j < i + offset; j++) {
/* Tests whether the top two bits differ from binary 10. */
56 if ((buf[j] & 0xc0) != 0x80) {
65 /* x86-64 pointers always have the high 16 bits matching. So we can shift
66 * left 8 and right 8 without loss of information. */
/* Packs a layout pointer and its table_mask into a single intptr_t: the
 * pointer is shifted left by 8 bits and table_mask is OR'ed in.
 * decode_totablep() recovers the pointer by shifting right by 8, and
 * fastdecode_tagdispatch() obtains the mask by truncating the packed value to
 * uint8_t. */
67 UPB_INLINE intptr_t decode_totable(const upb_msglayout *tablep) {
68 return ((intptr_t)tablep << 8) | tablep->table_mask;
/* Pointer half of the inverse of decode_totable(): shifts the packed value
 * right by 8 bits and casts the result back to a layout pointer. */
71 UPB_INLINE const upb_msglayout *decode_totablep(intptr_t table) {
72 return (const upb_msglayout*)(table >> 8);
/* Fallback used once ptr is no longer below limit_ptr. When overrun is still
 * below d->limit, decoding is moved into d->patch: d->end, d->limit_ptr,
 * d->unknown and the returned position are all re-expressed relative to the
 * patch buffer. */
76 const char *decode_isdonefallback_inl(upb_decstate *d, const char *ptr,
78 if (overrun < d->limit) {
79 /* Need to copy remaining data into patch buffer. */
80 UPB_ASSERT(overrun < 16);
/* Hand the bytes in [d->unknown, ptr) to _upb_msg_addunknown() for
 * d->unknown_msg. */
82 if (!_upb_msg_addunknown(d->unknown_msg, d->unknown, ptr - d->unknown,
/* Unknown data now starts at the same overrun offset inside the patch. */
86 d->unknown = &d->patch[0] + overrun;
/* Zero patch[16..31], then copy the 16 bytes starting at d->end into
 * patch[0..15]. */
88 memset(d->patch + 16, 0, 16);
89 memcpy(d->patch, d->end, 16);
/* Continue from the same overrun offset, now inside the patch buffer, and
 * make patch + 16 the new end. */
90 ptr = &d->patch[0] + overrun;
91 d->end = &d->patch[16];
/* Recompute limit_ptr from the new end and d->limit. */
93 d->limit_ptr = d->end + d->limit;
95 UPB_ASSERT(ptr < d->limit_ptr);
/* Called by decode_isdone() with the decoder state, the current position and
 * that position's overrun past d->end; the returned pointer replaces the
 * caller's position. */
102 const char *decode_isdonefallback(upb_decstate *d, const char *ptr,
/* Classifies the current position *ptr into three cases:
 *   1. *ptr is still below d->limit_ptr (marked as the likely case);
 *   2. *ptr sits exactly at the current limit, i.e. *ptr - d->end == d->limit;
 *   3. anything else, which is passed to decode_isdonefallback(); its result
 *      is stored back into *ptr.
 * NOTE(review): the value returned for each case is not shown next to these
 * lines -- presumably false / true / false; confirm. */
106 bool decode_isdone(upb_decstate *d, const char **ptr) {
/* Distance of *ptr past d->end, narrowed to int. */
107 int overrun = *ptr - d->end;
108 if (UPB_LIKELY(*ptr < d->limit_ptr)) {
110 } else if (UPB_LIKELY(overrun == d->limit)) {
113 *ptr = decode_isdonefallback(d, *ptr, overrun);
/* Selects a fast-table entry for tag and calls its parser. The packed table
 * value supplies both the layout pointer (via decode_totablep()) and the mask
 * (via truncation to uint8_t). The chosen entry's field_data is XOR'ed with
 * the tag, and the result is passed to the entry's field_parser together with
 * d, ptr, msg, table and hasbits unchanged. */
119 const char *fastdecode_tagdispatch(upb_decstate *d, const char *ptr,
120 upb_msg *msg, intptr_t table,
121 uint64_t hasbits, uint32_t tag) {
122 const upb_msglayout *table_p = decode_totablep(table);
/* Implicit conversion to uint8_t keeps only the low 8 bits of table. */
123 uint8_t mask = table;
125 size_t idx = tag & mask;
/* States that the low three bits of idx are zero. */
126 UPB_ASSUME((idx & 7) == 0);
128 data = table_p->fasttable[idx].field_data ^ tag;
129 return table_p->fasttable[idx].field_parser(d, ptr, msg, table, hasbits, data);
/* Reads a tag from ptr. Exactly two bytes are copied from ptr into the local
 * tag with memcpy, so ptr is not dereferenced through a wider pointer type. */
132 UPB_INLINE uint32_t fastdecode_loadtag(const char* ptr) {
134 memcpy(&tag, ptr, 2);
/* Asserts the limit_ptr invariant: limit_ptr == end + min(0, limit).
 * decode_pushlimit() and decode_poplimit() call this both before and after
 * they modify the limit. */
138 UPB_INLINE void decode_checklimit(upb_decstate *d) {
139 UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit));
/* Sets up a limit for a region of size bytes that starts at ptr. The limit is
 * expressed relative to d->end (size + (ptr - d->end)) and limit_ptr is
 * recomputed as end + min(0, limit). decode_checklimit() is invoked before
 * and after the update.
 * NOTE(review): delta (old limit minus new limit) looks like the value that
 * decode_poplimit() later receives as saved_delta -- confirm. */
142 UPB_INLINE int decode_pushlimit(upb_decstate *d, const char *ptr, int size) {
143 int limit = size + (int)(ptr - d->end);
144 int delta = d->limit - limit;
145 decode_checklimit(d);
147 d->limit_ptr = d->end + UPB_MIN(0, limit);
148 decode_checklimit(d);
/* Restores the limit after a limited region has been consumed. ptr must sit
 * exactly at the current limit (ptr - d->end == d->limit). saved_delta is then
 * added to d->limit and limit_ptr is recomputed as end + min(0, limit), with
 * decode_checklimit() invoked before and after the update. */
152 UPB_INLINE void decode_poplimit(upb_decstate *d, const char *ptr,
154 UPB_ASSERT(ptr - d->end == d->limit);
155 decode_checklimit(d);
156 d->limit += saved_delta;
157 d->limit_ptr = d->end + UPB_MIN(0, d->limit);
158 decode_checklimit(d);
161 #include "upb/port_undef.inc"
163 #endif /* UPB_DECODE_INT_H_ */