2 ** Our memory representation for parsing tables and messages themselves.
3 ** Functions in this file are used by generated code and possibly reflection.
5 ** The definitions in this file are internal to upb.
15 #include "upb/table.int.h"
19 #include "upb/port_def.inc"
/* Yields a `type*` that points `ofs` bytes past `msg`. Each value argument
 * and the whole expansion are parenthesized so that argument expressions
 * containing low-precedence operators (e.g. `c ? a : b`), and postfix
 * operators applied to the result (e.g. `PTR_AT(...)[i]`), group as the
 * caller expects. */
#define PTR_AT(msg, ofs, type) ((type*)((const char*)(msg) + (ofs)))
29 /** upb_msglayout *************************************************************/
31 /* upb_msglayout represents the memory layout of a given upb_msgdef. The
32 * members are public so generated code can initialize them, but users MUST NOT
33 * read or write any of its members. */
35 /* These aren't real labels according to descriptor.proto, but in the table we
36 * use these for map/packed fields instead of UPB_LABEL_REPEATED. */
39 _UPB_LABEL_PACKED = 7 /* Low 2 bits are common with UPB_LABEL_REPEATED (_upb_isrepeated masks with 3). */
45 int16_t presence; /* If >0, hasbit_index. If <0, ~oneof_index. */
46 uint16_t submsg_index; /* undefined if descriptortype != MESSAGE or GROUP. */
47 uint8_t descriptortype;
48 uint8_t label; /* google.protobuf.Label or _UPB_LABEL_* above. */
49 } upb_msglayout_field;
/* Function type of the parser stored in the field_parser member of
 * _upb_fasttable_entry. It is called with the decoder state `d`, a
 * `const char *ptr`, a `upb_msg *msg`, and three integer words (table,
 * hasbits, data), and returns a const char*.
 * NOTE(review): the return value is presumably the updated input position --
 * confirm against the decoder. */
54 typedef const char *_upb_field_parser(struct upb_decstate *d, const char *ptr,
55 upb_msg *msg, intptr_t table,
56 uint64_t hasbits, uint64_t data);
60 _upb_field_parser *field_parser;
61 } _upb_fasttable_entry;
63 typedef struct upb_msglayout {
64 const struct upb_msglayout *const* submsgs;
65 const upb_msglayout_field *fields;
66 /* Must be aligned to sizeof(void*). Doesn't include internal members like
67 * unknown fields, extension dict, pointer to msglayout, etc. */
72 /* To constant-initialize the tables of variable length, we need a flexible
73 * array member, and we need to compile in C99 mode. */
74 _upb_fasttable_entry fasttable[];
77 /** upb_msg *******************************************************************/
79 /* Internal members of a upb_msg. We can change this without breaking binary
80 * compatibility. We put these before the user's data. The user's upb_msg*
81 * points after the upb_msg_internal. */
87 } upb_msg_unknowndata;
89 /* Used when a message is not extendable. */
91 upb_msg_unknowndata *unknown;
94 /* Maps upb_fieldtype_t -> memory size. */
95 extern char _upb_fieldtype_to_size[12];
/* Returns the total number of bytes a message with layout `l` occupies: the
 * layout's own size plus the upb_msg_internal header. */
97 UPB_INLINE size_t upb_msg_sizeof(const upb_msglayout *l) {
98 return l->size + sizeof(upb_msg_internal);
/* Inline message constructor: obtains upb_msg_sizeof(l) bytes from arena `a`.
 * Returns NULL if the arena allocation fails. */
101 UPB_INLINE upb_msg *_upb_msg_new_inl(const upb_msglayout *l, upb_arena *a) {
102 size_t size = upb_msg_sizeof(l);
103 void *mem = upb_arena_malloc(a, size);
105 if (UPB_UNLIKELY(!mem)) return NULL;
/* The user-visible upb_msg* starts just past the upb_msg_internal header. */
106 msg = UPB_PTR_AT(mem, sizeof(upb_msg_internal), upb_msg);
/* Zero the whole allocation, internal header included. */
107 memset(mem, 0, size);
111 /* Creates a new message with the given layout on the given arena. */
112 upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a);
/* Returns the upb_msg_internal header of `msg`, which is located
 * sizeof(upb_msg_internal) bytes before the address `msg` points to. */
114 UPB_INLINE upb_msg_internal *upb_msg_getinternal(upb_msg *msg) {
115 ptrdiff_t size = sizeof(upb_msg_internal);
116 return (upb_msg_internal*)((char*)msg - size);
119 /* Clears the given message. */
120 void _upb_msg_clear(upb_msg *msg, const upb_msglayout *l);
122 /* Discards the unknown fields for this message only. */
123 void _upb_msg_discardunknown_shallow(upb_msg *msg);
125 /* Adds unknown data (serialized protobuf data) to the given message. The data
126 * is copied into the message instance. */
127 bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len,
130 /* Returns a reference to the message's unknown data. */
131 const char *upb_msg_getunknown(const upb_msg *msg, size_t *len);
133 /** Hasbit access *************************************************************/
/* Returns true if hasbit `idx` is set. Bit `idx` lives in byte idx / 8 of
 * `msg`, at bit position idx % 8 within that byte. */
135 UPB_INLINE bool _upb_hasbit(const upb_msg *msg, size_t idx) {
136 return (*PTR_AT(msg, idx / 8, const char) & (1 << (idx % 8))) != 0;
/* Sets hasbit `idx` (byte idx / 8, bit idx % 8). Although `msg` is declared
 * const, PTR_AT casts to a non-const char*, so the message byte is modified
 * in place. */
139 UPB_INLINE void _upb_sethas(const upb_msg *msg, size_t idx) {
140 (*PTR_AT(msg, idx / 8, char)) |= (char)(1 << (idx % 8));
/* Clears hasbit `idx` (byte idx / 8, bit idx % 8). Although `msg` is declared
 * const, PTR_AT casts to a non-const char*, so the message byte is modified
 * in place. */
143 UPB_INLINE void _upb_clearhas(const upb_msg *msg, size_t idx) {
144 (*PTR_AT(msg, idx / 8, char)) &= (char)(~(1 << (idx % 8)));
/* Hasbit index of field `f`. Asserts that the field actually uses a hasbit
 * (presence > 0).
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably it returns f->presence -- confirm. */
147 UPB_INLINE size_t _upb_msg_hasidx(const upb_msglayout_field *f) {
148 UPB_ASSERT(f->presence > 0);
/* Field-based form of _upb_hasbit(): tests the hasbit whose index is given by
 * _upb_msg_hasidx(f). */
152 UPB_INLINE bool _upb_hasbit_field(const upb_msg *msg,
153 const upb_msglayout_field *f) {
154 return _upb_hasbit(msg, _upb_msg_hasidx(f));
/* Field-based form of _upb_sethas(): sets the hasbit whose index is given by
 * _upb_msg_hasidx(f). */
157 UPB_INLINE void _upb_sethas_field(const upb_msg *msg,
158 const upb_msglayout_field *f) {
159 _upb_sethas(msg, _upb_msg_hasidx(f));
/* Field-based form of _upb_clearhas(): clears the hasbit whose index is given
 * by _upb_msg_hasidx(f). */
162 UPB_INLINE void _upb_clearhas_field(const upb_msg *msg,
163 const upb_msglayout_field *f) {
164 _upb_clearhas(msg, _upb_msg_hasidx(f));
167 /** Oneof case access *********************************************************/
/* Returns a writable pointer to the uint32_t located `case_ofs` bytes into
 * `msg` (the oneof case slot). */
169 UPB_INLINE uint32_t *_upb_oneofcase(upb_msg *msg, size_t case_ofs) {
170 return PTR_AT(msg, case_ofs, uint32_t);
/* Reads the uint32_t located `case_ofs` bytes into `msg` (the oneof case
 * slot). */
173 UPB_INLINE uint32_t _upb_getoneofcase(const void *msg, size_t case_ofs) {
174 return *PTR_AT(msg, case_ofs, uint32_t);
/* Offset of the oneof case slot for field `f`. Asserts that the field is a
 * oneof member (presence < 0); the offset is the bitwise complement of
 * `presence`, widened to ptrdiff_t before complementing. */
177 UPB_INLINE size_t _upb_oneofcase_ofs(const upb_msglayout_field *f) {
178 UPB_ASSERT(f->presence < 0);
179 return ~(ptrdiff_t)f->presence;
/* Field-based form of _upb_oneofcase(): returns a writable pointer to the
 * case slot at the offset given by _upb_oneofcase_ofs(f). */
182 UPB_INLINE uint32_t *_upb_oneofcase_field(upb_msg *msg,
183 const upb_msglayout_field *f) {
184 return _upb_oneofcase(msg, _upb_oneofcase_ofs(f));
/* Field-based form of _upb_getoneofcase(): reads the case slot at the offset
 * given by _upb_oneofcase_ofs(f). */
187 UPB_INLINE uint32_t _upb_getoneofcase_field(const upb_msg *msg,
188 const upb_msglayout_field *f) {
189 return _upb_getoneofcase(msg, _upb_oneofcase_ofs(f));
/* Presence test for a sub-message field that has no hasbit: returns true if
 * the upb_msg* stored `ofs` bytes into `msg` is non-NULL. */
192 UPB_INLINE bool _upb_has_submsg_nohasbit(const upb_msg *msg, size_t ofs) {
193 return *PTR_AT(msg, ofs, const upb_msg*) != NULL;
/* Returns true if the low two bits of the field's label equal
 * UPB_LABEL_REPEATED.
 * NOTE(review): the mask presumably lets _UPB_LABEL_PACKED (7) match as well
 * -- confirm against the value of UPB_LABEL_REPEATED. */
196 UPB_INLINE bool _upb_isrepeated(const upb_msglayout_field *field) {
197 return (field->label & 3) == UPB_LABEL_REPEATED;
/* Returns true if the field's label is numerically >= UPB_LABEL_REPEATED.
 * NOTE(review): per the function name this is meant to cover the map/packed
 * pseudo-labels too, which requires their values to be larger -- confirm. */
200 UPB_INLINE bool _upb_repeated_or_map(const upb_msglayout_field *field) {
201 return field->label >= UPB_LABEL_REPEATED;
204 /** upb_array *****************************************************************/
206 /* Our internal representation for repeated fields. */
208 uintptr_t data; /* Tagged ptr: low 3 bits of ptr are lg2(elem size). */
209 size_t len; /* Measured in elements. */
210 size_t size; /* Measured in elements. */
/* Returns the array's storage pointer with the tag removed: the low 3 bits of
 * arr->data are masked off. Asserts that the tag value is at most 4. */
214 UPB_INLINE const void *_upb_array_constptr(const upb_array *arr) {
215 UPB_ASSERT((arr->data & 7) <= 4);
216 return (void*)(arr->data & ~(uintptr_t)7);
/* Builds a tagged pointer by OR-ing `elem_size_lg2` into `ptr`. Asserts that
 * elem_size_lg2 <= 4. Unlike _upb_tag_arrptr(), this variant does not assert
 * that the low 3 bits of `ptr` are clear. */
219 UPB_INLINE uintptr_t _upb_array_tagptr(void* ptr, int elem_size_lg2) {
220 UPB_ASSERT(elem_size_lg2 <= 4);
221 return (uintptr_t)ptr | elem_size_lg2;
/* Mutable counterpart of _upb_array_constptr(): returns the same untagged
 * storage pointer with const cast away. */
224 UPB_INLINE void *_upb_array_ptr(upb_array *arr) {
225 return (void*)_upb_array_constptr(arr);
/* Builds a tagged pointer by OR-ing `elem_size_lg2` into `ptr`. Asserts that
 * elem_size_lg2 <= 4 and that the low 3 bits of `ptr` are clear, so the tag
 * cannot collide with address bits. */
228 UPB_INLINE uintptr_t _upb_tag_arrptr(void* ptr, int elem_size_lg2) {
229 UPB_ASSERT(elem_size_lg2 <= 4);
230 UPB_ASSERT(((uintptr_t)ptr & 7) == 0);
231 return (uintptr_t)ptr | (unsigned)elem_size_lg2;
/* Allocates, with a single upb_arena_malloc() call, a upb_array header
 * followed by storage for `init_size` elements of (1 << elem_size_lg2) bytes
 * each. Returns NULL if the arena allocation fails. */
234 UPB_INLINE upb_array *_upb_array_new(upb_arena *a, size_t init_size,
236 const size_t arr_size = UPB_ALIGN_UP(sizeof(upb_array), 8);
/* NOTE(review): `bytes` adds the element storage to sizeof(upb_array), but the
 * data pointer below is placed at `arr_size` (sizeof passed through
 * UPB_ALIGN_UP with 8). If sizeof(upb_array) is not a multiple of 8 the
 * allocation is short by the padding; arr_size + (init_size << elem_size_lg2)
 * looks like the intended size -- confirm. */
237 const size_t bytes = sizeof(upb_array) + (init_size << elem_size_lg2);
238 upb_array *arr = (upb_array*)upb_arena_malloc(a, bytes);
239 if (!arr) return NULL;
/* Element storage starts `arr_size` bytes past the header; the pointer is
 * stored tagged with the element size. */
240 arr->data = _upb_tag_arrptr(UPB_PTR_AT(arr, arr_size, void), elem_size_lg2);
242 arr->size = init_size;
246 /* Resizes the capacity of the array to be at least min_size. */
247 bool _upb_array_realloc(upb_array *arr, size_t min_size, upb_arena *arena);
249 /* Fallback functions for when the accessors require a resize. */
250 void *_upb_array_resize_fallback(upb_array **arr_ptr, size_t size,
251 int elem_size_lg2, upb_arena *arena);
252 bool _upb_array_append_fallback(upb_array **arr_ptr, const void *value,
253 int elem_size_lg2, upb_arena *arena);
/* Ensures capacity for `size` elements: when the current capacity
 * (arr->size) is smaller, the result of _upb_array_realloc() is returned. */
255 UPB_INLINE bool _upb_array_reserve(upb_array *arr, size_t size,
257 if (arr->size < size) return _upb_array_realloc(arr, size, arena);
/* Resizes the array to `size` elements. Capacity is reserved first; returns
 * false if _upb_array_reserve() fails. */
261 UPB_INLINE bool _upb_array_resize(upb_array *arr, size_t size,
263 if (!_upb_array_reserve(arr, size, arena)) return false;
/* Read accessor for a repeated field: loads the upb_array* stored `ofs` bytes
 * into `msg`, writes its element count to *size when `size` is non-NULL, and
 * returns the untagged (const) storage pointer. */
268 UPB_INLINE const void *_upb_array_accessor(const void *msg, size_t ofs,
270 const upb_array *arr = *PTR_AT(msg, ofs, const upb_array*);
272 if (size) *size = arr->len;
273 return _upb_array_constptr(arr);
/* Mutable accessor for a repeated field: loads the upb_array* stored `ofs`
 * bytes into `msg`, writes its element count to *size when `size` is
 * non-NULL, and returns the untagged writable storage pointer. */
280 UPB_INLINE void *_upb_array_mutable_accessor(void *msg, size_t ofs,
282 upb_array *arr = *PTR_AT(msg, ofs, upb_array*);
284 if (size) *size = arr->len;
285 return _upb_array_ptr(arr);
/* Resize accessor for the repeated field stored `ofs` bytes into `msg`.
 * Returns the array's storage pointer on the fast path. */
292 UPB_INLINE void *_upb_array_resize_accessor2(void *msg, size_t ofs, size_t size,
295 upb_array **arr_ptr = PTR_AT(msg, ofs, upb_array *);
296 upb_array *arr = *arr_ptr;
/* No array yet, or its capacity is too small: defer to the out-of-line
 * fallback, which receives the field slot itself. */
297 if (!arr || arr->size < size) {
298 return _upb_array_resize_fallback(arr_ptr, size, elem_size_lg2, arena);
301 return _upb_array_ptr(arr);
/* Append accessor for the repeated field stored `ofs` bytes into `msg`:
 * copies one element of (1 << elem_size_lg2) bytes from `value` into the slot
 * at index arr->len. */
304 UPB_INLINE bool _upb_array_append_accessor2(void *msg, size_t ofs,
308 upb_array **arr_ptr = PTR_AT(msg, ofs, upb_array *);
309 size_t elem_size = 1 << elem_size_lg2;
310 upb_array *arr = *arr_ptr;
/* No array yet, or it is full (len == capacity): defer to the out-of-line
 * fallback, which receives the field slot itself. */
312 if (!arr || arr->len == arr->size) {
313 return _upb_array_append_fallback(arr_ptr, value, elem_size_lg2, arena);
315 ptr = _upb_array_ptr(arr);
/* Byte offset of the first unused element is len * elem_size. */
316 memcpy(PTR_AT(ptr, arr->len * elem_size, char), value, elem_size);
321 /* Used by old generated code, remove once all code has been regenerated. */
322 UPB_INLINE int _upb_sizelg2(upb_fieldtype_t type) {
328 case UPB_TYPE_UINT32:
331 case UPB_TYPE_MESSAGE:
332 return UPB_SIZE(2, 3);
333 case UPB_TYPE_DOUBLE:
335 case UPB_TYPE_UINT64:
337 case UPB_TYPE_STRING:
339 return UPB_SIZE(3, 4);
/* Type-based wrapper around _upb_array_resize_accessor2(): converts `type` to
 * a log2 element size with _upb_sizelg2() and forwards the call. */
343 UPB_INLINE void *_upb_array_resize_accessor(void *msg, size_t ofs, size_t size,
344 upb_fieldtype_t type,
346 return _upb_array_resize_accessor2(msg, ofs, size, _upb_sizelg2(type), arena);
/* Type-based wrapper around _upb_array_append_accessor2(): the log2 element
 * size passed on is derived from `type` via _upb_sizelg2(). */
348 UPB_INLINE bool _upb_array_append_accessor(void *msg, size_t ofs,
349 size_t elem_size, upb_fieldtype_t type,
353 return _upb_array_append_accessor2(msg, ofs, _upb_sizelg2(type), value,
357 /** upb_map *******************************************************************/
359 /* Right now we use strmaps for everything. We'll likely want to use
360 * integer-specific maps for integer-keyed maps.*/
362 /* Size of key and val, based on the map type. Strings are represented as '0'
363 * because they must be handled specially. */
370 /* Map entries aren't actually stored, they are only used during parsing. For
371 * parsing, it helps a lot if all map entry messages have the same layout.
372 * The compiler and def.c must ensure that all map entries have this layout. */
374 upb_msg_internal internal;
376 upb_strview str; /* For str/bytes. */
377 upb_value val; /* For all other types. */
380 upb_strview str; /* For str/bytes. */
381 upb_value val; /* For all other types. */
385 /* Creates a new map on the given arena with this key/value type. */
386 upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size);
388 /* Converting between internal table representation and user values.
390 * _upb_map_tokey() and _upb_map_fromkey() are inverses.
391 * _upb_map_tovalue() and _upb_map_fromvalue() are inverses.
393 * These functions account for the fact that strings are treated differently
394 * from other types when stored in a map.
/* Converts a user key into the upb_strview used as the table key. */
397 UPB_INLINE upb_strview _upb_map_tokey(const void *key, size_t size) {
/* String keys: `key` already points at a upb_strview; return it as-is. */
398 if (size == UPB_MAPTYPE_STRING) {
399 return *(upb_strview*)key;
/* Other keys: view the raw `size` bytes of the key in place (no copy). */
401 return upb_strview_make((const char*)key, size);
/* Inverse of _upb_map_tokey(): writes the user-level key for table key `key`
 * into `out`. */
405 UPB_INLINE void _upb_map_fromkey(upb_strview key, void* out, size_t size) {
/* String keys: the user value is the upb_strview itself. */
406 if (size == UPB_MAPTYPE_STRING) {
407 memcpy(out, &key, sizeof(key));
/* Other keys: copy the `size` raw bytes the view points at. */
409 memcpy(out, key.data, size);
/* Converts a user value into the upb_value stored in the table. Returns false
 * if the arena allocation needed for a string value fails. */
413 UPB_INLINE bool _upb_map_tovalue(const void *val, size_t size, upb_value *msgval,
/* String values: copy the upb_strview into arena memory and store a pointer
 * to that copy in the table value. */
415 if (size == UPB_MAPTYPE_STRING) {
416 upb_strview *strp = (upb_strview*)upb_arena_malloc(a, sizeof(*strp));
417 if (!strp) return false;
418 *strp = *(upb_strview*)val;
419 *msgval = upb_value_ptr(strp);
/* Other values: copy the `size` raw bytes directly into the upb_value. */
421 memcpy(msgval, val, size);
/* Inverse of _upb_map_tovalue(): writes the user-level value for table value
 * `val` into `out`. */
426 UPB_INLINE void _upb_map_fromvalue(upb_value val, void* out, size_t size) {
/* String values: the table value holds a pointer to a upb_strview; copy the
 * pointed-to view out. */
427 if (size == UPB_MAPTYPE_STRING) {
428 const upb_strview *strp = (const upb_strview*)upb_value_getptr(val);
429 memcpy(out, strp, sizeof(upb_strview));
/* Other values: copy the first `size` bytes of the upb_value itself. */
431 memcpy(out, &val, size);
435 /* Map operations, shared by reflection and generated code. */
/* Returns the entry count of the map's underlying table. */
437 UPB_INLINE size_t _upb_map_size(const upb_map *map) {
438 return map->table.t.count;
/* Looks up `key` in the map's string table and converts the stored table
 * value back to its user-level form in `val`.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably the lookup result `ret` is returned -- confirm. */
441 UPB_INLINE bool _upb_map_get(const upb_map *map, const void *key,
442 size_t key_size, void *val, size_t val_size) {
444 upb_strview k = _upb_map_tokey(key, key_size);
445 bool ret = upb_strtable_lookup2(&map->table, k.data, k.size, &tabval);
447 _upb_map_fromvalue(tabval, val, val_size);
/* Advances a string-table iterator by one entry. Returns NULL once the
 * iterator is done, otherwise the current table entry as an opaque void*. */
452 UPB_INLINE void* _upb_map_next(const upb_map *map, size_t *iter) {
453 upb_strtable_iter it;
456 upb_strtable_next(&it);
458 if (upb_strtable_done(&it)) return NULL;
459 return (void*)str_tabent(&it);
/* Inserts or replaces the entry for `key`. Returns false if converting the
 * value fails; otherwise returns the result of the table insert. */
462 UPB_INLINE bool _upb_map_set(upb_map *map, const void *key, size_t key_size,
463 void *val, size_t val_size, upb_arena *arena) {
464 upb_strview strkey = _upb_map_tokey(key, key_size);
465 upb_value tabval = {0};
466 if (!_upb_map_tovalue(val, val_size, &tabval, arena)) return false;
467 upb_alloc *a = upb_arena_alloc(arena);
469 /* TODO(haberman): add overwrite operation to minimize number of lookups. */
/* Replace is done as remove-then-insert; the remove result is ignored, so a
 * missing key is not an error. */
470 upb_strtable_remove3(&map->table, strkey.data, strkey.size, NULL, a);
471 return upb_strtable_insert3(&map->table, strkey.data, strkey.size, tabval, a);
/* Removes the entry for `key` from the map's table and returns the result of
 * upb_strtable_remove3(). NULL is passed for both the removed-value output
 * and the allocator. */
474 UPB_INLINE bool _upb_map_delete(upb_map *map, const void *key, size_t key_size) {
475 upb_strview k = _upb_map_tokey(key, key_size);
476 return upb_strtable_remove3(&map->table, k.data, k.size, NULL, NULL);
/* Clears the map's underlying table via upb_strtable_clear(). */
479 UPB_INLINE void _upb_map_clear(upb_map *map) {
480 upb_strtable_clear(&map->table);
483 /* Message map operations, these get the map from the message first. */
/* Size of the map field stored `ofs` bytes into `msg`; a NULL map pointer
 * counts as an empty map (0). */
485 UPB_INLINE size_t _upb_msg_map_size(const upb_msg *msg, size_t ofs) {
486 upb_map *map = *UPB_PTR_AT(msg, ofs, upb_map *);
487 return map ? _upb_map_size(map) : 0;
/* Lookup in the map field stored `ofs` bytes into `msg`. Returns false when
 * the map pointer is NULL, otherwise the result of _upb_map_get(). */
490 UPB_INLINE bool _upb_msg_map_get(const upb_msg *msg, size_t ofs,
491 const void *key, size_t key_size, void *val,
493 upb_map *map = *UPB_PTR_AT(msg, ofs, upb_map *);
494 if (!map) return false;
495 return _upb_map_get(map, key, key_size, val, val_size);
/* Iteration step over the map field stored `ofs` bytes into `msg`. Returns
 * NULL when the map pointer is NULL, otherwise the result of
 * _upb_map_next(). */
498 UPB_INLINE void *_upb_msg_map_next(const upb_msg *msg, size_t ofs,
500 upb_map *map = *UPB_PTR_AT(msg, ofs, upb_map *);
501 if (!map) return NULL;
502 return _upb_map_next(map, iter);
/* Sets `key` -> `val` in the map field stored `ofs` bytes into `msg`, storing
 * a map created by _upb_map_new() in the field slot. The condition guarding
 * the creation is not visible in this excerpt.
 * NOTE(review): the result of _upb_map_new() is passed to _upb_map_set()
 * without a visible NULL check; if it can fail on arena exhaustion this
 * dereferences NULL -- confirm. */
505 UPB_INLINE bool _upb_msg_map_set(upb_msg *msg, size_t ofs, const void *key,
506 size_t key_size, void *val, size_t val_size,
508 upb_map **map = PTR_AT(msg, ofs, upb_map *);
510 *map = _upb_map_new(arena, key_size, val_size);
512 return _upb_map_set(*map, key, key_size, val, val_size, arena);
/* Deletes `key` from the map field stored `ofs` bytes into `msg`. Returns
 * false when the map pointer is NULL, otherwise the result of
 * _upb_map_delete(). */
515 UPB_INLINE bool _upb_msg_map_delete(upb_msg *msg, size_t ofs, const void *key,
517 upb_map *map = *UPB_PTR_AT(msg, ofs, upb_map *);
518 if (!map) return false;
519 return _upb_map_delete(map, key, key_size);
/* Loads the map pointer stored `ofs` bytes into `msg`.
 * NOTE(review): the remainder of the body is not visible in this excerpt;
 * per the function name it clears that map -- confirm. */
522 UPB_INLINE void _upb_msg_map_clear(upb_msg *msg, size_t ofs) {
523 upb_map *map = *UPB_PTR_AT(msg, ofs, upb_map *);
528 /* Accessing map key/value from a pointer, used by generated code only. */
/* Extracts the key of a map entry. `msg` is really a pointer to a upb_tabent;
 * its string key is read with upb_tabstr() and converted to user form in
 * `key` by _upb_map_fromkey(). */
530 UPB_INLINE void _upb_msg_map_key(const void* msg, void* key, size_t size) {
531 const upb_tabent *ent = (const upb_tabent*)msg;
534 k.data = upb_tabstr(ent->key, &u32len);
536 _upb_map_fromkey(k, key, size);
/* Extracts the value of a map entry. `msg` is really a pointer to a
 * upb_tabent; its raw value is wrapped in a upb_value and converted to user
 * form in `val` by _upb_map_fromvalue(). */
539 UPB_INLINE void _upb_msg_map_value(const void* msg, void* val, size_t size) {
540 const upb_tabent *ent = (const upb_tabent*)msg;
542 _upb_value_setval(&v, ent->val.val);
543 _upb_map_fromvalue(v, val, size);
/* Overwrites the value of an existing map entry in place. `msg` is really a
 * pointer to a upb_tabent. */
546 UPB_INLINE void _upb_msg_map_set_value(void* msg, const void* val, size_t size) {
547 upb_tabent *ent = (upb_tabent*)msg;
548 /* This is like _upb_map_tovalue() except the entry already exists so we can
549 * reuse the allocated upb_strview for string fields. */
550 if (size == UPB_MAPTYPE_STRING) {
/* The table value holds a pointer to the entry's upb_strview; overwrite the
 * pointed-to view rather than the pointer. */
551 upb_strview *strp = (upb_strview*)(uintptr_t)ent->val.val;
552 memcpy(strp, val, sizeof(*strp));
/* Other values are stored inline in the table value. */
554 memcpy(&ent->val.val, val, size);
558 /** _upb_mapsorter *************************************************************/
560 /* _upb_mapsorter sorts maps and provides ordered iteration over the entries.
561 * Since maps can be recursive (map values can be messages which contain other
562 * maps), _upb_mapsorter can contain a stack of maps. */
565 upb_tabent const**entries;
576 UPB_INLINE void _upb_mapsorter_init(_upb_mapsorter *s) {
/* Releases the sorter's entries array with free(). The NULL guard is
 * redundant, since free(NULL) is defined to do nothing. */
582 UPB_INLINE void _upb_mapsorter_destroy(_upb_mapsorter *s) {
583 if (s->entries) free(s->entries);
586 bool _upb_mapsorter_pushmap(_upb_mapsorter *s, upb_descriptortype_t key_type,
587 const upb_map *map, _upb_sortedmap *sorted);
/* Pops a sorted map off the sorter by resetting s->size to the position where
 * that map's entries started (sorted->start). */
589 UPB_INLINE void _upb_mapsorter_popmap(_upb_mapsorter *s, _upb_sortedmap *sorted) {
590 s->size = sorted->start;
/* Fetches the next entry of a sorted map into `ent`. Returns false once
 * sorted->pos has reached sorted->end. */
593 UPB_INLINE bool _upb_sortedmap_next(_upb_mapsorter *s, const upb_map *map,
594 _upb_sortedmap *sorted,
595 upb_map_entry *ent) {
596 if (sorted->pos == sorted->end) return false;
/* Take the entry at the current position and advance the cursor. */
597 const upb_tabent *tabent = s->entries[sorted->pos++];
/* Convert the table key and value to user form using the map's own key and
 * value sizes. */
598 upb_strview key = upb_tabstrview(tabent->key);
599 _upb_map_fromkey(key, &ent->k, map->key_size);
600 upb_value val = {tabent->val.val};
601 _upb_map_fromvalue(val, &ent->v, map->val_size);
611 #include "upb/port_undef.inc"
613 #endif /* UPB_MSG_H_ */