2 * Copyright (c) 2009-2021, Google LLC
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Google LLC nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 #include "google/protobuf/descriptor.upb.h"
37 #include "upb/reflection.h"
40 #include "upb/port_def.inc"
44 char str[1]; /* Null-terminated string data follows. */
48 const upb_filedef *file;
49 const upb_msgdef *msgdef;
50 const char *full_name;
51 const char *json_name;
60 const upb_oneofdef *oneof;
62 const upb_msgdef *msgdef;
63 const upb_enumdef *enumdef;
64 const google_protobuf_FieldDescriptorProto *unresolved;
68 uint16_t layout_index;
72 bool proto3_optional_;
73 upb_descriptortype_t type_;
78 const upb_msglayout *layout;
79 const upb_filedef *file;
80 const char *full_name;
82 /* Tables for looking up fields by number and name. */
86 const upb_fielddef *fields;
87 const upb_oneofdef *oneofs;
92 /* Is this a map-entry message? */
94 upb_wellknowntype_t well_known_type;
96 /* TODO(haberman): proper extension ranges (there can be multiple). */
100 const upb_filedef *file;
101 const char *full_name;
107 struct upb_oneofdef {
108 const upb_msgdef *parent;
109 const char *full_name;
112 const upb_fielddef **fields;
120 const char *phpprefix;
121 const char *phpnamespace;
123 const upb_filedef **deps;
124 const upb_msgdef *msgs;
125 const upb_enumdef *enums;
126 const upb_fielddef *exts;
127 const upb_symtab *symtab;
138 upb_strtable syms; /* full_name -> packed def ptr */
139 upb_strtable files; /* file_name -> upb_filedef* */
143 /* Inside a symtab we store tagged pointers to specific def types. */
145 UPB_DEFTYPE_FIELD = 0,
147 /* Only inside symtab table. */
149 UPB_DEFTYPE_ENUM = 2,
151 /* Only inside message table. */
152 UPB_DEFTYPE_ONEOF = 1,
153 UPB_DEFTYPE_FIELD_JSONNAME = 2
156 static const void *unpack_def(upb_value v, upb_deftype_t type) {
157 uintptr_t num = (uintptr_t)upb_value_getconstptr(v);
158 return (num & 3) == type ? (const void*)(num & ~3) : NULL;
161 static upb_value pack_def(const void *ptr, upb_deftype_t type) {
162 uintptr_t num = (uintptr_t)ptr | type;
163 return upb_value_constptr((const void*)num);
/* isalpha() etc. from <ctype.h> are locale-dependent, which we don't want. */

/* True when |c| lies in the inclusive range [low, high]. */
static bool upb_isbetween(char c, char low, char high) {
  return c >= low && c <= high;
}

/* True for 'A'-'Z', 'a'-'z' and '_'. */
static bool upb_isletter(char c) {
  return upb_isbetween(c, 'A', 'Z') || upb_isbetween(c, 'a', 'z') || c == '_';
}

/* True for anything upb_isletter() accepts, plus '0'-'9'. */
static bool upb_isalphanum(char c) {
  return upb_isletter(c) || upb_isbetween(c, '0', '9');
}
/* Returns the part of |fullname| after its last '.', or |fullname| itself
 * when it contains no '.'. A NULL input yields NULL. The result points into
 * |fullname|; nothing is copied. */
static const char *shortdefname(const char *fullname) {
  const char *last_dot;

  if (fullname == NULL) {
    return NULL;
  }

  last_dot = strrchr(fullname, '.');
  if (last_dot == NULL) {
    /* No '.' in the name, return the full string. */
    return fullname;
  }

  /* Return one past the last '.'. */
  return last_dot + 1;
}
193 /* All submessage fields are lower than all other fields.
194 * Secondly, fields are increasing in order. */
195 uint32_t field_rank(const upb_fielddef *f) {
196 uint32_t ret = upb_fielddef_number(f);
197 const uint32_t high_bit = 1 << 30;
198 UPB_ASSERT(ret < high_bit);
199 if (!upb_fielddef_issubmsg(f))
204 int cmp_fields(const void *p1, const void *p2) {
205 const upb_fielddef *f1 = *(upb_fielddef*const*)p1;
206 const upb_fielddef *f2 = *(upb_fielddef*const*)p2;
207 return field_rank(f1) - field_rank(f2);
210 static void upb_status_setoom(upb_status *status) {
211 upb_status_seterrmsg(status, "out of memory");
214 static void assign_msg_wellknowntype(upb_msgdef *m) {
215 const char *name = upb_msgdef_fullname(m);
217 m->well_known_type = UPB_WELLKNOWN_UNSPECIFIED;
220 if (!strcmp(name, "google.protobuf.Any")) {
221 m->well_known_type = UPB_WELLKNOWN_ANY;
222 } else if (!strcmp(name, "google.protobuf.FieldMask")) {
223 m->well_known_type = UPB_WELLKNOWN_FIELDMASK;
224 } else if (!strcmp(name, "google.protobuf.Duration")) {
225 m->well_known_type = UPB_WELLKNOWN_DURATION;
226 } else if (!strcmp(name, "google.protobuf.Timestamp")) {
227 m->well_known_type = UPB_WELLKNOWN_TIMESTAMP;
228 } else if (!strcmp(name, "google.protobuf.DoubleValue")) {
229 m->well_known_type = UPB_WELLKNOWN_DOUBLEVALUE;
230 } else if (!strcmp(name, "google.protobuf.FloatValue")) {
231 m->well_known_type = UPB_WELLKNOWN_FLOATVALUE;
232 } else if (!strcmp(name, "google.protobuf.Int64Value")) {
233 m->well_known_type = UPB_WELLKNOWN_INT64VALUE;
234 } else if (!strcmp(name, "google.protobuf.UInt64Value")) {
235 m->well_known_type = UPB_WELLKNOWN_UINT64VALUE;
236 } else if (!strcmp(name, "google.protobuf.Int32Value")) {
237 m->well_known_type = UPB_WELLKNOWN_INT32VALUE;
238 } else if (!strcmp(name, "google.protobuf.UInt32Value")) {
239 m->well_known_type = UPB_WELLKNOWN_UINT32VALUE;
240 } else if (!strcmp(name, "google.protobuf.BoolValue")) {
241 m->well_known_type = UPB_WELLKNOWN_BOOLVALUE;
242 } else if (!strcmp(name, "google.protobuf.StringValue")) {
243 m->well_known_type = UPB_WELLKNOWN_STRINGVALUE;
244 } else if (!strcmp(name, "google.protobuf.BytesValue")) {
245 m->well_known_type = UPB_WELLKNOWN_BYTESVALUE;
246 } else if (!strcmp(name, "google.protobuf.Value")) {
247 m->well_known_type = UPB_WELLKNOWN_VALUE;
248 } else if (!strcmp(name, "google.protobuf.ListValue")) {
249 m->well_known_type = UPB_WELLKNOWN_LISTVALUE;
250 } else if (!strcmp(name, "google.protobuf.Struct")) {
251 m->well_known_type = UPB_WELLKNOWN_STRUCT;
253 m->well_known_type = UPB_WELLKNOWN_UNSPECIFIED;
258 /* upb_enumdef ****************************************************************/
260 const char *upb_enumdef_fullname(const upb_enumdef *e) {
264 const char *upb_enumdef_name(const upb_enumdef *e) {
265 return shortdefname(e->full_name);
268 const upb_filedef *upb_enumdef_file(const upb_enumdef *e) {
272 int32_t upb_enumdef_default(const upb_enumdef *e) {
273 UPB_ASSERT(upb_enumdef_iton(e, e->defaultval));
274 return e->defaultval;
277 int upb_enumdef_numvals(const upb_enumdef *e) {
278 return (int)upb_strtable_count(&e->ntoi);
281 void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
282 /* We iterate over the ntoi table, to account for duplicate numbers. */
283 upb_strtable_begin(i, &e->ntoi);
286 void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); }
287 bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); }
289 bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
290 size_t len, int32_t *num) {
292 if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
295 if (num) *num = upb_value_getint32(v);
299 const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
301 return upb_inttable_lookup(&def->iton, num, &v) ? upb_value_getcstr(v) : NULL;
304 const char *upb_enum_iter_name(upb_enum_iter *iter) {
305 return upb_strtable_iter_key(iter).data;
308 int32_t upb_enum_iter_number(upb_enum_iter *iter) {
309 return upb_value_getint32(upb_strtable_iter_value(iter));
313 /* upb_fielddef ***************************************************************/
315 const char *upb_fielddef_fullname(const upb_fielddef *f) {
319 upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
321 case UPB_DESCRIPTOR_TYPE_DOUBLE:
322 return UPB_TYPE_DOUBLE;
323 case UPB_DESCRIPTOR_TYPE_FLOAT:
324 return UPB_TYPE_FLOAT;
325 case UPB_DESCRIPTOR_TYPE_INT64:
326 case UPB_DESCRIPTOR_TYPE_SINT64:
327 case UPB_DESCRIPTOR_TYPE_SFIXED64:
328 return UPB_TYPE_INT64;
329 case UPB_DESCRIPTOR_TYPE_INT32:
330 case UPB_DESCRIPTOR_TYPE_SFIXED32:
331 case UPB_DESCRIPTOR_TYPE_SINT32:
332 return UPB_TYPE_INT32;
333 case UPB_DESCRIPTOR_TYPE_UINT64:
334 case UPB_DESCRIPTOR_TYPE_FIXED64:
335 return UPB_TYPE_UINT64;
336 case UPB_DESCRIPTOR_TYPE_UINT32:
337 case UPB_DESCRIPTOR_TYPE_FIXED32:
338 return UPB_TYPE_UINT32;
339 case UPB_DESCRIPTOR_TYPE_ENUM:
340 return UPB_TYPE_ENUM;
341 case UPB_DESCRIPTOR_TYPE_BOOL:
342 return UPB_TYPE_BOOL;
343 case UPB_DESCRIPTOR_TYPE_STRING:
344 return UPB_TYPE_STRING;
345 case UPB_DESCRIPTOR_TYPE_BYTES:
346 return UPB_TYPE_BYTES;
347 case UPB_DESCRIPTOR_TYPE_GROUP:
348 case UPB_DESCRIPTOR_TYPE_MESSAGE:
349 return UPB_TYPE_MESSAGE;
354 upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) {
358 uint32_t upb_fielddef_index(const upb_fielddef *f) {
362 upb_label_t upb_fielddef_label(const upb_fielddef *f) {
366 uint32_t upb_fielddef_number(const upb_fielddef *f) {
370 bool upb_fielddef_isextension(const upb_fielddef *f) {
371 return f->is_extension_;
374 bool upb_fielddef_lazy(const upb_fielddef *f) {
378 bool upb_fielddef_packed(const upb_fielddef *f) {
382 const char *upb_fielddef_name(const upb_fielddef *f) {
383 return shortdefname(f->full_name);
386 const char *upb_fielddef_jsonname(const upb_fielddef *f) {
390 const upb_filedef *upb_fielddef_file(const upb_fielddef *f) {
394 const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
398 const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
402 const upb_oneofdef *upb_fielddef_realcontainingoneof(const upb_fielddef *f) {
403 if (!f->oneof || upb_oneofdef_issynthetic(f->oneof)) return NULL;
407 upb_msgval upb_fielddef_default(const upb_fielddef *f) {
408 UPB_ASSERT(!upb_fielddef_issubmsg(f));
410 if (upb_fielddef_isstring(f)) {
411 str_t *str = f->defaultval.str;
413 ret.str_val.data = str->str;
414 ret.str_val.size = str->len;
416 ret.str_val.size = 0;
419 memcpy(&ret, &f->defaultval, 8);
424 static void chkdefaulttype(const upb_fielddef *f, int ctype) {
429 int64_t upb_fielddef_defaultint64(const upb_fielddef *f) {
430 chkdefaulttype(f, UPB_TYPE_INT64);
431 return f->defaultval.sint;
434 int32_t upb_fielddef_defaultint32(const upb_fielddef *f) {
435 chkdefaulttype(f, UPB_TYPE_INT32);
436 return (int32_t)f->defaultval.sint;
439 uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) {
440 chkdefaulttype(f, UPB_TYPE_UINT64);
441 return f->defaultval.uint;
444 uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) {
445 chkdefaulttype(f, UPB_TYPE_UINT32);
446 return (uint32_t)f->defaultval.uint;
449 bool upb_fielddef_defaultbool(const upb_fielddef *f) {
450 chkdefaulttype(f, UPB_TYPE_BOOL);
451 return f->defaultval.boolean;
454 float upb_fielddef_defaultfloat(const upb_fielddef *f) {
455 chkdefaulttype(f, UPB_TYPE_FLOAT);
456 return f->defaultval.flt;
459 double upb_fielddef_defaultdouble(const upb_fielddef *f) {
460 chkdefaulttype(f, UPB_TYPE_DOUBLE);
461 return f->defaultval.dbl;
464 const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
465 str_t *str = f->defaultval.str;
466 UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_STRING ||
467 upb_fielddef_type(f) == UPB_TYPE_BYTES ||
468 upb_fielddef_type(f) == UPB_TYPE_ENUM);
470 if (len) *len = str->len;
478 const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) {
479 return upb_fielddef_type(f) == UPB_TYPE_MESSAGE ? f->sub.msgdef : NULL;
482 const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) {
483 return upb_fielddef_type(f) == UPB_TYPE_ENUM ? f->sub.enumdef : NULL;
486 const upb_msglayout_field *upb_fielddef_layout(const upb_fielddef *f) {
487 return &f->msgdef->layout->fields[f->layout_index];
490 bool upb_fielddef_issubmsg(const upb_fielddef *f) {
491 return upb_fielddef_type(f) == UPB_TYPE_MESSAGE;
494 bool upb_fielddef_isstring(const upb_fielddef *f) {
495 return upb_fielddef_type(f) == UPB_TYPE_STRING ||
496 upb_fielddef_type(f) == UPB_TYPE_BYTES;
499 bool upb_fielddef_isseq(const upb_fielddef *f) {
500 return upb_fielddef_label(f) == UPB_LABEL_REPEATED;
503 bool upb_fielddef_isprimitive(const upb_fielddef *f) {
504 return !upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f);
507 bool upb_fielddef_ismap(const upb_fielddef *f) {
508 return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) &&
509 upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
512 bool upb_fielddef_hassubdef(const upb_fielddef *f) {
513 return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
516 bool upb_fielddef_haspresence(const upb_fielddef *f) {
517 if (upb_fielddef_isseq(f)) return false;
518 return upb_fielddef_issubmsg(f) || upb_fielddef_containingoneof(f) ||
519 f->file->syntax == UPB_SYNTAX_PROTO2;
/* True when |x| lies in the inclusive range [low, high]. */
static bool between(int32_t x, int32_t low, int32_t high) {
  return x >= low && x <= high;
}

/* Range checks for raw integer codes. The bounds are the literal ranges the
 * code accepts; presumably they mirror the corresponding enum ranges -- TODO
 * confirm against the enum declarations. */
bool upb_fielddef_checklabel(int32_t label) { return between(label, 1, 3); }
bool upb_fielddef_checktype(int32_t type) { return between(type, 1, 11); }
bool upb_fielddef_checkintfmt(int32_t fmt) { return between(fmt, 1, 3); }

bool upb_fielddef_checkdescriptortype(int32_t type) {
  return between(type, 1, 18);
}
534 /* upb_msgdef *****************************************************************/
536 const char *upb_msgdef_fullname(const upb_msgdef *m) {
540 const upb_filedef *upb_msgdef_file(const upb_msgdef *m) {
544 const char *upb_msgdef_name(const upb_msgdef *m) {
545 return shortdefname(m->full_name);
548 upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m) {
549 return m->file->syntax;
552 const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
554 return upb_inttable_lookup(&m->itof, i, &val) ? upb_value_getconstptr(val)
558 const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
562 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
566 return unpack_def(val, UPB_DEFTYPE_FIELD);
569 const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
573 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
577 return unpack_def(val, UPB_DEFTYPE_ONEOF);
580 bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len,
581 const upb_fielddef **f, const upb_oneofdef **o) {
584 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
588 *o = unpack_def(val, UPB_DEFTYPE_ONEOF);
589 *f = unpack_def(val, UPB_DEFTYPE_FIELD);
590 return *o || *f; /* False if this was a JSON name. */
593 const upb_fielddef *upb_msgdef_lookupjsonname(const upb_msgdef *m,
594 const char *name, size_t len) {
596 const upb_fielddef* f;
598 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
602 f = unpack_def(val, UPB_DEFTYPE_FIELD);
603 if (!f) f = unpack_def(val, UPB_DEFTYPE_FIELD_JSONNAME);
608 int upb_msgdef_numfields(const upb_msgdef *m) {
609 return m->field_count;
612 int upb_msgdef_numoneofs(const upb_msgdef *m) {
613 return m->oneof_count;
616 int upb_msgdef_numrealoneofs(const upb_msgdef *m) {
617 return m->real_oneof_count;
620 int upb_msgdef_fieldcount(const upb_msgdef *m) {
621 return m->field_count;
624 int upb_msgdef_oneofcount(const upb_msgdef *m) {
625 return m->oneof_count;
628 int upb_msgdef_realoneofcount(const upb_msgdef *m) {
629 return m->real_oneof_count;
632 const upb_msglayout *upb_msgdef_layout(const upb_msgdef *m) {
636 const upb_fielddef *upb_msgdef_field(const upb_msgdef *m, int i) {
637 UPB_ASSERT(i >= 0 && i < m->field_count);
638 return &m->fields[i];
641 const upb_oneofdef *upb_msgdef_oneof(const upb_msgdef *m, int i) {
642 UPB_ASSERT(i >= 0 && i < m->oneof_count);
643 return &m->oneofs[i];
646 bool upb_msgdef_mapentry(const upb_msgdef *m) {
650 upb_wellknowntype_t upb_msgdef_wellknowntype(const upb_msgdef *m) {
651 return m->well_known_type;
654 bool upb_msgdef_isnumberwrapper(const upb_msgdef *m) {
655 upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
656 return type >= UPB_WELLKNOWN_DOUBLEVALUE &&
657 type <= UPB_WELLKNOWN_UINT32VALUE;
660 bool upb_msgdef_iswrapper(const upb_msgdef *m) {
661 upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
662 return type >= UPB_WELLKNOWN_DOUBLEVALUE &&
663 type <= UPB_WELLKNOWN_BOOLVALUE;
666 void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) {
667 upb_inttable_begin(iter, &m->itof);
670 void upb_msg_field_next(upb_msg_field_iter *iter) { upb_inttable_next(iter); }
672 bool upb_msg_field_done(const upb_msg_field_iter *iter) {
673 return upb_inttable_done(iter);
676 upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
677 return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter));
680 void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) {
681 upb_inttable_iter_setdone(iter);
684 bool upb_msg_field_iter_isequal(const upb_msg_field_iter * iter1,
685 const upb_msg_field_iter * iter2) {
686 return upb_inttable_iter_isequal(iter1, iter2);
689 void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
690 upb_strtable_begin(iter, &m->ntof);
691 /* We need to skip past any initial fields. */
692 while (!upb_strtable_done(iter) &&
693 !unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF)) {
694 upb_strtable_next(iter);
698 void upb_msg_oneof_next(upb_msg_oneof_iter *iter) {
699 /* We need to skip past fields to return only oneofs. */
701 upb_strtable_next(iter);
702 } while (!upb_strtable_done(iter) &&
703 !unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF));
706 bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
707 return upb_strtable_done(iter);
710 const upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
711 return unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF);
714 void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) {
715 upb_strtable_iter_setdone(iter);
718 bool upb_msg_oneof_iter_isequal(const upb_msg_oneof_iter *iter1,
719 const upb_msg_oneof_iter *iter2) {
720 return upb_strtable_iter_isequal(iter1, iter2);
723 /* upb_oneofdef ***************************************************************/
725 const char *upb_oneofdef_name(const upb_oneofdef *o) {
726 return shortdefname(o->full_name);
729 const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
733 int upb_oneofdef_fieldcount(const upb_oneofdef *o) {
734 return o->field_count;
737 const upb_fielddef *upb_oneofdef_field(const upb_oneofdef *o, int i) {
738 UPB_ASSERT(i < o->field_count);
742 int upb_oneofdef_numfields(const upb_oneofdef *o) {
743 return o->field_count;
746 uint32_t upb_oneofdef_index(const upb_oneofdef *o) {
747 return o - o->parent->oneofs;
750 bool upb_oneofdef_issynthetic(const upb_oneofdef *o) {
754 const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
755 const char *name, size_t length) {
757 return upb_strtable_lookup2(&o->ntof, name, length, &val) ?
758 upb_value_getptr(val) : NULL;
761 const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
763 return upb_inttable_lookup(&o->itof, num, &val) ? upb_value_getptr(val)
767 void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) {
768 upb_inttable_begin(iter, &o->itof);
771 void upb_oneof_next(upb_oneof_iter *iter) {
772 upb_inttable_next(iter);
775 bool upb_oneof_done(upb_oneof_iter *iter) {
776 return upb_inttable_done(iter);
779 upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
780 return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter));
783 void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
784 upb_inttable_iter_setdone(iter);
787 /* upb_filedef ****************************************************************/
789 const char *upb_filedef_name(const upb_filedef *f) {
793 const char *upb_filedef_package(const upb_filedef *f) {
797 const char *upb_filedef_phpprefix(const upb_filedef *f) {
801 const char *upb_filedef_phpnamespace(const upb_filedef *f) {
802 return f->phpnamespace;
805 upb_syntax_t upb_filedef_syntax(const upb_filedef *f) {
809 int upb_filedef_msgcount(const upb_filedef *f) {
813 int upb_filedef_depcount(const upb_filedef *f) {
817 int upb_filedef_enumcount(const upb_filedef *f) {
818 return f->enum_count;
821 const upb_filedef *upb_filedef_dep(const upb_filedef *f, int i) {
822 return i < 0 || i >= f->dep_count ? NULL : f->deps[i];
825 const upb_msgdef *upb_filedef_msg(const upb_filedef *f, int i) {
826 return i < 0 || i >= f->msg_count ? NULL : &f->msgs[i];
829 const upb_enumdef *upb_filedef_enum(const upb_filedef *f, int i) {
830 return i < 0 || i >= f->enum_count ? NULL : &f->enums[i];
833 const upb_symtab *upb_filedef_symtab(const upb_filedef *f) {
837 void upb_symtab_free(upb_symtab *s) {
838 upb_arena_free(s->arena);
842 upb_symtab *upb_symtab_new(void) {
843 upb_symtab *s = upb_gmalloc(sizeof(*s));
849 s->arena = upb_arena_new();
852 if (!upb_strtable_init(&s->syms, 32, s->arena) ||
853 !upb_strtable_init(&s->files, 4, s->arena)) {
854 upb_arena_free(s->arena);
861 const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
863 return upb_strtable_lookup(&s->syms, sym, &v) ?
864 unpack_def(v, UPB_DEFTYPE_MSG) : NULL;
867 const upb_msgdef *upb_symtab_lookupmsg2(const upb_symtab *s, const char *sym,
870 return upb_strtable_lookup2(&s->syms, sym, len, &v) ?
871 unpack_def(v, UPB_DEFTYPE_MSG) : NULL;
874 const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
876 return upb_strtable_lookup(&s->syms, sym, &v) ?
877 unpack_def(v, UPB_DEFTYPE_ENUM) : NULL;
880 const upb_filedef *upb_symtab_lookupfile(const upb_symtab *s, const char *name) {
882 return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v)
886 const upb_filedef *upb_symtab_lookupfile2(
887 const upb_symtab *s, const char *name, size_t len) {
889 return upb_strtable_lookup2(&s->files, name, len, &v) ?
890 upb_value_getconstptr(v) : NULL;
893 int upb_symtab_filecount(const upb_symtab *s) {
894 return (int)upb_strtable_count(&s->files);
897 /* Code to build defs from descriptor protos. *********************************/
899 /* There is a question of how much validation to do here. It will be difficult
900 * to perfectly match the amount of validation performed by proto2. But since
901 * this code is used to directly build defs from Ruby (for example) we do need
902 * to validate important constraints like uniqueness of names and numbers. */
/* Calls symtab_oomerr(ctx) when |x| evaluates false. Requires a variable
 * named `ctx` in the calling scope. Wrapped in do/while(0) so the expansion
 * is a single statement and cannot capture a following `else`. */
#define CHK_OOM(x) do { if (!(x)) { symtab_oomerr(ctx); } } while (0)
908 upb_filedef *file; /* File we are building. */
909 upb_arena *arena; /* Allocate defs here. */
910 const upb_msglayout **layouts; /* NULL if we should build layouts. */
911 upb_status *status; /* Record errors here. */
912 jmp_buf err; /* longjmp() on error. */
915 UPB_NORETURN UPB_NOINLINE UPB_PRINTF(2, 3)
916 static void symtab_errf(symtab_addctx *ctx, const char *fmt, ...) {
919 upb_status_vseterrf(ctx->status, fmt, argp);
921 UPB_LONGJMP(ctx->err, 1);
924 UPB_NORETURN UPB_NOINLINE
925 static void symtab_oomerr(symtab_addctx *ctx) {
926 upb_status_setoom(ctx->status);
927 UPB_LONGJMP(ctx->err, 1);
930 void *symtab_alloc(symtab_addctx *ctx, size_t bytes) {
931 void *ret = upb_arena_malloc(ctx->arena, bytes);
932 if (!ret) symtab_oomerr(ctx);
936 static void check_ident(symtab_addctx *ctx, upb_strview name, bool full) {
937 const char *str = name.data;
938 size_t len = name.size;
941 for (i = 0; i < len; i++) {
944 if (start || !full) {
945 symtab_errf(ctx, "invalid name: unexpected '.' (%.*s)", (int)len, str);
949 if (!upb_isletter(c)) {
952 "invalid name: path components must start with a letter (%.*s)",
957 if (!upb_isalphanum(c)) {
958 symtab_errf(ctx, "invalid name: non-alphanumeric character (%.*s)",
964 symtab_errf(ctx, "invalid name: empty part (%.*s)", (int)len, str);
/* Returns ceil(n / d) for d > 0. Computed from quotient and remainder so the
 * result is correct even when n + d - 1 would overflow size_t. */
static size_t div_round_up(size_t n, size_t d) {
  return n / d + (n % d != 0 ? 1 : 0);
}
972 static size_t upb_msgval_sizeof(upb_fieldtype_t type) {
974 case UPB_TYPE_DOUBLE:
976 case UPB_TYPE_UINT64:
980 case UPB_TYPE_UINT32:
985 case UPB_TYPE_MESSAGE:
986 return sizeof(void*);
988 case UPB_TYPE_STRING:
989 return sizeof(upb_strview);
994 static uint8_t upb_msg_fielddefsize(const upb_fielddef *f) {
995 if (upb_msgdef_mapentry(upb_fielddef_containingtype(f))) {
997 UPB_ASSERT(sizeof(ent.k) == sizeof(ent.v));
998 return sizeof(ent.k);
999 } else if (upb_fielddef_isseq(f)) {
1000 return sizeof(void*);
1002 return upb_msgval_sizeof(upb_fielddef_type(f));
1006 static uint32_t upb_msglayout_place(upb_msglayout *l, size_t size) {
1009 l->size = UPB_ALIGN_UP(l->size, size);
1015 static int field_number_cmp(const void *p1, const void *p2) {
1016 const upb_msglayout_field *f1 = p1;
1017 const upb_msglayout_field *f2 = p2;
1018 return f1->number - f2->number;
1021 static void assign_layout_indices(const upb_msgdef *m, upb_msglayout *l,
1022 upb_msglayout_field *fields) {
1024 int n = upb_msgdef_numfields(m);
1025 int dense_below = 0;
1026 for (i = 0; i < n; i++) {
1027 upb_fielddef *f = (upb_fielddef*)upb_msgdef_itof(m, fields[i].number);
1029 f->layout_index = i;
1030 if (i < UINT8_MAX && fields[i].number == i + 1 &&
1031 (i == 0 || fields[i-1].number == i)) {
1032 dense_below = i + 1;
1035 l->dense_below = dense_below;
1038 static void fill_fieldlayout(upb_msglayout_field *field, const upb_fielddef *f) {
1039 field->number = upb_fielddef_number(f);
1040 field->descriptortype = upb_fielddef_descriptortype(f);
1042 if (field->descriptortype == UPB_DTYPE_STRING &&
1043 f->file->syntax == UPB_SYNTAX_PROTO2) {
1044 /* See TableDescriptorType() in upbc/generator.cc for details and
1046 field->descriptortype = UPB_DTYPE_BYTES;
1049 if (upb_fielddef_ismap(f)) {
1050 field->mode = _UPB_MODE_MAP;
1051 } else if (upb_fielddef_isseq(f)) {
1052 field->mode = _UPB_MODE_ARRAY;
1054 field->mode = _UPB_MODE_SCALAR;
1057 if (upb_fielddef_packed(f)) {
1058 field->mode |= _UPB_MODE_IS_PACKED;
1062 /* This function is the dynamic equivalent of message_layout.{cc,h} in upbc.
1063 * It computes a dynamic layout for all of the fields in |m|. */
1064 static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) {
1065 upb_msglayout *l = (upb_msglayout*)m->layout;
1066 upb_msg_field_iter it;
1067 upb_msg_oneof_iter oit;
1069 size_t field_count = upb_msgdef_numfields(m);
1070 size_t submsg_count = 0;
1071 const upb_msglayout **submsgs;
1072 upb_msglayout_field *fields;
1074 memset(l, 0, sizeof(*l) + sizeof(_upb_fasttable_entry));
1076 /* Count sub-messages. */
1077 for (size_t i = 0; i < field_count; i++) {
1078 if (upb_fielddef_issubmsg(&m->fields[i])) {
1083 fields = symtab_alloc(ctx, field_count * sizeof(*fields));
1084 submsgs = symtab_alloc(ctx, submsg_count * sizeof(*submsgs));
1086 l->field_count = upb_msgdef_numfields(m);
1088 l->submsgs = submsgs;
1091 /* TODO(haberman): initialize fast tables so that reflection-based parsing
1092 * can get the same speeds as linked-in types. */
1093 l->fasttable[0].field_parser = &fastdecode_generic;
1094 l->fasttable[0].field_data = 0;
1096 if (upb_msgdef_mapentry(m)) {
1097 /* TODO(haberman): refactor this method so this special case is more
1099 const upb_fielddef *key = upb_msgdef_itof(m, 1);
1100 const upb_fielddef *val = upb_msgdef_itof(m, 2);
1101 fields[0].number = 1;
1102 fields[1].number = 2;
1103 fields[0].mode = _UPB_MODE_SCALAR;
1104 fields[1].mode = _UPB_MODE_SCALAR;
1105 fields[0].presence = 0;
1106 fields[1].presence = 0;
1107 fields[0].descriptortype = upb_fielddef_descriptortype(key);
1108 fields[1].descriptortype = upb_fielddef_descriptortype(val);
1109 fields[0].offset = 0;
1110 fields[1].offset = sizeof(upb_strview);
1111 fields[1].submsg_index = 0;
1113 if (upb_fielddef_type(val) == UPB_TYPE_MESSAGE) {
1114 submsgs[0] = upb_fielddef_msgsubdef(val)->layout;
1118 l->size = 2 * sizeof(upb_strview);
1119 l->size = UPB_ALIGN_UP(l->size, 8);
1123 /* Allocate data offsets in three stages:
1126 * 2. regular fields.
1129 * OPT: There is a lot of room for optimization here to minimize the size.
1132 /* Allocate hasbits and set basic field attributes. */
1134 for (upb_msg_field_begin(&it, m), hasbit = 0;
1135 !upb_msg_field_done(&it);
1136 upb_msg_field_next(&it)) {
1137 upb_fielddef* f = upb_msg_iter_field(&it);
1138 upb_msglayout_field *field = &fields[upb_fielddef_index(f)];
1140 fill_fieldlayout(field, f);
1142 if (upb_fielddef_issubmsg(f)) {
1143 const upb_msgdef *subm = upb_fielddef_msgsubdef(f);
1144 field->submsg_index = submsg_count++;
1145 submsgs[field->submsg_index] = subm->layout;
1148 if (upb_fielddef_haspresence(f) && !upb_fielddef_realcontainingoneof(f)) {
1149 /* We don't use hasbit 0, so that 0 can indicate "no presence" in the
1150 * table. This wastes one hasbit, but we don't worry about it for now. */
1151 field->presence = ++hasbit;
1153 field->presence = 0;
1157 /* Account for space used by hasbits. */
1158 l->size = div_round_up(hasbit, 8);
1160 /* Allocate non-oneof fields. */
1161 for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it);
1162 upb_msg_field_next(&it)) {
1163 const upb_fielddef* f = upb_msg_iter_field(&it);
1164 size_t field_size = upb_msg_fielddefsize(f);
1165 size_t index = upb_fielddef_index(f);
1167 if (upb_fielddef_realcontainingoneof(f)) {
1168 /* Oneofs are handled separately below. */
1172 fields[index].offset = upb_msglayout_place(l, field_size);
1175 /* Allocate oneof fields. Each oneof field consists of a uint32 for the case
1176 * and space for the actual data. */
1177 for (upb_msg_oneof_begin(&oit, m); !upb_msg_oneof_done(&oit);
1178 upb_msg_oneof_next(&oit)) {
1179 const upb_oneofdef* o = upb_msg_iter_oneof(&oit);
1182 size_t case_size = sizeof(uint32_t); /* Could potentially optimize this. */
1183 size_t field_size = 0;
1184 uint32_t case_offset;
1185 uint32_t data_offset;
1187 if (upb_oneofdef_issynthetic(o)) continue;
1189 /* Calculate field size: the max of all field sizes. */
1190 for (upb_oneof_begin(&fit, o);
1191 !upb_oneof_done(&fit);
1192 upb_oneof_next(&fit)) {
1193 const upb_fielddef* f = upb_oneof_iter_field(&fit);
1194 field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f));
1197 /* Align and allocate case offset. */
1198 case_offset = upb_msglayout_place(l, case_size);
1199 data_offset = upb_msglayout_place(l, field_size);
1201 for (upb_oneof_begin(&fit, o);
1202 !upb_oneof_done(&fit);
1203 upb_oneof_next(&fit)) {
1204 const upb_fielddef* f = upb_oneof_iter_field(&fit);
1205 fields[upb_fielddef_index(f)].offset = data_offset;
1206 fields[upb_fielddef_index(f)].presence = ~case_offset;
1210 /* Size of the entire structure should be a multiple of its greatest
1211 * alignment. TODO: track overall alignment for real? */
1212 l->size = UPB_ALIGN_UP(l->size, 8);
1214 /* Sort fields by number. */
1215 qsort(fields, upb_msgdef_numfields(m), sizeof(*fields), field_number_cmp);
1216 assign_layout_indices(m, l, fields);
1219 static char *strviewdup(symtab_addctx *ctx, upb_strview view) {
1220 return upb_strdup2(view.data, view.size, ctx->arena);
/* True when the |n| bytes at |a| equal the NUL-terminated string |b| exactly
 * (same length, same bytes). |a| need not be NUL-terminated. */
static bool streql2(const char *a, size_t n, const char *b) {
  if (n != strlen(b)) {
    return false;
  }
  return memcmp(a, b, n) == 0;
}
1227 static bool streql_view(upb_strview view, const char *b) {
1228 return streql2(view.data, view.size, b);
1231 static const char *makefullname(symtab_addctx *ctx, const char *prefix,
1234 /* ret = prefix + '.' + name; */
1235 size_t n = strlen(prefix);
1236 char *ret = symtab_alloc(ctx, n + name.size + 2);
1237 strcpy(ret, prefix);
1239 memcpy(&ret[n + 1], name.data, name.size);
1240 ret[n + 1 + name.size] = '\0';
1243 return strviewdup(ctx, name);
/* Validates the oneofs of message `m` and fills in each oneof's `fields`
 * array.  Runs after all fields were created (see create_msgdef()).
 *
 * First loop: checks every oneof, reporting violations through symtab_errf(),
 * and allocates o->fields with room for o->field_count entries.
 * Second loop: walks m->fields and appends each field to the array of the
 * oneof it belongs to.
 * Finally m->real_oneof_count is set to the number of non-synthetic oneofs. */
1247 static void finalize_oneofs(symtab_addctx *ctx, upb_msgdef *m) {
1249 int synthetic_count = 0;
/* m->oneofs is const-qualified for readers; this builder needs to write. */
1250 upb_oneofdef *mutable_oneofs = (upb_oneofdef*)m->oneofs;
1252 for (i = 0; i < m->oneof_count; i++) {
1253 upb_oneofdef *o = &mutable_oneofs[i];
1255 if (o->synthetic && o->field_count != 1) {
1256 symtab_errf(ctx, "Synthetic oneofs must have one field, not %d: %s",
1257 o->field_count, upb_oneofdef_name(o));
/* A non-synthetic oneof seen after a synthetic one is an ordering error.
 * NOTE(review): the statement that increments synthetic_count is not visible
 * here; presumably it sits in the synthetic branch just above -- confirm. */
1262 } else if (synthetic_count != 0) {
1263 symtab_errf(ctx, "Synthetic oneofs must be after all other oneofs: %s",
1264 upb_oneofdef_name(o));
/* field_count was accumulated by create_fielddef() (oneof->field_count++). */
1267 o->fields = symtab_alloc(ctx, sizeof(upb_fielddef *) * o->field_count);
1271 for (i = 0; i < m->field_count; i++) {
1272 const upb_fielddef *f = &m->fields[i];
1273 upb_oneofdef *o = (upb_oneofdef*)f->oneof;
/* field_count is reused as the append cursor.
 * NOTE(review): this requires field_count to be reset to 0 after the
 * allocation above and f->oneof to be non-NULL here; neither the reset nor a
 * NULL check is visible -- confirm. */
1275 o->fields[o->field_count++] = f;
1279 m->real_oneof_count = m->oneof_count - synthetic_count;
/* Converts the NUL-terminated proto field name `name` to its JSON name
 * (underscores removed, the character after an underscore upper-cased) and
 * writes the result into `buf`, which has room for `len` bytes.
 *
 * makejsonname() first calls this with (NULL, 0) and uses the return value as
 * the allocation size for the second call, so the return value is the buffer
 * size needed for the converted name.  Presumably that count includes the NUL
 * terminator; the return statement is not visible here -- confirm.
 *
 * NOTE(review): name[src] is passed to toupper() as a plain char.  That is
 * only well defined for values representable as unsigned char; names are
 * presumably restricted to ASCII identifiers by check_ident() -- confirm. */
1282 size_t getjsonname(const char *name, char *buf, size_t len) {
1283 size_t src, dst = 0;
1284 bool ucase_next = false;
/* WRITE stores one output byte if it still fits.  Once the output reaches
 * the buffer capacity the last slot is overwritten with NUL instead, so a
 * truncated result stays terminated.  The stores index dst - 1, so dst is
 * presumably advanced on a macro line that is not visible here. */
1286 #define WRITE(byte) \
1288 if (dst < len) buf[dst - 1] = byte; \
1289 else if (dst == len) buf[dst - 1] = '\0'
1296 /* Implement the transformation as described in the spec:
1297 * 1. upper case all letters after an underscore.
1298 * 2. remove all underscores.
1300 for (src = 0; name[src]; src++) {
1301 if (name[src] == '_') {
1307 WRITE(toupper(name[src]));
/* Allocates (via symtab_alloc) and fills the JSON name for the field name
 * `name`.  getjsonname() is called twice: once with a NULL/0 buffer to learn
 * the required size, then again to write into the freshly allocated buffer. */
1320 static char* makejsonname(symtab_addctx *ctx, const char* name) {
1321 size_t size = getjsonname(name, NULL, 0);
1322 char* json_name = symtab_alloc(ctx, size);
1323 getjsonname(name, json_name, size);
/* Registers `name` -> `v` in the symbol table's global `syms` table.
 * Reports "duplicate symbol" through symtab_errf() if the name is already
 * present.  Note that the entry is inserted using the symbol table's own
 * arena (ctx->symtab->arena), not ctx->arena. */
1327 static void symtab_add(symtab_addctx *ctx, const char *name, upb_value v) {
1328 if (upb_strtable_lookup(&ctx->symtab->syms, name, NULL)) {
1329 symtab_errf(ctx, "duplicate symbol '%s'", name);
1331 size_t len = strlen(name);
1332 CHK_OOM(upb_strtable_insert(&ctx->symtab->syms, name, len, v,
1333 ctx->symtab->arena));
1336 /* Given a symbol and the base symbol inside which it is defined, find the
1337 * symbol's definition in t. */
/* ctx:  build context; lookups go to ctx->symtab->syms and failures are
 *       reported through symtab_errf().
 * f:    field whose reference is being resolved (used in error messages).
 * base: enclosing scope; only relevant to the relative-name branch, which the
 *       TODO below describes as broken and unused.
 * sym:  the (length-delimited) symbol text; an empty symbol is "not found".
 * type: expected definition kind; unpack_def() checks the table entry
 *       against it. */
1338 static const void *symtab_resolve(symtab_addctx *ctx, const upb_fielddef *f,
1339 const char *base, upb_strview sym,
1340 upb_deftype_t type) {
1341 const upb_strtable *t = &ctx->symtab->syms;
1342 if(sym.size == 0) goto notfound;
1343 if(sym.data[0] == '.') {
1344 /* Symbols starting with '.' are absolute, so we do a single lookup.
1345 * Slice to omit the leading '.' */
1347 if (!upb_strtable_lookup2(t, sym.data + 1, sym.size - 1, &v)) {
1351 const void *ret = unpack_def(v, type);
/* NOTE(review): `sym` is a upb_strview (data + size), yet sym.data is
 * formatted with %s here.  The not-found message below uses
 * UPB_STRVIEW_FORMAT / UPB_STRVIEW_ARGS instead.  Unless sym.data is
 * guaranteed to be NUL-terminated this can read past the view -- confirm. */
1353 symtab_errf(ctx, "type mismatch when resolving field %s, name %s",
1354 f->full_name, sym.data);
1358 /* Remove components from base until we find an entry or run out.
1359 * TODO: This branch is totally broken, but currently not used. */
1366 symtab_errf(ctx, "couldn't resolve name '" UPB_STRVIEW_FORMAT "'",
1367 UPB_STRVIEW_ARGS(sym));
/* Creates the next upb_oneofdef of message `m` from `oneof_proto`.
 *
 * The def takes the next free slot of m->oneofs (which create_msgdef()
 * allocated before calling this), gets its full name from m's full name plus
 * the oneof's name, and is registered both in the global symbol table and in
 * m->ntof.  Its own lookup tables (itof, ntof) are initialized empty; they
 * are populated by create_fielddef(). */
1370 static void create_oneofdef(
1371 symtab_addctx *ctx, upb_msgdef *m,
1372 const google_protobuf_OneofDescriptorProto *oneof_proto) {
1374 upb_strview name = google_protobuf_OneofDescriptorProto_name(oneof_proto);
/* m->oneofs is const-qualified for readers; cast away const to build it. */
1377 o = (upb_oneofdef*)&m->oneofs[m->oneof_count++];
1379 o->full_name = makefullname(ctx, m->full_name, name);
/* Flipped to true by create_fielddef() when a proto3_optional field joins. */
1381 o->synthetic = false;
1383 v = pack_def(o, UPB_DEFTYPE_ONEOF);
1384 symtab_add(ctx, o->full_name, v);
1385 CHK_OOM(upb_strtable_insert(&m->ntof, name.data, name.size, v, ctx->arena));
1387 CHK_OOM(upb_inttable_init(&o->itof, ctx->arena));
1388 CHK_OOM(upb_strtable_init(&o->ntof, 4, ctx->arena));
/* Allocates a str_t holding a copy of the `len` bytes at `data`, followed by
 * a NUL terminator.  str_t ends in `char str[1]`, so sizeof(*ret) + len
 * already includes the byte used for the terminator.
 *
 * `data` may be NULL when len is 0 (set_default_default() calls
 * newstr(ctx, NULL, 0)); the copy is skipped in that case. */
1391 static str_t *newstr(symtab_addctx *ctx, const char *data, size_t len) {
1392 str_t *ret = symtab_alloc(ctx, sizeof(*ret) + len);
1393 if (!ret) return NULL;
1395 if (len) memcpy(ret->str, data, len);
1396 ret->str[len] = '\0';
/* Parses the textual default value (`str`, `len` bytes, not necessarily
 * NUL-terminated) of field `f` and stores it in f->defaultval, in the member
 * matching the field's type.  Invalid input is reported through
 * symtab_errf().
 *
 * Numeric types are parsed with the strto*() family.  The integer parsers are
 * called with base 0, so "0x"/"0" prefixed literals are accepted as well. */
1400 static void parse_default(symtab_addctx *ctx, const char *str, size_t len,
1406 switch (upb_fielddef_type(f)) {
1407 case UPB_TYPE_INT32:
1408 case UPB_TYPE_INT64:
1409 case UPB_TYPE_UINT32:
1410 case UPB_TYPE_UINT64:
1411 case UPB_TYPE_DOUBLE:
1412 case UPB_TYPE_FLOAT:
1413 /* Standard C number parsing functions expect null-terminated strings. */
1414 if (len >= sizeof(nullz) - 1) {
1415 symtab_errf(ctx, "Default too long: %.*s", (int)len, str);
/* Copy into the local buffer `nullz`.  Presumably the terminator is added and
 * `str` is re-pointed at nullz on lines that are not visible here -- the
 * parsers below are handed `str` -- confirm. */
1417 memcpy(nullz, str, len);
1425 switch (upb_fielddef_type(f)) {
/* For every numeric case below, a non-zero *end means trailing characters
 * that were not part of the number.  The errno == ERANGE tests assume errno
 * was cleared before the call; that reset is not visible here -- confirm. */
1426 case UPB_TYPE_INT32: {
1427 long val = strtol(str, &end, 0);
1428 if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) {
1431 f->defaultval.sint = val;
1434 case UPB_TYPE_ENUM: {
/* Enum defaults are given as a value name, looked up in the resolved enum. */
1435 const upb_enumdef *e = f->sub.enumdef;
1437 if (!upb_enumdef_ntoi(e, str, len, &val)) {
1440 f->defaultval.sint = val;
1443 case UPB_TYPE_INT64: {
1444 long long val = strtoll(str, &end, 0);
/* Where long long is 64 bits wide the two range comparisons can never be
 * true; overflow is then detected through ERANGE only. */
1445 if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) {
1448 f->defaultval.sint = val;
1451 case UPB_TYPE_UINT32: {
/* NOTE(review): strtoul()/strtoull() accept a leading '-' and return the
 * negated value converted to unsigned, so input such as "-1" is not rejected
 * by ERANGE -- confirm whether negative defaults should be refused here. */
1452 unsigned long val = strtoul(str, &end, 0);
1453 if (val > UINT32_MAX || errno == ERANGE || *end) {
1456 f->defaultval.uint = val;
1459 case UPB_TYPE_UINT64: {
1460 unsigned long long val = strtoull(str, &end, 0);
/* Same remark as for INT64: with a 64-bit unsigned long long the comparison
 * against UINT64_MAX is always false. */
1461 if (val > UINT64_MAX || errno == ERANGE || *end) {
1464 f->defaultval.uint = val;
1467 case UPB_TYPE_DOUBLE: {
1468 double val = strtod(str, &end);
1469 if (errno == ERANGE || *end) {
1472 f->defaultval.dbl = val;
1475 case UPB_TYPE_FLOAT: {
1476 float val = strtof(str, &end);
1477 if (errno == ERANGE || *end) {
1480 f->defaultval.flt = val;
1483 case UPB_TYPE_BOOL: {
/* Only the exact spellings "false" and "true" are accepted. */
1484 if (streql2(str, len, "false")) {
1485 f->defaultval.boolean = false;
1486 } else if (streql2(str, len, "true")) {
1487 f->defaultval.boolean = true;
1492 case UPB_TYPE_STRING:
1493 f->defaultval.str = newstr(ctx, str, len);
1495 case UPB_TYPE_BYTES:
1496 /* XXX: need to interpret the C-escaped value. */
1497 f->defaultval.str = newstr(ctx, str, len);
1499 case UPB_TYPE_MESSAGE:
1500 /* Should not have a default value. */
1501 symtab_errf(ctx, "Message should not have a default (%s)",
1502 upb_fielddef_fullname(f));
/* Shared failure report; presumably the target of the rejected cases above
 * (the jumps themselves are not visible here). */
1508 symtab_errf(ctx, "Invalid default '%.*s' for field %s", (int)len, str,
1509 upb_fielddef_fullname(f));
/* Gives field `f` the implicit default for its type when the descriptor does
 * not specify one (called from resolve_fielddef()): zero for numbers, an empty
 * string object for string/bytes, false for bool. */
1512 static void set_default_default(symtab_addctx *ctx, upb_fielddef *f) {
1513 switch (upb_fielddef_type(f)) {
1514 case UPB_TYPE_INT32:
1515 case UPB_TYPE_INT64:
1517 f->defaultval.sint = 0;
1519 case UPB_TYPE_UINT64:
1520 case UPB_TYPE_UINT32:
1521 f->defaultval.uint = 0;
1523 case UPB_TYPE_DOUBLE:
1524 case UPB_TYPE_FLOAT:
/* NOTE(review): FLOAT shares this store to .dbl, whereas parse_default()
 * writes .flt for floats.  Presumably defaultval is a union and the all-zero
 * double also reads back as 0.0f through .flt -- confirm. */
1525 f->defaultval.dbl = 0;
1527 case UPB_TYPE_STRING:
1528 case UPB_TYPE_BYTES:
/* An empty, NUL-terminated str_t rather than a NULL pointer. */
1529 f->defaultval.str = newstr(ctx, NULL, 0);
1532 f->defaultval.boolean = false;
1534 case UPB_TYPE_MESSAGE:
/* Creates the upb_fielddef for one FieldDescriptorProto.
 *
 * ctx:         build context; errors are reported through symtab_errf().
 * prefix:      scope used to build the field's full name.
 * m:           containing message, or NULL for an extension (build_filedef()
 *              passes NULL for file-level extensions).
 * field_proto: descriptor being converted; a pointer to it is kept in
 *              f->sub.unresolved until resolve_fielddef() runs.
 *
 * Message fields take the next slot of m->fields and are indexed in m->ntof
 * (by name and by JSON name) and m->itof (by number).  Extensions take the
 * next slot of ctx->file->exts and are registered with symtab_add(). */
1539 static void create_fielddef(
1540 symtab_addctx *ctx, const char *prefix, upb_msgdef *m,
1541 const google_protobuf_FieldDescriptorProto *field_proto) {
1543 const google_protobuf_FieldOptions *options;
1545 const char *full_name;
1546 const char *json_name;
1547 const char *shortname;
1548 uint32_t field_number;
/* NOTE(review): `m` is NULL for extensions, yet it is handed to
 * upb_msgdef_fullname() here; an unnamed extension would take this path --
 * confirm that upb_msgdef_fullname() tolerates NULL. */
1550 if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) {
1551 symtab_errf(ctx, "field has no name (%s)", upb_msgdef_fullname(m));
1554 name = google_protobuf_FieldDescriptorProto_name(field_proto);
1555 check_ident(ctx, name, false);
1556 full_name = makefullname(ctx, prefix, name);
1557 shortname = shortdefname(full_name);
/* Use the explicit json_name if the descriptor carries one, otherwise derive
 * it from the short field name. */
1559 if (google_protobuf_FieldDescriptorProto_has_json_name(field_proto)) {
1560 json_name = strviewdup(
1561 ctx, google_protobuf_FieldDescriptorProto_json_name(field_proto));
1563 json_name = makejsonname(ctx, shortname);
1566 field_number = google_protobuf_FieldDescriptorProto_number(field_proto);
1568 if (field_number == 0 || field_number > UPB_MAX_FIELDNUMBER) {
1569 symtab_errf(ctx, "invalid field number (%u)", field_number);
1573 /* direct message field. */
1574 upb_value v, field_v, json_v;
/* m->fields is const-qualified for readers; cast away const to build it. */
1577 f = (upb_fielddef*)&m->fields[m->field_count];
1578 f->index_ = m->field_count++;
1580 f->is_extension_ = false;
/* Names, JSON names and numbers must each be unique within the message.
 * m->ntof is shared by field names, JSON names and oneof names. */
1582 if (upb_strtable_lookup(&m->ntof, shortname, NULL)) {
1583 symtab_errf(ctx, "duplicate field name (%s)", shortname);
1586 if (upb_strtable_lookup(&m->ntof, json_name, NULL)) {
1587 symtab_errf(ctx, "duplicate json_name (%s)", json_name);
1590 if (upb_inttable_lookup(&m->itof, field_number, NULL)) {
1591 symtab_errf(ctx, "duplicate field number (%u)", field_number);
/* The same def is packed under two tags so that a lookup in ntof can tell a
 * match on the field name from a match on the JSON name. */
1594 field_v = pack_def(f, UPB_DEFTYPE_FIELD);
1595 json_v = pack_def(f, UPB_DEFTYPE_FIELD_JSONNAME);
1596 v = upb_value_constptr(f);
1597 json_size = strlen(json_name);
1599 CHK_OOM(upb_strtable_insert(&m->ntof, name.data, name.size, field_v,
1601 CHK_OOM(upb_inttable_insert(&m->itof, field_number, v, ctx->arena));
/* The JSON name gets its own entry only when it differs from the field name.
 * NOTE(review): unlike the neighbouring inserts, this one is not wrapped in
 * CHK_OOM, so a failed insert goes unnoticed. */
1603 if (strcmp(shortname, json_name) != 0) {
1604 upb_strtable_insert(&m->ntof, json_name, json_size, json_v, ctx->arena);
/* Locate this field's entry in the message layout by field number.
 * Presumably this only runs when pre-built layouts were supplied; the guard
 * is not visible here -- confirm. */
1608 const upb_msglayout_field *fields = m->layout->fields;
1609 int count = m->layout->field_count;
1612 for (i = 0; i < count; i++) {
1613 if (fields[i].number == field_number) {
1614 f->layout_index = i;
1622 /* extension field. */
1623 f = (upb_fielddef*)&ctx->file->exts[ctx->file->ext_count++];
1624 f->is_extension_ = true;
1625 symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_FIELD));
/* From here on the code is common to message fields and extensions. */
1628 f->full_name = full_name;
1629 f->json_name = json_name;
1630 f->file = ctx->file;
1631 f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto);
1632 f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto);
1633 f->number_ = field_number;
1635 f->proto3_optional_ =
1636 google_protobuf_FieldDescriptorProto_proto3_optional(field_proto);
1638 /* We can't resolve the subdef or (in the case of extensions) the containing
1639 * message yet, because it may not have been defined yet. We stash a pointer
1640 * to the field_proto until later when we can properly resolve it. */
1641 f->sub.unresolved = field_proto;
1643 if (f->label_ == UPB_LABEL_REQUIRED && f->file->syntax == UPB_SYNTAX_PROTO3) {
1644 symtab_errf(ctx, "proto3 fields cannot be required (%s)", f->full_name);
/* Oneof membership: validate the index, then count the field in its oneof and
 * register it in the oneof's by-number and by-name tables. */
1647 if (google_protobuf_FieldDescriptorProto_has_oneof_index(field_proto)) {
1649 google_protobuf_FieldDescriptorProto_oneof_index(field_proto);
1650 upb_oneofdef *oneof;
1651 upb_value v = upb_value_constptr(f);
1653 if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
1654 symtab_errf(ctx, "fields in oneof must have OPTIONAL label (%s)",
1659 symtab_errf(ctx, "oneof_index provided for extension field (%s)",
1663 if (oneof_index >= m->oneof_count) {
1664 symtab_errf(ctx, "oneof_index out of range (%s)", f->full_name);
1667 oneof = (upb_oneofdef *)&m->oneofs[oneof_index];
/* finalize_oneofs() later sizes the oneof's field array from this count. */
1670 oneof->field_count++;
/* A oneof that contains a proto3_optional field is marked synthetic. */
1671 if (f->proto3_optional_) {
1672 oneof->synthetic = true;
1674 CHK_OOM(upb_inttable_insert(&oneof->itof, f->number_, v, ctx->arena));
1676 upb_strtable_insert(&oneof->ntof, name.data, name.size, v, ctx->arena));
/* Field is not in any oneof: proto3_optional is then invalid. */
1679 if (f->proto3_optional_) {
1680 symtab_errf(ctx, "field with proto3_optional was not in a oneof (%s)",
1685 options = google_protobuf_FieldDescriptorProto_has_options(field_proto) ?
1686 google_protobuf_FieldDescriptorProto_options(field_proto) : NULL;
/* An explicit [packed = ...] option wins over the syntax-based default. */
1688 if (options && google_protobuf_FieldOptions_has_packed(options)) {
1689 f->packed_ = google_protobuf_FieldOptions_packed(options);
1691 /* Repeated fields default to packed for proto3 only. */
1692 f->packed_ = upb_fielddef_isprimitive(f) &&
1693 f->label_ == UPB_LABEL_REPEATED && f->file->syntax == UPB_SYNTAX_PROTO3;
/* `options` may be NULL (see above); presumably a guard on a line that is
 * not visible here protects this read -- confirm. */
1697 f->lazy_ = google_protobuf_FieldOptions_lazy(options);
/* Creates the next upb_enumdef of the file being built from `enum_proto`.
 *
 * The def takes the next slot of ctx->file->enums, is named prefix + name,
 * and is registered in the global symbol table.  Two lookup tables are
 * filled: ntoi (value name -> number) and iton (number -> value name). */
1703 static void create_enumdef(
1704 symtab_addctx *ctx, const char *prefix,
1705 const google_protobuf_EnumDescriptorProto *enum_proto) {
1707 const google_protobuf_EnumValueDescriptorProto *const *values;
1711 name = google_protobuf_EnumDescriptorProto_name(enum_proto);
1712 check_ident(ctx, name, false);
/* ctx->file->enums is const-qualified for readers; cast to build it. */
1714 e = (upb_enumdef*)&ctx->file->enums[ctx->file->enum_count++];
1715 e->full_name = makefullname(ctx, prefix, name);
1716 symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM));
1718 values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n);
/* The name table is pre-sized for the n values of the enum. */
1719 CHK_OOM(upb_strtable_init(&e->ntoi, n, ctx->arena));
1720 CHK_OOM(upb_inttable_init(&e->iton, ctx->arena));
1722 e->file = ctx->file;
1726 symtab_errf(ctx, "enums must contain at least one value (%s)",
1730 for (i = 0; i < n; i++) {
1731 const google_protobuf_EnumValueDescriptorProto *value = values[i];
1732 upb_strview name = google_protobuf_EnumValueDescriptorProto_name(value);
/* NUL-terminated copy; it is used as the table key and stored as the value
 * of the number -> name table. */
1733 char *name2 = strviewdup(ctx, name);
1734 int32_t num = google_protobuf_EnumValueDescriptorProto_number(value);
1735 upb_value v = upb_value_int32(num);
1737 if (i == 0 && e->file->syntax == UPB_SYNTAX_PROTO3 && num != 0) {
1738 symtab_errf(ctx, "for proto3, the first enum value must be zero (%s)",
1742 if (upb_strtable_lookup(&e->ntoi, name2, NULL)) {
1743 symtab_errf(ctx, "duplicate enum label '%s'", name2);
1747 CHK_OOM(upb_strtable_insert(&e->ntoi, name2, strlen(name2), v, ctx->arena));
/* Several names may share one number (aliases); iton keeps the first name
 * seen for a number and ignores later ones. */
1749 if (!upb_inttable_lookup(&e->iton, num, NULL)) {
1750 upb_value v = upb_value_cstr(name2);
1751 CHK_OOM(upb_inttable_insert(&e->iton, num, v, ctx->arena));
1755 upb_inttable_compact(&e->iton, ctx->arena);
/* Creates the next upb_msgdef of the file being built from `msg_proto`, then
 * recurses into its nested enums and messages.
 *
 * Order matters: oneofs are created before fields (fields refer to their
 * oneof by index), and finalize_oneofs() runs once all fields exist.
 *
 * NOTE(review): count_types_in_msg() counts message-scoped extensions, but no
 * visible line in this function creates them -- confirm whether nested
 * extensions are handled elsewhere. */
1758 static void create_msgdef(symtab_addctx *ctx, const char *prefix,
1759 const google_protobuf_DescriptorProto *msg_proto) {
1761 const google_protobuf_MessageOptions *options;
1762 const google_protobuf_OneofDescriptorProto *const *oneofs;
1763 const google_protobuf_FieldDescriptorProto *const *fields;
1764 const google_protobuf_EnumDescriptorProto *const *enums;
1765 const google_protobuf_DescriptorProto *const *msgs;
1766 size_t i, n_oneof, n_field, n;
1769 name = google_protobuf_DescriptorProto_name(msg_proto);
1770 check_ident(ctx, name, false);
/* ctx->file->msgs is const-qualified for readers; cast to build it. */
1772 m = (upb_msgdef*)&ctx->file->msgs[ctx->file->msg_count++];
1773 m->full_name = makefullname(ctx, prefix, name);
1774 symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG));
1776 oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n_oneof);
1777 fields = google_protobuf_DescriptorProto_field(msg_proto, &n_field);
/* ntof holds both oneof names and field names, hence the combined size. */
1779 CHK_OOM(upb_inttable_init(&m->itof, ctx->arena));
1780 CHK_OOM(upb_strtable_init(&m->ntof, n_oneof + n_field, ctx->arena));
1782 m->file = ctx->file;
1783 m->map_entry = false;
1785 options = google_protobuf_DescriptorProto_options(msg_proto);
1788 m->map_entry = google_protobuf_MessageOptions_map_entry(options);
/* Layout: either taken from the caller-supplied list (ctx->layouts) or
 * allocated here and filled in later by make_layout().  The condition that
 * selects between the two is not visible here; build_filedef() calls
 * make_layout() only when ctx->layouts is NULL. */
1792 m->layout = *ctx->layouts;
1795 /* Allocate now (to allow cross-linking), populate later. */
1796 m->layout = symtab_alloc(
1797 ctx, sizeof(*m->layout) + sizeof(_upb_fasttable_entry));
1801 m->oneofs = symtab_alloc(ctx, sizeof(*m->oneofs) * n_oneof);
1802 for (i = 0; i < n_oneof; i++) {
1803 create_oneofdef(ctx, m, oneofs[i]);
1807 m->fields = symtab_alloc(ctx, sizeof(*m->fields) * n_field);
1808 for (i = 0; i < n_field; i++) {
1809 create_fielddef(ctx, m->full_name, m, fields[i]);
1812 finalize_oneofs(ctx, m);
1813 assign_msg_wellknowntype(m);
1814 upb_inttable_compact(&m->itof, ctx->arena);
1816 /* This message is built. Now build nested messages and enums. */
1818 enums = google_protobuf_DescriptorProto_enum_type(msg_proto, &n);
1819 for (i = 0; i < n; i++) {
1820 create_enumdef(ctx, m->full_name, enums[i]);
1823 msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n);
1824 for (i = 0; i < n; i++) {
1825 create_msgdef(ctx, m->full_name, msgs[i]);
/* Adds the number of enums and extensions declared in `msg_proto`, and
 * recursively in all of its nested messages, to file->enum_count and
 * file->ext_count.  build_filedef() uses the totals to size the def arrays.
 * Presumably the message itself is also counted in file->msg_count on a line
 * that is not visible here -- confirm. */
1829 static void count_types_in_msg(const google_protobuf_DescriptorProto *msg_proto,
1830 upb_filedef *file) {
1831 const google_protobuf_DescriptorProto *const *msgs;
1836 msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n);
1837 for (i = 0; i < n; i++) {
1838 count_types_in_msg(msgs[i], file);
/* Only the element counts are needed; the returned arrays are ignored. */
1841 google_protobuf_DescriptorProto_enum_type(msg_proto, &n);
1842 file->enum_count += n;
1844 google_protobuf_DescriptorProto_extension(msg_proto, &n);
1845 file->ext_count += n;
/* Counts the definitions in `file_proto` into `file`: walks every top-level
 * message with count_types_in_msg() (which recurses into nested messages) and
 * adds the file-level enums and extensions.  The caller (build_filedef())
 * zeroes the counters before calling this. */
1848 static void count_types_in_file(
1849 const google_protobuf_FileDescriptorProto *file_proto,
1850 upb_filedef *file) {
1851 const google_protobuf_DescriptorProto *const *msgs;
1854 msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
1855 for (i = 0; i < n; i++) {
1856 count_types_in_msg(msgs[i], file);
/* Only the element counts are needed; the returned arrays are ignored. */
1859 google_protobuf_FileDescriptorProto_enum_type(file_proto, &n);
1860 file->enum_count += n;
1862 google_protobuf_FileDescriptorProto_extension(file_proto, &n);
1863 file->ext_count += n;
/* Second-phase setup of field `f`, run once every name of the file is in the
 * symbol table: resolves the extendee (extensions only), resolves the
 * sub-message or enum type, and sets the default value.
 *
 * `prefix` is the scope handed to symtab_resolve() for name lookup. */
1866 static void resolve_fielddef(symtab_addctx *ctx, const char *prefix,
/* Read the stashed descriptor first: the f->sub.msgdef / f->sub.enumdef
 * stores below presumably overwrite f->sub.unresolved (if `sub` is a union --
 * confirm). */
1869 const google_protobuf_FieldDescriptorProto *field_proto = f->sub.unresolved;
1871 if (f->is_extension_) {
1872 if (!google_protobuf_FieldDescriptorProto_has_extendee(field_proto)) {
1873 symtab_errf(ctx, "extension for field '%s' had no extendee",
1877 name = google_protobuf_FieldDescriptorProto_extendee(field_proto);
/* For an extension, msgdef is the message being extended. */
1878 f->msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG);
/* Message-typed and enum-typed fields must name their type. */
1881 if ((upb_fielddef_issubmsg(f) || f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) &&
1882 !google_protobuf_FieldDescriptorProto_has_type_name(field_proto)) {
1883 symtab_errf(ctx, "field '%s' is missing type name", f->full_name);
1886 name = google_protobuf_FieldDescriptorProto_type_name(field_proto);
1888 if (upb_fielddef_issubmsg(f)) {
1889 f->sub.msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG);
1890 } else if (f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) {
1891 f->sub.enumdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_ENUM);
1894 /* Have to delay resolving of the default value until now because of the enum
1895 * case, since enum defaults are specified with a label. */
1896 if (google_protobuf_FieldDescriptorProto_has_default_value(field_proto)) {
1897 upb_strview defaultval =
1898 google_protobuf_FieldDescriptorProto_default_value(field_proto);
1900 if (f->file->syntax == UPB_SYNTAX_PROTO3) {
1901 symtab_errf(ctx, "proto3 fields cannot have explicit defaults (%s)",
1905 if (upb_fielddef_issubmsg(f)) {
1906 symtab_errf(ctx, "message fields cannot have explicit defaults (%s)",
1910 parse_default(ctx, defaultval.data, defaultval.size, f);
/* No explicit default in the descriptor: use the type's implicit default. */
1912 set_default_default(ctx, f);
/* Populates `file` from `file_proto`.
 *
 * Steps, in order:
 *   1. count all messages / enums / extensions and allocate the def arrays;
 *   2. copy file-level properties (name, package, syntax, PHP options);
 *   3. check that every dependency is already loaded and record it;
 *   4. create message, enum and extension defs, which registers their names;
 *   5. resolve cross-references now that every name is known;
 *   6. build message layouts unless the caller supplied them (ctx->layouts).
 *
 * Errors are reported through symtab_errf(). */
1916 static void build_filedef(
1917 symtab_addctx *ctx, upb_filedef *file,
1918 const google_protobuf_FileDescriptorProto *file_proto) {
1919 const google_protobuf_FileOptions *file_options_proto;
1920 const google_protobuf_DescriptorProto *const *msgs;
1921 const google_protobuf_EnumDescriptorProto *const *enums;
1922 const google_protobuf_FieldDescriptorProto *const *exts;
1923 const upb_strview* strs;
1926 file->symtab = ctx->symtab;
1928 /* One pass to count and allocate. */
1929 file->msg_count = 0;
1930 file->enum_count = 0;
1931 file->ext_count = 0;
1932 count_types_in_file(file_proto, file);
1933 file->msgs = symtab_alloc(ctx, sizeof(*file->msgs) * file->msg_count);
1934 file->enums = symtab_alloc(ctx, sizeof(*file->enums) * file->enum_count);
1935 file->exts = symtab_alloc(ctx, sizeof(*file->exts) * file->ext_count);
1937 /* In the second pass we increment these as defs are added. */
1938 file->msg_count = 0;
1939 file->enum_count = 0;
1940 file->ext_count = 0;
1942 if (!google_protobuf_FileDescriptorProto_has_name(file_proto)) {
1943 symtab_errf(ctx, "File has no name");
1947 strviewdup(ctx, google_protobuf_FileDescriptorProto_name(file_proto));
1948 file->phpprefix = NULL;
1949 file->phpnamespace = NULL;
/* The package is optional.  When it is absent file->package is NULL, and
 * that NULL is later passed as the name prefix to the create_* functions. */
1951 if (google_protobuf_FileDescriptorProto_has_package(file_proto)) {
1952 upb_strview package =
1953 google_protobuf_FileDescriptorProto_package(file_proto);
1954 check_ident(ctx, package, true);
1955 file->package = strviewdup(ctx, package);
1957 file->package = NULL;
/* A missing syntax field means proto2; anything other than "proto2" or
 * "proto3" is an error. */
1960 if (google_protobuf_FileDescriptorProto_has_syntax(file_proto)) {
1961 upb_strview syntax =
1962 google_protobuf_FileDescriptorProto_syntax(file_proto);
1964 if (streql_view(syntax, "proto2")) {
1965 file->syntax = UPB_SYNTAX_PROTO2;
1966 } else if (streql_view(syntax, "proto3")) {
1967 file->syntax = UPB_SYNTAX_PROTO3;
1969 symtab_errf(ctx, "Invalid syntax '" UPB_STRVIEW_FORMAT "'",
1970 UPB_STRVIEW_ARGS(syntax));
1973 file->syntax = UPB_SYNTAX_PROTO2;
1977 file_options_proto = google_protobuf_FileDescriptorProto_options(file_proto);
1978 if (file_options_proto) {
1979 if (google_protobuf_FileOptions_has_php_class_prefix(file_options_proto)) {
1980 file->phpprefix = strviewdup(
1982 google_protobuf_FileOptions_php_class_prefix(file_options_proto));
1984 if (google_protobuf_FileOptions_has_php_namespace(file_options_proto)) {
1985 file->phpnamespace = strviewdup(
1986 ctx, google_protobuf_FileOptions_php_namespace(file_options_proto));
1990 /* Verify dependencies. */
1991 strs = google_protobuf_FileDescriptorProto_dependency(file_proto, &n);
1992 file->deps = symtab_alloc(ctx, sizeof(*file->deps) * n);
/* Every dependency must already be present in the symbol table's `files`
 * table; its filedef pointer is stored in file->deps. */
1994 for (i = 0; i < n; i++) {
1995 upb_strview dep_name = strs[i];
1997 if (!upb_strtable_lookup2(&ctx->symtab->files, dep_name.data,
1998 dep_name.size, &v)) {
2000 "Depends on file '" UPB_STRVIEW_FORMAT
2001 "', but it has not been loaded",
2002 UPB_STRVIEW_ARGS(dep_name));
2004 file->deps[i] = upb_value_getconstptr(v);
2007 /* Create messages. */
2008 msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
2009 for (i = 0; i < n; i++) {
2010 create_msgdef(ctx, file->package, msgs[i]);
2014 enums = google_protobuf_FileDescriptorProto_enum_type(file_proto, &n);
2015 for (i = 0; i < n; i++) {
2016 create_enumdef(ctx, file->package, enums[i]);
2019 /* Create extensions. */
2020 exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n);
/* NOTE(review): file->exts was already allocated above from the counted
 * total (which includes message-scoped extensions).  It is re-allocated here
 * for the file-level extensions only, so the first allocation is simply
 * abandoned -- confirm this is intended. */
2021 file->exts = symtab_alloc(ctx, sizeof(*file->exts) * n);
2022 for (i = 0; i < n; i++) {
2023 create_fielddef(ctx, file->package, NULL, exts[i]);
2026 /* Now that all names are in the table, build layouts and resolve refs. */
2027 for (i = 0; i < (size_t)file->ext_count; i++) {
2028 resolve_fielddef(ctx, file->package, (upb_fielddef*)&file->exts[i]);
/* file->msgs is flat: nested messages were appended by create_msgdef()'s
 * recursion, so this loop reaches them as well. */
2031 for (i = 0; i < (size_t)file->msg_count; i++) {
2032 const upb_msgdef *m = &file->msgs[i];
2034 for (j = 0; j < m->field_count; j++) {
2035 resolve_fielddef(ctx, m->full_name, (upb_fielddef*)&m->fields[j]);
/* Layouts are computed here only when the caller did not supply them. */
2039 if (!ctx->layouts) {
2040 for (i = 0; i < (size_t)file->msg_count; i++) {
2041 const upb_msgdef *m = &file->msgs[i];
2042 make_layout(ctx, m);
/* Undoes the symbol-table registrations of a partially built file: removes
 * the full names of the file's messages, enums and extensions from s->syms.
 * Called from the error path of _upb_symtab_addfile().
 *
 * The *_count fields are incremented as defs are created (see
 * build_filedef()), so the loops cover the defs created so far.
 *
 * NOTE(review): create_oneofdef() also registers oneof names with
 * symtab_add(), but no loop here removes them.  Also, the create_* functions
 * bump the count before full_name is assigned, so a failure in between would
 * leave an entry whose full_name is not yet set -- confirm both cases. */
2047 static void remove_filedef(upb_symtab *s, upb_filedef *file) {
2049 for (i = 0; i < file->msg_count; i++) {
2050 const char *name = file->msgs[i].full_name;
2051 upb_strtable_remove(&s->syms, name, strlen(name), NULL);
2053 for (i = 0; i < file->enum_count; i++) {
2054 const char *name = file->enums[i].full_name;
2055 upb_strtable_remove(&s->syms, name, strlen(name), NULL);
2057 for (i = 0; i < file->ext_count; i++) {
2058 const char *name = file->exts[i].full_name;
2059 upb_strtable_remove(&s->syms, name, strlen(name), NULL);
/* Adds the file described by `file_proto` to symbol table `s`.
 *
 * layouts: optional pre-built message layouts (upb_symtab_addfile() passes
 *          NULL, in which case build_filedef() computes them).
 * status:  receives the error description on failure.
 *
 * The file is built in a fresh arena.  On success the file is recorded in
 * s->files and the arena is fused with s->arena; on error (reported by a jump
 * back to the UPB_SETJMP point) the symbols registered so far are removed
 * again with remove_filedef(). */
2063 static const upb_filedef *_upb_symtab_addfile(
2064 upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto,
2065 const upb_msglayout **layouts, upb_status *status) {
2067 upb_strview name = google_protobuf_FileDescriptorProto_name(file_proto);
/* A file name may only be loaded once per symbol table. */
2069 if (upb_strtable_lookup2(&s->files, name.data, name.size, NULL)) {
2070 upb_status_seterrf(status, "duplicate file name (%.*s)",
2071 UPB_STRVIEW_ARGS(name));
2076 ctx.layouts = layouts;
2077 ctx.status = status;
2079 ctx.arena = upb_arena_new();
2082 upb_status_setoom(status);
/* Error landing pad: entered when a builder bails out through ctx.err. */
2086 if (UPB_UNLIKELY(UPB_SETJMP(ctx.err))) {
2087 UPB_ASSERT(!upb_ok(status));
2089 remove_filedef(s, ctx.file);
2093 ctx.file = symtab_alloc(&ctx, sizeof(*ctx.file));
2094 build_filedef(&ctx, ctx.file, file_proto);
/* NOTE(review): the result of this insert is not checked (no CHK_OOM), so a
 * failed insert would leave the file unregistered without any error. */
2095 upb_strtable_insert(&s->files, name.data, name.size,
2096 upb_value_constptr(ctx.file), ctx.arena);
2097 UPB_ASSERT(upb_ok(status));
/* Tie the new file's arena to the symbol table's arena. */
2098 upb_arena_fuse(s->arena, ctx.arena);
2101 upb_arena_free(ctx.arena);
/* Public entry point: adds `file_proto` to `s` without pre-built layouts
 * (NULL is passed for `layouts`, so they are computed while building).
 * Returns whatever _upb_symtab_addfile() returns; errors are described in
 * `status`. */
2105 const upb_filedef *upb_symtab_addfile(
2106 upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto,
2107 upb_status *status) {
2108 return _upb_symtab_addfile(s, file_proto, NULL, status);
2111 /* Include here since we want most of this file to be stdio-free. */
/* Loads a compiled-in descriptor (`init`) and, recursively, all of its
 * dependencies into `s`.  A file that is already present in s->files is not
 * loaded again.  Returns false if any step fails (the recursive call's result
 * is tested with `!`); failures are printed to stderr. */
2114 bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init) {
2115 /* Since this function should never fail (it would indicate a bug in upb) we
2116 * print errors to stderr instead of returning error status to the user. */
2117 upb_def_init **deps = init->deps;
2118 google_protobuf_FileDescriptorProto *file;
2122 upb_status_clear(&status);
2124 if (upb_strtable_lookup(&s->files, init->filename, NULL)) {
/* Temporary arena; it is freed on both the success and the error path below.
 * NOTE(review): no NULL check of the new arena is visible here -- confirm. */
2128 arena = upb_arena_new();
/* init->deps is a NULL-terminated array; dependencies are loaded first. */
2130 for (; *deps; deps++) {
2131 if (!_upb_symtab_loaddefinit(s, *deps)) goto err;
/* UPB_DECODE_ALIAS is passed to the parser; presumably string fields then
 * point into init->descriptor instead of being copied -- confirm. */
2134 file = google_protobuf_FileDescriptorProto_parse_ex(
2135 init->descriptor.data, init->descriptor.size, NULL, UPB_DECODE_ALIAS,
2137 s->bytes_loaded += init->descriptor.size;
2142 "Failed to parse compiled-in descriptor for file '%s'. This should "
2148 if (!_upb_symtab_addfile(s, file, init->layouts, &status)) goto err;
2150 upb_arena_free(arena);
2154 fprintf(stderr, "Error loading compiled-in descriptor: %s\n",
2155 upb_status_errmsg(&status));
2156 upb_arena_free(arena);
/* Returns s->bytes_loaded, the running total that _upb_symtab_loaddefinit()
 * increases by the size of each compiled-in descriptor it parses. */
2160 size_t _upb_symtab_bytesloaded(const upb_symtab *s) {
2161 return s->bytes_loaded;
2164 upb_arena *_upb_symtab_arena(const upb_symtab *s) {