1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
35 #include <google/protobuf/compiler/cpp/cpp_helpers.h>
41 #include <unordered_set>
44 #include <google/protobuf/stubs/common.h>
45 #include <google/protobuf/stubs/logging.h>
46 #include <google/protobuf/descriptor.h>
48 #include <google/protobuf/compiler/scc.h>
49 #include <google/protobuf/io/printer.h>
50 #include <google/protobuf/io/zero_copy_stream.h>
51 #include <google/protobuf/wire_format.h>
52 #include <google/protobuf/wire_format_lite.h>
53 #include <google/protobuf/stubs/strutil.h>
54 #include <google/protobuf/stubs/substitute.h>
57 #include <google/protobuf/stubs/hash.h>
59 #include <google/protobuf/port_def.inc>
// Simple name of the Any message and the path of the .proto file that
// defines it; both are compared against descriptor names by IsAnyMessage().
static constexpr char kAnyMessageName[] = "Any";
static constexpr char kAnyProtoFile[] = "google/protobuf/any.proto";
71 std::string DotsToColons(const std::string& name) {
72 return StringReplace(name, ".", "::", true);
// Array of C strings that MakeKeywordsMap() copies into the kKeywords set.
// NOTE(review): the initializer entries are not visible in this excerpt.
75 static const char* const kKeywordList[] = { //
160 static std::unordered_set<std::string>* MakeKeywordsMap() {
161 auto* result = new std::unordered_set<std::string>();
162 for (const auto keyword : kKeywordList) {
163 result->emplace(keyword);
168 static std::unordered_set<std::string>& kKeywords = *MakeKeywordsMap();
170 // Encode [0..62] as 'A'-'Z', 'a'-'z', '0'-'9', '_'
// Maps one base-63 digit to an identifier-safe character. Negative input
// trips GOOGLE_CHECK_GE; input past the last symbol trips GOOGLE_CHECK_EQ.
// NOTE(review): the statements between the range tests (presumably rebasing
// `value` before each test, and the final return) are not visible in this
// excerpt — confirm against the full file.
171 char Base63Char(int value) {
172 GOOGLE_CHECK_GE(value, 0);
173 if (value < 26) return 'A' + value;
175 if (value < 26) return 'a' + value;
177 if (value < 10) return '0' + value;
178 GOOGLE_CHECK_EQ(value, 10);
182 // Given a c identifier has 63 legal characters we can't implement base64
183 // encoding. So we return the k least significant "digits" in base 63.
// Each digit is produced by Base63Char(n % 63) and appended to `res`.
// NOTE(review): the loop header and the step that advances `n` are not
// visible in this excerpt.
184 template <typename I>
185 std::string Base63(I n, int k) {
188 res += Base63Char(static_cast<int>(n % 63));
// Returns the C++ spelling of the integer typedef `type` (e.g. "int32").
// For the open-source runtime the name is qualified with
// "::PROTOBUF_NAMESPACE_ID::".
// NOTE(review): the non-open-source branch is not visible in this excerpt.
194 std::string IntTypeName(const Options& options, const std::string& type) {
195 if (options.opensource_runtime) {
196 return "::PROTOBUF_NAMESPACE_ID::" + type;
202 void SetIntVar(const Options& options, const std::string& type,
203 std::map<std::string, std::string>* variables) {
204 (*variables)[type] = IntTypeName(options, type);
// Populates `variables` with the substitutions shared by all generated code:
// "proto_ns", the "GOOGLE_PROTOBUF"/"CHK"/"DCHK" macro names, the integer
// typedef names (via SetIntVar) and "string".
// NOTE(review): the right-hand sides of the non-open-source assignments are
// not visible in this excerpt.
209 void SetCommonVars(const Options& options,
210 std::map<std::string, std::string>* variables) {
211 (*variables)["proto_ns"] = ProtobufNamespace(options);
213 // Warning: there is some clever naming/splitting here to avoid extract script
214 // rewrites. The names of these variables must not be things that the extract
215 // script will rewrite. That's why we use "CHK" (for example) instead of
217 if (options.opensource_runtime) {
218 (*variables)["GOOGLE_PROTOBUF"] = "GOOGLE_PROTOBUF";
219 (*variables)["CHK"] = "GOOGLE_CHECK";
220 (*variables)["DCHK"] = "GOOGLE_DCHECK";
222 // These values are things the extract script would rewrite if we did not
223 // split them. It might not strictly matter since we don't generate google3
224 // code in open-source. But it's good to prevent surprising things from
226 (*variables)["GOOGLE_PROTOBUF"] =
229 (*variables)["CHK"] =
232 (*variables)["DCHK"] =
// Integer typedef names, each keyed by its own short name.
237 SetIntVar(options, "int8", variables);
238 SetIntVar(options, "uint8", variables);
239 SetIntVar(options, "uint32", variables);
240 SetIntVar(options, "uint64", variables);
241 SetIntVar(options, "int32", variables);
242 SetIntVar(options, "int64", variables);
243 (*variables)["string"] = "std::string";
// Converts an identifier such as "foo_bar" to camel case.
//
// Letters and digits are copied to the output; every other character (e.g.
// '_') is dropped and causes the next lowercase letter to be capitalized.
// A digit is kept and also capitalizes the lowercase letter that follows it.
// `cap_next_letter` selects whether the first letter is capitalized.
std::string UnderscoresToCamelCase(const std::string& input,
                                   bool cap_next_letter) {
  std::string result;
  // Note:  I distrust ctype.h due to locales.
  for (const char c : input) {
    if ('a' <= c && c <= 'z') {
      result += cap_next_letter ? static_cast<char>(c + ('A' - 'a')) : c;
      cap_next_letter = false;
    } else if ('A' <= c && c <= 'Z') {
      // Capital letters are left as-is.
      result += c;
      cap_next_letter = false;
    } else if ('0' <= c && c <= '9') {
      result += c;
      cap_next_letter = true;
    } else {
      // Separator: emit nothing, capitalize whatever letter comes next.
      cap_next_letter = true;
    }
  }
  return result;
}
// Comment-line rulers ('=' and '-' variants); each string is a complete
// "//" comment line ending in a newline.
272 const char kThickSeparator[] =
273 "// ===================================================================\n";
274 const char kThinSeparator[] =
275 "// -------------------------------------------------------------------\n";
277 bool CanInitializeByZeroing(const FieldDescriptor* field) {
278 if (field->is_repeated() || field->is_extension()) return false;
279 switch (field->cpp_type()) {
280 case FieldDescriptor::CPPTYPE_ENUM:
281 return field->default_value_enum()->number() == 0;
282 case FieldDescriptor::CPPTYPE_INT32:
283 return field->default_value_int32() == 0;
284 case FieldDescriptor::CPPTYPE_INT64:
285 return field->default_value_int64() == 0;
286 case FieldDescriptor::CPPTYPE_UINT32:
287 return field->default_value_uint32() == 0;
288 case FieldDescriptor::CPPTYPE_UINT64:
289 return field->default_value_uint64() == 0;
290 case FieldDescriptor::CPPTYPE_FLOAT:
291 return field->default_value_float() == 0;
292 case FieldDescriptor::CPPTYPE_DOUBLE:
293 return field->default_value_double() == 0;
294 case FieldDescriptor::CPPTYPE_BOOL:
295 return field->default_value_bool() == false;
301 std::string ClassName(const Descriptor* descriptor) {
302 const Descriptor* parent = descriptor->containing_type();
304 if (parent) res += ClassName(parent) + "_";
305 res += descriptor->name();
306 if (IsMapEntryMessage(descriptor)) res += "_DoNotUse";
307 return ResolveKeyword(res);
310 std::string ClassName(const EnumDescriptor* enum_descriptor) {
311 if (enum_descriptor->containing_type() == nullptr) {
312 return ResolveKeyword(enum_descriptor->name());
314 return ClassName(enum_descriptor->containing_type()) + "_" +
315 enum_descriptor->name();
319 std::string QualifiedClassName(const Descriptor* d, const Options& options) {
320 return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
323 std::string QualifiedClassName(const EnumDescriptor* d,
324 const Options& options) {
325 return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
328 std::string QualifiedClassName(const Descriptor* d) {
329 return QualifiedClassName(d, Options());
332 std::string QualifiedClassName(const EnumDescriptor* d) {
333 return QualifiedClassName(d, Options());
336 std::string Namespace(const std::string& package) {
337 if (package.empty()) return "";
338 return "::" + DotsToColons(package);
// Returns the C++ namespace for `d`'s package. For well-known files built
// against the open-source runtime, part of the namespace is rewritten to
// "PROTOBUF_NAMESPACE_ID" via StringReplace.
// NOTE(review): the search-string argument of StringReplace and the final
// return are not visible in this excerpt.
341 std::string Namespace(const FileDescriptor* d, const Options& options) {
342 std::string ret = Namespace(d->package());
343 if (IsWellKnownMessage(d) && options.opensource_runtime) {
344 // Written with string concatenation to prevent rewriting of
345 // ::google::protobuf.
346 ret = StringReplace(ret,
349 "PROTOBUF_NAMESPACE_ID", false);
354 std::string Namespace(const Descriptor* d, const Options& options) {
355 return Namespace(d->file(), options);
358 std::string Namespace(const FieldDescriptor* d, const Options& options) {
359 return Namespace(d->file(), options);
362 std::string Namespace(const EnumDescriptor* d, const Options& options) {
363 return Namespace(d->file(), options);
366 std::string DefaultInstanceType(const Descriptor* descriptor,
367 const Options& options) {
368 return ClassName(descriptor) + "DefaultTypeInternal";
371 std::string DefaultInstanceName(const Descriptor* descriptor,
372 const Options& options) {
373 return "_" + ClassName(descriptor, false) + "_default_instance_";
376 std::string QualifiedDefaultInstanceName(const Descriptor* descriptor,
377 const Options& options) {
378 return QualifiedFileLevelSymbol(
379 descriptor->file(), DefaultInstanceName(descriptor, options), options);
382 std::string DescriptorTableName(const FileDescriptor* file,
383 const Options& options) {
384 return UniqueName("descriptor_table", file, options);
387 std::string FileDllExport(const FileDescriptor* file, const Options& options) {
388 return UniqueName("PROTOBUF_INTERNAL_EXPORT", file, options);
391 std::string ReferenceFunctionName(const Descriptor* descriptor,
392 const Options& options) {
393 return QualifiedClassName(descriptor, options) + "_ReferenceStrong";
// Returns the qualified name of the runtime base class for a generated
// message: "::<proto namespace>::Message" when the file has descriptor
// methods.
// NOTE(review): the alternative used when descriptor methods are absent is
// not visible in this excerpt.
396 std::string SuperClassName(const Descriptor* descriptor,
397 const Options& options) {
398 return "::" + ProtobufNamespace(options) +
399 (HasDescriptorMethods(descriptor->file(), options) ? "::Message"
403 std::string ResolveKeyword(const string& name) {
404 if (kKeywords.count(name) > 0) {
410 std::string FieldName(const FieldDescriptor* field) {
411 std::string result = field->name();
412 LowerString(&result);
413 if (kKeywords.count(result) > 0) {
419 std::string EnumValueName(const EnumValueDescriptor* enum_value) {
420 std::string result = enum_value->name();
421 if (kKeywords.count(result) > 0) {
// Estimates the alignment, in bytes, of the storage for `field`. A null
// field yields 0 and a repeated field yields 8; singular fields are grouped
// by C++ type in the switch below.
// NOTE(review): the value returned for each case group is not visible in
// this excerpt.
427 int EstimateAlignmentSize(const FieldDescriptor* field) {
428 if (field == nullptr) return 0;
429 if (field->is_repeated()) return 8;
430 switch (field->cpp_type()) {
431 case FieldDescriptor::CPPTYPE_BOOL:
434 case FieldDescriptor::CPPTYPE_INT32:
435 case FieldDescriptor::CPPTYPE_UINT32:
436 case FieldDescriptor::CPPTYPE_ENUM:
437 case FieldDescriptor::CPPTYPE_FLOAT:
440 case FieldDescriptor::CPPTYPE_INT64:
441 case FieldDescriptor::CPPTYPE_UINT64:
442 case FieldDescriptor::CPPTYPE_DOUBLE:
443 case FieldDescriptor::CPPTYPE_STRING:
444 case FieldDescriptor::CPPTYPE_MESSAGE:
// Reached only if cpp_type() is not one of the cases above.
447 GOOGLE_LOG(FATAL) << "Can't get here.";
448 return -1; // Make compiler happy.
451 std::string FieldConstantName(const FieldDescriptor* field) {
452 std::string field_name = UnderscoresToCamelCase(field->name(), true);
453 std::string result = "k" + field_name + "FieldNumber";
455 if (!field->is_extension() &&
456 field->containing_type()->FindFieldByCamelcaseName(
457 field->camelcase_name()) != field) {
458 // This field's camelcase name is not unique. As a hack, add the field
459 // number to the constant name. This makes the constant rather useless,
460 // but what can we do?
461 result += "_" + StrCat(field->number());
467 std::string FieldMessageTypeName(const FieldDescriptor* field,
468 const Options& options) {
469 // Note: The Google-internal version of Protocol Buffers uses this function
470 // as a hook point for hacks to support legacy code.
471 return QualifiedClassName(field->message_type(), options);
474 std::string StripProto(const std::string& filename) {
475 if (HasSuffixString(filename, ".protodevel")) {
476 return StripSuffixString(filename, ".protodevel");
478 return StripSuffixString(filename, ".proto");
// Maps a field's C++ type to the spelling of the corresponding C++ type in
// generated code. The integer cases return hard-coded
// "::google::protobuf::" typedef names; strings map to "std::string".
// NOTE(review): the switch header and the return values for double, float,
// bool, enum and message are not visible in this excerpt.
482 const char* PrimitiveTypeName(FieldDescriptor::CppType type) {
484 case FieldDescriptor::CPPTYPE_INT32:
485 return "::google::protobuf::int32";
486 case FieldDescriptor::CPPTYPE_INT64:
487 return "::google::protobuf::int64";
488 case FieldDescriptor::CPPTYPE_UINT32:
489 return "::google::protobuf::uint32";
490 case FieldDescriptor::CPPTYPE_UINT64:
491 return "::google::protobuf::uint64";
492 case FieldDescriptor::CPPTYPE_DOUBLE:
494 case FieldDescriptor::CPPTYPE_FLOAT:
496 case FieldDescriptor::CPPTYPE_BOOL:
498 case FieldDescriptor::CPPTYPE_ENUM:
500 case FieldDescriptor::CPPTYPE_STRING:
501 return "std::string";
502 case FieldDescriptor::CPPTYPE_MESSAGE:
505 // No default because we want the compiler to complain if any new
506 // CppTypes are added.
// Reached only for a CppType value outside the enumerated cases.
509 GOOGLE_LOG(FATAL) << "Can't get here.";
// Options-aware variant of PrimitiveTypeName: the integer cases go through
// IntTypeName(options, ...) so the qualification depends on the runtime
// flavor; strings map to "std::string".
// NOTE(review): the switch header and the return values for double, float,
// bool, enum and message are not visible in this excerpt.
513 std::string PrimitiveTypeName(const Options& options,
514 FieldDescriptor::CppType type) {
516 case FieldDescriptor::CPPTYPE_INT32:
517 return IntTypeName(options, "int32");
518 case FieldDescriptor::CPPTYPE_INT64:
519 return IntTypeName(options, "int64");
520 case FieldDescriptor::CPPTYPE_UINT32:
521 return IntTypeName(options, "uint32");
522 case FieldDescriptor::CPPTYPE_UINT64:
523 return IntTypeName(options, "uint64");
524 case FieldDescriptor::CPPTYPE_DOUBLE:
526 case FieldDescriptor::CPPTYPE_FLOAT:
528 case FieldDescriptor::CPPTYPE_BOOL:
530 case FieldDescriptor::CPPTYPE_ENUM:
532 case FieldDescriptor::CPPTYPE_STRING:
533 return "std::string";
534 case FieldDescriptor::CPPTYPE_MESSAGE:
537 // No default because we want the compiler to complain if any new
538 // CppTypes are added.
// Reached only for a CppType value outside the enumerated cases.
541 GOOGLE_LOG(FATAL) << "Can't get here.";
// Maps a declared (wire-level) field type to a C string, one per
// FieldDescriptor::Type case.
// NOTE(review): the switch header and every returned string are not visible
// in this excerpt; presumably they are method-name fragments — confirm
// against the full file.
545 const char* DeclaredTypeMethodName(FieldDescriptor::Type type) {
547 case FieldDescriptor::TYPE_INT32:
549 case FieldDescriptor::TYPE_INT64:
551 case FieldDescriptor::TYPE_UINT32:
553 case FieldDescriptor::TYPE_UINT64:
555 case FieldDescriptor::TYPE_SINT32:
557 case FieldDescriptor::TYPE_SINT64:
559 case FieldDescriptor::TYPE_FIXED32:
561 case FieldDescriptor::TYPE_FIXED64:
563 case FieldDescriptor::TYPE_SFIXED32:
565 case FieldDescriptor::TYPE_SFIXED64:
567 case FieldDescriptor::TYPE_FLOAT:
569 case FieldDescriptor::TYPE_DOUBLE:
572 case FieldDescriptor::TYPE_BOOL:
574 case FieldDescriptor::TYPE_ENUM:
577 case FieldDescriptor::TYPE_STRING:
579 case FieldDescriptor::TYPE_BYTES:
581 case FieldDescriptor::TYPE_GROUP:
583 case FieldDescriptor::TYPE_MESSAGE:
586 // No default because we want the compiler to complain if any new
// Reached only for a Type value outside the enumerated cases.
589 GOOGLE_LOG(FATAL) << "Can't get here.";
593 std::string Int32ToString(int number) {
594 if (number == kint32min) {
595 // This needs to be special-cased, see explanation here:
596 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
597 return StrCat(number + 1, " - 1");
599 return StrCat(number);
603 std::string Int64ToString(const std::string& macro_prefix, int64 number) {
604 if (number == kint64min) {
605 // This needs to be special-cased, see explanation here:
606 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
607 return StrCat(macro_prefix, "_LONGLONG(", number + 1, ") - 1");
609 return StrCat(macro_prefix, "_LONGLONG(", number, ")");
612 std::string UInt64ToString(const std::string& macro_prefix, uint64 number) {
613 return StrCat(macro_prefix, "_ULONGLONG(", number, ")");
616 std::string DefaultValue(const FieldDescriptor* field) {
617 switch (field->cpp_type()) {
618 case FieldDescriptor::CPPTYPE_INT64:
619 return Int64ToString("GG", field->default_value_int64());
620 case FieldDescriptor::CPPTYPE_UINT64:
621 return UInt64ToString("GG", field->default_value_uint64());
623 return DefaultValue(Options(), field);
// Returns C++ source text that evaluates to the declared default value of
// `field`, dispatching on the field's C++ type.
// NOTE(review): several short lines (else branches, some returns, parts of
// the string case) are not visible in this excerpt.
627 std::string DefaultValue(const Options& options, const FieldDescriptor* field) {
628 switch (field->cpp_type()) {
629 case FieldDescriptor::CPPTYPE_INT32:
630 return Int32ToString(field->default_value_int32());
631 case FieldDescriptor::CPPTYPE_UINT32:
// Unsigned literal suffix.
632 return StrCat(field->default_value_uint32()) + "u";
633 case FieldDescriptor::CPPTYPE_INT64:
634 return Int64ToString("PROTOBUF", field->default_value_int64());
635 case FieldDescriptor::CPPTYPE_UINT64:
636 return UInt64ToString("PROTOBUF", field->default_value_uint64());
637 case FieldDescriptor::CPPTYPE_DOUBLE: {
638 double value = field->default_value_double();
// Infinities and NaN have no literal form, so std::numeric_limits
// expressions are emitted for them.
639 if (value == std::numeric_limits<double>::infinity()) {
640 return "std::numeric_limits<double>::infinity()";
641 } else if (value == -std::numeric_limits<double>::infinity()) {
642 return "-std::numeric_limits<double>::infinity()";
// value != value holds only for NaN.
643 } else if (value != value) {
644 return "std::numeric_limits<double>::quiet_NaN()";
646 return SimpleDtoa(value);
649 case FieldDescriptor::CPPTYPE_FLOAT: {
650 float value = field->default_value_float();
651 if (value == std::numeric_limits<float>::infinity()) {
652 return "std::numeric_limits<float>::infinity()";
653 } else if (value == -std::numeric_limits<float>::infinity()) {
654 return "-std::numeric_limits<float>::infinity()";
// value != value holds only for NaN.
655 } else if (value != value) {
656 return "std::numeric_limits<float>::quiet_NaN()";
658 std::string float_value = SimpleFtoa(value);
659 // If floating point value contains a period (.) or an exponent
660 // (either E or e), then append suffix 'f' to make it a float
662 if (float_value.find_first_of(".eE") != string::npos) {
663 float_value.push_back('f');
668 case FieldDescriptor::CPPTYPE_BOOL:
669 return field->default_value_bool() ? "true" : "false";
670 case FieldDescriptor::CPPTYPE_ENUM:
671 // Lazy: Generate a static_cast because we don't have a helper function
672 // that constructs the full name of an enum value.
673 return strings::Substitute(
674 "static_cast< $0 >($1)", ClassName(field->enum_type(), true),
675 Int32ToString(field->default_value_enum()->number()));
676 case FieldDescriptor::CPPTYPE_STRING:
// The default string is C-escaped and then has '?' escaped.
678 EscapeTrigraphs(CEscape(field->default_value_string())) +
680 case FieldDescriptor::CPPTYPE_MESSAGE:
681 return "*" + FieldMessageTypeName(field, options) +
682 "::internal_default_instance()";
684 // Can't actually get here; make compiler happy. (We could add a default
685 // case above but then we wouldn't get the nice compiler warning when a
686 // new type is added.)
687 GOOGLE_LOG(FATAL) << "Can't get here.";
691 // Convert a file name into a valid identifier.
692 std::string FilenameIdentifier(const std::string& filename) {
694 for (int i = 0; i < filename.size(); i++) {
695 if (ascii_isalnum(filename[i])) {
696 result.push_back(filename[i]);
698 // Not alphanumeric. To avoid any possibility of name conflicts we
699 // use the hex code for the character.
700 StrAppend(&result, "_", strings::Hex(static_cast<uint8>(filename[i])));
706 string UniqueName(const std::string& name, const std::string& filename,
707 const Options& options) {
708 return name + "_" + FilenameIdentifier(filename);
711 // Return the qualified C++ name for a file level symbol.
712 std::string QualifiedFileLevelSymbol(const FileDescriptor* file,
713 const std::string& name,
714 const Options& options) {
715 if (file->package().empty()) {
716 return StrCat("::", name);
718 return StrCat(Namespace(file, options), "::", name);
721 // Escape C++ trigraphs by escaping question marks to \?
722 std::string EscapeTrigraphs(const std::string& to_escape) {
723 return StringReplace(to_escape, "?", "\\?", true);
726 // Escaped function name to eliminate naming conflict.
// Builds "<prefix><field name>". "__" is appended if `descriptor` already has
// a field with that exact name; otherwise "_" is appended if the field name
// is in kKeywords.
// NOTE(review): a statement between the two local declarations is not
// visible in this excerpt.
727 std::string SafeFunctionName(const Descriptor* descriptor,
728 const FieldDescriptor* field,
729 const std::string& prefix) {
730 // Do not use FieldName() since it will escape keywords.
731 std::string name = field->name();
733 std::string function_name = prefix + name;
734 if (descriptor->FindFieldByName(function_name)) {
735 // Single underscore will also make it conflicting with the private data
736 // member. We use double underscore to escape function names.
737 function_name.append("__");
738 } else if (kKeywords.count(name) > 0) {
739 // If the field name is a keyword, we append the underscore back to keep it
740 // consistent with other function names.
741 function_name.append("_");
743 return function_name;
// Decides whether a string field is stored inline. Returns false for the
// open-source runtime, for fields of the Any message, for map-entry fields,
// and for files without field presence. With an access_info_map, required
// fields return true.
// NOTE(review): the result for the remaining cases is not visible in this
// excerpt.
746 bool IsStringInlined(const FieldDescriptor* descriptor,
747 const Options& options) {
748 if (options.opensource_runtime) return false;
750 // TODO(ckennelly): Handle inlining for any.proto.
751 if (IsAnyMessage(descriptor->containing_type(), options)) return false;
752 if (descriptor->containing_type()->options().map_entry()) return false;
754 // Limit to proto2, as we rely on has bits to distinguish field presence for
755 // release_$name$. On proto3, we cannot use the address of the string
756 // instance when the field has been inlined.
757 if (!HasFieldPresence(descriptor->file())) return false;
759 if (options.access_info_map) {
760 if (descriptor->is_required()) return true;
765 static bool HasLazyFields(const Descriptor* descriptor,
766 const Options& options) {
767 for (int field_idx = 0; field_idx < descriptor->field_count(); field_idx++) {
768 if (IsLazy(descriptor->field(field_idx), options)) {
772 for (int idx = 0; idx < descriptor->extension_count(); idx++) {
773 if (IsLazy(descriptor->extension(idx), options)) {
777 for (int idx = 0; idx < descriptor->nested_type_count(); idx++) {
778 if (HasLazyFields(descriptor->nested_type(idx), options)) {
785 // Does the given FileDescriptor use lazy fields?
786 bool HasLazyFields(const FileDescriptor* file, const Options& options) {
787 for (int i = 0; i < file->message_type_count(); i++) {
788 const Descriptor* descriptor(file->message_type(i));
789 if (HasLazyFields(descriptor, options)) {
793 for (int field_idx = 0; field_idx < file->extension_count(); field_idx++) {
794 if (IsLazy(file->extension(field_idx), options)) {
801 static bool HasRepeatedFields(const Descriptor* descriptor) {
802 for (int i = 0; i < descriptor->field_count(); ++i) {
803 if (descriptor->field(i)->label() == FieldDescriptor::LABEL_REPEATED) {
807 for (int i = 0; i < descriptor->nested_type_count(); ++i) {
808 if (HasRepeatedFields(descriptor->nested_type(i))) return true;
813 bool HasRepeatedFields(const FileDescriptor* file) {
814 for (int i = 0; i < file->message_type_count(); ++i) {
815 if (HasRepeatedFields(file->message_type(i))) return true;
820 static bool IsStringPieceField(const FieldDescriptor* field,
821 const Options& options) {
822 return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
823 EffectiveStringCType(field, options) == FieldOptions::STRING_PIECE;
826 static bool HasStringPieceFields(const Descriptor* descriptor,
827 const Options& options) {
828 for (int i = 0; i < descriptor->field_count(); ++i) {
829 if (IsStringPieceField(descriptor->field(i), options)) return true;
831 for (int i = 0; i < descriptor->nested_type_count(); ++i) {
832 if (HasStringPieceFields(descriptor->nested_type(i), options)) return true;
837 bool HasStringPieceFields(const FileDescriptor* file, const Options& options) {
838 for (int i = 0; i < file->message_type_count(); ++i) {
839 if (HasStringPieceFields(file->message_type(i), options)) return true;
844 static bool IsCordField(const FieldDescriptor* field, const Options& options) {
845 return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
846 EffectiveStringCType(field, options) == FieldOptions::CORD;
849 static bool HasCordFields(const Descriptor* descriptor,
850 const Options& options) {
851 for (int i = 0; i < descriptor->field_count(); ++i) {
852 if (IsCordField(descriptor->field(i), options)) return true;
854 for (int i = 0; i < descriptor->nested_type_count(); ++i) {
855 if (HasCordFields(descriptor->nested_type(i), options)) return true;
860 bool HasCordFields(const FileDescriptor* file, const Options& options) {
861 for (int i = 0; i < file->message_type_count(); ++i) {
862 if (HasCordFields(file->message_type(i), options)) return true;
867 static bool HasExtensionsOrExtendableMessage(const Descriptor* descriptor) {
868 if (descriptor->extension_range_count() > 0) return true;
869 if (descriptor->extension_count() > 0) return true;
870 for (int i = 0; i < descriptor->nested_type_count(); ++i) {
871 if (HasExtensionsOrExtendableMessage(descriptor->nested_type(i))) {
878 bool HasExtensionsOrExtendableMessage(const FileDescriptor* file) {
879 if (file->extension_count() > 0) return true;
880 for (int i = 0; i < file->message_type_count(); ++i) {
881 if (HasExtensionsOrExtendableMessage(file->message_type(i))) return true;
886 static bool HasMapFields(const Descriptor* descriptor) {
887 for (int i = 0; i < descriptor->field_count(); ++i) {
888 if (descriptor->field(i)->is_map()) {
892 for (int i = 0; i < descriptor->nested_type_count(); ++i) {
893 if (HasMapFields(descriptor->nested_type(i))) return true;
898 bool HasMapFields(const FileDescriptor* file) {
899 for (int i = 0; i < file->message_type_count(); ++i) {
900 if (HasMapFields(file->message_type(i))) return true;
905 static bool HasEnumDefinitions(const Descriptor* message_type) {
906 if (message_type->enum_type_count() > 0) return true;
907 for (int i = 0; i < message_type->nested_type_count(); ++i) {
908 if (HasEnumDefinitions(message_type->nested_type(i))) return true;
913 bool HasEnumDefinitions(const FileDescriptor* file) {
914 if (file->enum_type_count() > 0) return true;
915 for (int i = 0; i < file->message_type_count(); ++i) {
916 if (HasEnumDefinitions(file->message_type(i))) return true;
921 bool IsStringOrMessage(const FieldDescriptor* field) {
922 switch (field->cpp_type()) {
923 case FieldDescriptor::CPPTYPE_INT32:
924 case FieldDescriptor::CPPTYPE_INT64:
925 case FieldDescriptor::CPPTYPE_UINT32:
926 case FieldDescriptor::CPPTYPE_UINT64:
927 case FieldDescriptor::CPPTYPE_DOUBLE:
928 case FieldDescriptor::CPPTYPE_FLOAT:
929 case FieldDescriptor::CPPTYPE_BOOL:
930 case FieldDescriptor::CPPTYPE_ENUM:
932 case FieldDescriptor::CPPTYPE_STRING:
933 case FieldDescriptor::CPPTYPE_MESSAGE:
937 GOOGLE_LOG(FATAL) << "Can't get here.";
941 FieldOptions::CType EffectiveStringCType(const FieldDescriptor* field,
942 const Options& options) {
943 GOOGLE_DCHECK(field->cpp_type() == FieldDescriptor::CPPTYPE_STRING);
944 if (options.opensource_runtime) {
945 // Open-source protobuf release only supports STRING ctype.
946 return FieldOptions::STRING;
948 // Google-internal supports all ctypes.
949 return field->options().ctype();
953 bool IsAnyMessage(const FileDescriptor* descriptor, const Options& options) {
954 return descriptor->name() == kAnyProtoFile;
957 bool IsAnyMessage(const Descriptor* descriptor, const Options& options) {
958 return descriptor->name() == kAnyMessageName &&
959 IsAnyMessage(descriptor->file(), options);
962 bool IsWellKnownMessage(const FileDescriptor* file) {
963 static const std::unordered_set<std::string> well_known_files{
964 "google/protobuf/any.proto",
965 "google/protobuf/api.proto",
966 "google/protobuf/compiler/plugin.proto",
967 "google/protobuf/descriptor.proto",
968 "google/protobuf/duration.proto",
969 "google/protobuf/empty.proto",
970 "google/protobuf/field_mask.proto",
971 "google/protobuf/source_context.proto",
972 "google/protobuf/struct.proto",
973 "google/protobuf/timestamp.proto",
974 "google/protobuf/type.proto",
975 "google/protobuf/wrappers.proto",
977 return well_known_files.find(file->name()) != well_known_files.end();
// Enumerators for the level of UTF-8 checking applied to a string field.
// NOTE(review): the enclosing enum declaration line is not visible in this
// excerpt; GetUtf8CheckMode() returns a type named Utf8CheckMode.
981 STRICT = 0, // Parsing will fail if non UTF-8 data is in string fields.
982 VERIFY = 1, // Only log an error but parsing will succeed.
983 NONE = 2, // No UTF-8 check.
// Predicate consulted by GetUtf8CheckMode() for proto3 fields.
// NOTE(review): the function body is not visible in this excerpt.
986 static bool FieldEnforceUtf8(const FieldDescriptor* field,
987 const Options& options) {
// Predicate consulted by GetUtf8CheckMode() for files not optimized for
// LITE_RUNTIME.
// NOTE(review): the function body is not visible in this excerpt.
991 static bool FileUtf8Verification(const FileDescriptor* file,
992 const Options& options) {
996 // Which level of UTF-8 enforcement is placed on this file.
// The first branch applies to proto3 files where FieldEnforceUtf8() holds;
// the second to files not optimized for LITE_RUNTIME where
// FileUtf8Verification() holds.
// NOTE(review): the value returned by each branch is not visible in this
// excerpt.
997 static Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field,
998 const Options& options) {
999 if (field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3 &&
1000 FieldEnforceUtf8(field, options)) {
1002 } else if (GetOptimizeFor(field->file(), options) !=
1003 FileOptions::LITE_RUNTIME &&
1004 FileUtf8Verification(field->file(), options)) {
// Returns a name suffix chosen by the field's UTF-8 check mode; one of the
// modes yields "UTF8Verify".
// NOTE(review): the case labels and the other returned strings are not
// visible in this excerpt.
1011 std::string GetUtf8Suffix(const FieldDescriptor* field,
1012 const Options& options) {
1013 switch (GetUtf8CheckMode(field, options)) {
1017 return "UTF8Verify";
1019 default: // Some build configs warn on missing return without default.
// Emits, through `format`, a call to a UTF-8 checking routine for `field`,
// chosen by GetUtf8CheckMode(). One branch calls
// WireFormatLite::<strict_function>, another WireFormat::<verify_function>;
// each passes a PARSE or SERIALIZE operation constant and the field's full
// name as a string literal.
// NOTE(review): the case labels, the use of `parameters`/`for_parse`, and
// the surrounding emitted text are not visible in this excerpt.
1024 static void GenerateUtf8CheckCode(const FieldDescriptor* field,
1025 const Options& options, bool for_parse,
1026 const char* parameters,
1027 const char* strict_function,
1028 const char* verify_function,
1029 const Formatter& format) {
1030 switch (GetUtf8CheckMode(field, options)) {
1035 format("::$proto_ns$::internal::WireFormatLite::$1$(\n", strict_function);
1039 format("::$proto_ns$::internal::WireFormatLite::PARSE,\n");
1041 format("::$proto_ns$::internal::WireFormatLite::SERIALIZE,\n");
1043 format("\"$1$\")", field->full_name());
1052 format("::$proto_ns$::internal::WireFormat::$1$(\n", verify_function);
1056 format("::$proto_ns$::internal::WireFormat::PARSE,\n");
1058 format("::$proto_ns$::internal::WireFormat::SERIALIZE,\n");
1060 format("\"$1$\");\n", field->full_name());
1069 void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
1070 const Options& options, bool for_parse,
1071 const char* parameters,
1072 const Formatter& format) {
1073 GenerateUtf8CheckCode(field, options, for_parse, parameters,
1074 "VerifyUtf8String", "VerifyUTF8StringNamedField",
1078 void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
1079 const Options& options, bool for_parse,
1080 const char* parameters,
1081 const Formatter& format) {
1082 GenerateUtf8CheckCode(field, options, for_parse, parameters, "VerifyUtf8Cord",
1083 "VerifyUTF8CordNamedField", format);
1088 void Flatten(const Descriptor* descriptor,
1089 std::vector<const Descriptor*>* flatten) {
1090 for (int i = 0; i < descriptor->nested_type_count(); i++)
1091 Flatten(descriptor->nested_type(i), flatten);
1092 flatten->push_back(descriptor);
1097 void FlattenMessagesInFile(const FileDescriptor* file,
1098 std::vector<const Descriptor*>* result) {
1099 for (int i = 0; i < file->message_type_count(); i++) {
1100 Flatten(file->message_type(i), result);
1104 bool HasWeakFields(const Descriptor* descriptor, const Options& options) {
1105 for (int i = 0; i < descriptor->field_count(); i++) {
1106 if (IsWeak(descriptor->field(i), options)) return true;
1111 bool HasWeakFields(const FileDescriptor* file, const Options& options) {
1112 for (int i = 0; i < file->message_type_count(); ++i) {
1113 if (HasWeakFields(file->message_type(i), options)) return true;
1118 bool UsingImplicitWeakFields(const FileDescriptor* file,
1119 const Options& options) {
1120 return options.lite_implicit_weak_fields &&
1121 GetOptimizeFor(file, options) == FileOptions::LITE_RUNTIME;
1124 bool IsImplicitWeakField(const FieldDescriptor* field, const Options& options,
1125 MessageSCCAnalyzer* scc_analyzer) {
1126 return UsingImplicitWeakFields(field->file(), options) &&
1127 field->type() == FieldDescriptor::TYPE_MESSAGE &&
1128 !field->is_required() && !field->is_map() &&
1129 field->containing_oneof() == nullptr &&
1130 !IsWellKnownMessage(field->message_type()->file()) &&
1131 field->message_type()->file()->name() !=
1132 "net/proto2/proto/descriptor.proto" &&
1133 // We do not support implicit weak fields between messages in the same
1134 // strongly-connected component.
1135 scc_analyzer->GetSCC(field->containing_type()) !=
1136 scc_analyzer->GetSCC(field->message_type());
1139 MessageAnalysis MessageSCCAnalyzer::GetSCCAnalysis(const SCC* scc) {
1140 if (analysis_cache_.count(scc)) return analysis_cache_[scc];
1141 MessageAnalysis result{};
1142 for (int i = 0; i < scc->descriptors.size(); i++) {
1143 const Descriptor* descriptor = scc->descriptors[i];
1144 if (descriptor->extension_range_count() > 0) {
1145 result.contains_extension = true;
1146 // Extensions are found by looking up default_instance and extension
1147 // number in a map. So you'd maybe expect here
1148 // result.constructor_requires_initialization = true;
1149 // However the extension registration mechanism already makes sure
1150 // the default will be initialized.
1152 for (int i = 0; i < descriptor->field_count(); i++) {
1153 const FieldDescriptor* field = descriptor->field(i);
1154 if (field->is_required()) {
1155 result.contains_required = true;
1157 switch (field->type()) {
1158 case FieldDescriptor::TYPE_STRING:
1159 case FieldDescriptor::TYPE_BYTES: {
1160 result.constructor_requires_initialization = true;
1161 if (field->options().ctype() == FieldOptions::CORD) {
1162 result.contains_cord = true;
1166 case FieldDescriptor::TYPE_GROUP:
1167 case FieldDescriptor::TYPE_MESSAGE: {
1168 result.constructor_requires_initialization = true;
1169 const SCC* child = analyzer_.GetSCC(field->message_type());
1171 MessageAnalysis analysis = GetSCCAnalysis(child);
1172 result.contains_cord |= analysis.contains_cord;
1173 result.contains_extension |= analysis.contains_extension;
1174 if (!ShouldIgnoreRequiredFieldCheck(field, options_)) {
1175 result.contains_required |= analysis.contains_required;
1178 // This field points back into the same SCC hence the messages
1179 // in the SCC are recursive. Note if SCC contains more than two
1180 // nodes it has to be recursive, however this test also works for
1181 // a single node that is recursive.
1182 result.is_recursive = true;
1191 // We deliberately only insert the result here. After we contracted the SCC
1192 // in the graph, the graph should be a DAG. Hence we shouldn't need to mark
1193 // nodes visited as we can never return to them. By inserting them here
1194 // we will go in an infinite loop if the SCC is not correct.
1195 return analysis_cache_[scc] = result;
1198 void ListAllFields(const Descriptor* d,
1199 std::vector<const FieldDescriptor*>* fields) {
1200 // Collect sub messages
1201 for (int i = 0; i < d->nested_type_count(); i++) {
1202 ListAllFields(d->nested_type(i), fields);
1204 // Collect message level extensions.
1205 for (int i = 0; i < d->extension_count(); i++) {
1206 fields->push_back(d->extension(i));
1208 // Add types of fields necessary
1209 for (int i = 0; i < d->field_count(); i++) {
1210 fields->push_back(d->field(i));
1214 void ListAllFields(const FileDescriptor* d,
1215 std::vector<const FieldDescriptor*>* fields) {
1216 // Collect file level message.
1217 for (int i = 0; i < d->message_type_count(); i++) {
1218 ListAllFields(d->message_type(i), fields);
1220 // Collect message level extensions.
1221 for (int i = 0; i < d->extension_count(); i++) {
1222 fields->push_back(d->extension(i));
1226 void ListAllTypesForServices(const FileDescriptor* fd,
1227 std::vector<const Descriptor*>* types) {
1228 for (int i = 0; i < fd->service_count(); i++) {
1229 const ServiceDescriptor* sd = fd->service(i);
1230 for (int j = 0; j < sd->method_count(); j++) {
1231 const MethodDescriptor* method = sd->method(j);
1232 types->push_back(method->input_type());
1233 types->push_back(method->output_type());
1238 bool GetBootstrapBasename(const Options& options, const std::string& basename,
1239 std::string* bootstrap_basename) {
1240 if (options.opensource_runtime) {
1244 std::unordered_map<std::string, std::string> bootstrap_mapping{
1245 {"net/proto2/proto/descriptor",
1246 "net/proto2/internal/descriptor"},
1247 {"net/proto2/compiler/proto/plugin",
1248 "net/proto2/compiler/proto/plugin"},
1249 {"net/proto2/compiler/proto/profile",
1250 "net/proto2/compiler/proto/profile_bootstrap"},
1252 auto iter = bootstrap_mapping.find(basename);
1253 if (iter == bootstrap_mapping.end()) {
1254 *bootstrap_basename = basename;
1257 *bootstrap_basename = iter->second;
1262 bool IsBootstrapProto(const Options& options, const FileDescriptor* file) {
1263 std::string my_name = StripProto(file->name());
1264 return GetBootstrapBasename(options, my_name, &my_name);
// Special handling for bootstrap protos.
//
// Looks up the bootstrap basename for `*basename` via GetBootstrapBasename().
// When `bootstrap_flag` is set, `*basename` is rewritten to the bootstrap
// basename. Otherwise a set of forwarding files named after `*basename` is
// opened through `generator_context`: a ".pb.h" and a ".proto.h" that
// #include the corresponding bootstrap header, a ".pb.cc" containing a single
// newline, and ".pb.h.meta" / ".proto.h.meta" outputs.
//
// Per the inline comments, the `bootstrap_flag` path does not abort code
// generation while the forwarding path does; the boolean result presumably
// signals that to the caller (NOTE(review): confirm against the call site).
1267 bool MaybeBootstrap(const Options& options, GeneratorContext* generator_context,
1268 bool bootstrap_flag, std::string* basename) {
1269 std::string bootstrap_basename;
// Nothing to do for protos without a bootstrap mapping.
1270 if (!GetBootstrapBasename(options, *basename, &bootstrap_basename)) {
1274 if (bootstrap_flag) {
1275 // Adjust basename, but don't abort code generation.
1276 *basename = bootstrap_basename;
1279 std::string forward_to_basename = bootstrap_basename;
1281 // Generate forwarding headers and empty .pb.cc.
// Forwarding <basename>.pb.h: include guard derived from
// FilenameIdentifier(*basename) around an #include of the bootstrap .pb.h.
1283 std::unique_ptr<io::ZeroCopyOutputStream> output(
1284 generator_context->Open(*basename + ".pb.h"));
1285 io::Printer printer(output.get(), '$', nullptr);
1287 "#ifndef PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n"
1288 "#define PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n"
1289 "#include \"$forward_to_basename$.pb.h\" // IWYU pragma: export\n"
1290 "#endif // PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n",
1291 "forward_to_basename", forward_to_basename, "filename_identifier",
1292 FilenameIdentifier(*basename));
// Non-open-source builds only: extra SWIG %include for protocoltype.
1294 if (!options.opensource_runtime) {
1295 // HACK HACK HACK, tech debt from the deeps of proto1 and SWIG
1296 // protocoltype is SWIG'ed and we need to forward
1297 if (*basename == "net/proto/protocoltype") {
1300 "%include \"$forward_to_basename$.pb.h\"\n"
1302 "forward_to_basename", forward_to_basename);
// Forwarding <basename>.proto.h, same scheme as the .pb.h above.
1308 std::unique_ptr<io::ZeroCopyOutputStream> output(
1309 generator_context->Open(*basename + ".proto.h"));
1310 io::Printer printer(output.get(), '$', nullptr);
1312 "#ifndef PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n"
1313 "#define PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n"
1314 "#include \"$forward_to_basename$.proto.h\" // IWYU pragma: "
1317 "PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n",
1318 "forward_to_basename", forward_to_basename, "filename_identifier",
1319 FilenameIdentifier(*basename));
// <basename>.pb.cc: only a newline is written.
1323 std::unique_ptr<io::ZeroCopyOutputStream> output(
1324 generator_context->Open(*basename + ".pb.cc"));
1325 io::Printer printer(output.get(), '$', nullptr);
1326 printer.Print("\n");
// The two .meta outputs are opened; nothing is printed to them in the code
// shown here.
1330 std::unique_ptr<io::ZeroCopyOutputStream> output(
1331 generator_context->Open(*basename + ".pb.h.meta"));
1335 std::unique_ptr<io::ZeroCopyOutputStream> output(
1336 generator_context->Open(*basename + ".proto.h.meta"));
1339 // Abort code generation.
// Emits the C++ source of a message's `_InternalParse` member function.
// Output is produced through `format_`; `$...$` placeholders in the emitted
// text are format variables (e.g. $classname$, $pi_ns$, $has_bits$) or
// positional arguments ($1$, $2$, ...).
1344 class ParseLoopGenerator {
// `num_hasbits` is the number of has-bits of the message being generated;
// `scc_analyzer` is used for the implicit-weak-field decision.
1346 ParseLoopGenerator(int num_hasbits, const Options& options,
1347 MessageSCCAnalyzer* scc_analyzer, io::Printer* printer)
1348 : scc_analyzer_(scc_analyzer),
1351 num_hasbits_(num_hasbits) {}
// Entry point: emits the complete `_InternalParse` definition for
// `descriptor`.
1353 void GenerateParserLoop(const Descriptor* descriptor) {
// Format variables shared by all emitted snippets.
1354 format_.Set("classname", ClassName(descriptor));
1355 format_.Set("p_ns", "::" + ProtobufNamespace(options_));
1356 format_.Set("pi_ns",
1357 StrCat("::", ProtobufNamespace(options_), "::internal"));
1358 format_.Set("GOOGLE_PROTOBUF", MacroPrefix(options_));
1359 std::map<std::string, std::string> vars;
1360 SetCommonVars(options_, &vars);
1361 format_.AddMap(vars);
// Fields are handled in ascending field-number order.
1363 std::vector<const FieldDescriptor*> ordered_fields;
1364 for (auto field : FieldRange(descriptor)) {
1365 ordered_fields.push_back(field);
1367 std::sort(ordered_fields.begin(), ordered_fields.end(),
1368 [](const FieldDescriptor* a, const FieldDescriptor* b) {
1369 return a->number() < b->number();
// Function header plus the CHK_ helper macro, which jumps to `failure` when
// its argument is false.
1373 "const char* $classname$::_InternalParse(const char* ptr, "
1374 "$pi_ns$::ParseContext* ctx) {\n"
1375 "#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure\n");
// Number of 32-bit words needed for the has-bits; anything other than exactly
// one word is treated as zero (no local has-bits optimization).
1377 int hasbits_size = 0;
1378 if (HasFieldPresence(descriptor->file())) {
1379 hasbits_size = (num_hasbits_ + 31) / 32;
1381 // For now only optimize small hasbits.
1382 if (hasbits_size != 1) hasbits_size = 0;
// $has_bits$ names either a local `has_bits` accumulator or the `_has_bits_`
// member (presumably selected by hasbits_size).
1384 format_("HasBitSetters::HasBits has_bits{};\n");
1385 format_.Set("has_bits", "has_bits");
1387 format_.Set("has_bits", "_has_bits_");
1390 if (descriptor->file()->options().cc_enable_arenas()) {
1391 format_("$p_ns$::Arena* arena = GetArenaNoVirtual(); (void)arena;\n");
1393 GenerateParseLoop(descriptor, ordered_fields);
// On success the local has-bits are OR-ed into the member.
1395 format_("success:\n");
1396 if (hasbits_size) format_(" _has_bits_.Or(has_bits);\n");
1407 MessageSCCAnalyzer* scc_analyzer_;
1408 const Options& options_;
1412 using WireFormat = internal::WireFormat;
1413 using WireFormatLite = internal::WireFormatLite;
// Emits parsing code for a string field whose generated code branches on
// `arena != nullptr`. `utf8` is the parser-name suffix; `field_name` is either
// empty or an extra trailing call argument of the form ", <expr>".
1415 void GenerateArenaString(const FieldDescriptor* field,
1416 const std::string& utf8, std::string field_name) {
// Hoist the field-name expression into a static array in the emitted code and
// pass that array instead.
1417 if (!field_name.empty()) {
1418 format_("static const char kFieldName[] = $1$;\n",
1419 field_name.substr(2)); // remove ", "
1420 field_name = ", kFieldName";
1422 if (HasFieldPresence(field->file())) {
1423 format_("HasBitSetters::set_has_$1$(&$has_bits$);\n", FieldName(field));
// Expression for the field's default: the shared empty string when the
// declared default is empty, otherwise the containing class's default member.
1425 string default_string =
1426 field->default_value_string().empty()
1427 ? "::" + ProtobufNamespace(options_) +
1428 "::internal::GetEmptyStringAlreadyInited()"
1429 : QualifiedClassName(field->containing_type(), options_) +
1430 "::" + MakeDefaultName(field) + ".get()";
1432 "if (arena != nullptr) {\n"
1433 " ptr = $pi_ns$::InlineCopyIntoArenaString$1$(&$2$_, ptr, ctx, "
1437 "$pi_ns$::InlineGreedyStringParser$1$($2$_.MutableNoArenaNoDefault(&$4$"
1440 utf8, FieldName(field), field_name, default_string);
// Emits parsing code for a string or bytes field. `check_utf8` is passed as
// true for TYPE_STRING and false for TYPE_BYTES by GenerateLengthDelim().
1443 void GenerateStrings(const FieldDescriptor* field, bool check_utf8) {
1445 std::string field_name;
// When a UTF-8 suffix applies, the parser also receives a field name argument:
// nullptr, or the field's full name if descriptor methods are available.
1447 utf8 = GetUtf8Suffix(field, options_);
1448 if (!utf8.empty()) {
1449 field_name = ", nullptr";
1450 if (HasDescriptorMethods(field->file(), options_)) {
1451 field_name = StrCat(", \"", field->full_name(), "\"");
1455 FieldOptions::CType ctype = FieldOptions::STRING;
1456 if (!options_.opensource_runtime) {
1457 // Open source doesn't support other ctypes;
1458 ctype = field->options().ctype();
// Arena-string path: arenas enabled, singular, non-open-source, non-lite,
// empty default, not inlined, not in a oneof, plain STRING ctype.
1460 if (field->file()->options().cc_enable_arenas() && !field->is_repeated() &&
1461 !options_.opensource_runtime &&
1462 GetOptimizeFor(field->file(), options_) != FileOptions::LITE_RUNTIME &&
1463 // For now only use arena string for strings with empty defaults.
1464 field->default_value_string().empty() &&
1465 !IsStringInlined(field, options_) &&
1466 field->containing_oneof() == nullptr && ctype == FieldOptions::STRING) {
1467 GenerateArenaString(field, utf8, field_name);
// Otherwise pick the parser family by ctype and append the UTF-8 suffix.
1472 case FieldOptions::STRING:
1473 name = "GreedyStringParser" + utf8;
1475 case FieldOptions::CORD:
1476 name = "CordParser" + utf8;
1478 case FieldOptions::STRING_PIECE:
1479 name = "StringPieceParser" + utf8;
1482 format_("ptr = $pi_ns$::Inline$1$($2$_$3$(), ptr, ctx$4$);\n", name,
1483 field->is_repeated() && !field->is_packable() ? "add" : "mutable",
1484 FieldName(field), field_name);
// Emits the body for a field encountered with the length-delimited wire type:
// packed scalars/enums, strings, bytes and the various message flavors.
1487 void GenerateLengthDelim(const FieldDescriptor* field) {
1488 if (field->is_packable()) {
// Enums without preserving-unknown-enum semantics get extra validator
// arguments appended to the packed parser call.
1489 std::string enum_validator;
1490 if (field->type() == FieldDescriptor::TYPE_ENUM &&
1491 !HasPreservingUnknownEnumSemantics(field)) {
1492 enum_validator = StrCat(
1493 ", ", QualifiedClassName(field->enum_type(), options_),
1494 "_IsValid, mutable_unknown_fields(), ", field->number());
1496 format_("ptr = $pi_ns$::Packed$1$Parser(mutable_$2$(), ptr, ctx$3$);\n",
1497 DeclaredTypeMethodName(field->type()), FieldName(field),
1500 auto field_type = field->type();
1501 switch (field_type) {
1502 case FieldDescriptor::TYPE_STRING:
1503 GenerateStrings(field, true /* utf8 */);
1505 case FieldDescriptor::TYPE_BYTES:
1506 GenerateStrings(field, false /* utf8 */);
1508 case FieldDescriptor::TYPE_MESSAGE: {
// Map fields: look at the entry's "value" field; enum values in files with
// field presence are parsed through an enum-validating wrapper.
1509 if (field->is_map()) {
1510 const FieldDescriptor* val =
1511 field->message_type()->FindFieldByName("value");
1513 if (HasFieldPresence(field->file()) &&
1514 val->type() == FieldDescriptor::TYPE_ENUM) {
1516 "auto object = ::$proto_ns$::internal::InitEnumParseWrapper("
1517 "&$1$_, $2$_IsValid, $3$, &_internal_metadata_);\n"
1518 "ptr = ctx->ParseMessage(&object, ptr);\n",
1519 FieldName(field), QualifiedClassName(val->enum_type()),
1522 format_("ptr = ctx->ParseMessage(&$1$_, ptr);\n",
// Lazy fields: oneof member, field with presence, or plain.
1525 } else if (IsLazy(field, options_)) {
1526 if (field->containing_oneof() != nullptr) {
1528 "if (!has_$1$()) {\n"
1530 " $2$_.$1$_ = ::$proto_ns$::Arena::CreateMessage<\n"
1531 " $pi_ns$::LazyField>("
1532 "GetArenaNoVirtual());\n"
1535 "ptr = ctx->ParseMessage($2$_.$1$_, ptr);\n",
1536 FieldName(field), field->containing_oneof()->name());
1537 } else if (HasFieldPresence(field->file())) {
1539 "HasBitSetters::set_has_$1$(&$has_bits$);\n"
1540 "ptr = ctx->ParseMessage(&$1$_, ptr);\n",
1543 format_("ptr = ctx->ParseMessage(&$1$_, ptr);\n",
// Implicit weak fields: singular goes through HasBitSetters::mutable_<name>,
// repeated through AddWeak() with the message type's default instance.
1546 } else if (IsImplicitWeakField(field, options_, scc_analyzer_)) {
1547 if (!field->is_repeated()) {
1549 "ptr = ctx->ParseMessage(HasBitSetters::mutable_$1$(this), "
1554 "ptr = ctx->ParseMessage("
1555 "CastToBase(&$1$_)->AddWeak(reinterpret_cast<const "
1556 "::$proto_ns$::MessageLite*>(&$2$::_$3$_default_instance_)), "
1558 FieldName(field), Namespace(field->message_type(), options_),
1559 ClassName(field->message_type()));
// Explicit weak fields are stored in _weak_field_map_, keyed by field number.
1561 } else if (IsWeak(field, options_)) {
1563 "ptr = ctx->ParseMessage(_weak_field_map_.MutableMessage($1$,"
1564 " _$classname$_default_instance_.$2$_), ptr);\n",
1565 field->number(), FieldName(field));
// Ordinary message field: add_<name>() when repeated, mutable_<name>()
// otherwise.
1567 format_("ptr = ctx->ParseMessage($1$_$2$(), ptr);\n",
1568 field->is_repeated() ? "add" : "mutable", FieldName(field));
1573 GOOGLE_LOG(FATAL) << "Illegal combination for length delimited wiretype "
1574 << " filed type is " << field->type();
1579 // Convert a 1 or 2 byte varint into the equivalent value upon a direct load.
// Precondition (debug-checked): x < 128 * 128, i.e. x encodes in at most two
// varint bytes.
1580 static uint32 SmallVarintValue(uint32 x) {
1581 GOOGLE_DCHECK(x < 128 * 128);
1582 if (x >= 128) x += (x & 0xFF80) + 128;
// True for repeated fields with a field number below 16 * 128 that are either
// not packable or are being handled with a wire type other than
// length-delimited. GenerateParseLoop() stores the result in `is_repeat`.
1586 static bool ShouldRepeat(const FieldDescriptor* descriptor,
1587 internal::WireFormatLite::WireType wiretype) {
1588 constexpr int kMaxTwoByteFieldNumber = 16 * 128;
1589 return descriptor->number() < kMaxTwoByteFieldNumber &&
1590 descriptor->is_repeated() &&
1591 (!descriptor->is_packable() ||
1592 wiretype != internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
// Emits the code that parses one occurrence of `field` once its tag has been
// matched, dispatching on the wire type the field arrived with.
1595 void GenerateFieldBody(internal::WireFormatLite::WireType wiretype,
1596 const FieldDescriptor* field) {
1597 uint32 tag = WireFormatLite::MakeTag(field->number(), wiretype);
// Varint: enums (with optional validity check and unknown-field fallback) and
// integer types, optionally ZigZag-decoded.
1599 case WireFormatLite::WIRETYPE_VARINT: {
1600 std::string type = PrimitiveTypeName(options_, field->cpp_type());
1601 std::string prefix = field->is_repeated() ? "add" : "set";
1602 if (field->type() == FieldDescriptor::TYPE_ENUM) {
1604 "$uint64$ val = $pi_ns$::ReadVarint(&ptr);\n"
1606 if (!HasPreservingUnknownEnumSemantics(field)) {
1607 format_("if (PROTOBUF_PREDICT_TRUE($1$_IsValid(val))) {\n",
1608 QualifiedClassName(field->enum_type(), options_));
1611 format_("$1$_$2$(static_cast<$3$>(val));\n", prefix, FieldName(field),
1612 QualifiedClassName(field->enum_type(), options_));
// Values rejected by _IsValid are written to the unknown fields instead.
1613 if (!HasPreservingUnknownEnumSemantics(field)) {
1617 " $pi_ns$::WriteVarint($1$, val, mutable_unknown_fields());\n"
// Width used for the ZigZag suffix: 32 for sint32, 64 otherwise.
1622 int size = field->type() == FieldDescriptor::TYPE_SINT32 ? 32 : 64;
1624 if ((field->type() == FieldDescriptor::TYPE_SINT32 ||
1625 field->type() == FieldDescriptor::TYPE_SINT64)) {
1626 zigzag = StrCat("ZigZag", size);
// Repeated and oneof fields go through add_/set_ accessors; other fields are
// assigned directly (after setting the has-bit when presence is tracked).
1628 if (field->is_repeated() || field->containing_oneof()) {
1629 string prefix = field->is_repeated() ? "add" : "set";
1631 "$1$_$2$($pi_ns$::ReadVarint$3$(&ptr));\n"
1633 prefix, FieldName(field), zigzag);
1635 if (HasFieldPresence(field->file())) {
1636 format_("HasBitSetters::set_has_$1$(&$has_bits$);\n",
1640 "$1$_ = $pi_ns$::ReadVarint$2$(&ptr);\n"
1642 FieldName(field), zigzag);
// Fixed-width values are read with an unaligned load and `ptr` is advanced by
// the size of the C++ type.
1647 case WireFormatLite::WIRETYPE_FIXED32:
1648 case WireFormatLite::WIRETYPE_FIXED64: {
1649 std::string type = PrimitiveTypeName(options_, field->cpp_type());
1650 if (field->is_repeated() || field->containing_oneof()) {
1651 string prefix = field->is_repeated() ? "add" : "set";
1653 "$1$_$2$($pi_ns$::UnalignedLoad<$3$>(ptr));\n"
1654 "ptr += sizeof($3$);\n",
1655 prefix, FieldName(field), type);
1657 if (HasFieldPresence(field->file())) {
1658 format_("HasBitSetters::set_has_$1$(&$has_bits$);\n",
1662 "$1$_ = $pi_ns$::UnalignedLoad<$2$>(ptr);\n"
1663 "ptr += sizeof($2$);\n",
1664 FieldName(field), type);
1668 case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: {
1669 GenerateLengthDelim(field);
1670 format_("CHK_(ptr);\n");
// Groups: the tag is passed to ParseGroup as its third argument.
1673 case WireFormatLite::WIRETYPE_START_GROUP: {
1675 "ptr = ctx->ParseGroup($1$_$2$(), ptr, $3$);\n"
1677 field->is_repeated() ? "add" : "mutable", FieldName(field), tag);
1680 case WireFormatLite::WIRETYPE_END_GROUP: {
1681 GOOGLE_LOG(FATAL) << "Can't have end group field\n";
1684 } // switch (wire_type)
1687 // Returns the tag for this field and in case of repeated packable fields,
1688 // sets a fallback tag in fallback_tag_ptr.
// For packable fields the expected tag uses the field type's own wire type and
// the fallback uses length-delimited; the two are swapped when the field is
// declared packed.
1689 static uint32 ExpectedTag(const FieldDescriptor* field,
1690 uint32* fallback_tag_ptr) {
1691 uint32 expected_tag;
1692 if (field->is_packable()) {
1693 auto expected_wiretype = WireFormat::WireTypeForFieldType(field->type());
1695 WireFormatLite::MakeTag(field->number(), expected_wiretype);
1696 GOOGLE_CHECK(expected_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
1697 auto fallback_wiretype = WireFormatLite::WIRETYPE_LENGTH_DELIMITED;
1698 uint32 fallback_tag =
1699 WireFormatLite::MakeTag(field->number(), fallback_wiretype);
1701 if (field->is_packed()) std::swap(expected_tag, fallback_tag);
1702 *fallback_tag_ptr = fallback_tag;
1704 auto expected_wiretype = WireFormat::WireTypeForField(field);
1706 WireFormatLite::MakeTag(field->number(), expected_wiretype);
1708 return expected_tag;
// Emits the main `while (!ctx->Done(&ptr))` loop: a switch on `tag >> 3` with
// one case per field in `ordered_fields`, plus a default case that handles
// end-group/zero tags, extension ranges and unknown fields.
1711 void GenerateParseLoop(
1712 const Descriptor* descriptor,
1713 const std::vector<const FieldDescriptor*>& ordered_fields) {
1715 "while (!ctx->Done(&ptr)) {\n"
1717 " ptr = $pi_ns$::ReadTag(ptr, &tag);\n"
1719 " switch (tag >> 3) {\n");
1724 for (const auto* field : ordered_fields) {
1725 // Print the field's (or oneof's) proto-syntax definition as a comment.
1726 // We don't want to print group bodies so we cut off after the first
1730 DebugStringOptions options;
1731 options.elide_group_body = true;
1732 options.elide_oneof_body = true;
1733 def = field->DebugStringWithOptions(options);
1734 def = def.substr(0, def.find_first_of('\n'));
1739 def, field->number());
// The emitted fast path compares only the low byte of the tag with the low
// byte of the expected tag.
1741 uint32 fallback_tag = 0;
1742 uint32 expected_tag = ExpectedTag(field, &fallback_tag);
1744 "if (PROTOBUF_PREDICT_TRUE(static_cast<$uint8$>(tag) == $1$)) {\n",
1745 expected_tag & 0xFF);
1747 auto wiretype = WireFormatLite::GetTagWireType(expected_tag);
1748 uint32 tag = WireFormatLite::MakeTag(field->number(), wiretype);
1749 int tag_size = io::CodedOutputStream::VarintSize32(tag);
1750 bool is_repeat = ShouldRepeat(field, wiretype);
1759 GenerateFieldBody(wiretype, field);
// Emitted loop tail: keep going while data is available and the next one or
// two bytes, loaded directly, equal this field's tag.
1761 string type = tag_size == 2 ? "uint16" : "uint8";
1764 " if (!ctx->DataAvailable(ptr)) break;\n"
1765 "} while ($pi_ns$::UnalignedLoad<$1$>(ptr) == $2$);\n",
1766 IntTypeName(options_, type), SmallVarintValue(tag));
// Second chance: the same field arriving with the fallback wire type.
1770 format_("} else if (static_cast<$uint8$>(tag) == $1$) {\n",
1771 fallback_tag & 0xFF);
1773 GenerateFieldBody(WireFormatLite::GetTagWireType(fallback_tag), field);
1778 " } else goto handle_unusual;\n"
1780 } // for loop over ordered fields
// Default case; the `handle_unusual` label is only emitted when some field
// case can jump to it.
1783 format_("default: {\n");
1784 if (!ordered_fields.empty()) format_("handle_unusual:\n");
1786 " if ((tag & 7) == 4 || tag == 0) {\n"
1787 " ctx->SetLastTag(tag);\n"
1790 if (IsMapEntryMessage(descriptor)) {
1791 format_(" continue;\n");
// Messages with extension ranges: emit a tag-range test per range and route
// matching tags to _extensions_.ParseField.
1793 if (descriptor->extension_range_count() > 0) {
1795 for (int i = 0; i < descriptor->extension_range_count(); i++) {
1796 const Descriptor::ExtensionRange* range =
1797 descriptor->extension_range(i);
1798 if (i > 0) format_(" ||\n ");
1800 uint32 start_tag = WireFormatLite::MakeTag(
1801 range->start, static_cast<WireFormatLite::WireType>(0));
1802 uint32 end_tag = WireFormatLite::MakeTag(
1803 range->end, static_cast<WireFormatLite::WireType>(0));
// A range whose end exceeds kMaxNumber gets no upper-bound test.
1805 if (range->end > FieldDescriptor::kMaxNumber) {
1806 format_("($1$u <= tag)", start_tag);
1808 format_("($1$u <= tag && tag < $2$u)", start_tag, end_tag);
1813 " ptr = _extensions_.ParseField(tag, ptr,\n"
1814 " internal_default_instance(), &_internal_metadata_, ctx);\n"
1815 " CHK_(ptr != nullptr);\n"
// Tags not otherwise handled are passed to UnknownFieldParse.
1820 " ptr = UnknownFieldParse(tag, &_internal_metadata_, ptr, ctx);\n"
1821 " CHK_(ptr != nullptr);\n"
1824 format_("}\n"); // default case
1833 void GenerateParserLoop(const Descriptor* descriptor, int num_hasbits,
1834 const Options& options,
1835 MessageSCCAnalyzer* scc_analyzer,
1836 io::Printer* printer) {
1837 ParseLoopGenerator generator(num_hasbits, options, scc_analyzer, printer);
1838 generator.GenerateParserLoop(descriptor);
1842 } // namespace compiler
1843 } // namespace protobuf
1844 } // namespace google