tizen 2.3.1 release
[external/protobuf.git] / src / google / protobuf / wire_format.cc
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34
35 #include <stack>
36 #include <string>
37 #include <vector>
38
39 #include <google/protobuf/wire_format.h>
40
41 #include <google/protobuf/stubs/common.h>
42 #include <google/protobuf/stubs/stringprintf.h>
43 #include <google/protobuf/descriptor.h>
44 #include <google/protobuf/wire_format_lite_inl.h>
45 #include <google/protobuf/descriptor.pb.h>
46 #include <google/protobuf/io/coded_stream.h>
47 #include <google/protobuf/io/zero_copy_stream.h>
48 #include <google/protobuf/io/zero_copy_stream_impl.h>
49 #include <google/protobuf/unknown_field_set.h>
50
51
52
53 namespace google {
54 namespace protobuf {
55 namespace internal {
56
57 namespace {
58
59 // This function turns out to be convenient when using some macros later.
60 inline int GetEnumNumber(const EnumValueDescriptor* descriptor) {
61   return descriptor->number();
62 }
63
64 }  // anonymous namespace
65
66 // ===================================================================
67
68 bool UnknownFieldSetFieldSkipper::SkipField(
69     io::CodedInputStream* input, uint32 tag) {
70   return WireFormat::SkipField(input, tag, unknown_fields_);
71 }
72
73 bool UnknownFieldSetFieldSkipper::SkipMessage(io::CodedInputStream* input) {
74   return WireFormat::SkipMessage(input, unknown_fields_);
75 }
76
77 void UnknownFieldSetFieldSkipper::SkipUnknownEnum(
78     int field_number, int value) {
79   unknown_fields_->AddVarint(field_number, value);
80 }
81
82 bool WireFormat::SkipField(io::CodedInputStream* input, uint32 tag,
83                            UnknownFieldSet* unknown_fields) {
84   int number = WireFormatLite::GetTagFieldNumber(tag);
85
86   switch (WireFormatLite::GetTagWireType(tag)) {
87     case WireFormatLite::WIRETYPE_VARINT: {
88       uint64 value;
89       if (!input->ReadVarint64(&value)) return false;
90       if (unknown_fields != NULL) unknown_fields->AddVarint(number, value);
91       return true;
92     }
93     case WireFormatLite::WIRETYPE_FIXED64: {
94       uint64 value;
95       if (!input->ReadLittleEndian64(&value)) return false;
96       if (unknown_fields != NULL) unknown_fields->AddFixed64(number, value);
97       return true;
98     }
99     case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: {
100       uint32 length;
101       if (!input->ReadVarint32(&length)) return false;
102       if (unknown_fields == NULL) {
103         if (!input->Skip(length)) return false;
104       } else {
105         if (!input->ReadString(unknown_fields->AddLengthDelimited(number),
106                                length)) {
107           return false;
108         }
109       }
110       return true;
111     }
112     case WireFormatLite::WIRETYPE_START_GROUP: {
113       if (!input->IncrementRecursionDepth()) return false;
114       if (!SkipMessage(input, (unknown_fields == NULL) ?
115                               NULL : unknown_fields->AddGroup(number))) {
116         return false;
117       }
118       input->DecrementRecursionDepth();
119       // Check that the ending tag matched the starting tag.
120       if (!input->LastTagWas(WireFormatLite::MakeTag(
121           WireFormatLite::GetTagFieldNumber(tag),
122           WireFormatLite::WIRETYPE_END_GROUP))) {
123         return false;
124       }
125       return true;
126     }
127     case WireFormatLite::WIRETYPE_END_GROUP: {
128       return false;
129     }
130     case WireFormatLite::WIRETYPE_FIXED32: {
131       uint32 value;
132       if (!input->ReadLittleEndian32(&value)) return false;
133       if (unknown_fields != NULL) unknown_fields->AddFixed32(number, value);
134       return true;
135     }
136     default: {
137       return false;
138     }
139   }
140 }
141
142 bool WireFormat::SkipMessage(io::CodedInputStream* input,
143                              UnknownFieldSet* unknown_fields) {
144   while(true) {
145     uint32 tag = input->ReadTag();
146     if (tag == 0) {
147       // End of input.  This is a valid place to end, so return true.
148       return true;
149     }
150
151     WireFormatLite::WireType wire_type = WireFormatLite::GetTagWireType(tag);
152
153     if (wire_type == WireFormatLite::WIRETYPE_END_GROUP) {
154       // Must be the end of the message.
155       return true;
156     }
157
158     if (!SkipField(input, tag, unknown_fields)) return false;
159   }
160 }
161
162 void WireFormat::SerializeUnknownFields(const UnknownFieldSet& unknown_fields,
163                                         io::CodedOutputStream* output) {
164   for (int i = 0; i < unknown_fields.field_count(); i++) {
165     const UnknownField& field = unknown_fields.field(i);
166     switch (field.type()) {
167       case UnknownField::TYPE_VARINT:
168         output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
169             WireFormatLite::WIRETYPE_VARINT));
170         output->WriteVarint64(field.varint());
171         break;
172       case UnknownField::TYPE_FIXED32:
173         output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
174             WireFormatLite::WIRETYPE_FIXED32));
175         output->WriteLittleEndian32(field.fixed32());
176         break;
177       case UnknownField::TYPE_FIXED64:
178         output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
179             WireFormatLite::WIRETYPE_FIXED64));
180         output->WriteLittleEndian64(field.fixed64());
181         break;
182       case UnknownField::TYPE_LENGTH_DELIMITED:
183         output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
184             WireFormatLite::WIRETYPE_LENGTH_DELIMITED));
185         output->WriteVarint32(field.length_delimited().size());
186         output->WriteRawMaybeAliased(field.length_delimited().data(),
187                                      field.length_delimited().size());
188         break;
189       case UnknownField::TYPE_GROUP:
190         output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
191             WireFormatLite::WIRETYPE_START_GROUP));
192         SerializeUnknownFields(field.group(), output);
193         output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
194             WireFormatLite::WIRETYPE_END_GROUP));
195         break;
196     }
197   }
198 }
199
200 uint8* WireFormat::SerializeUnknownFieldsToArray(
201     const UnknownFieldSet& unknown_fields,
202     uint8* target) {
203   for (int i = 0; i < unknown_fields.field_count(); i++) {
204     const UnknownField& field = unknown_fields.field(i);
205
206     switch (field.type()) {
207       case UnknownField::TYPE_VARINT:
208         target = WireFormatLite::WriteInt64ToArray(
209             field.number(), field.varint(), target);
210         break;
211       case UnknownField::TYPE_FIXED32:
212         target = WireFormatLite::WriteFixed32ToArray(
213             field.number(), field.fixed32(), target);
214         break;
215       case UnknownField::TYPE_FIXED64:
216         target = WireFormatLite::WriteFixed64ToArray(
217             field.number(), field.fixed64(), target);
218         break;
219       case UnknownField::TYPE_LENGTH_DELIMITED:
220         target = WireFormatLite::WriteBytesToArray(
221             field.number(), field.length_delimited(), target);
222         break;
223       case UnknownField::TYPE_GROUP:
224         target = WireFormatLite::WriteTagToArray(
225             field.number(), WireFormatLite::WIRETYPE_START_GROUP, target);
226         target = SerializeUnknownFieldsToArray(field.group(), target);
227         target = WireFormatLite::WriteTagToArray(
228             field.number(), WireFormatLite::WIRETYPE_END_GROUP, target);
229         break;
230     }
231   }
232   return target;
233 }
234
235 void WireFormat::SerializeUnknownMessageSetItems(
236     const UnknownFieldSet& unknown_fields,
237     io::CodedOutputStream* output) {
238   for (int i = 0; i < unknown_fields.field_count(); i++) {
239     const UnknownField& field = unknown_fields.field(i);
240     // The only unknown fields that are allowed to exist in a MessageSet are
241     // messages, which are length-delimited.
242     if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
243       // Start group.
244       output->WriteVarint32(WireFormatLite::kMessageSetItemStartTag);
245
246       // Write type ID.
247       output->WriteVarint32(WireFormatLite::kMessageSetTypeIdTag);
248       output->WriteVarint32(field.number());
249
250       // Write message.
251       output->WriteVarint32(WireFormatLite::kMessageSetMessageTag);
252       field.SerializeLengthDelimitedNoTag(output);
253
254       // End group.
255       output->WriteVarint32(WireFormatLite::kMessageSetItemEndTag);
256     }
257   }
258 }
259
260 uint8* WireFormat::SerializeUnknownMessageSetItemsToArray(
261     const UnknownFieldSet& unknown_fields,
262     uint8* target) {
263   for (int i = 0; i < unknown_fields.field_count(); i++) {
264     const UnknownField& field = unknown_fields.field(i);
265
266     // The only unknown fields that are allowed to exist in a MessageSet are
267     // messages, which are length-delimited.
268     if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
269       // Start group.
270       target = io::CodedOutputStream::WriteTagToArray(
271           WireFormatLite::kMessageSetItemStartTag, target);
272
273       // Write type ID.
274       target = io::CodedOutputStream::WriteTagToArray(
275           WireFormatLite::kMessageSetTypeIdTag, target);
276       target = io::CodedOutputStream::WriteVarint32ToArray(
277           field.number(), target);
278
279       // Write message.
280       target = io::CodedOutputStream::WriteTagToArray(
281           WireFormatLite::kMessageSetMessageTag, target);
282       target = field.SerializeLengthDelimitedNoTagToArray(target);
283
284       // End group.
285       target = io::CodedOutputStream::WriteTagToArray(
286           WireFormatLite::kMessageSetItemEndTag, target);
287     }
288   }
289
290   return target;
291 }
292
293 int WireFormat::ComputeUnknownFieldsSize(
294     const UnknownFieldSet& unknown_fields) {
295   int size = 0;
296   for (int i = 0; i < unknown_fields.field_count(); i++) {
297     const UnknownField& field = unknown_fields.field(i);
298
299     switch (field.type()) {
300       case UnknownField::TYPE_VARINT:
301         size += io::CodedOutputStream::VarintSize32(
302             WireFormatLite::MakeTag(field.number(),
303             WireFormatLite::WIRETYPE_VARINT));
304         size += io::CodedOutputStream::VarintSize64(field.varint());
305         break;
306       case UnknownField::TYPE_FIXED32:
307         size += io::CodedOutputStream::VarintSize32(
308             WireFormatLite::MakeTag(field.number(),
309             WireFormatLite::WIRETYPE_FIXED32));
310         size += sizeof(int32);
311         break;
312       case UnknownField::TYPE_FIXED64:
313         size += io::CodedOutputStream::VarintSize32(
314             WireFormatLite::MakeTag(field.number(),
315             WireFormatLite::WIRETYPE_FIXED64));
316         size += sizeof(int64);
317         break;
318       case UnknownField::TYPE_LENGTH_DELIMITED:
319         size += io::CodedOutputStream::VarintSize32(
320             WireFormatLite::MakeTag(field.number(),
321             WireFormatLite::WIRETYPE_LENGTH_DELIMITED));
322         size += io::CodedOutputStream::VarintSize32(
323             field.length_delimited().size());
324         size += field.length_delimited().size();
325         break;
326       case UnknownField::TYPE_GROUP:
327         size += io::CodedOutputStream::VarintSize32(
328             WireFormatLite::MakeTag(field.number(),
329             WireFormatLite::WIRETYPE_START_GROUP));
330         size += ComputeUnknownFieldsSize(field.group());
331         size += io::CodedOutputStream::VarintSize32(
332             WireFormatLite::MakeTag(field.number(),
333             WireFormatLite::WIRETYPE_END_GROUP));
334         break;
335     }
336   }
337
338   return size;
339 }
340
341 int WireFormat::ComputeUnknownMessageSetItemsSize(
342     const UnknownFieldSet& unknown_fields) {
343   int size = 0;
344   for (int i = 0; i < unknown_fields.field_count(); i++) {
345     const UnknownField& field = unknown_fields.field(i);
346
347     // The only unknown fields that are allowed to exist in a MessageSet are
348     // messages, which are length-delimited.
349     if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
350       size += WireFormatLite::kMessageSetItemTagsSize;
351       size += io::CodedOutputStream::VarintSize32(field.number());
352
353       int field_size = field.GetLengthDelimitedSize();
354       size += io::CodedOutputStream::VarintSize32(field_size);
355       size += field_size;
356     }
357   }
358
359   return size;
360 }
361
362 // ===================================================================
363
364 bool WireFormat::ParseAndMergePartial(io::CodedInputStream* input,
365                                       Message* message) {
366   const Descriptor* descriptor = message->GetDescriptor();
367   const Reflection* message_reflection = message->GetReflection();
368
369   while(true) {
370     uint32 tag = input->ReadTag();
371     if (tag == 0) {
372       // End of input.  This is a valid place to end, so return true.
373       return true;
374     }
375
376     if (WireFormatLite::GetTagWireType(tag) ==
377         WireFormatLite::WIRETYPE_END_GROUP) {
378       // Must be the end of the message.
379       return true;
380     }
381
382     const FieldDescriptor* field = NULL;
383
384     if (descriptor != NULL) {
385       int field_number = WireFormatLite::GetTagFieldNumber(tag);
386       field = descriptor->FindFieldByNumber(field_number);
387
388       // If that failed, check if the field is an extension.
389       if (field == NULL && descriptor->IsExtensionNumber(field_number)) {
390         if (input->GetExtensionPool() == NULL) {
391           field = message_reflection->FindKnownExtensionByNumber(field_number);
392         } else {
393           field = input->GetExtensionPool()
394                        ->FindExtensionByNumber(descriptor, field_number);
395         }
396       }
397
398       // If that failed, but we're a MessageSet, and this is the tag for a
399       // MessageSet item, then parse that.
400       if (field == NULL &&
401           descriptor->options().message_set_wire_format() &&
402           tag == WireFormatLite::kMessageSetItemStartTag) {
403         if (!ParseAndMergeMessageSetItem(input, message)) {
404           return false;
405         }
406         continue;  // Skip ParseAndMergeField(); already taken care of.
407       }
408     }
409
410     if (!ParseAndMergeField(tag, field, message, input)) {
411       return false;
412     }
413   }
414 }
415
416 bool WireFormat::SkipMessageSetField(io::CodedInputStream* input,
417                                      uint32 field_number,
418                                      UnknownFieldSet* unknown_fields) {
419   uint32 length;
420   if (!input->ReadVarint32(&length)) return false;
421   return input->ReadString(
422       unknown_fields->AddLengthDelimited(field_number), length);
423 }
424
425 bool WireFormat::ParseAndMergeMessageSetField(uint32 field_number,
426                                               const FieldDescriptor* field,
427                                               Message* message,
428                                               io::CodedInputStream* input) {
429   const Reflection* message_reflection = message->GetReflection();
430   if (field == NULL) {
431     // We store unknown MessageSet extensions as groups.
432     return SkipMessageSetField(
433         input, field_number, message_reflection->MutableUnknownFields(message));
434   } else if (field->is_repeated() ||
435              field->type() != FieldDescriptor::TYPE_MESSAGE) {
436     // This shouldn't happen as we only allow optional message extensions to
437     // MessageSet.
438     GOOGLE_LOG(ERROR) << "Extensions of MessageSets must be optional messages.";
439     return false;
440   } else {
441     Message* sub_message = message_reflection->MutableMessage(
442         message, field, input->GetExtensionFactory());
443     return WireFormatLite::ReadMessage(input, sub_message);
444   }
445 }
446
447 bool WireFormat::ParseAndMergeField(
448     uint32 tag,
449     const FieldDescriptor* field,        // May be NULL for unknown
450     Message* message,
451     io::CodedInputStream* input) {
452   const Reflection* message_reflection = message->GetReflection();
453
454   enum { UNKNOWN, NORMAL_FORMAT, PACKED_FORMAT } value_format;
455
456   if (field == NULL) {
457     value_format = UNKNOWN;
458   } else if (WireFormatLite::GetTagWireType(tag) ==
459              WireTypeForFieldType(field->type())) {
460     value_format = NORMAL_FORMAT;
461   } else if (field->is_packable() &&
462              WireFormatLite::GetTagWireType(tag) ==
463              WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
464     value_format = PACKED_FORMAT;
465   } else {
466     // We don't recognize this field. Either the field number is unknown
467     // or the wire type doesn't match. Put it in our unknown field set.
468     value_format = UNKNOWN;
469   }
470
471   if (value_format == UNKNOWN) {
472     return SkipField(input, tag,
473                      message_reflection->MutableUnknownFields(message));
474   } else if (value_format == PACKED_FORMAT) {
475     uint32 length;
476     if (!input->ReadVarint32(&length)) return false;
477     io::CodedInputStream::Limit limit = input->PushLimit(length);
478
479     switch (field->type()) {
480 #define HANDLE_PACKED_TYPE(TYPE, CPPTYPE, CPPTYPE_METHOD)                      \
481       case FieldDescriptor::TYPE_##TYPE: {                                     \
482         while (input->BytesUntilLimit() > 0) {                                 \
483           CPPTYPE value;                                                       \
484           if (!WireFormatLite::ReadPrimitive<                                  \
485                 CPPTYPE, WireFormatLite::TYPE_##TYPE>(input, &value))          \
486             return false;                                                      \
487           message_reflection->Add##CPPTYPE_METHOD(message, field, value);      \
488         }                                                                      \
489         break;                                                                 \
490       }
491
492       HANDLE_PACKED_TYPE( INT32,  int32,  Int32)
493       HANDLE_PACKED_TYPE( INT64,  int64,  Int64)
494       HANDLE_PACKED_TYPE(SINT32,  int32,  Int32)
495       HANDLE_PACKED_TYPE(SINT64,  int64,  Int64)
496       HANDLE_PACKED_TYPE(UINT32, uint32, UInt32)
497       HANDLE_PACKED_TYPE(UINT64, uint64, UInt64)
498
499       HANDLE_PACKED_TYPE( FIXED32, uint32, UInt32)
500       HANDLE_PACKED_TYPE( FIXED64, uint64, UInt64)
501       HANDLE_PACKED_TYPE(SFIXED32,  int32,  Int32)
502       HANDLE_PACKED_TYPE(SFIXED64,  int64,  Int64)
503
504       HANDLE_PACKED_TYPE(FLOAT , float , Float )
505       HANDLE_PACKED_TYPE(DOUBLE, double, Double)
506
507       HANDLE_PACKED_TYPE(BOOL, bool, Bool)
508 #undef HANDLE_PACKED_TYPE
509
510       case FieldDescriptor::TYPE_ENUM: {
511         while (input->BytesUntilLimit() > 0) {
512           int value;
513           if (!WireFormatLite::ReadPrimitive<int, WireFormatLite::TYPE_ENUM>(
514                   input, &value)) return false;
515           const EnumValueDescriptor* enum_value =
516               field->enum_type()->FindValueByNumber(value);
517           if (enum_value != NULL) {
518             message_reflection->AddEnum(message, field, enum_value);
519           }
520         }
521
522         break;
523       }
524
525       case FieldDescriptor::TYPE_STRING:
526       case FieldDescriptor::TYPE_GROUP:
527       case FieldDescriptor::TYPE_MESSAGE:
528       case FieldDescriptor::TYPE_BYTES:
529         // Can't have packed fields of these types: these should be caught by
530         // the protocol compiler.
531         return false;
532         break;
533     }
534
535     input->PopLimit(limit);
536   } else {
537     // Non-packed value (value_format == NORMAL_FORMAT)
538     switch (field->type()) {
539 #define HANDLE_TYPE(TYPE, CPPTYPE, CPPTYPE_METHOD)                            \
540       case FieldDescriptor::TYPE_##TYPE: {                                    \
541         CPPTYPE value;                                                        \
542         if (!WireFormatLite::ReadPrimitive<                                   \
543                 CPPTYPE, WireFormatLite::TYPE_##TYPE>(input, &value))         \
544           return false;                                                       \
545         if (field->is_repeated()) {                                           \
546           message_reflection->Add##CPPTYPE_METHOD(message, field, value);     \
547         } else {                                                              \
548           message_reflection->Set##CPPTYPE_METHOD(message, field, value);     \
549         }                                                                     \
550         break;                                                                \
551       }
552
553       HANDLE_TYPE( INT32,  int32,  Int32)
554       HANDLE_TYPE( INT64,  int64,  Int64)
555       HANDLE_TYPE(SINT32,  int32,  Int32)
556       HANDLE_TYPE(SINT64,  int64,  Int64)
557       HANDLE_TYPE(UINT32, uint32, UInt32)
558       HANDLE_TYPE(UINT64, uint64, UInt64)
559
560       HANDLE_TYPE( FIXED32, uint32, UInt32)
561       HANDLE_TYPE( FIXED64, uint64, UInt64)
562       HANDLE_TYPE(SFIXED32,  int32,  Int32)
563       HANDLE_TYPE(SFIXED64,  int64,  Int64)
564
565       HANDLE_TYPE(FLOAT , float , Float )
566       HANDLE_TYPE(DOUBLE, double, Double)
567
568       HANDLE_TYPE(BOOL, bool, Bool)
569 #undef HANDLE_TYPE
570
571       case FieldDescriptor::TYPE_ENUM: {
572         int value;
573         if (!WireFormatLite::ReadPrimitive<int, WireFormatLite::TYPE_ENUM>(
574                 input, &value)) return false;
575         const EnumValueDescriptor* enum_value =
576           field->enum_type()->FindValueByNumber(value);
577         if (enum_value != NULL) {
578           if (field->is_repeated()) {
579             message_reflection->AddEnum(message, field, enum_value);
580           } else {
581             message_reflection->SetEnum(message, field, enum_value);
582           }
583         } else {
584           // The enum value is not one of the known values.  Add it to the
585           // UnknownFieldSet.
586           int64 sign_extended_value = static_cast<int64>(value);
587           message_reflection->MutableUnknownFields(message)
588                             ->AddVarint(WireFormatLite::GetTagFieldNumber(tag),
589                                         sign_extended_value);
590         }
591         break;
592       }
593
594       // Handle strings separately so that we can optimize the ctype=CORD case.
595       case FieldDescriptor::TYPE_STRING: {
596         string value;
597         if (!WireFormatLite::ReadString(input, &value)) return false;
598         VerifyUTF8StringNamedField(value.data(), value.length(), PARSE,
599                                    field->name().c_str());
600         if (field->is_repeated()) {
601           message_reflection->AddString(message, field, value);
602         } else {
603           message_reflection->SetString(message, field, value);
604         }
605         break;
606       }
607
608       case FieldDescriptor::TYPE_BYTES: {
609         string value;
610         if (!WireFormatLite::ReadBytes(input, &value)) return false;
611         if (field->is_repeated()) {
612           message_reflection->AddString(message, field, value);
613         } else {
614           message_reflection->SetString(message, field, value);
615         }
616         break;
617       }
618
619       case FieldDescriptor::TYPE_GROUP: {
620         Message* sub_message;
621         if (field->is_repeated()) {
622           sub_message = message_reflection->AddMessage(
623               message, field, input->GetExtensionFactory());
624         } else {
625           sub_message = message_reflection->MutableMessage(
626               message, field, input->GetExtensionFactory());
627         }
628
629         if (!WireFormatLite::ReadGroup(WireFormatLite::GetTagFieldNumber(tag),
630                                        input, sub_message))
631           return false;
632         break;
633       }
634
635       case FieldDescriptor::TYPE_MESSAGE: {
636         Message* sub_message;
637         if (field->is_repeated()) {
638           sub_message = message_reflection->AddMessage(
639               message, field, input->GetExtensionFactory());
640         } else {
641           sub_message = message_reflection->MutableMessage(
642               message, field, input->GetExtensionFactory());
643         }
644
645         if (!WireFormatLite::ReadMessage(input, sub_message)) return false;
646         break;
647       }
648     }
649   }
650
651   return true;
652 }
653
654 bool WireFormat::ParseAndMergeMessageSetItem(
655     io::CodedInputStream* input,
656     Message* message) {
657   const Reflection* message_reflection = message->GetReflection();
658
659   // This method parses a group which should contain two fields:
660   //   required int32 type_id = 2;
661   //   required data message = 3;
662
663   uint32 last_type_id = 0;
664
665   // Once we see a type_id, we'll look up the FieldDescriptor for the
666   // extension.
667   const FieldDescriptor* field = NULL;
668
669   // If we see message data before the type_id, we'll append it to this so
670   // we can parse it later.
671   string message_data;
672
673   while (true) {
674     uint32 tag = input->ReadTag();
675     if (tag == 0) return false;
676
677     switch (tag) {
678       case WireFormatLite::kMessageSetTypeIdTag: {
679         uint32 type_id;
680         if (!input->ReadVarint32(&type_id)) return false;
681         last_type_id = type_id;
682         field = message_reflection->FindKnownExtensionByNumber(type_id);
683
684         if (!message_data.empty()) {
685           // We saw some message data before the type_id.  Have to parse it
686           // now.
687           io::ArrayInputStream raw_input(message_data.data(),
688                                          message_data.size());
689           io::CodedInputStream sub_input(&raw_input);
690           if (!ParseAndMergeMessageSetField(last_type_id, field, message,
691                                             &sub_input)) {
692             return false;
693           }
694           message_data.clear();
695         }
696
697         break;
698       }
699
700       case WireFormatLite::kMessageSetMessageTag: {
701         if (last_type_id == 0) {
702           // We haven't seen a type_id yet.  Append this data to message_data.
703           string temp;
704           uint32 length;
705           if (!input->ReadVarint32(&length)) return false;
706           if (!input->ReadString(&temp, length)) return false;
707           io::StringOutputStream output_stream(&message_data);
708           io::CodedOutputStream coded_output(&output_stream);
709           coded_output.WriteVarint32(length);
710           coded_output.WriteString(temp);
711         } else {
712           // Already saw type_id, so we can parse this directly.
713           if (!ParseAndMergeMessageSetField(last_type_id, field, message,
714                                             input)) {
715             return false;
716           }
717         }
718
719         break;
720       }
721
722       case WireFormatLite::kMessageSetItemEndTag: {
723         return true;
724       }
725
726       default: {
727         if (!SkipField(input, tag, NULL)) return false;
728       }
729     }
730   }
731 }
732
733 // ===================================================================
734
735 void WireFormat::SerializeWithCachedSizes(
736     const Message& message,
737     int size, io::CodedOutputStream* output) {
738   const Descriptor* descriptor = message.GetDescriptor();
739   const Reflection* message_reflection = message.GetReflection();
740   int expected_endpoint = output->ByteCount() + size;
741
742   vector<const FieldDescriptor*> fields;
743   message_reflection->ListFields(message, &fields);
744   for (int i = 0; i < fields.size(); i++) {
745     SerializeFieldWithCachedSizes(fields[i], message, output);
746   }
747
748   if (descriptor->options().message_set_wire_format()) {
749     SerializeUnknownMessageSetItems(
750         message_reflection->GetUnknownFields(message), output);
751   } else {
752     SerializeUnknownFields(
753         message_reflection->GetUnknownFields(message), output);
754   }
755
756   GOOGLE_CHECK_EQ(output->ByteCount(), expected_endpoint)
757     << ": Protocol message serialized to a size different from what was "
758        "originally expected.  Perhaps it was modified by another thread "
759        "during serialization?";
760 }
761
762 void WireFormat::SerializeFieldWithCachedSizes(
763     const FieldDescriptor* field,
764     const Message& message,
765     io::CodedOutputStream* output) {
766   const Reflection* message_reflection = message.GetReflection();
767
768   if (field->is_extension() &&
769       field->containing_type()->options().message_set_wire_format() &&
770       field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
771       !field->is_repeated()) {
772     SerializeMessageSetItemWithCachedSizes(field, message, output);
773     return;
774   }
775
776   int count = 0;
777
778   if (field->is_repeated()) {
779     count = message_reflection->FieldSize(message, field);
780   } else if (message_reflection->HasField(message, field)) {
781     count = 1;
782   }
783
784   const bool is_packed = field->options().packed();
785   if (is_packed && count > 0) {
786     WireFormatLite::WriteTag(field->number(),
787         WireFormatLite::WIRETYPE_LENGTH_DELIMITED, output);
788     const int data_size = FieldDataOnlyByteSize(field, message);
789     output->WriteVarint32(data_size);
790   }
791
792   for (int j = 0; j < count; j++) {
793     switch (field->type()) {
794 #define HANDLE_PRIMITIVE_TYPE(TYPE, CPPTYPE, TYPE_METHOD, CPPTYPE_METHOD)      \
795       case FieldDescriptor::TYPE_##TYPE: {                                     \
796         const CPPTYPE value = field->is_repeated() ?                           \
797                               message_reflection->GetRepeated##CPPTYPE_METHOD( \
798                                 message, field, j) :                           \
799                               message_reflection->Get##CPPTYPE_METHOD(         \
800                                 message, field);                               \
801         if (is_packed) {                                                       \
802           WireFormatLite::Write##TYPE_METHOD##NoTag(value, output);            \
803         } else {                                                               \
804           WireFormatLite::Write##TYPE_METHOD(field->number(), value, output);  \
805         }                                                                      \
806         break;                                                                 \
807       }
808
809       HANDLE_PRIMITIVE_TYPE( INT32,  int32,  Int32,  Int32)
810       HANDLE_PRIMITIVE_TYPE( INT64,  int64,  Int64,  Int64)
811       HANDLE_PRIMITIVE_TYPE(SINT32,  int32, SInt32,  Int32)
812       HANDLE_PRIMITIVE_TYPE(SINT64,  int64, SInt64,  Int64)
813       HANDLE_PRIMITIVE_TYPE(UINT32, uint32, UInt32, UInt32)
814       HANDLE_PRIMITIVE_TYPE(UINT64, uint64, UInt64, UInt64)
815
816       HANDLE_PRIMITIVE_TYPE( FIXED32, uint32,  Fixed32, UInt32)
817       HANDLE_PRIMITIVE_TYPE( FIXED64, uint64,  Fixed64, UInt64)
818       HANDLE_PRIMITIVE_TYPE(SFIXED32,  int32, SFixed32,  Int32)
819       HANDLE_PRIMITIVE_TYPE(SFIXED64,  int64, SFixed64,  Int64)
820
821       HANDLE_PRIMITIVE_TYPE(FLOAT , float , Float , Float )
822       HANDLE_PRIMITIVE_TYPE(DOUBLE, double, Double, Double)
823
824       HANDLE_PRIMITIVE_TYPE(BOOL, bool, Bool, Bool)
825 #undef HANDLE_PRIMITIVE_TYPE
826
827 #define HANDLE_TYPE(TYPE, TYPE_METHOD, CPPTYPE_METHOD)                       \
828       case FieldDescriptor::TYPE_##TYPE:                                     \
829         WireFormatLite::Write##TYPE_METHOD(                                  \
830               field->number(),                                               \
831               field->is_repeated() ?                                         \
832                 message_reflection->GetRepeated##CPPTYPE_METHOD(             \
833                   message, field, j) :                                       \
834                 message_reflection->Get##CPPTYPE_METHOD(message, field),     \
835               output);                                                       \
836         break;
837
838       HANDLE_TYPE(GROUP  , Group  , Message)
839       HANDLE_TYPE(MESSAGE, Message, Message)
840 #undef HANDLE_TYPE
841
842       case FieldDescriptor::TYPE_ENUM: {
843         const EnumValueDescriptor* value = field->is_repeated() ?
844           message_reflection->GetRepeatedEnum(message, field, j) :
845           message_reflection->GetEnum(message, field);
846         if (is_packed) {
847           WireFormatLite::WriteEnumNoTag(value->number(), output);
848         } else {
849           WireFormatLite::WriteEnum(field->number(), value->number(), output);
850         }
851         break;
852       }
853
854       // Handle strings separately so that we can get string references
855       // instead of copying.
856       case FieldDescriptor::TYPE_STRING: {
857         string scratch;
858         const string& value = field->is_repeated() ?
859           message_reflection->GetRepeatedStringReference(
860             message, field, j, &scratch) :
861           message_reflection->GetStringReference(message, field, &scratch);
862         VerifyUTF8StringNamedField(value.data(), value.length(), SERIALIZE,
863                                    field->name().c_str());
864         WireFormatLite::WriteString(field->number(), value, output);
865         break;
866       }
867
868       case FieldDescriptor::TYPE_BYTES: {
869         string scratch;
870         const string& value = field->is_repeated() ?
871           message_reflection->GetRepeatedStringReference(
872             message, field, j, &scratch) :
873           message_reflection->GetStringReference(message, field, &scratch);
874         WireFormatLite::WriteBytes(field->number(), value, output);
875         break;
876       }
877     }
878   }
879 }
880
881 void WireFormat::SerializeMessageSetItemWithCachedSizes(
882     const FieldDescriptor* field,
883     const Message& message,
884     io::CodedOutputStream* output) {
885   const Reflection* message_reflection = message.GetReflection();
886
887   // Start group.
888   output->WriteVarint32(WireFormatLite::kMessageSetItemStartTag);
889
890   // Write type ID.
891   output->WriteVarint32(WireFormatLite::kMessageSetTypeIdTag);
892   output->WriteVarint32(field->number());
893
894   // Write message.
895   output->WriteVarint32(WireFormatLite::kMessageSetMessageTag);
896
897   const Message& sub_message = message_reflection->GetMessage(message, field);
898   output->WriteVarint32(sub_message.GetCachedSize());
899   sub_message.SerializeWithCachedSizes(output);
900
901   // End group.
902   output->WriteVarint32(WireFormatLite::kMessageSetItemEndTag);
903 }
904
905 // ===================================================================
906
907 int WireFormat::ByteSize(const Message& message) {
908   const Descriptor* descriptor = message.GetDescriptor();
909   const Reflection* message_reflection = message.GetReflection();
910
911   int our_size = 0;
912
913   vector<const FieldDescriptor*> fields;
914   message_reflection->ListFields(message, &fields);
915   for (int i = 0; i < fields.size(); i++) {
916     our_size += FieldByteSize(fields[i], message);
917   }
918
919   if (descriptor->options().message_set_wire_format()) {
920     our_size += ComputeUnknownMessageSetItemsSize(
921       message_reflection->GetUnknownFields(message));
922   } else {
923     our_size += ComputeUnknownFieldsSize(
924       message_reflection->GetUnknownFields(message));
925   }
926
927   return our_size;
928 }
929
930 int WireFormat::FieldByteSize(
931     const FieldDescriptor* field,
932     const Message& message) {
933   const Reflection* message_reflection = message.GetReflection();
934
935   if (field->is_extension() &&
936       field->containing_type()->options().message_set_wire_format() &&
937       field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
938       !field->is_repeated()) {
939     return MessageSetItemByteSize(field, message);
940   }
941
942   int count = 0;
943   if (field->is_repeated()) {
944     count = message_reflection->FieldSize(message, field);
945   } else if (message_reflection->HasField(message, field)) {
946     count = 1;
947   }
948
949   const int data_size = FieldDataOnlyByteSize(field, message);
950   int our_size = data_size;
951   if (field->options().packed()) {
952     if (data_size > 0) {
953       // Packed fields get serialized like a string, not their native type.
954       // Technically this doesn't really matter; the size only changes if it's
955       // a GROUP
956       our_size += TagSize(field->number(), FieldDescriptor::TYPE_STRING);
957       our_size += io::CodedOutputStream::VarintSize32(data_size);
958     }
959   } else {
960     our_size += count * TagSize(field->number(), field->type());
961   }
962   return our_size;
963 }
964
965 int WireFormat::FieldDataOnlyByteSize(
966     const FieldDescriptor* field,
967     const Message& message) {
968   const Reflection* message_reflection = message.GetReflection();
969
970   int count = 0;
971   if (field->is_repeated()) {
972     count = message_reflection->FieldSize(message, field);
973   } else if (message_reflection->HasField(message, field)) {
974     count = 1;
975   }
976
977   int data_size = 0;
978   switch (field->type()) {
979 #define HANDLE_TYPE(TYPE, TYPE_METHOD, CPPTYPE_METHOD)                     \
980     case FieldDescriptor::TYPE_##TYPE:                                     \
981       if (field->is_repeated()) {                                          \
982         for (int j = 0; j < count; j++) {                                  \
983           data_size += WireFormatLite::TYPE_METHOD##Size(                  \
984             message_reflection->GetRepeated##CPPTYPE_METHOD(               \
985               message, field, j));                                         \
986         }                                                                  \
987       } else {                                                             \
988         data_size += WireFormatLite::TYPE_METHOD##Size(                    \
989           message_reflection->Get##CPPTYPE_METHOD(message, field));        \
990       }                                                                    \
991       break;
992
993 #define HANDLE_FIXED_TYPE(TYPE, TYPE_METHOD)                               \
994     case FieldDescriptor::TYPE_##TYPE:                                     \
995       data_size += count * WireFormatLite::k##TYPE_METHOD##Size;           \
996       break;
997
998     HANDLE_TYPE( INT32,  Int32,  Int32)
999     HANDLE_TYPE( INT64,  Int64,  Int64)
1000     HANDLE_TYPE(SINT32, SInt32,  Int32)
1001     HANDLE_TYPE(SINT64, SInt64,  Int64)
1002     HANDLE_TYPE(UINT32, UInt32, UInt32)
1003     HANDLE_TYPE(UINT64, UInt64, UInt64)
1004
1005     HANDLE_FIXED_TYPE( FIXED32,  Fixed32)
1006     HANDLE_FIXED_TYPE( FIXED64,  Fixed64)
1007     HANDLE_FIXED_TYPE(SFIXED32, SFixed32)
1008     HANDLE_FIXED_TYPE(SFIXED64, SFixed64)
1009
1010     HANDLE_FIXED_TYPE(FLOAT , Float )
1011     HANDLE_FIXED_TYPE(DOUBLE, Double)
1012
1013     HANDLE_FIXED_TYPE(BOOL, Bool)
1014
1015     HANDLE_TYPE(GROUP  , Group  , Message)
1016     HANDLE_TYPE(MESSAGE, Message, Message)
1017 #undef HANDLE_TYPE
1018 #undef HANDLE_FIXED_TYPE
1019
1020     case FieldDescriptor::TYPE_ENUM: {
1021       if (field->is_repeated()) {
1022         for (int j = 0; j < count; j++) {
1023           data_size += WireFormatLite::EnumSize(
1024             message_reflection->GetRepeatedEnum(message, field, j)->number());
1025         }
1026       } else {
1027         data_size += WireFormatLite::EnumSize(
1028           message_reflection->GetEnum(message, field)->number());
1029       }
1030       break;
1031     }
1032
1033     // Handle strings separately so that we can get string references
1034     // instead of copying.
1035     case FieldDescriptor::TYPE_STRING:
1036     case FieldDescriptor::TYPE_BYTES: {
1037       for (int j = 0; j < count; j++) {
1038         string scratch;
1039         const string& value = field->is_repeated() ?
1040           message_reflection->GetRepeatedStringReference(
1041             message, field, j, &scratch) :
1042           message_reflection->GetStringReference(message, field, &scratch);
1043         data_size += WireFormatLite::StringSize(value);
1044       }
1045       break;
1046     }
1047   }
1048   return data_size;
1049 }
1050
1051 int WireFormat::MessageSetItemByteSize(
1052     const FieldDescriptor* field,
1053     const Message& message) {
1054   const Reflection* message_reflection = message.GetReflection();
1055
1056   int our_size = WireFormatLite::kMessageSetItemTagsSize;
1057
1058   // type_id
1059   our_size += io::CodedOutputStream::VarintSize32(field->number());
1060
1061   // message
1062   const Message& sub_message = message_reflection->GetMessage(message, field);
1063   int message_size = sub_message.ByteSize();
1064
1065   our_size += io::CodedOutputStream::VarintSize32(message_size);
1066   our_size += message_size;
1067
1068   return our_size;
1069 }
1070
1071 void WireFormat::VerifyUTF8StringFallback(const char* data,
1072                                           int size,
1073                                           Operation op,
1074                                           const char* field_name) {
1075   if (!IsStructurallyValidUTF8(data, size)) {
1076     const char* operation_str = NULL;
1077     switch (op) {
1078       case PARSE:
1079         operation_str = "parsing";
1080         break;
1081       case SERIALIZE:
1082         operation_str = "serializing";
1083         break;
1084       // no default case: have the compiler warn if a case is not covered.
1085     }
1086     string quoted_field_name = "";
1087     if (field_name != NULL) {
1088       quoted_field_name = StringPrintf(" '%s'", field_name);
1089     }
1090     // no space below to avoid double space when the field name is missing.
1091     GOOGLE_LOG(ERROR) << "String field" << quoted_field_name << " contains invalid "
1092                << "UTF-8 data when " << operation_str << " a protocol "
1093                << "buffer. Use the 'bytes' type if you intend to send raw "
1094                << "bytes. ";
1095   }
1096 }
1097
1098
1099 }  // namespace internal
1100 }  // namespace protobuf
1101 }  // namespace google