# Protocol Buffers - Google's data interchange format
# Copyright 2008 Google Inc.  All rights reserved.
# http://code.google.com/p/protobuf/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Code for encoding protocol message primitives.

Contains the logic for encoding every logical protocol field type
into one of the 5 physical wire types.

This code is designed to push the Python interpreter's performance to the
limits.

The basic idea is that at startup time, for every field (i.e. every
FieldDescriptor) we construct two functions:  a "sizer" and an "encoder".  The
sizer takes a value of this field's type and computes its byte size.  The
encoder takes a writer function and a value.  It encodes the value into byte
strings and invokes the writer function to write those strings.  Typically the
writer function is the write() method of a cStringIO.

We try to do as much work as possible when constructing the writer and the
sizer rather than when calling them.  In particular:
* We copy any needed global functions to local variables, so that we do not need
  to do costly global table lookups at runtime.
* Similarly, we try to do any attribute lookups at startup time if possible.
* Every field's tag is encoded to bytes at startup, since it can't change at
  runtime.
* Whatever component of the field size we can compute at startup, we do.
* We *avoid* sharing code if doing so would make the code slower and not sharing
  does not burden us too much.  For example, encoders for repeated fields do
  not just call the encoders for singular fields in a loop because this would
  add an extra function call overhead for every loop iteration; instead, we
  manually inline the single-value encoder into the loop.
* If a Python function lacks a return statement, Python actually generates
  instructions to pop the result of the last statement off the stack, push
  None onto the stack, and then return that.  If we really don't care what
  value is returned, then we can save two instructions by returning the
  result of the last statement.  It looks funny but it helps.
* We assume that type and bounds checking has happened at a higher level.
"""
# Original author of this module.
__author__ = 'kenton@google.com (Kenton Varda)'
import struct

from google.protobuf.internal import wire_format
# This will overflow and thus become IEEE-754 "infinity".  We would use
# "float('inf')" but it doesn't work on Windows pre-Python-2.6.
# These two constants are compared against by the floating-point encoders
# when struct.pack refuses a non-finite value.
_POS_INF = 1e10000
_NEG_INF = -_POS_INF
def _VarintSize(value):
  """Compute the size of a varint value.

  Args:
    value: The integer to measure.  Callers are expected to have done type
      and bounds checking already; negative values are not handled here.

  Returns:
    The number of bytes (1 through 10) that the varint encoding of value
    occupies, at seven payload bits per byte.
  """
  # Each threshold is the largest value that fits in 7 * N bits.
  if value <= 0x7f: return 1
  if value <= 0x3fff: return 2
  if value <= 0x1fffff: return 3
  if value <= 0xfffffff: return 4
  if value <= 0x7ffffffff: return 5
  if value <= 0x3ffffffffff: return 6
  if value <= 0x1ffffffffffff: return 7
  if value <= 0xffffffffffffff: return 8
  if value <= 0x7fffffffffffffff: return 9
  # Anything wider than 63 bits needs the tenth byte.
  return 10
def _SignedVarintSize(value):
  """Compute the size of a signed varint value.

  Args:
    value: The (possibly negative) integer to measure.

  Returns:
    The number of bytes the varint encoding of value occupies.  Every
    negative value reports the maximum size of 10 bytes.
  """
  if value < 0: return 10
  if value <= 0x7f: return 1
  if value <= 0x3fff: return 2
  if value <= 0x1fffff: return 3
  if value <= 0xfffffff: return 4
  if value <= 0x7ffffffff: return 5
  if value <= 0x3ffffffffff: return 6
  if value <= 0x1ffffffffffff: return 7
  if value <= 0xffffffffffffff: return 8
  if value <= 0x7fffffffffffffff: return 9
  # Anything wider than 63 bits needs the tenth byte.
  return 10
def _TagSize(field_number):
  """Returns the number of bytes required to serialize a tag with this field
  number.

  Args:
    field_number: The field number to be packed into the tag.

  Returns:
    The varint size of the packed tag.
  """
  # Just pass in type 0, since the type won't affect the tag+type size.
  return _VarintSize(wire_format.PackTag(field_number, 0))
# --------------------------------------------------------------------
# In this section we define some generic sizers.  Each of these functions
# takes parameters specific to a particular field type, e.g. int32 or fixed64.
# It returns another function which in turn takes parameters specific to a
# particular field, e.g. the field number and whether it is repeated or packed.
# Look at the next section to see how these are used.
def _SimpleSizer(compute_value_size):
  """A sizer which uses the function compute_value_size to compute the size of
  each value.  Typically compute_value_size is _VarintSize.

  Args:
    compute_value_size: A function taking one value and returning its encoded
      size in bytes (excluding the tag).

  Returns:
    A sizer constructor taking (field_number, is_repeated, is_packed) and
    returning a function that maps a field value to its total encoded size.
  """

  def SpecificSizer(field_number, is_repeated, is_packed):
    tag_size = _TagSize(field_number)
    if is_packed:
      local_VarintSize = _VarintSize
      def PackedFieldSize(value):
        # One tag, one length prefix, then the concatenated payloads.
        result = 0
        for element in value:
          result += compute_value_size(element)
        return result + local_VarintSize(result) + tag_size
      return PackedFieldSize
    elif is_repeated:
      def RepeatedFieldSize(value):
        # Every element carries its own tag.
        result = tag_size * len(value)
        for element in value:
          result += compute_value_size(element)
        return result
      return RepeatedFieldSize
    else:
      def FieldSize(value):
        return tag_size + compute_value_size(value)
      return FieldSize

  return SpecificSizer
def _ModifiedSizer(compute_value_size, modify_value):
  """Like SimpleSizer, but modify_value is invoked on each value before it is
  passed to compute_value_size.  modify_value is typically ZigZagEncode.

  Args:
    compute_value_size: A function taking one (already modified) value and
      returning its encoded size in bytes (excluding the tag).
    modify_value: A function applied to every value before it is sized.

  Returns:
    A sizer constructor taking (field_number, is_repeated, is_packed) and
    returning a function that maps a field value to its total encoded size.
  """

  def SpecificSizer(field_number, is_repeated, is_packed):
    tag_size = _TagSize(field_number)
    if is_packed:
      local_VarintSize = _VarintSize
      def PackedFieldSize(value):
        # One tag, one length prefix, then the concatenated payloads.
        result = 0
        for element in value:
          result += compute_value_size(modify_value(element))
        return result + local_VarintSize(result) + tag_size
      return PackedFieldSize
    elif is_repeated:
      def RepeatedFieldSize(value):
        # Every element carries its own tag.
        result = tag_size * len(value)
        for element in value:
          result += compute_value_size(modify_value(element))
        return result
      return RepeatedFieldSize
    else:
      def FieldSize(value):
        return tag_size + compute_value_size(modify_value(value))
      return FieldSize

  return SpecificSizer
def _FixedSizer(value_size):
  """Like _SimpleSizer except for a fixed-size field.  The input is the size
  of one value.

  Args:
    value_size: The encoded size in bytes of a single value of this type.

  Returns:
    A sizer constructor taking (field_number, is_repeated, is_packed) and
    returning a function that maps a field value to its total encoded size.
  """

  def SpecificSizer(field_number, is_repeated, is_packed):
    tag_size = _TagSize(field_number)
    if is_packed:
      local_VarintSize = _VarintSize
      def PackedFieldSize(value):
        result = len(value) * value_size
        return result + local_VarintSize(result) + tag_size
      return PackedFieldSize
    elif is_repeated:
      # Tag plus payload is constant per element, so precompute it.
      element_size = value_size + tag_size
      def RepeatedFieldSize(value):
        return len(value) * element_size
      return RepeatedFieldSize
    else:
      # The size does not depend on the value at all.
      field_size = value_size + tag_size
      def FieldSize(value):
        return field_size
      return FieldSize

  return SpecificSizer
# ====================================================================
# Here we declare a sizer constructor for each field type.  Each "sizer
# constructor" is a function that takes (field_number, is_repeated, is_packed)
# as parameters and returns a sizer, which in turn takes a field value as
# a parameter and returns its encoded size.
# Signed varint types: negative values are sized by _SignedVarintSize.
Int32Sizer = Int64Sizer = EnumSizer = _SimpleSizer(_SignedVarintSize)

# Unsigned varint types.
UInt32Sizer = UInt64Sizer = _SimpleSizer(_VarintSize)

# ZigZag types: each value is ZigZag-encoded before being sized.
SInt32Sizer = SInt64Sizer = _ModifiedSizer(
    _SignedVarintSize, wire_format.ZigZagEncode)

# Fixed-width types, keyed by their encoded width in bytes.
Fixed32Sizer = SFixed32Sizer = FloatSizer = _FixedSizer(4)
Fixed64Sizer = SFixed64Sizer = DoubleSizer = _FixedSizer(8)

BoolSizer = _FixedSizer(1)
def StringSizer(field_number, is_repeated, is_packed):
  """Returns a sizer for a string field.

  Args:
    field_number: The field's number, used to size the tag.
    is_repeated: Whether the field is repeated.
    is_packed: Must be false; this sizer has no packed form.

  Returns:
    A function mapping the field value (a string, or a sequence of strings
    when repeated) to its encoded size in bytes.  Strings are measured by
    their UTF-8 encoding.
  """

  tag_size = _TagSize(field_number)
  local_VarintSize = _VarintSize
  local_len = len
  assert not is_packed
  if is_repeated:
    def RepeatedFieldSize(value):
      result = tag_size * len(value)
      for element in value:
        l = local_len(element.encode('utf-8'))
        result += local_VarintSize(l) + l
      return result
    return RepeatedFieldSize
  else:
    def FieldSize(value):
      l = local_len(value.encode('utf-8'))
      return tag_size + local_VarintSize(l) + l
    return FieldSize
def BytesSizer(field_number, is_repeated, is_packed):
  """Returns a sizer for a bytes field.

  Args:
    field_number: The field's number, used to size the tag.
    is_repeated: Whether the field is repeated.
    is_packed: Must be false; this sizer has no packed form.

  Returns:
    A function mapping the field value (a byte string, or a sequence of them
    when repeated) to its encoded size in bytes.
  """

  tag_size = _TagSize(field_number)
  local_VarintSize = _VarintSize
  local_len = len
  assert not is_packed
  if is_repeated:
    def RepeatedFieldSize(value):
      result = tag_size * len(value)
      for element in value:
        l = local_len(element)
        result += local_VarintSize(l) + l
      return result
    return RepeatedFieldSize
  else:
    def FieldSize(value):
      l = local_len(value)
      return tag_size + local_VarintSize(l) + l
    return FieldSize
def GroupSizer(field_number, is_repeated, is_packed):
  """Returns a sizer for a group field.

  Args:
    field_number: The field's number, used to size the tags.
    is_repeated: Whether the field is repeated.
    is_packed: Must be false; this sizer has no packed form.

  Returns:
    A function mapping the field value (an object with ByteSize(), or a
    sequence of them when repeated) to its encoded size in bytes.
  """

  # Doubled because a group is bracketed by a start tag and an end tag.
  tag_size = _TagSize(field_number) * 2
  assert not is_packed
  if is_repeated:
    def RepeatedFieldSize(value):
      result = tag_size * len(value)
      for element in value:
        result += element.ByteSize()
      return result
    return RepeatedFieldSize
  else:
    def FieldSize(value):
      return tag_size + value.ByteSize()
    return FieldSize
def MessageSizer(field_number, is_repeated, is_packed):
  """Returns a sizer for a message field.

  Args:
    field_number: The field's number, used to size the tag.
    is_repeated: Whether the field is repeated.
    is_packed: Must be false; this sizer has no packed form.

  Returns:
    A function mapping the field value (an object with ByteSize(), or a
    sequence of them when repeated) to its encoded size in bytes, including
    the length prefix of each message.
  """

  tag_size = _TagSize(field_number)
  local_VarintSize = _VarintSize
  assert not is_packed
  if is_repeated:
    def RepeatedFieldSize(value):
      result = tag_size * len(value)
      for element in value:
        l = element.ByteSize()
        result += local_VarintSize(l) + l
      return result
    return RepeatedFieldSize
  else:
    def FieldSize(value):
      l = value.ByteSize()
      return tag_size + local_VarintSize(l) + l
    return FieldSize
# --------------------------------------------------------------------
# MessageSet is special.
def MessageSetItemSizer(field_number):
  """Returns a sizer for extensions of MessageSet.

  The message set message looks like this:
    message MessageSet {
      repeated group Item = 1 {
        required int32 type_id = 2;
        required string message = 3;
      }
    }

  Args:
    field_number: The extension's field number, which is written as the
      item's type_id.

  Returns:
    A function mapping a message (an object with ByteSize()) to the encoded
    size of the whole Item group that wraps it.
  """
  # Everything except the message payload and its length prefix is constant:
  # group start and end tags, the type_id tag and value, and the message tag.
  static_size = (_TagSize(1) * 2 + _TagSize(2) + _VarintSize(field_number) +
                 _TagSize(3))
  local_VarintSize = _VarintSize

  def FieldSize(value):
    l = value.ByteSize()
    return static_size + local_VarintSize(l) + l

  return FieldSize
# ====================================================================
# Encoders!
def _VarintEncoder():
  """Return an encoder for a basic varint value (does not include tag).

  Returns:
    A function EncodeVarint(write, value) that calls write() once per output
    byte, passing a one-character string.  value must be non-negative.
  """

  local_chr = chr
  def EncodeVarint(write, value):
    bits = value & 0x7f
    value >>= 7
    while value:
      # More bytes follow, so set the continuation bit.
      write(local_chr(0x80|bits))
      bits = value & 0x7f
      value >>= 7
    return write(local_chr(bits))

  return EncodeVarint
def _SignedVarintEncoder():
  """Return an encoder for a basic signed varint value (does not include
  tag).

  Returns:
    A function EncodeSignedVarint(write, value) that calls write() once per
    output byte, passing a one-character string.
  """

  local_chr = chr
  def EncodeSignedVarint(write, value):
    if value < 0:
      # Map to the 64-bit two's-complement representation.
      value += (1 << 64)
    bits = value & 0x7f
    value >>= 7
    while value:
      # More bytes follow, so set the continuation bit.
      write(local_chr(0x80|bits))
      bits = value & 0x7f
      value >>= 7
    return write(local_chr(bits))

  return EncodeSignedVarint
# Module-level encoder instances, built once and shared by everything below.
_EncodeVarint = _VarintEncoder()
_EncodeSignedVarint = _SignedVarintEncoder()
def _VarintBytes(value):
  """Encode the given integer as a varint and return the bytes.  This is only
  called at startup time so it doesn't need to be fast.

  Args:
    value: The integer to encode.

  Returns:
    The varint encoding of value as a single string.
  """

  # Collect the pieces the encoder writes, then join them.
  pieces = []
  _EncodeVarint(pieces.append, value)
  return "".join(pieces)
def TagBytes(field_number, wire_type):
  """Encode the given tag and return the bytes.  Only called at startup.

  Args:
    field_number: The field number to pack into the tag.
    wire_type: The wire type to pack into the tag.

  Returns:
    The varint encoding of the packed tag.
  """

  return _VarintBytes(wire_format.PackTag(field_number, wire_type))
# --------------------------------------------------------------------
# As with sizers (see above), we have a number of common encoder
# implementations.
def _SimpleEncoder(wire_type, encode_value, compute_value_size):
  """Return a constructor for an encoder for fields of a particular type.

  Args:
      wire_type:  The field's wire type, for encoding tags.
      encode_value:  A function which encodes an individual value, e.g.
        _EncodeVarint().
      compute_value_size:  A function which computes the size of an individual
        value, e.g. _VarintSize().

  Returns:
      An encoder constructor taking (field_number, is_repeated, is_packed) and
      returning a function (write, value) that writes the encoded field.
  """

  def SpecificEncoder(field_number, is_repeated, is_packed):
    if is_packed:
      # Packed fields are always length-delimited, whatever the element type.
      tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
      local_EncodeVarint = _EncodeVarint
      def EncodePackedField(write, value):
        write(tag_bytes)
        # The payload length must be written before the payload itself.
        size = 0
        for element in value:
          size += compute_value_size(element)
        local_EncodeVarint(write, size)
        for element in value:
          encode_value(write, element)
      return EncodePackedField
    elif is_repeated:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeRepeatedField(write, value):
        for element in value:
          write(tag_bytes)
          encode_value(write, element)
      return EncodeRepeatedField
    else:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeField(write, value):
        write(tag_bytes)
        return encode_value(write, value)
      return EncodeField

  return SpecificEncoder
def _ModifiedEncoder(wire_type, encode_value, compute_value_size, modify_value):
  """Like SimpleEncoder but additionally invokes modify_value on every value
  before passing it to encode_value.  Usually modify_value is ZigZagEncode.

  Args:
      wire_type:  The field's wire type, for encoding tags.
      encode_value:  A function which encodes an individual (modified) value.
      compute_value_size:  A function which computes the size of an individual
        (modified) value.
      modify_value:  A function applied to each value before it is sized or
        encoded.

  Returns:
      An encoder constructor taking (field_number, is_repeated, is_packed) and
      returning a function (write, value) that writes the encoded field.
  """

  def SpecificEncoder(field_number, is_repeated, is_packed):
    if is_packed:
      # Packed fields are always length-delimited, whatever the element type.
      tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
      local_EncodeVarint = _EncodeVarint
      def EncodePackedField(write, value):
        write(tag_bytes)
        # The payload length must be written before the payload itself.
        size = 0
        for element in value:
          size += compute_value_size(modify_value(element))
        local_EncodeVarint(write, size)
        for element in value:
          encode_value(write, modify_value(element))
      return EncodePackedField
    elif is_repeated:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeRepeatedField(write, value):
        for element in value:
          write(tag_bytes)
          encode_value(write, modify_value(element))
      return EncodeRepeatedField
    else:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeField(write, value):
        write(tag_bytes)
        return encode_value(write, modify_value(value))
      return EncodeField

  return SpecificEncoder
def _StructPackEncoder(wire_type, format):
  """Return a constructor for an encoder for a fixed-width field.

  Args:
      wire_type:  The field's wire type, for encoding tags.
      format:  The format string to pass to struct.pack().

  Returns:
      An encoder constructor taking (field_number, is_repeated, is_packed) and
      returning a function (write, value) that writes the encoded field.
  """

  value_size = struct.calcsize(format)

  def SpecificEncoder(field_number, is_repeated, is_packed):
    local_struct_pack = struct.pack
    if is_packed:
      # Packed fields are always length-delimited, whatever the element type.
      tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
      local_EncodeVarint = _EncodeVarint
      def EncodePackedField(write, value):
        write(tag_bytes)
        # Fixed-width elements make the payload length a simple product.
        local_EncodeVarint(write, len(value) * value_size)
        for element in value:
          write(local_struct_pack(format, element))
      return EncodePackedField
    elif is_repeated:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeRepeatedField(write, value):
        for element in value:
          write(tag_bytes)
          write(local_struct_pack(format, element))
      return EncodeRepeatedField
    else:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeField(write, value):
        write(tag_bytes)
        return write(local_struct_pack(format, value))
      return EncodeField

  return SpecificEncoder
def _FloatingPointEncoder(wire_type, format):
  """Return a constructor for an encoder for float fields.

  This is like StructPackEncoder, but catches errors that may be due to
  passing non-finite floating-point values to struct.pack, and makes a
  second attempt to encode those values.

  Args:
      wire_type:  The field's wire type, for encoding tags.
      format:  The format string to pass to struct.pack().

  Returns:
      An encoder constructor taking (field_number, is_repeated, is_packed) and
      returning a function (write, value) that writes the encoded field.

  Raises:
      ValueError:  If format does not describe a 4-byte or 8-byte value.
  """
  # NOTE(review): the exception type caught around struct.pack below is
  # assumed to be SystemError -- confirm against the target Python versions.

  value_size = struct.calcsize(format)
  if value_size == 4:
    def EncodeNonFiniteOrRaise(write, value):
      # Remember that the serialized form uses little-endian byte order.
      if value == _POS_INF:
        write('\x00\x00\x80\x7F')
      elif value == _NEG_INF:
        write('\x00\x00\x80\xFF')
      elif value != value:           # NaN
        write('\x00\x00\xC0\x7F')
      else:
        # Not a non-finite value after all: re-raise the pack error.
        raise
  elif value_size == 8:
    def EncodeNonFiniteOrRaise(write, value):
      if value == _POS_INF:
        write('\x00\x00\x00\x00\x00\x00\xF0\x7F')
      elif value == _NEG_INF:
        write('\x00\x00\x00\x00\x00\x00\xF0\xFF')
      elif value != value:                         # NaN
        write('\x00\x00\x00\x00\x00\x00\xF8\x7F')
      else:
        # Not a non-finite value after all: re-raise the pack error.
        raise
  else:
    raise ValueError('Can\'t encode floating-point values that are '
                     '%d bytes long (only 4 or 8)' % value_size)

  def SpecificEncoder(field_number, is_repeated, is_packed):
    local_struct_pack = struct.pack
    if is_packed:
      # Packed fields are always length-delimited, whatever the element type.
      tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
      local_EncodeVarint = _EncodeVarint
      def EncodePackedField(write, value):
        write(tag_bytes)
        local_EncodeVarint(write, len(value) * value_size)
        for element in value:
          # This try/except block is going to be faster than any code that
          # we could write to check whether element is finite.
          try:
            write(local_struct_pack(format, element))
          except SystemError:
            EncodeNonFiniteOrRaise(write, element)
      return EncodePackedField
    elif is_repeated:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeRepeatedField(write, value):
        for element in value:
          write(tag_bytes)
          try:
            write(local_struct_pack(format, element))
          except SystemError:
            EncodeNonFiniteOrRaise(write, element)
      return EncodeRepeatedField
    else:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeField(write, value):
        write(tag_bytes)
        try:
          write(local_struct_pack(format, value))
        except SystemError:
          EncodeNonFiniteOrRaise(write, value)
      return EncodeField

  return SpecificEncoder
# ====================================================================
# Here we declare an encoder constructor for each field type.  These work
# very similarly to sizer constructors, described earlier.
# Signed varint types.
Int32Encoder = Int64Encoder = EnumEncoder = _SimpleEncoder(
    wire_format.WIRETYPE_VARINT, _EncodeSignedVarint, _SignedVarintSize)

# Unsigned varint types.
UInt32Encoder = UInt64Encoder = _SimpleEncoder(
    wire_format.WIRETYPE_VARINT, _EncodeVarint, _VarintSize)

# ZigZag types: each value is ZigZag-encoded, then written as a plain varint.
SInt32Encoder = SInt64Encoder = _ModifiedEncoder(
    wire_format.WIRETYPE_VARINT, _EncodeVarint, _VarintSize,
    wire_format.ZigZagEncode)

# Note that Python conveniently guarantees that when using the '<' prefix on
# formats, they will also have the same size across all platforms (as opposed
# to without the prefix, where their sizes depend on the C compiler's basic
# type sizes).
Fixed32Encoder  = _StructPackEncoder(wire_format.WIRETYPE_FIXED32, '<I')
Fixed64Encoder  = _StructPackEncoder(wire_format.WIRETYPE_FIXED64, '<Q')
SFixed32Encoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED32, '<i')
SFixed64Encoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED64, '<q')
FloatEncoder    = _FloatingPointEncoder(wire_format.WIRETYPE_FIXED32, '<f')
DoubleEncoder   = _FloatingPointEncoder(wire_format.WIRETYPE_FIXED64, '<d')
def BoolEncoder(field_number, is_repeated, is_packed):
  """Returns an encoder for a boolean field.

  Args:
    field_number: The field's number, for encoding tags.
    is_repeated: Whether the field is repeated.
    is_packed: Whether the field uses the packed encoding.

  Returns:
    A function (write, value) that writes the encoded field.  Each boolean
    is written as a single byte chosen by the value's truthiness.
  """

  false_byte = chr(0)
  true_byte = chr(1)
  if is_packed:
    tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
    local_EncodeVarint = _EncodeVarint
    def EncodePackedField(write, value):
      write(tag_bytes)
      # One byte per element, so the payload length is the element count.
      local_EncodeVarint(write, len(value))
      for element in value:
        if element:
          write(true_byte)
        else:
          write(false_byte)
    return EncodePackedField
  elif is_repeated:
    tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_VARINT)
    def EncodeRepeatedField(write, value):
      for element in value:
        write(tag_bytes)
        if element:
          write(true_byte)
        else:
          write(false_byte)
    return EncodeRepeatedField
  else:
    tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_VARINT)
    def EncodeField(write, value):
      write(tag_bytes)
      if value:
        return write(true_byte)
      return write(false_byte)
    return EncodeField
def StringEncoder(field_number, is_repeated, is_packed):
  """Returns an encoder for a string field.

  Args:
    field_number: The field's number, for encoding the tag.
    is_repeated: Whether the field is repeated.
    is_packed: Must be false; this encoder has no packed form.

  Returns:
    A function (write, value) that writes the tag, the length of the UTF-8
    encoding, and the UTF-8 bytes for each string.
  """

  tag = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
  local_EncodeVarint = _EncodeVarint
  local_len = len
  assert not is_packed
  if is_repeated:
    def EncodeRepeatedField(write, value):
      for element in value:
        # Encode first: the length prefix counts encoded bytes.
        encoded = element.encode('utf-8')
        write(tag)
        local_EncodeVarint(write, local_len(encoded))
        write(encoded)
    return EncodeRepeatedField
  else:
    def EncodeField(write, value):
      # Encode first: the length prefix counts encoded bytes.
      encoded = value.encode('utf-8')
      write(tag)
      local_EncodeVarint(write, local_len(encoded))
      return write(encoded)
    return EncodeField
def BytesEncoder(field_number, is_repeated, is_packed):
  """Returns an encoder for a bytes field.

  Args:
    field_number: The field's number, for encoding the tag.
    is_repeated: Whether the field is repeated.
    is_packed: Must be false; this encoder has no packed form.

  Returns:
    A function (write, value) that writes the tag, the length, and the raw
    bytes for each value.
  """

  tag = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
  local_EncodeVarint = _EncodeVarint
  local_len = len
  assert not is_packed
  if is_repeated:
    def EncodeRepeatedField(write, value):
      for element in value:
        write(tag)
        local_EncodeVarint(write, local_len(element))
        write(element)
    return EncodeRepeatedField
  else:
    def EncodeField(write, value):
      write(tag)
      local_EncodeVarint(write, local_len(value))
      return write(value)
    return EncodeField
def GroupEncoder(field_number, is_repeated, is_packed):
  """Returns an encoder for a group field.

  Args:
    field_number: The field's number, for encoding the start and end tags.
    is_repeated: Whether the field is repeated.
    is_packed: Must be false; this encoder has no packed form.

  Returns:
    A function (write, value) that writes each group as start tag, the
    output of the group's _InternalSerialize(write), then end tag.
  """

  start_tag = TagBytes(field_number, wire_format.WIRETYPE_START_GROUP)
  end_tag = TagBytes(field_number, wire_format.WIRETYPE_END_GROUP)
  assert not is_packed
  if is_repeated:
    def EncodeRepeatedField(write, value):
      for element in value:
        write(start_tag)
        element._InternalSerialize(write)
        write(end_tag)
    return EncodeRepeatedField
  else:
    def EncodeField(write, value):
      write(start_tag)
      value._InternalSerialize(write)
      return write(end_tag)
    return EncodeField
def MessageEncoder(field_number, is_repeated, is_packed):
  """Returns an encoder for a message field.

  Args:
    field_number: The field's number, for encoding the tag.
    is_repeated: Whether the field is repeated.
    is_packed: Must be false; this encoder has no packed form.

  Returns:
    A function (write, value) that writes each message as tag, its
    ByteSize() as a varint, then the output of _InternalSerialize(write).
  """

  tag = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
  local_EncodeVarint = _EncodeVarint
  assert not is_packed
  if is_repeated:
    def EncodeRepeatedField(write, value):
      for element in value:
        write(tag)
        local_EncodeVarint(write, element.ByteSize())
        element._InternalSerialize(write)
    return EncodeRepeatedField
  else:
    def EncodeField(write, value):
      write(tag)
      local_EncodeVarint(write, value.ByteSize())
      return value._InternalSerialize(write)
    return EncodeField
# --------------------------------------------------------------------
# As before, MessageSet is special.
def MessageSetItemEncoder(field_number):
  """Encoder for extensions of MessageSet.

  The message set message looks like this:
    message MessageSet {
      repeated group Item = 1 {
        required int32 type_id = 2;
        required string message = 3;
      }
    }

  Args:
    field_number: The extension's field number, which is written as the
      item's type_id.

  Returns:
    A function (write, value) that writes one complete Item group wrapping
    the given message.
  """
  # Everything before the message's length prefix is constant for a given
  # extension, so build it once: group start, type_id field, message tag.
  start_bytes = "".join([
      TagBytes(1, wire_format.WIRETYPE_START_GROUP),
      TagBytes(2, wire_format.WIRETYPE_VARINT),
      _VarintBytes(field_number),
      TagBytes(3, wire_format.WIRETYPE_LENGTH_DELIMITED)])
  end_bytes = TagBytes(1, wire_format.WIRETYPE_END_GROUP)
  local_EncodeVarint = _EncodeVarint

  def EncodeField(write, value):
    write(start_bytes)
    local_EncodeVarint(write, value.ByteSize())
    value._InternalSerialize(write)
    return write(end_bytes)

  return EncodeField