java/src/main/java/com/google/protobuf/Internal.java

   1 // Protocol Buffers - Google's data interchange format
   2 // Copyright 2008 Google Inc.  All rights reserved.
   3 // https://developers.google.com/protocol-buffers/
   4 //
   5 // Redistribution and use in source and binary forms, with or without
   6 // modification, are permitted provided that the following conditions are
   7 // met:
   8 //
   9 //     * Redistributions of source code must retain the above copyright
  10 // notice, this list of conditions and the following disclaimer.
  11 //     * Redistributions in binary form must reproduce the above
  12 // copyright notice, this list of conditions and the following disclaimer
  13 // in the documentation and/or other materials provided with the
  14 // distribution.
  15 //     * Neither the name of Google Inc. nor the names of its
  16 // contributors may be used to endorse or promote products derived from
  17 // this software without specific prior written permission.
  18 //
  19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30
  31 package com.google.protobuf;
  32
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
  38
  39 /**
  40  * The classes contained within are used internally by the Protocol Buffer
  41  * library and generated message implementations. They are public only because
  42  * those generated messages do not reside in the {@code protobuf} package.
  43  * Others should not use this class directly.
  44  *
  45  * @author kenton@google.com (Kenton Varda)
  46  */
  47 public class Internal {
  48   /**
  49    * Helper called by generated code to construct default values for string
  50    * fields.
  51    * <p>
  52    * The protocol compiler does not actually contain a UTF-8 decoder -- it
  53    * just pushes UTF-8-encoded text around without touching it.  The one place
  54    * where this presents a problem is when generating Java string literals.
  55    * Unicode characters in the string literal would normally need to be encoded
  56    * using a Unicode escape sequence, which would require decoding them.
  57    * To get around this, protoc instead embeds the UTF-8 bytes into the
  58    * generated code and leaves it to the runtime library to decode them.
  59    * <p>
  60    * It gets worse, though.  If protoc just generated a byte array, like:
  61    *   new byte[] {0x12, 0x34, 0x56, 0x78}
  62    * Java actually generates *code* which allocates an array and then fills
  63    * in each value.  This is much less efficient than just embedding the bytes
  64    * directly into the bytecode.  To get around this, we need another
  65    * work-around.  String literals are embedded directly, so protoc actually
  66    * generates a string literal corresponding to the bytes.  The easiest way
  67    * to do this is to use the ISO-8859-1 character set, which corresponds to
  68    * the first 256 characters of the Unicode range.  Protoc can then use
  69    * good old CEscape to generate the string.
  70    * <p>
  71    * So we have a string literal which represents a set of bytes which
  72    * represents another string.  This function -- stringDefaultValue --
  73    * converts from the generated string to the string we actually want.  The
  74    * generated code calls this automatically.
  75    */
  76   public static String stringDefaultValue(String bytes) {
  77     try {
  78       return new String(bytes.getBytes("ISO-8859-1"), "UTF-8");
  79     } catch (UnsupportedEncodingException e) {
  80       // This should never happen since all JVMs are required to implement
  81       // both of the above character sets.
  82       throw new IllegalStateException(
  83           "Java VM does not support a standard character set.", e);
  84     }
  85   }
  86
  87   /**
  88    * Helper called by generated code to construct default values for bytes
  89    * fields.
  90    * <p>
  91    * This is a lot like {@link #stringDefaultValue}, but for bytes fields.
  92    * In this case we only need the second of the two hacks -- allowing us to
  93    * embed raw bytes as a string literal with ISO-8859-1 encoding.
  94    */
  95   public static ByteString bytesDefaultValue(String bytes) {
  96     try {
  97       return ByteString.copyFrom(bytes.getBytes("ISO-8859-1"));
  98     } catch (UnsupportedEncodingException e) {
  99       // This should never happen since all JVMs are required to implement
 100       // ISO-8859-1.
 101       throw new IllegalStateException(
 102           "Java VM does not support a standard character set.", e);
 103     }
 104   }
 105   /**
 106    * Helper called by generated code to construct default values for bytes
 107    * fields.
 108    * <p>
 109    * This is like {@link #bytesDefaultValue}, but returns a byte array.
 110    */
 111   public static byte[] byteArrayDefaultValue(String bytes) {
 112     try {
 113       return bytes.getBytes("ISO-8859-1");
 114     } catch (UnsupportedEncodingException e) {
 115       // This should never happen since all JVMs are required to implement
 116       // ISO-8859-1.
 117       throw new IllegalStateException(
 118           "Java VM does not support a standard character set.", e);
 119     }
 120   }
 121
 122   /**
 123    * Helper called by generated code to construct default values for bytes
 124    * fields.
 125    * <p>
 126    * This is like {@link #bytesDefaultValue}, but returns a ByteBuffer.
 127    */
 128   public static ByteBuffer byteBufferDefaultValue(String bytes) {
 129     return ByteBuffer.wrap(byteArrayDefaultValue(bytes));
 130   }
 131
 132   /**
 133    * Create a new ByteBuffer and copy all the content of {@code source}
 134    * ByteBuffer to the new ByteBuffer. The new ByteBuffer's limit and
 135    * capacity will be source.capacity(), and its position will be 0.
 136    * Note that the state of {@code source} ByteBuffer won't be changed.
 137    */
 138   public static ByteBuffer copyByteBuffer(ByteBuffer source) {
 139     // Make a duplicate of the source ByteBuffer and read data from the
 140     // duplicate. This is to avoid affecting the source ByteBuffer's state.
 141     ByteBuffer temp = source.duplicate();
 142     // We want to copy all the data in the source ByteBuffer, not just the
 143     // remaining bytes.
 144     temp.clear();
 145     ByteBuffer result = ByteBuffer.allocate(temp.capacity());
 146     result.put(temp);
 147     result.clear();
 148     return result;
 149   }
 150
 151   /**
 152    * Helper called by generated code to determine if a byte array is a valid
 153    * UTF-8 encoded string such that the original bytes can be converted to
 154    * a String object and then back to a byte array round tripping the bytes
 155    * without loss.  More precisely, returns {@code true} whenever:
 156    * <pre>   {@code
 157    * Arrays.equals(byteString.toByteArray(),
 158    *     new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8"))
 159    * }</pre>
 160    *
 161    * <p>This method rejects "overlong" byte sequences, as well as
 162    * 3-byte sequences that would map to a surrogate character, in
 163    * accordance with the restricted definition of UTF-8 introduced in
 164    * Unicode 3.1.  Note that the UTF-8 decoder included in Oracle's
 165    * JDK has been modified to also reject "overlong" byte sequences,
 166    * but currently (2011) still accepts 3-byte surrogate character
 167    * byte sequences.
 168    *
 169    * <p>See the Unicode Standard,</br>
 170    * Table 3-6. <em>UTF-8 Bit Distribution</em>,</br>
 171    * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>.
 172    *
 173    * <p>As of 2011-02, this method simply returns the result of {@link
 174    * ByteString#isValidUtf8()}.  Calling that method directly is preferred.
 175    *
 176    * @param byteString the string to check
 177    * @return whether the byte array is round trippable
 178    */
 179   public static boolean isValidUtf8(ByteString byteString) {
 180     return byteString.isValidUtf8();
 181   }
 182
 183   /**
 184    * Like {@link #isValidUtf8(ByteString)} but for byte arrays.
 185    */
 186   public static boolean isValidUtf8(byte[] byteArray) {
 187     return Utf8.isValidUtf8(byteArray);
 188   }
 189
 190   /**
 191    * Helper method to get the UTF-8 bytes of a string.
 192    */
 193   public static byte[] toByteArray(String value) {
 194     try {
 195       return value.getBytes("UTF-8");
 196     } catch (UnsupportedEncodingException e) {
 197       throw new RuntimeException("UTF-8 not supported?", e);
 198     }
 199   }
 200
 201   /**
 202    * Helper method to convert a byte array to a string using UTF-8 encoding.
 203    */
 204   public static String toStringUtf8(byte[] bytes) {
 205     try {
 206       return new String(bytes, "UTF-8");
 207     } catch (UnsupportedEncodingException e) {
 208       throw new RuntimeException("UTF-8 not supported?", e);
 209     }
 210   }
 211
 212   /**
 213    * Interface for an enum value or value descriptor, to be used in FieldSet.
 214    * The lite library stores enum values directly in FieldSets but the full
 215    * library stores EnumValueDescriptors in order to better support reflection.
 216    */
 217   public interface EnumLite {
 218     int getNumber();
 219   }
 220
 221   /**
 222    * Interface for an object which maps integers to {@link EnumLite}s.
 223    * {@link Descriptors.EnumDescriptor} implements this interface by mapping
 224    * numbers to {@link Descriptors.EnumValueDescriptor}s.  Additionally,
 225    * every generated enum type has a static method internalGetValueMap() which
 226    * returns an implementation of this type that maps numbers to enum values.
 227    */
 228   public interface EnumLiteMap<T extends EnumLite> {
 229     T findValueByNumber(int number);
 230   }
 231
 232   /**
 233    * Helper method for implementing {@link MessageLite#hashCode()} for longs.
 234    * @see Long#hashCode()
 235    */
 236   public static int hashLong(long n) {
 237     return (int) (n ^ (n >>> 32));
 238   }
 239
 240   /**
 241    * Helper method for implementing {@link MessageLite#hashCode()} for
 242    * booleans.
 243    * @see Boolean#hashCode()
 244    */
 245   public static int hashBoolean(boolean b) {
 246     return b ? 1231 : 1237;
 247   }
 248
 249   /**
 250    * Helper method for implementing {@link MessageLite#hashCode()} for enums.
 251    * <p>
 252    * This is needed because {@link java.lang.Enum#hashCode()} is final, but we
 253    * need to use the field number as the hash code to ensure compatibility
 254    * between statically and dynamically generated enum objects.
 255    */
 256   public static int hashEnum(EnumLite e) {
 257     return e.getNumber();
 258   }
 259
 260   /**
 261    * Helper method for implementing {@link MessageLite#hashCode()} for
 262    * enum lists.
 263    */
 264   public static int hashEnumList(List<? extends EnumLite> list) {
 265     int hash = 1;
 266     for (EnumLite e : list) {
 267       hash = 31 * hash + hashEnum(e);
 268     }
 269     return hash;
 270   }
 271
 272   /**
 273    * Helper method for implementing {@link MessageLite#equals()} for bytes field.
 274    */
 275   public static boolean equals(List<byte[]> a, List<byte[]> b) {
 276     if (a.size() != b.size()) return false;
 277     for (int i = 0; i < a.size(); ++i) {
 278       if (!Arrays.equals(a.get(i), b.get(i))) {
 279         return false;
 280       }
 281     }
 282     return true;
 283   }
 284
 285   /**
 286    * Helper method for implementing {@link MessageLite#hashCode()} for bytes field.
 287    */
 288   public static int hashCode(List<byte[]> list) {
 289     int hash = 1;
 290     for (byte[] bytes : list) {
 291       hash = 31 * hash + hashCode(bytes);
 292     }
 293     return hash;
 294   }
 295
 296   /**
 297    * Helper method for implementing {@link MessageLite#hashCode()} for bytes field.
 298    */
 299   public static int hashCode(byte[] bytes) {
 300     // The hash code for a byte array should be the same as the hash code for a
 301     // ByteString with the same content. This is to ensure that the generated
 302     // hashCode() method will return the same value as the pure reflection
 303     // based hashCode() method.
 304     return LiteralByteString.hashCode(bytes);
 305   }
 306
 307   /**
 308    * Helper method for implementing {@link MessageLite#equals()} for bytes
 309    * field.
 310    */
 311   public static boolean equalsByteBuffer(ByteBuffer a, ByteBuffer b) {
 312     if (a.capacity() != b.capacity()) {
 313       return false;
 314     }
 315     // ByteBuffer.equals() will only compare the remaining bytes, but we want to
 316     // compare all the content.
 317     return a.duplicate().clear().equals(b.duplicate().clear());
 318   }
 319
 320   /**
 321    * Helper method for implementing {@link MessageLite#equals()} for bytes
 322    * field.
 323    */
 324   public static boolean equalsByteBuffer(
 325       List<ByteBuffer> a, List<ByteBuffer> b) {
 326     if (a.size() != b.size()) {
 327       return false;
 328     }
 329     for (int i = 0; i < a.size(); ++i) {
 330       if (!equalsByteBuffer(a.get(i), b.get(i))) {
 331         return false;
 332       }
 333     }
 334     return true;
 335   }
 336
 337   /**
 338    * Helper method for implementing {@link MessageLite#hashCode()} for bytes
 339    * field.
 340    */
 341   public static int hashCodeByteBuffer(List<ByteBuffer> list) {
 342     int hash = 1;
 343     for (ByteBuffer bytes : list) {
 344       hash = 31 * hash + hashCodeByteBuffer(bytes);
 345     }
 346     return hash;
 347   }
 348
 349   private static final int DEFAULT_BUFFER_SIZE = 4096;
 350
 351   /**
 352    * Helper method for implementing {@link MessageLite#hashCode()} for bytes
 353    * field.
 354    */
 355   public static int hashCodeByteBuffer(ByteBuffer bytes) {
 356     if (bytes.hasArray()) {
 357       // Fast path.
 358       int h = LiteralByteString.hashCode(bytes.capacity(), bytes.array(),
 359           bytes.arrayOffset(), bytes.capacity());
 360       return h == 0 ? 1 : h;
 361     } else {
 362       // Read the data into a temporary byte array before calculating the
 363       // hash value.
 364       final int bufferSize = bytes.capacity() > DEFAULT_BUFFER_SIZE
 365           ? DEFAULT_BUFFER_SIZE : bytes.capacity();
 366       final byte[] buffer = new byte[bufferSize];
 367       final ByteBuffer duplicated = bytes.duplicate();
 368       duplicated.clear();
 369       int h = bytes.capacity();
 370       while (duplicated.remaining() > 0) {
 371         final int length = duplicated.remaining() <= bufferSize ?
 372             duplicated.remaining() : bufferSize;
 373         duplicated.get(buffer, 0, length);
 374         h = LiteralByteString.hashCode(h, buffer, 0, length);
 375       }
 376       return h == 0 ? 1 : h;
 377     }
 378   }
 379
 380   /**
 381    * An empty byte array constant used in generated code.
 382    */
 383   public static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
 384
 385   /**
 386    * An empty byte array constant used in generated code.
 387    */
 388   public static final ByteBuffer EMPTY_BYTE_BUFFER =
 389       ByteBuffer.wrap(EMPTY_BYTE_ARRAY);
 390
 391 }