src/third_party/protobuf/java/src/main/java/com/google/protobuf/ByteString.java

   1 // Protocol Buffers - Google's data interchange format
   2 // Copyright 2008 Google Inc.  All rights reserved.
   3 // http://code.google.com/p/protobuf/
   4 //
   5 // Redistribution and use in source and binary forms, with or without
   6 // modification, are permitted provided that the following conditions are
   7 // met:
   8 //
   9 //     * Redistributions of source code must retain the above copyright
  10 // notice, this list of conditions and the following disclaimer.
  11 //     * Redistributions in binary form must reproduce the above
  12 // copyright notice, this list of conditions and the following disclaimer
  13 // in the documentation and/or other materials provided with the
  14 // distribution.
  15 //     * Neither the name of Google Inc. nor the names of its
  16 // contributors may be used to endorse or promote products derived from
  17 // this software without specific prior written permission.
  18 //
  19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30
  31 package com.google.protobuf;
  32
  33 import java.io.ByteArrayOutputStream;
  34 import java.io.IOException;
  35 import java.io.InputStream;
  36 import java.io.OutputStream;
  37 import java.io.UnsupportedEncodingException;
  38 import java.nio.ByteBuffer;
  39 import java.util.ArrayList;
  40 import java.util.Collection;
  41 import java.util.Iterator;
  42 import java.util.List;
  43 import java.util.NoSuchElementException;
  44
  45 /**
  46  * Immutable sequence of bytes.  Substring is supported by sharing the reference
  47  * to the immutable underlying bytes, as with {@link String}.  Concatenation is
  48  * likewise supported without copying (long strings) by building a tree of
  49  * pieces in {@link RopeByteString}.
  50  * <p>
  51  * Like {@link String}, the contents of a {@link ByteString} can never be
  52  * observed to change, not even in the presence of a data race or incorrect
  53  * API usage in the client code.
  54  *
  55  * @author crazybob@google.com Bob Lee
  56  * @author kenton@google.com Kenton Varda
  57  * @author carlanton@google.com Carl Haverl
  58  * @author martinrb@google.com Martin Buchholz
  59  */
  60 public abstract class ByteString implements Iterable<Byte> {
  61
  62   /**
  63    * When two strings to be concatenated have a combined length shorter than
  64    * this, we just copy their bytes on {@link #concat(ByteString)}.
  65    * The trade-off is copy size versus the overhead of creating tree nodes
  66    * in {@link RopeByteString}.
  67    */
  68   static final int CONCATENATE_BY_COPY_SIZE = 128;
  69
  70   /**
  71    * When copying an InputStream into a ByteString with .readFrom(),
  72    * the chunks in the underlying rope start at 256 bytes, but double
  73    * each iteration up to 8192 bytes.
  74    */
  75   static final int MIN_READ_FROM_CHUNK_SIZE = 0x100;  // 256b
  76   static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000;  // 8k
  77
  78   /**
  79    * Empty {@code ByteString}.
  80    */
  81   public static final ByteString EMPTY = new LiteralByteString(new byte[0]);
  82
  83   // This constructor is here to prevent subclassing outside of this package,
  84   ByteString() {}
  85
  86   /**
  87    * Gets the byte at the given index. This method should be used only for
  88    * random access to individual bytes. To access bytes sequentially, use the
  89    * {@link ByteIterator} returned by {@link #iterator()}, and call {@link
  90    * #substring(int, int)} first if necessary.
  91    *
  92    * @param index index of byte
  93    * @return the value
  94    * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
  95    */
  96   public abstract byte byteAt(int index);
  97
  98   /**
  99    * Return a {@link ByteString.ByteIterator} over the bytes in the ByteString.
 100    * To avoid auto-boxing, you may get the iterator manually and call
 101    * {@link ByteIterator#nextByte()}.
 102    *
 103    * @return the iterator
 104    */
 105   public abstract ByteIterator iterator();
 106
 107   /**
 108    * This interface extends {@code Iterator<Byte>}, so that we can return an
 109    * unboxed {@code byte}.
 110    */
 111   public interface ByteIterator extends Iterator<Byte> {
 112     /**
 113      * An alternative to {@link Iterator#next()} that returns an
 114      * unboxed primitive {@code byte}.
 115      *
 116      * @return the next {@code byte} in the iteration
 117      * @throws NoSuchElementException if the iteration has no more elements
 118      */
 119     byte nextByte();
 120   }
 121
 122   /**
 123    * Gets the number of bytes.
 124    *
 125    * @return size in bytes
 126    */
 127   public abstract int size();
 128
 129   /**
 130    * Returns {@code true} if the size is {@code 0}, {@code false} otherwise.
 131    *
 132    * @return true if this is zero bytes long
 133    */
 134   public boolean isEmpty() {
 135     return size() == 0;
 136   }
 137
 138   // =================================================================
 139   // ByteString -> substring
 140
 141   /**
 142    * Return the substring from {@code beginIndex}, inclusive, to the end of the
 143    * string.
 144    *
 145    * @param beginIndex start at this index
 146    * @return substring sharing underlying data
 147    * @throws IndexOutOfBoundsException if {@code beginIndex < 0} or
 148    *     {@code beginIndex > size()}.
 149    */
 150   public ByteString substring(int beginIndex) {
 151     return substring(beginIndex, size());
 152   }
 153
 154   /**
 155    * Return the substring from {@code beginIndex}, inclusive, to {@code
 156    * endIndex}, exclusive.
 157    *
 158    * @param beginIndex start at this index
 159    * @param endIndex   the last character is the one before this index
 160    * @return substring sharing underlying data
 161    * @throws IndexOutOfBoundsException if {@code beginIndex < 0},
 162    *     {@code endIndex > size()}, or {@code beginIndex > endIndex}.
 163    */
 164   public abstract ByteString substring(int beginIndex, int endIndex);
 165
 166   /**
 167    * Tests if this bytestring starts with the specified prefix.
 168    * Similar to {@link String#startsWith(String)}
 169    *
 170    * @param prefix the prefix.
 171    * @return <code>true</code> if the byte sequence represented by the
 172    *         argument is a prefix of the byte sequence represented by
 173    *         this string; <code>false</code> otherwise.
 174    */
 175   public boolean startsWith(ByteString prefix) {
 176     return size() >= prefix.size() &&
 177            substring(0, prefix.size()).equals(prefix);
 178   }
 179
 180   // =================================================================
 181   // byte[] -> ByteString
 182
 183   /**
 184    * Copies the given bytes into a {@code ByteString}.
 185    *
 186    * @param bytes source array
 187    * @param offset offset in source array
 188    * @param size number of bytes to copy
 189    * @return new {@code ByteString}
 190    */
 191   public static ByteString copyFrom(byte[] bytes, int offset, int size) {
 192     byte[] copy = new byte[size];
 193     System.arraycopy(bytes, offset, copy, 0, size);
 194     return new LiteralByteString(copy);
 195   }
 196
 197   /**
 198    * Copies the given bytes into a {@code ByteString}.
 199    *
 200    * @param bytes to copy
 201    * @return new {@code ByteString}
 202    */
 203   public static ByteString copyFrom(byte[] bytes) {
 204     return copyFrom(bytes, 0, bytes.length);
 205   }
 206
 207   /**
 208    * Copies the next {@code size} bytes from a {@code java.nio.ByteBuffer} into
 209    * a {@code ByteString}.
 210    *
 211    * @param bytes source buffer
 212    * @param size number of bytes to copy
 213    * @return new {@code ByteString}
 214    */
 215   public static ByteString copyFrom(ByteBuffer bytes, int size) {
 216     byte[] copy = new byte[size];
 217     bytes.get(copy);
 218     return new LiteralByteString(copy);
 219   }
 220
 221   /**
 222    * Copies the remaining bytes from a {@code java.nio.ByteBuffer} into
 223    * a {@code ByteString}.
 224    *
 225    * @param bytes sourceBuffer
 226    * @return new {@code ByteString}
 227    */
 228   public static ByteString copyFrom(ByteBuffer bytes) {
 229     return copyFrom(bytes, bytes.remaining());
 230   }
 231
 232   /**
 233    * Encodes {@code text} into a sequence of bytes using the named charset
 234    * and returns the result as a {@code ByteString}.
 235    *
 236    * @param text source string
 237    * @param charsetName encoding to use
 238    * @return new {@code ByteString}
 239    * @throws UnsupportedEncodingException if the encoding isn't found
 240    */
 241   public static ByteString copyFrom(String text, String charsetName)
 242       throws UnsupportedEncodingException {
 243     return new LiteralByteString(text.getBytes(charsetName));
 244   }
 245
 246   /**
 247    * Encodes {@code text} into a sequence of UTF-8 bytes and returns the
 248    * result as a {@code ByteString}.
 249    *
 250    * @param text source string
 251    * @return new {@code ByteString}
 252    */
 253   public static ByteString copyFromUtf8(String text) {
 254     try {
 255       return new LiteralByteString(text.getBytes("UTF-8"));
 256     } catch (UnsupportedEncodingException e) {
 257       throw new RuntimeException("UTF-8 not supported?", e);
 258     }
 259   }
 260
 261   // =================================================================
 262   // InputStream -> ByteString
 263
 264   /**
 265    * Completely reads the given stream's bytes into a
 266    * {@code ByteString}, blocking if necessary until all bytes are
 267    * read through to the end of the stream.
 268    *
 269    * <b>Performance notes:</b> The returned {@code ByteString} is an
 270    * immutable tree of byte arrays ("chunks") of the stream data.  The
 271    * first chunk is small, with subsequent chunks each being double
 272    * the size, up to 8K.  If the caller knows the precise length of
 273    * the stream and wishes to avoid all unnecessary copies and
 274    * allocations, consider using the two-argument version of this
 275    * method, below.
 276    *
 277    * @param streamToDrain The source stream, which is read completely
 278    *     but not closed.
 279    * @return A new {@code ByteString} which is made up of chunks of
 280    *     various sizes, depending on the behavior of the underlying
 281    *     stream.
 282    * @throws IOException IOException is thrown if there is a problem
 283    *     reading the underlying stream.
 284    */
 285   public static ByteString readFrom(InputStream streamToDrain)
 286       throws IOException {
 287     return readFrom(
 288         streamToDrain, MIN_READ_FROM_CHUNK_SIZE, MAX_READ_FROM_CHUNK_SIZE);
 289   }
 290
 291   /**
 292    * Completely reads the given stream's bytes into a
 293    * {@code ByteString}, blocking if necessary until all bytes are
 294    * read through to the end of the stream.
 295    *
 296    * <b>Performance notes:</b> The returned {@code ByteString} is an
 297    * immutable tree of byte arrays ("chunks") of the stream data.  The
 298    * chunkSize parameter sets the size of these byte arrays. In
 299    * particular, if the chunkSize is precisely the same as the length
 300    * of the stream, unnecessary allocations and copies will be
 301    * avoided. Otherwise, the chunks will be of the given size, except
 302    * for the last chunk, which will be resized (via a reallocation and
 303    * copy) to contain the remainder of the stream.
 304    *
 305    * @param streamToDrain The source stream, which is read completely
 306    *     but not closed.
 307    * @param chunkSize The size of the chunks in which to read the
 308    *     stream.
 309    * @return A new {@code ByteString} which is made up of chunks of
 310    *     the given size.
 311    * @throws IOException IOException is thrown if there is a problem
 312    *     reading the underlying stream.
 313    */
 314   public static ByteString readFrom(InputStream streamToDrain, int chunkSize)
 315       throws IOException {
 316     return readFrom(streamToDrain, chunkSize, chunkSize);
 317   }
 318
 319   // Helper method that takes the chunk size range as a parameter.
 320   public static ByteString readFrom(InputStream streamToDrain, int minChunkSize,
 321       int maxChunkSize) throws IOException {
 322     Collection<ByteString> results = new ArrayList<ByteString>();
 323
 324     // copy the inbound bytes into a list of chunks; the chunk size
 325     // grows exponentially to support both short and long streams.
 326     int chunkSize = minChunkSize;
 327     while (true) {
 328       ByteString chunk = readChunk(streamToDrain, chunkSize);
 329       if (chunk == null) {
 330         break;
 331       }
 332       results.add(chunk);
 333       chunkSize = Math.min(chunkSize * 2, maxChunkSize);
 334     }
 335
 336     return ByteString.copyFrom(results);
 337   }
 338
 339   /**
 340    * Blocks until a chunk of the given size can be made from the
 341    * stream, or EOF is reached.  Calls read() repeatedly in case the
 342    * given stream implementation doesn't completely fill the given
 343    * buffer in one read() call.
 344    *
 345    * @return A chunk of the desired size, or else a chunk as large as
 346    * was available when end of stream was reached. Returns null if the
 347    * given stream had no more data in it.
 348    */
 349   private static ByteString readChunk(InputStream in, final int chunkSize)
 350       throws IOException {
 351       final byte[] buf = new byte[chunkSize];
 352       int bytesRead = 0;
 353       while (bytesRead < chunkSize) {
 354         final int count = in.read(buf, bytesRead, chunkSize - bytesRead);
 355         if (count == -1) {
 356           break;
 357         }
 358         bytesRead += count;
 359       }
 360
 361       if (bytesRead == 0) {
 362         return null;
 363       } else {
 364         return ByteString.copyFrom(buf, 0, bytesRead);
 365       }
 366   }
 367
 368   // =================================================================
 369   // Multiple ByteStrings -> One ByteString
 370
 371   /**
 372    * Concatenate the given {@code ByteString} to this one. Short concatenations,
 373    * of total size smaller than {@link ByteString#CONCATENATE_BY_COPY_SIZE}, are
 374    * produced by copying the underlying bytes (as per Rope.java, <a
 375    * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
 376    * BAP95 </a>. In general, the concatenate involves no copying.
 377    *
 378    * @param other string to concatenate
 379    * @return a new {@code ByteString} instance
 380    */
 381   public ByteString concat(ByteString other) {
 382     int thisSize = size();
 383     int otherSize = other.size();
 384     if ((long) thisSize + otherSize >= Integer.MAX_VALUE) {
 385       throw new IllegalArgumentException("ByteString would be too long: " +
 386                                          thisSize + "+" + otherSize);
 387     }
 388
 389     return RopeByteString.concatenate(this, other);
 390   }
 391
 392   /**
 393    * Concatenates all byte strings in the iterable and returns the result.
 394    * This is designed to run in O(list size), not O(total bytes).
 395    *
 396    * <p>The returned {@code ByteString} is not necessarily a unique object.
 397    * If the list is empty, the returned object is the singleton empty
 398    * {@code ByteString}.  If the list has only one element, that
 399    * {@code ByteString} will be returned without copying.
 400    *
 401    * @param byteStrings strings to be concatenated
 402    * @return new {@code ByteString}
 403    */
 404   public static ByteString copyFrom(Iterable<ByteString> byteStrings) {
 405     Collection<ByteString> collection;
 406     if (!(byteStrings instanceof Collection)) {
 407       collection = new ArrayList<ByteString>();
 408       for (ByteString byteString : byteStrings) {
 409         collection.add(byteString);
 410       }
 411     } else {
 412       collection = (Collection<ByteString>) byteStrings;
 413     }
 414     ByteString result;
 415     if (collection.isEmpty()) {
 416       result = EMPTY;
 417     } else {
 418       result = balancedConcat(collection.iterator(), collection.size());
 419     }
 420     return result;
 421   }
 422
 423   // Internal function used by copyFrom(Iterable<ByteString>).
 424   // Create a balanced concatenation of the next "length" elements from the
 425   // iterable.
 426   private static ByteString balancedConcat(Iterator<ByteString> iterator,
 427       int length) {
 428     assert length >= 1;
 429     ByteString result;
 430     if (length == 1) {
 431       result = iterator.next();
 432     } else {
 433       int halfLength = length >>> 1;
 434       ByteString left = balancedConcat(iterator, halfLength);
 435       ByteString right = balancedConcat(iterator, length - halfLength);
 436       result = left.concat(right);
 437     }
 438     return result;
 439   }
 440
 441   // =================================================================
 442   // ByteString -> byte[]
 443
 444   /**
 445    * Copies bytes into a buffer at the given offset.
 446    *
 447    * @param target buffer to copy into
 448    * @param offset in the target buffer
 449    * @throws IndexOutOfBoundsException if the offset is negative or too large
 450    */
 451   public void copyTo(byte[] target, int offset) {
 452     copyTo(target, 0, offset, size());
 453   }
 454
 455   /**
 456    * Copies bytes into a buffer.
 457    *
 458    * @param target       buffer to copy into
 459    * @param sourceOffset offset within these bytes
 460    * @param targetOffset offset within the target buffer
 461    * @param numberToCopy number of bytes to copy
 462    * @throws IndexOutOfBoundsException if an offset or size is negative or too
 463    *     large
 464    */
 465   public void copyTo(byte[] target, int sourceOffset, int targetOffset,
 466       int numberToCopy) {
 467     if (sourceOffset < 0) {
 468       throw new IndexOutOfBoundsException("Source offset < 0: " + sourceOffset);
 469     }
 470     if (targetOffset < 0) {
 471       throw new IndexOutOfBoundsException("Target offset < 0: " + targetOffset);
 472     }
 473     if (numberToCopy < 0) {
 474       throw new IndexOutOfBoundsException("Length < 0: " + numberToCopy);
 475     }
 476     if (sourceOffset + numberToCopy > size()) {
 477       throw new IndexOutOfBoundsException(
 478           "Source end offset < 0: " + (sourceOffset + numberToCopy));
 479     }
 480     if (targetOffset + numberToCopy > target.length) {
 481       throw new IndexOutOfBoundsException(
 482           "Target end offset < 0: " + (targetOffset + numberToCopy));
 483     }
 484     if (numberToCopy > 0) {
 485       copyToInternal(target, sourceOffset, targetOffset, numberToCopy);
 486     }
 487   }
 488
 489   /**
 490    * Internal (package private) implementation of
 491    * @link{#copyTo(byte[],int,int,int}.
 492    * It assumes that all error checking has already been performed and that
 493    * @code{numberToCopy > 0}.
 494    */
 495   protected abstract void copyToInternal(byte[] target, int sourceOffset,
 496       int targetOffset, int numberToCopy);
 497
 498   /**
 499    * Copies bytes into a ByteBuffer.
 500    *
 501    * @param target ByteBuffer to copy into.
 502    * @throws java.nio.ReadOnlyBufferException if the {@code target} is read-only
 503    * @throws java.nio.BufferOverflowException if the {@code target}'s
 504    *     remaining() space is not large enough to hold the data.
 505    */
 506   public abstract void copyTo(ByteBuffer target);
 507
 508   /**
 509    * Copies bytes to a {@code byte[]}.
 510    *
 511    * @return copied bytes
 512    */
 513   public byte[] toByteArray() {
 514     int size = size();
 515     byte[] result = new byte[size];
 516     copyToInternal(result, 0, 0, size);
 517     return result;
 518   }
 519
 520   /**
 521    * Writes the complete contents of this byte string to
 522    * the specified output stream argument.
 523    *
 524    * @param  out  the output stream to which to write the data.
 525    * @throws IOException  if an I/O error occurs.
 526    */
 527   public abstract void writeTo(OutputStream out) throws IOException;
 528
 529   /**
 530    * Constructs a read-only {@code java.nio.ByteBuffer} whose content
 531    * is equal to the contents of this byte string.
 532    * The result uses the same backing array as the byte string, if possible.
 533    *
 534    * @return wrapped bytes
 535    */
 536   public abstract ByteBuffer asReadOnlyByteBuffer();
 537
 538   /**
 539    * Constructs a list of read-only {@code java.nio.ByteBuffer} objects
 540    * such that the concatenation of their contents is equal to the contents
 541    * of this byte string.  The result uses the same backing arrays as the
 542    * byte string.
 543    * <p>
 544    * By returning a list, implementations of this method may be able to avoid
 545    * copying even when there are multiple backing arrays.
 546    *
 547    * @return a list of wrapped bytes
 548    */
 549   public abstract List<ByteBuffer> asReadOnlyByteBufferList();
 550
 551   /**
 552    * Constructs a new {@code String} by decoding the bytes using the
 553    * specified charset.
 554    *
 555    * @param charsetName encode using this charset
 556    * @return new string
 557    * @throws UnsupportedEncodingException if charset isn't recognized
 558    */
 559   public abstract String toString(String charsetName)
 560       throws UnsupportedEncodingException;
 561
 562   // =================================================================
 563   // UTF-8 decoding
 564
 565   /**
 566    * Constructs a new {@code String} by decoding the bytes as UTF-8.
 567    *
 568    * @return new string using UTF-8 encoding
 569    */
 570   public String toStringUtf8() {
 571     try {
 572       return toString("UTF-8");
 573     } catch (UnsupportedEncodingException e) {
 574       throw new RuntimeException("UTF-8 not supported?", e);
 575     }
 576   }
 577
 578   /**
 579    * Tells whether this {@code ByteString} represents a well-formed UTF-8
 580    * byte sequence, such that the original bytes can be converted to a
 581    * String object and then round tripped back to bytes without loss.
 582    *
 583    * <p>More precisely, returns {@code true} whenever: <pre> {@code
 584    * Arrays.equals(byteString.toByteArray(),
 585    *     new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8"))
 586    * }</pre>
 587    *
 588    * <p>This method returns {@code false} for "overlong" byte sequences,
 589    * as well as for 3-byte sequences that would map to a surrogate
 590    * character, in accordance with the restricted definition of UTF-8
 591    * introduced in Unicode 3.1.  Note that the UTF-8 decoder included in
 592    * Oracle's JDK has been modified to also reject "overlong" byte
 593    * sequences, but (as of 2011) still accepts 3-byte surrogate
 594    * character byte sequences.
 595    *
 596    * <p>See the Unicode Standard,</br>
 597    * Table 3-6. <em>UTF-8 Bit Distribution</em>,</br>
 598    * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>.
 599    *
 600    * @return whether the bytes in this {@code ByteString} are a
 601    * well-formed UTF-8 byte sequence
 602    */
 603   public abstract boolean isValidUtf8();
 604
 605   /**
 606    * Tells whether the given byte sequence is a well-formed, malformed, or
 607    * incomplete UTF-8 byte sequence.  This method accepts and returns a partial
 608    * state result, allowing the bytes for a complete UTF-8 byte sequence to be
 609    * composed from multiple {@code ByteString} segments.
 610    *
 611    * @param state either {@code 0} (if this is the initial decoding operation)
 612    *     or the value returned from a call to a partial decoding method for the
 613    *     previous bytes
 614    * @param offset offset of the first byte to check
 615    * @param length number of bytes to check
 616    *
 617    * @return {@code -1} if the partial byte sequence is definitely malformed,
 618    * {@code 0} if it is well-formed (no additional input needed), or, if the
 619    * byte sequence is "incomplete", i.e. apparently terminated in the middle of
 620    * a character, an opaque integer "state" value containing enough information
 621    * to decode the character when passed to a subsequent invocation of a
 622    * partial decoding method.
 623    */
 624   protected abstract int partialIsValidUtf8(int state, int offset, int length);
 625
 626   // =================================================================
 627   // equals() and hashCode()
 628
 629   @Override
 630   public abstract boolean equals(Object o);
 631
 632   /**
 633    * Return a non-zero hashCode depending only on the sequence of bytes
 634    * in this ByteString.
 635    *
 636    * @return hashCode value for this object
 637    */
 638   @Override
 639   public abstract int hashCode();
 640
 641   // =================================================================
 642   // Input stream
 643
 644   /**
 645    * Creates an {@code InputStream} which can be used to read the bytes.
 646    * <p>
 647    * The {@link InputStream} returned by this method is guaranteed to be
 648    * completely non-blocking.  The method {@link InputStream#available()}
 649    * returns the number of bytes remaining in the stream. The methods
 650    * {@link InputStream#read(byte[]), {@link InputStream#read(byte[],int,int)}
 651    * and {@link InputStream#skip(long)} will read/skip as many bytes as are
 652    * available.
 653    * <p>
 654    * The methods in the returned {@link InputStream} might <b>not</b> be
 655    * thread safe.
 656    *
 657    * @return an input stream that returns the bytes of this byte string.
 658    */
 659   public abstract InputStream newInput();
 660
 661   /**
 662    * Creates a {@link CodedInputStream} which can be used to read the bytes.
 663    * Using this is often more efficient than creating a {@link CodedInputStream}
 664    * that wraps the result of {@link #newInput()}.
 665    *
 666    * @return stream based on wrapped data
 667    */
 668   public abstract CodedInputStream newCodedInput();
 669
 670   // =================================================================
 671   // Output stream
 672
 673   /**
 674    * Creates a new {@link Output} with the given initial capacity. Call {@link
 675    * Output#toByteString()} to create the {@code ByteString} instance.
 676    * <p>
 677    * A {@link ByteString.Output} offers the same functionality as a
 678    * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString}
 679    * rather than a {@code byte} array.
 680    *
 681    * @param initialCapacity estimate of number of bytes to be written
 682    * @return {@code OutputStream} for building a {@code ByteString}
 683    */
 684   public static Output newOutput(int initialCapacity) {
 685     return new Output(initialCapacity);
 686   }
 687
 688   /**
 689    * Creates a new {@link Output}. Call {@link Output#toByteString()} to create
 690    * the {@code ByteString} instance.
 691    * <p>
 692    * A {@link ByteString.Output} offers the same functionality as a
 693    * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString}
 694    * rather than a {@code byte array}.
 695    *
 696    * @return {@code OutputStream} for building a {@code ByteString}
 697    */
 698   public static Output newOutput() {
 699     return new Output(CONCATENATE_BY_COPY_SIZE);
 700   }
 701
 702   /**
 703    * Outputs to a {@code ByteString} instance. Call {@link #toByteString()} to
 704    * create the {@code ByteString} instance.
 705    */
 706   public static final class Output extends OutputStream {
 707     // Implementation note.
 708     // The public methods of this class must be synchronized.  ByteStrings
 709     // are guaranteed to be immutable.  Without some sort of locking, it could
 710     // be possible for one thread to call toByteSring(), while another thread
 711     // is still modifying the underlying byte array.
 712
 713     private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
 714     // argument passed by user, indicating initial capacity.
 715     private final int initialCapacity;
 716     // ByteStrings to be concatenated to create the result
 717     private final ArrayList<ByteString> flushedBuffers;
 718     // Total number of bytes in the ByteStrings of flushedBuffers
 719     private int flushedBuffersTotalBytes;
 720     // Current buffer to which we are writing
 721     private byte[] buffer;
 722     // Location in buffer[] to which we write the next byte.
 723     private int bufferPos;
 724
 725     /**
 726      * Creates a new ByteString output stream with the specified
 727      * initial capacity.
 728      *
 729      * @param initialCapacity  the initial capacity of the output stream.
 730      */
 731     Output(int initialCapacity) {
 732       if (initialCapacity < 0) {
 733         throw new IllegalArgumentException("Buffer size < 0");
 734       }
 735       this.initialCapacity = initialCapacity;
 736       this.flushedBuffers = new ArrayList<ByteString>();
 737       this.buffer = new byte[initialCapacity];
 738     }
 739
 740     @Override
 741     public synchronized void write(int b) {
 742       if (bufferPos == buffer.length) {
 743         flushFullBuffer(1);
 744       }
 745       buffer[bufferPos++] = (byte)b;
 746     }
 747
 748     @Override
 749     public synchronized void write(byte[] b, int offset, int length)  {
 750       if (length <= buffer.length - bufferPos) {
 751         // The bytes can fit into the current buffer.
 752         System.arraycopy(b, offset, buffer, bufferPos, length);
 753         bufferPos += length;
 754       } else {
 755         // Use up the current buffer
 756         int copySize  = buffer.length - bufferPos;
 757         System.arraycopy(b, offset, buffer, bufferPos, copySize);
 758         offset += copySize;
 759         length -= copySize;
 760         // Flush the buffer, and get a new buffer at least big enough to cover
 761         // what we still need to output
 762         flushFullBuffer(length);
 763         System.arraycopy(b, offset, buffer, 0 /* count */, length);
 764         bufferPos = length;
 765       }
 766     }
 767
 768     /**
 769      * Creates a byte string. Its size is the current size of this output
 770      * stream and its output has been copied to it.
 771      *
 772      * @return  the current contents of this output stream, as a byte string.
 773      */
 774     public synchronized ByteString toByteString() {
 775       flushLastBuffer();
 776       return ByteString.copyFrom(flushedBuffers);
 777     }
 778
 779     /**
 780      * Implement java.util.Arrays.copyOf() for jdk 1.5.
 781      */
 782     private byte[] copyArray(byte[] buffer, int length) {
 783       byte[] result = new byte[length];
 784       System.arraycopy(buffer, 0, result, 0, Math.min(buffer.length, length));
 785       return result;
 786     }
 787
 788     /**
 789      * Writes the complete contents of this byte array output stream to
 790      * the specified output stream argument.
 791      *
 792      * @param out the output stream to which to write the data.
 793      * @throws IOException  if an I/O error occurs.
 794      */
 795     public void writeTo(OutputStream out) throws IOException {
 796       ByteString[] cachedFlushBuffers;
 797       byte[] cachedBuffer;
 798       int cachedBufferPos;
 799       synchronized (this) {
 800         // Copy the information we need into local variables so as to hold
 801         // the lock for as short a time as possible.
 802         cachedFlushBuffers =
 803             flushedBuffers.toArray(new ByteString[flushedBuffers.size()]);
 804         cachedBuffer = buffer;
 805         cachedBufferPos = bufferPos;
 806       }
 807       for (ByteString byteString : cachedFlushBuffers) {
 808         byteString.writeTo(out);
 809       }
 810
 811       out.write(copyArray(cachedBuffer, cachedBufferPos));
 812     }
 813
 814     /**
 815      * Returns the current size of the output stream.
 816      *
 817      * @return  the current size of the output stream
 818      */
 819     public synchronized int size() {
 820       return flushedBuffersTotalBytes + bufferPos;
 821     }
 822
 823     /**
 824      * Resets this stream, so that all currently accumulated output in the
 825      * output stream is discarded. The output stream can be used again,
 826      * reusing the already allocated buffer space.
 827      */
 828     public synchronized void reset() {
 829       flushedBuffers.clear();
 830       flushedBuffersTotalBytes = 0;
 831       bufferPos = 0;
 832     }
 833
 834     @Override
 835     public String toString() {
 836       return String.format("<ByteString.Output@%s size=%d>",
 837           Integer.toHexString(System.identityHashCode(this)), size());
 838     }
 839
 840     /**
 841      * Internal function used by writers.  The current buffer is full, and the
 842      * writer needs a new buffer whose size is at least the specified minimum
 843      * size.
 844      */
 845     private void flushFullBuffer(int minSize)  {
 846       flushedBuffers.add(new LiteralByteString(buffer));
 847       flushedBuffersTotalBytes += buffer.length;
 848       // We want to increase our total capacity by 50%, but as a minimum,
 849       // the new buffer should also at least be >= minSize and
 850       // >= initial Capacity.
 851       int newSize = Math.max(initialCapacity,
 852           Math.max(minSize, flushedBuffersTotalBytes >>> 1));
 853       buffer = new byte[newSize];
 854       bufferPos = 0;
 855     }
 856
 857     /**
 858      * Internal function used by {@link #toByteString()}. The current buffer may
 859      * or may not be full, but it needs to be flushed.
 860      */
 861     private void flushLastBuffer()  {
 862       if (bufferPos < buffer.length) {
 863         if (bufferPos > 0) {
 864           byte[] bufferCopy = copyArray(buffer, bufferPos);
 865           flushedBuffers.add(new LiteralByteString(bufferCopy));
 866         }
 867         // We reuse this buffer for further writes.
 868       } else {
 869         // Buffer is completely full.  Huzzah.
 870         flushedBuffers.add(new LiteralByteString(buffer));
 871         // 99% of the time, we're not going to use this OutputStream again.
 872         // We set buffer to an empty byte stream so that we're handling this
 873         // case without wasting space.  In the rare case that more writes
 874         // *do* occur, this empty buffer will be flushed and an appropriately
 875         // sized new buffer will be created.
 876         buffer = EMPTY_BYTE_ARRAY;
 877       }
 878       flushedBuffersTotalBytes += bufferPos;
 879       bufferPos = 0;
 880     }
 881   }
 882
 883   /**
 884    * Constructs a new {@code ByteString} builder, which allows you to
 885    * efficiently construct a {@code ByteString} by writing to a {@link
 886    * CodedOutputStream}. Using this is much more efficient than calling {@code
 887    * newOutput()} and wrapping that in a {@code CodedOutputStream}.
 888    *
 889    * <p>This is package-private because it's a somewhat confusing interface.
 890    * Users can call {@link Message#toByteString()} instead of calling this
 891    * directly.
 892    *
 893    * @param size The target byte size of the {@code ByteString}.  You must write
 894    *     exactly this many bytes before building the result.
 895    * @return the builder
 896    */
 897   static CodedBuilder newCodedBuilder(int size) {
 898     return new CodedBuilder(size);
 899   }
 900
 901   /** See {@link ByteString#newCodedBuilder(int)}. */
 902   static final class CodedBuilder {
 903     private final CodedOutputStream output;
 904     private final byte[] buffer;
 905
 906     private CodedBuilder(int size) {
 907       buffer = new byte[size];
 908       output = CodedOutputStream.newInstance(buffer);
 909     }
 910
 911     public ByteString build() {
 912       output.checkNoSpaceLeft();
 913
 914       // We can be confident that the CodedOutputStream will not modify the
 915       // underlying bytes anymore because it already wrote all of them.  So,
 916       // no need to make a copy.
 917       return new LiteralByteString(buffer);
 918     }
 919
 920     public CodedOutputStream getCodedOutput() {
 921       return output;
 922     }
 923   }
 924
 925   // =================================================================
 926   // Methods {@link RopeByteString} needs on instances, which aren't part of the
 927   // public API.
 928
 929   /**
 930    * Return the depth of the tree representing this {@code ByteString}, if any,
 931    * whose root is this node. If this is a leaf node, return 0.
 932    *
 933    * @return tree depth or zero
 934    */
 935   protected abstract int getTreeDepth();
 936
 937   /**
 938    * Return {@code true} if this ByteString is literal (a leaf node) or a
 939    * flat-enough tree in the sense of {@link RopeByteString}.
 940    *
 941    * @return true if the tree is flat enough
 942    */
 943   protected abstract boolean isBalanced();
 944
 945   /**
 946    * Return the cached hash code if available.
 947    *
 948    * @return value of cached hash code or 0 if not computed yet
 949    */
 950   protected abstract int peekCachedHashCode();
 951
 952   /**
 953    * Compute the hash across the value bytes starting with the given hash, and
 954    * return the result.  This is used to compute the hash across strings
 955    * represented as a set of pieces by allowing the hash computation to be
 956    * continued from piece to piece.
 957    *
 958    * @param h starting hash value
 959    * @param offset offset into this value to start looking at data values
 960    * @param length number of data values to include in the hash computation
 961    * @return ending hash value
 962    */
 963   protected abstract int partialHash(int h, int offset, int length);
 964
 965   @Override
 966   public String toString() {
 967     return String.format("<ByteString@%s size=%d>",
 968         Integer.toHexString(System.identityHashCode(this)), size());
 969   }
 970 }