src/third_party/protobuf/java/src/main/java/com/google/protobuf/TextFormat.java

   1 // Protocol Buffers - Google's data interchange format
   2 // Copyright 2008 Google Inc.  All rights reserved.
   3 // http://code.google.com/p/protobuf/
   4 //
   5 // Redistribution and use in source and binary forms, with or without
   6 // modification, are permitted provided that the following conditions are
   7 // met:
   8 //
   9 //     * Redistributions of source code must retain the above copyright
  10 // notice, this list of conditions and the following disclaimer.
  11 //     * Redistributions in binary form must reproduce the above
  12 // copyright notice, this list of conditions and the following disclaimer
  13 // in the documentation and/or other materials provided with the
  14 // distribution.
  15 //     * Neither the name of Google Inc. nor the names of its
  16 // contributors may be used to endorse or promote products derived from
  17 // this software without specific prior written permission.
  18 //
  19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30
  31 package com.google.protobuf;
  32
  33 import com.google.protobuf.Descriptors.Descriptor;
  34 import com.google.protobuf.Descriptors.FieldDescriptor;
  35 import com.google.protobuf.Descriptors.EnumDescriptor;
  36 import com.google.protobuf.Descriptors.EnumValueDescriptor;
  37
  38 import java.io.IOException;
  39 import java.nio.CharBuffer;
  40 import java.math.BigInteger;
  41 import java.util.ArrayList;
  42 import java.util.List;
  43 import java.util.Locale;
  44 import java.util.Map;
  45 import java.util.regex.Matcher;
  46 import java.util.regex.Pattern;
  47
  48 /**
  49  * Provide text parsing and formatting support for proto2 instances.
  50  * The implementation largely follows google/protobuf/text_format.cc.
  51  *
  52  * @author wenboz@google.com Wenbo Zhu
  53  * @author kenton@google.com Kenton Varda
  54  */
  55 public final class TextFormat {
  56   private TextFormat() {}
  57
  58   private static final Printer DEFAULT_PRINTER = new Printer();
  59   private static final Printer SINGLE_LINE_PRINTER =
  60       (new Printer()).setSingleLineMode(true);
  61   private static final Printer UNICODE_PRINTER =
  62       (new Printer()).setEscapeNonAscii(false);
  63
  64   /**
  65    * Outputs a textual representation of the Protocol Message supplied into
  66    * the parameter output. (This representation is the new version of the
  67    * classic "ProtocolPrinter" output from the original Protocol Buffer system)
  68    */
  69   public static void print(final MessageOrBuilder message, final Appendable output)
  70                            throws IOException {
  71     DEFAULT_PRINTER.print(message, new TextGenerator(output));
  72   }
  73
  74   /** Outputs a textual representation of {@code fields} to {@code output}. */
  75   public static void print(final UnknownFieldSet fields,
  76                            final Appendable output)
  77                            throws IOException {
  78     DEFAULT_PRINTER.printUnknownFields(fields, new TextGenerator(output));
  79   }
  80
  81   /**
  82    * Generates a human readable form of this message, useful for debugging and
  83    * other purposes, with no newline characters.
  84    */
  85   public static String shortDebugString(final MessageOrBuilder message) {
  86     try {
  87       final StringBuilder sb = new StringBuilder();
  88       SINGLE_LINE_PRINTER.print(message, new TextGenerator(sb));
  89       // Single line mode currently might have an extra space at the end.
  90       return sb.toString().trim();
  91     } catch (IOException e) {
  92       throw new IllegalStateException(e);
  93     }
  94   }
  95
  96   /**
  97    * Generates a human readable form of the unknown fields, useful for debugging
  98    * and other purposes, with no newline characters.
  99    */
 100   public static String shortDebugString(final UnknownFieldSet fields) {
 101     try {
 102       final StringBuilder sb = new StringBuilder();
 103       SINGLE_LINE_PRINTER.printUnknownFields(fields, new TextGenerator(sb));
 104       // Single line mode currently might have an extra space at the end.
 105       return sb.toString().trim();
 106     } catch (IOException e) {
 107       throw new IllegalStateException(e);
 108     }
 109   }
 110
 111   /**
 112    * Like {@code print()}, but writes directly to a {@code String} and
 113    * returns it.
 114    */
 115   public static String printToString(final MessageOrBuilder message) {
 116     try {
 117       final StringBuilder text = new StringBuilder();
 118       print(message, text);
 119       return text.toString();
 120     } catch (IOException e) {
 121       throw new IllegalStateException(e);
 122     }
 123   }
 124
 125   /**
 126    * Like {@code print()}, but writes directly to a {@code String} and
 127    * returns it.
 128    */
 129   public static String printToString(final UnknownFieldSet fields) {
 130     try {
 131       final StringBuilder text = new StringBuilder();
 132       print(fields, text);
 133       return text.toString();
 134     } catch (IOException e) {
 135       throw new IllegalStateException(e);
 136     }
 137   }
 138
 139   /**
 140    * Same as {@code printToString()}, except that non-ASCII characters
 141    * in string type fields are not escaped in backslash+octals.
 142    */
 143   public static String printToUnicodeString(final MessageOrBuilder message) {
 144     try {
 145       final StringBuilder text = new StringBuilder();
 146       UNICODE_PRINTER.print(message, new TextGenerator(text));
 147       return text.toString();
 148     } catch (IOException e) {
 149       throw new IllegalStateException(e);
 150     }
 151   }
 152
 153   /**
 154    * Same as {@code printToString()}, except that non-ASCII characters
 155    * in string type fields are not escaped in backslash+octals.
 156    */
 157   public static String printToUnicodeString(final UnknownFieldSet fields) {
 158     try {
 159       final StringBuilder text = new StringBuilder();
 160       UNICODE_PRINTER.printUnknownFields(fields, new TextGenerator(text));
 161       return text.toString();
 162     } catch (IOException e) {
 163       throw new IllegalStateException(e);
 164     }
 165   }
 166
 167   public static void printField(final FieldDescriptor field,
 168                                 final Object value,
 169                                 final Appendable output)
 170                                 throws IOException {
 171     DEFAULT_PRINTER.printField(field, value, new TextGenerator(output));
 172   }
 173
 174   public static String printFieldToString(final FieldDescriptor field,
 175                                           final Object value) {
 176     try {
 177       final StringBuilder text = new StringBuilder();
 178       printField(field, value, text);
 179       return text.toString();
 180     } catch (IOException e) {
 181       throw new IllegalStateException(e);
 182     }
 183   }
 184
 185   /**
 186    * Outputs a textual representation of the value of given field value.
 187    *
 188    * @param field the descriptor of the field
 189    * @param value the value of the field
 190    * @param output the output to which to append the formatted value
 191    * @throws ClassCastException if the value is not appropriate for the
 192    *     given field descriptor
 193    * @throws IOException if there is an exception writing to the output
 194    */
 195   public static void printFieldValue(final FieldDescriptor field,
 196                                      final Object value,
 197                                      final Appendable output)
 198                                      throws IOException {
 199     DEFAULT_PRINTER.printFieldValue(field, value, new TextGenerator(output));
 200   }
 201
 202   /**
 203    * Outputs a textual representation of the value of an unknown field.
 204    *
 205    * @param tag the field's tag number
 206    * @param value the value of the field
 207    * @param output the output to which to append the formatted value
 208    * @throws ClassCastException if the value is not appropriate for the
 209    *     given field descriptor
 210    * @throws IOException if there is an exception writing to the output
 211    */
 212   public static void printUnknownFieldValue(final int tag,
 213                                             final Object value,
 214                                             final Appendable output)
 215                                             throws IOException {
 216     printUnknownFieldValue(tag, value, new TextGenerator(output));
 217   }
 218
 219   private static void printUnknownFieldValue(final int tag,
 220                                              final Object value,
 221                                              final TextGenerator generator)
 222                                              throws IOException {
 223     switch (WireFormat.getTagWireType(tag)) {
 224       case WireFormat.WIRETYPE_VARINT:
 225         generator.print(unsignedToString((Long) value));
 226         break;
 227       case WireFormat.WIRETYPE_FIXED32:
 228         generator.print(
 229             String.format((Locale) null, "0x%08x", (Integer) value));
 230         break;
 231       case WireFormat.WIRETYPE_FIXED64:
 232         generator.print(String.format((Locale) null, "0x%016x", (Long) value));
 233         break;
 234       case WireFormat.WIRETYPE_LENGTH_DELIMITED:
 235         generator.print("\"");
 236         generator.print(escapeBytes((ByteString) value));
 237         generator.print("\"");
 238         break;
 239       case WireFormat.WIRETYPE_START_GROUP:
 240         DEFAULT_PRINTER.printUnknownFields((UnknownFieldSet) value, generator);
 241         break;
 242       default:
 243         throw new IllegalArgumentException("Bad tag: " + tag);
 244     }
 245   }
 246
 247   /** Helper class for converting protobufs to text. */
 248   private static final class Printer {
 249     /** Whether to omit newlines from the output. */
 250     boolean singleLineMode = false;
 251
 252     /** Whether to escape non ASCII characters with backslash and octal. */
 253     boolean escapeNonAscii = true;
 254
 255     private Printer() {}
 256
 257     /** Setter of singleLineMode */
 258     private Printer setSingleLineMode(boolean singleLineMode) {
 259       this.singleLineMode = singleLineMode;
 260       return this;
 261     }
 262
 263     /** Setter of escapeNonAscii */
 264     private Printer setEscapeNonAscii(boolean escapeNonAscii) {
 265       this.escapeNonAscii = escapeNonAscii;
 266       return this;
 267     }
 268
 269     private void print(final MessageOrBuilder message, final TextGenerator generator)
 270         throws IOException {
 271       for (Map.Entry<FieldDescriptor, Object> field
 272           : message.getAllFields().entrySet()) {
 273         printField(field.getKey(), field.getValue(), generator);
 274       }
 275       printUnknownFields(message.getUnknownFields(), generator);
 276     }
 277
 278     private void printField(final FieldDescriptor field, final Object value,
 279         final TextGenerator generator) throws IOException {
 280       if (field.isRepeated()) {
 281         // Repeated field.  Print each element.
 282         for (Object element : (List<?>) value) {
 283           printSingleField(field, element, generator);
 284         }
 285       } else {
 286         printSingleField(field, value, generator);
 287       }
 288     }
 289
 290     private void printSingleField(final FieldDescriptor field,
 291                                   final Object value,
 292                                   final TextGenerator generator)
 293                                   throws IOException {
 294       if (field.isExtension()) {
 295         generator.print("[");
 296         // We special-case MessageSet elements for compatibility with proto1.
 297         if (field.getContainingType().getOptions().getMessageSetWireFormat()
 298             && (field.getType() == FieldDescriptor.Type.MESSAGE)
 299             && (field.isOptional())
 300             // object equality
 301             && (field.getExtensionScope() == field.getMessageType())) {
 302           generator.print(field.getMessageType().getFullName());
 303         } else {
 304           generator.print(field.getFullName());
 305         }
 306         generator.print("]");
 307       } else {
 308         if (field.getType() == FieldDescriptor.Type.GROUP) {
 309           // Groups must be serialized with their original capitalization.
 310           generator.print(field.getMessageType().getName());
 311         } else {
 312           generator.print(field.getName());
 313         }
 314       }
 315
 316       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
 317         if (singleLineMode) {
 318           generator.print(" { ");
 319         } else {
 320           generator.print(" {\n");
 321           generator.indent();
 322         }
 323       } else {
 324         generator.print(": ");
 325       }
 326
 327       printFieldValue(field, value, generator);
 328
 329       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
 330         if (singleLineMode) {
 331           generator.print("} ");
 332         } else {
 333           generator.outdent();
 334           generator.print("}\n");
 335         }
 336       } else {
 337         if (singleLineMode) {
 338           generator.print(" ");
 339         } else {
 340           generator.print("\n");
 341         }
 342       }
 343     }
 344
 345     private void printFieldValue(final FieldDescriptor field,
 346                                  final Object value,
 347                                  final TextGenerator generator)
 348                                  throws IOException {
 349       switch (field.getType()) {
 350         case INT32:
 351         case SINT32:
 352         case SFIXED32:
 353           generator.print(((Integer) value).toString());
 354           break;
 355
 356         case INT64:
 357         case SINT64:
 358         case SFIXED64:
 359           generator.print(((Long) value).toString());
 360           break;
 361
 362         case BOOL:
 363           generator.print(((Boolean) value).toString());
 364           break;
 365
 366         case FLOAT:
 367           generator.print(((Float) value).toString());
 368           break;
 369
 370         case DOUBLE:
 371           generator.print(((Double) value).toString());
 372           break;
 373
 374         case UINT32:
 375         case FIXED32:
 376           generator.print(unsignedToString((Integer) value));
 377           break;
 378
 379         case UINT64:
 380         case FIXED64:
 381           generator.print(unsignedToString((Long) value));
 382           break;
 383
 384         case STRING:
 385           generator.print("\"");
 386           generator.print(escapeNonAscii ?
 387               escapeText((String) value) :
 388               (String) value);
 389           generator.print("\"");
 390           break;
 391
 392         case BYTES:
 393           generator.print("\"");
 394           generator.print(escapeBytes((ByteString) value));
 395           generator.print("\"");
 396           break;
 397
 398         case ENUM:
 399           generator.print(((EnumValueDescriptor) value).getName());
 400           break;
 401
 402         case MESSAGE:
 403         case GROUP:
 404           print((Message) value, generator);
 405           break;
 406       }
 407     }
 408
 409     private void printUnknownFields(final UnknownFieldSet unknownFields,
 410                                     final TextGenerator generator)
 411                                     throws IOException {
 412       for (Map.Entry<Integer, UnknownFieldSet.Field> entry :
 413                unknownFields.asMap().entrySet()) {
 414         final int number = entry.getKey();
 415         final UnknownFieldSet.Field field = entry.getValue();
 416         printUnknownField(number, WireFormat.WIRETYPE_VARINT,
 417             field.getVarintList(), generator);
 418         printUnknownField(number, WireFormat.WIRETYPE_FIXED32,
 419             field.getFixed32List(), generator);
 420         printUnknownField(number, WireFormat.WIRETYPE_FIXED64,
 421             field.getFixed64List(), generator);
 422         printUnknownField(number, WireFormat.WIRETYPE_LENGTH_DELIMITED,
 423             field.getLengthDelimitedList(), generator);
 424         for (final UnknownFieldSet value : field.getGroupList()) {
 425           generator.print(entry.getKey().toString());
 426           if (singleLineMode) {
 427             generator.print(" { ");
 428           } else {
 429             generator.print(" {\n");
 430             generator.indent();
 431           }
 432           printUnknownFields(value, generator);
 433           if (singleLineMode) {
 434             generator.print("} ");
 435           } else {
 436             generator.outdent();
 437             generator.print("}\n");
 438           }
 439         }
 440       }
 441     }
 442
 443     private void printUnknownField(final int number,
 444                                    final int wireType,
 445                                    final List<?> values,
 446                                    final TextGenerator generator)
 447                                    throws IOException {
 448       for (final Object value : values) {
 449         generator.print(String.valueOf(number));
 450         generator.print(": ");
 451         printUnknownFieldValue(wireType, value, generator);
 452         generator.print(singleLineMode ? " " : "\n");
 453       }
 454     }
 455   }
 456
 457   /** Convert an unsigned 32-bit integer to a string. */
 458   private static String unsignedToString(final int value) {
 459     if (value >= 0) {
 460       return Integer.toString(value);
 461     } else {
 462       return Long.toString(((long) value) & 0x00000000FFFFFFFFL);
 463     }
 464   }
 465
 466   /** Convert an unsigned 64-bit integer to a string. */
 467   private static String unsignedToString(final long value) {
 468     if (value >= 0) {
 469       return Long.toString(value);
 470     } else {
 471       // Pull off the most-significant bit so that BigInteger doesn't think
 472       // the number is negative, then set it again using setBit().
 473       return BigInteger.valueOf(value & 0x7FFFFFFFFFFFFFFFL)
 474                        .setBit(63).toString();
 475     }
 476   }
 477
 478   /**
 479    * An inner class for writing text to the output stream.
 480    */
 481   private static final class TextGenerator {
 482     private final Appendable output;
 483     private final StringBuilder indent = new StringBuilder();
 484     private boolean atStartOfLine = true;
 485
 486     private TextGenerator(final Appendable output) {
 487       this.output = output;
 488     }
 489
 490     /**
 491      * Indent text by two spaces.  After calling Indent(), two spaces will be
 492      * inserted at the beginning of each line of text.  Indent() may be called
 493      * multiple times to produce deeper indents.
 494      */
 495     public void indent() {
 496       indent.append("  ");
 497     }
 498
 499     /**
 500      * Reduces the current indent level by two spaces, or crashes if the indent
 501      * level is zero.
 502      */
 503     public void outdent() {
 504       final int length = indent.length();
 505       if (length == 0) {
 506         throw new IllegalArgumentException(
 507             " Outdent() without matching Indent().");
 508       }
 509       indent.delete(length - 2, length);
 510     }
 511
 512     /**
 513      * Print text to the output stream.
 514      */
 515     public void print(final CharSequence text) throws IOException {
 516       final int size = text.length();
 517       int pos = 0;
 518
 519       for (int i = 0; i < size; i++) {
 520         if (text.charAt(i) == '\n') {
 521           write(text.subSequence(pos, size), i - pos + 1);
 522           pos = i + 1;
 523           atStartOfLine = true;
 524         }
 525       }
 526       write(text.subSequence(pos, size), size - pos);
 527     }
 528
 529     private void write(final CharSequence data, final int size)
 530                        throws IOException {
 531       if (size == 0) {
 532         return;
 533       }
 534       if (atStartOfLine) {
 535         atStartOfLine = false;
 536         output.append(indent);
 537       }
 538       output.append(data);
 539     }
 540   }
 541
 542   // =================================================================
 543   // Parsing
 544
 545   /**
 546    * Represents a stream of tokens parsed from a {@code String}.
 547    *
 548    * <p>The Java standard library provides many classes that you might think
 549    * would be useful for implementing this, but aren't.  For example:
 550    *
 551    * <ul>
 552    * <li>{@code java.io.StreamTokenizer}:  This almost does what we want -- or,
 553    *   at least, something that would get us close to what we want -- except
 554    *   for one fatal flaw:  It automatically un-escapes strings using Java
 555    *   escape sequences, which do not include all the escape sequences we
 556    *   need to support (e.g. '\x').
 557    * <li>{@code java.util.Scanner}:  This seems like a great way at least to
 558    *   parse regular expressions out of a stream (so we wouldn't have to load
 559    *   the entire input into a single string before parsing).  Sadly,
 560    *   {@code Scanner} requires that tokens be delimited with some delimiter.
 561    *   Thus, although the text "foo:" should parse to two tokens ("foo" and
 562    *   ":"), {@code Scanner} would recognize it only as a single token.
 563    *   Furthermore, {@code Scanner} provides no way to inspect the contents
 564    *   of delimiters, making it impossible to keep track of line and column
 565    *   numbers.
 566    * </ul>
 567    *
 568    * <p>Luckily, Java's regular expression support does manage to be useful to
 569    * us.  (Barely:  We need {@code Matcher.usePattern()}, which is new in
 570    * Java 1.5.)  So, we can use that, at least.  Unfortunately, this implies
 571    * that we need to have the entire input in one contiguous string.
 572    */
 573   private static final class Tokenizer {
 574     private final CharSequence text;
 575     private final Matcher matcher;
 576     private String currentToken;
 577
 578     // The character index within this.text at which the current token begins.
 579     private int pos = 0;
 580
 581     // The line and column numbers of the current token.
 582     private int line = 0;
 583     private int column = 0;
 584
 585     // The line and column numbers of the previous token (allows throwing
 586     // errors *after* consuming).
 587     private int previousLine = 0;
 588     private int previousColumn = 0;
 589
 590     // We use possessive quantifiers (*+ and ++) because otherwise the Java
 591     // regex matcher has stack overflows on large inputs.
 592     private static final Pattern WHITESPACE =
 593       Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);
 594     private static final Pattern TOKEN = Pattern.compile(
 595       "[a-zA-Z_][0-9a-zA-Z_+-]*+|" +                // an identifier
 596       "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" +             // a number
 597       "\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" +       // a double-quoted string
 598       "\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)",         // a single-quoted string
 599       Pattern.MULTILINE);
 600
 601     private static final Pattern DOUBLE_INFINITY = Pattern.compile(
 602       "-?inf(inity)?",
 603       Pattern.CASE_INSENSITIVE);
 604     private static final Pattern FLOAT_INFINITY = Pattern.compile(
 605       "-?inf(inity)?f?",
 606       Pattern.CASE_INSENSITIVE);
 607     private static final Pattern FLOAT_NAN = Pattern.compile(
 608       "nanf?",
 609       Pattern.CASE_INSENSITIVE);
 610
 611     /** Construct a tokenizer that parses tokens from the given text. */
 612     private Tokenizer(final CharSequence text) {
 613       this.text = text;
 614       this.matcher = WHITESPACE.matcher(text);
 615       skipWhitespace();
 616       nextToken();
 617     }
 618
 619     /** Are we at the end of the input? */
 620     public boolean atEnd() {
 621       return currentToken.length() == 0;
 622     }
 623
 624     /** Advance to the next token. */
 625     public void nextToken() {
 626       previousLine = line;
 627       previousColumn = column;
 628
 629       // Advance the line counter to the current position.
 630       while (pos < matcher.regionStart()) {
 631         if (text.charAt(pos) == '\n') {
 632           ++line;
 633           column = 0;
 634         } else {
 635           ++column;
 636         }
 637         ++pos;
 638       }
 639
 640       // Match the next token.
 641       if (matcher.regionStart() == matcher.regionEnd()) {
 642         // EOF
 643         currentToken = "";
 644       } else {
 645         matcher.usePattern(TOKEN);
 646         if (matcher.lookingAt()) {
 647           currentToken = matcher.group();
 648           matcher.region(matcher.end(), matcher.regionEnd());
 649         } else {
 650           // Take one character.
 651           currentToken = String.valueOf(text.charAt(pos));
 652           matcher.region(pos + 1, matcher.regionEnd());
 653         }
 654
 655         skipWhitespace();
 656       }
 657     }
 658
 659     /**
 660      * Skip over any whitespace so that the matcher region starts at the next
 661      * token.
 662      */
 663     private void skipWhitespace() {
 664       matcher.usePattern(WHITESPACE);
 665       if (matcher.lookingAt()) {
 666         matcher.region(matcher.end(), matcher.regionEnd());
 667       }
 668     }
 669
 670     /**
 671      * If the next token exactly matches {@code token}, consume it and return
 672      * {@code true}.  Otherwise, return {@code false} without doing anything.
 673      */
 674     public boolean tryConsume(final String token) {
 675       if (currentToken.equals(token)) {
 676         nextToken();
 677         return true;
 678       } else {
 679         return false;
 680       }
 681     }
 682
 683     /**
 684      * If the next token exactly matches {@code token}, consume it.  Otherwise,
 685      * throw a {@link ParseException}.
 686      */
 687     public void consume(final String token) throws ParseException {
 688       if (!tryConsume(token)) {
 689         throw parseException("Expected \"" + token + "\".");
 690       }
 691     }
 692
 693     /**
 694      * Returns {@code true} if the next token is an integer, but does
 695      * not consume it.
 696      */
 697     public boolean lookingAtInteger() {
 698       if (currentToken.length() == 0) {
 699         return false;
 700       }
 701
 702       final char c = currentToken.charAt(0);
 703       return ('0' <= c && c <= '9') ||
 704              c == '-' || c == '+';
 705     }
 706
 707     /**
 708      * If the next token is an identifier, consume it and return its value.
 709      * Otherwise, throw a {@link ParseException}.
 710      */
 711     public String consumeIdentifier() throws ParseException {
 712       for (int i = 0; i < currentToken.length(); i++) {
 713         final char c = currentToken.charAt(i);
 714         if (('a' <= c && c <= 'z') ||
 715             ('A' <= c && c <= 'Z') ||
 716             ('0' <= c && c <= '9') ||
 717             (c == '_') || (c == '.')) {
 718           // OK
 719         } else {
 720           throw parseException("Expected identifier.");
 721         }
 722       }
 723
 724       final String result = currentToken;
 725       nextToken();
 726       return result;
 727     }
 728
 729     /**
 730      * If the next token is a 32-bit signed integer, consume it and return its
 731      * value.  Otherwise, throw a {@link ParseException}.
 732      */
 733     public int consumeInt32() throws ParseException {
 734       try {
 735         final int result = parseInt32(currentToken);
 736         nextToken();
 737         return result;
 738       } catch (NumberFormatException e) {
 739         throw integerParseException(e);
 740       }
 741     }
 742
 743     /**
 744      * If the next token is a 32-bit unsigned integer, consume it and return its
 745      * value.  Otherwise, throw a {@link ParseException}.
 746      */
 747     public int consumeUInt32() throws ParseException {
 748       try {
 749         final int result = parseUInt32(currentToken);
 750         nextToken();
 751         return result;
 752       } catch (NumberFormatException e) {
 753         throw integerParseException(e);
 754       }
 755     }
 756
 757     /**
 758      * If the next token is a 64-bit signed integer, consume it and return its
 759      * value.  Otherwise, throw a {@link ParseException}.
 760      */
 761     public long consumeInt64() throws ParseException {
 762       try {
 763         final long result = parseInt64(currentToken);
 764         nextToken();
 765         return result;
 766       } catch (NumberFormatException e) {
 767         throw integerParseException(e);
 768       }
 769     }
 770
 771     /**
 772      * If the next token is a 64-bit unsigned integer, consume it and return its
 773      * value.  Otherwise, throw a {@link ParseException}.
 774      */
 775     public long consumeUInt64() throws ParseException {
 776       try {
 777         final long result = parseUInt64(currentToken);
 778         nextToken();
 779         return result;
 780       } catch (NumberFormatException e) {
 781         throw integerParseException(e);
 782       }
 783     }
 784
 785     /**
 786      * If the next token is a double, consume it and return its value.
 787      * Otherwise, throw a {@link ParseException}.
 788      */
 789     public double consumeDouble() throws ParseException {
 790       // We need to parse infinity and nan separately because
 791       // Double.parseDouble() does not accept "inf", "infinity", or "nan".
 792       if (DOUBLE_INFINITY.matcher(currentToken).matches()) {
 793         final boolean negative = currentToken.startsWith("-");
 794         nextToken();
 795         return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
 796       }
 797       if (currentToken.equalsIgnoreCase("nan")) {
 798         nextToken();
 799         return Double.NaN;
 800       }
 801       try {
 802         final double result = Double.parseDouble(currentToken);
 803         nextToken();
 804         return result;
 805       } catch (NumberFormatException e) {
 806         throw floatParseException(e);
 807       }
 808     }
 809
 810     /**
 811      * If the next token is a float, consume it and return its value.
 812      * Otherwise, throw a {@link ParseException}.
 813      */
 814     public float consumeFloat() throws ParseException {
 815       // We need to parse infinity and nan separately because
 816       // Float.parseFloat() does not accept "inf", "infinity", or "nan".
 817       if (FLOAT_INFINITY.matcher(currentToken).matches()) {
 818         final boolean negative = currentToken.startsWith("-");
 819         nextToken();
 820         return negative ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY;
 821       }
 822       if (FLOAT_NAN.matcher(currentToken).matches()) {
 823         nextToken();
 824         return Float.NaN;
 825       }
 826       try {
 827         final float result = Float.parseFloat(currentToken);
 828         nextToken();
 829         return result;
 830       } catch (NumberFormatException e) {
 831         throw floatParseException(e);
 832       }
 833     }
 834
 835     /**
 836      * If the next token is a boolean, consume it and return its value.
 837      * Otherwise, throw a {@link ParseException}.
 838      */
 839     public boolean consumeBoolean() throws ParseException {
 840       if (currentToken.equals("true") ||
 841           currentToken.equals("t") ||
 842           currentToken.equals("1")) {
 843         nextToken();
 844         return true;
 845       } else if (currentToken.equals("false") ||
 846                  currentToken.equals("f") ||
 847                  currentToken.equals("0")) {
 848         nextToken();
 849         return false;
 850       } else {
 851         throw parseException("Expected \"true\" or \"false\".");
 852       }
 853     }
 854
 855     /**
 856      * If the next token is a string, consume it and return its (unescaped)
 857      * value.  Otherwise, throw a {@link ParseException}.
 858      */
 859     public String consumeString() throws ParseException {
 860       return consumeByteString().toStringUtf8();
 861     }
 862
 863     /**
 864      * If the next token is a string, consume it, unescape it as a
 865      * {@link ByteString}, and return it.  Otherwise, throw a
 866      * {@link ParseException}.
 867      */
 868     public ByteString consumeByteString() throws ParseException {
 869       List<ByteString> list = new ArrayList<ByteString>();
 870       consumeByteString(list);
 871       while (currentToken.startsWith("'") || currentToken.startsWith("\"")) {
 872         consumeByteString(list);
 873       }
 874       return ByteString.copyFrom(list);
 875     }
 876
 877     /**
 878      * Like {@link #consumeByteString()} but adds each token of the string to
 879      * the given list.  String literals (whether bytes or text) may come in
 880      * multiple adjacent tokens which are automatically concatenated, like in
 881      * C or Python.
 882      */
 883     private void consumeByteString(List<ByteString> list) throws ParseException {
 884       final char quote = currentToken.length() > 0 ? currentToken.charAt(0)
 885                                                    : '\0';
 886       if (quote != '\"' && quote != '\'') {
 887         throw parseException("Expected string.");
 888       }
 889
 890       if (currentToken.length() < 2 ||
 891           currentToken.charAt(currentToken.length() - 1) != quote) {
 892         throw parseException("String missing ending quote.");
 893       }
 894
 895       try {
 896         final String escaped =
 897             currentToken.substring(1, currentToken.length() - 1);
 898         final ByteString result = unescapeBytes(escaped);
 899         nextToken();
 900         list.add(result);
 901       } catch (InvalidEscapeSequenceException e) {
 902         throw parseException(e.getMessage());
 903       }
 904     }
 905
 906     /**
 907      * Returns a {@link ParseException} with the current line and column
 908      * numbers in the description, suitable for throwing.
 909      */
 910     public ParseException parseException(final String description) {
 911       // Note:  People generally prefer one-based line and column numbers.
 912       return new ParseException(
 913         line + 1, column + 1, description);
 914     }
 915
 916     /**
 917      * Returns a {@link ParseException} with the line and column numbers of
 918      * the previous token in the description, suitable for throwing.
 919      */
 920     public ParseException parseExceptionPreviousToken(
 921         final String description) {
 922       // Note:  People generally prefer one-based line and column numbers.
 923       return new ParseException(
 924         previousLine + 1, previousColumn + 1, description);
 925     }
 926
 927     /**
 928      * Constructs an appropriate {@link ParseException} for the given
 929      * {@code NumberFormatException} when trying to parse an integer.
 930      */
 931     private ParseException integerParseException(
 932         final NumberFormatException e) {
 933       return parseException("Couldn't parse integer: " + e.getMessage());
 934     }
 935
 936     /**
 937      * Constructs an appropriate {@link ParseException} for the given
 938      * {@code NumberFormatException} when trying to parse a float or double.
 939      */
 940     private ParseException floatParseException(final NumberFormatException e) {
 941       return parseException("Couldn't parse number: " + e.getMessage());
 942     }
 943   }
 944
 945   /** Thrown when parsing an invalid text format message. */
 946   public static class ParseException extends IOException {
 947     private static final long serialVersionUID = 3196188060225107702L;
 948
 949     private final int line;
 950     private final int column;
 951
 952     /** Create a new instance, with -1 as the line and column numbers. */
 953     public ParseException(final String message) {
 954       this(-1, -1, message);
 955     }
 956
 957     /**
 958      * Create a new instance
 959      *
 960      * @param line the line number where the parse error occurred,
 961      * using 1-offset.
 962      * @param column the column number where the parser error occurred,
 963      * using 1-offset.
 964      */
 965     public ParseException(final int line, final int column,
 966         final String message) {
 967       super(Integer.toString(line) + ":" + column + ": " + message);
 968       this.line = line;
 969       this.column = column;
 970     }
 971
 972     /**
 973      * Return the line where the parse exception occurred, or -1 when
 974      * none is provided. The value is specified as 1-offset, so the first
 975      * line is line 1.
 976      */
 977     public int getLine() {
 978       return line;
 979     }
 980
 981     /**
 982      * Return the column where the parse exception occurred, or -1 when
 983      * none is provided. The value is specified as 1-offset, so the first
 984      * line is line 1.
 985      */
 986     public int getColumn() {
 987       return column;
 988     }
 989   }
 990
 991   /**
 992    * Parse a text-format message from {@code input} and merge the contents
 993    * into {@code builder}.
 994    */
 995   public static void merge(final Readable input,
 996                            final Message.Builder builder)
 997                            throws IOException {
 998     merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
 999   }
1000
1001   /**
1002    * Parse a text-format message from {@code input} and merge the contents
1003    * into {@code builder}.
1004    */
1005   public static void merge(final CharSequence input,
1006                            final Message.Builder builder)
1007                            throws ParseException {
1008     merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
1009   }
1010
1011   /**
1012    * Parse a text-format message from {@code input} and merge the contents
1013    * into {@code builder}.  Extensions will be recognized if they are
1014    * registered in {@code extensionRegistry}.
1015    */
1016   public static void merge(final Readable input,
1017                            final ExtensionRegistry extensionRegistry,
1018                            final Message.Builder builder)
1019                            throws IOException {
1020     // Read the entire input to a String then parse that.
1021
1022     // If StreamTokenizer were not quite so crippled, or if there were a kind
1023     // of Reader that could read in chunks that match some particular regex,
1024     // or if we wanted to write a custom Reader to tokenize our stream, then
1025     // we would not have to read to one big String.  Alas, none of these is
1026     // the case.  Oh well.
1027
1028     merge(toStringBuilder(input), extensionRegistry, builder);
1029   }
1030
1031   private static final int BUFFER_SIZE = 4096;
1032
1033   // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer)
1034   // overhead is worthwhile
1035   private static StringBuilder toStringBuilder(final Readable input)
1036       throws IOException {
1037     final StringBuilder text = new StringBuilder();
1038     final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);
1039     while (true) {
1040       final int n = input.read(buffer);
1041       if (n == -1) {
1042         break;
1043       }
1044       buffer.flip();
1045       text.append(buffer, 0, n);
1046     }
1047     return text;
1048   }
1049
1050   /**
1051    * Parse a text-format message from {@code input} and merge the contents
1052    * into {@code builder}.  Extensions will be recognized if they are
1053    * registered in {@code extensionRegistry}.
1054    */
1055   public static void merge(final CharSequence input,
1056                            final ExtensionRegistry extensionRegistry,
1057                            final Message.Builder builder)
1058                            throws ParseException {
1059     final Tokenizer tokenizer = new Tokenizer(input);
1060
1061     while (!tokenizer.atEnd()) {
1062       mergeField(tokenizer, extensionRegistry, builder);
1063     }
1064   }
1065
1066   /**
1067    * Parse a single field from {@code tokenizer} and merge it into
1068    * {@code builder}.
1069    */
1070   private static void mergeField(final Tokenizer tokenizer,
1071                                  final ExtensionRegistry extensionRegistry,
1072                                  final Message.Builder builder)
1073                                  throws ParseException {
1074     FieldDescriptor field;
1075     final Descriptor type = builder.getDescriptorForType();
1076     ExtensionRegistry.ExtensionInfo extension = null;
1077
1078     if (tokenizer.tryConsume("[")) {
1079       // An extension.
1080       final StringBuilder name =
1081           new StringBuilder(tokenizer.consumeIdentifier());
1082       while (tokenizer.tryConsume(".")) {
1083         name.append('.');
1084         name.append(tokenizer.consumeIdentifier());
1085       }
1086
1087       extension = extensionRegistry.findExtensionByName(name.toString());
1088
1089       if (extension == null) {
1090         throw tokenizer.parseExceptionPreviousToken(
1091           "Extension \"" + name + "\" not found in the ExtensionRegistry.");
1092       } else if (extension.descriptor.getContainingType() != type) {
1093         throw tokenizer.parseExceptionPreviousToken(
1094           "Extension \"" + name + "\" does not extend message type \"" +
1095           type.getFullName() + "\".");
1096       }
1097
1098       tokenizer.consume("]");
1099
1100       field = extension.descriptor;
1101     } else {
1102       final String name = tokenizer.consumeIdentifier();
1103       field = type.findFieldByName(name);
1104
1105       // Group names are expected to be capitalized as they appear in the
1106       // .proto file, which actually matches their type names, not their field
1107       // names.
1108       if (field == null) {
1109         // Explicitly specify US locale so that this code does not break when
1110         // executing in Turkey.
1111         final String lowerName = name.toLowerCase(Locale.US);
1112         field = type.findFieldByName(lowerName);
1113         // If the case-insensitive match worked but the field is NOT a group,
1114         if (field != null && field.getType() != FieldDescriptor.Type.GROUP) {
1115           field = null;
1116         }
1117       }
1118       // Again, special-case group names as described above.
1119       if (field != null && field.getType() == FieldDescriptor.Type.GROUP &&
1120           !field.getMessageType().getName().equals(name)) {
1121         field = null;
1122       }
1123
1124       if (field == null) {
1125         throw tokenizer.parseExceptionPreviousToken(
1126           "Message type \"" + type.getFullName() +
1127           "\" has no field named \"" + name + "\".");
1128       }
1129     }
1130
1131     Object value = null;
1132
1133     if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
1134       tokenizer.tryConsume(":");  // optional
1135
1136       final String endToken;
1137       if (tokenizer.tryConsume("<")) {
1138         endToken = ">";
1139       } else {
1140         tokenizer.consume("{");
1141         endToken = "}";
1142       }
1143
1144       final Message.Builder subBuilder;
1145       if (extension == null) {
1146         subBuilder = builder.newBuilderForField(field);
1147       } else {
1148         subBuilder = extension.defaultInstance.newBuilderForType();
1149       }
1150
1151       while (!tokenizer.tryConsume(endToken)) {
1152         if (tokenizer.atEnd()) {
1153           throw tokenizer.parseException(
1154             "Expected \"" + endToken + "\".");
1155         }
1156         mergeField(tokenizer, extensionRegistry, subBuilder);
1157       }
1158
1159       value = subBuilder.buildPartial();
1160
1161     } else {
1162       tokenizer.consume(":");
1163
1164       switch (field.getType()) {
1165         case INT32:
1166         case SINT32:
1167         case SFIXED32:
1168           value = tokenizer.consumeInt32();
1169           break;
1170
1171         case INT64:
1172         case SINT64:
1173         case SFIXED64:
1174           value = tokenizer.consumeInt64();
1175           break;
1176
1177         case UINT32:
1178         case FIXED32:
1179           value = tokenizer.consumeUInt32();
1180           break;
1181
1182         case UINT64:
1183         case FIXED64:
1184           value = tokenizer.consumeUInt64();
1185           break;
1186
1187         case FLOAT:
1188           value = tokenizer.consumeFloat();
1189           break;
1190
1191         case DOUBLE:
1192           value = tokenizer.consumeDouble();
1193           break;
1194
1195         case BOOL:
1196           value = tokenizer.consumeBoolean();
1197           break;
1198
1199         case STRING:
1200           value = tokenizer.consumeString();
1201           break;
1202
1203         case BYTES:
1204           value = tokenizer.consumeByteString();
1205           break;
1206
1207         case ENUM:
1208           final EnumDescriptor enumType = field.getEnumType();
1209
1210           if (tokenizer.lookingAtInteger()) {
1211             final int number = tokenizer.consumeInt32();
1212             value = enumType.findValueByNumber(number);
1213             if (value == null) {
1214               throw tokenizer.parseExceptionPreviousToken(
1215                 "Enum type \"" + enumType.getFullName() +
1216                 "\" has no value with number " + number + '.');
1217             }
1218           } else {
1219             final String id = tokenizer.consumeIdentifier();
1220             value = enumType.findValueByName(id);
1221             if (value == null) {
1222               throw tokenizer.parseExceptionPreviousToken(
1223                 "Enum type \"" + enumType.getFullName() +
1224                 "\" has no value named \"" + id + "\".");
1225             }
1226           }
1227
1228           break;
1229
1230         case MESSAGE:
1231         case GROUP:
1232           throw new RuntimeException("Can't get here.");
1233       }
1234     }
1235
1236     if (field.isRepeated()) {
1237       builder.addRepeatedField(field, value);
1238     } else {
1239       builder.setField(field, value);
1240     }
1241   }
1242
1243   // =================================================================
1244   // Utility functions
1245   //
1246   // Some of these methods are package-private because Descriptors.java uses
1247   // them.
1248
1249   /**
1250    * Escapes bytes in the format used in protocol buffer text format, which
1251    * is the same as the format used for C string literals.  All bytes
1252    * that are not printable 7-bit ASCII characters are escaped, as well as
1253    * backslash, single-quote, and double-quote characters.  Characters for
1254    * which no defined short-hand escape sequence is defined will be escaped
1255    * using 3-digit octal sequences.
1256    */
1257   static String escapeBytes(final ByteString input) {
1258     final StringBuilder builder = new StringBuilder(input.size());
1259     for (int i = 0; i < input.size(); i++) {
1260       final byte b = input.byteAt(i);
1261       switch (b) {
1262         // Java does not recognize \a or \v, apparently.
1263         case 0x07: builder.append("\\a" ); break;
1264         case '\b': builder.append("\\b" ); break;
1265         case '\f': builder.append("\\f" ); break;
1266         case '\n': builder.append("\\n" ); break;
1267         case '\r': builder.append("\\r" ); break;
1268         case '\t': builder.append("\\t" ); break;
1269         case 0x0b: builder.append("\\v" ); break;
1270         case '\\': builder.append("\\\\"); break;
1271         case '\'': builder.append("\\\'"); break;
1272         case '"' : builder.append("\\\""); break;
1273         default:
1274           // Note:  Bytes with the high-order bit set should be escaped.  Since
1275           //   bytes are signed, such bytes will compare less than 0x20, hence
1276           //   the following line is correct.
1277           if (b >= 0x20) {
1278             builder.append((char) b);
1279           } else {
1280             builder.append('\\');
1281             builder.append((char) ('0' + ((b >>> 6) & 3)));
1282             builder.append((char) ('0' + ((b >>> 3) & 7)));
1283             builder.append((char) ('0' + (b & 7)));
1284           }
1285           break;
1286       }
1287     }
1288     return builder.toString();
1289   }
1290
1291   /**
1292    * Un-escape a byte sequence as escaped using
1293    * {@link #escapeBytes(ByteString)}.  Two-digit hex escapes (starting with
1294    * "\x") are also recognized.
1295    */
1296   static ByteString unescapeBytes(final CharSequence charString)
1297       throws InvalidEscapeSequenceException {
1298     // First convert the Java character sequence to UTF-8 bytes.
1299     ByteString input = ByteString.copyFromUtf8(charString.toString());
1300     // Then unescape certain byte sequences introduced by ASCII '\\'.  The valid
1301     // escapes can all be expressed with ASCII characters, so it is safe to
1302     // operate on bytes here.
1303     //
1304     // Unescaping the input byte array will result in a byte sequence that's no
1305     // longer than the input.  That's because each escape sequence is between
1306     // two and four bytes long and stands for a single byte.
1307     final byte[] result = new byte[input.size()];
1308     int pos = 0;
1309     for (int i = 0; i < input.size(); i++) {
1310       byte c = input.byteAt(i);
1311       if (c == '\\') {
1312         if (i + 1 < input.size()) {
1313           ++i;
1314           c = input.byteAt(i);
1315           if (isOctal(c)) {
1316             // Octal escape.
1317             int code = digitValue(c);
1318             if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
1319               ++i;
1320               code = code * 8 + digitValue(input.byteAt(i));
1321             }
1322             if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
1323               ++i;
1324               code = code * 8 + digitValue(input.byteAt(i));
1325             }
1326             // TODO: Check that 0 <= code && code <= 0xFF.
1327             result[pos++] = (byte)code;
1328           } else {
1329             switch (c) {
1330               case 'a' : result[pos++] = 0x07; break;
1331               case 'b' : result[pos++] = '\b'; break;
1332               case 'f' : result[pos++] = '\f'; break;
1333               case 'n' : result[pos++] = '\n'; break;
1334               case 'r' : result[pos++] = '\r'; break;
1335               case 't' : result[pos++] = '\t'; break;
1336               case 'v' : result[pos++] = 0x0b; break;
1337               case '\\': result[pos++] = '\\'; break;
1338               case '\'': result[pos++] = '\''; break;
1339               case '"' : result[pos++] = '\"'; break;
1340
1341               case 'x':
1342                 // hex escape
1343                 int code = 0;
1344                 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
1345                   ++i;
1346                   code = digitValue(input.byteAt(i));
1347                 } else {
1348                   throw new InvalidEscapeSequenceException(
1349                       "Invalid escape sequence: '\\x' with no digits");
1350                 }
1351                 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
1352                   ++i;
1353                   code = code * 16 + digitValue(input.byteAt(i));
1354                 }
1355                 result[pos++] = (byte)code;
1356                 break;
1357
1358               default:
1359                 throw new InvalidEscapeSequenceException(
1360                     "Invalid escape sequence: '\\" + (char)c + '\'');
1361             }
1362           }
1363         } else {
1364           throw new InvalidEscapeSequenceException(
1365               "Invalid escape sequence: '\\' at end of string.");
1366         }
1367       } else {
1368         result[pos++] = c;
1369       }
1370     }
1371
1372     return ByteString.copyFrom(result, 0, pos);
1373   }
1374
1375   /**
1376    * Thrown by {@link TextFormat#unescapeBytes} and
1377    * {@link TextFormat#unescapeText} when an invalid escape sequence is seen.
1378    */
1379   static class InvalidEscapeSequenceException extends IOException {
1380     private static final long serialVersionUID = -8164033650142593304L;
1381
1382     InvalidEscapeSequenceException(final String description) {
1383       super(description);
1384     }
1385   }
1386
1387   /**
1388    * Like {@link #escapeBytes(ByteString)}, but escapes a text string.
1389    * Non-ASCII characters are first encoded as UTF-8, then each byte is escaped
1390    * individually as a 3-digit octal escape.  Yes, it's weird.
1391    */
1392   static String escapeText(final String input) {
1393     return escapeBytes(ByteString.copyFromUtf8(input));
1394   }
1395
1396   /**
1397    * Un-escape a text string as escaped using {@link #escapeText(String)}.
1398    * Two-digit hex escapes (starting with "\x") are also recognized.
1399    */
1400   static String unescapeText(final String input)
1401                              throws InvalidEscapeSequenceException {
1402     return unescapeBytes(input).toStringUtf8();
1403   }
1404
1405   /** Is this an octal digit? */
1406   private static boolean isOctal(final byte c) {
1407     return '0' <= c && c <= '7';
1408   }
1409
1410   /** Is this a hex digit? */
1411   private static boolean isHex(final byte c) {
1412     return ('0' <= c && c <= '9') ||
1413            ('a' <= c && c <= 'f') ||
1414            ('A' <= c && c <= 'F');
1415   }
1416
1417   /**
1418    * Interpret a character as a digit (in any base up to 36) and return the
1419    * numeric value.  This is like {@code Character.digit()} but we don't accept
1420    * non-ASCII digits.
1421    */
1422   private static int digitValue(final byte c) {
1423     if ('0' <= c && c <= '9') {
1424       return c - '0';
1425     } else if ('a' <= c && c <= 'z') {
1426       return c - 'a' + 10;
1427     } else {
1428       return c - 'A' + 10;
1429     }
1430   }
1431
1432   /**
1433    * Parse a 32-bit signed integer from the text.  Unlike the Java standard
1434    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
1435    * and "0" to signify hexadecimal and octal numbers, respectively.
1436    */
1437   static int parseInt32(final String text) throws NumberFormatException {
1438     return (int) parseInteger(text, true, false);
1439   }
1440
1441   /**
1442    * Parse a 32-bit unsigned integer from the text.  Unlike the Java standard
1443    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
1444    * and "0" to signify hexadecimal and octal numbers, respectively.  The
1445    * result is coerced to a (signed) {@code int} when returned since Java has
1446    * no unsigned integer type.
1447    */
1448   static int parseUInt32(final String text) throws NumberFormatException {
1449     return (int) parseInteger(text, false, false);
1450   }
1451
1452   /**
1453    * Parse a 64-bit signed integer from the text.  Unlike the Java standard
1454    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
1455    * and "0" to signify hexadecimal and octal numbers, respectively.
1456    */
1457   static long parseInt64(final String text) throws NumberFormatException {
1458     return parseInteger(text, true, true);
1459   }
1460
1461   /**
1462    * Parse a 64-bit unsigned integer from the text.  Unlike the Java standard
1463    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
1464    * and "0" to signify hexadecimal and octal numbers, respectively.  The
1465    * result is coerced to a (signed) {@code long} when returned since Java has
1466    * no unsigned long type.
1467    */
1468   static long parseUInt64(final String text) throws NumberFormatException {
1469     return parseInteger(text, false, true);
1470   }
1471
1472   private static long parseInteger(final String text,
1473                                    final boolean isSigned,
1474                                    final boolean isLong)
1475                                    throws NumberFormatException {
1476     int pos = 0;
1477
1478     boolean negative = false;
1479     if (text.startsWith("-", pos)) {
1480       if (!isSigned) {
1481         throw new NumberFormatException("Number must be positive: " + text);
1482       }
1483       ++pos;
1484       negative = true;
1485     }
1486
1487     int radix = 10;
1488     if (text.startsWith("0x", pos)) {
1489       pos += 2;
1490       radix = 16;
1491     } else if (text.startsWith("0", pos)) {
1492       radix = 8;
1493     }
1494
1495     final String numberText = text.substring(pos);
1496
1497     long result = 0;
1498     if (numberText.length() < 16) {
1499       // Can safely assume no overflow.
1500       result = Long.parseLong(numberText, radix);
1501       if (negative) {
1502         result = -result;
1503       }
1504
1505       // Check bounds.
1506       // No need to check for 64-bit numbers since they'd have to be 16 chars
1507       // or longer to overflow.
1508       if (!isLong) {
1509         if (isSigned) {
1510           if (result > Integer.MAX_VALUE || result < Integer.MIN_VALUE) {
1511             throw new NumberFormatException(
1512               "Number out of range for 32-bit signed integer: " + text);
1513           }
1514         } else {
1515           if (result >= (1L << 32) || result < 0) {
1516             throw new NumberFormatException(
1517               "Number out of range for 32-bit unsigned integer: " + text);
1518           }
1519         }
1520       }
1521     } else {
1522       BigInteger bigValue = new BigInteger(numberText, radix);
1523       if (negative) {
1524         bigValue = bigValue.negate();
1525       }
1526
1527       // Check bounds.
1528       if (!isLong) {
1529         if (isSigned) {
1530           if (bigValue.bitLength() > 31) {
1531             throw new NumberFormatException(
1532               "Number out of range for 32-bit signed integer: " + text);
1533           }
1534         } else {
1535           if (bigValue.bitLength() > 32) {
1536             throw new NumberFormatException(
1537               "Number out of range for 32-bit unsigned integer: " + text);
1538           }
1539         }
1540       } else {
1541         if (isSigned) {
1542           if (bigValue.bitLength() > 63) {
1543             throw new NumberFormatException(
1544               "Number out of range for 64-bit signed integer: " + text);
1545           }
1546         } else {
1547           if (bigValue.bitLength() > 64) {
1548             throw new NumberFormatException(
1549               "Number out of range for 64-bit unsigned integer: " + text);
1550           }
1551         }
1552       }
1553
1554       result = bigValue.longValue();
1555     }
1556
1557     return result;
1558   }
1559 }