base/json/json_parser.cc

   1 // Copyright 2012 The Chromium Authors
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/json/json_parser.h"
   6
   7 #include <cmath>
   8 #include <iterator>
   9 #include <utility>
  10 #include <vector>
  11
  12 #include "base/check_op.h"
  13 #include "base/feature_list.h"
  14 #include "base/features.h"
  15 #include "base/json/json_reader.h"
  16 #include "base/metrics/histogram_functions.h"
  17 #include "base/notreached.h"
  18 #include "base/numerics/safe_conversions.h"
  19 #include "base/ranges/algorithm.h"
  20 #include "base/strings/string_number_conversions.h"
  21 #include "base/strings/string_piece.h"
  22 #include "base/strings/string_util.h"
  23 #include "base/strings/stringprintf.h"
  24 #include "base/strings/utf_string_conversion_utils.h"
  25 #include "base/strings/utf_string_conversions.h"
  26 #include "base/third_party/icu/icu_utf.h"
  27 #include "third_party/abseil-cpp/absl/types/optional.h"
  28
  29 namespace base {
  30 namespace internal {
  31
  32 namespace {
  33
// Values 1000 and above are used by JSONFileValueSerializer::JsonFileError.
// This compile-time check fails the build if JSON_PARSE_ERROR_COUNT ever
// reaches that range.
static_assert(JSONParser::JSON_PARSE_ERROR_COUNT < 1000,
              "JSONParser error out of bounds");
  37
  38 std::string ErrorCodeToString(JSONParser::JsonParseError error_code) {
  39   switch (error_code) {
  40     case JSONParser::JSON_NO_ERROR:
  41       return std::string();
  42     case JSONParser::JSON_SYNTAX_ERROR:
  43       return JSONParser::kSyntaxError;
  44     case JSONParser::JSON_INVALID_ESCAPE:
  45       return JSONParser::kInvalidEscape;
  46     case JSONParser::JSON_UNEXPECTED_TOKEN:
  47       return JSONParser::kUnexpectedToken;
  48     case JSONParser::JSON_TRAILING_COMMA:
  49       return JSONParser::kTrailingComma;
  50     case JSONParser::JSON_TOO_MUCH_NESTING:
  51       return JSONParser::kTooMuchNesting;
  52     case JSONParser::JSON_UNEXPECTED_DATA_AFTER_ROOT:
  53       return JSONParser::kUnexpectedDataAfterRoot;
  54     case JSONParser::JSON_UNSUPPORTED_ENCODING:
  55       return JSONParser::kUnsupportedEncoding;
  56     case JSONParser::JSON_UNQUOTED_DICTIONARY_KEY:
  57       return JSONParser::kUnquotedDictionaryKey;
  58     case JSONParser::JSON_UNREPRESENTABLE_NUMBER:
  59       return JSONParser::kUnrepresentableNumber;
  60     case JSONParser::JSON_PARSE_ERROR_COUNT:
  61       break;
  62   }
  63   NOTREACHED();
  64   return std::string();
  65 }
  66
  67 const int32_t kExtendedASCIIStart = 0x80;
// U+FFFD, the code point substituted for invalid input when
// JSON_REPLACE_INVALID_CHARACTERS is set.
constexpr base_icu::UChar32 kUnicodeReplacementPoint = 0xFFFD;
  69
  70 // UnprefixedHexStringToInt acts like |HexStringToInt|, but enforces that the
  71 // input consists purely of hex digits. I.e. no "0x" nor "OX" prefix is
  72 // permitted.
  73 bool UnprefixedHexStringToInt(StringPiece input, int* output) {
  74   for (size_t i = 0; i < input.size(); i++) {
  75     if (!IsHexDigit(input[i])) {
  76       return false;
  77     }
  78   }
  79   return HexStringToInt(input, output);
  80 }
  81
  82 // These values are persisted to logs. Entries should not be renumbered and
  83 // numeric values should never be reused.
  84 enum class ChromiumJsonExtension {
  85   kCComment,
  86   kCppComment,
  87   kXEscape,
  88   kVerticalTabEscape,
  89   kControlCharacter,
  90   kMaxValue = kControlCharacter,
  91 };
  92
  93 const char kExtensionHistogramName[] =
  94     "Security.JSONParser.ChromiumExtensionUsage";
  95
  96 }  // namespace
  97
// The UTF-8 encoding of U+FFFD. StringBuilder::Append() writes these bytes
// when asked to append kUnicodeReplacementPoint.
const char kUnicodeReplacementString[] = "\xEF\xBF\xBD";
 100
// Human-readable descriptions, one per JsonParseError value other than
// JSON_NO_ERROR. ErrorCodeToString() maps error codes onto these strings, and
// GetErrorMessage() embeds them in the formatted error message.
const char JSONParser::kSyntaxError[] = "Syntax error.";
const char JSONParser::kInvalidEscape[] = "Invalid escape sequence.";
const char JSONParser::kUnexpectedToken[] = "Unexpected token.";
const char JSONParser::kTrailingComma[] = "Trailing comma not allowed.";
const char JSONParser::kTooMuchNesting[] = "Too much nesting.";
const char JSONParser::kUnexpectedDataAfterRoot[] =
    "Unexpected data after root element.";
const char JSONParser::kUnsupportedEncoding[] =
    "Unsupported encoding. JSON must be UTF-8.";
const char JSONParser::kUnquotedDictionaryKey[] =
    "Dictionary keys must be quoted.";
const char JSONParser::kUnrepresentableNumber[] =
    "Number cannot be represented.";
 114
// Creates a parser configured with |options| (a bitmask tested against the
// JSON_* option flags throughout this file) and a nesting limit of
// |max_depth|, which must not exceed kAbsoluteMaxDepth (enforced with CHECK).
// All position and error state starts zeroed; Parse() re-initializes it for
// each input.
JSONParser::JSONParser(int options, size_t max_depth)
    : options_(options),
      max_depth_(max_depth),
      index_(0),
      stack_depth_(0),
      line_number_(0),
      index_last_line_(0),
      error_code_(JSON_NO_ERROR),
      error_line_(0),
      error_column_(0) {
  CHECK_LE(max_depth, kAbsoluteMaxDepth);
}
 127
// Defaulted out of line: no custom cleanup is performed.
JSONParser::~JSONParser() = default;
 129
 130 absl::optional<Value> JSONParser::Parse(StringPiece input) {
 131   input_ = input;
 132   index_ = 0;
 133   // Line and column counting is 1-based, but |index_| is 0-based. For example,
 134   // if input is "Aaa\nB" then 'A' and 'B' are both in column 1 (at lines 1 and
 135   // 2) and have indexes of 0 and 4. We track the line number explicitly (the
 136   // |line_number_| field) and the column number implicitly (the difference
 137   // between |index_| and |index_last_line_|). In calculating that difference,
 138   // |index_last_line_| is the index of the '\r' or '\n', not the index of the
 139   // first byte after the '\n'. For the 'B' in "Aaa\nB", its |index_| and
 140   // |index_last_line_| would be 4 and 3: 'B' is in column (4 - 3) = 1. We
 141   // initialize |index_last_line_| to -1, not 0, since -1 is the (out of range)
 142   // index of the imaginary '\n' immediately before the start of the string:
 143   // 'A' is in column (0 - -1) = 1.
 144   line_number_ = 1;
 145   index_last_line_ = static_cast<size_t>(-1);
 146
 147   error_code_ = JSON_NO_ERROR;
 148   error_line_ = 0;
 149   error_column_ = 0;
 150
 151   // When the input JSON string starts with a UTF-8 Byte-Order-Mark,
 152   // advance the start position to avoid the ParseNextToken function mis-
 153   // treating a Unicode BOM as an invalid character and returning NULL.
 154   ConsumeIfMatch("\xEF\xBB\xBF");
 155
 156   // Parse the first and any nested tokens.
 157   absl::optional<Value> root(ParseNextToken());
 158   if (!root)
 159     return absl::nullopt;
 160
 161   // Make sure the input stream is at an end.
 162   if (GetNextToken() != T_END_OF_INPUT) {
 163     ReportError(JSON_UNEXPECTED_DATA_AFTER_ROOT, 0);
 164     return absl::nullopt;
 165   }
 166
 167   return root;
 168 }
 169
// Returns the code stored by the most recent ReportError() call, or
// JSON_NO_ERROR if none has been reported since construction / the last
// Parse() reset.
JSONParser::JsonParseError JSONParser::error_code() const {
  return error_code_;
}
 173
// Returns the description of the current error code, formatted by
// FormatErrorMessage() with the recorded error line and column.
std::string JSONParser::GetErrorMessage() const {
  return FormatErrorMessage(error_line_, error_column_,
                            ErrorCodeToString(error_code_));
}
 178
// Returns the line recorded by the most recent ReportError() call, or 0 if no
// error has been reported.
int JSONParser::error_line() const {
  return error_line_;
}
 182
// Returns the column recorded by the most recent ReportError() call, or 0 if
// no error has been reported.
int JSONParser::error_column() const {
  return error_column_;
}
 186
 187 // StringBuilder ///////////////////////////////////////////////////////////////
 188
// Creates an empty builder that is not attached to any input position.
JSONParser::StringBuilder::StringBuilder() : StringBuilder(nullptr) {}

// Creates an empty builder whose characters start at |pos|. No bytes are
// copied here; only the start pointer and a zero length are recorded.
JSONParser::StringBuilder::StringBuilder(const char* pos)
    : pos_(pos), length_(0) {}
 193
// Defaulted out of line: no custom cleanup is performed.
JSONParser::StringBuilder::~StringBuilder() = default;

// Defaulted member-wise move assignment. ConsumeStringRaw() uses it to hand
// its locally built string to the caller's builder.
JSONParser::StringBuilder& JSONParser::StringBuilder::operator=(
    StringBuilder&& other) = default;
 198
 199 void JSONParser::StringBuilder::Append(base_icu::UChar32 point) {
 200   DCHECK(IsValidCodepoint(point));
 201
 202   if (point < kExtendedASCIIStart) {
 203     if (!string_) {
 204       DCHECK_EQ(static_cast<char>(point), pos_[length_]);
 205       ++length_;
 206     } else {
 207       string_->push_back(static_cast<char>(point));
 208     }
 209   } else {
 210     Convert();
 211     if (UNLIKELY(point == kUnicodeReplacementPoint)) {
 212       string_->append(kUnicodeReplacementString);
 213     } else {
 214       WriteUnicodeCharacter(point, &*string_);
 215     }
 216   }
 217 }
 218
 219 void JSONParser::StringBuilder::Convert() {
 220   if (string_)
 221     return;
 222   string_.emplace(pos_, length_);
 223 }
 224
 225 std::string JSONParser::StringBuilder::DestructiveAsString() {
 226   if (string_)
 227     return std::move(*string_);
 228   return std::string(pos_, length_);
 229 }
 230
 231 // JSONParser private //////////////////////////////////////////////////////////
 232
 233 absl::optional<StringPiece> JSONParser::PeekChars(size_t count) {
 234   if (index_ + count > input_.length())
 235     return absl::nullopt;
 236   // Using StringPiece::substr() is significantly slower (according to
 237   // base_perftests) than constructing a substring manually.
 238   return StringPiece(input_.data() + index_, count);
 239 }
 240
 241 absl::optional<char> JSONParser::PeekChar() {
 242   absl::optional<StringPiece> chars = PeekChars(1);
 243   if (chars)
 244     return (*chars)[0];
 245   return absl::nullopt;
 246 }
 247
 248 absl::optional<StringPiece> JSONParser::ConsumeChars(size_t count) {
 249   absl::optional<StringPiece> chars = PeekChars(count);
 250   if (chars)
 251     index_ += count;
 252   return chars;
 253 }
 254
 255 absl::optional<char> JSONParser::ConsumeChar() {
 256   absl::optional<StringPiece> chars = ConsumeChars(1);
 257   if (chars)
 258     return (*chars)[0];
 259   return absl::nullopt;
 260 }
 261
// Returns a pointer to the current read position inside the input buffer. The
// position may equal the input length (one past the last byte) but a CHECK
// guarantees that it never lies beyond that.
const char* JSONParser::pos() {
  CHECK_LE(index_, input_.length());
  return input_.data() + index_;
}
 266
 267 JSONParser::Token JSONParser::GetNextToken() {
 268   EatWhitespaceAndComments();
 269
 270   absl::optional<char> c = PeekChar();
 271   if (!c)
 272     return T_END_OF_INPUT;
 273
 274   switch (*c) {
 275     case '{':
 276       return T_OBJECT_BEGIN;
 277     case '}':
 278       return T_OBJECT_END;
 279     case '[':
 280       return T_ARRAY_BEGIN;
 281     case ']':
 282       return T_ARRAY_END;
 283     case '"':
 284       return T_STRING;
 285     case '0':
 286     case '1':
 287     case '2':
 288     case '3':
 289     case '4':
 290     case '5':
 291     case '6':
 292     case '7':
 293     case '8':
 294     case '9':
 295     case '-':
 296       return T_NUMBER;
 297     case 't':
 298       return T_BOOL_TRUE;
 299     case 'f':
 300       return T_BOOL_FALSE;
 301     case 'n':
 302       return T_NULL;
 303     case ',':
 304       return T_LIST_SEPARATOR;
 305     case ':':
 306       return T_OBJECT_PAIR_SEPARATOR;
 307     default:
 308       return T_INVALID_TOKEN;
 309   }
 310 }
 311
 312 void JSONParser::EatWhitespaceAndComments() {
 313   while (absl::optional<char> c = PeekChar()) {
 314     switch (*c) {
 315       case '\r':
 316       case '\n':
 317         index_last_line_ = index_;
 318         // Don't increment line_number_ twice for "\r\n".
 319         if (!(c == '\n' && index_ > 0 && input_[index_ - 1] == '\r')) {
 320           ++line_number_;
 321         }
 322         [[fallthrough]];
 323       case ' ':
 324       case '\t':
 325         ConsumeChar();
 326         break;
 327       case '/':
 328         if (!EatComment())
 329           return;
 330         break;
 331       default:
 332         return;
 333     }
 334   }
 335 }
 336
 337 bool JSONParser::EatComment() {
 338   absl::optional<StringPiece> comment_start = PeekChars(2);
 339   if (!comment_start)
 340     return false;
 341
 342   const bool comments_allowed = options_ & JSON_ALLOW_COMMENTS;
 343
 344   if (comment_start == "//") {
 345     UmaHistogramEnumeration(kExtensionHistogramName,
 346                             ChromiumJsonExtension::kCppComment);
 347     if (!comments_allowed) {
 348       ReportError(JSON_UNEXPECTED_TOKEN, 0);
 349       return false;
 350     }
 351
 352     ConsumeChars(2);
 353     // Single line comment, read to newline.
 354     while (absl::optional<char> c = PeekChar()) {
 355       if (c == '\n' || c == '\r')
 356         return true;
 357       ConsumeChar();
 358     }
 359   } else if (comment_start == "/*") {
 360     UmaHistogramEnumeration(kExtensionHistogramName,
 361                             ChromiumJsonExtension::kCComment);
 362     if (!comments_allowed) {
 363       ReportError(JSON_UNEXPECTED_TOKEN, 0);
 364       return false;
 365     }
 366
 367     ConsumeChars(2);
 368     char previous_char = '\0';
 369     // Block comment, read until end marker.
 370     while (absl::optional<char> c = PeekChar()) {
 371       if (previous_char == '*' && c == '/') {
 372         // EatWhitespaceAndComments will inspect pos(), which will still be on
 373         // the last / of the comment, so advance once more (which may also be
 374         // end of input).
 375         ConsumeChar();
 376         return true;
 377       }
 378       previous_char = *ConsumeChar();
 379     }
 380
 381     // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
 382   }
 383
 384   return false;
 385 }
 386
// Skips whitespace/comments, classifies the upcoming token and parses the
// value it starts. Returns absl::nullopt if parsing fails.
absl::optional<Value> JSONParser::ParseNextToken() {
  return ParseToken(GetNextToken());
}
 390
 391 absl::optional<Value> JSONParser::ParseToken(Token token) {
 392   switch (token) {
 393     case T_OBJECT_BEGIN:
 394       return ConsumeDictionary();
 395     case T_ARRAY_BEGIN:
 396       return ConsumeList();
 397     case T_STRING:
 398       return ConsumeString();
 399     case T_NUMBER:
 400       return ConsumeNumber();
 401     case T_BOOL_TRUE:
 402     case T_BOOL_FALSE:
 403     case T_NULL:
 404       return ConsumeLiteral();
 405     default:
 406       ReportError(JSON_UNEXPECTED_TOKEN, 0);
 407       return absl::nullopt;
 408   }
 409 }
 410
 411 absl::optional<Value> JSONParser::ConsumeDictionary() {
 412   if (ConsumeChar() != '{') {
 413     ReportError(JSON_UNEXPECTED_TOKEN, 0);
 414     return absl::nullopt;
 415   }
 416
 417   StackMarker depth_check(max_depth_, &stack_depth_);
 418   if (depth_check.IsTooDeep()) {
 419     ReportError(JSON_TOO_MUCH_NESTING, -1);
 420     return absl::nullopt;
 421   }
 422
 423   std::vector<std::pair<std::string, Value>> values;
 424
 425   Token token = GetNextToken();
 426   while (token != T_OBJECT_END) {
 427     if (token != T_STRING) {
 428       ReportError(JSON_UNQUOTED_DICTIONARY_KEY, 0);
 429       return absl::nullopt;
 430     }
 431
 432     // First consume the key.
 433     StringBuilder key;
 434     if (!ConsumeStringRaw(&key)) {
 435       return absl::nullopt;
 436     }
 437
 438     // Read the separator.
 439     token = GetNextToken();
 440     if (token != T_OBJECT_PAIR_SEPARATOR) {
 441       ReportError(JSON_SYNTAX_ERROR, 0);
 442       return absl::nullopt;
 443     }
 444
 445     // The next token is the value. Ownership transfers to |dict|.
 446     ConsumeChar();
 447     absl::optional<Value> value = ParseNextToken();
 448     if (!value) {
 449       // ReportError from deeper level.
 450       return absl::nullopt;
 451     }
 452
 453     values.emplace_back(key.DestructiveAsString(), std::move(*value));
 454
 455     token = GetNextToken();
 456     if (token == T_LIST_SEPARATOR) {
 457       ConsumeChar();
 458       token = GetNextToken();
 459       if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
 460         ReportError(JSON_TRAILING_COMMA, 0);
 461         return absl::nullopt;
 462       }
 463     } else if (token != T_OBJECT_END) {
 464       ReportError(JSON_SYNTAX_ERROR, 0);
 465       return absl::nullopt;
 466     }
 467   }
 468
 469   ConsumeChar();  // Closing '}'.
 470   // Reverse |dict_storage| to keep the last of elements with the same key in
 471   // the input.
 472   ranges::reverse(values);
 473   return Value(Value::Dict(std::make_move_iterator(values.begin()),
 474                            std::make_move_iterator(values.end())));
 475 }
 476
 477 absl::optional<Value> JSONParser::ConsumeList() {
 478   if (ConsumeChar() != '[') {
 479     ReportError(JSON_UNEXPECTED_TOKEN, 0);
 480     return absl::nullopt;
 481   }
 482
 483   StackMarker depth_check(max_depth_, &stack_depth_);
 484   if (depth_check.IsTooDeep()) {
 485     ReportError(JSON_TOO_MUCH_NESTING, -1);
 486     return absl::nullopt;
 487   }
 488
 489   Value::List list;
 490
 491   Token token = GetNextToken();
 492   while (token != T_ARRAY_END) {
 493     absl::optional<Value> item = ParseToken(token);
 494     if (!item) {
 495       // ReportError from deeper level.
 496       return absl::nullopt;
 497     }
 498
 499     list.Append(std::move(*item));
 500
 501     token = GetNextToken();
 502     if (token == T_LIST_SEPARATOR) {
 503       ConsumeChar();
 504       token = GetNextToken();
 505       if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
 506         ReportError(JSON_TRAILING_COMMA, 0);
 507         return absl::nullopt;
 508       }
 509     } else if (token != T_ARRAY_END) {
 510       ReportError(JSON_SYNTAX_ERROR, 0);
 511       return absl::nullopt;
 512     }
 513   }
 514
 515   ConsumeChar();  // Closing ']'.
 516
 517   return Value(std::move(list));
 518 }
 519
 520 absl::optional<Value> JSONParser::ConsumeString() {
 521   StringBuilder string;
 522   if (!ConsumeStringRaw(&string))
 523     return absl::nullopt;
 524   return Value(string.DestructiveAsString());
 525 }
 526
// Parses a quoted string starting at the opening '"' and, on success, moves
// the decoded contents into |out| and returns true with the position just past
// the closing '"'. On failure an error is reported, |out| is left untouched
// and false is returned. Reaching the end of the input before the closing
// quote is reported as JSON_SYNTAX_ERROR.
bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
  if (ConsumeChar() != '"') {
    ReportError(JSON_UNEXPECTED_TOKEN, 0);
    return false;
  }

  // StringBuilder will internally build a StringPiece unless a UTF-16
  // conversion occurs, at which point it will perform a copy into a
  // std::string.
  StringBuilder string(pos());

  while (absl::optional<char> c = PeekChar()) {
    base_icu::UChar32 next_char = 0;
    if (static_cast<unsigned char>(*c) < kExtendedASCIIStart) {
      // Fast path for ASCII.
      next_char = *c;
    } else if (!ReadUnicodeCharacter(input_.data(), input_.length(), &index_,
                                     &next_char) ||
               !IsValidCodepoint(next_char)) {
      // Invalid UTF-8: either fail, or substitute U+FFFD when the caller asked
      // for invalid characters to be replaced.
      if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0) {
        ReportError(JSON_UNSUPPORTED_ENCODING, 0);
        return false;
      }
      ConsumeChar();
      string.Append(kUnicodeReplacementPoint);
      continue;
    }

    if (next_char == '"') {
      // Closing quote: hand the result to the caller.
      ConsumeChar();
      *out = std::move(string);
      return true;
    }
    if (next_char != '\\') {
      // Per Section 7, "All Unicode characters may be placed within the
      // quotation marks, except for the characters that MUST be escaped:
      // quotation mark, reverse solidus, and the control characters (U+0000
      // through U+001F)".
      if (next_char <= 0x1F) {
        UmaHistogramEnumeration(kExtensionHistogramName,
                                ChromiumJsonExtension::kControlCharacter);
        if (!(options_ & JSON_ALLOW_CONTROL_CHARS)) {
          ReportError(JSON_UNSUPPORTED_ENCODING, -1);
          return false;
        }
      }

      // If this character is not an escape sequence, track any line breaks and
      // copy next_char to the StringBuilder. The JSON spec forbids unescaped
      // ASCII control characters within a string, including '\r' and '\n';
      // they only get this far when JSON_ALLOW_CONTROL_CHARS is set (see the
      // check above).
      if ((next_char == '\r') || (next_char == '\n')) {
        index_last_line_ = index_;
        // Don't increment line_number_ twice for "\r\n". We are guaranteed
        // that (index_ > 0) because we are consuming a string, so we must have
        // seen an opening '"' quote character.
        if ((next_char == '\r') || (input_[index_ - 1] != '\r')) {
          ++line_number_;
        }
      }
      ConsumeChar();
      string.Append(next_char);
    } else {
      // And if it is an escape sequence, the input string will be adjusted
      // (either by combining the two characters of an encoded escape sequence,
      // or with a UTF conversion), so using StringPiece isn't possible -- force
      // a conversion.
      string.Convert();

      // Read past the escape '\' and ensure there's a character following.
      absl::optional<StringPiece> escape_sequence = ConsumeChars(2);
      if (!escape_sequence) {
        ReportError(JSON_INVALID_ESCAPE, -1);
        return false;
      }

      switch ((*escape_sequence)[1]) {
        // Allowed escape sequences:
        case 'x': {  // UTF-8 sequence.
          // UTF-8 \x escape sequences are not allowed in the spec, but they
          // are supported here for backwards-compatibility with the old
          // parser.
          UmaHistogramEnumeration(kExtensionHistogramName,
                                  ChromiumJsonExtension::kXEscape);
          if (!(options_ & JSON_ALLOW_X_ESCAPES)) {
            ReportError(JSON_INVALID_ESCAPE, -1);
            return false;
          }

          // Exactly two hex digits must follow the "\x".
          escape_sequence = ConsumeChars(2);
          if (!escape_sequence) {
            ReportError(JSON_INVALID_ESCAPE, -3);
            return false;
          }

          int hex_digit = 0;
          if (!UnprefixedHexStringToInt(*escape_sequence, &hex_digit) ||
              !IsValidCharacter(hex_digit)) {
            ReportError(JSON_INVALID_ESCAPE, -3);
            return false;
          }

          string.Append(hex_digit);
          break;
        }
        case 'u': {  // UTF-16 sequence.
          // UTF units are of the form \uXXXX.
          base_icu::UChar32 code_point;
          if (!DecodeUTF16(&code_point)) {
            ReportError(JSON_INVALID_ESCAPE, -1);
            return false;
          }
          string.Append(code_point);
          break;
        }
        case '"':
          string.Append('"');
          break;
        case '\\':
          string.Append('\\');
          break;
        case '/':
          string.Append('/');
          break;
        case 'b':
          string.Append('\b');
          break;
        case 'f':
          string.Append('\f');
          break;
        case 'n':
          string.Append('\n');
          break;
        case 'r':
          string.Append('\r');
          break;
        case 't':
          string.Append('\t');
          break;
        case 'v':  // Not listed as valid escape sequence in the RFC.
          UmaHistogramEnumeration(kExtensionHistogramName,
                                  ChromiumJsonExtension::kVerticalTabEscape);
          if (!(options_ & JSON_ALLOW_VERT_TAB)) {
            ReportError(JSON_INVALID_ESCAPE, -1);
            return false;
          }
          string.Append('\v');
          break;
        // All other escape sequences are illegal.
        default:
          ReportError(JSON_INVALID_ESCAPE, -1);
          return false;
      }
    }
  }

  // Ran out of input before the closing quote.
  ReportError(JSON_SYNTAX_ERROR, -1);
  return false;
}
 685
 686 // Entry is at the first X in \uXXXX.
 687 bool JSONParser::DecodeUTF16(base_icu::UChar32* out_code_point) {
 688   absl::optional<StringPiece> escape_sequence = ConsumeChars(4);
 689   if (!escape_sequence)
 690     return false;
 691
 692   // Consume the UTF-16 code unit, which may be a high surrogate.
 693   int code_unit16_high = 0;
 694   if (!UnprefixedHexStringToInt(*escape_sequence, &code_unit16_high))
 695     return false;
 696
 697   // If this is a high surrogate, consume the next code unit to get the
 698   // low surrogate.
 699   if (CBU16_IS_SURROGATE(code_unit16_high)) {
 700     // Make sure this is the high surrogate.
 701     if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) {
 702       if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
 703         return false;
 704       *out_code_point = kUnicodeReplacementPoint;
 705       return true;
 706     }
 707
 708     // Make sure that the token has more characters to consume the
 709     // lower surrogate.
 710     if (!ConsumeIfMatch("\\u")) {
 711       if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
 712         return false;
 713       *out_code_point = kUnicodeReplacementPoint;
 714       return true;
 715     }
 716
 717     escape_sequence = ConsumeChars(4);
 718     if (!escape_sequence)
 719       return false;
 720
 721     int code_unit16_low = 0;
 722     if (!UnprefixedHexStringToInt(*escape_sequence, &code_unit16_low))
 723       return false;
 724
 725     if (!CBU16_IS_TRAIL(code_unit16_low)) {
 726       if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
 727         return false;
 728       *out_code_point = kUnicodeReplacementPoint;
 729       return true;
 730     }
 731
 732     base_icu::UChar32 code_point =
 733         CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low);
 734
 735     *out_code_point = code_point;
 736   } else {
 737     // Not a surrogate.
 738     DCHECK(CBU16_IS_SINGLE(code_unit16_high));
 739
 740     *out_code_point = code_unit16_high;
 741   }
 742
 743   return true;
 744 }
 745
 746 absl::optional<Value> JSONParser::ConsumeNumber() {
 747   const char* num_start = pos();
 748   const size_t start_index = index_;
 749   size_t end_index = start_index;
 750
 751   if (PeekChar() == '-')
 752     ConsumeChar();
 753
 754   if (!ReadInt(false)) {
 755     ReportError(JSON_SYNTAX_ERROR, 0);
 756     return absl::nullopt;
 757   }
 758   end_index = index_;
 759
 760   // The optional fraction part.
 761   if (PeekChar() == '.') {
 762     ConsumeChar();
 763     if (!ReadInt(true)) {
 764       ReportError(JSON_SYNTAX_ERROR, 0);
 765       return absl::nullopt;
 766     }
 767     end_index = index_;
 768   }
 769
 770   // Optional exponent part.
 771   absl::optional<char> c = PeekChar();
 772   if (c == 'e' || c == 'E') {
 773     ConsumeChar();
 774     if (PeekChar() == '-' || PeekChar() == '+') {
 775       ConsumeChar();
 776     }
 777     if (!ReadInt(true)) {
 778       ReportError(JSON_SYNTAX_ERROR, 0);
 779       return absl::nullopt;
 780     }
 781     end_index = index_;
 782   }
 783
 784   // ReadInt is greedy because numbers have no easily detectable sentinel,
 785   // so save off where the parser should be on exit (see Consume invariant at
 786   // the top of the header), then make sure the next token is one which is
 787   // valid.
 788   size_t exit_index = index_;
 789
 790   switch (GetNextToken()) {
 791     case T_OBJECT_END:
 792     case T_ARRAY_END:
 793     case T_LIST_SEPARATOR:
 794     case T_END_OF_INPUT:
 795       break;
 796     default:
 797       ReportError(JSON_SYNTAX_ERROR, 0);
 798       return absl::nullopt;
 799   }
 800
 801   index_ = exit_index;
 802
 803   StringPiece num_string(num_start, end_index - start_index);
 804
 805   int num_int;
 806   if (StringToInt(num_string, &num_int)) {
 807     // StringToInt will treat `-0` as zero, losing the significance of the
 808     // negation.
 809     if (num_int == 0 && num_string.starts_with('-')) {
 810       if (base::FeatureList::IsEnabled(features::kJsonNegativeZero)) {
 811         return Value(-0.0);
 812       }
 813     }
 814     return Value(num_int);
 815   }
 816
 817   double num_double;
 818   if (StringToDouble(num_string, &num_double) && std::isfinite(num_double)) {
 819     return Value(num_double);
 820   }
 821
 822   ReportError(JSON_UNREPRESENTABLE_NUMBER, 0);
 823   return absl::nullopt;
 824 }
 825
 826 bool JSONParser::ReadInt(bool allow_leading_zeros) {
 827   size_t len = 0;
 828   char first = 0;
 829
 830   while (absl::optional<char> c = PeekChar()) {
 831     if (!IsAsciiDigit(c))
 832       break;
 833
 834     if (len == 0)
 835       first = *c;
 836
 837     ++len;
 838     ConsumeChar();
 839   }
 840
 841   if (len == 0)
 842     return false;
 843
 844   if (!allow_leading_zeros && len > 1 && first == '0')
 845     return false;
 846
 847   return true;
 848 }
 849
 850 absl::optional<Value> JSONParser::ConsumeLiteral() {
 851   if (ConsumeIfMatch("true"))
 852     return Value(true);
 853   if (ConsumeIfMatch("false"))
 854     return Value(false);
 855   if (ConsumeIfMatch("null"))
 856     return Value(Value::Type::NONE);
 857   ReportError(JSON_SYNTAX_ERROR, 0);
 858   return absl::nullopt;
 859 }
 860
 861 bool JSONParser::ConsumeIfMatch(StringPiece match) {
 862   if (match == PeekChars(match.size())) {
 863     ConsumeChars(match.size());
 864     return true;
 865   }
 866   return false;
 867 }
 868
 869 void JSONParser::ReportError(JsonParseError code, int column_adjust) {
 870   error_code_ = code;
 871   error_line_ = line_number_;
 872   error_column_ = static_cast<int>(index_ - index_last_line_) + column_adjust;
 873
 874   // For a final blank line ('\n' and then EOF), a negative column_adjust may
 875   // put us below 1, which doesn't really make sense for 1-based columns.
 876   if (error_column_ < 1) {
 877     error_column_ = 1;
 878   }
 879 }
 880
 881 // static
 882 std::string JSONParser::FormatErrorMessage(int line, int column,
 883                                            const std::string& description) {
 884   if (line || column) {
 885     return StringPrintf("Line: %i, column: %i, %s",
 886         line, column, description.c_str());
 887   }
 888   return description;
 889 }
 890
 891 }  // namespace internal
 892 }  // namespace base