src/lib_json/json_reader.cpp

   1 // Copyright 2007-2011 Baptiste Lepilleur
   2 // Distributed under MIT license, or public domain if desired and
   3 // recognized in your jurisdiction.
   4 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
   5
   6 #if !defined(JSON_IS_AMALGAMATION)
   7 #include <json/assertions.h>
   8 #include <json/reader.h>
   9 #include <json/value.h>
  10 #include "json_tool.h"
  11 #endif // if !defined(JSON_IS_AMALGAMATION)
  12 #include <utility>
  13 #include <cstdio>
  14 #include <cassert>
  15 #include <cstring>
  16 #include <istream>
  17 #include <sstream>
  18 #include <memory>
  19 #include <set>
  20 #include <stdexcept>
  21
  22 #if defined(_MSC_VER) && _MSC_VER < 1500 // VC++ 8.0 and below
  23 #define snprintf _snprintf
  24 #endif
  25
  26 #if defined(_MSC_VER) && _MSC_VER >= 1400 // VC++ 8.0
  27 // Disable warning about strdup being deprecated.
  28 #pragma warning(disable : 4996)
  29 #endif
  30
  31 static int const stackLimit_g = 1000;
  32 static int       stackDepth_g = 0;  // see readValue()
  33
  34 namespace Json {
  35
  36 #if __cplusplus >= 201103L
  37 typedef std::unique_ptr<CharReader> CharReaderPtr;
  38 #else
  39 typedef std::auto_ptr<CharReader>   CharReaderPtr;
  40 #endif
  41
  42 // Implementation of class Features
  43 // ////////////////////////////////
  44
  45 Features::Features()
  46     : allowComments_(true), strictRoot_(false),
  47       allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) {}
  48
  49 Features Features::all() { return Features(); }
  50
  51 Features Features::strictMode() {
  52   Features features;
  53   features.allowComments_ = false;
  54   features.strictRoot_ = true;
  55   features.allowDroppedNullPlaceholders_ = false;
  56   features.allowNumericKeys_ = false;
  57   return features;
  58 }
  59
  60 // Implementation of class Reader
  61 // ////////////////////////////////
  62
  63 static bool containsNewLine(Reader::Location begin, Reader::Location end) {
  64   for (; begin < end; ++begin)
  65     if (*begin == '\n' || *begin == '\r')
  66       return true;
  67   return false;
  68 }
  69
  70 // Class Reader
  71 // //////////////////////////////////////////////////////////////////
  72
  73 Reader::Reader()
  74     : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
  75       lastValue_(), commentsBefore_(), features_(Features::all()),
  76       collectComments_() {}
  77
  78 Reader::Reader(const Features& features)
  79     : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
  80       lastValue_(), commentsBefore_(), features_(features), collectComments_() {
  81 }
  82
  83 bool
  84 Reader::parse(const std::string& document, Value& root, bool collectComments) {
  85   document_ = document;
  86   const char* begin = document_.c_str();
  87   const char* end = begin + document_.length();
  88   return parse(begin, end, root, collectComments);
  89 }
  90
  91 bool Reader::parse(std::istream& sin, Value& root, bool collectComments) {
  92   // std::istream_iterator<char> begin(sin);
  93   // std::istream_iterator<char> end;
  94   // Those would allow streamed input from a file, if parse() were a
  95   // template function.
  96
  97   // Since std::string is reference-counted, this at least does not
  98   // create an extra copy.
  99   std::string doc;
 100   std::getline(sin, doc, (char)EOF);
 101   return parse(doc, root, collectComments);
 102 }
 103
 104 bool Reader::parse(const char* beginDoc,
 105                    const char* endDoc,
 106                    Value& root,
 107                    bool collectComments) {
 108   if (!features_.allowComments_) {
 109     collectComments = false;
 110   }
 111
 112   begin_ = beginDoc;
 113   end_ = endDoc;
 114   collectComments_ = collectComments;
 115   current_ = begin_;
 116   lastValueEnd_ = 0;
 117   lastValue_ = 0;
 118   commentsBefore_ = "";
 119   errors_.clear();
 120   while (!nodes_.empty())
 121     nodes_.pop();
 122   nodes_.push(&root);
 123
 124   stackDepth_g = 0;  // Yes, this is bad coding, but options are limited.
 125   bool successful = readValue();
 126   Token token;
 127   skipCommentTokens(token);
 128   if (collectComments_ && !commentsBefore_.empty())
 129     root.setComment(commentsBefore_, commentAfter);
 130   if (features_.strictRoot_) {
 131     if (!root.isArray() && !root.isObject()) {
 132       // Set error location to start of doc, ideally should be first token found
 133       // in doc
 134       token.type_ = tokenError;
 135       token.start_ = beginDoc;
 136       token.end_ = endDoc;
 137       addError(
 138           "A valid JSON document must be either an array or an object value.",
 139           token);
 140       return false;
 141     }
 142   }
 143   return successful;
 144 }
 145
 146 bool Reader::readValue() {
 147   // This is a non-reentrant way to support a stackLimit. Terrible!
 148   // But this deprecated class has a security problem: Bad input can
 149   // cause a seg-fault. This seems like a fair, binary-compatible way
 150   // to prevent the problem.
 151   if (stackDepth_g >= stackLimit_g) throw std::runtime_error("Exceeded stackLimit in readValue().");
 152   ++stackDepth_g;
 153
 154   Token token;
 155   skipCommentTokens(token);
 156   bool successful = true;
 157
 158   if (collectComments_ && !commentsBefore_.empty()) {
 159     currentValue().setComment(commentsBefore_, commentBefore);
 160     commentsBefore_ = "";
 161   }
 162
 163   switch (token.type_) {
 164   case tokenObjectBegin:
 165     successful = readObject(token);
 166     currentValue().setOffsetLimit(current_ - begin_);
 167     break;
 168   case tokenArrayBegin:
 169     successful = readArray(token);
 170     currentValue().setOffsetLimit(current_ - begin_);
 171     break;
 172   case tokenNumber:
 173     successful = decodeNumber(token);
 174     break;
 175   case tokenString:
 176     successful = decodeString(token);
 177     break;
 178   case tokenTrue:
 179     {
 180     Value v(true);
 181     currentValue().swapPayload(v);
 182     currentValue().setOffsetStart(token.start_ - begin_);
 183     currentValue().setOffsetLimit(token.end_ - begin_);
 184     }
 185     break;
 186   case tokenFalse:
 187     {
 188     Value v(false);
 189     currentValue().swapPayload(v);
 190     currentValue().setOffsetStart(token.start_ - begin_);
 191     currentValue().setOffsetLimit(token.end_ - begin_);
 192     }
 193     break;
 194   case tokenNull:
 195     {
 196     Value v;
 197     currentValue().swapPayload(v);
 198     currentValue().setOffsetStart(token.start_ - begin_);
 199     currentValue().setOffsetLimit(token.end_ - begin_);
 200     }
 201     break;
 202   case tokenArraySeparator:
 203     if (features_.allowDroppedNullPlaceholders_) {
 204       // "Un-read" the current token and mark the current value as a null
 205       // token.
 206       current_--;
 207       Value v;
 208       currentValue().swapPayload(v);
 209       currentValue().setOffsetStart(current_ - begin_ - 1);
 210       currentValue().setOffsetLimit(current_ - begin_);
 211       break;
 212     }
 213   // Else, fall through...
 214   default:
 215     currentValue().setOffsetStart(token.start_ - begin_);
 216     currentValue().setOffsetLimit(token.end_ - begin_);
 217     return addError("Syntax error: value, object or array expected.", token);
 218   }
 219
 220   if (collectComments_) {
 221     lastValueEnd_ = current_;
 222     lastValue_ = &currentValue();
 223   }
 224
 225   --stackDepth_g;
 226   return successful;
 227 }
 228
 229 void Reader::skipCommentTokens(Token& token) {
 230   if (features_.allowComments_) {
 231     do {
 232       readToken(token);
 233     } while (token.type_ == tokenComment);
 234   } else {
 235     readToken(token);
 236   }
 237 }
 238
 239 bool Reader::readToken(Token& token) {
 240   skipSpaces();
 241   token.start_ = current_;
 242   Char c = getNextChar();
 243   bool ok = true;
 244   switch (c) {
 245   case '{':
 246     token.type_ = tokenObjectBegin;
 247     break;
 248   case '}':
 249     token.type_ = tokenObjectEnd;
 250     break;
 251   case '[':
 252     token.type_ = tokenArrayBegin;
 253     break;
 254   case ']':
 255     token.type_ = tokenArrayEnd;
 256     break;
 257   case '"':
 258     token.type_ = tokenString;
 259     ok = readString();
 260     break;
 261   case '/':
 262     token.type_ = tokenComment;
 263     ok = readComment();
 264     break;
 265   case '0':
 266   case '1':
 267   case '2':
 268   case '3':
 269   case '4':
 270   case '5':
 271   case '6':
 272   case '7':
 273   case '8':
 274   case '9':
 275   case '-':
 276     token.type_ = tokenNumber;
 277     readNumber();
 278     break;
 279   case 't':
 280     token.type_ = tokenTrue;
 281     ok = match("rue", 3);
 282     break;
 283   case 'f':
 284     token.type_ = tokenFalse;
 285     ok = match("alse", 4);
 286     break;
 287   case 'n':
 288     token.type_ = tokenNull;
 289     ok = match("ull", 3);
 290     break;
 291   case ',':
 292     token.type_ = tokenArraySeparator;
 293     break;
 294   case ':':
 295     token.type_ = tokenMemberSeparator;
 296     break;
 297   case 0:
 298     token.type_ = tokenEndOfStream;
 299     break;
 300   default:
 301     ok = false;
 302     break;
 303   }
 304   if (!ok)
 305     token.type_ = tokenError;
 306   token.end_ = current_;
 307   return true;
 308 }
 309
 310 void Reader::skipSpaces() {
 311   while (current_ != end_) {
 312     Char c = *current_;
 313     if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
 314       ++current_;
 315     else
 316       break;
 317   }
 318 }
 319
 320 bool Reader::match(Location pattern, int patternLength) {
 321   if (end_ - current_ < patternLength)
 322     return false;
 323   int index = patternLength;
 324   while (index--)
 325     if (current_[index] != pattern[index])
 326       return false;
 327   current_ += patternLength;
 328   return true;
 329 }
 330
 331 bool Reader::readComment() {
 332   Location commentBegin = current_ - 1;
 333   Char c = getNextChar();
 334   bool successful = false;
 335   if (c == '*')
 336     successful = readCStyleComment();
 337   else if (c == '/')
 338     successful = readCppStyleComment();
 339   if (!successful)
 340     return false;
 341
 342   if (collectComments_) {
 343     CommentPlacement placement = commentBefore;
 344     if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
 345       if (c != '*' || !containsNewLine(commentBegin, current_))
 346         placement = commentAfterOnSameLine;
 347     }
 348
 349     addComment(commentBegin, current_, placement);
 350   }
 351   return true;
 352 }
 353
 354 static std::string normalizeEOL(Reader::Location begin, Reader::Location end) {
 355   std::string normalized;
 356   normalized.reserve(end - begin);
 357   Reader::Location current = begin;
 358   while (current != end) {
 359     char c = *current++;
 360     if (c == '\r') {
 361       if (current != end && *current == '\n')
 362          // convert dos EOL
 363          ++current;
 364       // convert Mac EOL
 365       normalized += '\n';
 366     } else {
 367       normalized += c;
 368     }
 369   }
 370   return normalized;
 371 }
 372
 373 void
 374 Reader::addComment(Location begin, Location end, CommentPlacement placement) {
 375   assert(collectComments_);
 376   const std::string& normalized = normalizeEOL(begin, end);
 377   if (placement == commentAfterOnSameLine) {
 378     assert(lastValue_ != 0);
 379     lastValue_->setComment(normalized, placement);
 380   } else {
 381     commentsBefore_ += normalized;
 382   }
 383 }
 384
 385 bool Reader::readCStyleComment() {
 386   while (current_ != end_) {
 387     Char c = getNextChar();
 388     if (c == '*' && *current_ == '/')
 389       break;
 390   }
 391   return getNextChar() == '/';
 392 }
 393
 394 bool Reader::readCppStyleComment() {
 395   while (current_ != end_) {
 396     Char c = getNextChar();
 397     if (c == '\n')
 398       break;
 399     if (c == '\r') {
 400       // Consume DOS EOL. It will be normalized in addComment.
 401       if (current_ != end_ && *current_ == '\n')
 402         getNextChar();
 403       // Break on Moc OS 9 EOL.
 404       break;
 405     }
 406   }
 407   return true;
 408 }
 409
 410 void Reader::readNumber() {
 411   const char *p = current_;
 412   char c = '0'; // stopgap for already consumed character
 413   // integral part
 414   while (c >= '0' && c <= '9')
 415     c = (current_ = p) < end_ ? *p++ : 0;
 416   // fractional part
 417   if (c == '.') {
 418     c = (current_ = p) < end_ ? *p++ : 0;
 419     while (c >= '0' && c <= '9')
 420       c = (current_ = p) < end_ ? *p++ : 0;
 421   }
 422   // exponential part
 423   if (c == 'e' || c == 'E') {
 424     c = (current_ = p) < end_ ? *p++ : 0;
 425     if (c == '+' || c == '-')
 426       c = (current_ = p) < end_ ? *p++ : 0;
 427     while (c >= '0' && c <= '9')
 428       c = (current_ = p) < end_ ? *p++ : 0;
 429   }
 430 }
 431
 432 bool Reader::readString() {
 433   Char c = 0;
 434   while (current_ != end_) {
 435     c = getNextChar();
 436     if (c == '\\')
 437       getNextChar();
 438     else if (c == '"')
 439       break;
 440   }
 441   return c == '"';
 442 }
 443
 444 bool Reader::readObject(Token& tokenStart) {
 445   Token tokenName;
 446   std::string name;
 447   Value init(objectValue);
 448   currentValue().swapPayload(init);
 449   currentValue().setOffsetStart(tokenStart.start_ - begin_);
 450   while (readToken(tokenName)) {
 451     bool initialTokenOk = true;
 452     while (tokenName.type_ == tokenComment && initialTokenOk)
 453       initialTokenOk = readToken(tokenName);
 454     if (!initialTokenOk)
 455       break;
 456     if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
 457       return true;
 458     name = "";
 459     if (tokenName.type_ == tokenString) {
 460       if (!decodeString(tokenName, name))
 461         return recoverFromError(tokenObjectEnd);
 462     } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
 463       Value numberName;
 464       if (!decodeNumber(tokenName, numberName))
 465         return recoverFromError(tokenObjectEnd);
 466       name = numberName.asString();
 467     } else {
 468       break;
 469     }
 470
 471     Token colon;
 472     if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
 473       return addErrorAndRecover(
 474           "Missing ':' after object member name", colon, tokenObjectEnd);
 475     }
 476     Value& value = currentValue()[name];
 477     nodes_.push(&value);
 478     bool ok = readValue();
 479     nodes_.pop();
 480     if (!ok) // error already set
 481       return recoverFromError(tokenObjectEnd);
 482
 483     Token comma;
 484     if (!readToken(comma) ||
 485         (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
 486          comma.type_ != tokenComment)) {
 487       return addErrorAndRecover(
 488           "Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
 489     }
 490     bool finalizeTokenOk = true;
 491     while (comma.type_ == tokenComment && finalizeTokenOk)
 492       finalizeTokenOk = readToken(comma);
 493     if (comma.type_ == tokenObjectEnd)
 494       return true;
 495   }
 496   return addErrorAndRecover(
 497       "Missing '}' or object member name", tokenName, tokenObjectEnd);
 498 }
 499
 500 bool Reader::readArray(Token& tokenStart) {
 501   Value init(arrayValue);
 502   currentValue().swapPayload(init);
 503   currentValue().setOffsetStart(tokenStart.start_ - begin_);
 504   skipSpaces();
 505   if (*current_ == ']') // empty array
 506   {
 507     Token endArray;
 508     readToken(endArray);
 509     return true;
 510   }
 511   int index = 0;
 512   for (;;) {
 513     Value& value = currentValue()[index++];
 514     nodes_.push(&value);
 515     bool ok = readValue();
 516     nodes_.pop();
 517     if (!ok) // error already set
 518       return recoverFromError(tokenArrayEnd);
 519
 520     Token token;
 521     // Accept Comment after last item in the array.
 522     ok = readToken(token);
 523     while (token.type_ == tokenComment && ok) {
 524       ok = readToken(token);
 525     }
 526     bool badTokenType =
 527         (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
 528     if (!ok || badTokenType) {
 529       return addErrorAndRecover(
 530           "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
 531     }
 532     if (token.type_ == tokenArrayEnd)
 533       break;
 534   }
 535   return true;
 536 }
 537
 538 bool Reader::decodeNumber(Token& token) {
 539   Value decoded;
 540   if (!decodeNumber(token, decoded))
 541     return false;
 542   currentValue().swapPayload(decoded);
 543   currentValue().setOffsetStart(token.start_ - begin_);
 544   currentValue().setOffsetLimit(token.end_ - begin_);
 545   return true;
 546 }
 547
 548 bool Reader::decodeNumber(Token& token, Value& decoded) {
 549   // Attempts to parse the number as an integer. If the number is
 550   // larger than the maximum supported value of an integer then
 551   // we decode the number as a double.
 552   Location current = token.start_;
 553   bool isNegative = *current == '-';
 554   if (isNegative)
 555     ++current;
 556   // TODO: Help the compiler do the div and mod at compile time or get rid of them.
 557   Value::LargestUInt maxIntegerValue =
 558       isNegative ? Value::LargestUInt(-Value::minLargestInt)
 559                  : Value::maxLargestUInt;
 560   Value::LargestUInt threshold = maxIntegerValue / 10;
 561   Value::LargestUInt value = 0;
 562   while (current < token.end_) {
 563     Char c = *current++;
 564     if (c < '0' || c > '9')
 565       return decodeDouble(token, decoded);
 566     Value::UInt digit(c - '0');
 567     if (value >= threshold) {
 568       // We've hit or exceeded the max value divided by 10 (rounded down). If
 569       // a) we've only just touched the limit, b) this is the last digit, and
 570       // c) it's small enough to fit in that rounding delta, we're okay.
 571       // Otherwise treat this number as a double to avoid overflow.
 572       if (value > threshold || current != token.end_ ||
 573           digit > maxIntegerValue % 10) {
 574         return decodeDouble(token, decoded);
 575       }
 576     }
 577     value = value * 10 + digit;
 578   }
 579   if (isNegative)
 580     decoded = -Value::LargestInt(value);
 581   else if (value <= Value::LargestUInt(Value::maxInt))
 582     decoded = Value::LargestInt(value);
 583   else
 584     decoded = value;
 585   return true;
 586 }
 587
 588 bool Reader::decodeDouble(Token& token) {
 589   Value decoded;
 590   if (!decodeDouble(token, decoded))
 591     return false;
 592   currentValue().swapPayload(decoded);
 593   currentValue().setOffsetStart(token.start_ - begin_);
 594   currentValue().setOffsetLimit(token.end_ - begin_);
 595   return true;
 596 }
 597
 598 bool Reader::decodeDouble(Token& token, Value& decoded) {
 599   double value = 0;
 600   const int bufferSize = 32;
 601   int count;
 602   int length = int(token.end_ - token.start_);
 603
 604   // Sanity check to avoid buffer overflow exploits.
 605   if (length < 0) {
 606     return addError("Unable to parse token length", token);
 607   }
 608
 609   // Avoid using a string constant for the format control string given to
 610   // sscanf, as this can cause hard to debug crashes on OS X. See here for more
 611   // info:
 612   //
 613   //     http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
 614   char format[] = "%lf";
 615
 616   if (length <= bufferSize) {
 617     Char buffer[bufferSize + 1];
 618     memcpy(buffer, token.start_, length);
 619     buffer[length] = 0;
 620     count = sscanf(buffer, format, &value);
 621   } else {
 622     std::string buffer(token.start_, token.end_);
 623     count = sscanf(buffer.c_str(), format, &value);
 624   }
 625
 626   if (count != 1)
 627     return addError("'" + std::string(token.start_, token.end_) +
 628                         "' is not a number.",
 629                     token);
 630   decoded = value;
 631   return true;
 632 }
 633
 634 bool Reader::decodeString(Token& token) {
 635   std::string decoded_string;
 636   if (!decodeString(token, decoded_string))
 637     return false;
 638   Value decoded(decoded_string);
 639   currentValue().swapPayload(decoded);
 640   currentValue().setOffsetStart(token.start_ - begin_);
 641   currentValue().setOffsetLimit(token.end_ - begin_);
 642   return true;
 643 }
 644
 645 bool Reader::decodeString(Token& token, std::string& decoded) {
 646   decoded.reserve(token.end_ - token.start_ - 2);
 647   Location current = token.start_ + 1; // skip '"'
 648   Location end = token.end_ - 1;       // do not include '"'
 649   while (current != end) {
 650     Char c = *current++;
 651     if (c == '"')
 652       break;
 653     else if (c == '\\') {
 654       if (current == end)
 655         return addError("Empty escape sequence in string", token, current);
 656       Char escape = *current++;
 657       switch (escape) {
 658       case '"':
 659         decoded += '"';
 660         break;
 661       case '/':
 662         decoded += '/';
 663         break;
 664       case '\\':
 665         decoded += '\\';
 666         break;
 667       case 'b':
 668         decoded += '\b';
 669         break;
 670       case 'f':
 671         decoded += '\f';
 672         break;
 673       case 'n':
 674         decoded += '\n';
 675         break;
 676       case 'r':
 677         decoded += '\r';
 678         break;
 679       case 't':
 680         decoded += '\t';
 681         break;
 682       case 'u': {
 683         unsigned int unicode;
 684         if (!decodeUnicodeCodePoint(token, current, end, unicode))
 685           return false;
 686         decoded += codePointToUTF8(unicode);
 687       } break;
 688       default:
 689         return addError("Bad escape sequence in string", token, current);
 690       }
 691     } else {
 692       decoded += c;
 693     }
 694   }
 695   return true;
 696 }
 697
 698 bool Reader::decodeUnicodeCodePoint(Token& token,
 699                                     Location& current,
 700                                     Location end,
 701                                     unsigned int& unicode) {
 702
 703   if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
 704     return false;
 705   if (unicode >= 0xD800 && unicode <= 0xDBFF) {
 706     // surrogate pairs
 707     if (end - current < 6)
 708       return addError(
 709           "additional six characters expected to parse unicode surrogate pair.",
 710           token,
 711           current);
 712     unsigned int surrogatePair;
 713     if (*(current++) == '\\' && *(current++) == 'u') {
 714       if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
 715         unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
 716       } else
 717         return false;
 718     } else
 719       return addError("expecting another \\u token to begin the second half of "
 720                       "a unicode surrogate pair",
 721                       token,
 722                       current);
 723   }
 724   return true;
 725 }
 726
 727 bool Reader::decodeUnicodeEscapeSequence(Token& token,
 728                                          Location& current,
 729                                          Location end,
 730                                          unsigned int& unicode) {
 731   if (end - current < 4)
 732     return addError(
 733         "Bad unicode escape sequence in string: four digits expected.",
 734         token,
 735         current);
 736   unicode = 0;
 737   for (int index = 0; index < 4; ++index) {
 738     Char c = *current++;
 739     unicode *= 16;
 740     if (c >= '0' && c <= '9')
 741       unicode += c - '0';
 742     else if (c >= 'a' && c <= 'f')
 743       unicode += c - 'a' + 10;
 744     else if (c >= 'A' && c <= 'F')
 745       unicode += c - 'A' + 10;
 746     else
 747       return addError(
 748           "Bad unicode escape sequence in string: hexadecimal digit expected.",
 749           token,
 750           current);
 751   }
 752   return true;
 753 }
 754
 755 bool
 756 Reader::addError(const std::string& message, Token& token, Location extra) {
 757   ErrorInfo info;
 758   info.token_ = token;
 759   info.message_ = message;
 760   info.extra_ = extra;
 761   errors_.push_back(info);
 762   return false;
 763 }
 764
 765 bool Reader::recoverFromError(TokenType skipUntilToken) {
 766   int errorCount = int(errors_.size());
 767   Token skip;
 768   for (;;) {
 769     if (!readToken(skip))
 770       errors_.resize(errorCount); // discard errors caused by recovery
 771     if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
 772       break;
 773   }
 774   errors_.resize(errorCount);
 775   return false;
 776 }
 777
 778 bool Reader::addErrorAndRecover(const std::string& message,
 779                                 Token& token,
 780                                 TokenType skipUntilToken) {
 781   addError(message, token);
 782   return recoverFromError(skipUntilToken);
 783 }
 784
 785 Value& Reader::currentValue() { return *(nodes_.top()); }
 786
 787 Reader::Char Reader::getNextChar() {
 788   if (current_ == end_)
 789     return 0;
 790   return *current_++;
 791 }
 792
 793 void Reader::getLocationLineAndColumn(Location location,
 794                                       int& line,
 795                                       int& column) const {
 796   Location current = begin_;
 797   Location lastLineStart = current;
 798   line = 0;
 799   while (current < location && current != end_) {
 800     Char c = *current++;
 801     if (c == '\r') {
 802       if (*current == '\n')
 803         ++current;
 804       lastLineStart = current;
 805       ++line;
 806     } else if (c == '\n') {
 807       lastLineStart = current;
 808       ++line;
 809     }
 810   }
 811   // column & line start at 1
 812   column = int(location - lastLineStart) + 1;
 813   ++line;
 814 }
 815
 816 std::string Reader::getLocationLineAndColumn(Location location) const {
 817   int line, column;
 818   getLocationLineAndColumn(location, line, column);
 819   char buffer[18 + 16 + 16 + 1];
 820 #if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__)
 821 #if defined(WINCE)
 822   _snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
 823 #else
 824   sprintf_s(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
 825 #endif
 826 #else
 827   snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
 828 #endif
 829   return buffer;
 830 }
 831
 832 // Deprecated. Preserved for backward compatibility
 833 std::string Reader::getFormatedErrorMessages() const {
 834   return getFormattedErrorMessages();
 835 }
 836
 837 std::string Reader::getFormattedErrorMessages() const {
 838   std::string formattedMessage;
 839   for (Errors::const_iterator itError = errors_.begin();
 840        itError != errors_.end();
 841        ++itError) {
 842     const ErrorInfo& error = *itError;
 843     formattedMessage +=
 844         "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
 845     formattedMessage += "  " + error.message_ + "\n";
 846     if (error.extra_)
 847       formattedMessage +=
 848           "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
 849   }
 850   return formattedMessage;
 851 }
 852
 853 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
 854   std::vector<Reader::StructuredError> allErrors;
 855   for (Errors::const_iterator itError = errors_.begin();
 856        itError != errors_.end();
 857        ++itError) {
 858     const ErrorInfo& error = *itError;
 859     Reader::StructuredError structured;
 860     structured.offset_start = error.token_.start_ - begin_;
 861     structured.offset_limit = error.token_.end_ - begin_;
 862     structured.message = error.message_;
 863     allErrors.push_back(structured);
 864   }
 865   return allErrors;
 866 }
 867
 868 bool Reader::pushError(const Value& value, const std::string& message) {
 869   size_t length = end_ - begin_;
 870   if(value.getOffsetStart() > length
 871     || value.getOffsetLimit() > length)
 872     return false;
 873   Token token;
 874   token.type_ = tokenError;
 875   token.start_ = begin_ + value.getOffsetStart();
 876   token.end_ = end_ + value.getOffsetLimit();
 877   ErrorInfo info;
 878   info.token_ = token;
 879   info.message_ = message;
 880   info.extra_ = 0;
 881   errors_.push_back(info);
 882   return true;
 883 }
 884
 885 bool Reader::pushError(const Value& value, const std::string& message, const Value& extra) {
 886   size_t length = end_ - begin_;
 887   if(value.getOffsetStart() > length
 888     || value.getOffsetLimit() > length
 889     || extra.getOffsetLimit() > length)
 890     return false;
 891   Token token;
 892   token.type_ = tokenError;
 893   token.start_ = begin_ + value.getOffsetStart();
 894   token.end_ = begin_ + value.getOffsetLimit();
 895   ErrorInfo info;
 896   info.token_ = token;
 897   info.message_ = message;
 898   info.extra_ = begin_ + extra.getOffsetStart();
 899   errors_.push_back(info);
 900   return true;
 901 }
 902
 903 bool Reader::good() const {
 904   return !errors_.size();
 905 }
 906
 907 // exact copy of Features
 908 class OurFeatures {
 909 public:
 910   static OurFeatures all();
 911   static OurFeatures strictMode();
 912   OurFeatures();
 913   bool allowComments_;
 914   bool strictRoot_;
 915   bool allowDroppedNullPlaceholders_;
 916   bool allowNumericKeys_;
 917   int stackLimit_;
 918 };  // OurFeatures
 919
 920 // exact copy of Implementation of class Features
 921 // ////////////////////////////////
 922
 923 OurFeatures::OurFeatures()
 924     : allowComments_(true), strictRoot_(false),
 925       allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) {}
 926
 927 OurFeatures OurFeatures::all() { return OurFeatures(); }
 928
 929 OurFeatures OurFeatures::strictMode() {
 930   OurFeatures features;
 931   features.allowComments_ = false;
 932   features.strictRoot_ = true;
 933   features.allowDroppedNullPlaceholders_ = false;
 934   features.allowNumericKeys_ = false;
 935   return features;
 936 }
 937
 938 // Implementation of class Reader
 939 // ////////////////////////////////
 940
 941 // exact copy of Reader, renamed to OurReader
 942 class OurReader {
 943 public:
 944   typedef char Char;
 945   typedef const Char* Location;
 946   struct StructuredError {
 947     size_t offset_start;
 948     size_t offset_limit;
 949     std::string message;
 950   };
 951
 952   OurReader(OurFeatures const& features);
 953   bool parse(const char* beginDoc,
 954              const char* endDoc,
 955              Value& root,
 956              bool collectComments = true);
 957   std::string getFormattedErrorMessages() const;
 958   std::vector<StructuredError> getStructuredErrors() const;
 959   bool pushError(const Value& value, const std::string& message);
 960   bool pushError(const Value& value, const std::string& message, const Value& extra);
 961   bool good() const;
 962
 963 private:
 964   OurReader(OurReader const&);  // no impl
 965   void operator=(OurReader const&);  // no impl
 966
 967   enum TokenType {
 968     tokenEndOfStream = 0,
 969     tokenObjectBegin,
 970     tokenObjectEnd,
 971     tokenArrayBegin,
 972     tokenArrayEnd,
 973     tokenString,
 974     tokenNumber,
 975     tokenTrue,
 976     tokenFalse,
 977     tokenNull,
 978     tokenArraySeparator,
 979     tokenMemberSeparator,
 980     tokenComment,
 981     tokenError
 982   };
 983
 984   class Token {
 985   public:
 986     TokenType type_;
 987     Location start_;
 988     Location end_;
 989   };
 990
 991   class ErrorInfo {
 992   public:
 993     Token token_;
 994     std::string message_;
 995     Location extra_;
 996   };
 997
 998   typedef std::deque<ErrorInfo> Errors;
 999
1000   bool readToken(Token& token);
1001   void skipSpaces();
1002   bool match(Location pattern, int patternLength);
1003   bool readComment();
1004   bool readCStyleComment();
1005   bool readCppStyleComment();
1006   bool readString();
1007   void readNumber();
1008   bool readValue();
1009   bool readObject(Token& token);
1010   bool readArray(Token& token);
1011   bool decodeNumber(Token& token);
1012   bool decodeNumber(Token& token, Value& decoded);
1013   bool decodeString(Token& token);
1014   bool decodeString(Token& token, std::string& decoded);
1015   bool decodeDouble(Token& token);
1016   bool decodeDouble(Token& token, Value& decoded);
1017   bool decodeUnicodeCodePoint(Token& token,
1018                               Location& current,
1019                               Location end,
1020                               unsigned int& unicode);
1021   bool decodeUnicodeEscapeSequence(Token& token,
1022                                    Location& current,
1023                                    Location end,
1024                                    unsigned int& unicode);
1025   bool addError(const std::string& message, Token& token, Location extra = 0);
1026   bool recoverFromError(TokenType skipUntilToken);
1027   bool addErrorAndRecover(const std::string& message,
1028                           Token& token,
1029                           TokenType skipUntilToken);
1030   void skipUntilSpace();
1031   Value& currentValue();
1032   Char getNextChar();
1033   void
1034   getLocationLineAndColumn(Location location, int& line, int& column) const;
1035   std::string getLocationLineAndColumn(Location location) const;
1036   void addComment(Location begin, Location end, CommentPlacement placement);
1037   void skipCommentTokens(Token& token);
1038
1039   typedef std::stack<Value*> Nodes;
1040   Nodes nodes_;
1041   Errors errors_;
1042   std::string document_;
1043   Location begin_;
1044   Location end_;
1045   Location current_;
1046   Location lastValueEnd_;
1047   Value* lastValue_;
1048   std::string commentsBefore_;
1049   int stackDepth_;
1050
1051   OurFeatures const features_;
1052   bool collectComments_;
1053 };  // OurReader
1054
// Complete copy of the Reader implementation, for OurReader.
1056
1057 OurReader::OurReader(OurFeatures const& features)
1058     : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
1059       lastValue_(), commentsBefore_(), features_(features), collectComments_() {
1060 }
1061
1062 bool OurReader::parse(const char* beginDoc,
1063                    const char* endDoc,
1064                    Value& root,
1065                    bool collectComments) {
1066   if (!features_.allowComments_) {
1067     collectComments = false;
1068   }
1069
1070   begin_ = beginDoc;
1071   end_ = endDoc;
1072   collectComments_ = collectComments;
1073   current_ = begin_;
1074   lastValueEnd_ = 0;
1075   lastValue_ = 0;
1076   commentsBefore_ = "";
1077   errors_.clear();
1078   while (!nodes_.empty())
1079     nodes_.pop();
1080   nodes_.push(&root);
1081
1082   stackDepth_ = 0;
1083   bool successful = readValue();
1084   Token token;
1085   skipCommentTokens(token);
1086   if (collectComments_ && !commentsBefore_.empty())
1087     root.setComment(commentsBefore_, commentAfter);
1088   if (features_.strictRoot_) {
1089     if (!root.isArray() && !root.isObject()) {
1090       // Set error location to start of doc, ideally should be first token found
1091       // in doc
1092       token.type_ = tokenError;
1093       token.start_ = beginDoc;
1094       token.end_ = endDoc;
1095       addError(
1096           "A valid JSON document must be either an array or an object value.",
1097           token);
1098       return false;
1099     }
1100   }
1101   return successful;
1102 }
1103
1104 bool OurReader::readValue() {
1105   if (stackDepth_ >= features_.stackLimit_) throw std::runtime_error("Exceeded stackLimit in readValue().");
1106   ++stackDepth_;
1107   Token token;
1108   skipCommentTokens(token);
1109   bool successful = true;
1110
1111   if (collectComments_ && !commentsBefore_.empty()) {
1112     currentValue().setComment(commentsBefore_, commentBefore);
1113     commentsBefore_ = "";
1114   }
1115
1116   switch (token.type_) {
1117   case tokenObjectBegin:
1118     successful = readObject(token);
1119     currentValue().setOffsetLimit(current_ - begin_);
1120     break;
1121   case tokenArrayBegin:
1122     successful = readArray(token);
1123     currentValue().setOffsetLimit(current_ - begin_);
1124     break;
1125   case tokenNumber:
1126     successful = decodeNumber(token);
1127     break;
1128   case tokenString:
1129     successful = decodeString(token);
1130     break;
1131   case tokenTrue:
1132     {
1133     Value v(true);
1134     currentValue().swapPayload(v);
1135     currentValue().setOffsetStart(token.start_ - begin_);
1136     currentValue().setOffsetLimit(token.end_ - begin_);
1137     }
1138     break;
1139   case tokenFalse:
1140     {
1141     Value v(false);
1142     currentValue().swapPayload(v);
1143     currentValue().setOffsetStart(token.start_ - begin_);
1144     currentValue().setOffsetLimit(token.end_ - begin_);
1145     }
1146     break;
1147   case tokenNull:
1148     {
1149     Value v;
1150     currentValue().swapPayload(v);
1151     currentValue().setOffsetStart(token.start_ - begin_);
1152     currentValue().setOffsetLimit(token.end_ - begin_);
1153     }
1154     break;
1155   case tokenArraySeparator:
1156     if (features_.allowDroppedNullPlaceholders_) {
1157       // "Un-read" the current token and mark the current value as a null
1158       // token.
1159       current_--;
1160       Value v;
1161       currentValue().swapPayload(v);
1162       currentValue().setOffsetStart(current_ - begin_ - 1);
1163       currentValue().setOffsetLimit(current_ - begin_);
1164       break;
1165     }
1166   // Else, fall through...
1167   default:
1168     currentValue().setOffsetStart(token.start_ - begin_);
1169     currentValue().setOffsetLimit(token.end_ - begin_);
1170     return addError("Syntax error: value, object or array expected.", token);
1171   }
1172
1173   if (collectComments_) {
1174     lastValueEnd_ = current_;
1175     lastValue_ = &currentValue();
1176   }
1177
1178   --stackDepth_;
1179   return successful;
1180 }
1181
1182 void OurReader::skipCommentTokens(Token& token) {
1183   if (features_.allowComments_) {
1184     do {
1185       readToken(token);
1186     } while (token.type_ == tokenComment);
1187   } else {
1188     readToken(token);
1189   }
1190 }
1191
1192 bool OurReader::readToken(Token& token) {
1193   skipSpaces();
1194   token.start_ = current_;
1195   Char c = getNextChar();
1196   bool ok = true;
1197   switch (c) {
1198   case '{':
1199     token.type_ = tokenObjectBegin;
1200     break;
1201   case '}':
1202     token.type_ = tokenObjectEnd;
1203     break;
1204   case '[':
1205     token.type_ = tokenArrayBegin;
1206     break;
1207   case ']':
1208     token.type_ = tokenArrayEnd;
1209     break;
1210   case '"':
1211     token.type_ = tokenString;
1212     ok = readString();
1213     break;
1214   case '/':
1215     token.type_ = tokenComment;
1216     ok = readComment();
1217     break;
1218   case '0':
1219   case '1':
1220   case '2':
1221   case '3':
1222   case '4':
1223   case '5':
1224   case '6':
1225   case '7':
1226   case '8':
1227   case '9':
1228   case '-':
1229     token.type_ = tokenNumber;
1230     readNumber();
1231     break;
1232   case 't':
1233     token.type_ = tokenTrue;
1234     ok = match("rue", 3);
1235     break;
1236   case 'f':
1237     token.type_ = tokenFalse;
1238     ok = match("alse", 4);
1239     break;
1240   case 'n':
1241     token.type_ = tokenNull;
1242     ok = match("ull", 3);
1243     break;
1244   case ',':
1245     token.type_ = tokenArraySeparator;
1246     break;
1247   case ':':
1248     token.type_ = tokenMemberSeparator;
1249     break;
1250   case 0:
1251     token.type_ = tokenEndOfStream;
1252     break;
1253   default:
1254     ok = false;
1255     break;
1256   }
1257   if (!ok)
1258     token.type_ = tokenError;
1259   token.end_ = current_;
1260   return true;
1261 }
1262
1263 void OurReader::skipSpaces() {
1264   while (current_ != end_) {
1265     Char c = *current_;
1266     if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1267       ++current_;
1268     else
1269       break;
1270   }
1271 }
1272
1273 bool OurReader::match(Location pattern, int patternLength) {
1274   if (end_ - current_ < patternLength)
1275     return false;
1276   int index = patternLength;
1277   while (index--)
1278     if (current_[index] != pattern[index])
1279       return false;
1280   current_ += patternLength;
1281   return true;
1282 }
1283
1284 bool OurReader::readComment() {
1285   Location commentBegin = current_ - 1;
1286   Char c = getNextChar();
1287   bool successful = false;
1288   if (c == '*')
1289     successful = readCStyleComment();
1290   else if (c == '/')
1291     successful = readCppStyleComment();
1292   if (!successful)
1293     return false;
1294
1295   if (collectComments_) {
1296     CommentPlacement placement = commentBefore;
1297     if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1298       if (c != '*' || !containsNewLine(commentBegin, current_))
1299         placement = commentAfterOnSameLine;
1300     }
1301
1302     addComment(commentBegin, current_, placement);
1303   }
1304   return true;
1305 }
1306
1307 void
1308 OurReader::addComment(Location begin, Location end, CommentPlacement placement) {
1309   assert(collectComments_);
1310   const std::string& normalized = normalizeEOL(begin, end);
1311   if (placement == commentAfterOnSameLine) {
1312     assert(lastValue_ != 0);
1313     lastValue_->setComment(normalized, placement);
1314   } else {
1315     commentsBefore_ += normalized;
1316   }
1317 }
1318
1319 bool OurReader::readCStyleComment() {
1320   while (current_ != end_) {
1321     Char c = getNextChar();
1322     if (c == '*' && *current_ == '/')
1323       break;
1324   }
1325   return getNextChar() == '/';
1326 }
1327
1328 bool OurReader::readCppStyleComment() {
1329   while (current_ != end_) {
1330     Char c = getNextChar();
1331     if (c == '\n')
1332       break;
1333     if (c == '\r') {
1334       // Consume DOS EOL. It will be normalized in addComment.
1335       if (current_ != end_ && *current_ == '\n')
1336         getNextChar();
1337       // Break on Moc OS 9 EOL.
1338       break;
1339     }
1340   }
1341   return true;
1342 }
1343
1344 void OurReader::readNumber() {
1345   const char *p = current_;
1346   char c = '0'; // stopgap for already consumed character
1347   // integral part
1348   while (c >= '0' && c <= '9')
1349     c = (current_ = p) < end_ ? *p++ : 0;
1350   // fractional part
1351   if (c == '.') {
1352     c = (current_ = p) < end_ ? *p++ : 0;
1353     while (c >= '0' && c <= '9')
1354       c = (current_ = p) < end_ ? *p++ : 0;
1355   }
1356   // exponential part
1357   if (c == 'e' || c == 'E') {
1358     c = (current_ = p) < end_ ? *p++ : 0;
1359     if (c == '+' || c == '-')
1360       c = (current_ = p) < end_ ? *p++ : 0;
1361     while (c >= '0' && c <= '9')
1362       c = (current_ = p) < end_ ? *p++ : 0;
1363   }
1364 }
1365
1366 bool OurReader::readString() {
1367   Char c = 0;
1368   while (current_ != end_) {
1369     c = getNextChar();
1370     if (c == '\\')
1371       getNextChar();
1372     else if (c == '"')
1373       break;
1374   }
1375   return c == '"';
1376 }
1377
1378 bool OurReader::readObject(Token& tokenStart) {
1379   Token tokenName;
1380   std::string name;
1381   Value init(objectValue);
1382   currentValue().swapPayload(init);
1383   currentValue().setOffsetStart(tokenStart.start_ - begin_);
1384   while (readToken(tokenName)) {
1385     bool initialTokenOk = true;
1386     while (tokenName.type_ == tokenComment && initialTokenOk)
1387       initialTokenOk = readToken(tokenName);
1388     if (!initialTokenOk)
1389       break;
1390     if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
1391       return true;
1392     name = "";
1393     if (tokenName.type_ == tokenString) {
1394       if (!decodeString(tokenName, name))
1395         return recoverFromError(tokenObjectEnd);
1396     } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1397       Value numberName;
1398       if (!decodeNumber(tokenName, numberName))
1399         return recoverFromError(tokenObjectEnd);
1400       name = numberName.asString();
1401     } else {
1402       break;
1403     }
1404
1405     Token colon;
1406     if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1407       return addErrorAndRecover(
1408           "Missing ':' after object member name", colon, tokenObjectEnd);
1409     }
1410     Value& value = currentValue()[name];
1411     nodes_.push(&value);
1412     bool ok = readValue();
1413     nodes_.pop();
1414     if (!ok) // error already set
1415       return recoverFromError(tokenObjectEnd);
1416
1417     Token comma;
1418     if (!readToken(comma) ||
1419         (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
1420          comma.type_ != tokenComment)) {
1421       return addErrorAndRecover(
1422           "Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
1423     }
1424     bool finalizeTokenOk = true;
1425     while (comma.type_ == tokenComment && finalizeTokenOk)
1426       finalizeTokenOk = readToken(comma);
1427     if (comma.type_ == tokenObjectEnd)
1428       return true;
1429   }
1430   return addErrorAndRecover(
1431       "Missing '}' or object member name", tokenName, tokenObjectEnd);
1432 }
1433
1434 bool OurReader::readArray(Token& tokenStart) {
1435   Value init(arrayValue);
1436   currentValue().swapPayload(init);
1437   currentValue().setOffsetStart(tokenStart.start_ - begin_);
1438   skipSpaces();
1439   if (*current_ == ']') // empty array
1440   {
1441     Token endArray;
1442     readToken(endArray);
1443     return true;
1444   }
1445   int index = 0;
1446   for (;;) {
1447     Value& value = currentValue()[index++];
1448     nodes_.push(&value);
1449     bool ok = readValue();
1450     nodes_.pop();
1451     if (!ok) // error already set
1452       return recoverFromError(tokenArrayEnd);
1453
1454     Token token;
1455     // Accept Comment after last item in the array.
1456     ok = readToken(token);
1457     while (token.type_ == tokenComment && ok) {
1458       ok = readToken(token);
1459     }
1460     bool badTokenType =
1461         (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
1462     if (!ok || badTokenType) {
1463       return addErrorAndRecover(
1464           "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
1465     }
1466     if (token.type_ == tokenArrayEnd)
1467       break;
1468   }
1469   return true;
1470 }
1471
1472 bool OurReader::decodeNumber(Token& token) {
1473   Value decoded;
1474   if (!decodeNumber(token, decoded))
1475     return false;
1476   currentValue().swapPayload(decoded);
1477   currentValue().setOffsetStart(token.start_ - begin_);
1478   currentValue().setOffsetLimit(token.end_ - begin_);
1479   return true;
1480 }
1481
1482 bool OurReader::decodeNumber(Token& token, Value& decoded) {
1483   // Attempts to parse the number as an integer. If the number is
1484   // larger than the maximum supported value of an integer then
1485   // we decode the number as a double.
1486   Location current = token.start_;
1487   bool isNegative = *current == '-';
1488   if (isNegative)
1489     ++current;
1490   // TODO: Help the compiler do the div and mod at compile time or get rid of them.
1491   Value::LargestUInt maxIntegerValue =
1492       isNegative ? Value::LargestUInt(-Value::minLargestInt)
1493                  : Value::maxLargestUInt;
1494   Value::LargestUInt threshold = maxIntegerValue / 10;
1495   Value::LargestUInt value = 0;
1496   while (current < token.end_) {
1497     Char c = *current++;
1498     if (c < '0' || c > '9')
1499       return decodeDouble(token, decoded);
1500     Value::UInt digit(c - '0');
1501     if (value >= threshold) {
1502       // We've hit or exceeded the max value divided by 10 (rounded down). If
1503       // a) we've only just touched the limit, b) this is the last digit, and
1504       // c) it's small enough to fit in that rounding delta, we're okay.
1505       // Otherwise treat this number as a double to avoid overflow.
1506       if (value > threshold || current != token.end_ ||
1507           digit > maxIntegerValue % 10) {
1508         return decodeDouble(token, decoded);
1509       }
1510     }
1511     value = value * 10 + digit;
1512   }
1513   if (isNegative)
1514     decoded = -Value::LargestInt(value);
1515   else if (value <= Value::LargestUInt(Value::maxInt))
1516     decoded = Value::LargestInt(value);
1517   else
1518     decoded = value;
1519   return true;
1520 }
1521
1522 bool OurReader::decodeDouble(Token& token) {
1523   Value decoded;
1524   if (!decodeDouble(token, decoded))
1525     return false;
1526   currentValue().swapPayload(decoded);
1527   currentValue().setOffsetStart(token.start_ - begin_);
1528   currentValue().setOffsetLimit(token.end_ - begin_);
1529   return true;
1530 }
1531
1532 bool OurReader::decodeDouble(Token& token, Value& decoded) {
1533   double value = 0;
1534   const int bufferSize = 32;
1535   int count;
1536   int length = int(token.end_ - token.start_);
1537
1538   // Sanity check to avoid buffer overflow exploits.
1539   if (length < 0) {
1540     return addError("Unable to parse token length", token);
1541   }
1542
1543   // Avoid using a string constant for the format control string given to
1544   // sscanf, as this can cause hard to debug crashes on OS X. See here for more
1545   // info:
1546   //
1547   //     http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
1548   char format[] = "%lf";
1549
1550   if (length <= bufferSize) {
1551     Char buffer[bufferSize + 1];
1552     memcpy(buffer, token.start_, length);
1553     buffer[length] = 0;
1554     count = sscanf(buffer, format, &value);
1555   } else {
1556     std::string buffer(token.start_, token.end_);
1557     count = sscanf(buffer.c_str(), format, &value);
1558   }
1559
1560   if (count != 1)
1561     return addError("'" + std::string(token.start_, token.end_) +
1562                         "' is not a number.",
1563                     token);
1564   decoded = value;
1565   return true;
1566 }
1567
1568 bool OurReader::decodeString(Token& token) {
1569   std::string decoded_string;
1570   if (!decodeString(token, decoded_string))
1571     return false;
1572   Value decoded(decoded_string);
1573   currentValue().swapPayload(decoded);
1574   currentValue().setOffsetStart(token.start_ - begin_);
1575   currentValue().setOffsetLimit(token.end_ - begin_);
1576   return true;
1577 }
1578
1579 bool OurReader::decodeString(Token& token, std::string& decoded) {
1580   decoded.reserve(token.end_ - token.start_ - 2);
1581   Location current = token.start_ + 1; // skip '"'
1582   Location end = token.end_ - 1;       // do not include '"'
1583   while (current != end) {
1584     Char c = *current++;
1585     if (c == '"')
1586       break;
1587     else if (c == '\\') {
1588       if (current == end)
1589         return addError("Empty escape sequence in string", token, current);
1590       Char escape = *current++;
1591       switch (escape) {
1592       case '"':
1593         decoded += '"';
1594         break;
1595       case '/':
1596         decoded += '/';
1597         break;
1598       case '\\':
1599         decoded += '\\';
1600         break;
1601       case 'b':
1602         decoded += '\b';
1603         break;
1604       case 'f':
1605         decoded += '\f';
1606         break;
1607       case 'n':
1608         decoded += '\n';
1609         break;
1610       case 'r':
1611         decoded += '\r';
1612         break;
1613       case 't':
1614         decoded += '\t';
1615         break;
1616       case 'u': {
1617         unsigned int unicode;
1618         if (!decodeUnicodeCodePoint(token, current, end, unicode))
1619           return false;
1620         decoded += codePointToUTF8(unicode);
1621       } break;
1622       default:
1623         return addError("Bad escape sequence in string", token, current);
1624       }
1625     } else {
1626       decoded += c;
1627     }
1628   }
1629   return true;
1630 }
1631
1632 bool OurReader::decodeUnicodeCodePoint(Token& token,
1633                                     Location& current,
1634                                     Location end,
1635                                     unsigned int& unicode) {
1636
1637   if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1638     return false;
1639   if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1640     // surrogate pairs
1641     if (end - current < 6)
1642       return addError(
1643           "additional six characters expected to parse unicode surrogate pair.",
1644           token,
1645           current);
1646     unsigned int surrogatePair;
1647     if (*(current++) == '\\' && *(current++) == 'u') {
1648       if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1649         unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1650       } else
1651         return false;
1652     } else
1653       return addError("expecting another \\u token to begin the second half of "
1654                       "a unicode surrogate pair",
1655                       token,
1656                       current);
1657   }
1658   return true;
1659 }
1660
1661 bool OurReader::decodeUnicodeEscapeSequence(Token& token,
1662                                          Location& current,
1663                                          Location end,
1664                                          unsigned int& unicode) {
1665   if (end - current < 4)
1666     return addError(
1667         "Bad unicode escape sequence in string: four digits expected.",
1668         token,
1669         current);
1670   unicode = 0;
1671   for (int index = 0; index < 4; ++index) {
1672     Char c = *current++;
1673     unicode *= 16;
1674     if (c >= '0' && c <= '9')
1675       unicode += c - '0';
1676     else if (c >= 'a' && c <= 'f')
1677       unicode += c - 'a' + 10;
1678     else if (c >= 'A' && c <= 'F')
1679       unicode += c - 'A' + 10;
1680     else
1681       return addError(
1682           "Bad unicode escape sequence in string: hexadecimal digit expected.",
1683           token,
1684           current);
1685   }
1686   return true;
1687 }
1688
1689 bool
1690 OurReader::addError(const std::string& message, Token& token, Location extra) {
1691   ErrorInfo info;
1692   info.token_ = token;
1693   info.message_ = message;
1694   info.extra_ = extra;
1695   errors_.push_back(info);
1696   return false;
1697 }
1698
1699 bool OurReader::recoverFromError(TokenType skipUntilToken) {
1700   int errorCount = int(errors_.size());
1701   Token skip;
1702   for (;;) {
1703     if (!readToken(skip))
1704       errors_.resize(errorCount); // discard errors caused by recovery
1705     if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1706       break;
1707   }
1708   errors_.resize(errorCount);
1709   return false;
1710 }
1711
1712 bool OurReader::addErrorAndRecover(const std::string& message,
1713                                 Token& token,
1714                                 TokenType skipUntilToken) {
1715   addError(message, token);
1716   return recoverFromError(skipUntilToken);
1717 }
1718
1719 Value& OurReader::currentValue() { return *(nodes_.top()); }
1720
1721 OurReader::Char OurReader::getNextChar() {
1722   if (current_ == end_)
1723     return 0;
1724   return *current_++;
1725 }
1726
1727 void OurReader::getLocationLineAndColumn(Location location,
1728                                       int& line,
1729                                       int& column) const {
1730   Location current = begin_;
1731   Location lastLineStart = current;
1732   line = 0;
1733   while (current < location && current != end_) {
1734     Char c = *current++;
1735     if (c == '\r') {
1736       if (*current == '\n')
1737         ++current;
1738       lastLineStart = current;
1739       ++line;
1740     } else if (c == '\n') {
1741       lastLineStart = current;
1742       ++line;
1743     }
1744   }
1745   // column & line start at 1
1746   column = int(location - lastLineStart) + 1;
1747   ++line;
1748 }
1749
1750 std::string OurReader::getLocationLineAndColumn(Location location) const {
1751   int line, column;
1752   getLocationLineAndColumn(location, line, column);
1753   char buffer[18 + 16 + 16 + 1];
1754 #if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__)
1755 #if defined(WINCE)
1756   _snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1757 #else
1758   sprintf_s(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1759 #endif
1760 #else
1761   snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1762 #endif
1763   return buffer;
1764 }
1765
1766 std::string OurReader::getFormattedErrorMessages() const {
1767   std::string formattedMessage;
1768   for (Errors::const_iterator itError = errors_.begin();
1769        itError != errors_.end();
1770        ++itError) {
1771     const ErrorInfo& error = *itError;
1772     formattedMessage +=
1773         "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1774     formattedMessage += "  " + error.message_ + "\n";
1775     if (error.extra_)
1776       formattedMessage +=
1777           "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1778   }
1779   return formattedMessage;
1780 }
1781
1782 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
1783   std::vector<OurReader::StructuredError> allErrors;
1784   for (Errors::const_iterator itError = errors_.begin();
1785        itError != errors_.end();
1786        ++itError) {
1787     const ErrorInfo& error = *itError;
1788     OurReader::StructuredError structured;
1789     structured.offset_start = error.token_.start_ - begin_;
1790     structured.offset_limit = error.token_.end_ - begin_;
1791     structured.message = error.message_;
1792     allErrors.push_back(structured);
1793   }
1794   return allErrors;
1795 }
1796
1797 bool OurReader::pushError(const Value& value, const std::string& message) {
1798   size_t length = end_ - begin_;
1799   if(value.getOffsetStart() > length
1800     || value.getOffsetLimit() > length)
1801     return false;
1802   Token token;
1803   token.type_ = tokenError;
1804   token.start_ = begin_ + value.getOffsetStart();
1805   token.end_ = end_ + value.getOffsetLimit();
1806   ErrorInfo info;
1807   info.token_ = token;
1808   info.message_ = message;
1809   info.extra_ = 0;
1810   errors_.push_back(info);
1811   return true;
1812 }
1813
1814 bool OurReader::pushError(const Value& value, const std::string& message, const Value& extra) {
1815   size_t length = end_ - begin_;
1816   if(value.getOffsetStart() > length
1817     || value.getOffsetLimit() > length
1818     || extra.getOffsetLimit() > length)
1819     return false;
1820   Token token;
1821   token.type_ = tokenError;
1822   token.start_ = begin_ + value.getOffsetStart();
1823   token.end_ = begin_ + value.getOffsetLimit();
1824   ErrorInfo info;
1825   info.token_ = token;
1826   info.message_ = message;
1827   info.extra_ = begin_ + extra.getOffsetStart();
1828   errors_.push_back(info);
1829   return true;
1830 }
1831
1832 bool OurReader::good() const {
1833   return !errors_.size();
1834 }
1835
1836
1837 class OurCharReader : public CharReader {
1838   bool const collectComments_;
1839   OurReader reader_;
1840 public:
1841   OurCharReader(
1842     bool collectComments,
1843     OurFeatures const& features)
1844   : collectComments_(collectComments)
1845   , reader_(features)
1846   {}
1847   virtual bool parse(
1848       char const* beginDoc, char const* endDoc,
1849       Value* root, std::string* errs) {
1850     bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1851     if (errs) {
1852       *errs = reader_.getFormattedErrorMessages();
1853     }
1854     return ok;
1855   }
1856 };
1857
1858 CharReaderBuilder::CharReaderBuilder()
1859 {
1860   setDefaults(&settings_);
1861 }
1862 CharReaderBuilder::~CharReaderBuilder()
1863 {}
1864 CharReader* CharReaderBuilder::newCharReader() const
1865 {
1866   bool collectComments = settings_["collectComments"].asBool();
1867   OurFeatures features = OurFeatures::all();
1868   features.allowComments_ = settings_["allowComments"].asBool();
1869   features.strictRoot_ = settings_["strictRoot"].asBool();
1870   features.allowDroppedNullPlaceholders_ = settings_["allowDroppedNullPlaceholders"].asBool();
1871   features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1872   features.stackLimit_ = settings_["stackLimit"].asInt();
1873   return new OurCharReader(collectComments, features);
1874 }
// Replaces the contents of *valid_keys with the names of the settings that
// CharReaderBuilder understands.
static void getValidReaderKeys(std::set<std::string>* valid_keys)
{
  static const char* const kKnownKeys[] = {
    "collectComments",
    "allowComments",
    "strictRoot",
    "allowDroppedNullPlaceholders",
    "allowNumericKeys",
    "stackLimit"
  };
  const size_t keyCount = sizeof(kKnownKeys) / sizeof(kKnownKeys[0]);

  valid_keys->clear();
  for (size_t i = 0; i < keyCount; ++i) {
    valid_keys->insert(std::string(kKnownKeys[i]));
  }
}
1885 bool CharReaderBuilder::validate(Json::Value* invalid) const
1886 {
1887   Json::Value my_invalid;
1888   if (!invalid) invalid = &my_invalid;  // so we do not need to test for NULL
1889   Json::Value& inv = *invalid;
1890   bool valid = true;
1891   std::set<std::string> valid_keys;
1892   getValidReaderKeys(&valid_keys);
1893   Value::Members keys = settings_.getMemberNames();
1894   size_t n = keys.size();
1895   for (size_t i = 0; i < n; ++i) {
1896     std::string const& key = keys[i];
1897     if (valid_keys.find(key) == valid_keys.end()) {
1898       inv[key] = settings_[key];
1899     }
1900   }
1901   return valid;
1902 }
1903 // static
1904 void CharReaderBuilder::strictMode(Json::Value* settings)
1905 {
1906 //! [CharReaderBuilderStrictMode]
1907   (*settings)["allowComments"] = false;
1908   (*settings)["strictRoot"] = true;
1909   (*settings)["allowDroppedNullPlaceholders"] = false;
1910   (*settings)["allowNumericKeys"] = false;
1911 //! [CharReaderBuilderStrictMode]
1912 }
1913 // static
1914 void CharReaderBuilder::setDefaults(Json::Value* settings)
1915 {
1916 //! [CharReaderBuilderDefaults]
1917   (*settings)["collectComments"] = true;
1918   (*settings)["allowComments"] = true;
1919   (*settings)["strictRoot"] = false;
1920   (*settings)["allowDroppedNullPlaceholders"] = false;
1921   (*settings)["allowNumericKeys"] = false;
1922   (*settings)["stackLimit"] = 1000;
1923 //! [CharReaderBuilderDefaults]
1924 }
1925
1926 //////////////////////////////////
1927 // global functions
1928
1929 bool parseFromStream(
1930     CharReader::Factory const& fact, std::istream& sin,
1931     Value* root, std::string* errs)
1932 {
1933   std::ostringstream ssin;
1934   ssin << sin.rdbuf();
1935   std::string doc = ssin.str();
1936   char const* begin = doc.data();
1937   char const* end = begin + doc.size();
1938   // Note that we do not actually need a null-terminator.
1939   CharReaderPtr const reader(fact.newCharReader());
1940   return reader->parse(begin, end, root, errs);
1941 }
1942
1943 std::istream& operator>>(std::istream& sin, Value& root) {
1944   CharReaderBuilder b;
1945   std::string errs;
1946   bool ok = parseFromStream(b, sin, &root, &errs);
1947   if (!ok) {
1948     fprintf(stderr,
1949             "Error from reader: %s",
1950             errs.c_str());
1951
1952     JSON_FAIL_MESSAGE("reader error");
1953   }
1954   return sin;
1955 }
1956
1957 } // namespace Json