1 // Copyright 2007-2011 Baptiste Lepilleur
2 // Distributed under MIT license, or public domain if desired and
3 // recognized in your jurisdiction.
4 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6 #if !defined(JSON_IS_AMALGAMATION)
7 #include <json/assertions.h>
8 #include <json/reader.h>
9 #include <json/value.h>
10 #include "json_tool.h"
11 #endif // if !defined(JSON_IS_AMALGAMATION)
22 #if defined(_MSC_VER) && _MSC_VER < 1500 // VC++ 8.0 and below
23 #define snprintf _snprintf
26 #if defined(_MSC_VER) && _MSC_VER >= 1400 // VC++ 8.0
27 // Disable warning about strdup being deprecated.
28 #pragma warning(disable : 4996)
31 static int const stackLimit_g = 1000;
32 static int stackDepth_g = 0; // see readValue()
36 #if __cplusplus >= 201103L
37 typedef std::unique_ptr<CharReader> CharReaderPtr;
39 typedef std::auto_ptr<CharReader> CharReaderPtr;
42 // Implementation of class Features
43 // ////////////////////////////////
46 : allowComments_(true), strictRoot_(false),
47 allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) {}
49 Features Features::all() { return Features(); }
51 Features Features::strictMode() {
53 features.allowComments_ = false;
54 features.strictRoot_ = true;
55 features.allowDroppedNullPlaceholders_ = false;
56 features.allowNumericKeys_ = false;
60 // Implementation of class Reader
61 // ////////////////////////////////
63 static bool containsNewLine(Reader::Location begin, Reader::Location end) {
64 for (; begin < end; ++begin)
65 if (*begin == '\n' || *begin == '\r')
71 // //////////////////////////////////////////////////////////////////
74 : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
75 lastValue_(), commentsBefore_(), features_(Features::all()),
78 Reader::Reader(const Features& features)
79 : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
80 lastValue_(), commentsBefore_(), features_(features), collectComments_() {
84 Reader::parse(const std::string& document, Value& root, bool collectComments) {
86 const char* begin = document_.c_str();
87 const char* end = begin + document_.length();
88 return parse(begin, end, root, collectComments);
// Stream overload of parse(): reads the stream's content into a std::string
// and forwards to parse(const std::string&, Value&, bool).
// NOTE(review): std::getline() below stops at the first byte equal to
// (char)EOF. EOF is commonly -1, i.e. byte 0xFF; that byte cannot occur in
// well-formed UTF-8, but input in other encodings would be silently
// truncated there -- confirm whether this is acceptable for callers.
91 bool Reader::parse(std::istream& sin, Value& root, bool collectComments) {
92 // std::istream_iterator<char> begin(sin);
93 // std::istream_iterator<char> end;
94 // Those would allow streamed input from a file, if parse() were a
97 // Since std::string is reference-counted, this at least does not
98 // create an extra copy.
// NOTE(review): the "reference-counted" remark above presumably predates
// C++11, whose std::string is not allowed to share buffers -- verify before
// relying on the no-extra-copy claim.
100 std::getline(sin, doc, (char)EOF);
101 return parse(doc, root, collectComments);
104 bool Reader::parse(const char* beginDoc,
107 bool collectComments) {
108 if (!features_.allowComments_) {
109 collectComments = false;
114 collectComments_ = collectComments;
118 commentsBefore_ = "";
120 while (!nodes_.empty())
124 stackDepth_g = 0; // Yes, this is bad coding, but options are limited.
125 bool successful = readValue();
127 skipCommentTokens(token);
128 if (collectComments_ && !commentsBefore_.empty())
129 root.setComment(commentsBefore_, commentAfter);
130 if (features_.strictRoot_) {
131 if (!root.isArray() && !root.isObject()) {
132 // Set error location to start of doc, ideally should be first token found
134 token.type_ = tokenError;
135 token.start_ = beginDoc;
138 "A valid JSON document must be either an array or an object value.",
// Reads one JSON value at the current position into currentValue().
// Leading comment tokens are skipped, any comments collected in
// commentsBefore_ are attached to the value as commentBefore, and the first
// remaining token selects the handler (object, array, number, string, ...).
// The value's start/limit offsets are recorded relative to begin_.
// Throws std::runtime_error when stackDepth_g has reached stackLimit_g.
// An unexpected token is reported through addError(), whose result is returned.
146 bool Reader::readValue() {
147 // This is a non-reentrant way to support a stackLimit. Terrible!
148 // But this deprecated class has a security problem: Bad input can
149 // cause a seg-fault. This seems like a fair, binary-compatible way
150 // to prevent the problem.
151 if (stackDepth_g >= stackLimit_g) throw std::runtime_error("Exceeded stackLimit in readValue().");
155 skipCommentTokens(token);
156 bool successful = true;
158 if (collectComments_ && !commentsBefore_.empty()) {
159 currentValue().setComment(commentsBefore_, commentBefore);
160 commentsBefore_ = "";
163 switch (token.type_) {
164 case tokenObjectBegin:
165 successful = readObject(token);
166 currentValue().setOffsetLimit(current_ - begin_);
168 case tokenArrayBegin:
169 successful = readArray(token);
170 currentValue().setOffsetLimit(current_ - begin_);
173 successful = decodeNumber(token);
176 successful = decodeString(token);
181 currentValue().swapPayload(v);
182 currentValue().setOffsetStart(token.start_ - begin_);
183 currentValue().setOffsetLimit(token.end_ - begin_);
189 currentValue().swapPayload(v);
190 currentValue().setOffsetStart(token.start_ - begin_);
191 currentValue().setOffsetLimit(token.end_ - begin_);
197 currentValue().swapPayload(v);
198 currentValue().setOffsetStart(token.start_ - begin_);
199 currentValue().setOffsetLimit(token.end_ - begin_);
202 case tokenArraySeparator:
203 if (features_.allowDroppedNullPlaceholders_) {
204 // "Un-read" the current token and mark the current value as a null
208 currentValue().swapPayload(v);
209 currentValue().setOffsetStart(current_ - begin_ - 1);
210 currentValue().setOffsetLimit(current_ - begin_);
213 // Else, fall through...
215 currentValue().setOffsetStart(token.start_ - begin_);
216 currentValue().setOffsetLimit(token.end_ - begin_);
217 return addError("Syntax error: value, object or array expected.", token);
220 if (collectComments_) {
221 lastValueEnd_ = current_;
// Remember the value just parsed; readComment()/addComment() use
// lastValueEnd_ and lastValue_ to attach a same-line trailing comment to it.
222 lastValue_ = &currentValue();
229 void Reader::skipCommentTokens(Token& token) {
230 if (features_.allowComments_) {
233 } while (token.type_ == tokenComment);
239 bool Reader::readToken(Token& token) {
241 token.start_ = current_;
242 Char c = getNextChar();
246 token.type_ = tokenObjectBegin;
249 token.type_ = tokenObjectEnd;
252 token.type_ = tokenArrayBegin;
255 token.type_ = tokenArrayEnd;
258 token.type_ = tokenString;
262 token.type_ = tokenComment;
276 token.type_ = tokenNumber;
280 token.type_ = tokenTrue;
281 ok = match("rue", 3);
284 token.type_ = tokenFalse;
285 ok = match("alse", 4);
288 token.type_ = tokenNull;
289 ok = match("ull", 3);
292 token.type_ = tokenArraySeparator;
295 token.type_ = tokenMemberSeparator;
298 token.type_ = tokenEndOfStream;
305 token.type_ = tokenError;
306 token.end_ = current_;
310 void Reader::skipSpaces() {
311 while (current_ != end_) {
313 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
320 bool Reader::match(Location pattern, int patternLength) {
321 if (end_ - current_ < patternLength)
323 int index = patternLength;
325 if (current_[index] != pattern[index])
327 current_ += patternLength;
331 bool Reader::readComment() {
332 Location commentBegin = current_ - 1;
333 Char c = getNextChar();
334 bool successful = false;
336 successful = readCStyleComment();
338 successful = readCppStyleComment();
342 if (collectComments_) {
343 CommentPlacement placement = commentBefore;
344 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
345 if (c != '*' || !containsNewLine(commentBegin, current_))
346 placement = commentAfterOnSameLine;
349 addComment(commentBegin, current_, placement);
354 static std::string normalizeEOL(Reader::Location begin, Reader::Location end) {
355 std::string normalized;
356 normalized.reserve(end - begin);
357 Reader::Location current = begin;
358 while (current != end) {
361 if (current != end && *current == '\n')
374 Reader::addComment(Location begin, Location end, CommentPlacement placement) {
375 assert(collectComments_);
376 const std::string& normalized = normalizeEOL(begin, end);
377 if (placement == commentAfterOnSameLine) {
378 assert(lastValue_ != 0);
379 lastValue_->setComment(normalized, placement);
381 commentsBefore_ += normalized;
// Consumes the body of a C-style comment, scanning for the closing "*/".
// The final expression reports whether the character consumed after the scan
// is '/', i.e. whether the comment was properly terminated.
385 bool Reader::readCStyleComment() {
386 while (current_ != end_) {
387 Char c = getNextChar();
// getNextChar() may have advanced current_ to end_ (comment ending in '*' at
// end of input); check before peeking so we never read one past the buffer.
388 if (c == '*' && current_ != end_ && *current_ == '/')
391 return getNextChar() == '/';
// Consumes the remainder of a C++-style (single-line) comment, scanning
// character by character until the input ends or a line ending is handled.
// The visible handling covers a "\r\n" pair (DOS) and, per the comment
// below, a lone '\r' (classic Mac OS) line ending.
394 bool Reader::readCppStyleComment() {
395 while (current_ != end_) {
396 Char c = getNextChar();
400 // Consume DOS EOL. It will be normalized in addComment.
401 if (current_ != end_ && *current_ == '\n')
403 // Break on Mac OS 9 EOL.
410 void Reader::readNumber() {
411 const char *p = current_;
412 char c = '0'; // stopgap for already consumed character
414 while (c >= '0' && c <= '9')
415 c = (current_ = p) < end_ ? *p++ : 0;
418 c = (current_ = p) < end_ ? *p++ : 0;
419 while (c >= '0' && c <= '9')
420 c = (current_ = p) < end_ ? *p++ : 0;
423 if (c == 'e' || c == 'E') {
424 c = (current_ = p) < end_ ? *p++ : 0;
425 if (c == '+' || c == '-')
426 c = (current_ = p) < end_ ? *p++ : 0;
427 while (c >= '0' && c <= '9')
428 c = (current_ = p) < end_ ? *p++ : 0;
432 bool Reader::readString() {
434 while (current_ != end_) {
444 bool Reader::readObject(Token& tokenStart) {
447 Value init(objectValue);
448 currentValue().swapPayload(init);
449 currentValue().setOffsetStart(tokenStart.start_ - begin_);
450 while (readToken(tokenName)) {
451 bool initialTokenOk = true;
452 while (tokenName.type_ == tokenComment && initialTokenOk)
453 initialTokenOk = readToken(tokenName);
456 if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
459 if (tokenName.type_ == tokenString) {
460 if (!decodeString(tokenName, name))
461 return recoverFromError(tokenObjectEnd);
462 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
464 if (!decodeNumber(tokenName, numberName))
465 return recoverFromError(tokenObjectEnd);
466 name = numberName.asString();
472 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
473 return addErrorAndRecover(
474 "Missing ':' after object member name", colon, tokenObjectEnd);
476 Value& value = currentValue()[name];
478 bool ok = readValue();
480 if (!ok) // error already set
481 return recoverFromError(tokenObjectEnd);
484 if (!readToken(comma) ||
485 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
486 comma.type_ != tokenComment)) {
487 return addErrorAndRecover(
488 "Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
490 bool finalizeTokenOk = true;
491 while (comma.type_ == tokenComment && finalizeTokenOk)
492 finalizeTokenOk = readToken(comma);
493 if (comma.type_ == tokenObjectEnd)
496 return addErrorAndRecover(
497 "Missing '}' or object member name", tokenName, tokenObjectEnd);
// Parses a JSON array whose opening token is tokenStart into currentValue().
// currentValue() is first replaced by an empty array and its start offset
// recorded; each element is then read via readValue() into the next index.
// After every element a ',' or ']' token is required (comment tokens in
// between are skipped); anything else is reported through
// addErrorAndRecover(), and a failed element triggers recoverFromError().
500 bool Reader::readArray(Token& tokenStart) {
501 Value init(arrayValue);
502 currentValue().swapPayload(init);
503 currentValue().setOffsetStart(tokenStart.start_ - begin_);
// current_ can already equal end_ here (e.g. input truncated right after
// '['), so guard the peek instead of dereferencing one past the buffer.
505 if (current_ != end_ && *current_ == ']') // empty array
513 Value& value = currentValue()[index++];
515 bool ok = readValue();
517 if (!ok) // error already set
518 return recoverFromError(tokenArrayEnd);
521 // Accept Comment after last item in the array.
522 ok = readToken(token);
523 while (token.type_ == tokenComment && ok) {
524 ok = readToken(token);
527 (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
528 if (!ok || badTokenType) {
529 return addErrorAndRecover(
530 "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
532 if (token.type_ == tokenArrayEnd)
538 bool Reader::decodeNumber(Token& token) {
540 if (!decodeNumber(token, decoded))
542 currentValue().swapPayload(decoded);
543 currentValue().setOffsetStart(token.start_ - begin_);
544 currentValue().setOffsetLimit(token.end_ - begin_);
548 bool Reader::decodeNumber(Token& token, Value& decoded) {
549 // Attempts to parse the number as an integer. If the number is
550 // larger than the maximum supported value of an integer then
551 // we decode the number as a double.
552 Location current = token.start_;
553 bool isNegative = *current == '-';
556 // TODO: Help the compiler do the div and mod at compile time or get rid of them.
557 Value::LargestUInt maxIntegerValue =
558 isNegative ? Value::LargestUInt(-Value::minLargestInt)
559 : Value::maxLargestUInt;
560 Value::LargestUInt threshold = maxIntegerValue / 10;
561 Value::LargestUInt value = 0;
562 while (current < token.end_) {
564 if (c < '0' || c > '9')
565 return decodeDouble(token, decoded);
566 Value::UInt digit(c - '0');
567 if (value >= threshold) {
568 // We've hit or exceeded the max value divided by 10 (rounded down). If
569 // a) we've only just touched the limit, b) this is the last digit, and
570 // c) it's small enough to fit in that rounding delta, we're okay.
571 // Otherwise treat this number as a double to avoid overflow.
572 if (value > threshold || current != token.end_ ||
573 digit > maxIntegerValue % 10) {
574 return decodeDouble(token, decoded);
577 value = value * 10 + digit;
580 decoded = -Value::LargestInt(value);
581 else if (value <= Value::LargestUInt(Value::maxInt))
582 decoded = Value::LargestInt(value);
588 bool Reader::decodeDouble(Token& token) {
590 if (!decodeDouble(token, decoded))
592 currentValue().swapPayload(decoded);
593 currentValue().setOffsetStart(token.start_ - begin_);
594 currentValue().setOffsetLimit(token.end_ - begin_);
598 bool Reader::decodeDouble(Token& token, Value& decoded) {
600 const int bufferSize = 32;
602 int length = int(token.end_ - token.start_);
604 // Sanity check to avoid buffer overflow exploits.
606 return addError("Unable to parse token length", token);
609 // Avoid using a string constant for the format control string given to
610 // sscanf, as this can cause hard to debug crashes on OS X. See here for more
613 // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
614 char format[] = "%lf";
616 if (length <= bufferSize) {
617 Char buffer[bufferSize + 1];
618 memcpy(buffer, token.start_, length);
620 count = sscanf(buffer, format, &value);
622 std::string buffer(token.start_, token.end_);
623 count = sscanf(buffer.c_str(), format, &value);
627 return addError("'" + std::string(token.start_, token.end_) +
628 "' is not a number.",
634 bool Reader::decodeString(Token& token) {
635 std::string decoded_string;
636 if (!decodeString(token, decoded_string))
638 Value decoded(decoded_string);
639 currentValue().swapPayload(decoded);
640 currentValue().setOffsetStart(token.start_ - begin_);
641 currentValue().setOffsetLimit(token.end_ - begin_);
645 bool Reader::decodeString(Token& token, std::string& decoded) {
646 decoded.reserve(token.end_ - token.start_ - 2);
647 Location current = token.start_ + 1; // skip '"'
648 Location end = token.end_ - 1; // do not include '"'
649 while (current != end) {
653 else if (c == '\\') {
655 return addError("Empty escape sequence in string", token, current);
656 Char escape = *current++;
683 unsigned int unicode;
684 if (!decodeUnicodeCodePoint(token, current, end, unicode))
686 decoded += codePointToUTF8(unicode);
689 return addError("Bad escape sequence in string", token, current);
698 bool Reader::decodeUnicodeCodePoint(Token& token,
701 unsigned int& unicode) {
703 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
705 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
707 if (end - current < 6)
709 "additional six characters expected to parse unicode surrogate pair.",
712 unsigned int surrogatePair;
713 if (*(current++) == '\\' && *(current++) == 'u') {
714 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
715 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
719 return addError("expecting another \\u token to begin the second half of "
720 "a unicode surrogate pair",
727 bool Reader::decodeUnicodeEscapeSequence(Token& token,
730 unsigned int& unicode) {
731 if (end - current < 4)
733 "Bad unicode escape sequence in string: four digits expected.",
737 for (int index = 0; index < 4; ++index) {
740 if (c >= '0' && c <= '9')
742 else if (c >= 'a' && c <= 'f')
743 unicode += c - 'a' + 10;
744 else if (c >= 'A' && c <= 'F')
745 unicode += c - 'A' + 10;
748 "Bad unicode escape sequence in string: hexadecimal digit expected.",
756 Reader::addError(const std::string& message, Token& token, Location extra) {
759 info.message_ = message;
761 errors_.push_back(info);
765 bool Reader::recoverFromError(TokenType skipUntilToken) {
766 int errorCount = int(errors_.size());
769 if (!readToken(skip))
770 errors_.resize(errorCount); // discard errors caused by recovery
771 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
774 errors_.resize(errorCount);
778 bool Reader::addErrorAndRecover(const std::string& message,
780 TokenType skipUntilToken) {
781 addError(message, token);
782 return recoverFromError(skipUntilToken);
785 Value& Reader::currentValue() { return *(nodes_.top()); }
787 Reader::Char Reader::getNextChar() {
788 if (current_ == end_)
793 void Reader::getLocationLineAndColumn(Location location,
796 Location current = begin_;
797 Location lastLineStart = current;
799 while (current < location && current != end_) {
802 if (*current == '\n')
804 lastLineStart = current;
806 } else if (c == '\n') {
807 lastLineStart = current;
811 // column & line start at 1
812 column = int(location - lastLineStart) + 1;
816 std::string Reader::getLocationLineAndColumn(Location location) const {
818 getLocationLineAndColumn(location, line, column);
819 char buffer[18 + 16 + 16 + 1];
820 #if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__)
822 _snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
824 sprintf_s(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
827 snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
832 // Deprecated. Preserved for backward compatibility
833 std::string Reader::getFormatedErrorMessages() const {
834 return getFormattedErrorMessages();
837 std::string Reader::getFormattedErrorMessages() const {
838 std::string formattedMessage;
839 for (Errors::const_iterator itError = errors_.begin();
840 itError != errors_.end();
842 const ErrorInfo& error = *itError;
844 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
845 formattedMessage += " " + error.message_ + "\n";
848 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
850 return formattedMessage;
853 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
854 std::vector<Reader::StructuredError> allErrors;
855 for (Errors::const_iterator itError = errors_.begin();
856 itError != errors_.end();
858 const ErrorInfo& error = *itError;
859 Reader::StructuredError structured;
860 structured.offset_start = error.token_.start_ - begin_;
861 structured.offset_limit = error.token_.end_ - begin_;
862 structured.message = error.message_;
863 allErrors.push_back(structured);
// Appends a caller-supplied error for `value` to errors_.
// The error location is rebuilt from the value's recorded start/limit
// offsets, which are interpreted relative to begin_. Offsets larger than the
// document length (end_ - begin_) are rejected by the range check below.
868 bool Reader::pushError(const Value& value, const std::string& message) {
869 size_t length = end_ - begin_;
870 if(value.getOffsetStart() > length
871 || value.getOffsetLimit() > length)
874 token.type_ = tokenError;
875 token.start_ = begin_ + value.getOffsetStart();
// Offsets are relative to begin_ (getStructuredErrors() subtracts begin_ to
// recover them), so the limit must be based on begin_ as well; basing it on
// end_ would point past the end of the document.
876 token.end_ = begin_ + value.getOffsetLimit();
879 info.message_ = message;
881 errors_.push_back(info);
885 bool Reader::pushError(const Value& value, const std::string& message, const Value& extra) {
886 size_t length = end_ - begin_;
887 if(value.getOffsetStart() > length
888 || value.getOffsetLimit() > length
889 || extra.getOffsetLimit() > length)
892 token.type_ = tokenError;
893 token.start_ = begin_ + value.getOffsetStart();
894 token.end_ = begin_ + value.getOffsetLimit();
897 info.message_ = message;
898 info.extra_ = begin_ + extra.getOffsetStart();
899 errors_.push_back(info);
903 bool Reader::good() const {
904 return !errors_.size();
907 // exact copy of Features
910 static OurFeatures all();
911 static OurFeatures strictMode();
915 bool allowDroppedNullPlaceholders_;
916 bool allowNumericKeys_;
920 // exact copy of Implementation of class Features
921 // ////////////////////////////////
923 OurFeatures::OurFeatures()
924 : allowComments_(true), strictRoot_(false),
925 allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) {}
927 OurFeatures OurFeatures::all() { return OurFeatures(); }
929 OurFeatures OurFeatures::strictMode() {
930 OurFeatures features;
931 features.allowComments_ = false;
932 features.strictRoot_ = true;
933 features.allowDroppedNullPlaceholders_ = false;
934 features.allowNumericKeys_ = false;
938 // Implementation of class Reader
939 // ////////////////////////////////
941 // exact copy of Reader, renamed to OurReader
945 typedef const Char* Location;
946 struct StructuredError {
952 OurReader(OurFeatures const& features);
953 bool parse(const char* beginDoc,
956 bool collectComments = true);
957 std::string getFormattedErrorMessages() const;
958 std::vector<StructuredError> getStructuredErrors() const;
959 bool pushError(const Value& value, const std::string& message);
960 bool pushError(const Value& value, const std::string& message, const Value& extra);
964 OurReader(OurReader const&); // no impl
965 void operator=(OurReader const&); // no impl
968 tokenEndOfStream = 0,
979 tokenMemberSeparator,
994 std::string message_;
998 typedef std::deque<ErrorInfo> Errors;
1000 bool readToken(Token& token);
1002 bool match(Location pattern, int patternLength);
1004 bool readCStyleComment();
1005 bool readCppStyleComment();
1009 bool readObject(Token& token);
1010 bool readArray(Token& token);
1011 bool decodeNumber(Token& token);
1012 bool decodeNumber(Token& token, Value& decoded);
1013 bool decodeString(Token& token);
1014 bool decodeString(Token& token, std::string& decoded);
1015 bool decodeDouble(Token& token);
1016 bool decodeDouble(Token& token, Value& decoded);
1017 bool decodeUnicodeCodePoint(Token& token,
1020 unsigned int& unicode);
1021 bool decodeUnicodeEscapeSequence(Token& token,
1024 unsigned int& unicode);
1025 bool addError(const std::string& message, Token& token, Location extra = 0);
1026 bool recoverFromError(TokenType skipUntilToken);
1027 bool addErrorAndRecover(const std::string& message,
1029 TokenType skipUntilToken);
1030 void skipUntilSpace();
1031 Value& currentValue();
1034 getLocationLineAndColumn(Location location, int& line, int& column) const;
1035 std::string getLocationLineAndColumn(Location location) const;
1036 void addComment(Location begin, Location end, CommentPlacement placement);
1037 void skipCommentTokens(Token& token);
1039 typedef std::stack<Value*> Nodes;
1042 std::string document_;
1046 Location lastValueEnd_;
1048 std::string commentsBefore_;
1051 OurFeatures const features_;
1052 bool collectComments_;
1055 // Complete copy of the Reader implementation, for OurReader
1057 OurReader::OurReader(OurFeatures const& features)
1058 : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
1059 lastValue_(), commentsBefore_(), features_(features), collectComments_() {
1062 bool OurReader::parse(const char* beginDoc,
1065 bool collectComments) {
1066 if (!features_.allowComments_) {
1067 collectComments = false;
1072 collectComments_ = collectComments;
1076 commentsBefore_ = "";
1078 while (!nodes_.empty())
1083 bool successful = readValue();
1085 skipCommentTokens(token);
1086 if (collectComments_ && !commentsBefore_.empty())
1087 root.setComment(commentsBefore_, commentAfter);
1088 if (features_.strictRoot_) {
1089 if (!root.isArray() && !root.isObject()) {
1090 // Set error location to start of doc, ideally should be first token found
1092 token.type_ = tokenError;
1093 token.start_ = beginDoc;
1094 token.end_ = endDoc;
1096 "A valid JSON document must be either an array or an object value.",
// Reads one JSON value at the current position into currentValue().
// Leading comment tokens are skipped, any comments collected in
// commentsBefore_ are attached to the value as commentBefore, and the first
// remaining token selects the handler (object, array, number, string, ...).
// The value's start/limit offsets are recorded relative to begin_.
// Throws std::runtime_error when stackDepth_ has reached
// features_.stackLimit_.
// An unexpected token is reported through addError(), whose result is returned.
1104 bool OurReader::readValue() {
1105 if (stackDepth_ >= features_.stackLimit_) throw std::runtime_error("Exceeded stackLimit in readValue().");
1108 skipCommentTokens(token);
1109 bool successful = true;
1111 if (collectComments_ && !commentsBefore_.empty()) {
1112 currentValue().setComment(commentsBefore_, commentBefore);
1113 commentsBefore_ = "";
1116 switch (token.type_) {
1117 case tokenObjectBegin:
1118 successful = readObject(token);
1119 currentValue().setOffsetLimit(current_ - begin_);
1121 case tokenArrayBegin:
1122 successful = readArray(token);
1123 currentValue().setOffsetLimit(current_ - begin_);
1126 successful = decodeNumber(token);
1129 successful = decodeString(token);
1134 currentValue().swapPayload(v);
1135 currentValue().setOffsetStart(token.start_ - begin_);
1136 currentValue().setOffsetLimit(token.end_ - begin_);
1142 currentValue().swapPayload(v);
1143 currentValue().setOffsetStart(token.start_ - begin_);
1144 currentValue().setOffsetLimit(token.end_ - begin_);
1150 currentValue().swapPayload(v);
1151 currentValue().setOffsetStart(token.start_ - begin_);
1152 currentValue().setOffsetLimit(token.end_ - begin_);
1155 case tokenArraySeparator:
1156 if (features_.allowDroppedNullPlaceholders_) {
1157 // "Un-read" the current token and mark the current value as a null
1161 currentValue().swapPayload(v);
1162 currentValue().setOffsetStart(current_ - begin_ - 1);
1163 currentValue().setOffsetLimit(current_ - begin_);
1166 // Else, fall through...
1168 currentValue().setOffsetStart(token.start_ - begin_);
1169 currentValue().setOffsetLimit(token.end_ - begin_);
1170 return addError("Syntax error: value, object or array expected.", token);
1173 if (collectComments_) {
1174 lastValueEnd_ = current_;
// Remember the value just parsed; readComment()/addComment() use
// lastValueEnd_ and lastValue_ to attach a same-line trailing comment to it.
1175 lastValue_ = &currentValue();
1182 void OurReader::skipCommentTokens(Token& token) {
1183 if (features_.allowComments_) {
1186 } while (token.type_ == tokenComment);
1192 bool OurReader::readToken(Token& token) {
1194 token.start_ = current_;
1195 Char c = getNextChar();
1199 token.type_ = tokenObjectBegin;
1202 token.type_ = tokenObjectEnd;
1205 token.type_ = tokenArrayBegin;
1208 token.type_ = tokenArrayEnd;
1211 token.type_ = tokenString;
1215 token.type_ = tokenComment;
1229 token.type_ = tokenNumber;
1233 token.type_ = tokenTrue;
1234 ok = match("rue", 3);
1237 token.type_ = tokenFalse;
1238 ok = match("alse", 4);
1241 token.type_ = tokenNull;
1242 ok = match("ull", 3);
1245 token.type_ = tokenArraySeparator;
1248 token.type_ = tokenMemberSeparator;
1251 token.type_ = tokenEndOfStream;
1258 token.type_ = tokenError;
1259 token.end_ = current_;
1263 void OurReader::skipSpaces() {
1264 while (current_ != end_) {
1266 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1273 bool OurReader::match(Location pattern, int patternLength) {
1274 if (end_ - current_ < patternLength)
1276 int index = patternLength;
1278 if (current_[index] != pattern[index])
1280 current_ += patternLength;
1284 bool OurReader::readComment() {
1285 Location commentBegin = current_ - 1;
1286 Char c = getNextChar();
1287 bool successful = false;
1289 successful = readCStyleComment();
1291 successful = readCppStyleComment();
1295 if (collectComments_) {
1296 CommentPlacement placement = commentBefore;
1297 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1298 if (c != '*' || !containsNewLine(commentBegin, current_))
1299 placement = commentAfterOnSameLine;
1302 addComment(commentBegin, current_, placement);
1308 OurReader::addComment(Location begin, Location end, CommentPlacement placement) {
1309 assert(collectComments_);
1310 const std::string& normalized = normalizeEOL(begin, end);
1311 if (placement == commentAfterOnSameLine) {
1312 assert(lastValue_ != 0);
1313 lastValue_->setComment(normalized, placement);
1315 commentsBefore_ += normalized;
// Consumes the body of a C-style comment, scanning for the closing "*/".
// The final expression reports whether the character consumed after the scan
// is '/', i.e. whether the comment was properly terminated.
1319 bool OurReader::readCStyleComment() {
1320 while (current_ != end_) {
1321 Char c = getNextChar();
// getNextChar() may have advanced current_ to end_ (comment ending in '*' at
// end of input); check before peeking so we never read one past the buffer.
1322 if (c == '*' && current_ != end_ && *current_ == '/')
1325 return getNextChar() == '/';
// Consumes the remainder of a C++-style (single-line) comment, scanning
// character by character until the input ends or a line ending is handled.
// The visible handling covers a "\r\n" pair (DOS) and, per the comment
// below, a lone '\r' (classic Mac OS) line ending.
1328 bool OurReader::readCppStyleComment() {
1329 while (current_ != end_) {
1330 Char c = getNextChar();
1334 // Consume DOS EOL. It will be normalized in addComment.
1335 if (current_ != end_ && *current_ == '\n')
1337 // Break on Mac OS 9 EOL.
1344 void OurReader::readNumber() {
1345 const char *p = current_;
1346 char c = '0'; // stopgap for already consumed character
1348 while (c >= '0' && c <= '9')
1349 c = (current_ = p) < end_ ? *p++ : 0;
1352 c = (current_ = p) < end_ ? *p++ : 0;
1353 while (c >= '0' && c <= '9')
1354 c = (current_ = p) < end_ ? *p++ : 0;
1357 if (c == 'e' || c == 'E') {
1358 c = (current_ = p) < end_ ? *p++ : 0;
1359 if (c == '+' || c == '-')
1360 c = (current_ = p) < end_ ? *p++ : 0;
1361 while (c >= '0' && c <= '9')
1362 c = (current_ = p) < end_ ? *p++ : 0;
1366 bool OurReader::readString() {
1368 while (current_ != end_) {
1378 bool OurReader::readObject(Token& tokenStart) {
1381 Value init(objectValue);
1382 currentValue().swapPayload(init);
1383 currentValue().setOffsetStart(tokenStart.start_ - begin_);
1384 while (readToken(tokenName)) {
1385 bool initialTokenOk = true;
1386 while (tokenName.type_ == tokenComment && initialTokenOk)
1387 initialTokenOk = readToken(tokenName);
1388 if (!initialTokenOk)
1390 if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
1393 if (tokenName.type_ == tokenString) {
1394 if (!decodeString(tokenName, name))
1395 return recoverFromError(tokenObjectEnd);
1396 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1398 if (!decodeNumber(tokenName, numberName))
1399 return recoverFromError(tokenObjectEnd);
1400 name = numberName.asString();
1406 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1407 return addErrorAndRecover(
1408 "Missing ':' after object member name", colon, tokenObjectEnd);
1410 Value& value = currentValue()[name];
1411 nodes_.push(&value);
1412 bool ok = readValue();
1414 if (!ok) // error already set
1415 return recoverFromError(tokenObjectEnd);
1418 if (!readToken(comma) ||
1419 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
1420 comma.type_ != tokenComment)) {
1421 return addErrorAndRecover(
1422 "Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
1424 bool finalizeTokenOk = true;
1425 while (comma.type_ == tokenComment && finalizeTokenOk)
1426 finalizeTokenOk = readToken(comma);
1427 if (comma.type_ == tokenObjectEnd)
1430 return addErrorAndRecover(
1431 "Missing '}' or object member name", tokenName, tokenObjectEnd);
// Parses a JSON array whose opening token is tokenStart into currentValue().
// currentValue() is first replaced by an empty array and its start offset
// recorded; each element is then pushed on nodes_ and read via readValue()
// into the next index. After every element a ',' or ']' token is required
// (comment tokens in between are skipped); anything else is reported through
// addErrorAndRecover(), and a failed element triggers recoverFromError().
1434 bool OurReader::readArray(Token& tokenStart) {
1435 Value init(arrayValue);
1436 currentValue().swapPayload(init);
1437 currentValue().setOffsetStart(tokenStart.start_ - begin_);
// current_ can already equal end_ here (e.g. input truncated right after
// '['), so guard the peek instead of dereferencing one past the buffer.
1439 if (current_ != end_ && *current_ == ']') // empty array
1442 readToken(endArray);
1447 Value& value = currentValue()[index++];
1448 nodes_.push(&value);
1449 bool ok = readValue();
1451 if (!ok) // error already set
1452 return recoverFromError(tokenArrayEnd);
1455 // Accept Comment after last item in the array.
1456 ok = readToken(token);
1457 while (token.type_ == tokenComment && ok) {
1458 ok = readToken(token);
1461 (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
1462 if (!ok || badTokenType) {
1463 return addErrorAndRecover(
1464 "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
1466 if (token.type_ == tokenArrayEnd)
1472 bool OurReader::decodeNumber(Token& token) {
1474 if (!decodeNumber(token, decoded))
1476 currentValue().swapPayload(decoded);
1477 currentValue().setOffsetStart(token.start_ - begin_);
1478 currentValue().setOffsetLimit(token.end_ - begin_);
// Decodes a numeric token into `decoded`. Digits are accumulated as an
// unsigned integer; a non-digit character, or a magnitude that would exceed
// the integer limit, hands the whole token to decodeDouble() instead.
1482 bool OurReader::decodeNumber(Token& token, Value& decoded) {
1483 // Attempts to parse the number as an integer. If the number is
1484 // larger than the maximum supported value of an integer then
1485 // we decode the number as a double.
1486 Location current = token.start_;
// A leading '-' selects the negative magnitude limit computed below.
1487 bool isNegative = *current == '-';
1490 // TODO: Help the compiler do the div and mod at compile time or get rid of them.
// NOTE(review): -Value::minLargestInt negates the most negative signed value
// before the cast; presumably this relies on wraparound -- confirm it is not
// signed overflow on the supported targets.
1491 Value::LargestUInt maxIntegerValue =
1492 isNegative ? Value::LargestUInt(-Value::minLargestInt)
1493 : Value::maxLargestUInt;
// Once `value` reaches this threshold, appending another digit may overflow.
1494 Value::LargestUInt threshold = maxIntegerValue / 10;
1495 Value::LargestUInt value = 0;
1496 while (current < token.end_) {
1497 Char c = *current++;
// Anything other than a decimal digit means this is not a plain integer.
1498 if (c < '0' || c > '9')
1499 return decodeDouble(token, decoded);
1500 Value::UInt digit(c - '0');
1501 if (value >= threshold) {
1502 // We've hit or exceeded the max value divided by 10 (rounded down). If
1503 // a) we've only just touched the limit, b) this is the last digit, and
1504 // c) it's small enough to fit in that rounding delta, we're okay.
1505 // Otherwise treat this number as a double to avoid overflow.
1506 if (value > threshold || current != token.end_ ||
1507 digit > maxIntegerValue % 10) {
1508 return decodeDouble(token, decoded);
1511 value = value * 10 + digit;
// Store the negated magnitude as a signed integer.
1514 decoded = -Value::LargestInt(value);
// Magnitudes up to Value::maxInt are stored as signed integers.
1515 else if (value <= Value::LargestUInt(Value::maxInt))
1516 decoded = Value::LargestInt(value);
1522 bool OurReader::decodeDouble(Token& token) {
1524 if (!decodeDouble(token, decoded))
1526 currentValue().swapPayload(decoded);
1527 currentValue().setOffsetStart(token.start_ - begin_);
1528 currentValue().setOffsetLimit(token.end_ - begin_);
// Parses the token text as a double with sscanf and the "%lf" format.
// Errors are reported through addError(): one for an unusable token length,
// one when the text is not a number.
1532 bool OurReader::decodeDouble(Token& token, Value& decoded) {
// Tokens of up to bufferSize characters are copied into a stack buffer;
// longer ones are parsed from a std::string copy instead.
1534 const int bufferSize = 32;
1536 int length = int(token.end_ - token.start_);
1538 // Sanity check to avoid buffer overflow exploits.
1540 return addError("Unable to parse token length", token);
1543 // Avoid using a string constant for the format control string given to
1544 // sscanf, as this can cause hard to debug crashes on OS X. See here for more
1547 // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
// NOTE(review): sscanf "%lf" presumably follows the current C locale's
// decimal separator -- confirm callers parse under the "C" locale.
1548 char format[] = "%lf";
1550 if (length <= bufferSize) {
// One extra element beyond bufferSize is reserved in the stack buffer.
1551 Char buffer[bufferSize + 1];
1552 memcpy(buffer, token.start_, length);
1554 count = sscanf(buffer, format, &value);
1556 std::string buffer(token.start_, token.end_);
1557 count = sscanf(buffer.c_str(), format, &value);
1561 return addError("'" + std::string(token.start_, token.end_) +
1562 "' is not a number.",
1568 bool OurReader::decodeString(Token& token) {
1569 std::string decoded_string;
1570 if (!decodeString(token, decoded_string))
1572 Value decoded(decoded_string);
1573 currentValue().swapPayload(decoded);
1574 currentValue().setOffsetStart(token.start_ - begin_);
1575 currentValue().setOffsetLimit(token.end_ - begin_);
// Decodes the contents of a quoted string token into `decoded`, resolving
// backslash escapes. Unicode escapes go through decodeUnicodeCodePoint() and
// are appended via codePointToUTF8(). Malformed escapes are reported through
// addError().
1579 bool OurReader::decodeString(Token& token, std::string& decoded) {
// Upper bound for the result: the token length without its two quotes.
1580 decoded.reserve(token.end_ - token.start_ - 2);
1581 Location current = token.start_ + 1; // skip '"'
1582 Location end = token.end_ - 1; // do not include '"'
1583 while (current != end) {
1584 Char c = *current++;
1587 else if (c == '\\') {
// A backslash must be followed by at least one more character.
1589 return addError("Empty escape sequence in string", token, current);
1590 Char escape = *current++;
1617 unsigned int unicode;
1618 if (!decodeUnicodeCodePoint(token, current, end, unicode))
1620 decoded += codePointToUTF8(unicode);
1623 return addError("Bad escape sequence in string", token, current);
// Decodes one unicode escape into `unicode`. When the decoded value lies in
// 0xD800..0xDBFF, a second "\u" escape is expected and both values are
// combined into a single code point.
1632 bool OurReader::decodeUnicodeCodePoint(Token& token,
1635 unsigned int& unicode) {
1637 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1639 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
// The second half needs six more characters: '\\', 'u' and four hex digits.
1641 if (end - current < 6)
1643 "additional six characters expected to parse unicode surrogate pair.",
1646 unsigned int surrogatePair;
1647 if (*(current++) == '\\' && *(current++) == 'u') {
1648 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
// Combine the low ten bits of each half and add the 0x10000 offset.
// NOTE(review): the second value is only masked with 0x3FF here, not
// range-checked -- confirm whether invalid second halves should be rejected.
1649 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1653 return addError("expecting another \\u token to begin the second half of "
1654 "a unicode surrogate pair",
// Reads four hexadecimal digits starting at `current` and accumulates their
// value into `unicode`. An error is reported through the messages below when
// fewer than four characters remain or a character is not a hex digit.
1661 bool OurReader::decodeUnicodeEscapeSequence(Token& token,
1664 unsigned int& unicode) {
1665 if (end - current < 4)
1667 "Bad unicode escape sequence in string: four digits expected.",
1671 for (int index = 0; index < 4; ++index) {
1672 Char c = *current++;
// Map '0'-'9', 'a'-'f' and 'A'-'F' to their hexadecimal digit value.
1674 if (c >= '0' && c <= '9')
1676 else if (c >= 'a' && c <= 'f')
1677 unicode += c - 'a' + 10;
1678 else if (c >= 'A' && c <= 'F')
1679 unicode += c - 'A' + 10;
1682 "Bad unicode escape sequence in string: hexadecimal digit expected.",
// Records a parse error: the offending token, the message and the `extra`
// location are stored in an ErrorInfo that is appended to errors_.
1690 OurReader::addError(const std::string& message, Token& token, Location extra) {
1692 info.token_ = token;
1693 info.message_ = message;
1694 info.extra_ = extra;
1695 errors_.push_back(info);
// Error recovery: reads tokens with readToken(), looking for `skipUntilToken`
// or tokenEndOfStream. Errors recorded while skipping are dropped by resizing
// errors_ back to the size it had on entry.
1699 bool OurReader::recoverFromError(TokenType skipUntilToken) {
// Remember how many errors existed before recovery started.
1700 int errorCount = int(errors_.size());
1703 if (!readToken(skip))
1704 errors_.resize(errorCount); // discard errors caused by recovery
1705 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1708 errors_.resize(errorCount);
1712 bool OurReader::addErrorAndRecover(const std::string& message,
1714 TokenType skipUntilToken) {
1715 addError(message, token);
1716 return recoverFromError(skipUntilToken);
1719 Value& OurReader::currentValue() { return *(nodes_.top()); }
// Character accessor for the input; it first tests whether the read position
// (current_) has reached the end of the input (end_).
1721 OurReader::Char OurReader::getNextChar() {
1722 if (current_ == end_)
1727 void OurReader::getLocationLineAndColumn(Location location,
1729 int& column) const {
1730 Location current = begin_;
1731 Location lastLineStart = current;
1733 while (current < location && current != end_) {
1734 Char c = *current++;
1736 if (*current == '\n')
1738 lastLineStart = current;
1740 } else if (c == '\n') {
1741 lastLineStart = current;
1745 // column & line start at 1
1746 column = int(location - lastLineStart) + 1;
// Formats the position of `location` as "Line <line>, Column <column>",
// using the three-argument overload to obtain the numbers.
1750 std::string OurReader::getLocationLineAndColumn(Location location) const {
1752 getLocationLineAndColumn(location, line, column);
// Presumably sized for the fixed text, two formatted integers and the
// terminator (18 + 16 + 16 + 1) -- TODO confirm the breakdown.
1753 char buffer[18 + 16 + 16 + 1];
// MSVC builds with the secure CRT use _snprintf or sprintf_s; other
// toolchains use snprintf. All variants receive sizeof(buffer).
1754 #if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__)
1756 _snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1758 sprintf_s(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1761 snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
// Builds a human-readable report from errors_. For each entry it emits a
// "* Line x, Column y" header for the token start and the indented message;
// a "See ... for detail." line is built from error.extra_.
1766 std::string OurReader::getFormattedErrorMessages() const {
1767 std::string formattedMessage;
1768 for (Errors::const_iterator itError = errors_.begin();
1769 itError != errors_.end();
1771 const ErrorInfo& error = *itError;
1773 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1774 formattedMessage += " " + error.message_ + "\n";
1777 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1779 return formattedMessage;
1782 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
1783 std::vector<OurReader::StructuredError> allErrors;
1784 for (Errors::const_iterator itError = errors_.begin();
1785 itError != errors_.end();
1787 const ErrorInfo& error = *itError;
1788 OurReader::StructuredError structured;
1789 structured.offset_start = error.token_.start_ - begin_;
1790 structured.offset_limit = error.token_.end_ - begin_;
1791 structured.message = error.message_;
1792 allErrors.push_back(structured);
1797 bool OurReader::pushError(const Value& value, const std::string& message) {
1798 size_t length = end_ - begin_;
1799 if(value.getOffsetStart() > length
1800 || value.getOffsetLimit() > length)
1803 token.type_ = tokenError;
1804 token.start_ = begin_ + value.getOffsetStart();
1805 token.end_ = end_ + value.getOffsetLimit();
1807 info.token_ = token;
1808 info.message_ = message;
1810 errors_.push_back(info);
1814 bool OurReader::pushError(const Value& value, const std::string& message, const Value& extra) {
1815 size_t length = end_ - begin_;
1816 if(value.getOffsetStart() > length
1817 || value.getOffsetLimit() > length
1818 || extra.getOffsetLimit() > length)
1821 token.type_ = tokenError;
1822 token.start_ = begin_ + value.getOffsetStart();
1823 token.end_ = begin_ + value.getOffsetLimit();
1825 info.token_ = token;
1826 info.message_ = message;
1827 info.extra_ = begin_ + extra.getOffsetStart();
1828 errors_.push_back(info);
1832 bool OurReader::good() const {
1833 return !errors_.size();
// CharReader implementation that delegates parsing to reader_.parse() and
// exposes the reader's formatted error text through `errs`.
1837 class OurCharReader : public CharReader {
// Passed to reader_.parse() on every call; fixed at construction.
1838 bool const collectComments_;
1842 bool collectComments,
1843 OurFeatures const& features)
1844 : collectComments_(collectComments)
1848 char const* beginDoc, char const* endDoc,
1849 Value* root, std::string* errs) {
// Parse the range starting at beginDoc and ending at endDoc into *root.
1850 bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1852 *errs = reader_.getFormattedErrorMessages();
1858 CharReaderBuilder::CharReaderBuilder()
1860 setDefaults(&settings_);
// Out-of-line destructor definition for CharReaderBuilder.
1862 CharReaderBuilder::~CharReaderBuilder()
1864 CharReader* CharReaderBuilder::newCharReader() const
1866 bool collectComments = settings_["collectComments"].asBool();
1867 OurFeatures features = OurFeatures::all();
1868 features.allowComments_ = settings_["allowComments"].asBool();
1869 features.strictRoot_ = settings_["strictRoot"].asBool();
1870 features.allowDroppedNullPlaceholders_ = settings_["allowDroppedNullPlaceholders"].asBool();
1871 features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1872 features.stackLimit_ = settings_["stackLimit"].asInt();
1873 return new OurCharReader(collectComments, features);
// Replaces the contents of *valid_keys with the names of every setting the
// reader builder understands.
static void getValidReaderKeys(std::set<std::string>* valid_keys)
{
  static const char* const kReaderKeys[] = {
    "collectComments",
    "allowComments",
    "strictRoot",
    "allowDroppedNullPlaceholders",
    "allowNumericKeys",
    "stackLimit"
  };
  const size_t keyCount = sizeof(kReaderKeys) / sizeof(kReaderKeys[0]);

  valid_keys->clear();
  for (size_t i = 0; i < keyCount; ++i)
    valid_keys->insert(kReaderKeys[i]);
}
// Checks settings_ against the set of recognised reader keys. Every member of
// settings_ whose name is not recognised is copied into *invalid; a local
// scratch Value is used when `invalid` is null.
1885 bool CharReaderBuilder::validate(Json::Value* invalid) const
1887 Json::Value my_invalid;
1888 if (!invalid) invalid = &my_invalid; // so we do not need to test for NULL
1889 Json::Value& inv = *invalid;
1891 std::set<std::string> valid_keys;
1892 getValidReaderKeys(&valid_keys);
1893 Value::Members keys = settings_.getMemberNames();
1894 size_t n = keys.size();
1895 for (size_t i = 0; i < n; ++i) {
1896 std::string const& key = keys[i];
// Unknown key: report it together with its configured value.
1897 if (valid_keys.find(key) == valid_keys.end()) {
1898 inv[key] = settings_[key];
// Overwrites the four options below in *settings: comments, dropped null
// placeholders and numeric keys are turned off and strictRoot is turned on.
// Other members of *settings are not touched by the visible statements.
// The //! markers delimit a snippet, so the statements are kept verbatim.
1904 void CharReaderBuilder::strictMode(Json::Value* settings)
1906 //! [CharReaderBuilderStrictMode]
1907 (*settings)["allowComments"] = false;
1908 (*settings)["strictRoot"] = true;
1909 (*settings)["allowDroppedNullPlaceholders"] = false;
1910 (*settings)["allowNumericKeys"] = false;
1911 //! [CharReaderBuilderStrictMode]
// Writes the default value of every reader option into *settings: comments
// are allowed and collected, the three leniency switches are off, and the
// stack limit is 1000.
// The //! markers delimit a snippet, so the statements are kept verbatim.
1914 void CharReaderBuilder::setDefaults(Json::Value* settings)
1916 //! [CharReaderBuilderDefaults]
1917 (*settings)["collectComments"] = true;
1918 (*settings)["allowComments"] = true;
1919 (*settings)["strictRoot"] = false;
1920 (*settings)["allowDroppedNullPlaceholders"] = false;
1921 (*settings)["allowNumericKeys"] = false;
1922 (*settings)["stackLimit"] = 1000;
1923 //! [CharReaderBuilderDefaults]
1926 //////////////////////////////////
1929 bool parseFromStream(
1930 CharReader::Factory const& fact, std::istream& sin,
1931 Value* root, std::string* errs)
1933 std::ostringstream ssin;
1934 ssin << sin.rdbuf();
1935 std::string doc = ssin.str();
1936 char const* begin = doc.data();
1937 char const* end = begin + doc.size();
1938 // Note that we do not actually need a null-terminator.
1939 CharReaderPtr const reader(fact.newCharReader());
1940 return reader->parse(begin, end, root, errs);
1943 std::istream& operator>>(std::istream& sin, Value& root) {
1944 CharReaderBuilder b;
1946 bool ok = parseFromStream(b, sin, &root, &errs);
1949 "Error from reader: %s",
1952 JSON_FAIL_MESSAGE("reader error");