1 // Tencent is pleased to support the open source community by making RapidJSON available.
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
8 // http://opensource.org/licenses/MIT
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
15 #ifndef RAPIDJSON_READER_H_
16 #define RAPIDJSON_READER_H_
20 #include "allocators.h"
22 #include "encodedstream.h"
23 #include "internal/clzll.h"
24 #include "internal/meta.h"
25 #include "internal/stack.h"
26 #include "internal/strtod.h"
29 #if defined(RAPIDJSON_SIMD) && defined(_MSC_VER)
31 #pragma intrinsic(_BitScanForward)
33 #ifdef RAPIDJSON_SSE42
34 #include <nmmintrin.h>
35 #elif defined(RAPIDJSON_SSE2)
36 #include <emmintrin.h>
37 #elif defined(RAPIDJSON_NEON)
43 RAPIDJSON_DIAG_OFF(old-style-cast)
44 RAPIDJSON_DIAG_OFF(padded)
45 RAPIDJSON_DIAG_OFF(switch-enum)
46 #elif defined(_MSC_VER)
48 RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
49 RAPIDJSON_DIAG_OFF(4702) // unreachable code
54 RAPIDJSON_DIAG_OFF(effc++)
57 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
// Expands to nothing. It is passed as the `value` argument of the early-return macro below
// so that the expansion becomes a bare `return;`.
58 #define RAPIDJSON_NOTHING /* deliberately empty */
// The #ifndef guard lets a definition supplied before this header take precedence.
59 #ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN
// Returns `value` from the enclosing function when HasParseError() is true.
// HasParseError() is resolved at the expansion site, so the macro is only usable where that name is in scope.
60 #define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \
61 RAPIDJSON_MULTILINEMACRO_BEGIN \
62 if (RAPIDJSON_UNLIKELY(HasParseError())) { return value; } \
63 RAPIDJSON_MULTILINEMACRO_END
// Variant for functions returning void: early-returns without a value.
65 #define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \
66 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING)
69 /*! \def RAPIDJSON_PARSE_ERROR_NORETURN
70 \ingroup RAPIDJSON_ERRORS
71 \brief Macro to indicate a parse error.
72 \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
73 \param offset position of the error in JSON input (\c size_t)
75 This macro can be used as a customization point for the internal
76 error handling mechanism of RapidJSON.
78 A common usage model is to throw an exception instead of requiring the
79 caller to explicitly check the \ref rapidjson::GenericReader::Parse's
83 #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode,offset) \
84 throw ParseException(parseErrorCode, #parseErrorCode, offset)
86 #include <stdexcept> // std::runtime_error
87 #include "rapidjson/error/error.h" // rapidjson::ParseResult
89 struct ParseException : std::runtime_error, rapidjson::ParseResult {
90 ParseException(rapidjson::ParseErrorCode code, const char* msg, size_t offset)
91 : std::runtime_error(msg), ParseResult(code, offset) {}
94 #include "rapidjson/reader.h"
97 \see RAPIDJSON_PARSE_ERROR, rapidjson::GenericReader::Parse
// Default definition, used only when RAPIDJSON_PARSE_ERROR_NORETURN has not been defined already.
// It asserts that no error is recorded yet and then records the code and offset through
// SetParseError(). It does not leave the enclosing function by itself.
99 #ifndef RAPIDJSON_PARSE_ERROR_NORETURN
100 #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \
101 RAPIDJSON_MULTILINEMACRO_BEGIN \
102 RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \
103 SetParseError(parseErrorCode, offset); \
104 RAPIDJSON_MULTILINEMACRO_END
107 /*! \def RAPIDJSON_PARSE_ERROR
108 \ingroup RAPIDJSON_ERRORS
109 \brief (Internal) macro to indicate and handle a parse error.
110 \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
111 \param offset position of the error in JSON input (\c size_t)
113 Invokes RAPIDJSON_PARSE_ERROR_NORETURN and stops the parsing.
115 \see RAPIDJSON_PARSE_ERROR_NORETURN
// Default definition: report the error through RAPIDJSON_PARSE_ERROR_NORETURN, then leave the
// enclosing function through RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID. Because the latter expands
// to a value-less return, this macro is meant for functions returning void.
118 #ifndef RAPIDJSON_PARSE_ERROR
119 #define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \
120 RAPIDJSON_MULTILINEMACRO_BEGIN \
121 RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \
122 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \
123 RAPIDJSON_MULTILINEMACRO_END
126 #include "error/error.h" // ParseErrorCode, ParseResult
128 RAPIDJSON_NAMESPACE_BEGIN
130 ///////////////////////////////////////////////////////////////////////////////
133 /*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS
134 \ingroup RAPIDJSON_CONFIG
135 \brief User-defined kParseDefaultFlags definition.
137 User can define this as any \c ParseFlag combinations.
139 #ifndef RAPIDJSON_PARSE_DEFAULT_FLAGS
140 #define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags
143 //! Combination of parseFlags
144 /*! \see Reader::Parse, Document::Parse, Document::ParseInsitu, Document::ParseStream
147 kParseNoFlags = 0, //!< No flags are set.
148 kParseInsituFlag = 1, //!< In-situ(destructive) parsing.
149 kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings.
150 kParseIterativeFlag = 4, //!< Iterative(constant complexity in terms of function call stack size) parsing.
151 kParseStopWhenDoneFlag = 8, //!< After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error.
152 kParseFullPrecisionFlag = 16, //!< Parse number in full precision (but slower).
153 kParseCommentsFlag = 32, //!< Allow one-line (//) and multi-line (/**/) comments.
154 kParseNumbersAsStringsFlag = 64, //!< Parse all numbers (ints/doubles) as strings.
155 kParseTrailingCommasFlag = 128, //!< Allow trailing commas at the end of objects and arrays.
156 kParseNanAndInfFlag = 256, //!< Allow parsing NaN, Inf, Infinity, -Inf and -Infinity as doubles.
157 kParseEscapedApostropheFlag = 512, //!< Allow escaped apostrophe in strings.
158 kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS
161 ///////////////////////////////////////////////////////////////////////////////
164 /*! \class rapidjson::Handler
165 \brief Concept for receiving events from GenericReader upon parsing.
166 The functions return true if no error occurs. If they return false,
167 the event publisher should terminate the process.
175 bool Uint(unsigned i);
176 bool Int64(int64_t i);
177 bool Uint64(uint64_t i);
178 bool Double(double d);
179 /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
180 bool RawNumber(const Ch* str, SizeType length, bool copy);
181 bool String(const Ch* str, SizeType length, bool copy);
183 bool Key(const Ch* str, SizeType length, bool copy);
184 bool EndObject(SizeType memberCount);
186 bool EndArray(SizeType elementCount);
190 ///////////////////////////////////////////////////////////////////////////////
193 //! Default implementation of Handler.
194 /*! This can be used as base class of any reader handler.
195 \note implements Handler concept
// Encoding supplies the character type Ch. Derived selects the type that the event functions
// dispatch to; see the Override typedef below.
197 template<typename Encoding = UTF8<>, typename Derived = void>
198 struct BaseReaderHandler {
199 typedef typename Encoding::Ch Ch;
// Override is chosen between BaseReaderHandler and Derived depending on whether Derived is void.
// presumably SelectIf<Cond, A, B> yields A when Cond holds and B otherwise — confirm in internal/meta.h
201 typedef typename internal::SelectIf<internal::IsSame<Derived, void>, BaseReaderHandler, Derived>::Type Override;
// Fallback result for events; always reports success.
203 bool Default() { return true; }
// Each event below casts *this to Override and forwards the call, so the result of an event
// is whatever Override::Default() (or Override::String()) returns.
204 bool Null() { return static_cast<Override&>(*this).Default(); }
205 bool Bool(bool) { return static_cast<Override&>(*this).Default(); }
206 bool Int(int) { return static_cast<Override&>(*this).Default(); }
207 bool Uint(unsigned) { return static_cast<Override&>(*this).Default(); }
208 bool Int64(int64_t) { return static_cast<Override&>(*this).Default(); }
209 bool Uint64(uint64_t) { return static_cast<Override&>(*this).Default(); }
210 bool Double(double) { return static_cast<Override&>(*this).Default(); }
211 /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
// RawNumber is routed to Override::String() with the same arguments, not to Default().
212 bool RawNumber(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
213 bool String(const Ch*, SizeType, bool) { return static_cast<Override&>(*this).Default(); }
214 bool StartObject() { return static_cast<Override&>(*this).Default(); }
// Key is likewise routed to Override::String() with the same arguments.
215 bool Key(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
216 bool EndObject(SizeType) { return static_cast<Override&>(*this).Default(); }
217 bool StartArray() { return static_cast<Override&>(*this).Default(); }
218 bool EndArray(SizeType) { return static_cast<Override&>(*this).Default(); }
221 ///////////////////////////////////////////////////////////////////////////////
// Primary template, declared only. The second parameter defaults to
// StreamTraits<Stream>::copyOptimization and selects one of the two specializations below.
226 template<typename Stream, int = StreamTraits<Stream>::copyOptimization>
227 class StreamLocalCopy;
229 //! Do copy optimization.
230 template<typename Stream>
231 class StreamLocalCopy<Stream, 1> {
// Initializes member s from the caller's stream and binds original_ to the caller's stream.
233 StreamLocalCopy(Stream& original) : s(original), original_(original) {}
// On destruction, assigns s back to the caller's stream.
234 ~StreamLocalCopy() { original_ = s; }
// Declared without a body; the trailing comment marks it as intentionally unavailable.
239 StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
// Specialization for copyOptimization == 0: s is initialized from the caller's stream.
// No destructor or write-back is declared in the lines shown here.
245 template<typename Stream>
246 class StreamLocalCopy<Stream, 0> {
248 StreamLocalCopy(Stream& original) : s(original) {}
253 StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
256 } // namespace internal
258 ///////////////////////////////////////////////////////////////////////////////
261 //! Skip the JSON white spaces in a stream.
262 /*! \param is An input stream for skipping white spaces.
263 \note This function has SSE2, SSE4.2 and NEON specializations.
265 template<typename InputStream>
266 void SkipWhitespace(InputStream& is) {
// All reads go through copy.s; StreamLocalCopy decides how copy.s relates to `is`.
267 internal::StreamLocalCopy<InputStream> copy(is);
268 InputStream& s(copy.s);
270 typename InputStream::Ch c;
// Loop for as long as the next character is a space, line feed, carriage return or tab.
271 while ((c = s.Peek()) == ' ' || c == '\n' || c == '\r' || c == '\t')
// Pointer-range overload: scans within [p, end). The `p != end` test comes first, so *p is
// never read once p has reached end.
275 inline const char* SkipWhitespace(const char* p, const char* end) {
276 while (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
281 #ifdef RAPIDJSON_SSE42
282 //! Skip whitespace with SSE 4.2 pcmpistri instruction, testing 16 8-bit characters at once.
// Unbounded variant: takes no end pointer, so it relies on the input terminating the scan
// (presumably a null-terminated string — confirm against callers).
283 inline const char *SkipWhitespace_SIMD(const char* p) {
284 // Fast return for single non-whitespace
285 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
290 // 16-byte align to the next boundary
// Rounds p up to a multiple of 16; the bytes before that boundary are checked one at a time
// so that the aligned load (_mm_load_si128) below can be used afterwards.
291 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
292 while (p != nextAligned)
293 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
298 // The rest of string using SIMD
// The set of four whitespace characters; the remaining array elements are zero.
299 static const char whitespace[16] = " \n\r\t";
300 const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
303 const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
// EQUAL_ANY with negative polarity and least-significant index: r is the index of the first
// byte of s that is not in the set w, or 16 when there is no such byte.
304 const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY);
305 if (r != 16) // some characters are non-whitespace
// Bounded variant: only reads inside [p, end).
310 inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
311 // Fast return for single non-whitespace
312 if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
317 // The middle of string using SIMD
318 static const char whitespace[16] = " \n\r\t";
319 const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
// Only whole 16-byte chunks that fit before `end` are examined. Unaligned loads are used,
// so this variant has no alignment prologue.
321 for (; p <= end - 16; p += 16) {
322 const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
// r is the index of the first byte of s that is not in the set w, or 16 when there is none.
323 const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY);
324 if (r != 16) // some characters are non-whitespace
// The tail shorter than 16 bytes is handled by the scalar pointer-range overload.
328 return SkipWhitespace(p, end);
331 #elif defined(RAPIDJSON_SSE2)
333 //! Skip whitespace with SSE2 instructions, testing 16 8-bit characters at once.
// Unbounded variant: takes no end pointer, so it relies on the input terminating the scan
// (presumably a null-terminated string — confirm against callers).
334 inline const char *SkipWhitespace_SIMD(const char* p) {
335 // Fast return for single non-whitespace
336 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
341 // 16-byte align to the next boundary
// Rounds p up to a multiple of 16; the bytes before that boundary are checked one at a time
// so that the aligned load (_mm_load_si128) below can be used afterwards.
342 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
343 while (p != nextAligned)
344 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
349 // The rest of string
// C16(c) builds a 16-element initializer filled with c; whitespaces[i] is therefore one
// whitespace character repeated across a full 128-bit vector.
350 #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
351 static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
354 const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
355 const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
356 const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
357 const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
360 const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
// x accumulates, per byte lane, whether that byte equals any of the four whitespace characters.
361 __m128i x = _mm_cmpeq_epi8(s, w0);
362 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
363 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
364 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
// movemask packs one bit per lane; after inversion a set bit i means byte i is NOT whitespace.
365 unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
366 if (r != 0) { // some characters may be non-whitespace
367 #ifdef _MSC_VER // Find the index of first non-whitespace
368 unsigned long offset;
369 _BitScanForward(&offset, r);
// __builtin_ffs returns a 1-based bit position, hence the -1.
372 return p + __builtin_ffs(r) - 1;
// Bounded variant: only reads inside [p, end).
378 inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
379 // Fast return for single non-whitespace
380 if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
385 // The rest of string
// C16(c) builds a 16-element initializer filled with c; whitespaces[i] is therefore one
// whitespace character repeated across a full 128-bit vector.
386 #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
387 static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
390 const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
391 const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
392 const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
393 const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
// Only whole 16-byte chunks that fit before `end` are examined, using unaligned loads.
395 for (; p <= end - 16; p += 16) {
396 const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
// x accumulates, per byte lane, whether that byte equals any of the four whitespace characters.
397 __m128i x = _mm_cmpeq_epi8(s, w0);
398 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
399 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
400 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
// movemask packs one bit per lane; after inversion a set bit i means byte i is NOT whitespace.
401 unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
402 if (r != 0) { // some characters may be non-whitespace
403 #ifdef _MSC_VER // Find the index of first non-whitespace
404 unsigned long offset;
405 _BitScanForward(&offset, r);
// __builtin_ffs returns a 1-based bit position, hence the -1.
408 return p + __builtin_ffs(r) - 1;
// The tail shorter than 16 bytes is handled by the scalar pointer-range overload.
413 return SkipWhitespace(p, end);
416 #elif defined(RAPIDJSON_NEON)
418 //! Skip whitespace with ARM Neon instructions, testing 16 8-bit characters at once.
// Unbounded variant: takes no end pointer, so it relies on the input terminating the scan
// (presumably a null-terminated string — confirm against callers).
419 inline const char *SkipWhitespace_SIMD(const char* p) {
420 // Fast return for single non-whitespace
421 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
426 // 16-byte align to the next boundary
// Rounds p up to a multiple of 16; the bytes before that boundary are checked one at a time.
427 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
428 while (p != nextAligned)
429 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
// One vector per whitespace character, with the character replicated into all 16 lanes.
434 const uint8x16_t w0 = vmovq_n_u8(' ');
435 const uint8x16_t w1 = vmovq_n_u8('\n');
436 const uint8x16_t w2 = vmovq_n_u8('\r');
437 const uint8x16_t w3 = vmovq_n_u8('\t');
440 const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
// x accumulates, per byte lane, whether that byte equals any of the four whitespace characters.
441 uint8x16_t x = vceqq_u8(s, w0);
442 x = vorrq_u8(x, vceqq_u8(s, w1));
443 x = vorrq_u8(x, vceqq_u8(s, w2));
444 x = vorrq_u8(x, vceqq_u8(s, w3));
446 x = vmvnq_u8(x); // Negate: non-zero lanes now mark non-whitespace bytes
447 x = vrev64q_u8(x); // Reverse the byte order within each 64-bit half
448 uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract the half covering bytes 0..7
449 uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract the half covering bytes 8..15
// lz is a count of leading zero bits; lz >> 3 turns it into a byte index within the half.
// presumably the byte reversal above is what makes "leading" correspond to the lowest address — confirm for the target byte order.
453 uint32_t lz = internal::clzll(high);
454 return p + 8 + (lz >> 3);
457 uint32_t lz = internal::clzll(low);
458 return p + (lz >> 3);
// Bounded variant: only reads inside [p, end).
463 inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
464 // Fast return for single non-whitespace
465 if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
// One vector per whitespace character, with the character replicated into all 16 lanes.
470 const uint8x16_t w0 = vmovq_n_u8(' ');
471 const uint8x16_t w1 = vmovq_n_u8('\n');
472 const uint8x16_t w2 = vmovq_n_u8('\r');
473 const uint8x16_t w3 = vmovq_n_u8('\t');
// Only whole 16-byte chunks that fit before `end` are examined.
475 for (; p <= end - 16; p += 16) {
476 const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
// x accumulates, per byte lane, whether that byte equals any of the four whitespace characters.
477 uint8x16_t x = vceqq_u8(s, w0);
478 x = vorrq_u8(x, vceqq_u8(s, w1));
479 x = vorrq_u8(x, vceqq_u8(s, w2));
480 x = vorrq_u8(x, vceqq_u8(s, w3));
482 x = vmvnq_u8(x); // Negate: non-zero lanes now mark non-whitespace bytes
483 x = vrev64q_u8(x); // Reverse the byte order within each 64-bit half
484 uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract the half covering bytes 0..7
485 uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract the half covering bytes 8..15
// lz is a count of leading zero bits; lz >> 3 turns it into a byte index within the half.
// presumably the byte reversal above is what makes "leading" correspond to the lowest address — confirm for the target byte order.
489 uint32_t lz = internal::clzll(high);
490 return p + 8 + (lz >> 3);
493 uint32_t lz = internal::clzll(low);
494 return p + (lz >> 3);
// The tail shorter than 16 bytes is handled by the scalar pointer-range overload.
498 return SkipWhitespace(p, end);
501 #endif // RAPIDJSON_NEON
503 #ifdef RAPIDJSON_SIMD
504 //! Template function specialization for InsituStringStream
// SkipWhitespace_SIMD returns const char*; the const_cast converts the result back for assignment to src_.
505 template<> inline void SkipWhitespace(InsituStringStream& is) {
506 is.src_ = const_cast<char*>(SkipWhitespace_SIMD(is.src_));
509 //! Template function specialization for StringStream
510 template<> inline void SkipWhitespace(StringStream& is) {
511 is.src_ = SkipWhitespace_SIMD(is.src_);
//! Template function specialization for UTF-8 EncodedInputStream over MemoryStream
// Uses the bounded overload, passing the wrapped stream's end_ pointer as the limit.
514 template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is) {
515 is.is_.src_ = SkipWhitespace_SIMD(is.is_.src_, is.is_.end_);
517 #endif // RAPIDJSON_SIMD
519 ///////////////////////////////////////////////////////////////////////////////
522 //! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator.
523 /*! GenericReader parses JSON text from a stream, and send events synchronously to an
524 object implementing Handler concept.
526 It needs to allocate a stack for storing a single decoded string during
527 non-destructive parsing.
529 For in-situ parsing, the decoded string is directly written to the source
530 text string, no temporary buffer is required.
532 A GenericReader object can be reused for parsing multiple JSON text.
534 \tparam SourceEncoding Encoding of the input stream.
535 \tparam TargetEncoding Encoding of the parse output.
536 \tparam StackAllocator Allocator type for stack.
538 template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator = CrtAllocator>
539 class GenericReader {
541 typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type
544 /*! \param stackAllocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing)
545 \param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing)
547 GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) :
// Both arguments are forwarded to stack_. parseResult_ is value-initialized and the
// iterative-parsing state starts at IterativeParsingStartState.
548 stack_(stackAllocator, stackCapacity), parseResult_(), state_(IterativeParsingStartState) {}
551 /*! \tparam parseFlags Combination of \ref ParseFlag.
552 \tparam InputStream Type of input stream, implementing Stream concept.
553 \tparam Handler Type of handler, implementing Handler concept.
554 \param is Input stream to be parsed.
555 \param handler The handler to receive events.
556 \return Whether the parsing is successful.
558 template <unsigned parseFlags, typename InputStream, typename Handler>
559 ParseResult Parse(InputStream& is, Handler& handler) {
// With kParseIterativeFlag the whole job is delegated to IterativeParse.
560 if (parseFlags & kParseIterativeFlag)
561 return IterativeParse<parseFlags>(is, handler);
// Recursive path: start from a clean result.
563 parseResult_.Clear();
// The guard's destructor calls ClearStack(), so the stack is cleared on every way out of this function.
565 ClearStackOnExit scope(*this);
567 SkipWhitespaceAndComments<parseFlags>(is);
568 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
// Nothing but whitespace/comments before the terminator: the document is empty.
570 if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) {
571 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell());
572 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
// Parse the root value.
575 ParseValue<parseFlags>(is, handler);
576 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
// Unless kParseStopWhenDoneFlag is set, anything other than whitespace/comments after the
// root value is reported as kParseErrorDocumentRootNotSingular.
578 if (!(parseFlags & kParseStopWhenDoneFlag)) {
579 SkipWhitespaceAndComments<parseFlags>(is);
580 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
582 if (RAPIDJSON_UNLIKELY(is.Peek() != '\0')) {
583 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell());
584 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
592 //! Parse JSON text (with \ref kParseDefaultFlags)
593 /*! \tparam InputStream Type of input stream, implementing Stream concept
594 \tparam Handler Type of handler, implementing Handler concept.
595 \param is Input stream to be parsed.
596 \param handler The handler to receive events.
597 \return Whether the parsing is successful.
599 template <typename InputStream, typename Handler>
600 ParseResult Parse(InputStream& is, Handler& handler) {
// Forwards to the flag-templated overload with kParseDefaultFlags.
601 return Parse<kParseDefaultFlags>(is, handler);
604 //! Initialize JSON text token-by-token parsing
607 void IterativeParseInit() {
// Discard any previous result and put the state machine back into its start state.
608 parseResult_.Clear();
609 state_ = IterativeParsingStartState;
612 //! Parse one token from JSON text
613 /*! \tparam InputStream Type of input stream, implementing Stream concept
614 \tparam Handler Type of handler, implementing Handler concept.
615 \param is Input stream to be parsed.
616 \param handler The handler to receive events.
617 \return Whether the parsing is successful.
619 template <unsigned parseFlags, typename InputStream, typename Handler>
620 bool IterativeParseNext(InputStream& is, Handler& handler) {
// Keep consuming input until the stream's terminator is seen.
621 while (RAPIDJSON_LIKELY(is.Peek() != '\0')) {
622 SkipWhitespaceAndComments<parseFlags>(is);
// presumably: t classifies the next character, n is the state expected from (state_, t),
// and d is the state actually reached after Transit runs the associated action — confirm
// against Tokenize/Predict/Transit.
624 Token t = Tokenize(is.Peek());
625 IterativeParsingState n = Predict(state_, t);
626 IterativeParsingState d = Transit<parseFlags>(state_, t, n, is, handler);
628 // If we've finished or hit an error...
629 if (RAPIDJSON_UNLIKELY(IsIterativeParsingCompleteState(d))) {
// The "complete" test above covers the error state as well, so the two cases are separated here.
631 if (d == IterativeParsingErrorState) {
632 HandleError(state_, is);
636 // Transition to the finish state.
637 RAPIDJSON_ASSERT(d == IterativeParsingFinishState);
640 // If StopWhenDone is not set...
641 if (!(parseFlags & kParseStopWhenDoneFlag)) {
642 // ... and extra non-whitespace data is found...
643 SkipWhitespaceAndComments<parseFlags>(is);
644 if (is.Peek() != '\0') {
645 // ... this is considered an error.
646 HandleError(state_, is);
651 // Success! We are done!
655 // Transition to the new state.
658 // If we parsed anything other than a delimiter, we invoked the handler, so we can return true now.
659 if (!IsIterativeParsingDelimiterState(n))
663 // We reached the end of file.
// Running out of input before the finish state was reached is reported as an error.
666 if (state_ != IterativeParsingFinishState) {
667 HandleError(state_, is);
674 //! Check if token-by-token parsing JSON text is complete
675 /*! \return Whether the JSON has been fully decoded.
677 RAPIDJSON_FORCEINLINE bool IterativeParseComplete() const {
// Delegates to IsIterativeParsingCompleteState() on the current state_.
// NOTE(review): IterativeParseNext checks for IterativeParsingErrorState inside its "complete"
// branch, so this presumably also reports true after a failed parse — confirm.
678 return IsIterativeParsingCompleteState(state_);
681 //! Whether a parse error has occurred in the last parsing.
682 bool HasParseError() const { return parseResult_.IsError(); }
684 //! Get the \ref ParseErrorCode of last parsing.
685 ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); }
687 //! Get the position of last parsing error in input, 0 otherwise.
688 size_t GetErrorOffset() const { return parseResult_.Offset(); }
// Stores the given error code and input offset in parseResult_.
691 void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); }
694 // Prohibit copy constructor & assignment operator.
695 GenericReader(const GenericReader&);
696 GenericReader& operator=(const GenericReader&);
698 void ClearStack() { stack_.Clear(); }
700 // Scope guard: clears the reader's stack on any exit from Parse(), e.g. due to an exception
701 struct ClearStackOnExit {
// Keeps a reference to the reader whose stack will be cleared.
702 explicit ClearStackOnExit(GenericReader& r) : r_(r) {}
703 ~ClearStackOnExit() { r_.ClearStack(); }
// Copy construction and assignment are declared only, with no body here.
706 ClearStackOnExit(const ClearStackOnExit&);
707 ClearStackOnExit& operator=(const ClearStackOnExit&);
// Comment handling is active only when kParseCommentsFlag is part of parseFlags.
// Malformed or unterminated comments are reported as kParseErrorUnspecificSyntaxError.
710 template<unsigned parseFlags, typename InputStream>
711 void SkipWhitespaceAndComments(InputStream& is) {
714 if (parseFlags & kParseCommentsFlag) {
// Each iteration handles one comment, which must begin with '/'.
715 while (RAPIDJSON_UNLIKELY(Consume(is, '/'))) {
// "/*": multi-line comment.
716 if (Consume(is, '*')) {
// The input ended before the comment was closed.
718 if (RAPIDJSON_UNLIKELY(is.Peek() == '\0'))
719 RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
// A '*' immediately followed by '/' closes the comment.
720 else if (Consume(is, '*')) {
721 if (Consume(is, '/'))
// "//": single-line comment; consume up to and including the next '\n', or stop at the terminator.
728 else if (RAPIDJSON_LIKELY(Consume(is, '/')))
729 while (is.Peek() != '\0' && is.Take() != '\n') {}
// A '/' followed by anything else is not a comment.
731 RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
738 // Parse object: { string : value, ... }
// Emits handler.StartObject(), then for each member a name (ParseString with isKey = true)
// and a value (ParseValue), and finally handler.EndObject(memberCount). A handler call that
// returns false is reported as kParseErrorTermination. With the default macro definitions,
// RAPIDJSON_PARSE_ERROR records the error and returns from this function.
739 template<unsigned parseFlags, typename InputStream, typename Handler>
740 void ParseObject(InputStream& is, Handler& handler) {
// Precondition: the next character is the opening brace.
741 RAPIDJSON_ASSERT(is.Peek() == '{');
742 is.Take(); // Skip '{'
744 if (RAPIDJSON_UNLIKELY(!handler.StartObject()))
745 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
747 SkipWhitespaceAndComments<parseFlags>(is);
748 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
750 if (Consume(is, '}')) {
751 if (RAPIDJSON_UNLIKELY(!handler.EndObject(0))) // empty object
752 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
756 for (SizeType memberCount = 0;;) {
// Every member must begin with a double-quoted name.
757 if (RAPIDJSON_UNLIKELY(is.Peek() != '"'))
758 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell());
760 ParseString<parseFlags>(is, handler, true);
761 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
763 SkipWhitespaceAndComments<parseFlags>(is);
764 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
// A ':' must separate the name from its value.
766 if (RAPIDJSON_UNLIKELY(!Consume(is, ':')))
767 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell());
769 SkipWhitespaceAndComments<parseFlags>(is);
770 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
772 ParseValue<parseFlags>(is, handler);
773 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
775 SkipWhitespaceAndComments<parseFlags>(is);
776 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
783 SkipWhitespaceAndComments<parseFlags>(is);
784 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
788 if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
789 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
792 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); break; // This useless break is only for making warning and coverage happy
// When kParseTrailingCommasFlag is set, a '}' seen at this point ends the object via EndObject(memberCount).
795 if (parseFlags & kParseTrailingCommasFlag) {
796 if (is.Peek() == '}') {
797 if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
798 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
806 // Parse array: [ value, ... ]
// Emits handler.StartArray(), parses each element with ParseValue, and finishes with
// handler.EndArray(elementCount). A handler call that returns false is reported as
// kParseErrorTermination. With the default macro definitions, RAPIDJSON_PARSE_ERROR records
// the error and returns from this function.
807 template<unsigned parseFlags, typename InputStream, typename Handler>
808 void ParseArray(InputStream& is, Handler& handler) {
// Precondition: the next character is the opening bracket.
809 RAPIDJSON_ASSERT(is.Peek() == '[');
810 is.Take(); // Skip '['
812 if (RAPIDJSON_UNLIKELY(!handler.StartArray()))
813 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
815 SkipWhitespaceAndComments<parseFlags>(is);
816 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
818 if (Consume(is, ']')) {
819 if (RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array
820 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
824 for (SizeType elementCount = 0;;) {
825 ParseValue<parseFlags>(is, handler);
826 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
829 SkipWhitespaceAndComments<parseFlags>(is);
830 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
// After an element: ',' continues with the next one, ']' ends the array, anything else is an error.
832 if (Consume(is, ',')) {
833 SkipWhitespaceAndComments<parseFlags>(is);
834 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
836 else if (Consume(is, ']')) {
837 if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
838 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
842 RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell());
// When kParseTrailingCommasFlag is set, a ']' seen at this point ends the array via EndArray(elementCount).
844 if (parseFlags & kParseTrailingCommasFlag) {
845 if (is.Peek() == ']') {
846 if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
847 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
// Parses the literal beginning with 'n'. The remaining characters 'u', 'l', 'l' are matched
// one by one with Consume(). On a full match handler.Null() is invoked, and a false result
// is reported as kParseErrorTermination. Any mismatch is reported as kParseErrorValueInvalid.
855 template<unsigned parseFlags, typename InputStream, typename Handler>
856 void ParseNull(InputStream& is, Handler& handler) {
857 RAPIDJSON_ASSERT(is.Peek() == 'n');
860 if (RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') && Consume(is, 'l'))) {
861 if (RAPIDJSON_UNLIKELY(!handler.Null()))
862 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
865 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
// Parses the literal beginning with 't'. The remaining characters 'r', 'u', 'e' are matched
// one by one with Consume(). On a full match handler.Bool(true) is invoked, and a false result
// is reported as kParseErrorTermination. Any mismatch is reported as kParseErrorValueInvalid.
868 template<unsigned parseFlags, typename InputStream, typename Handler>
869 void ParseTrue(InputStream& is, Handler& handler) {
870 RAPIDJSON_ASSERT(is.Peek() == 't');
873 if (RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') && Consume(is, 'e'))) {
874 if (RAPIDJSON_UNLIKELY(!handler.Bool(true)))
875 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
878 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
// Parses the literal beginning with 'f'. The remaining characters 'a', 'l', 's', 'e' are matched
// one by one with Consume(). On a full match handler.Bool(false) is invoked, and a false result
// is reported as kParseErrorTermination. Any mismatch is reported as kParseErrorValueInvalid.
881 template<unsigned parseFlags, typename InputStream, typename Handler>
882 void ParseFalse(InputStream& is, Handler& handler) {
883 RAPIDJSON_ASSERT(is.Peek() == 'f');
886 if (RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') && Consume(is, 's') && Consume(is, 'e'))) {
887 if (RAPIDJSON_UNLIKELY(!handler.Bool(false)))
888 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
891 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
// Tests whether the next character of the stream equals `expect`.
// NOTE(review): the rest of the body is not shown here; callers treat a true result as
// "matched and consumed" — confirm.
894 template<typename InputStream>
895 RAPIDJSON_FORCEINLINE static bool Consume(InputStream& is, typename InputStream::Ch expect) {
896 if (RAPIDJSON_LIKELY(is.Peek() == expect)) {
904 // Helper function to parse four hexadecimal digits in \uXXXX in ParseString().
// escapeOffset is the offset reported with kParseErrorStringUnicodeEscapeInvalidHex when a
// non-hexadecimal character is met; with the default macro definitions 0 is returned in that case.
905 template<typename InputStream>
906 unsigned ParseHex4(InputStream& is, size_t escapeOffset) {
907 unsigned codepoint = 0;
908 for (int i = 0; i < 4; i++) {
// The raw character code is added first; the branches below then subtract the base of the
// character's range so that only the digit value (10..15 for letters) remains.
911 codepoint += static_cast<unsigned>(c);
912 if (c >= '0' && c <= '9')
914 else if (c >= 'A' && c <= 'F')
915 codepoint -= 'A' - 10;
916 else if (c >= 'a' && c <= 'f')
917 codepoint -= 'a' - 10;
919 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, escapeOffset);
920 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0);
// Output stream that stores its characters on an internal::Stack held by reference.
927 template <typename CharType>
// Binds to the given stack and starts with a length of 0.
932 StackStream(internal::Stack<StackAllocator>& stack) : stack_(stack), length_(0) {}
// Pushes room for one character onto the stack and writes c into it.
933 RAPIDJSON_FORCEINLINE void Put(Ch c) {
934 *stack_.template Push<Ch>() = c;
// Pushes room for `count` characters and returns the pointer obtained from the stack.
938 RAPIDJSON_FORCEINLINE void* Push(SizeType count) {
940 return stack_.template Push<Ch>(count);
943 size_t Length() const { return length_; }
// Pops length_ characters from the stack and returns the pointer obtained from the stack.
946 return stack_.template Pop<Ch>(length_);
// Copy construction and assignment are declared only, with no body here.
950 StackStream(const StackStream&);
951 StackStream& operator=(const StackStream&);
953 internal::Stack<StackAllocator>& stack_;
957 // Parse string and generate String event. Different code paths for kParseInsituFlag.
// isKey selects handler.Key() instead of handler.String(). A false result from the handler is
// reported as kParseErrorTermination.
958 template<unsigned parseFlags, typename InputStream, typename Handler>
959 void ParseString(InputStream& is, Handler& handler, bool isKey = false) {
960 internal::StreamLocalCopy<InputStream> copy(is);
961 InputStream& s(copy.s);
963 RAPIDJSON_ASSERT(s.Peek() == '\"');
964 s.Take(); // Skip '\"'
966 bool success = false;
// In-situ path: s is both the input and the output, so the decoded text is written back into
// the source stream. The handler receives copy = false.
967 if (parseFlags & kParseInsituFlag) {
968 typename InputStream::Ch *head = s.PutBegin();
969 ParseStringToStream<parseFlags, SourceEncoding, SourceEncoding>(s, s);
970 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
// presumably the -1 excludes the terminating '\0' written by ParseStringToStream — confirm.
971 size_t length = s.PutEnd(head) - 1;
// The length is narrowed to SizeType below, so it must fit in 32 bits.
972 RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
973 const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
974 success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false));
// Normal path: the decoded text is collected in a StackStream on stack_. The handler receives copy = true.
977 StackStream<typename TargetEncoding::Ch> stackStream(stack_);
978 ParseStringToStream<parseFlags, SourceEncoding, TargetEncoding>(s, stackStream);
979 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
// presumably the -1 excludes the terminating '\0' written by ParseStringToStream — confirm.
980 SizeType length = static_cast<SizeType>(stackStream.Length()) - 1;
981 const typename TargetEncoding::Ch* const str = stackStream.Pop();
982 success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true));
984 if (RAPIDJSON_UNLIKELY(!success))
985 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell());
988 // Parse string to an output stream
989 // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation.
990 template<unsigned parseFlags, typename SEncoding, typename TEncoding, typename InputStream, typename OutputStream>
991 RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) {
992 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
993 #define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
994 static const char escape[256] = {
995 Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '/',
996 Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0,
997 0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0,
998 0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
999 Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16
1005 // Scan and copy string before "\\\"" or < 0x20. This is an optional optimzation.
1006 if (!(parseFlags & kParseValidateEncodingFlag))
1007 ScanCopyUnescapedString(is, os);
1010 if (RAPIDJSON_UNLIKELY(c == '\\')) { // Escape
1011 size_t escapeOffset = is.Tell(); // For invalid escaping, report the initial '\\' as error offset
1014 if ((sizeof(Ch) == 1 || unsigned(e) < 256) && RAPIDJSON_LIKELY(escape[static_cast<unsigned char>(e)])) {
1016 os.Put(static_cast<typename TEncoding::Ch>(escape[static_cast<unsigned char>(e)]));
1018 else if ((parseFlags & kParseEscapedApostropheFlag) && RAPIDJSON_LIKELY(e == '\'')) { // Allow escaped apostrophe
1022 else if (RAPIDJSON_LIKELY(e == 'u')) { // Unicode
1024 unsigned codepoint = ParseHex4(is, escapeOffset);
1025 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
1026 if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
1027 // high surrogate, check if followed by valid low surrogate
1028 if (RAPIDJSON_LIKELY(codepoint <= 0xDBFF)) {
1029 // Handle UTF-16 surrogate pair
1030 if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
1031 RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
1032 unsigned codepoint2 = ParseHex4(is, escapeOffset);
1033 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
1034 if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
1035 RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
1036 codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
1038 // single low surrogate
1041 RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
1044 TEncoding::Encode(os, codepoint);
1047 RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, escapeOffset);
1049 else if (RAPIDJSON_UNLIKELY(c == '"')) { // Closing double quote
1051 os.Put('\0'); // null-terminate the string
1054 else if (RAPIDJSON_UNLIKELY(static_cast<unsigned>(c) < 0x20)) { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
1056 RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell());
1058 RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, is.Tell());
1061 size_t offset = is.Tell();
1062 if (RAPIDJSON_UNLIKELY((parseFlags & kParseValidateEncodingFlag ?
1063 !Transcoder<SEncoding, TEncoding>::Validate(is, os) :
1064 !Transcoder<SEncoding, TEncoding>::Transcode(is, os))))
1065 RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, offset);
// Generic fallback of the optional fast scan used by ParseStringToStream: for stream
// combinations without a specialised overload nothing is scanned or copied here.
1070 template<typename InputStream, typename OutputStream>
1071 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream&, OutputStream&) {
1072 // Do nothing for generic version
1075 #if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
1076 // StringStream -> StackStream<char>
// SSE2 / SSE4.2 overload: scans the input for the first byte that needs special handling
// ('"', '\\' or a value below 0x20) and copies the bytes in front of it to 'os',
// 16 bytes per iteration once the read pointer is 16-byte aligned.
1077 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
1078 const char* p = is.src_;
1080 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
// nextAligned is p rounded up to the next multiple of 16.
1081 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1082 while (p != nextAligned)
1083 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1090 // The rest of string using SIMD
// Note: despite its name, 'space' holds 0x1F (one below ' '); see the max/compare trick below.
1091 static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
1092 static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
1093 static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
1094 const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
1095 const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
1096 const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
// Aligned 16-byte load; p is a multiple of 16 at this point.
1099 const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
1100 const __m128i t1 = _mm_cmpeq_epi8(s, dq);
1101 const __m128i t2 = _mm_cmpeq_epi8(s, bs);
1102 const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
1103 const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
// One bit per byte: bit i of r is set when byte i needs special handling.
1104 unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
1105 if (RAPIDJSON_UNLIKELY(r != 0)) { // some of the characters are escaped
1107 #ifdef _MSC_VER // Find the index of first escaped
1108 unsigned long offset;
1109 _BitScanForward(&offset, r);
// __builtin_ffs is 1-based, hence the "- 1": length is the count of leading plain bytes.
1112 length = static_cast<SizeType>(__builtin_ffs(r) - 1);
// Copy only the plain prefix of this 16-byte block.
1115 char* q = reinterpret_cast<char*>(os.Push(length));
1116 for (size_t i = 0; i < length; i++)
// Whole block is plain: store all 16 bytes to the output stack.
1123 _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s);
1129 // InsituStringStream -> InsituStringStream
// SSE2 / SSE4.2 overload for in-situ parsing, where input and output are the same stream
// object (asserted). Plain bytes are moved from the read position to the write position.
1130 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
1131 RAPIDJSON_ASSERT(&is == &os);
// When read and write positions coincide there is nothing to move; only skip.
1134 if (is.src_ == is.dst_) {
1135 SkipUnescapedString(is);
1142 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
// nextAligned is p rounded up to the next multiple of 16.
1143 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1144 while (p != nextAligned)
1145 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1153 // The rest of string using SIMD
// Note: despite its name, 'space' holds 0x1F (one below ' '); see the max/compare trick below.
1154 static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
1155 static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
1156 static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
1157 const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
1158 const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
1159 const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
// Read pointer p and write pointer q advance together, 16 bytes per iteration.
1161 for (;; p += 16, q += 16) {
1162 const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
1163 const __m128i t1 = _mm_cmpeq_epi8(s, dq);
1164 const __m128i t2 = _mm_cmpeq_epi8(s, bs);
1165 const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
1166 const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
// One bit per byte: bit i of r is set when byte i needs special handling.
1167 unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
1168 if (RAPIDJSON_UNLIKELY(r != 0)) { // some of the characters are escaped
1170 #ifdef _MSC_VER // Find the index of first escaped
1171 unsigned long offset;
1172 _BitScanForward(&offset, r);
// __builtin_ffs is 1-based, hence the "- 1": length is the count of leading plain bytes.
1175 length = static_cast<size_t>(__builtin_ffs(r) - 1);
// Handle the plain prefix of this block one byte at a time.
1177 for (const char* pend = p + length; p != pend; )
// Whole block is plain: write it to q with an unaligned store (q need not be 16-byte aligned).
1181 _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s);
1188 // When read/write pointers are the same for insitu stream, just skip unescaped characters
// SSE2 / SSE4.2 version. Nothing is copied; both stream pointers are set to the position
// where scanning stopped.
1189 static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
1190 RAPIDJSON_ASSERT(is.src_ == is.dst_);
1193 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
// nextAligned is p rounded up to the next multiple of 16.
1194 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1195 for (; p != nextAligned; p++)
1196 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
// Found a character needing special handling before reaching alignment: stop here.
1197 is.src_ = is.dst_ = p;
1201 // The rest of string using SIMD
// Note: despite its name, 'space' holds 0x1F (one below ' '); see the max/compare trick below.
1202 static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
1203 static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
1204 static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
1205 const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
1206 const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
1207 const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
// Aligned 16-byte load; p is a multiple of 16 at this point.
1210 const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
1211 const __m128i t1 = _mm_cmpeq_epi8(s, dq);
1212 const __m128i t2 = _mm_cmpeq_epi8(s, bs);
1213 const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
1214 const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
// One bit per byte: bit i of r is set when byte i needs special handling.
1215 unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
1216 if (RAPIDJSON_UNLIKELY(r != 0)) { // some of the characters are escaped
1218 #ifdef _MSC_VER // Find the index of first escaped
1219 unsigned long offset;
1220 _BitScanForward(&offset, r);
// __builtin_ffs is 1-based, hence the "- 1": length is the count of leading plain bytes.
1223 length = static_cast<size_t>(__builtin_ffs(r) - 1);
// Publish the final position to both the read and the write pointer.
1230 is.src_ = is.dst_ = p;
1232 #elif defined(RAPIDJSON_NEON)
1233 // StringStream -> StackStream<char>
// NEON overload: scans the input for the first byte that needs special handling
// ('"', '\\' or a value below 32) and copies the bytes in front of it to 'os',
// 16 bytes per iteration after the initial byte-wise alignment scan.
1234 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
1235 const char* p = is.src_;
1237 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
// nextAligned is p rounded up to the next multiple of 16.
1238 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1239 while (p != nextAligned)
1240 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1247 // The rest of string using SIMD
// NOTE(review): '\b' (0x08) is already matched by the "< 32" test (s3), so the s2 comparison looks redundant.
1248 const uint8x16_t s0 = vmovq_n_u8('"');
1249 const uint8x16_t s1 = vmovq_n_u8('\\');
1250 const uint8x16_t s2 = vmovq_n_u8('\b');
1251 const uint8x16_t s3 = vmovq_n_u8(32);
1254 const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
// x accumulates a per-byte mask (0xFF where the byte needs special handling).
1255 uint8x16_t x = vceqq_u8(s, s0);
1256 x = vorrq_u8(x, vceqq_u8(s, s1));
1257 x = vorrq_u8(x, vceqq_u8(s, s2));
1258 x = vorrq_u8(x, vcltq_u8(s, s3));
// Reverse the bytes inside each 64-bit half so that the earliest byte ends up most
// significant; a count of leading zero bits divided by 8 then yields a byte index.
1260 x = vrev64q_u8(x); // Rev in 64
1261 uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
1262 uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
1264 SizeType length = 0;
1265 bool escaped = false;
1268 uint32_t lz = internal::clzll(high);
// A hit in the high half lies at byte index 8 + (leading zero bits / 8).
1269 length = 8 + (lz >> 3);
1273 uint32_t lz = internal::clzll(low);
1277 if (RAPIDJSON_UNLIKELY(escaped)) { // some of the characters are escaped
// Copy only the plain prefix of this 16-byte block.
1279 char* q = reinterpret_cast<char*>(os.Push(length));
1280 for (size_t i = 0; i < length; i++)
// Whole block is plain: store all 16 bytes to the output stack.
1287 vst1q_u8(reinterpret_cast<uint8_t *>(os.Push(16)), s);
1293 // InsituStringStream -> InsituStringStream
// NEON overload for in-situ parsing, where input and output are the same stream object
// (asserted). Plain bytes are moved from the read position to the write position.
1294 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
1295 RAPIDJSON_ASSERT(&is == &os);
// When read and write positions coincide there is nothing to move; only skip.
1298 if (is.src_ == is.dst_) {
1299 SkipUnescapedString(is);
1306 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
// nextAligned is p rounded up to the next multiple of 16.
1307 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1308 while (p != nextAligned)
1309 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1317 // The rest of string using SIMD
// NOTE(review): '\b' (0x08) is already matched by the "< 32" test (s3), so the s2 comparison looks redundant.
1318 const uint8x16_t s0 = vmovq_n_u8('"');
1319 const uint8x16_t s1 = vmovq_n_u8('\\');
1320 const uint8x16_t s2 = vmovq_n_u8('\b');
1321 const uint8x16_t s3 = vmovq_n_u8(32);
// Read pointer p and write pointer q advance together, 16 bytes per iteration.
1323 for (;; p += 16, q += 16) {
1324 const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p));
// x accumulates a per-byte mask (0xFF where the byte needs special handling).
1325 uint8x16_t x = vceqq_u8(s, s0);
1326 x = vorrq_u8(x, vceqq_u8(s, s1));
1327 x = vorrq_u8(x, vceqq_u8(s, s2));
1328 x = vorrq_u8(x, vcltq_u8(s, s3));
// Reverse the bytes inside each 64-bit half so that the earliest byte ends up most
// significant; a count of leading zero bits divided by 8 then yields a byte index.
1330 x = vrev64q_u8(x); // Rev in 64
1331 uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
1332 uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
1334 SizeType length = 0;
1335 bool escaped = false;
1338 uint32_t lz = internal::clzll(high);
// A hit in the high half lies at byte index 8 + (leading zero bits / 8).
1339 length = 8 + (lz >> 3);
1343 uint32_t lz = internal::clzll(low);
1347 if (RAPIDJSON_UNLIKELY(escaped)) { // some of the characters are escaped
// Handle the plain prefix of this block one byte at a time.
1348 for (const char* pend = p + length; p != pend; ) {
// Whole block is plain: write all 16 bytes to q.
1353 vst1q_u8(reinterpret_cast<uint8_t *>(q), s);
1360 // When read/write pointers are the same for insitu stream, just skip unescaped characters
// NEON version. Nothing is copied; both stream pointers are set to the position
// where scanning stopped.
1361 static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
1362 RAPIDJSON_ASSERT(is.src_ == is.dst_);
1365 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
// nextAligned is p rounded up to the next multiple of 16.
1366 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1367 for (; p != nextAligned; p++)
1368 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
// Found a character needing special handling before reaching alignment: stop here.
1369 is.src_ = is.dst_ = p;
1373 // The rest of string using SIMD
// NOTE(review): '\b' (0x08) is already matched by the "< 32" test (s3), so the s2 comparison looks redundant.
1374 const uint8x16_t s0 = vmovq_n_u8('"');
1375 const uint8x16_t s1 = vmovq_n_u8('\\');
1376 const uint8x16_t s2 = vmovq_n_u8('\b');
1377 const uint8x16_t s3 = vmovq_n_u8(32);
1380 const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p));
// x accumulates a per-byte mask (0xFF where the byte needs special handling).
1381 uint8x16_t x = vceqq_u8(s, s0);
1382 x = vorrq_u8(x, vceqq_u8(s, s1));
1383 x = vorrq_u8(x, vceqq_u8(s, s2));
1384 x = vorrq_u8(x, vcltq_u8(s, s3));
// Reverse the bytes inside each 64-bit half so that the earliest byte ends up most
// significant; counting leading zero bits then locates the first flagged byte.
1386 x = vrev64q_u8(x); // Rev in 64
1387 uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
1388 uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
1392 uint32_t lz = internal::clzll(high);
1397 uint32_t lz = internal::clzll(low);
// Publish the final position to both the read and the write pointer.
1403 is.src_ = is.dst_ = p;
1405 #endif // RAPIDJSON_NEON
// NumberStream wraps the input stream while a number is parsed. The two bool parameters
// (backup, pushOnTake) select one of the specialisations below.
1407 template<typename InputStream, bool backup, bool pushOnTake>
// <false, false>: plain pass-through. Nothing is recorded: TakePush() behaves like Take(),
// Push() discards its argument, Length() is 0 and Pop() returns a null pointer.
1410 template<typename InputStream>
1411 class NumberStream<InputStream, false, false> {
1413 typedef typename InputStream::Ch Ch;
// The reader argument is unused in this specialisation (silenced with a void cast).
1415 NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader; }
1417 RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); }
1418 RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
1419 RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); }
1420 RAPIDJSON_FORCEINLINE void Push(char) {}
1422 size_t Tell() { return is.Tell(); }
1423 size_t Length() { return 0; }
1424 const char* Pop() { return 0; }
// Copy assignment is declared without a body here.
1427 NumberStream& operator=(const NumberStream&);
// <true, false>: in addition to reading, TakePush() records each taken character in a
// StackStream<char> built on the reader's stack_, so the number text can be retrieved later.
1432 template<typename InputStream>
1433 class NumberStream<InputStream, true, false> : public NumberStream<InputStream, false, false> {
1434 typedef NumberStream<InputStream, false, false> Base;
1436 NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is), stackStream(reader.stack_) {}
1438 RAPIDJSON_FORCEINLINE Ch TakePush() {
// Record the character (narrowed to char) before consuming it from the input.
1439 stackStream.Put(static_cast<char>(Base::is.Peek()));
1440 return Base::is.Take();
1443 RAPIDJSON_FORCEINLINE void Push(char c) {
1447 size_t Length() { return stackStream.Length(); }
// Terminate the recorded text with '\0' and hand back the buffer.
1450 stackStream.Put('\0');
1451 return stackStream.Pop();
1455 StackStream<char> stackStream;
// <true, true>: like <true, false>, but a plain Take() also records the character,
// because it forwards to the base class's TakePush().
1458 template<typename InputStream>
1459 class NumberStream<InputStream, true, true> : public NumberStream<InputStream, true, false> {
1460 typedef NumberStream<InputStream, true, false> Base;
1462 NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is) {}
1464 RAPIDJSON_FORCEINLINE Ch Take() { return Base::TakePush(); }
// Parse a JSON number (and, with kParseNanAndInfFlag, NaN / Inf / Infinity) and report it via
// handler.Int / Uint / Int64 / Uint64 / Double, or via handler.RawNumber when
// kParseNumbersAsStringsFlag is set. Errors are raised through RAPIDJSON_PARSE_ERROR; a handler
// that returns false results in kParseErrorTermination at the number's start offset.
1467 template<unsigned parseFlags, typename InputStream, typename Handler>
1468 void ParseNumber(InputStream& is, Handler& handler) {
1469 internal::StreamLocalCopy<InputStream> copy(is);
// NumberStream flavour: characters are backed up when numbers are delivered as strings without
// in-situ parsing, or (when not delivered as strings) when full precision is requested.
// A plain Take() also pushes only in the former case.
1470 NumberStream<InputStream,
1471 ((parseFlags & kParseNumbersAsStringsFlag) != 0) ?
1472 ((parseFlags & kParseInsituFlag) == 0) :
1473 ((parseFlags & kParseFullPrecisionFlag) != 0),
1474 (parseFlags & kParseNumbersAsStringsFlag) != 0 &&
1475 (parseFlags & kParseInsituFlag) == 0> s(*this, copy.s);
// Remembered for error reporting and for computing the raw text length.
1477 size_t startOffset = s.Tell();
1479 bool useNanOrInf = false;
1482 bool minus = Consume(s, '-');
1484 // Parse int: zero / ( digit1-9 *DIGIT )
1487 bool use64bit = false;
1488 int significandDigit = 0;
1489 if (RAPIDJSON_UNLIKELY(s.Peek() == '0')) {
1493 else if (RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9')) {
1494 i = static_cast<unsigned>(s.TakePush() - '0');
// 32-bit accumulation with overflow look-ahead: 214748364 * 10 + 8 == 2^31, so a further
// digit fits only if i is below the threshold or the digit is at most '8'.
1497 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1498 if (RAPIDJSON_UNLIKELY(i >= 214748364)) { // 2^31 = 2147483648
1499 if (RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8')) {
1505 i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
// Same idea with the unsigned limit: 429496729 * 10 + 5 == 2^32 - 1.
1509 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1510 if (RAPIDJSON_UNLIKELY(i >= 429496729)) { // 2^32 - 1 = 4294967295
1511 if (RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5')) {
1517 i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
1521 // Parse NaN or Infinity here
1522 else if ((parseFlags & kParseNanAndInfFlag) && RAPIDJSON_LIKELY((s.Peek() == 'I' || s.Peek() == 'N'))) {
1523 if (Consume(s, 'N')) {
1524 if (Consume(s, 'a') && Consume(s, 'N')) {
1525 d = std::numeric_limits<double>::quiet_NaN();
1529 else if (RAPIDJSON_LIKELY(Consume(s, 'I'))) {
1530 if (Consume(s, 'n') && Consume(s, 'f')) {
1531 d = (minus ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity());
// After "Inf", an "inity" suffix is optional, but once it starts it must be complete.
1534 if (RAPIDJSON_UNLIKELY(s.Peek() == 'i' && !(Consume(s, 'i') && Consume(s, 'n')
1535 && Consume(s, 'i') && Consume(s, 't') && Consume(s, 'y')))) {
1536 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
1541 if (RAPIDJSON_UNLIKELY(!useNanOrInf)) {
1542 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
1546 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
1549 bool useDouble = false;
// 64-bit accumulation with the same look-ahead scheme:
// 0x0CCCCCCCCCCCCCCC * 10 + 8 == 2^63; beyond that the value is carried on as a double.
1552 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1553 if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC))) // 2^63 = 9223372036854775808
1554 if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8')) {
1555 d = static_cast<double>(i64);
1559 i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
// 0x1999999999999999 * 10 + 5 == 2^64 - 1.
1563 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1564 if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999))) // 2^64 - 1 = 18446744073709551615
1565 if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5')) {
1566 d = static_cast<double>(i64);
1570 i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1575 // Force double for big integer
1577 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1578 d = d * 10 + (s.TakePush() - '0');
1582 // Parse frac = decimal-point 1*DIGIT
1584 size_t decimalPosition;
1585 if (Consume(s, '.')) {
// Position of the decimal point within the backed-up digit text.
1586 decimalPosition = s.Length();
// At least one digit must follow the decimal point.
1588 if (RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9')))
1589 RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell());
1593 // Use i64 to store significand in 64-bit architecture
1597 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1598 if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path
1601 i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1608 d = static_cast<double>(i64);
1610 // Use double to store significand in 32-bit architecture
1611 d = static_cast<double>(use64bit ? i64 : i);
// Remaining fraction digits: only the first 17 significant digits contribute to d.
1616 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1617 if (significandDigit < 17) {
1618 d = d * 10.0 + (s.TakePush() - '0');
1620 if (RAPIDJSON_LIKELY(d > 0.0))
1628 decimalPosition = s.Length(); // decimal position at the end of integer.
1630 // Parse exp = e [ minus / plus ] 1*DIGIT
1632 if (Consume(s, 'e') || Consume(s, 'E')) {
1634 d = static_cast<double>(use64bit ? i64 : i);
1638 bool expMinus = false;
1639 if (Consume(s, '+'))
1641 else if (Consume(s, '-'))
1644 if (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1645 exp = static_cast<int>(s.Take() - '0');
1647 // (exp + expFrac) must not underflow int => we're detecting when -exp gets
1648 // dangerously close to INT_MIN (a pessimistic next digit 9 would push it into
1649 // underflow territory):
1651 // -(exp * 10 + 9) + expFrac >= INT_MIN
1652 // <=> exp <= (expFrac - INT_MIN - 9) / 10
1653 RAPIDJSON_ASSERT(expFrac <= 0);
// 2147483639 == -INT_MIN - 9 for a 32-bit int.
1654 int maxExp = (expFrac + 2147483639) / 10;
1656 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1657 exp = exp * 10 + static_cast<int>(s.Take() - '0');
1658 if (RAPIDJSON_UNLIKELY(exp > maxExp)) {
1659 while (RAPIDJSON_UNLIKELY(s.Peek() >= '0' && s.Peek() <= '9')) // Consume the rest of exponent
1664 else { // positive exp
// NOTE(review): 308 is presumably the largest decimal exponent of a finite double; confirm.
1665 int maxExp = 308 - expFrac;
1666 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1667 exp = exp * 10 + static_cast<int>(s.Take() - '0');
1668 if (RAPIDJSON_UNLIKELY(exp > maxExp))
1669 RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
// 'e' / 'E' (and optional sign) without any digit.
1674 RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissExponent, s.Tell());
1680 // Finish parsing, call event according to the type of number.
1683 if (parseFlags & kParseNumbersAsStringsFlag) {
1684 if (parseFlags & kParseInsituFlag) {
1685 s.Pop(); // Pop stack no matter if it will be used or not.
// In-situ: report the number text directly from the input buffer.
1686 typename InputStream::Ch* head = is.PutBegin();
1687 const size_t length = s.Tell() - startOffset;
1688 RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
1689 // unable to insert the \0 character here, it will erase the comma after this number
1690 const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
1691 cont = handler.RawNumber(str, SizeType(length), false);
// Not in-situ: transcode the backed-up characters (treated as UTF-8) into TargetEncoding
// on the stack, terminate with '\0', and report the result.
1694 SizeType numCharsToCopy = static_cast<SizeType>(s.Length());
1695 StringStream srcStream(s.Pop());
1696 StackStream<typename TargetEncoding::Ch> dstStream(stack_);
1697 while (numCharsToCopy--) {
1698 Transcoder<UTF8<>, TargetEncoding>::Transcode(srcStream, dstStream);
1700 dstStream.Put('\0');
1701 const typename TargetEncoding::Ch* str = dstStream.Pop();
// "- 1" excludes the terminating '\0' written above.
1702 const SizeType length = static_cast<SizeType>(dstStream.Length()) - 1;
1703 cont = handler.RawNumber(str, SizeType(length), true);
1707 size_t length = s.Length();
1708 const char* decimal = s.Pop(); // Pop stack no matter if it will be used or not.
// Combined decimal exponent: explicit exponent plus the (non-positive) fraction shift.
1711 int p = exp + expFrac;
1712 if (parseFlags & kParseFullPrecisionFlag)
1713 d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp);
1715 d = internal::StrtodNormalPrecision(d, p);
1717 // Use > max, instead of == inf, to fix bogus warning -Wfloat-equal
1718 if (d > (std::numeric_limits<double>::max)()) {
1720 // TODO: internal::StrtodX should report overflow (or underflow)
1721 RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
1724 cont = handler.Double(minus ? -d : d);
1726 else if (useNanOrInf) {
1727 cont = handler.Double(d);
// Negate in unsigned arithmetic (~x + 1) before converting to the signed type.
1732 cont = handler.Int64(static_cast<int64_t>(~i64 + 1));
1734 cont = handler.Uint64(i64);
1738 cont = handler.Int(static_cast<int32_t>(~i + 1));
1740 cont = handler.Uint(i);
// A handler that returns false stops the parse.
1744 if (RAPIDJSON_UNLIKELY(!cont))
1745 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, startOffset);
1748 // Parse any JSON value
// Dispatches on the next character of the stream (peeked, not consumed) to the matching
// Parse* routine, each of which reports its result to 'handler'.
1749 template<unsigned parseFlags, typename InputStream, typename Handler>
1750 void ParseValue(InputStream& is, Handler& handler) {
1751 switch (is.Peek()) {
1752 case 'n': ParseNull <parseFlags>(is, handler); break;
1753 case 't': ParseTrue <parseFlags>(is, handler); break;
1754 case 'f': ParseFalse <parseFlags>(is, handler); break;
1755 case '"': ParseString<parseFlags>(is, handler); break;
1756 case '{': ParseObject<parseFlags>(is, handler); break;
1757 case '[': ParseArray <parseFlags>(is, handler); break;
// Anything else is handed to the number parser, which raises the error for invalid input.
1759 ParseNumber<parseFlags>(is, handler);
1765 // Iterative Parsing
// States of the iterative parser. The numeric values are used as row indices of the
// transition table in Predict(), so the order here must match the order of its rows.
1768 enum IterativeParsingState {
1769 IterativeParsingFinishState = 0, // sink states at top
1770 IterativeParsingErrorState, // sink states at top
1771 IterativeParsingStartState,
1774 IterativeParsingObjectInitialState,
1775 IterativeParsingMemberKeyState,
1776 IterativeParsingMemberValueState,
1777 IterativeParsingObjectFinishState,
1780 IterativeParsingArrayInitialState,
1781 IterativeParsingElementState,
1782 IterativeParsingArrayFinishState,
1784 // Single value state
1785 IterativeParsingValueState,
1787 // Delimiter states (at bottom)
1788 IterativeParsingElementDelimiterState,
1789 IterativeParsingMemberDelimiterState,
1790 IterativeParsingKeyValueDelimiterState,
// Number of states; used as the first dimension of the transition table.
1792 cIterativeParsingStateCount
// Token values double as column indices of the transition table in Predict().
1797 LeftBracketToken = 0,
1800 LeftCurlyBracketToken,
1801 RightCurlyBracketToken,
// Classify a character as a Token using a 256-entry lookup table.
// Every character that is not listed explicitly maps to NumberToken (abbreviated N).
1815 RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) const {
1817 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
1818 #define N NumberToken
1819 #define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N
1820 // Maps from ASCII to Token
1821 static const unsigned char tokenMap[256] = {
1824 N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F
1825 N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F
1827 N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F
1828 N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F
1829 N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F
1830 N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF
// The table is consulted only when the character value fits in one byte.
1836 if (sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256)
1837 return static_cast<Token>(tokenMap[static_cast<unsigned char>(c)]);
// Look up the candidate next state for (state, token) in a static transition table.
// Rows are indexed by IterativeParsingState and columns by Token, so both must be listed
// in the order of their enum declarations. Sink states map every token to the error state.
1842 RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) const {
1843 // current state x one lookahead token -> new state
1844 static const char G[cIterativeParsingStateCount][kTokenCount] = {
1845 // Finish(sink state)
1847 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1848 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1849 IterativeParsingErrorState
1851 // Error(sink state)
1853 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1854 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1855 IterativeParsingErrorState
1859 IterativeParsingArrayInitialState, // Left bracket
1860 IterativeParsingErrorState, // Right bracket
1861 IterativeParsingObjectInitialState, // Left curly bracket
1862 IterativeParsingErrorState, // Right curly bracket
1863 IterativeParsingErrorState, // Comma
1864 IterativeParsingErrorState, // Colon
1865 IterativeParsingValueState, // String
1866 IterativeParsingValueState, // False
1867 IterativeParsingValueState, // True
1868 IterativeParsingValueState, // Null
1869 IterativeParsingValueState // Number
1873 IterativeParsingErrorState, // Left bracket
1874 IterativeParsingErrorState, // Right bracket
1875 IterativeParsingErrorState, // Left curly bracket
1876 IterativeParsingObjectFinishState, // Right curly bracket
1877 IterativeParsingErrorState, // Comma
1878 IterativeParsingErrorState, // Colon
1879 IterativeParsingMemberKeyState, // String
1880 IterativeParsingErrorState, // False
1881 IterativeParsingErrorState, // True
1882 IterativeParsingErrorState, // Null
1883 IterativeParsingErrorState // Number
1887 IterativeParsingErrorState, // Left bracket
1888 IterativeParsingErrorState, // Right bracket
1889 IterativeParsingErrorState, // Left curly bracket
1890 IterativeParsingErrorState, // Right curly bracket
1891 IterativeParsingErrorState, // Comma
1892 IterativeParsingKeyValueDelimiterState, // Colon
1893 IterativeParsingErrorState, // String
1894 IterativeParsingErrorState, // False
1895 IterativeParsingErrorState, // True
1896 IterativeParsingErrorState, // Null
1897 IterativeParsingErrorState // Number
1901 IterativeParsingErrorState, // Left bracket
1902 IterativeParsingErrorState, // Right bracket
1903 IterativeParsingErrorState, // Left curly bracket
1904 IterativeParsingObjectFinishState, // Right curly bracket
1905 IterativeParsingMemberDelimiterState, // Comma
1906 IterativeParsingErrorState, // Colon
1907 IterativeParsingErrorState, // String
1908 IterativeParsingErrorState, // False
1909 IterativeParsingErrorState, // True
1910 IterativeParsingErrorState, // Null
1911 IterativeParsingErrorState // Number
1913 // ObjectFinish(sink state)
1915 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1916 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1917 IterativeParsingErrorState
1921 IterativeParsingArrayInitialState, // Left bracket(push Element state)
1922 IterativeParsingArrayFinishState, // Right bracket
1923 IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
1924 IterativeParsingErrorState, // Right curly bracket
1925 IterativeParsingErrorState, // Comma
1926 IterativeParsingErrorState, // Colon
1927 IterativeParsingElementState, // String
1928 IterativeParsingElementState, // False
1929 IterativeParsingElementState, // True
1930 IterativeParsingElementState, // Null
1931 IterativeParsingElementState // Number
1935 IterativeParsingErrorState, // Left bracket
1936 IterativeParsingArrayFinishState, // Right bracket
1937 IterativeParsingErrorState, // Left curly bracket
1938 IterativeParsingErrorState, // Right curly bracket
1939 IterativeParsingElementDelimiterState, // Comma
1940 IterativeParsingErrorState, // Colon
1941 IterativeParsingErrorState, // String
1942 IterativeParsingErrorState, // False
1943 IterativeParsingErrorState, // True
1944 IterativeParsingErrorState, // Null
1945 IterativeParsingErrorState // Number
1947 // ArrayFinish(sink state)
1949 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1950 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1951 IterativeParsingErrorState
1953 // Single Value (sink state)
1955 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1956 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1957 IterativeParsingErrorState
1961 IterativeParsingArrayInitialState, // Left bracket(push Element state)
1962 IterativeParsingArrayFinishState, // Right bracket
1963 IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
1964 IterativeParsingErrorState, // Right curly bracket
1965 IterativeParsingErrorState, // Comma
1966 IterativeParsingErrorState, // Colon
1967 IterativeParsingElementState, // String
1968 IterativeParsingElementState, // False
1969 IterativeParsingElementState, // True
1970 IterativeParsingElementState, // Null
1971 IterativeParsingElementState // Number
1975 IterativeParsingErrorState, // Left bracket
1976 IterativeParsingErrorState, // Right bracket
1977 IterativeParsingErrorState, // Left curly bracket
1978 IterativeParsingObjectFinishState, // Right curly bracket
1979 IterativeParsingErrorState, // Comma
1980 IterativeParsingErrorState, // Colon
1981 IterativeParsingMemberKeyState, // String
1982 IterativeParsingErrorState, // False
1983 IterativeParsingErrorState, // True
1984 IterativeParsingErrorState, // Null
1985 IterativeParsingErrorState // Number
1987 // KeyValueDelimiter
1989 IterativeParsingArrayInitialState, // Left bracket(push MemberValue state)
1990 IterativeParsingErrorState, // Right bracket
1991 IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state)
1992 IterativeParsingErrorState, // Right curly bracket
1993 IterativeParsingErrorState, // Comma
1994 IterativeParsingErrorState, // Colon
1995 IterativeParsingMemberValueState, // String
1996 IterativeParsingMemberValueState, // False
1997 IterativeParsingMemberValueState, // True
1998 IterativeParsingMemberValueState, // Null
1999 IterativeParsingMemberValueState // Number
// The table stores states as char; convert back to the enum type for the caller.
2003 return static_cast<IterativeParsingState>(G[state][token]);
2006 // Make an advance in the token stream and state based on the candidate destination state which was returned by Predict().
2007 // May return a new state on state pop.
//
// src:     state the parser was in before this token (IterativeParse() passes its current state).
// token:   token class of the character at the head of the stream, as produced by Tokenize().
// dst:     candidate destination state computed by Predict(src, token); the case labels below are destination states.
// is:      input stream handed to ParseString()/ParseValue(); is.Tell() supplies the offset for reported errors.
// handler: receives StartObject()/StartArray()/EndObject()/EndArray() from this function, plus whatever
//          ParseString()/ParseValue() report to it.
// Returns IterativeParsingErrorState whenever a parse error was raised or detected in the branches below.
2008 template <unsigned parseFlags, typename InputStream, typename Handler>
2009 RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) {
2013 case IterativeParsingErrorState:
2016 case IterativeParsingObjectInitialState:
2017 case IterativeParsingArrayInitialState:
2019 // Push the state (Element or MemberValue) if we are nested in another array or in the value of a member.
2020 // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop.
2021 IterativeParsingState n = src;
2022 if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState)
2023 n = IterativeParsingElementState;
2024 else if (src == IterativeParsingKeyValueDelimiterState)
2025 n = IterativeParsingMemberValueState;
2026 // Push the state to resume in once this object/array is finished.
2027 *stack_.template Push<SizeType>(1) = n;
2028 // Initialize and push the member/element count.
2029 *stack_.template Push<SizeType>(1) = 0;
2031 bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray();
2032 // If the handler short-circuits the parsing, report it as kParseErrorTermination.
2034 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
2035 return IterativeParsingErrorState;
2043 case IterativeParsingMemberKeyState:
2044 ParseString<parseFlags>(is, handler, true); // NOTE(review): `true` presumably marks the string as an object key -- confirm against ParseString().
2045 if (HasParseError())
2046 return IterativeParsingErrorState;
2050 case IterativeParsingKeyValueDelimiterState:
2051 RAPIDJSON_ASSERT(token == ColonToken);
2055 case IterativeParsingMemberValueState:
2056 // Must be a non-compound value; otherwise dst would be the ObjectInitial or ArrayInitial state.
2057 ParseValue<parseFlags>(is, handler);
2058 if (HasParseError()) {
2059 return IterativeParsingErrorState;
2063 case IterativeParsingElementState:
2064 // Must be a non-compound value; otherwise dst would be the ObjectInitial or ArrayInitial state.
2065 ParseValue<parseFlags>(is, handler);
2066 if (HasParseError()) {
2067 return IterativeParsingErrorState;
2071 case IterativeParsingMemberDelimiterState:
2072 case IterativeParsingElementDelimiterState:
2074 // Update the member/element count kept on top of the stack.
2075 *stack_.template Top<SizeType>() = *stack_.template Top<SizeType>() + 1;
2078 case IterativeParsingObjectFinishState:
2080 // Transit from a delimiter is only allowed when trailing commas are enabled.
2081 if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingMemberDelimiterState) {
2082 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell());
2083 return IterativeParsingErrorState;
2085 // Get the member count (pushed last, so popped first).
2086 SizeType c = *stack_.template Pop<SizeType>(1);
2087 // If the object is not empty, count the last member.
2088 if (src == IterativeParsingMemberValueState)
2090 // Restore the state that was saved when this object was opened.
2091 IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
2092 // Transit to the Finish state if this is the topmost scope.
2093 if (n == IterativeParsingStartState)
2094 n = IterativeParsingFinishState;
2096 bool hr = handler.EndObject(c);
2097 // If the handler short-circuits the parsing, report it as kParseErrorTermination.
2099 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
2100 return IterativeParsingErrorState;
2108 case IterativeParsingArrayFinishState:
2110 // Transit from a delimiter is only allowed when trailing commas are enabled.
2111 if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingElementDelimiterState) {
2112 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorValueInvalid, is.Tell());
2113 return IterativeParsingErrorState;
2115 // Get the element count (pushed last, so popped first).
2116 SizeType c = *stack_.template Pop<SizeType>(1);
2117 // If the array is not empty, count the last element.
2118 if (src == IterativeParsingElementState)
2120 // Restore the state that was saved when this array was opened.
2121 IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
2122 // Transit to the Finish state if this is the topmost scope.
2123 if (n == IterativeParsingStartState)
2124 n = IterativeParsingFinishState;
2126 bool hr = handler.EndArray(c);
2127 // If the handler short-circuits the parsing, report it as kParseErrorTermination.
2129 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
2130 return IterativeParsingErrorState;
2139 // This branch is actually for IterativeParsingValueState.
2140 // Using `default:` rather than
2141 // `case IterativeParsingValueState:` is for code coverage.
2143 // The IterativeParsingStartState is not enumerated in this switch-case.
2144 // It is impossible for that case, and it would be caught by the following assertion.
2146 // The IterativeParsingFinishState is not enumerated in this switch-case either.
2147 // It is a "derivative" state which cannot be triggered from Predict() directly.
2148 // Therefore it cannot happen here, and it would be caught by the following assertion.
2149 RAPIDJSON_ASSERT(dst == IterativeParsingValueState);
2151 // Must be a non-compound value; otherwise dst would be the ObjectInitial or ArrayInitial state.
2152 ParseValue<parseFlags>(is, handler);
2153 if (HasParseError()) {
2154 return IterativeParsingErrorState;
2156 return IterativeParsingFinishState;
// Records the parse error that corresponds to the state `src` in which parsing could not continue.
// The error offset is taken from is.Tell(). Mapping implemented by the cases below:
//   Start                                        -> kParseErrorDocumentEmpty
//   Finish                                       -> kParseErrorDocumentRootNotSingular
//   ObjectInitial, MemberDelimiter               -> kParseErrorObjectMissName
//   MemberKey                                    -> kParseErrorObjectMissColon
//   MemberValue                                  -> kParseErrorObjectMissCommaOrCurlyBracket
//   KeyValueDelimiter, ArrayInitial,
//   ElementDelimiter                             -> kParseErrorValueInvalid
//   anything else (asserted to be Element)       -> kParseErrorArrayMissCommaOrSquareBracket
2160 template <typename InputStream>
2161 void HandleError(IterativeParsingState src, InputStream& is) {
2162 if (HasParseError()) {
2163 // The error flag has already been set (HasParseError() is true).
2168 case IterativeParsingStartState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return;
2169 case IterativeParsingFinishState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return;
2170 case IterativeParsingObjectInitialState:
2171 case IterativeParsingMemberDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return;
2172 case IterativeParsingMemberKeyState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return;
2173 case IterativeParsingMemberValueState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return;
2174 case IterativeParsingKeyValueDelimiterState:
2175 case IterativeParsingArrayInitialState:
2176 case IterativeParsingElementDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); return;
2177 default: RAPIDJSON_ASSERT(src == IterativeParsingElementState); RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return;
// Returns true when `s` compares greater than or equal to IterativeParsingElementDelimiterState.
// NOTE(review): this relies on the enumerator order of IterativeParsingState, which is declared outside
// this excerpt; presumably only the delimiter states sit at or above that value -- confirm before
// reordering the enum.
2181 RAPIDJSON_FORCEINLINE bool IsIterativeParsingDelimiterState(IterativeParsingState s) const {
2182 return s >= IterativeParsingElementDelimiterState;
// Returns true when `s` compares less than or equal to IterativeParsingErrorState.
// NOTE(review): this relies on the enumerator order of IterativeParsingState, which is declared outside
// this excerpt; presumably only the terminal states (e.g. Finish and Error) sit at or below that
// value -- confirm before reordering the enum.
2185 RAPIDJSON_FORCEINLINE bool IsIterativeParsingCompleteState(IterativeParsingState s) const {
2186 return s <= IterativeParsingErrorState;
// Driver loop of the state-machine parser.
// Clears the previous parse result, starts in IterativeParsingStartState and, while the stream does not
// yield '\0', classifies the next character with Tokenize(), asks Predict() for the candidate state and
// lets Transit() act on it. Whitespace (and comments, depending on parseFlags) is skipped before the
// first token and again inside the loop.
//
// is:      input stream to parse.
// handler: event receiver forwarded to Transit().
// Returns parseResult_, which carries any error recorded by Transit(), HandleError() or the
// whitespace/comment skipping.
2189 template <unsigned parseFlags, typename InputStream, typename Handler>
2190 ParseResult IterativeParse(InputStream& is, Handler& handler) {
2191 parseResult_.Clear();
2192 ClearStackOnExit scope(*this);
2193 IterativeParsingState state = IterativeParsingStartState;
2195 SkipWhitespaceAndComments<parseFlags>(is);
2196 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
2197 while (is.Peek() != '\0') {
2198 Token t = Tokenize(is.Peek());
2199 IterativeParsingState n = Predict(state, t);
2200 IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
2202 if (d == IterativeParsingErrorState) {
2203 HandleError(state, is); // report the error against the state we were in before the failed transition
2209 // Do not consume the stream any further once a root JSON value has been parsed (kParseStopWhenDoneFlag).
2210 if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState)
2213 SkipWhitespaceAndComments<parseFlags>(is);
2214 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
2217 // Handle the end of input: stopping in any state other than Finish is an error.
2218 if (state != IterativeParsingFinishState)
2219 HandleError(state, is);
2221 return parseResult_;
2224 static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string.
2225 internal::Stack<StackAllocator> stack_; //!< Stack for storing a decoded string temporarily during non-destructive parsing; Transit() also pushes saved states and member/element counts onto it.
2226 ParseResult parseResult_; //!< Result of the most recent parse; cleared at the start of IterativeParse() and returned by it.
2227 IterativeParsingState state_; //!< NOTE(review): not referenced in the code visible here; presumably the current state for token-by-token parsing -- confirm.
2228 }; // class GenericReader
2230 //! Reader with UTF8 for both encoding parameters of GenericReader and the default allocator.
2231 typedef GenericReader<UTF8<>, UTF8<> > Reader;
2233 RAPIDJSON_NAMESPACE_END
2235 #if defined(__clang__) || defined(_MSC_VER)
2244 #endif // RAPIDJSON_READER_H_