From 10b68524f674df1c8f447f41beae35edafb8fed9 Mon Sep 17 00:00:00 2001 From: Lars Knoll Date: Wed, 23 Jan 2013 10:39:58 +0100 Subject: [PATCH] Refactor JSON.parse Use a modified copy of the parser in QtCore to get all test cases to pass and to be a lot faster. In addition, this will later on make it possible to add support for the second argument to JSON.parse Change-Id: Iaa4cbda29db9c53c3dd5ab2b2ded71efdaa6f8ee Reviewed-by: Simon Hausmann --- qv4jsonobject.cpp | 658 +++++++++++++++++++++++++++++++++++++++++++------ qv4jsonobject.h | 5 - tests/TestExpectations | 7 +- 3 files changed, 581 insertions(+), 89 deletions(-) diff --git a/qv4jsonobject.cpp b/qv4jsonobject.cpp index d8e793e..6c0b02f 100644 --- a/qv4jsonobject.cpp +++ b/qv4jsonobject.cpp @@ -48,123 +48,625 @@ namespace QQmlJS { namespace VM { -JsonObject::JsonObject(ExecutionContext *context) - : Object() +//#define PARSER_DEBUG +#ifdef PARSER_DEBUG +static int indent = 0; +#define BEGIN qDebug() << QByteArray(4*indent++, ' ').constData() +#define END --indent +#define DEBUG qDebug() << QByteArray(4*indent, ' ').constData() +#else +#define BEGIN if (1) ; else qDebug() +#define END do {} while (0) +#define DEBUG if (1) ; else qDebug() +#endif + + +class Parser { - prototype = context->engine->objectPrototype; +public: + Parser(ExecutionContext *context, const QChar *json, int length); - defineDefaultProperty(context, QStringLiteral("parse"), method_parse, 2); - defineDefaultProperty(context, QStringLiteral("stringify"), method_stringify); -} + Value parse(QJsonParseError *error); +private: + inline bool eatSpace(); + inline QChar nextToken(); -Value JsonObject::method_parse(ExecutionContext *ctx) + Value parseObject(); + Value parseArray(); + bool parseMember(Object *o); + bool parseString(QString *string); + bool parseValue(Value *val); + bool parseNumber(Value *val); + + ExecutionContext *context; + const QChar *head; + const QChar *json; + const QChar *end; + + int nestingLevel; + QJsonParseError::ParseError lastError; +};
+ +static const int nestingLimit = 1024; + + +Parser::Parser(ExecutionContext *context, const QChar *json, int length) + : context(context), head(json), json(json), nestingLevel(0), lastError(QJsonParseError::NoError) { - QString jtext = ctx->argument(0).toString(ctx)->toQString(); + end = json + length; +} + + + +/* + +begin-array = ws %x5B ws ; [ left square bracket + +begin-object = ws %x7B ws ; { left curly bracket - const QChar *ch = jtext.constData(); - const QChar *end = ch + jtext.size(); +end-array = ws %x5D ws ; ] right square bracket - bool simple = false; - while (ch < end) { - if (*ch == ' ' || *ch == '\t' || *ch == '\n' || *ch == '\r') { - ++ch; - } else if (*ch == '[' || *ch == '{') { +end-object = ws %x7D ws ; } right curly bracket + +name-separator = ws %x3A ws ; : colon + +value-separator = ws %x2C ws ; , comma + +Insignificant whitespace is allowed before or after any of the six +structural characters. + +ws = *( + %x20 / ; Space + %x09 / ; Horizontal tab + %x0A / ; Line feed or New line + %x0D ; Carriage return + ) + +*/ + +enum { + Space = 0x20, + Tab = 0x09, + LineFeed = 0x0a, + Return = 0x0d, + BeginArray = 0x5b, + BeginObject = 0x7b, + EndArray = 0x5d, + EndObject = 0x7d, + NameSeparator = 0x3a, + ValueSeparator = 0x2c, + Quote = 0x22 +}; + +bool Parser::eatSpace() +{ + while (json < end) { + if (*json > Space) break; - } else { - // simple type - jtext.prepend('['); - jtext.append(']'); - simple = true; + if (*json != Space && + *json != Tab && + *json != LineFeed && + *json != Return) + break; + ++json; + } + return (json < end); +} + +QChar Parser::nextToken() +{ + if (!eatSpace()) + return 0; + QChar token = *json++; + switch (token.unicode()) { + case BeginArray: + case BeginObject: + case NameSeparator: + case ValueSeparator: + case EndArray: + case EndObject: + eatSpace(); + case Quote: + break; + default: + token = 0; + break; + } + return token; +} + +/* + JSON-text = object / array +*/ +Value Parser::parse(QJsonParseError *error) 
+{ +#ifdef PARSER_DEBUG + indent = 0; + qDebug() << ">>>>> parser begin"; +#endif + + eatSpace(); + + Value v; + if (!parseValue(&v)) { +#ifdef PARSER_DEBUG + qDebug() << ">>>>> parser error"; +#endif + if (lastError == QJsonParseError::NoError) + lastError = QJsonParseError::IllegalValue; + error->offset = json - head; + error->error = lastError; + return Value::undefinedValue(); + } + + // some input left... + if (eatSpace()) { + lastError = QJsonParseError::IllegalValue; + error->offset = json - head; + error->error = lastError; + return Value::undefinedValue(); + } + + END; + error->offset = 0; + error->error = QJsonParseError::NoError; + return v; +} + +/* + object = begin-object [ member *( value-separator member ) ] + end-object +*/ + +Value Parser::parseObject() +{ + if (++nestingLevel > nestingLimit) { + lastError = QJsonParseError::DeepNesting; + return Value::undefinedValue(); + } + + BEGIN << "parseObject pos=" << json; + + Object *o = context->engine->newObject(); + Value objectVal = Value::fromObject(o); + + QChar token = nextToken(); + while (token == Quote) { + if (!parseMember(o)) + return Value::undefinedValue(); + token = nextToken(); + if (token != ValueSeparator) break; + token = nextToken(); + if (token == EndObject) { + lastError = QJsonParseError::MissingObject; + return Value::undefinedValue(); } } - QJsonParseError e; - QJsonDocument doc = QJsonDocument::fromJson(jtext.toUtf8(), &e); - if (e.error != QJsonParseError::NoError) - ctx->throwSyntaxError(0); + DEBUG << "end token=" << token; + if (token != EndObject) { + lastError = QJsonParseError::UnterminatedObject; + return Value::undefinedValue(); + } - // iterate over the doc and convert to V4 types - Value result; - if (doc.isArray()) - result = convertArray(ctx, doc.array()); - else if (doc.isObject()) - result = convertObject(ctx, doc.object()); - else - result = Value::undefinedValue(); + END; + + --nestingLevel; + return objectVal; +} + +/* + member = string name-separator value +*/ 
+bool Parser::parseMember(Object *o) +{ + BEGIN << "parseMember"; + if (!o->members) + o->members.reset(new PropertyTable()); - if (simple) { - result = result.objectValue()->__get__(ctx, (uint)0); + QString key; + if (!parseString(&key)) + return false; + QChar token = nextToken(); + if (token != NameSeparator) { + lastError = QJsonParseError::MissingNameSeparator; + return false; } + Value val; + if (!parseValue(&val)) + return false; - return result; + PropertyDescriptor *p = o->members->insert(context->engine->identifier(key)); + p->value = val; + + END; + return true; } -Value JsonObject::method_stringify(ExecutionContext *ctx) +/* + array = begin-array [ value *( value-separator value ) ] end-array +*/ +Value Parser::parseArray() { - Q_UNUSED(ctx); - assert(!"Not implemented"); + BEGIN << "parseArray"; + Array array; + + if (++nestingLevel > nestingLimit) { + lastError = QJsonParseError::DeepNesting; + return Value::undefinedValue(); + } + + if (!eatSpace()) { + lastError = QJsonParseError::UnterminatedArray; + return Value::undefinedValue(); + } + if (*json == EndArray) { + nextToken(); + } else { + uint index = 0; + while (1) { + Value val; + if (!parseValue(&val)) + return Value::undefinedValue(); + array.set(index, val); + QChar token = nextToken(); + if (token == EndArray) + break; + else if (token != ValueSeparator) { + if (!eatSpace()) + lastError = QJsonParseError::UnterminatedArray; + else + lastError = QJsonParseError::MissingValueSeparator; + return Value::undefinedValue(); + } + ++index; + } + } + + DEBUG << "size =" << array.length(); + END; + + --nestingLevel; + return Value::fromObject(context->engine->newArrayObject(context, array)); } -static void checkString(ExecutionContext *context, const QString &s) +/* +value = false / null / true / object / array / number / string + +*/ + +bool Parser::parseValue(Value *val) { - const QChar *ch = s.constData(); - const QChar *end = ch + s.length(); - while (ch < end) { - if (ch->unicode() <= 0x1f) - 
context->throwSyntaxError(0); - ++ch; + BEGIN << "parse Value" << *json; + + switch ((json++)->unicode()) { + case 'n': + if (end - json < 4) { + lastError = QJsonParseError::IllegalValue; + return false; + } + if (*json++ == 'u' && + *json++ == 'l' && + *json++ == 'l') { + *val = Value::nullValue(); + DEBUG << "value: null"; + END; + return true; + } + lastError = QJsonParseError::IllegalValue; + return false; + case 't': + if (end - json < 4) { + lastError = QJsonParseError::IllegalValue; + return false; + } + if (*json++ == 'r' && + *json++ == 'u' && + *json++ == 'e') { + *val = Value::fromBoolean(true); + DEBUG << "value: true"; + END; + return true; + } + lastError = QJsonParseError::IllegalValue; + return false; + case 'f': + if (end - json < 5) { + lastError = QJsonParseError::IllegalValue; + return false; + } + if (*json++ == 'a' && + *json++ == 'l' && + *json++ == 's' && + *json++ == 'e') { + *val = Value::fromBoolean(false); + DEBUG << "value: false"; + END; + return true; + } + lastError = QJsonParseError::IllegalValue; + return false; + case Quote: { + QString value; + if (!parseString(&value)) + return false; + DEBUG << "value: string"; + END; + *val = Value::fromString(context, value); + return true; } + case BeginArray: { + *val = parseArray(); + if (val->isUndefined()) + return false; + DEBUG << "value: array"; + END; + return true; + } + case BeginObject: { + *val = parseObject(); + if (val->isUndefined()) + return false; + DEBUG << "value: object"; + END; + return true; + } + case EndArray: + lastError = QJsonParseError::MissingObject; + return false; + default: + --json; + if (!parseNumber(val)) + return false; + DEBUG << "value: number"; + END; + } + + return true; } -Value JsonObject::convertValue(ExecutionContext *context, const QJsonValue &value) + + + + +/* + number = [ minus ] int [ frac ] [ exp ] + decimal-point = %x2E ; . 
+ digit1-9 = %x31-39 ; 1-9 + e = %x65 / %x45 ; e E + exp = e [ minus / plus ] 1*DIGIT + frac = decimal-point 1*DIGIT + int = zero / ( digit1-9 *DIGIT ) + minus = %x2D ; - + plus = %x2B ; + + zero = %x30 ; 0 + +*/ + +bool Parser::parseNumber(Value *val) { - switch (value.type()) { - case QJsonValue::Null: - return Value::nullValue(); - case QJsonValue::Bool: - return Value::fromBoolean(value.toBool()); - case QJsonValue::Double: - return Value::fromDouble(value.toDouble()); - case QJsonValue::String: { - Value v = Value::fromString(context, value.toString()); - checkString(context, v.stringValue()->toQString()); - return v; + BEGIN << "parseNumber" << *json; + + const QChar *start = json; + bool isInt = true; + + // minus + if (json < end && *json == '-') + ++json; + + // int = zero / ( digit1-9 *DIGIT ) + if (json < end && *json == '0') { + ++json; + } else { + while (json < end && *json >= '0' && *json <= '9') + ++json; + } + + // frac = decimal-point 1*DIGIT + if (json < end && *json == '.') { + isInt = false; + ++json; + while (json < end && *json >= '0' && *json <= '9') + ++json; + } + + // exp = e [ minus / plus ] 1*DIGIT + if (json < end && (*json == 'e' || *json == 'E')) { + isInt = false; + ++json; + if (json < end && (*json == '-' || *json == '+')) + ++json; + while (json < end && *json >= '0' && *json <= '9') + ++json; + } + + QString number(start, json - start); + DEBUG << "numberstring" << number; + + if (isInt) { + bool ok; + int n = number.toInt(&ok); + if (ok && n < (1<<25) && n > -(1<<25)) { + *val = Value::fromInt32(n); + END; + return true; + } + } + + bool ok; + double d; + d = number.toDouble(&ok); + + if (!ok) { + lastError = QJsonParseError::IllegalNumber; + return false; + } + + * val = Value::fromDouble(d); + + END; + return true; +} + +/* + + string = quotation-mark *char quotation-mark + + char = unescaped / + escape ( + %x22 / ; " quotation mark U+0022 + %x5C / ; \ reverse solidus U+005C + %x2F / ; / solidus U+002F + %x62 / ; b backspace 
U+0008 + %x66 / ; f form feed U+000C + %x6E / ; n line feed U+000A + %x72 / ; r carriage return U+000D + %x74 / ; t tab U+0009 + %x75 4HEXDIG ) ; uXXXX U+XXXX + + escape = %x5C ; \ + + quotation-mark = %x22 ; " + + unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + */ +static inline bool addHexDigit(QChar digit, uint *result) +{ + ushort d = digit.unicode(); + *result <<= 4; + if (d >= '0' && d <= '9') + *result |= (d - '0'); + else if (d >= 'a' && d <= 'f') + *result |= (d - 'a') + 10; + else if (d >= 'A' && d <= 'F') + *result |= (d - 'A') + 10; + else + return false; + return true; +} + +static inline bool scanEscapeSequence(const QChar *&json, const QChar *end, uint *ch) +{ + ++json; + if (json >= end) + return false; + + DEBUG << "scan escape"; + uint escaped = (json++)->unicode(); + switch (escaped) { + case '"': + *ch = '"'; break; + case '\\': + *ch = '\\'; break; + case '/': + *ch = '/'; break; + case 'b': + *ch = 0x8; break; + case 'f': + *ch = 0xc; break; + case 'n': + *ch = 0xa; break; + case 'r': + *ch = 0xd; break; + case 't': + *ch = 0x9; break; + case 'u': { + *ch = 0; + if (json > end - 4) + return false; + for (int i = 0; i < 4; ++i) { + if (!addHexDigit(*json, ch)) + return false; + ++json; + } + if (*ch <= 0x1f) + return false; + return true; } - case QJsonValue::Array: - return convertArray(context, value.toArray()); - case QJsonValue::Object: - return convertObject(context, value.toObject()); default: - assert(!"internal error in JSON conversion"); - return Value::undefinedValue(); + return false; } + return true; } -Value JsonObject::convertArray(ExecutionContext *context, const QJsonArray &array) + +bool Parser::parseString(QString *string) { - ArrayObject *o = context->engine->newArrayObject(context); - for (int i = 0; i < array.size(); ++i) { - QJsonValue v = array.at(i); - o->array.set(i, convertValue(context, v)); + BEGIN << "parse string stringPos=" << json; + + while (json < end) { + if (*json == '"') + break; + else if (*json == '\\') { + 
uint ch = 0; + if (!scanEscapeSequence(json, end, &ch)) { + lastError = QJsonParseError::IllegalEscapeSequence; + return false; + } + qDebug() << "scanEscape" << hex << ch; + if (QChar::requiresSurrogates(ch)) { + *string += QChar::highSurrogate(ch); + *string += QChar::lowSurrogate(ch); + } else { + *string += QChar(ch); + } + } else { + if (json->unicode() <= 0x1f) { + lastError = QJsonParseError::IllegalEscapeSequence; + return false; + } + *string += *json; + ++json; + } + } + ++json; + + if (json > end) { + lastError = QJsonParseError::UnterminatedString; + return false; } - o->array.setLengthUnchecked(array.size()); - return Value::fromObject(o); + + END; + return true; } -Value JsonObject::convertObject(ExecutionContext *context, const QJsonObject &object) + + +JsonObject::JsonObject(ExecutionContext *context) + : Object() { - Object *o = context->engine->newObject(); - for (QJsonObject::const_iterator it = object.constBegin(); it != object.constEnd(); ++it) { - QString key = it.key(); - checkString(context, key); - QJsonValue v = it.value(); - o->__put__(context, key, convertValue(context, v)); + prototype = context->engine->objectPrototype; + + defineDefaultProperty(context, QStringLiteral("parse"), method_parse, 2); + defineDefaultProperty(context, QStringLiteral("stringify"), method_stringify); +} + + +Value JsonObject::method_parse(ExecutionContext *ctx) +{ + QString jtext = ctx->argument(0).toString(ctx)->toQString(); + + DEBUG << "parsing source = " << jtext; + Parser parser(ctx, jtext.constData(), jtext.length()); + QJsonParseError error; + Value result = parser.parse(&error); + if (error.error != QJsonParseError::NoError) { + DEBUG << "parse error" << error.errorString(); + ctx->throwSyntaxError(0); } - return Value::fromObject(o); + + return result; +} + +Value JsonObject::method_stringify(ExecutionContext *ctx) +{ + Q_UNUSED(ctx); + assert(!"Not implemented"); } + } } diff --git a/qv4jsonobject.h b/qv4jsonobject.h index 193d490..6626471 100644 --- 
a/qv4jsonobject.h +++ b/qv4jsonobject.h @@ -53,11 +53,6 @@ struct JsonObject : Object { static Value method_parse(ExecutionContext *ctx); static Value method_stringify(ExecutionContext *ctx); -private: - static Value convertArray(ExecutionContext *context, const QJsonArray &array); - static Value convertObject(ExecutionContext *context, const QJsonObject &object); - static Value convertValue(ExecutionContext *context, const QJsonValue &value); - }; } diff --git a/tests/TestExpectations b/tests/TestExpectations index b0a00e6..4740928 100644 --- a/tests/TestExpectations +++ b/tests/TestExpectations @@ -269,11 +269,6 @@ S12.7_A7 failing S12.8_A4_T1 failing S12.8_A4_T2 failing S12.8_A4_T3 failing -15.12.1.1-g6-3 failing -15.12.1.1-g6-4 failing -15.12.1.1-g6-5 failing -15.12.1.1-g6-6 failing -15.12.1.1-g6-7 failing S15.12.2_A1 failing 15.12.3-0-2 failing 15.12.3-11-1 failing @@ -514,4 +509,4 @@ S15.4.4.4_A1_T2 failing 15.4.4.21-8-b-iii-1-6 failing 15.12.3_4-1-1 -15.12.3_4-1-3 \ No newline at end of file +15.12.3_4-1-3 -- 2.7.4