From 4ce100fa622d97de51d7cccde2301653ee95bbe7 Mon Sep 17 00:00:00 2001
From: isaacs
Date: Mon, 14 Jun 2010 21:13:09 -0700
Subject: [PATCH] A replacement for decodeURIComponent that doesn't throw.

And add a few more tests.
---
Reviewer notes (ignored by git-am, not part of the commit message):

 * Line breaks and indentation were reconstructed from a copy of this patch
   whose whitespace had been collapsed. Context lines assume the tree's
   2-space indentation; verify them against the target files before applying.
 * UrlDecode no longer copies its input with strdup(). The copy stopped at the
   first embedded NUL while the loop still ran to the full UTF-8 length, so
   it read and wrote past the end of the allocation. The allocation is now
   checked, and the scratch variables are initialised.
 * lib/querystring.js keeps the semicolon after .reduce(mergeParams).
 * Template arguments (Handle<Value>, Local<String>, Handle<Object>) that had
   been stripped from the copy are restored.
 * The commit id in the first "From" line and the blob ids in the "index"
   lines still refer to the original revision of this patch.

 lib/querystring.js              |  32 ++++--------
 src/node_http_parser.cc         | 103 ++++++++++++++++++++++++++++++++++++
 test/simple/test-querystring.js |   4 +-
 3 files changed, 116 insertions(+), 23 deletions(-)

diff --git a/lib/querystring.js b/lib/querystring.js
index df19123..eb415bc 100644
--- a/lib/querystring.js
+++ b/lib/querystring.js
@@ -1,10 +1,10 @@
 // Query String Utilities
 
 var QueryString = exports;
+var urlDecode = process.binding('http_parser').urlDecode;
 
-QueryString.unescape = function (str, decodeSpaces) {
-  return decodeURIComponent(decodeSpaces ? str.replace(/\+/g, " ") : str);
-};
+// a safe fast alternative to decodeURIComponent
+QueryString.unescape = urlDecode;
 
 QueryString.escape = function (str) {
   return encodeURIComponent(str);
@@ -25,17 +25,17 @@ var stack = [];
  * @param name {String} (optional) Name of the current key, for handling children recursively.
  * @static
  */
-QueryString.stringify = function (obj, sep, eq, munge, name) {
+QueryString.stringify = QueryString.encode = function (obj, sep, eq, munge, name) {
   munge = typeof(munge) == "undefined" ? true : munge;
   sep = sep || "&";
   eq = eq || "=";
   if (isA(obj, null) || isA(obj, undefined) || typeof(obj) === 'function') {
-    return name ? encodeURIComponent(name) + eq : '';
+    return name ? QueryString.escape(name) + eq : '';
   }
 
   if (isBool(obj)) obj = +obj;
   if (isNumber(obj) || isString(obj)) {
-    return encodeURIComponent(name) + eq + encodeURIComponent(obj);
+    return QueryString.escape(name) + eq + QueryString.escape(obj);
   }
   if (isA(obj, [])) {
     var s = [];
@@ -71,7 +71,7 @@ QueryString.stringify = function (obj, sep, eq, munge, name) {
   return s;
 };
 
-QueryString.parseQuery = QueryString.parse = function (qs, sep, eq) {
+QueryString.parse = QueryString.decode = function (qs, sep, eq) {
   return (qs || '')
     .split(sep||"&")
     .map(pieceParser(eq||"="))
@@ -87,26 +87,14 @@ QueryString.parseQuery = QueryString.parse = function (qs, sep, eq) {
 // return parse(foo[bar], [{bla:"baz"}])
 // return parse(foo, {bar:[{bla:"baz"}]})
 // return {foo:{bar:[{bla:"baz"}]}}
-var trimmerPattern = /^\s+|\s+$/g,
-  slicerPattern = /(.*)\[([^\]]*)\]$/;
+var slicerPattern = /(.*)\[([^\]]*)\]$/;
 var pieceParser = function (eq) {
   return function parsePiece (key, val) {
     if (arguments.length !== 2) {
       // key=val, called from the map/reduce
       key = key.split(eq);
-      return parsePiece(
-        QueryString.unescape(key.shift(), true),
-        QueryString.unescape(key.join(eq), true)
-      );
-    }
-    key = key.replace(trimmerPattern, '');
-    if (isString(val)) {
-      val = val.replace(trimmerPattern, '');
-      // convert numerals to numbers
-      if (!isNaN(val)) {
-        var numVal = +val;
-        if (val === numVal.toString(10)) val = numVal;
-      }
+      return parsePiece(QueryString.unescape(key.shift(), true),
+                        QueryString.unescape(key.join(eq), true));
     }
     var sliced = slicerPattern.exec(key);
     if (!sliced) {
diff --git a/src/node_http_parser.cc b/src/node_http_parser.cc
index ac25570..6d2e2b3 100644
--- a/src/node_http_parser.cc
+++ b/src/node_http_parser.cc
@@ -315,6 +315,108 @@ class Parser : public ObjectWrap {
 };
 
 
+// Percent-decodes a string without throwing on malformed escapes.
+//
+//   args[0]  string to decode; anything else raises a TypeError.
+//   args[1]  when it is the value `true`, '+' decodes to a space.
+//
+// A '%' that is not followed by two hex digits is copied through
+// unchanged, along with the characters that followed it.
+static Handle<Value> UrlDecode (const Arguments& args) {
+  HandleScope scope;
+
+  if (!args[0]->IsString()) {
+    return ThrowException(Exception::TypeError(
+          String::New("First arg must be a string")));
+  }
+
+  bool decode_spaces = args[1]->IsTrue();
+
+  String::Utf8Value in_v(args[0]->ToString());
+  const char* in = *in_v;
+  size_t l = in_v.length();
+
+  // Decoding never lengthens the text, so l + 1 bytes always suffice.
+  // The input is read through its explicit length instead of a strdup()
+  // copy: strdup() stops at the first embedded NUL, which left the loop
+  // below reading past the end of the copy.
+  char* out = static_cast<char*>(malloc(l + 1));
+  if (out == NULL) {
+    return ThrowException(Exception::Error(
+          String::New("Out of memory")));
+  }
+
+  enum { CHAR, HEX0, HEX1 } state = CHAR;
+
+  int n = 0, m = 0;
+  char hexchar = 0;
+  size_t in_index = 0, out_index = 0;
+  char c;
+  // Deliberately runs one step past the last byte: the synthetic NUL
+  // flushes a trailing incomplete escape and is trimmed off afterwards.
+  for (; in_index <= l; in_index++) {
+    c = in_index < l ? in[in_index] : '\0';
+    switch (state) {
+      case CHAR:
+        switch (c) {
+          case '%':
+            n = 0;
+            m = 0;
+            state = HEX0;
+            break;
+          case '+':
+            if (decode_spaces) c = ' ';
+            // fall through
+          default:
+            out[out_index++] = c;
+            break;
+        }
+        break;
+
+      case HEX0:
+        state = HEX1;
+        hexchar = c;
+        if ('0' <= c && c <= '9') {
+          n = c - '0';
+        } else if ('a' <= c && c <= 'f') {
+          n = c - 'a' + 10;
+        } else if ('A' <= c && c <= 'F') {
+          n = c - 'A' + 10;
+        } else {
+          // not a hex digit: emit the '%' and this character literally
+          out[out_index++] = '%';
+          out[out_index++] = c;
+          state = CHAR;
+        }
+        break;
+
+      case HEX1:
+        state = CHAR;
+        if ('0' <= c && c <= '9') {
+          m = c - '0';
+        } else if ('a' <= c && c <= 'f') {
+          m = c - 'a' + 10;
+        } else if ('A' <= c && c <= 'F') {
+          m = c - 'A' + 10;
+        } else {
+          // second digit invalid: emit all three characters literally
+          out[out_index++] = '%';
+          out[out_index++] = hexchar;
+          out[out_index++] = c;
+          break;
+        }
+        out[out_index++] = 16*n + m;
+        break;
+    }
+  }
+
+  // out_index - 1 drops the NUL written by the final loop iteration
+  Local<String> out_v = String::New(out, out_index-1);
+  free(out);
+  return scope.Close(out_v);
+}
+
+
 void InitHttpParser(Handle<Object> target) {
   HandleScope scope;
 
@@ -327,6 +429,7 @@ void InitHttpParser(Handle<Object> target) {
   NODE_SET_PROTOTYPE_METHOD(t, "reinitialize", Parser::Reinitialize);
 
   target->Set(String::NewSymbol("HTTPParser"), t->GetFunction());
+  NODE_SET_METHOD(target, "urlDecode", UrlDecode);
 
   on_message_begin_sym = NODE_PSYMBOL("onMessageBegin");
   on_path_sym = NODE_PSYMBOL("onPath");
diff --git a/test/simple/test-querystring.js b/test/simple/test-querystring.js
index b328b4e..45d2c35 100644
--- a/test/simple/test-querystring.js
+++ b/test/simple/test-querystring.js
@@ -29,7 +29,9 @@ var qsTestCases = [
   ["foo[bar][bla]=baz&foo[bar][bla]=blo", "foo%5Bbar%5D%5Bbla%5D%5B%5D=baz&foo%5Bbar%5D%5Bbla%5D%5B%5D=blo", {"foo":{"bar":{"bla":["baz","blo"]}}}],
   ["foo[bar][][bla]=baz&foo[bar][][bla]=blo", "foo%5Bbar%5D%5B%5D%5Bbla%5D=baz&foo%5Bbar%5D%5B%5D%5Bbla%5D=blo", {"foo":{"bar":[{"bla":"baz"},{"bla":"blo"}]}}],
   ["foo[bar][bla][]=baz&foo[bar][bla][]=blo", "foo%5Bbar%5D%5Bbla%5D%5B%5D=baz&foo%5Bbar%5D%5Bbla%5D%5B%5D=blo", {"foo":{"bar":{"bla":["baz","blo"]}}}],
-  [" foo = bar ", "foo=bar", {"foo":"bar"}]
+  [" foo = bar ", "%20foo%20=%20bar%20", {" foo ":" bar "}],
+  ["foo=%zx", "foo=%25zx", {"foo":"%zx"}],
+  ["foo=%EF%BF%BD", "foo=%EF%BF%BD", {"foo" : "\ufffd" }]
 ];
 
 // [ wonkyQS, canonicalQS, obj ]
-- 
2.7.4