exports.resolveObject = urlResolveObject;
exports.format = urlFormat;
+// Reference: RFC 3986, RFC 1808, RFC 2396
+
// define these here so at least they only have to be
// compiled once on the first module load.
-var protocolPattern = /^([a-z0-9]+:)/,
+var protocolPattern = /^([a-z0-9]+:)/i,
portPattern = /:[0-9]+$/,
- delims = ['<', '>', '"', '\'', '`', /\s/],
+ // RFC 2396: characters reserved for delimiting URLs.
+ delims = ['<', '>', '"', '`', ' ', '\r', '\n', '\t'],
+ // RFC 2396: characters not allowed for various reasons.
unwise = ['{', '}', '|', '\\', '^', '~', '[', ']', '`'].concat(delims),
- nonHostChars = ['/', '?', ';', '#'].concat(unwise),
+ // Allowed by RFCs, but a cause of XSS attacks. Always escape these.
+ autoEscape = ['\''],
+ // Characters that are never ever allowed in a hostname.
+ // Note that any invalid chars are also handled, but these
+ // are the ones that are *expected* to be seen, so we fast-path
+ // them.
+ nonHostChars = ['%', '/', '?', ';', '#']
+ .concat(unwise).concat(autoEscape),
hostnameMaxLen = 255,
- hostnamePartPattern = /^[a-z0-9][a-z0-9A-Z-]{0,62}$/,
+ hostnamePartPattern = /^[a-zA-Z0-9][a-z0-9A-Z-]{0,62}$/,
+ hostnamePartStart = /^([a-zA-Z0-9][a-z0-9A-Z-]{0,62})(.*)$/,
+ // protocols that can allow "unsafe" and "unwise" chars.
unsafeProtocol = {
'javascript': true,
'javascript:': true
},
+ // protocols that never have a hostname.
hostlessProtocol = {
'javascript': true,
'javascript:': true,
'file': true,
'file:': true
},
+ // protocols that always have a path component.
pathedProtocol = {
'http': true,
'https': true,
'gopher:': true,
'file:': true
},
+ // protocols that always contain a // bit.
slashedProtocol = {
'http': true,
'https': true,
var out = {},
rest = url;
+ // Cut off any leading delimiters.
+ // This is to support parsing input like "<http://foo.com>".
+ for (var i = 0, l = rest.length; i < l; i++) {
+ if (delims.indexOf(rest.charAt(i)) === -1) break;
+ }
+ if (i !== 0) rest = rest.substr(i);
+
+
var proto = protocolPattern.exec(rest);
if (proto) {
proto = proto[0];
- out.protocol = proto;
+ var lowerProto = proto.toLowerCase();
+ out.protocol = lowerProto;
rest = rest.substr(proto.length);
}
var key = keys[i];
out[key] = p[key];
}
+
// we've indicated that there is a hostname,
// so even if it's empty, it has to be present.
out.hostname = out.hostname || '';
var hostparts = out.hostname.split(/\./);
for (var i = 0, l = hostparts.length; i < l; i++) {
var part = hostparts[i];
+ if (!part) continue;
if (!part.match(hostnamePartPattern)) {
- out.hostname = '';
+ var validParts = hostparts.slice(0, i);
+ var notHost = hostparts.slice(i + 1);
+ var bit = part.match(hostnamePartStart);
+ if (bit) {
+ validParts.push(bit[1]);
+ notHost.unshift(bit[2]);
+ }
+ if (notHost.length) {
+ rest = '/' + notHost.join('.') + rest
+ }
+ out.hostname = validParts.join('.');
break;
}
}
}
+ // hostnames are always lower case.
+ out.hostname = out.hostname.toLowerCase();
+
+ out.host = ((out.auth) ? out.auth + '@' : '') +
+ (out.hostname || '') +
+ ((out.port) ? ':' + out.port : '');
+ out.href += out.host;
}
// now rest is set to the post-host stuff.
// chop off any delim chars.
- if (!unsafeProtocol[proto]) {
+ if (!unsafeProtocol[lowerProto]) {
+
+ // First, make 100% sure that any "autoEscape" chars get
+ // escaped, even if encodeURIComponent doesn't think they
+ // need to be.
+ for (var i = 0, l = autoEscape.length; i < l; i++) {
+ var ae = autoEscape[i];
+ var esc = encodeURIComponent(ae);
+ if (esc === ae) {
+ esc = escape(ae);
+ }
+ rest = rest.split(ae).join(esc);
+ }
+
+ // Now make sure that delims never appear in a url.
var chop = rest.length;
for (var i = 0, l = delims.length; i < l; i++) {
var c = rest.indexOf(delims[i]);
'href': '//some_path',
'pathname': '//some_path'
},
+ 'HTTP://www.example.com/' : {
+ 'href': 'http://www.example.com/',
+ 'protocol': 'http:',
+ 'host': 'www.example.com',
+ 'hostname': 'www.example.com',
+ 'pathname': '/'
+ },
+ 'http://www.ExAmPlE.com/' : {
+ 'href': 'http://www.example.com/',
+ 'protocol': 'http:',
+ 'host': 'www.example.com',
+ 'hostname': 'www.example.com',
+ 'pathname': '/'
+
+ },
+ 'http://user:pw@www.ExAmPlE.com/' : {
+ 'href': 'http://user:pw@www.example.com/',
+ 'protocol': 'http:',
+ 'auth': 'user:pw',
+ 'host': 'user:pw@www.example.com',
+ 'hostname': 'www.example.com',
+ 'pathname': '/'
+
+ },
+ 'http://USER:PW@www.ExAmPlE.com/' : {
+ 'href': 'http://USER:PW@www.example.com/',
+ 'protocol': 'http:',
+ 'auth': 'USER:PW',
+ 'host': 'USER:PW@www.example.com',
+ 'hostname': 'www.example.com',
+ 'pathname': '/'
+ },
+ 'http://x.com/path?that\'s#all, folks' : {
+ 'href': 'http://x.com/path?that%27s#all,',
+ 'protocol': 'http:',
+ 'host': 'x.com',
+ 'hostname': 'x.com',
+ 'search': '?that%27s',
+ 'query': 'that%27s',
+ 'pathname': '/path',
+ 'hash': '#all,'
+ },
+ 'HTTP://X.COM/Y' : {
+ 'href': 'http://x.com/Y',
+ 'protocol': 'http:',
+ 'host': 'x.com',
+ 'hostname': 'x.com',
+ 'pathname': '/Y',
+ },
+ // an unexpected invalid char in the hostname.
+ 'HtTp://x.y.cOm*a/b/c?d=e#f g<h>i' : {
+ 'href': 'http://x.y.com/*a/b/c?d=e#f',
+ 'protocol': 'http:',
+ 'host': 'x.y.com',
+ 'hostname': 'x.y.com',
+ 'pathname': '/*a/b/c',
+ 'search': '?d=e',
+ 'query': 'd=e',
+ 'hash': '#f'
+ },
+ // make sure that we don't accidentally lowercase the path parts.
+ 'HtTp://x.y.cOm*A/b/c?d=e#f g<h>i' : {
+ 'href': 'http://x.y.com/*A/b/c?d=e#f',
+ 'protocol': 'http:',
+ 'host': 'x.y.com',
+ 'hostname': 'x.y.com',
+ 'pathname': '/*A/b/c',
+ 'search': '?d=e',
+ 'query': 'd=e',
+ 'hash': '#f'
+ },
+ 'http://x...y...#p': {
+ 'href': 'http://x...y.../#p',
+ 'protocol': 'http:',
+ 'host': 'x...y...',
+ 'hostname': 'x...y...',
+ 'hash': '#p',
+ 'pathname': '/'
+ },
+ 'http://x/p/"quoted"': {
+ 'href': 'http://x/p/',
+ 'protocol':'http:',
+ 'host': 'x',
+ 'hostname': 'x',
+ 'pathname': '/p/'
+ },
+ '<http://goo.corn/bread> Is a URL!': {
+ 'href': 'http://goo.corn/bread',
+ 'protocol': 'http:',
+ 'host': 'goo.corn',
+ 'hostname': 'goo.corn',
+ 'pathname': '/bread'
+ },
'http://www.narwhaljs.org/blog/categories?id=news' : {
'href': 'http://www.narwhaljs.org/blog/categories?id=news',
'protocol': 'http:',
'query': '??&hl=en&src=api&x=2&y=2&z=3&s=',
'pathname': '/vt/lyrs=m@114'
},
- 'http://user:pass@mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=' : {
- 'href': 'http://user:pass@mt0.google.com/vt/lyrs=m@114???' +
- '&hl=en&src=api&x=2&y=2&z=3&s=',
- 'protocol': 'http:',
- 'host': 'user:pass@mt0.google.com',
- 'auth': 'user:pass',
- 'hostname': 'mt0.google.com',
- 'search': '???&hl=en&src=api&x=2&y=2&z=3&s=',
- 'query': '??&hl=en&src=api&x=2&y=2&z=3&s=',
- 'pathname': '/vt/lyrs=m@114'
- },
+ 'http://user:pass@mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=':
+ {
+ 'href': 'http://user:pass@mt0.google.com/vt/lyrs=m@114???' +
+ '&hl=en&src=api&x=2&y=2&z=3&s=',
+ 'protocol': 'http:',
+ 'host': 'user:pass@mt0.google.com',
+ 'auth': 'user:pass',
+ 'hostname': 'mt0.google.com',
+ 'search': '???&hl=en&src=api&x=2&y=2&z=3&s=',
+ 'query': '??&hl=en&src=api&x=2&y=2&z=3&s=',
+ 'pathname': '/vt/lyrs=m@114'
+ },
'file:///etc/passwd' : {
'href': 'file:///etc/passwd',
'protocol': 'file:',
'parse(' + u + ').' + i + ' == ' + e + '\nactual: ' + a);
}
- var expected = u,
+ var expected = parseTests[u].href,
actual = url.format(parseTests[u]);
assert.equal(expected, actual,