3 const punycode = require('punycode');
5 exports.parse = urlParse;
6 exports.resolve = urlResolve;
7 exports.resolveObject = urlResolveObject;
8 exports.format = urlFormat;
27 // Reference: RFC 3986, RFC 1808, RFC 2396
29 // define these here so at least they only have to be
30 // compiled once on the first module load.
// Matches a leading scheme together with its colon (e.g. "http:"), case-insensitively.
31 const protocolPattern = /^([a-z0-9.+-]+:)/i;
// Matches a ":" followed by zero or more digits at the very end of a string.
32 const portPattern = /:[0-9]*$/;
34 // Special case for a simple path URL
// Group 1 is the path: one or two leading slashes (never three), then any run
// of characters that are neither "?" nor whitespace. Optional group 2 is the
// "?query" part, which may not contain whitespace either.
35 const simplePathPattern = /^(\/\/?(?!\/)[^\?\s]*)(\?[^\s]*)?$/;
37 // RFC 2396: characters reserved for delimiting URLs.
38 // We actually just auto-escape these.
39 const delims = ['<', '>', '"', '`', ' ', '\r', '\n', '\t'];
41 // RFC 2396: characters not allowed for various reasons.
42 const unwise = ['{', '}', '|', '\\', '^', '`'].concat(delims);
44 // Allowed by RFCs, but a cause of XSS attacks. Always escape these.
45 const autoEscape = ['\''].concat(unwise);
47 // Characters that are never ever allowed in a hostname.
48 // Note that any invalid chars are also handled, but these
49 // are the ones that are *expected* to be seen, so we fast-path them.
50 const nonHostChars = ['%', '/', '?', ';', '#'].concat(autoEscape);
// Characters whose first occurrence bounds the search for the auth "@" sign.
51 const hostEndingChars = ['/', '?', '#'];
// Upper bound that the parsed hostname length is compared against.
52 const hostnameMaxLen = 255;
// A single dot-separated hostname label: at most 63 characters drawn from
// ASCII letters, digits, "+", "_" and "-".
53 const hostnamePartPattern = /^[+a-z0-9A-Z_-]{0,63}$/;
// Same character class, but group 1 captures the valid leading run of a label
// and group 2 captures whatever follows it.
54 const hostnamePartStart = /^([+a-z0-9A-Z_-]{0,63})(.*)$/;
// NOTE(review): the entries of the three lookup tables below are not visible
// in the reviewed excerpt; the visible code indexes them by protocol string.
55 // protocols that can allow "unsafe" and "unwise" chars.
56 const unsafeProtocol = {
60 // protocols that never have a hostname.
61 const hostlessProtocol = {
65 // protocols that always contain a // bit.
66 const slashedProtocol = {
78 const querystring = require('querystring');
// Parse a URL into a Url object.
// The three arguments are forwarded unchanged to Url.prototype.parse.
// NOTE(review): the statements that create u and return it are not visible
// in the reviewed excerpt; callers in this file use the result as a Url.
80 function urlParse(url, parseQueryString, slashesDenoteHost) {
// A value that already is a Url instance is handed back without re-parsing.
81 if (url instanceof Url) return url;
84 u.parse(url, parseQueryString, slashesDenoteHost);
// Url.prototype.parse(url, parseQueryString, slashesDenoteHost)
// Parses the string url and stores the pieces on this; the visible lines
// assign protocol, auth, host, hostname, hash, search, query, pathname and href.
//   parseQueryString  - when truthy, query is the result of querystring.parse()
//                       instead of the raw text after the "?".
//   slashesDenoteHost - when truthy, the simple-path fast path is skipped and a
//                       leading "//" is examined as a host marker even when
//                       there is no protocol.
// Throws TypeError when url is not a string.
// NOTE(review): a number of statements of this function are not visible in the
// reviewed excerpt; the comments below describe the visible lines only.
88 Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) {
// Reject non-string input up front.
89 if (typeof url !== 'string') {
90 throw new TypeError("Parameter 'url' must be a string, not " + typeof url);
93 // Copy chrome, IE, opera backslash-handling behavior.
94 // Back slashes before the query string get converted to forward slashes
95 // See: https://code.google.com/p/chromium/issues/detail?id=25916
96 var queryIndex = url.indexOf('?'),
98 (queryIndex !== -1 && queryIndex < url.indexOf('#')) ? '?' : '#',
99 uSplit = url.split(splitter),
// Only the part before the first splitter has its slashes normalized.
101 uSplit[0] = uSplit[0].replace(slashRegex, '/');
102 url = uSplit.join(splitter);
106 // trim before proceeding.
107 // This is to support parse stuff like " http://foo.com \n"
// Fast path: only tried when slashesDenoteHost is falsy and the URL has no "#".
110 if (!slashesDenoteHost && url.split('#').length === 1) {
111 // Try fast path regexp
112 var simplePath = simplePathPattern.exec(rest);
116 this.pathname = simplePath[1];
118 this.search = simplePath[2];
119 if (parseQueryString) {
120 this.query = querystring.parse(this.search.substr(1));
122 this.query = this.search.substr(1);
124 } else if (parseQueryString) {
// Look for a leading scheme such as "http:".
132 var proto = protocolPattern.exec(rest);
135 var lowerProto = proto.toLowerCase();
136 this.protocol = lowerProto;
137 rest = rest.substr(proto.length);
140 // figure out if it's got a host
141 // user@server is *always* interpreted as a hostname, and url
142 // resolution will treat //foo/bar as host=foo,path=bar because that's
143 // how the browser resolves relative URLs.
144 if (slashesDenoteHost || proto || rest.match(/^\/\/[^@\/]+@[^@\/]+/)) {
145 var slashes = rest.substr(0, 2) === '//';
146 if (slashes && !(proto && hostlessProtocol[proto])) {
147 rest = rest.substr(2);
152 if (!hostlessProtocol[proto] &&
153 (slashes || (proto && !slashedProtocol[proto]))) {
155 // there's a hostname.
156 // the first instance of /, ?, ;, or # ends the host.
158 // If there is an @ in the hostname, then non-host chars *are* allowed
159 // to the left of the last @ sign, unless some host-ending character
160 // comes *before* the @-sign.
161 // URLs are obnoxious.
164 // http://a@b@c/ => user:a@b host:c
165 // http://a@b?@c => user:a host:b path:/?@c
167 // v0.12 TODO(isaacs): This is not quite how Chrome does things.
168 // Review our test case against browsers more comprehensively.
170 // find the first instance of any hostEndingChars
172 for (var i = 0; i < hostEndingChars.length; i++) {
173 var hec = rest.indexOf(hostEndingChars[i]);
174 if (hec !== -1 && (hostEnd === -1 || hec < hostEnd))
178 // at this point, either we have an explicit point where the
179 // auth portion cannot go past, or the last @ char is the decider.
181 if (hostEnd === -1) {
182 // atSign can be anywhere.
183 atSign = rest.lastIndexOf('@');
185 // atSign must be in auth portion.
186 // http://a@b/c@d => host:b auth:a path:/c@d
187 atSign = rest.lastIndexOf('@', hostEnd);
190 // Now we have a portion which is definitely the auth.
193 auth = rest.slice(0, atSign);
194 rest = rest.slice(atSign + 1);
// NOTE(review): decodeURIComponent throws URIError on a malformed
// percent-escape; no handler is visible here - confirm callers expect that.
195 this.auth = decodeURIComponent(auth);
198 // the host is the remaining to the left of the first non-host char
200 for (var i = 0; i < nonHostChars.length; i++) {
201 var hec = rest.indexOf(nonHostChars[i]);
202 if (hec !== -1 && (hostEnd === -1 || hec < hostEnd))
205 // if we still have not hit it, then the entire thing is a host.
207 hostEnd = rest.length;
209 this.host = rest.slice(0, hostEnd);
210 rest = rest.slice(hostEnd);
215 // we've indicated that there is a hostname,
216 // so even if it's empty, it has to be present.
217 this.hostname = this.hostname || '';
219 // if hostname begins with [ and ends with ]
220 // assume that it's an IPv6 address.
221 var ipv6Hostname = this.hostname[0] === '[' &&
222 this.hostname[this.hostname.length - 1] === ']';
224 // validate a little.
226 var hostparts = this.hostname.split(/\./);
227 for (var i = 0, l = hostparts.length; i < l; i++) {
228 var part = hostparts[i];
230 if (!part.match(hostnamePartPattern)) {
232 for (var j = 0, k = part.length; j < k; j++) {
233 if (part.charCodeAt(j) > 127) {
234 // we replace non-ASCII char with a temporary placeholder
235 // we need this to make sure size of hostname is not
236 // broken by replacing non-ASCII by nothing
242 // we test again with ASCII char only
243 if (!newpart.match(hostnamePartPattern)) {
// Labels before the offending one are kept as the hostname; the valid
// prefix of the offending label joins them and its remainder is not host.
244 var validParts = hostparts.slice(0, i);
245 var notHost = hostparts.slice(i + 1);
246 var bit = part.match(hostnamePartStart);
248 validParts.push(bit[1]);
249 notHost.unshift(bit[2]);
// Whatever was rejected is pushed back onto rest as the start of the path.
251 if (notHost.length) {
252 rest = '/' + notHost.join('.') + rest;
254 this.hostname = validParts.join('.');
261 if (this.hostname.length > hostnameMaxLen) {
264 // hostnames are always lower case.
265 this.hostname = this.hostname.toLowerCase();
269 // IDNA Support: Returns a punycoded representation of "domain".
270 // It only converts parts of the domain name that
271 // have non-ASCII characters, i.e. it doesn't matter if
272 // you call it with a domain that already is ASCII-only.
273 this.hostname = punycode.toASCII(this.hostname);
276 var p = this.port ? ':' + this.port : '';
277 var h = this.hostname || '';
280 // strip [ and ] from the hostname
281 // the host field still retains them, though
283 this.hostname = this.hostname.substr(1, this.hostname.length - 2);
284 if (rest[0] !== '/') {
290 // now rest is set to the post-host stuff.
291 // chop off any delim chars.
292 if (!unsafeProtocol[lowerProto]) {
294 // First, make 100% sure that any "autoEscape" chars get
295 // escaped, even if encodeURIComponent doesn't think they
297 for (var i = 0, l = autoEscape.length; i < l; i++) {
298 var ae = autoEscape[i];
299 if (rest.indexOf(ae) === -1)
301 var esc = encodeURIComponent(ae);
// split/join replaces every occurrence of ae with its escaped form.
305 rest = rest.split(ae).join(esc);
310 // chop off from the tail first.
311 var hash = rest.indexOf('#');
313 // got a fragment string.
314 this.hash = rest.substr(hash);
315 rest = rest.slice(0, hash);
316 var qm = rest.indexOf('?');
319 this.search = rest.substr(qm);
320 this.query = rest.substr(qm + 1);
// Replace the raw query string with its parsed form when requested.
321 if (parseQueryString) {
322 this.query = querystring.parse(this.query);
324 rest = rest.slice(0, qm);
325 } else if (parseQueryString) {
326 // no query string, but parseQueryString still requested
330 if (rest) this.pathname = rest;
331 if (slashedProtocol[lowerProto] &&
332 this.hostname && !this.pathname) {
336 //to support http.request
337 if (this.pathname || this.search) {
338 var p = this.pathname || '';
339 var s = this.search || '';
343 // finally, reconstruct the href based on what has been validated.
344 this.href = this.format();
/**
 * Format a URL, given either as a string or as a parsed object, into a
 * URL string.
 *
 * A string is first run through urlParse(), so calling this on a string is
 * a way to clean up a potentially wonky URL. An object that is not a Url
 * instance is formatted by borrowing Url.prototype.format.
 *
 * @param {string|Object} obj - URL string, Url instance, or url-like object.
 * @returns {string} The formatted URL.
 * @throws {TypeError} If obj is neither a string nor a non-null object.
 */
function urlFormat(obj) {
  if (typeof obj === 'string') {
    obj = urlParse(obj);
  } else if (typeof obj !== 'object' || obj === null) {
    // The conditional has to be parenthesized: `+` binds tighter than `===`,
    // so without the parentheses the prefix is concatenated with obj first
    // and the message collapses to just the type name.
    throw new TypeError("Parameter 'urlObj' must be an object, not " +
                        (obj === null ? 'null' : typeof obj));
  } else if (!(obj instanceof Url)) {
    return Url.prototype.format.call(obj);
  }

  // NOTE(review): the tail of this function was not visible in the reviewed
  // excerpt; formatting the Url instance directly mirrors the branch above.
  return obj.format();
}
// Url.prototype.format()
// Builds the URL string from the fields of this and returns
// protocol + host + pathname + search + hash.
// NOTE(review): several statements of this function are not visible in the
// reviewed excerpt; the comments below describe the visible lines only.
365 Url.prototype.format = function() {
366 var auth = this.auth || '';
// The auth is percent-encoded; only the first "%3A" is turned back into ":".
368 auth = encodeURIComponent(auth);
369 auth = auth.replace(/%3A/i, ':');
373 var protocol = this.protocol || '',
374 pathname = this.pathname || '',
375 hash = this.hash || '',
380 host = auth + this.host;
381 } else if (this.hostname) {
// A hostname containing ":" is wrapped in square brackets.
382 host = auth + (this.hostname.indexOf(':') === -1 ?
384 '[' + this.hostname + ']');
386 host += ':' + this.port;
// A non-null, non-empty query object is serialized with querystring.stringify.
390 if (this.query !== null &&
391 typeof this.query === 'object' &&
392 Object.keys(this.query).length) {
393 query = querystring.stringify(this.query);
// An explicit this.search takes precedence over one built from this.query.
396 var search = this.search || (query && ('?' + query)) || '';
398 if (protocol && protocol.substr(-1) !== ':') protocol += ':';
400 // only the slashedProtocols get the //. Not mailto:, xmpp:, etc.
401 // unless they had them to begin with.
403 (!protocol || slashedProtocol[protocol]) && host !== false) {
404 host = '//' + (host || '');
405 if (pathname && pathname.charAt(0) !== '/') pathname = '/' + pathname;
// Make sure hash and search carry their leading delimiter.
410 if (hash && hash.charAt(0) !== '#') hash = '#' + hash;
411 if (search && search.charAt(0) !== '?') search = '?' + search;
// Every "?" and "#" inside the pathname is percent-encoded.
413 pathname = pathname.replace(/[?#]/g, function(match) {
414 return encodeURIComponent(match);
// NOTE(review): with a string pattern, String#replace only replaces the first
// "#"; later "#" characters in search stay unescaped - confirm whether a
// global replace is intended here.
416 search = search.replace('#', '%23');
418 return protocol + host + pathname + search + hash;
/**
 * Resolve a target URL against a base URL and return the result as a string.
 *
 * @param {string|Object} source - Base URL; parsed with slashesDenoteHost on.
 * @param {string|Object} relative - URL to resolve relative to the base.
 * @returns {string} The resolved URL.
 */
function urlResolve(source, relative) {
  const base = urlParse(source, false, true);
  return base.resolve(relative);
}
/**
 * Resolve a URL against this one and return the formatted result.
 *
 * @param {string|Object} relative - URL to resolve; parsed with
 *     slashesDenoteHost on before being handed to resolveObject().
 * @returns {string} The resolved URL as a string.
 */
Url.prototype.resolve = function(relative) {
  const target = urlParse(relative, false, true);
  const resolved = this.resolveObject(target);
  return resolved.format();
};
/**
 * Resolve a target URL against a base URL and return the parsed result.
 *
 * @param {string|Object} source - Base URL; when falsy, relative is returned
 *     untouched.
 * @param {string|Object} relative - URL to resolve relative to the base.
 * @returns {*} The value produced by resolveObject(), or relative itself when
 *     there is no base.
 */
function urlResolveObject(source, relative) {
  if (source) {
    const base = urlParse(source, false, true);
    return base.resolveObject(relative);
  }
  return relative;
}
// Url.prototype.resolveObject(relative)
// Resolves relative (a URL string or a parsed url object) against this and
// builds the outcome on a fresh Url named result, which starts out as a copy
// of the own enumerable properties of this.
// NOTE(review): the visible code assigns to fields of relative (host,
// hostname, port), so an object passed in by the caller can be modified.
// NOTE(review): many statements of this function, including its return
// statements, are not visible in the reviewed excerpt; the comments below
// describe the visible lines only.
434 Url.prototype.resolveObject = function(relative) {
// A string argument is parsed first, with slashesDenoteHost switched on.
435 if (typeof relative === 'string') {
437 rel.parse(relative, false, true);
// Start from a copy of every own enumerable property of this.
441 var result = new Url();
442 var tkeys = Object.keys(this);
443 for (var tk = 0; tk < tkeys.length; tk++) {
444 var tkey = tkeys[tk];
445 result[tkey] = this[tkey];
448 // hash is always overridden, no matter what.
449 // even href="" will remove it.
450 result.hash = relative.hash;
452 // if the relative url is empty, then there's nothing left to do here.
453 if (relative.href === '') {
454 result.href = result.format();
458 // hrefs like //foo/bar always cut to the protocol.
459 if (relative.slashes && !relative.protocol) {
460 // take everything except the protocol from relative
461 var rkeys = Object.keys(relative);
462 for (var rk = 0; rk < rkeys.length; rk++) {
463 var rkey = rkeys[rk];
464 if (rkey !== 'protocol')
465 result[rkey] = relative[rkey];
468 //urlParse appends trailing / to urls like http://www.example.com
469 if (slashedProtocol[result.protocol] &&
470 result.hostname && !result.pathname) {
471 result.path = result.pathname = '/';
474 result.href = result.format();
478 if (relative.protocol && relative.protocol !== result.protocol) {
479 // if it's a known url protocol, then changing
480 // the protocol does weird things
481 // first, if it's not file:, then we MUST have a host,
482 // and if there was a path
483 // to begin with, then we MUST have a path.
484 // if it is file:, then the host is dropped,
485 // because that's known to be hostless.
486 // anything else is assumed to be absolute.
487 if (!slashedProtocol[relative.protocol]) {
488 var keys = Object.keys(relative);
489 for (var v = 0; v < keys.length; v++) {
491 result[k] = relative[k];
493 result.href = result.format();
497 result.protocol = relative.protocol;
498 if (!relative.host &&
499 !/^file:?$/.test(relative.protocol) &&
500 !hostlessProtocol[relative.protocol]) {
501 var relPath = (relative.pathname || '').split('/');
// Shift segments off relPath until a non-empty one is found; that segment
// becomes relative.host.
502 while (relPath.length && !(relative.host = relPath.shift()));
503 if (!relative.host) relative.host = '';
504 if (!relative.hostname) relative.hostname = '';
505 if (relPath[0] !== '') relPath.unshift('');
506 if (relPath.length < 2) relPath.unshift('');
507 result.pathname = relPath.join('/');
509 result.pathname = relative.pathname;
511 result.search = relative.search;
512 result.query = relative.query;
513 result.host = relative.host || '';
514 result.auth = relative.auth;
515 result.hostname = relative.hostname || relative.host;
516 result.port = relative.port;
517 // to support http.request
518 if (result.pathname || result.search) {
519 var p = result.pathname || '';
520 var s = result.search || '';
523 result.slashes = result.slashes || relative.slashes;
524 result.href = result.format();
528 var isSourceAbs = (result.pathname && result.pathname.charAt(0) === '/'),
531 relative.pathname && relative.pathname.charAt(0) === '/'
533 mustEndAbs = (isRelAbs || isSourceAbs ||
534 (result.host && relative.pathname)),
535 removeAllDots = mustEndAbs,
536 srcPath = result.pathname && result.pathname.split('/') || [],
537 relPath = relative.pathname && relative.pathname.split('/') || [],
// Truthy when the result has a protocol that is not listed in slashedProtocol.
538 psychotic = result.protocol && !slashedProtocol[result.protocol];
540 // if the url is a non-slashed url, then relative
541 // links like ../.. should be able
542 // to crawl up to the hostname, as well. This is strange.
543 // result.protocol has already been set by now.
544 // Later on, put the first path part into the host field.
546 result.hostname = '';
549 if (srcPath[0] === '') srcPath[0] = result.host;
550 else srcPath.unshift(result.host);
553 if (relative.protocol) {
554 relative.hostname = null;
555 relative.port = null;
557 if (relPath[0] === '') relPath[0] = relative.host;
558 else relPath.unshift(relative.host);
560 relative.host = null;
562 mustEndAbs = mustEndAbs && (relPath[0] === '' || srcPath[0] === '');
// An empty-string host or hostname on relative still overrides the result.
567 result.host = (relative.host || relative.host === '') ?
568 relative.host : result.host;
569 result.hostname = (relative.hostname || relative.hostname === '') ?
570 relative.hostname : result.hostname;
571 result.search = relative.search;
572 result.query = relative.query;
574 // fall through to the dot-handling below.
575 } else if (relPath.length) {
577 // throw away the existing file, and take the new path instead.
578 if (!srcPath) srcPath = [];
580 srcPath = srcPath.concat(relPath);
581 result.search = relative.search;
582 result.query = relative.query;
583 } else if (relative.search !== null && relative.search !== undefined) {
584 // just pull out the search.
586 // Put this after the other two cases because it simplifies the booleans
588 result.hostname = result.host = srcPath.shift();
589 //occasionally the auth can get stuck only in host
590 //this especially happens in cases like
591 //url.resolveObject('mailto:local1@domain1', 'local2@domain2')
592 var authInHost = result.host && result.host.indexOf('@') > 0 ?
593 result.host.split('@') : false;
595 result.auth = authInHost.shift();
596 result.host = result.hostname = authInHost.shift();
599 result.search = relative.search;
600 result.query = relative.query;
601 //to support http.request
602 if (result.pathname !== null || result.search !== null) {
603 result.path = (result.pathname ? result.pathname : '') +
604 (result.search ? result.search : '');
606 result.href = result.format();
610 if (!srcPath.length) {
611 // no path at all. easy.
612 // we've already handled the other stuff above.
613 result.pathname = null;
614 //to support http.request
616 result.path = '/' + result.search;
620 result.href = result.format();
624 // if a url ENDs in . or .., then it must get a trailing slash.
625 // however, if it ends in anything else non-slashy,
626 // then it must NOT get a trailing slash.
627 var last = srcPath.slice(-1)[0];
628 var hasTrailingSlash = (
629 (result.host || relative.host || srcPath.length > 1) &&
630 (last === '.' || last === '..') || last === '');
632 // strip single dots, resolve double dots to parent dir
633 // if the path tries to go above the root, `up` ends up > 0
// The segments are walked from the end of srcPath towards the front.
635 for (var i = srcPath.length; i >= 0; i--) {
638 spliceOne(srcPath, i);
639 } else if (last === '..') {
640 spliceOne(srcPath, i);
643 spliceOne(srcPath, i);
648 // if the path is allowed to go above the root, restore leading ..s
649 if (!mustEndAbs && !removeAllDots) {
651 srcPath.unshift('..');
655 if (mustEndAbs && srcPath[0] !== '' &&
656 (!srcPath[0] || srcPath[0].charAt(0) !== '/')) {
660 if (hasTrailingSlash && (srcPath.join('/').substr(-1) !== '/')) {
664 var isAbsolute = srcPath[0] === '' ||
665 (srcPath[0] && srcPath[0].charAt(0) === '/');
669 result.hostname = result.host = isAbsolute ? '' :
670 srcPath.length ? srcPath.shift() : '';
671 //occasionally the auth can get stuck only in host
672 //this especially happens in cases like
673 //url.resolveObject('mailto:local1@domain1', 'local2@domain2')
674 var authInHost = result.host && result.host.indexOf('@') > 0 ?
675 result.host.split('@') : false;
677 result.auth = authInHost.shift();
678 result.host = result.hostname = authInHost.shift();
682 mustEndAbs = mustEndAbs || (result.host && srcPath.length);
684 if (mustEndAbs && !isAbsolute) {
688 if (!srcPath.length) {
689 result.pathname = null;
692 result.pathname = srcPath.join('/');
695 //to support http.request
696 if (result.pathname !== null || result.search !== null) {
697 result.path = (result.pathname ? result.pathname : '') +
698 (result.search ? result.search : '');
// Auth from relative wins over the auth inherited from the source.
700 result.auth = relative.auth || result.auth;
701 result.slashes = result.slashes || relative.slashes;
702 result.href = result.format();
// Url.prototype.parseHost()
// Splits a trailing ":port" off this.host: the text after the colon is
// stored in this.port and the remainder, when non-empty, in this.hostname.
// NOTE(review): the statements guarding the port handling are not visible in
// the reviewed excerpt; the comments describe the visible lines only.
706 Url.prototype.parseHost = function() {
707 var host = this.host;
708 var port = portPattern.exec(host);
// Drop the leading ":" from the matched text.
712 this.port = port.substr(1);
// Cut the matched port text off the end of the host.
714 host = host.substr(0, host.length - port.length);
716 if (host) this.hostname = host;
719 // About 1.5x faster than the two-arg version of Array#splice().
// Called in this file as spliceOne(srcPath, i) while path segments are resolved.
// The loop walks adjacent index pairs (i, k = i + 1) from index up to the end
// of list.
// NOTE(review): the loop body and the rest of the function are not visible in
// the reviewed excerpt; presumably each element is shifted down one slot and
// the last one is dropped - confirm.
720 function spliceOne(list, index) {
721 for (var i = index, k = i + 1, n = list.length; k < n; i += 1, k += 1)