From 581e524ac7dd0b75d2a34102cf10dacb368df340 Mon Sep 17 00:00:00 2001 From: DongHun Kwak Date: Fri, 17 Mar 2023 11:09:09 +0900 Subject: [PATCH 1/1] Import url 2.3.1 --- .cargo_vcs_info.json | 6 + Cargo.toml | 85 + Cargo.toml.orig | 53 + LICENSE-APACHE | 201 ++ LICENSE-MIT | 25 + README.md | 14 + src/host.rs | 498 +++ src/lib.rs | 2923 ++++++++++++++++ src/origin.rs | 112 + src/parser.rs | 1593 +++++++++ src/path_segments.rs | 246 ++ src/quirks.rs | 326 ++ src/slicing.rs | 187 + tests/data.rs | 270 ++ tests/debugger_visualizer.rs | 102 + tests/setters_tests.json | 1876 ++++++++++ tests/unit.rs | 1164 +++++++ tests/urltestdata.json | 7908 ++++++++++++++++++++++++++++++++++++++++++ 18 files changed, 17589 insertions(+) create mode 100644 .cargo_vcs_info.json create mode 100644 Cargo.toml create mode 100644 Cargo.toml.orig create mode 100644 LICENSE-APACHE create mode 100644 LICENSE-MIT create mode 100644 README.md create mode 100644 src/host.rs create mode 100644 src/lib.rs create mode 100644 src/origin.rs create mode 100644 src/parser.rs create mode 100644 src/path_segments.rs create mode 100644 src/quirks.rs create mode 100644 src/slicing.rs create mode 100644 tests/data.rs create mode 100644 tests/debugger_visualizer.rs create mode 100644 tests/setters_tests.json create mode 100644 tests/unit.rs create mode 100644 tests/urltestdata.json diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json new file mode 100644 index 0000000..5bf7f75 --- /dev/null +++ b/.cargo_vcs_info.json @@ -0,0 +1,6 @@ +{ + "git": { + "sha1": "359bc90a4f07224f79cc79c45dc873d44bcd6f14" + }, + "path_in_vcs": "url" +} \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..4363ae5 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,85 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2018" +rust-version = "1.51" +name = "url" +version = "2.3.1" +authors = ["The rust-url developers"] +include = [ + "src/**/*", + "LICENSE-*", + "README.md", + "tests/**", +] +description = "URL library for Rust, based on the WHATWG URL Standard" +documentation = "https://docs.rs/url" +readme = "README.md" +keywords = [ + "url", + "parser", +] +categories = [ + "parser-implementations", + "web-programming", + "encoding", +] +license = "MIT OR Apache-2.0" +repository = "https://github.com/servo/rust-url" + +[[test]] +name = "debugger_visualizer" +path = "tests/debugger_visualizer.rs" +test = false +required-features = ["debugger_visualizer"] + +[[bench]] +name = "parse_url" +path = "benches/parse_url.rs" +harness = false + +[dependencies.form_urlencoded] +version = "1.1.0" + +[dependencies.idna] +version = "0.3.0" + +[dependencies.percent-encoding] +version = "2.2.0" + +[dependencies.serde] +version = "1.0" +features = ["derive"] +optional = true + +[dev-dependencies.bencher] +version = "0.1" + +[dev-dependencies.debugger_test] +version = "0.1" + +[dev-dependencies.debugger_test_parser] +version = "0.1" + +[dev-dependencies.serde_json] +version = "1.0" + +[features] +debugger_visualizer = [] +default = [] +expose_internals = [] + +[badges.appveyor] +repository = "Manishearth/rust-url" + +[badges.travis-ci] +repository = "servo/rust-url" diff --git a/Cargo.toml.orig b/Cargo.toml.orig new file mode 100644 index 0000000..39b90fa --- /dev/null +++ b/Cargo.toml.orig @@ -0,0 +1,53 @@ +[package] + +name = "url" +# When updating version, also modify html_root_url in the lib.rs +version = "2.3.1" +authors = ["The rust-url developers"] + +description = "URL library for Rust, based on the WHATWG URL Standard" +documentation = "https://docs.rs/url" +repository = "https://github.com/servo/rust-url" +readme = "../README.md" +keywords = ["url", "parser"] +categories = ["parser-implementations", "web-programming", "encoding"] +license = "MIT OR Apache-2.0" +include = ["src/**/*", "LICENSE-*", "README.md", "tests/**"] +edition = "2018" +rust-version = "1.51" + +[badges] +travis-ci = { repository = "servo/rust-url" } +appveyor = { repository = "Manishearth/rust-url" } + +[dev-dependencies] +serde_json = "1.0" +bencher = "0.1" +# To test debugger visualizers defined for the url crate such as url.natvis +debugger_test = "0.1" +debugger_test_parser = "0.1" + +[dependencies] +form_urlencoded = { version = "1.1.0", path = "../form_urlencoded" } +idna = { version = "0.3.0", path = "../idna" } +percent-encoding = { version = "2.2.0", path = "../percent_encoding" } +serde = {version = "1.0", optional = true, features = ["derive"]} + +[features] +default = [] +# UNSTABLE FEATURES (requires Rust nightly) +# Enable to use the #[debugger_visualizer] attribute. +debugger_visualizer = [] +# Expose internal offsets of the URL. +expose_internals = [] + +[[bench]] +name = "parse_url" +path = "benches/parse_url.rs" +harness = false + +[[test]] +name = "debugger_visualizer" +path = "tests/debugger_visualizer.rs" +required-features = ["debugger_visualizer"] +test = false diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 0000000..16fe87b --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 0000000..51d5dc7 --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2013-2022 The rust-url developers + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..ab31bff --- /dev/null +++ b/README.md @@ -0,0 +1,14 @@ +rust-url +======== + +[![Build status](https://github.com/servo/rust-url/workflows/CI/badge.svg)](https://github.com/servo/rust-url/actions?query=workflow%3ACI) +[![Coverage](https://codecov.io/gh/servo/rust-url/branch/master/graph/badge.svg)](https://codecov.io/gh/servo/rust-url) +[![Chat](https://img.shields.io/badge/chat-%23rust--url:mozilla.org-%2346BC99?logo=Matrix)](https://matrix.to/#/#rust-url:mozilla.org) +[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE-MIT) +[![License: Apache 2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE-APACHE) + +URL library for Rust, based on the [URL Standard](https://url.spec.whatwg.org/). + +[Documentation](https://docs.rs/url/) + +Please see [UPGRADING.md](https://github.com/servo/rust-url/blob/master/UPGRADING.md) if you are upgrading from a previous version. diff --git a/src/host.rs b/src/host.rs new file mode 100644 index 0000000..f1921c6 --- /dev/null +++ b/src/host.rs @@ -0,0 +1,498 @@ +// Copyright 2013-2016 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use std::cmp; +use std::fmt::{self, Formatter}; +use std::net::{Ipv4Addr, Ipv6Addr}; + +use percent_encoding::{percent_decode, utf8_percent_encode, CONTROLS}; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +use crate::parser::{ParseError, ParseResult}; + +#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))] +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub(crate) enum HostInternal { + None, + Domain, + Ipv4(Ipv4Addr), + Ipv6(Ipv6Addr), +} + +impl From> for HostInternal { + fn from(host: Host) -> HostInternal { + match host { + Host::Domain(ref s) if s.is_empty() => HostInternal::None, + Host::Domain(_) => HostInternal::Domain, + Host::Ipv4(address) => HostInternal::Ipv4(address), + Host::Ipv6(address) => HostInternal::Ipv6(address), + } + } +} + +/// The host name of an URL. +#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))] +#[derive(Clone, Debug, Eq, Ord, PartialOrd, Hash)] +pub enum Host { + /// A DNS domain name, as '.' dot-separated labels. + /// Non-ASCII labels are encoded in punycode per IDNA if this is the host of + /// a special URL, or percent encoded for non-special URLs. Hosts for + /// non-special URLs are also called opaque hosts. + Domain(S), + + /// An IPv4 address. + /// `Url::host_str` returns the serialization of this address, + /// as four decimal integers separated by `.` dots. + Ipv4(Ipv4Addr), + + /// An IPv6 address. + /// `Url::host_str` returns the serialization of that address between `[` and `]` brackets, + /// in the format per [RFC 5952 *A Recommendation + /// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952): + /// lowercase hexadecimal with maximal `::` compression. + Ipv6(Ipv6Addr), +} + +impl<'a> Host<&'a str> { + /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`. + pub fn to_owned(&self) -> Host { + match *self { + Host::Domain(domain) => Host::Domain(domain.to_owned()), + Host::Ipv4(address) => Host::Ipv4(address), + Host::Ipv6(address) => Host::Ipv6(address), + } + } +} + +impl Host { + /// Parse a host: either an IPv6 address in [] square brackets, or a domain. + /// + /// + pub fn parse(input: &str) -> Result { + if input.starts_with('[') { + if !input.ends_with(']') { + return Err(ParseError::InvalidIpv6Address); + } + return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6); + } + let domain = percent_decode(input.as_bytes()).decode_utf8_lossy(); + + let domain = Self::domain_to_ascii(&domain)?; + + if domain.is_empty() { + return Err(ParseError::EmptyHost); + } + + let is_invalid_domain_char = |c| { + matches!( + c, + '\0'..='\u{001F}' + | ' ' + | '#' + | '%' + | '/' + | ':' + | '<' + | '>' + | '?' + | '@' + | '[' + | '\\' + | ']' + | '^' + | '\u{007F}' + | '|' + ) + }; + + if domain.find(is_invalid_domain_char).is_some() { + Err(ParseError::InvalidDomainCharacter) + } else if ends_in_a_number(&domain) { + let address = parse_ipv4addr(&domain)?; + Ok(Host::Ipv4(address)) + } else { + Ok(Host::Domain(domain)) + } + } + + // + pub fn parse_opaque(input: &str) -> Result { + if input.starts_with('[') { + if !input.ends_with(']') { + return Err(ParseError::InvalidIpv6Address); + } + return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6); + } + + let is_invalid_host_char = |c| { + matches!( + c, + '\0' | '\t' + | '\n' + | '\r' + | ' ' + | '#' + | '/' + | ':' + | '<' + | '>' + | '?' + | '@' + | '[' + | '\\' + | ']' + | '^' + | '|' + ) + }; + + if input.find(is_invalid_host_char).is_some() { + Err(ParseError::InvalidDomainCharacter) + } else { + Ok(Host::Domain( + utf8_percent_encode(input, CONTROLS).to_string(), + )) + } + } + + /// convert domain with idna + fn domain_to_ascii(domain: &str) -> Result { + idna::domain_to_ascii(domain).map_err(Into::into) + } +} + +impl> fmt::Display for Host { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match *self { + Host::Domain(ref domain) => domain.as_ref().fmt(f), + Host::Ipv4(ref addr) => addr.fmt(f), + Host::Ipv6(ref addr) => { + f.write_str("[")?; + write_ipv6(addr, f)?; + f.write_str("]") + } + } + } +} + +impl PartialEq> for Host +where + S: PartialEq, +{ + fn eq(&self, other: &Host) -> bool { + match (self, other) { + (Host::Domain(a), Host::Domain(b)) => a == b, + (Host::Ipv4(a), Host::Ipv4(b)) => a == b, + (Host::Ipv6(a), Host::Ipv6(b)) => a == b, + (_, _) => false, + } + } +} + +fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter<'_>) -> fmt::Result { + let segments = addr.segments(); + let (compress_start, compress_end) = longest_zero_sequence(&segments); + let mut i = 0; + while i < 8 { + if i == compress_start { + f.write_str(":")?; + if i == 0 { + f.write_str(":")?; + } + if compress_end < 8 { + i = compress_end; + } else { + break; + } + } + write!(f, "{:x}", segments[i as usize])?; + if i < 7 { + f.write_str(":")?; + } + i += 1; + } + Ok(()) +} + +// https://url.spec.whatwg.org/#concept-ipv6-serializer step 2 and 3 +fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) { + let mut longest = -1; + let mut longest_length = -1; + let mut start = -1; + macro_rules! finish_sequence( + ($end: expr) => { + if start >= 0 { + let length = $end - start; + if length > longest_length { + longest = start; + longest_length = length; + } + } + }; + ); + for i in 0..8 { + if pieces[i as usize] == 0 { + if start < 0 { + start = i; + } + } else { + finish_sequence!(i); + start = -1; + } + } + finish_sequence!(8); + // https://url.spec.whatwg.org/#concept-ipv6-serializer + // step 3: ignore lone zeroes + if longest_length < 2 { + (-1, -2) + } else { + (longest, longest + longest_length) + } +} + +/// +fn ends_in_a_number(input: &str) -> bool { + let mut parts = input.rsplit('.'); + let last = parts.next().unwrap(); + let last = if last.is_empty() { + if let Some(last) = parts.next() { + last + } else { + return false; + } + } else { + last + }; + if !last.is_empty() && last.chars().all(|c| ('0'..='9').contains(&c)) { + return true; + } + + parse_ipv4number(last).is_ok() +} + +/// +/// Ok(None) means the input is a valid number, but it overflows a `u32`. +fn parse_ipv4number(mut input: &str) -> Result, ()> { + if input.is_empty() { + return Err(()); + } + + let mut r = 10; + if input.starts_with("0x") || input.starts_with("0X") { + input = &input[2..]; + r = 16; + } else if input.len() >= 2 && input.starts_with('0') { + input = &input[1..]; + r = 8; + } + + if input.is_empty() { + return Ok(Some(0)); + } + + let valid_number = match r { + 8 => input.chars().all(|c| ('0'..='7').contains(&c)), + 10 => input.chars().all(|c| ('0'..='9').contains(&c)), + 16 => input.chars().all(|c| { + ('0'..='9').contains(&c) || ('a'..='f').contains(&c) || ('A'..='F').contains(&c) + }), + _ => false, + }; + if !valid_number { + return Err(()); + } + + match u32::from_str_radix(input, r) { + Ok(num) => Ok(Some(num)), + Err(_) => Ok(None), // The only possible error kind here is an integer overflow. + // The validity of the chars in the input is checked above. + } +} + +/// +fn parse_ipv4addr(input: &str) -> ParseResult { + let mut parts: Vec<&str> = input.split('.').collect(); + if parts.last() == Some(&"") { + parts.pop(); + } + if parts.len() > 4 { + return Err(ParseError::InvalidIpv4Address); + } + let mut numbers: Vec = Vec::new(); + for part in parts { + match parse_ipv4number(part) { + Ok(Some(n)) => numbers.push(n), + Ok(None) => return Err(ParseError::InvalidIpv4Address), // u32 overflow + Err(()) => return Err(ParseError::InvalidIpv4Address), + }; + } + let mut ipv4 = numbers.pop().expect("a non-empty list of numbers"); + // Equivalent to: ipv4 >= 256 ** (4 − numbers.len()) + if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) { + return Err(ParseError::InvalidIpv4Address); + } + if numbers.iter().any(|x| *x > 255) { + return Err(ParseError::InvalidIpv4Address); + } + for (counter, n) in numbers.iter().enumerate() { + ipv4 += n << (8 * (3 - counter as u32)) + } + Ok(Ipv4Addr::from(ipv4)) +} + +/// +fn parse_ipv6addr(input: &str) -> ParseResult { + let input = input.as_bytes(); + let len = input.len(); + let mut is_ip_v4 = false; + let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0]; + let mut piece_pointer = 0; + let mut compress_pointer = None; + let mut i = 0; + + if len < 2 { + return Err(ParseError::InvalidIpv6Address); + } + + if input[0] == b':' { + if input[1] != b':' { + return Err(ParseError::InvalidIpv6Address); + } + i = 2; + piece_pointer = 1; + compress_pointer = Some(1); + } + + while i < len { + if piece_pointer == 8 { + return Err(ParseError::InvalidIpv6Address); + } + if input[i] == b':' { + if compress_pointer.is_some() { + return Err(ParseError::InvalidIpv6Address); + } + i += 1; + piece_pointer += 1; + compress_pointer = Some(piece_pointer); + continue; + } + let start = i; + let end = cmp::min(len, start + 4); + let mut value = 0u16; + while i < end { + match (input[i] as char).to_digit(16) { + Some(digit) => { + value = value * 0x10 + digit as u16; + i += 1; + } + None => break, + } + } + if i < len { + match input[i] { + b'.' => { + if i == start { + return Err(ParseError::InvalidIpv6Address); + } + i = start; + if piece_pointer > 6 { + return Err(ParseError::InvalidIpv6Address); + } + is_ip_v4 = true; + } + b':' => { + i += 1; + if i == len { + return Err(ParseError::InvalidIpv6Address); + } + } + _ => return Err(ParseError::InvalidIpv6Address), + } + } + if is_ip_v4 { + break; + } + pieces[piece_pointer] = value; + piece_pointer += 1; + } + + if is_ip_v4 { + if piece_pointer > 6 { + return Err(ParseError::InvalidIpv6Address); + } + let mut numbers_seen = 0; + while i < len { + if numbers_seen > 0 { + if numbers_seen < 4 && (i < len && input[i] == b'.') { + i += 1 + } else { + return Err(ParseError::InvalidIpv6Address); + } + } + + let mut ipv4_piece = None; + while i < len { + let digit = match input[i] { + c @ b'0'..=b'9' => c - b'0', + _ => break, + }; + match ipv4_piece { + None => ipv4_piece = Some(digit as u16), + Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero + Some(ref mut v) => { + *v = *v * 10 + digit as u16; + if *v > 255 { + return Err(ParseError::InvalidIpv6Address); + } + } + } + i += 1; + } + + pieces[piece_pointer] = if let Some(v) = ipv4_piece { + pieces[piece_pointer] * 0x100 + v + } else { + return Err(ParseError::InvalidIpv6Address); + }; + numbers_seen += 1; + + if numbers_seen == 2 || numbers_seen == 4 { + piece_pointer += 1; + } + } + + if numbers_seen != 4 { + return Err(ParseError::InvalidIpv6Address); + } + } + + if i < len { + return Err(ParseError::InvalidIpv6Address); + } + + match compress_pointer { + Some(compress_pointer) => { + let mut swaps = piece_pointer - compress_pointer; + piece_pointer = 7; + while swaps > 0 { + pieces.swap(piece_pointer, compress_pointer + swaps - 1); + swaps -= 1; + piece_pointer -= 1; + } + } + _ => { + if piece_pointer != 8 { + return Err(ParseError::InvalidIpv6Address); + } + } + } + Ok(Ipv6Addr::new( + pieces[0], pieces[1], pieces[2], pieces[3], pieces[4], pieces[5], pieces[6], pieces[7], + )) +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..6dc09d1 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,2923 @@ +// Copyright 2013-2015 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +/*! + +rust-url is an implementation of the [URL Standard](http://url.spec.whatwg.org/) +for the [Rust](http://rust-lang.org/) programming language. + + +# URL parsing and data structures + +First, URL parsing may fail for various reasons and therefore returns a `Result`. + +``` +use url::{Url, ParseError}; + +assert!(Url::parse("http://[:::1]") == Err(ParseError::InvalidIpv6Address)) +``` + +Let’s parse a valid URL and look at its components. + +``` +use url::{Url, Host, Position}; +# use url::ParseError; +# fn run() -> Result<(), ParseError> { +let issue_list_url = Url::parse( + "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open" +)?; + + +assert!(issue_list_url.scheme() == "https"); +assert!(issue_list_url.username() == ""); +assert!(issue_list_url.password() == None); +assert!(issue_list_url.host_str() == Some("github.com")); +assert!(issue_list_url.host() == Some(Host::Domain("github.com"))); +assert!(issue_list_url.port() == None); +assert!(issue_list_url.path() == "/rust-lang/rust/issues"); +assert!(issue_list_url.path_segments().map(|c| c.collect::>()) == + Some(vec!["rust-lang", "rust", "issues"])); +assert!(issue_list_url.query() == Some("labels=E-easy&state=open")); +assert!(&issue_list_url[Position::BeforePath..] == "/rust-lang/rust/issues?labels=E-easy&state=open"); +assert!(issue_list_url.fragment() == None); +assert!(!issue_list_url.cannot_be_a_base()); +# Ok(()) +# } +# run().unwrap(); +``` + +Some URLs are said to be *cannot-be-a-base*: +they don’t have a username, password, host, or port, +and their "path" is an arbitrary string rather than slash-separated segments: + +``` +use url::Url; +# use url::ParseError; + +# fn run() -> Result<(), ParseError> { +let data_url = Url::parse("data:text/plain,Hello?World#")?; + +assert!(data_url.cannot_be_a_base()); +assert!(data_url.scheme() == "data"); +assert!(data_url.path() == "text/plain,Hello"); +assert!(data_url.path_segments().is_none()); +assert!(data_url.query() == Some("World")); +assert!(data_url.fragment() == Some("")); +# Ok(()) +# } +# run().unwrap(); +``` + +## Serde + +Enable the `serde` feature to include `Deserialize` and `Serialize` implementations for `url::Url`. + +# Base URL + +Many contexts allow URL *references* that can be relative to a *base URL*: + +```html + +``` + +Since parsed URLs are absolute, giving a base is required for parsing relative URLs: + +``` +use url::{Url, ParseError}; + +assert!(Url::parse("../main.css") == Err(ParseError::RelativeUrlWithoutBase)) +``` + +Use the `join` method on an `Url` to use it as a base URL: + +``` +use url::Url; +# use url::ParseError; + +# fn run() -> Result<(), ParseError> { +let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html")?; +let css_url = this_document.join("../main.css")?; +assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css"); +# Ok(()) +# } +# run().unwrap(); +``` + +# Feature: `serde` + +If you enable the `serde` feature, [`Url`](struct.Url.html) will implement +[`serde::Serialize`](https://docs.rs/serde/1/serde/trait.Serialize.html) and +[`serde::Deserialize`](https://docs.rs/serde/1/serde/trait.Deserialize.html). +See [serde documentation](https://serde.rs) for more information. + +```toml +url = { version = "2", features = ["serde"] } +``` + +*/ + +#![doc(html_root_url = "https://docs.rs/url/2.3.1")] +#![cfg_attr( + feature = "debugger_visualizer", + feature(debugger_visualizer), + debugger_visualizer(natvis_file = "../../debug_metadata/url.natvis") +)] + +pub use form_urlencoded; + +#[cfg(feature = "serde")] +extern crate serde; + +use crate::host::HostInternal; +use crate::parser::{to_u32, Context, Parser, SchemeType, PATH_SEGMENT, USERINFO}; +use percent_encoding::{percent_decode, percent_encode, utf8_percent_encode}; +use std::borrow::Borrow; +use std::cmp; +use std::fmt::{self, Write}; +use std::hash; +use std::io; +use std::mem; +use std::net::{IpAddr, SocketAddr, ToSocketAddrs}; +use std::ops::{Range, RangeFrom, RangeTo}; +use std::path::{Path, PathBuf}; +use std::str; + +use std::convert::TryFrom; + +pub use crate::host::Host; +pub use crate::origin::{OpaqueOrigin, Origin}; +pub use crate::parser::{ParseError, SyntaxViolation}; +pub use crate::path_segments::PathSegmentsMut; +pub use crate::slicing::Position; +pub use form_urlencoded::EncodingOverride; + +mod host; +mod origin; +mod parser; +mod path_segments; +mod slicing; + +#[doc(hidden)] +pub mod quirks; + +/// A parsed URL record. +#[derive(Clone)] +pub struct Url { + /// Syntax in pseudo-BNF: + /// + /// url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]? + /// non-hierarchical = non-hierarchical-path + /// non-hierarchical-path = /* Does not start with "/" */ + /// hierarchical = authority? hierarchical-path + /// authority = "//" userinfo? host [ ":" port ]? + /// userinfo = username [ ":" password ]? "@" + /// hierarchical-path = [ "/" path-segment ]+ + serialization: String, + + // Components + scheme_end: u32, // Before ':' + username_end: u32, // Before ':' (if a password is given) or '@' (if not) + host_start: u32, + host_end: u32, + host: HostInternal, + port: Option, + path_start: u32, // Before initial '/', if any + query_start: Option, // Before '?', unlike Position::QueryStart + fragment_start: Option, // Before '#', unlike Position::FragmentStart +} + +/// Full configuration for the URL parser. +#[derive(Copy, Clone)] +pub struct ParseOptions<'a> { + base_url: Option<&'a Url>, + encoding_override: EncodingOverride<'a>, + violation_fn: Option<&'a dyn Fn(SyntaxViolation)>, +} + +impl<'a> ParseOptions<'a> { + /// Change the base URL + pub fn base_url(mut self, new: Option<&'a Url>) -> Self { + self.base_url = new; + self + } + + /// Override the character encoding of query strings. + /// This is a legacy concept only relevant for HTML. + pub fn encoding_override(mut self, new: EncodingOverride<'a>) -> Self { + self.encoding_override = new; + self + } + + /// Call the provided function or closure for a non-fatal `SyntaxViolation` + /// when it occurs during parsing. Note that since the provided function is + /// `Fn`, the caller might need to utilize _interior mutability_, such as with + /// a `RefCell`, to collect the violations. + /// + /// ## Example + /// ``` + /// use std::cell::RefCell; + /// use url::{Url, SyntaxViolation}; + /// # use url::ParseError; + /// # fn run() -> Result<(), url::ParseError> { + /// let violations = RefCell::new(Vec::new()); + /// let url = Url::options() + /// .syntax_violation_callback(Some(&|v| violations.borrow_mut().push(v))) + /// .parse("https:////example.com")?; + /// assert_eq!(url.as_str(), "https://example.com/"); + /// assert_eq!(violations.into_inner(), + /// vec!(SyntaxViolation::ExpectedDoubleSlash)); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self { + self.violation_fn = new; + self + } + + /// Parse an URL string with the configuration so far. + pub fn parse(self, input: &str) -> Result { + Parser { + serialization: String::with_capacity(input.len()), + base_url: self.base_url, + query_encoding_override: self.encoding_override, + violation_fn: self.violation_fn, + context: Context::UrlParser, + } + .parse_url(input) + } +} + +impl Url { + /// Parse an absolute URL from a string. + /// + /// # Examples + /// + /// ```rust + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url = Url::parse("https://example.net")?; + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// + /// # Errors + /// + /// If the function can not parse an absolute URL from the given string, + /// a [`ParseError`] variant will be returned. + /// + /// [`ParseError`]: enum.ParseError.html + #[inline] + pub fn parse(input: &str) -> Result { + Url::options().parse(input) + } + + /// Parse an absolute URL from a string and add params to its query string. + /// + /// Existing params are not removed. + /// + /// # Examples + /// + /// ```rust + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url = Url::parse_with_params("https://example.net?dont=clobberme", + /// &[("lang", "rust"), ("browser", "servo")])?; + /// assert_eq!("https://example.net/?dont=clobberme&lang=rust&browser=servo", url.as_str()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// + /// # Errors + /// + /// If the function can not parse an absolute URL from the given string, + /// a [`ParseError`] variant will be returned. + /// + /// [`ParseError`]: enum.ParseError.html + #[inline] + pub fn parse_with_params(input: &str, iter: I) -> Result + where + I: IntoIterator, + I::Item: Borrow<(K, V)>, + K: AsRef, + V: AsRef, + { + let mut url = Url::options().parse(input); + + if let Ok(ref mut url) = url { + url.query_pairs_mut().extend_pairs(iter); + } + + url + } + + /// Parse a string as an URL, with this URL as the base URL. + /// + /// The inverse of this is [`make_relative`]. + /// + /// Note: a trailing slash is significant. + /// Without it, the last path component is considered to be a “file” name + /// to be removed to get at the “directory” that is used as the base: + /// + /// # Examples + /// + /// ```rust + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let base = Url::parse("https://example.net/a/b.html")?; + /// let url = base.join("c.png")?; + /// assert_eq!(url.as_str(), "https://example.net/a/c.png"); // Not /a/b.html/c.png + /// + /// let base = Url::parse("https://example.net/a/b/")?; + /// let url = base.join("c.png")?; + /// assert_eq!(url.as_str(), "https://example.net/a/b/c.png"); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// + /// # Errors + /// + /// If the function can not parse an URL from the given string + /// with this URL as the base URL, a [`ParseError`] variant will be returned. + /// + /// [`ParseError`]: enum.ParseError.html + /// [`make_relative`]: #method.make_relative + #[inline] + pub fn join(&self, input: &str) -> Result { + Url::options().base_url(Some(self)).parse(input) + } + + /// Creates a relative URL if possible, with this URL as the base URL. + /// + /// This is the inverse of [`join`]. + /// + /// # Examples + /// + /// ```rust + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let base = Url::parse("https://example.net/a/b.html")?; + /// let url = Url::parse("https://example.net/a/c.png")?; + /// let relative = base.make_relative(&url); + /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png")); + /// + /// let base = Url::parse("https://example.net/a/b/")?; + /// let url = Url::parse("https://example.net/a/b/c.png")?; + /// let relative = base.make_relative(&url); + /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png")); + /// + /// let base = Url::parse("https://example.net/a/b/")?; + /// let url = Url::parse("https://example.net/a/d/c.png")?; + /// let relative = base.make_relative(&url); + /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("../d/c.png")); + /// + /// let base = Url::parse("https://example.net/a/b.html?c=d")?; + /// let url = Url::parse("https://example.net/a/b.html?e=f")?; + /// let relative = base.make_relative(&url); + /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("?e=f")); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// + /// # Errors + /// + /// If this URL can't be a base for the given URL, `None` is returned. + /// This is for example the case if the scheme, host or port are not the same. + /// + /// [`join`]: #method.join + pub fn make_relative(&self, url: &Url) -> Option { + if self.cannot_be_a_base() { + return None; + } + + // Scheme, host and port need to be the same + if self.scheme() != url.scheme() || self.host() != url.host() || self.port() != url.port() { + return None; + } + + // We ignore username/password at this point + + // The path has to be transformed + let mut relative = String::new(); + + // Extract the filename of both URIs, these need to be handled separately + fn extract_path_filename(s: &str) -> (&str, &str) { + let last_slash_idx = s.rfind('/').unwrap_or(0); + let (path, filename) = s.split_at(last_slash_idx); + if filename.is_empty() { + (path, "") + } else { + (path, &filename[1..]) + } + } + + let (base_path, base_filename) = extract_path_filename(self.path()); + let (url_path, url_filename) = extract_path_filename(url.path()); + + let mut base_path = base_path.split('/').peekable(); + let mut url_path = url_path.split('/').peekable(); + + // Skip over the common prefix + while base_path.peek().is_some() && base_path.peek() == url_path.peek() { + base_path.next(); + url_path.next(); + } + + // Add `..` segments for the remainder of the base path + for base_path_segment in base_path { + // Skip empty last segments + if base_path_segment.is_empty() { + break; + } + + if !relative.is_empty() { + relative.push('/'); + } + + relative.push_str(".."); + } + + // Append the remainder of the other URI + for url_path_segment in url_path { + if !relative.is_empty() { + relative.push('/'); + } + + relative.push_str(url_path_segment); + } + + // Add the filename if they are not the same + if !relative.is_empty() || base_filename != url_filename { + // If the URIs filename is empty this means that it was a directory + // so we'll have to append a '/'. + // + // Otherwise append it directly as the new filename. + if url_filename.is_empty() { + relative.push('/'); + } else { + if !relative.is_empty() { + relative.push('/'); + } + relative.push_str(url_filename); + } + } + + // Query and fragment are only taken from the other URI + if let Some(query) = url.query() { + relative.push('?'); + relative.push_str(query); + } + + if let Some(fragment) = url.fragment() { + relative.push('#'); + relative.push_str(fragment); + } + + Some(relative) + } + + /// Return a default `ParseOptions` that can fully configure the URL parser. + /// + /// # Examples + /// + /// Get default `ParseOptions`, then change base url + /// + /// ```rust + /// use url::Url; + /// # use url::ParseError; + /// # fn run() -> Result<(), ParseError> { + /// let options = Url::options(); + /// let api = Url::parse("https://api.example.com")?; + /// let base_url = options.base_url(Some(&api)); + /// let version_url = base_url.parse("version.json")?; + /// assert_eq!(version_url.as_str(), "https://api.example.com/version.json"); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + pub fn options<'a>() -> ParseOptions<'a> { + ParseOptions { + base_url: None, + encoding_override: None, + violation_fn: None, + } + } + + /// Return the serialization of this URL. + /// + /// This is fast since that serialization is already stored in the `Url` struct. + /// + /// # Examples + /// + /// ```rust + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url_str = "https://example.net/"; + /// let url = Url::parse(url_str)?; + /// assert_eq!(url.as_str(), url_str); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + #[inline] + pub fn as_str(&self) -> &str { + &self.serialization + } + + /// Return the serialization of this URL. + /// + /// This consumes the `Url` and takes ownership of the `String` stored in it. + /// + /// # Examples + /// + /// ```rust + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url_str = "https://example.net/"; + /// let url = Url::parse(url_str)?; + /// assert_eq!(String::from(url), url_str); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + #[inline] + #[deprecated(since = "2.3.0", note = "use Into")] + pub fn into_string(self) -> String { + self.into() + } + + /// For internal testing, not part of the public API. + /// + /// Methods of the `Url` struct assume a number of invariants. + /// This checks each of these invariants and panic if one is not met. + /// This is for testing rust-url itself. + #[doc(hidden)] + pub fn check_invariants(&self) -> Result<(), String> { + macro_rules! assert { + ($x: expr) => { + if !$x { + return Err(format!( + "!( {} ) for URL {:?}", + stringify!($x), + self.serialization + )); + } + }; + } + + macro_rules! assert_eq { + ($a: expr, $b: expr) => { + { + let a = $a; + let b = $b; + if a != b { + return Err(format!("{:?} != {:?} ({} != {}) for URL {:?}", + a, b, stringify!($a), stringify!($b), + self.serialization)) + } + } + } + } + + assert!(self.scheme_end >= 1); + assert!(matches!(self.byte_at(0), b'a'..=b'z' | b'A'..=b'Z')); + assert!(self + .slice(1..self.scheme_end) + .chars() + .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.'))); + assert_eq!(self.byte_at(self.scheme_end), b':'); + + if self.slice(self.scheme_end + 1..).starts_with("//") { + // URL with authority + if self.username_end != self.serialization.len() as u32 { + match self.byte_at(self.username_end) { + b':' => { + assert!(self.host_start >= self.username_end + 2); + assert_eq!(self.byte_at(self.host_start - 1), b'@'); + } + b'@' => assert!(self.host_start == self.username_end + 1), + _ => assert_eq!(self.username_end, self.scheme_end + 3), + } + } + assert!(self.host_start >= self.username_end); + assert!(self.host_end >= self.host_start); + let host_str = self.slice(self.host_start..self.host_end); + match self.host { + HostInternal::None => assert_eq!(host_str, ""), + HostInternal::Ipv4(address) => assert_eq!(host_str, address.to_string()), + HostInternal::Ipv6(address) => { + let h: Host = Host::Ipv6(address); + assert_eq!(host_str, h.to_string()) + } + HostInternal::Domain => { + if SchemeType::from(self.scheme()).is_special() { + assert!(!host_str.is_empty()) + } + } + } + if self.path_start == self.host_end { + assert_eq!(self.port, None); + } else { + assert_eq!(self.byte_at(self.host_end), b':'); + let port_str = self.slice(self.host_end + 1..self.path_start); + assert_eq!( + self.port, + Some(port_str.parse::().expect("Couldn't parse port?")) + ); + } + assert!( + self.path_start as usize == self.serialization.len() + || matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?') + ); + } else { + // Anarchist URL (no authority) + assert_eq!(self.username_end, self.scheme_end + 1); + assert_eq!(self.host_start, self.scheme_end + 1); + assert_eq!(self.host_end, self.scheme_end + 1); + assert_eq!(self.host, HostInternal::None); + assert_eq!(self.port, None); + assert_eq!(self.path_start, self.scheme_end + 1); + } + if let Some(start) = self.query_start { + assert!(start >= self.path_start); + assert_eq!(self.byte_at(start), b'?'); + } + if let Some(start) = self.fragment_start { + assert!(start >= self.path_start); + assert_eq!(self.byte_at(start), b'#'); + } + if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) { + assert!(fragment_start > query_start); + } + + let other = Url::parse(self.as_str()).expect("Failed to parse myself?"); + assert_eq!(&self.serialization, &other.serialization); + assert_eq!(self.scheme_end, other.scheme_end); + assert_eq!(self.username_end, other.username_end); + assert_eq!(self.host_start, other.host_start); + assert_eq!(self.host_end, other.host_end); + assert!( + self.host == other.host || + // XXX No host round-trips to empty host. + // See https://github.com/whatwg/url/issues/79 + (self.host_str(), other.host_str()) == (None, Some("")) + ); + assert_eq!(self.port, other.port); + assert_eq!(self.path_start, other.path_start); + assert_eq!(self.query_start, other.query_start); + assert_eq!(self.fragment_start, other.fragment_start); + Ok(()) + } + + /// Return the origin of this URL () + /// + /// Note: this returns an opaque origin for `file:` URLs, which causes + /// `url.origin() != url.origin()`. + /// + /// # Examples + /// + /// URL with `ftp` scheme: + /// + /// ```rust + /// use url::{Host, Origin, Url}; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url = Url::parse("ftp://example.com/foo")?; + /// assert_eq!(url.origin(), + /// Origin::Tuple("ftp".into(), + /// Host::Domain("example.com".into()), + /// 21)); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// + /// URL with `blob` scheme: + /// + /// ```rust + /// use url::{Host, Origin, Url}; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url = Url::parse("blob:https://example.com/foo")?; + /// assert_eq!(url.origin(), + /// Origin::Tuple("https".into(), + /// Host::Domain("example.com".into()), + /// 443)); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// + /// URL with `file` scheme: + /// + /// ```rust + /// use url::{Host, Origin, Url}; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url = Url::parse("file:///tmp/foo")?; + /// assert!(!url.origin().is_tuple()); + /// + /// let other_url = Url::parse("file:///tmp/foo")?; + /// assert!(url.origin() != other_url.origin()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// + /// URL with other scheme: + /// + /// ```rust + /// use url::{Host, Origin, Url}; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url = Url::parse("foo:bar")?; + /// assert!(!url.origin().is_tuple()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + #[inline] + pub fn origin(&self) -> Origin { + origin::url_origin(self) + } + + /// Return the scheme of this URL, lower-cased, as an ASCII string without the ':' delimiter. + /// + /// # Examples + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url = Url::parse("file:///tmp/foo")?; + /// assert_eq!(url.scheme(), "file"); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + #[inline] + pub fn scheme(&self) -> &str { + self.slice(..self.scheme_end) + } + + /// Return whether the URL has an 'authority', + /// which can contain a username, password, host, and port number. + /// + /// URLs that do *not* are either path-only like `unix:/run/foo.socket` + /// or cannot-be-a-base like `data:text/plain,Stuff`. + /// + /// # Examples + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url = Url::parse("ftp://rms@example.com")?; + /// assert!(url.has_authority()); + /// + /// let url = Url::parse("unix:/run/foo.socket")?; + /// assert!(!url.has_authority()); + /// + /// let url = Url::parse("data:text/plain,Stuff")?; + /// assert!(!url.has_authority()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + #[inline] + pub fn has_authority(&self) -> bool { + debug_assert!(self.byte_at(self.scheme_end) == b':'); + self.slice(self.scheme_end..).starts_with("://") + } + + /// Return whether this URL is a cannot-be-a-base URL, + /// meaning that parsing a relative URL string with this URL as the base will return an error. + /// + /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash, + /// as is typically the case of `data:` and `mailto:` URLs. + /// + /// # Examples + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url = Url::parse("ftp://rms@example.com")?; + /// assert!(!url.cannot_be_a_base()); + /// + /// let url = Url::parse("unix:/run/foo.socket")?; + /// assert!(!url.cannot_be_a_base()); + /// + /// let url = Url::parse("data:text/plain,Stuff")?; + /// assert!(url.cannot_be_a_base()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + #[inline] + pub fn cannot_be_a_base(&self) -> bool { + !self.slice(self.scheme_end + 1..).starts_with('/') + } + + /// Return the username for this URL (typically the empty string) + /// as a percent-encoded ASCII string. + /// + /// # Examples + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url = Url::parse("ftp://rms@example.com")?; + /// assert_eq!(url.username(), "rms"); + /// + /// let url = Url::parse("ftp://:secret123@example.com")?; + /// assert_eq!(url.username(), ""); + /// + /// let url = Url::parse("https://example.com")?; + /// assert_eq!(url.username(), ""); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + pub fn username(&self) -> &str { + let scheme_separator_len = "://".len() as u32; + if self.has_authority() && self.username_end > self.scheme_end + scheme_separator_len { + self.slice(self.scheme_end + scheme_separator_len..self.username_end) + } else { + "" + } + } + + /// Return the password for this URL, if any, as a percent-encoded ASCII string. + /// + /// # Examples + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url = Url::parse("ftp://rms:secret123@example.com")?; + /// assert_eq!(url.password(), Some("secret123")); + /// + /// let url = Url::parse("ftp://:secret123@example.com")?; + /// assert_eq!(url.password(), Some("secret123")); + /// + /// let url = Url::parse("ftp://rms@example.com")?; + /// assert_eq!(url.password(), None); + /// + /// let url = Url::parse("https://example.com")?; + /// assert_eq!(url.password(), None); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + pub fn password(&self) -> Option<&str> { + // This ':' is not the one marking a port number since a host can not be empty. + // (Except for file: URLs, which do not have port numbers.) + if self.has_authority() + && self.username_end != self.serialization.len() as u32 + && self.byte_at(self.username_end) == b':' + { + debug_assert!(self.byte_at(self.host_start - 1) == b'@'); + Some(self.slice(self.username_end + 1..self.host_start - 1)) + } else { + None + } + } + + /// Equivalent to `url.host().is_some()`. + /// + /// # Examples + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url = Url::parse("ftp://rms@example.com")?; + /// assert!(url.has_host()); + /// + /// let url = Url::parse("unix:/run/foo.socket")?; + /// assert!(!url.has_host()); + /// + /// let url = Url::parse("data:text/plain,Stuff")?; + /// assert!(!url.has_host()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + pub fn has_host(&self) -> bool { + !matches!(self.host, HostInternal::None) + } + + /// Return the string representation of the host (domain or IP address) for this URL, if any. + /// + /// Non-ASCII domains are punycode-encoded per IDNA if this is the host + /// of a special URL, or percent encoded for non-special URLs. + /// IPv6 addresses are given between `[` and `]` brackets. + /// + /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs + /// don’t have a host. + /// + /// See also the `host` method. + /// + /// # Examples + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url = Url::parse("https://127.0.0.1/index.html")?; + /// assert_eq!(url.host_str(), Some("127.0.0.1")); + /// + /// let url = Url::parse("ftp://rms@example.com")?; + /// assert_eq!(url.host_str(), Some("example.com")); + /// + /// let url = Url::parse("unix:/run/foo.socket")?; + /// assert_eq!(url.host_str(), None); + /// + /// let url = Url::parse("data:text/plain,Stuff")?; + /// assert_eq!(url.host_str(), None); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + pub fn host_str(&self) -> Option<&str> { + if self.has_host() { + Some(self.slice(self.host_start..self.host_end)) + } else { + None + } + } + + /// Return the parsed representation of the host for this URL. + /// Non-ASCII domain labels are punycode-encoded per IDNA if this is the host + /// of a special URL, or percent encoded for non-special URLs. + /// + /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs + /// don’t have a host. + /// + /// See also the `host_str` method. + /// + /// # Examples + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url = Url::parse("https://127.0.0.1/index.html")?; + /// assert!(url.host().is_some()); + /// + /// let url = Url::parse("ftp://rms@example.com")?; + /// assert!(url.host().is_some()); + /// + /// let url = Url::parse("unix:/run/foo.socket")?; + /// assert!(url.host().is_none()); + /// + /// let url = Url::parse("data:text/plain,Stuff")?; + /// assert!(url.host().is_none()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + pub fn host(&self) -> Option> { + match self.host { + HostInternal::None => None, + HostInternal::Domain => Some(Host::Domain(self.slice(self.host_start..self.host_end))), + HostInternal::Ipv4(address) => Some(Host::Ipv4(address)), + HostInternal::Ipv6(address) => Some(Host::Ipv6(address)), + } + } + + /// If this URL has a host and it is a domain name (not an IP address), return it. + /// Non-ASCII domains are punycode-encoded per IDNA if this is the host + /// of a special URL, or percent encoded for non-special URLs. + /// + /// # Examples + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url = Url::parse("https://127.0.0.1/")?; + /// assert_eq!(url.domain(), None); + /// + /// let url = Url::parse("mailto:rms@example.net")?; + /// assert_eq!(url.domain(), None); + /// + /// let url = Url::parse("https://example.com/")?; + /// assert_eq!(url.domain(), Some("example.com")); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + pub fn domain(&self) -> Option<&str> { + match self.host { + HostInternal::Domain => Some(self.slice(self.host_start..self.host_end)), + _ => None, + } + } + + /// Return the port number for this URL, if any. + /// + /// Note that default port numbers are never reflected by the serialization, + /// use the `port_or_known_default()` method if you want a default port number returned. + /// + /// # Examples + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url = Url::parse("https://example.com")?; + /// assert_eq!(url.port(), None); + /// + /// let url = Url::parse("https://example.com:443/")?; + /// assert_eq!(url.port(), None); + /// + /// let url = Url::parse("ssh://example.com:22")?; + /// assert_eq!(url.port(), Some(22)); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + #[inline] + pub fn port(&self) -> Option { + self.port + } + + /// Return the port number for this URL, or the default port number if it is known. + /// + /// This method only knows the default port number + /// of the `http`, `https`, `ws`, `wss` and `ftp` schemes. + /// + /// For URLs in these schemes, this method always returns `Some(_)`. + /// For other schemes, it is the same as `Url::port()`. + /// + /// # Examples + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url = Url::parse("foo://example.com")?; + /// assert_eq!(url.port_or_known_default(), None); + /// + /// let url = Url::parse("foo://example.com:1456")?; + /// assert_eq!(url.port_or_known_default(), Some(1456)); + /// + /// let url = Url::parse("https://example.com")?; + /// assert_eq!(url.port_or_known_default(), Some(443)); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + #[inline] + pub fn port_or_known_default(&self) -> Option { + self.port.or_else(|| parser::default_port(self.scheme())) + } + + /// Resolve a URL’s host and port number to `SocketAddr`. + /// + /// If the URL has the default port number of a scheme that is unknown to this library, + /// `default_port_number` provides an opportunity to provide the actual port number. + /// In non-example code this should be implemented either simply as `|| None`, + /// or by matching on the URL’s `.scheme()`. + /// + /// If the host is a domain, it is resolved using the standard library’s DNS support. + /// + /// # Examples + /// + /// ```no_run + /// let url = url::Url::parse("https://example.net/").unwrap(); + /// let addrs = url.socket_addrs(|| None).unwrap(); + /// std::net::TcpStream::connect(&*addrs) + /// # ; + /// ``` + /// + /// ``` + /// /// With application-specific known default port numbers + /// fn socket_addrs(url: url::Url) -> std::io::Result> { + /// url.socket_addrs(|| match url.scheme() { + /// "socks5" | "socks5h" => Some(1080), + /// _ => None, + /// }) + /// } + /// ``` + pub fn socket_addrs( + &self, + default_port_number: impl Fn() -> Option, + ) -> io::Result> { + // Note: trying to avoid the Vec allocation by returning `impl AsRef<[SocketAddr]>` + // causes borrowck issues because the return value borrows `default_port_number`: + // + // https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md#scoping-for-type-and-lifetime-parameters + // + // > This RFC proposes that *all* type parameters are considered in scope + // > for `impl Trait` in return position + + fn io_result(opt: Option, message: &str) -> io::Result { + opt.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, message)) + } + + let host = io_result(self.host(), "No host name in the URL")?; + let port = io_result( + self.port_or_known_default().or_else(default_port_number), + "No port number in the URL", + )?; + Ok(match host { + Host::Domain(domain) => (domain, port).to_socket_addrs()?.collect(), + Host::Ipv4(ip) => vec![(ip, port).into()], + Host::Ipv6(ip) => vec![(ip, port).into()], + }) + } + + /// Return the path for this URL, as a percent-encoded ASCII string. + /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'. + /// For other URLs, this starts with a '/' slash + /// and continues with slash-separated path segments. + /// + /// # Examples + /// + /// ```rust + /// use url::{Url, ParseError}; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url = Url::parse("https://example.com/api/versions?page=2")?; + /// assert_eq!(url.path(), "/api/versions"); + /// + /// let url = Url::parse("https://example.com")?; + /// assert_eq!(url.path(), "/"); + /// + /// let url = Url::parse("https://example.com/countries/việt nam")?; + /// assert_eq!(url.path(), "/countries/vi%E1%BB%87t%20nam"); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + pub fn path(&self) -> &str { + match (self.query_start, self.fragment_start) { + (None, None) => self.slice(self.path_start..), + (Some(next_component_start), _) | (None, Some(next_component_start)) => { + self.slice(self.path_start..next_component_start) + } + } + } + + /// Unless this URL is cannot-be-a-base, + /// return an iterator of '/' slash-separated path segments, + /// each as a percent-encoded ASCII string. + /// + /// Return `None` for cannot-be-a-base URLs. + /// + /// When `Some` is returned, the iterator always contains at least one string + /// (which may be empty). + /// + /// # Examples + /// + /// ``` + /// use url::Url; + /// # use std::error::Error; + /// + /// # fn run() -> Result<(), Box> { + /// let url = Url::parse("https://example.com/foo/bar")?; + /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?; + /// assert_eq!(path_segments.next(), Some("foo")); + /// assert_eq!(path_segments.next(), Some("bar")); + /// assert_eq!(path_segments.next(), None); + /// + /// let url = Url::parse("https://example.com")?; + /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?; + /// assert_eq!(path_segments.next(), Some("")); + /// assert_eq!(path_segments.next(), None); + /// + /// let url = Url::parse("data:text/plain,HelloWorld")?; + /// assert!(url.path_segments().is_none()); + /// + /// let url = Url::parse("https://example.com/countries/việt nam")?; + /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?; + /// assert_eq!(path_segments.next(), Some("countries")); + /// assert_eq!(path_segments.next(), Some("vi%E1%BB%87t%20nam")); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + pub fn path_segments(&self) -> Option> { + let path = self.path(); + path.strip_prefix('/').map(|remainder| remainder.split('/')) + } + + /// Return this URL’s query string, if any, as a percent-encoded ASCII string. + /// + /// # Examples + /// + /// ```rust + /// use url::Url; + /// # use url::ParseError; + /// + /// fn run() -> Result<(), ParseError> { + /// let url = Url::parse("https://example.com/products?page=2")?; + /// let query = url.query(); + /// assert_eq!(query, Some("page=2")); + /// + /// let url = Url::parse("https://example.com/products")?; + /// let query = url.query(); + /// assert!(query.is_none()); + /// + /// let url = Url::parse("https://example.com/?country=español")?; + /// let query = url.query(); + /// assert_eq!(query, Some("country=espa%C3%B1ol")); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + pub fn query(&self) -> Option<&str> { + match (self.query_start, self.fragment_start) { + (None, _) => None, + (Some(query_start), None) => { + debug_assert!(self.byte_at(query_start) == b'?'); + Some(self.slice(query_start + 1..)) + } + (Some(query_start), Some(fragment_start)) => { + debug_assert!(self.byte_at(query_start) == b'?'); + Some(self.slice(query_start + 1..fragment_start)) + } + } + } + + /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded` + /// and return an iterator of (key, value) pairs. + /// + /// # Examples + /// + /// ```rust + /// use std::borrow::Cow; + /// + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url = Url::parse("https://example.com/products?page=2&sort=desc")?; + /// let mut pairs = url.query_pairs(); + /// + /// assert_eq!(pairs.count(), 2); + /// + /// assert_eq!(pairs.next(), Some((Cow::Borrowed("page"), Cow::Borrowed("2")))); + /// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort"), Cow::Borrowed("desc")))); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + + #[inline] + pub fn query_pairs(&self) -> form_urlencoded::Parse<'_> { + form_urlencoded::parse(self.query().unwrap_or("").as_bytes()) + } + + /// Return this URL’s fragment identifier, if any. + /// + /// A fragment is the part of the URL after the `#` symbol. + /// The fragment is optional and, if present, contains a fragment identifier + /// that identifies a secondary resource, such as a section heading + /// of a document. + /// + /// In HTML, the fragment identifier is usually the id attribute of a an element + /// that is scrolled to on load. Browsers typically will not send the fragment portion + /// of a URL to the server. + /// + /// **Note:** the parser did *not* percent-encode this component, + /// but the input may have been percent-encoded already. + /// + /// # Examples + /// + /// ```rust + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url = Url::parse("https://example.com/data.csv#row=4")?; + /// + /// assert_eq!(url.fragment(), Some("row=4")); + /// + /// let url = Url::parse("https://example.com/data.csv#cell=4,1-6,2")?; + /// + /// assert_eq!(url.fragment(), Some("cell=4,1-6,2")); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + pub fn fragment(&self) -> Option<&str> { + self.fragment_start.map(|start| { + debug_assert!(self.byte_at(start) == b'#'); + self.slice(start + 1..) + }) + } + + fn mutate) -> R, R>(&mut self, f: F) -> R { + let mut parser = Parser::for_setter(mem::take(&mut self.serialization)); + let result = f(&mut parser); + self.serialization = parser.serialization; + result + } + + /// Change this URL’s fragment identifier. + /// + /// # Examples + /// + /// ```rust + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("https://example.com/data.csv")?; + /// assert_eq!(url.as_str(), "https://example.com/data.csv"); + + /// url.set_fragment(Some("cell=4,1-6,2")); + /// assert_eq!(url.as_str(), "https://example.com/data.csv#cell=4,1-6,2"); + /// assert_eq!(url.fragment(), Some("cell=4,1-6,2")); + /// + /// url.set_fragment(None); + /// assert_eq!(url.as_str(), "https://example.com/data.csv"); + /// assert!(url.fragment().is_none()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + pub fn set_fragment(&mut self, fragment: Option<&str>) { + // Remove any previous fragment + if let Some(start) = self.fragment_start { + debug_assert!(self.byte_at(start) == b'#'); + self.serialization.truncate(start as usize); + } + // Write the new one + if let Some(input) = fragment { + self.fragment_start = Some(to_u32(self.serialization.len()).unwrap()); + self.serialization.push('#'); + self.mutate(|parser| parser.parse_fragment(parser::Input::no_trim(input))) + } else { + self.fragment_start = None + } + } + + fn take_fragment(&mut self) -> Option { + self.fragment_start.take().map(|start| { + debug_assert!(self.byte_at(start) == b'#'); + let fragment = self.slice(start + 1..).to_owned(); + self.serialization.truncate(start as usize); + fragment + }) + } + + fn restore_already_parsed_fragment(&mut self, fragment: Option) { + if let Some(ref fragment) = fragment { + assert!(self.fragment_start.is_none()); + self.fragment_start = Some(to_u32(self.serialization.len()).unwrap()); + self.serialization.push('#'); + self.serialization.push_str(fragment); + } + } + + /// Change this URL’s query string. + /// + /// # Examples + /// + /// ```rust + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("https://example.com/products")?; + /// assert_eq!(url.as_str(), "https://example.com/products"); + /// + /// url.set_query(Some("page=2")); + /// assert_eq!(url.as_str(), "https://example.com/products?page=2"); + /// assert_eq!(url.query(), Some("page=2")); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + pub fn set_query(&mut self, query: Option<&str>) { + let fragment = self.take_fragment(); + + // Remove any previous query + if let Some(start) = self.query_start.take() { + debug_assert!(self.byte_at(start) == b'?'); + self.serialization.truncate(start as usize); + } + // Write the new query, if any + if let Some(input) = query { + self.query_start = Some(to_u32(self.serialization.len()).unwrap()); + self.serialization.push('?'); + let scheme_type = SchemeType::from(self.scheme()); + let scheme_end = self.scheme_end; + self.mutate(|parser| { + let vfn = parser.violation_fn; + parser.parse_query( + scheme_type, + scheme_end, + parser::Input::trim_tab_and_newlines(input, vfn), + ) + }); + } + + self.restore_already_parsed_fragment(fragment); + } + + /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs + /// in `application/x-www-form-urlencoded` syntax. + /// + /// The return value has a method-chaining API: + /// + /// ```rust + /// # use url::{Url, ParseError}; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("https://example.net?lang=fr#nav")?; + /// assert_eq!(url.query(), Some("lang=fr")); + /// + /// url.query_pairs_mut().append_pair("foo", "bar"); + /// assert_eq!(url.query(), Some("lang=fr&foo=bar")); + /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav"); + /// + /// url.query_pairs_mut() + /// .clear() + /// .append_pair("foo", "bar & baz") + /// .append_pair("saisons", "\u{00C9}t\u{00E9}+hiver"); + /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver")); + /// assert_eq!(url.as_str(), + /// "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav"); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// + /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`, + /// not `url.set_query(None)`. + /// + /// The state of `Url` is unspecified if this return value is leaked without being dropped. + pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>> { + let fragment = self.take_fragment(); + + let query_start; + if let Some(start) = self.query_start { + debug_assert!(self.byte_at(start) == b'?'); + query_start = start as usize; + } else { + query_start = self.serialization.len(); + self.query_start = Some(to_u32(query_start).unwrap()); + self.serialization.push('?'); + } + + let query = UrlQuery { + url: Some(self), + fragment, + }; + form_urlencoded::Serializer::for_suffix(query, query_start + "?".len()) + } + + fn take_after_path(&mut self) -> String { + match (self.query_start, self.fragment_start) { + (Some(i), _) | (None, Some(i)) => { + let after_path = self.slice(i..).to_owned(); + self.serialization.truncate(i as usize); + after_path + } + (None, None) => String::new(), + } + } + + /// Change this URL’s path. + /// + /// # Examples + /// + /// ```rust + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("https://example.com")?; + /// url.set_path("api/comments"); + /// assert_eq!(url.as_str(), "https://example.com/api/comments"); + /// assert_eq!(url.path(), "/api/comments"); + /// + /// let mut url = Url::parse("https://example.com/api")?; + /// url.set_path("data/report.csv"); + /// assert_eq!(url.as_str(), "https://example.com/data/report.csv"); + /// assert_eq!(url.path(), "/data/report.csv"); + /// + /// // `set_path` percent-encodes the given string if it's not already percent-encoded. + /// let mut url = Url::parse("https://example.com")?; + /// url.set_path("api/some comments"); + /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments"); + /// assert_eq!(url.path(), "/api/some%20comments"); + /// + /// // `set_path` will not double percent-encode the string if it's already percent-encoded. + /// let mut url = Url::parse("https://example.com")?; + /// url.set_path("api/some%20comments"); + /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments"); + /// assert_eq!(url.path(), "/api/some%20comments"); + /// + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + pub fn set_path(&mut self, mut path: &str) { + let after_path = self.take_after_path(); + let old_after_path_pos = to_u32(self.serialization.len()).unwrap(); + let cannot_be_a_base = self.cannot_be_a_base(); + let scheme_type = SchemeType::from(self.scheme()); + self.serialization.truncate(self.path_start as usize); + self.mutate(|parser| { + if cannot_be_a_base { + if path.starts_with('/') { + parser.serialization.push_str("%2F"); + path = &path[1..]; + } + parser.parse_cannot_be_a_base_path(parser::Input::new(path)); + } else { + let mut has_host = true; // FIXME + parser.parse_path_start(scheme_type, &mut has_host, parser::Input::new(path)); + } + }); + self.restore_after_path(old_after_path_pos, &after_path); + } + + /// Return an object with methods to manipulate this URL’s path segments. + /// + /// Return `Err(())` if this URL is cannot-be-a-base. + #[allow(clippy::result_unit_err)] + pub fn path_segments_mut(&mut self) -> Result, ()> { + if self.cannot_be_a_base() { + Err(()) + } else { + Ok(path_segments::new(self)) + } + } + + fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) { + let new_after_path_position = to_u32(self.serialization.len()).unwrap(); + let adjust = |index: &mut u32| { + *index -= old_after_path_position; + *index += new_after_path_position; + }; + if let Some(ref mut index) = self.query_start { + adjust(index) + } + if let Some(ref mut index) = self.fragment_start { + adjust(index) + } + self.serialization.push_str(after_path) + } + + /// Change this URL’s port number. + /// + /// Note that default port numbers are not reflected in the serialization. + /// + /// If this URL is cannot-be-a-base, does not have a host, or has the `file` scheme; + /// do nothing and return `Err`. + /// + /// # Examples + /// + /// ``` + /// use url::Url; + /// # use std::error::Error; + /// + /// # fn run() -> Result<(), Box> { + /// let mut url = Url::parse("ssh://example.net:2048/")?; + /// + /// url.set_port(Some(4096)).map_err(|_| "cannot be base")?; + /// assert_eq!(url.as_str(), "ssh://example.net:4096/"); + /// + /// url.set_port(None).map_err(|_| "cannot be base")?; + /// assert_eq!(url.as_str(), "ssh://example.net/"); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// + /// Known default port numbers are not reflected: + /// + /// ```rust + /// use url::Url; + /// # use std::error::Error; + /// + /// # fn run() -> Result<(), Box> { + /// let mut url = Url::parse("https://example.org/")?; + /// + /// url.set_port(Some(443)).map_err(|_| "cannot be base")?; + /// assert!(url.port().is_none()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// + /// Cannot set port for cannot-be-a-base URLs: + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("mailto:rms@example.net")?; + /// + /// let result = url.set_port(Some(80)); + /// assert!(result.is_err()); + /// + /// let result = url.set_port(None); + /// assert!(result.is_err()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + #[allow(clippy::result_unit_err)] + pub fn set_port(&mut self, mut port: Option) -> Result<(), ()> { + // has_host implies !cannot_be_a_base + if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" { + return Err(()); + } + if port.is_some() && port == parser::default_port(self.scheme()) { + port = None + } + self.set_port_internal(port); + Ok(()) + } + + fn set_port_internal(&mut self, port: Option) { + match (self.port, port) { + (None, None) => {} + (Some(_), None) => { + self.serialization + .drain(self.host_end as usize..self.path_start as usize); + let offset = self.path_start - self.host_end; + self.path_start = self.host_end; + if let Some(ref mut index) = self.query_start { + *index -= offset + } + if let Some(ref mut index) = self.fragment_start { + *index -= offset + } + } + (Some(old), Some(new)) if old == new => {} + (_, Some(new)) => { + let path_and_after = self.slice(self.path_start..).to_owned(); + self.serialization.truncate(self.host_end as usize); + write!(&mut self.serialization, ":{}", new).unwrap(); + let old_path_start = self.path_start; + let new_path_start = to_u32(self.serialization.len()).unwrap(); + self.path_start = new_path_start; + let adjust = |index: &mut u32| { + *index -= old_path_start; + *index += new_path_start; + }; + if let Some(ref mut index) = self.query_start { + adjust(index) + } + if let Some(ref mut index) = self.fragment_start { + adjust(index) + } + self.serialization.push_str(&path_and_after); + } + } + self.port = port; + } + + /// Change this URL’s host. + /// + /// Removing the host (calling this with `None`) + /// will also remove any username, password, and port number. + /// + /// # Examples + /// + /// Change host: + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("https://example.net")?; + /// let result = url.set_host(Some("rust-lang.org")); + /// assert!(result.is_ok()); + /// assert_eq!(url.as_str(), "https://rust-lang.org/"); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// + /// Remove host: + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("foo://example.net")?; + /// let result = url.set_host(None); + /// assert!(result.is_ok()); + /// assert_eq!(url.as_str(), "foo:/"); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// + /// Cannot remove host for 'special' schemes (e.g. `http`): + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("https://example.net")?; + /// let result = url.set_host(None); + /// assert!(result.is_err()); + /// assert_eq!(url.as_str(), "https://example.net/"); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// + /// Cannot change or remove host for cannot-be-a-base URLs: + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("mailto:rms@example.net")?; + /// + /// let result = url.set_host(Some("rust-lang.org")); + /// assert!(result.is_err()); + /// assert_eq!(url.as_str(), "mailto:rms@example.net"); + /// + /// let result = url.set_host(None); + /// assert!(result.is_err()); + /// assert_eq!(url.as_str(), "mailto:rms@example.net"); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// + /// # Errors + /// + /// If this URL is cannot-be-a-base or there is an error parsing the given `host`, + /// a [`ParseError`] variant will be returned. + /// + /// [`ParseError`]: enum.ParseError.html + pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> { + if self.cannot_be_a_base() { + return Err(ParseError::SetHostOnCannotBeABaseUrl); + } + + let scheme_type = SchemeType::from(self.scheme()); + + if let Some(host) = host { + if host.is_empty() && scheme_type.is_special() && !scheme_type.is_file() { + return Err(ParseError::EmptyHost); + } + let mut host_substr = host; + // Otherwise, if c is U+003A (:) and the [] flag is unset, then + if !host.starts_with('[') || !host.ends_with(']') { + match host.find(':') { + Some(0) => { + // If buffer is the empty string, validation error, return failure. + return Err(ParseError::InvalidDomainCharacter); + } + // Let host be the result of host parsing buffer + Some(colon_index) => { + host_substr = &host[..colon_index]; + } + None => {} + } + } + if SchemeType::from(self.scheme()).is_special() { + self.set_host_internal(Host::parse(host_substr)?, None); + } else { + self.set_host_internal(Host::parse_opaque(host_substr)?, None); + } + } else if self.has_host() { + if scheme_type.is_special() && !scheme_type.is_file() { + return Err(ParseError::EmptyHost); + } else if self.serialization.len() == self.path_start as usize { + self.serialization.push('/'); + } + debug_assert!(self.byte_at(self.scheme_end) == b':'); + debug_assert!(self.byte_at(self.path_start) == b'/'); + + let new_path_start = if scheme_type.is_file() { + self.scheme_end + 3 + } else { + self.scheme_end + 1 + }; + + self.serialization + .drain(new_path_start as usize..self.path_start as usize); + let offset = self.path_start - new_path_start; + self.path_start = new_path_start; + self.username_end = new_path_start; + self.host_start = new_path_start; + self.host_end = new_path_start; + self.port = None; + if let Some(ref mut index) = self.query_start { + *index -= offset + } + if let Some(ref mut index) = self.fragment_start { + *index -= offset + } + } + Ok(()) + } + + /// opt_new_port: None means leave unchanged, Some(None) means remove any port number. + fn set_host_internal(&mut self, host: Host, opt_new_port: Option>) { + let old_suffix_pos = if opt_new_port.is_some() { + self.path_start + } else { + self.host_end + }; + let suffix = self.slice(old_suffix_pos..).to_owned(); + self.serialization.truncate(self.host_start as usize); + if !self.has_authority() { + debug_assert!(self.slice(self.scheme_end..self.host_start) == ":"); + debug_assert!(self.username_end == self.host_start); + self.serialization.push('/'); + self.serialization.push('/'); + self.username_end += 2; + self.host_start += 2; + } + write!(&mut self.serialization, "{}", host).unwrap(); + self.host_end = to_u32(self.serialization.len()).unwrap(); + self.host = host.into(); + + if let Some(new_port) = opt_new_port { + self.port = new_port; + if let Some(port) = new_port { + write!(&mut self.serialization, ":{}", port).unwrap(); + } + } + let new_suffix_pos = to_u32(self.serialization.len()).unwrap(); + self.serialization.push_str(&suffix); + + let adjust = |index: &mut u32| { + *index -= old_suffix_pos; + *index += new_suffix_pos; + }; + adjust(&mut self.path_start); + if let Some(ref mut index) = self.query_start { + adjust(index) + } + if let Some(ref mut index) = self.fragment_start { + adjust(index) + } + } + + /// Change this URL’s host to the given IP address. + /// + /// If this URL is cannot-be-a-base, do nothing and return `Err`. + /// + /// Compared to `Url::set_host`, this skips the host parser. + /// + /// # Examples + /// + /// ```rust + /// use url::{Url, ParseError}; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("http://example.com")?; + /// url.set_ip_host("127.0.0.1".parse().unwrap()); + /// assert_eq!(url.host_str(), Some("127.0.0.1")); + /// assert_eq!(url.as_str(), "http://127.0.0.1/"); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// + /// Cannot change URL's from mailto(cannot-be-base) to ip: + /// + /// ```rust + /// use url::{Url, ParseError}; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("mailto:rms@example.com")?; + /// let result = url.set_ip_host("127.0.0.1".parse().unwrap()); + /// + /// assert_eq!(url.as_str(), "mailto:rms@example.com"); + /// assert!(result.is_err()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// + #[allow(clippy::result_unit_err)] + pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> { + if self.cannot_be_a_base() { + return Err(()); + } + + let address = match address { + IpAddr::V4(address) => Host::Ipv4(address), + IpAddr::V6(address) => Host::Ipv6(address), + }; + self.set_host_internal(address, None); + Ok(()) + } + + /// Change this URL’s password. + /// + /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`. + /// + /// # Examples + /// + /// ```rust + /// use url::{Url, ParseError}; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("mailto:rmz@example.com")?; + /// let result = url.set_password(Some("secret_password")); + /// assert!(result.is_err()); + /// + /// let mut url = Url::parse("ftp://user1:secret1@example.com")?; + /// let result = url.set_password(Some("secret_password")); + /// assert_eq!(url.password(), Some("secret_password")); + /// + /// let mut url = Url::parse("ftp://user2:@example.com")?; + /// let result = url.set_password(Some("secret2")); + /// assert!(result.is_ok()); + /// assert_eq!(url.password(), Some("secret2")); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + #[allow(clippy::result_unit_err)] + pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> { + // has_host implies !cannot_be_a_base + if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" { + return Err(()); + } + if let Some(password) = password { + let host_and_after = self.slice(self.host_start..).to_owned(); + self.serialization.truncate(self.username_end as usize); + self.serialization.push(':'); + self.serialization + .extend(utf8_percent_encode(password, USERINFO)); + self.serialization.push('@'); + + let old_host_start = self.host_start; + let new_host_start = to_u32(self.serialization.len()).unwrap(); + let adjust = |index: &mut u32| { + *index -= old_host_start; + *index += new_host_start; + }; + self.host_start = new_host_start; + adjust(&mut self.host_end); + adjust(&mut self.path_start); + if let Some(ref mut index) = self.query_start { + adjust(index) + } + if let Some(ref mut index) = self.fragment_start { + adjust(index) + } + + self.serialization.push_str(&host_and_after); + } else if self.byte_at(self.username_end) == b':' { + // If there is a password to remove + let has_username_or_password = self.byte_at(self.host_start - 1) == b'@'; + debug_assert!(has_username_or_password); + let username_start = self.scheme_end + 3; + let empty_username = username_start == self.username_end; + let start = self.username_end; // Remove the ':' + let end = if empty_username { + self.host_start // Remove the '@' as well + } else { + self.host_start - 1 // Keep the '@' to separate the username from the host + }; + self.serialization.drain(start as usize..end as usize); + let offset = end - start; + self.host_start -= offset; + self.host_end -= offset; + self.path_start -= offset; + if let Some(ref mut index) = self.query_start { + *index -= offset + } + if let Some(ref mut index) = self.fragment_start { + *index -= offset + } + } + Ok(()) + } + + /// Change this URL’s username. + /// + /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`. + /// # Examples + /// + /// Cannot setup username from mailto(cannot-be-base) + /// + /// ```rust + /// use url::{Url, ParseError}; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("mailto:rmz@example.com")?; + /// let result = url.set_username("user1"); + /// assert_eq!(url.as_str(), "mailto:rmz@example.com"); + /// assert!(result.is_err()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// + /// Setup username to user1 + /// + /// ```rust + /// use url::{Url, ParseError}; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("ftp://:secre1@example.com/")?; + /// let result = url.set_username("user1"); + /// assert!(result.is_ok()); + /// assert_eq!(url.username(), "user1"); + /// assert_eq!(url.as_str(), "ftp://user1:secre1@example.com/"); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + #[allow(clippy::result_unit_err)] + pub fn set_username(&mut self, username: &str) -> Result<(), ()> { + // has_host implies !cannot_be_a_base + if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" { + return Err(()); + } + let username_start = self.scheme_end + 3; + debug_assert!(self.slice(self.scheme_end..username_start) == "://"); + if self.slice(username_start..self.username_end) == username { + return Ok(()); + } + let after_username = self.slice(self.username_end..).to_owned(); + self.serialization.truncate(username_start as usize); + self.serialization + .extend(utf8_percent_encode(username, USERINFO)); + + let mut removed_bytes = self.username_end; + self.username_end = to_u32(self.serialization.len()).unwrap(); + let mut added_bytes = self.username_end; + + let new_username_is_empty = self.username_end == username_start; + match (new_username_is_empty, after_username.chars().next()) { + (true, Some('@')) => { + removed_bytes += 1; + self.serialization.push_str(&after_username[1..]); + } + (false, Some('@')) | (_, Some(':')) | (true, _) => { + self.serialization.push_str(&after_username); + } + (false, _) => { + added_bytes += 1; + self.serialization.push('@'); + self.serialization.push_str(&after_username); + } + } + + let adjust = |index: &mut u32| { + *index -= removed_bytes; + *index += added_bytes; + }; + adjust(&mut self.host_start); + adjust(&mut self.host_end); + adjust(&mut self.path_start); + if let Some(ref mut index) = self.query_start { + adjust(index) + } + if let Some(ref mut index) = self.fragment_start { + adjust(index) + } + Ok(()) + } + + /// Change this URL’s scheme. + /// + /// Do nothing and return `Err` under the following circumstances: + /// + /// * If the new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]+` + /// * If this URL is cannot-be-a-base and the new scheme is one of + /// `http`, `https`, `ws`, `wss` or `ftp` + /// * If either the old or new scheme is `http`, `https`, `ws`, + /// `wss` or `ftp` and the other is not one of these + /// * If the new scheme is `file` and this URL includes credentials + /// or has a non-null port + /// * If this URL's scheme is `file` and its host is empty or null + /// + /// See also [the URL specification's section on legal scheme state + /// overrides](https://url.spec.whatwg.org/#scheme-state). + /// + /// # Examples + /// + /// Change the URL’s scheme from `https` to `http`: + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("https://example.net")?; + /// let result = url.set_scheme("http"); + /// assert_eq!(url.as_str(), "http://example.net/"); + /// assert!(result.is_ok()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// Change the URL’s scheme from `foo` to `bar`: + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("foo://example.net")?; + /// let result = url.set_scheme("bar"); + /// assert_eq!(url.as_str(), "bar://example.net"); + /// assert!(result.is_ok()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// + /// Cannot change URL’s scheme from `https` to `foõ`: + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("https://example.net")?; + /// let result = url.set_scheme("foõ"); + /// assert_eq!(url.as_str(), "https://example.net/"); + /// assert!(result.is_err()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// + /// Cannot change URL’s scheme from `mailto` (cannot-be-a-base) to `https`: + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("mailto:rms@example.net")?; + /// let result = url.set_scheme("https"); + /// assert_eq!(url.as_str(), "mailto:rms@example.net"); + /// assert!(result.is_err()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// Cannot change the URL’s scheme from `foo` to `https`: + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("foo://example.net")?; + /// let result = url.set_scheme("https"); + /// assert_eq!(url.as_str(), "foo://example.net"); + /// assert!(result.is_err()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// Cannot change the URL’s scheme from `http` to `foo`: + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("http://example.net")?; + /// let result = url.set_scheme("foo"); + /// assert_eq!(url.as_str(), "http://example.net/"); + /// assert!(result.is_err()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + #[allow(clippy::result_unit_err, clippy::suspicious_operation_groupings)] + pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> { + let mut parser = Parser::for_setter(String::new()); + let remaining = parser.parse_scheme(parser::Input::new(scheme))?; + let new_scheme_type = SchemeType::from(&parser.serialization); + let old_scheme_type = SchemeType::from(self.scheme()); + // If url’s scheme is a special scheme and buffer is not a special scheme, then return. + if (new_scheme_type.is_special() && !old_scheme_type.is_special()) || + // If url’s scheme is not a special scheme and buffer is a special scheme, then return. + (!new_scheme_type.is_special() && old_scheme_type.is_special()) || + // If url includes credentials or has a non-null port, and buffer is "file", then return. + // If url’s scheme is "file" and its host is an empty host or null, then return. + (new_scheme_type.is_file() && self.has_authority()) + { + return Err(()); + } + + if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) { + return Err(()); + } + let old_scheme_end = self.scheme_end; + let new_scheme_end = to_u32(parser.serialization.len()).unwrap(); + let adjust = |index: &mut u32| { + *index -= old_scheme_end; + *index += new_scheme_end; + }; + + self.scheme_end = new_scheme_end; + adjust(&mut self.username_end); + adjust(&mut self.host_start); + adjust(&mut self.host_end); + adjust(&mut self.path_start); + if let Some(ref mut index) = self.query_start { + adjust(index) + } + if let Some(ref mut index) = self.fragment_start { + adjust(index) + } + + parser.serialization.push_str(self.slice(old_scheme_end..)); + self.serialization = parser.serialization; + + // Update the port so it can be removed + // If it is the scheme's default + // we don't mind it silently failing + // if there was no port in the first place + let previous_port = self.port(); + let _ = self.set_port(previous_port); + + Ok(()) + } + + /// Convert a file name as `std::path::Path` into an URL in the `file` scheme. + /// + /// This returns `Err` if the given path is not absolute or, + /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`). + /// + /// # Examples + /// + /// On Unix-like platforms: + /// + /// ``` + /// # if cfg!(unix) { + /// use url::Url; + /// + /// # fn run() -> Result<(), ()> { + /// let url = Url::from_file_path("/tmp/foo.txt")?; + /// assert_eq!(url.as_str(), "file:///tmp/foo.txt"); + /// + /// let url = Url::from_file_path("../foo.txt"); + /// assert!(url.is_err()); + /// + /// let url = Url::from_file_path("https://google.com/"); + /// assert!(url.is_err()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// # } + /// ``` + #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] + #[allow(clippy::result_unit_err)] + pub fn from_file_path>(path: P) -> Result { + let mut serialization = "file://".to_owned(); + let host_start = serialization.len() as u32; + let (host_end, host) = path_to_file_url_segments(path.as_ref(), &mut serialization)?; + Ok(Url { + serialization, + scheme_end: "file".len() as u32, + username_end: host_start, + host_start, + host_end, + host, + port: None, + path_start: host_end, + query_start: None, + fragment_start: None, + }) + } + + /// Convert a directory name as `std::path::Path` into an URL in the `file` scheme. + /// + /// This returns `Err` if the given path is not absolute or, + /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`). + /// + /// Compared to `from_file_path`, this ensure that URL’s the path has a trailing slash + /// so that the entire path is considered when using this URL as a base URL. + /// + /// For example: + /// + /// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))` + /// as the base URL is `file:///var/www/index.html` + /// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))` + /// as the base URL is `file:///var/index.html`, which might not be what was intended. + /// + /// Note that `std::path` does not consider trailing slashes significant + /// and usually does not include them (e.g. in `Path::parent()`). + #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] + #[allow(clippy::result_unit_err)] + pub fn from_directory_path>(path: P) -> Result { + let mut url = Url::from_file_path(path)?; + if !url.serialization.ends_with('/') { + url.serialization.push('/') + } + Ok(url) + } + + /// Serialize with Serde using the internal representation of the `Url` struct. + /// + /// The corresponding `deserialize_internal` method sacrifices some invariant-checking + /// for speed, compared to the `Deserialize` trait impl. + /// + /// This method is only available if the `serde` Cargo feature is enabled. + #[cfg(feature = "serde")] + #[deny(unused)] + pub fn serialize_internal(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + use serde::Serialize; + // Destructuring first lets us ensure that adding or removing fields forces this method + // to be updated + let Url { + ref serialization, + ref scheme_end, + ref username_end, + ref host_start, + ref host_end, + ref host, + ref port, + ref path_start, + ref query_start, + ref fragment_start, + } = *self; + ( + serialization, + scheme_end, + username_end, + host_start, + host_end, + host, + port, + path_start, + query_start, + fragment_start, + ) + .serialize(serializer) + } + + /// Serialize with Serde using the internal representation of the `Url` struct. + /// + /// The corresponding `deserialize_internal` method sacrifices some invariant-checking + /// for speed, compared to the `Deserialize` trait impl. + /// + /// This method is only available if the `serde` Cargo feature is enabled. + #[cfg(feature = "serde")] + #[deny(unused)] + pub fn deserialize_internal<'de, D>(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + use serde::de::{Deserialize, Error, Unexpected}; + let ( + serialization, + scheme_end, + username_end, + host_start, + host_end, + host, + port, + path_start, + query_start, + fragment_start, + ) = Deserialize::deserialize(deserializer)?; + let url = Url { + serialization, + scheme_end, + username_end, + host_start, + host_end, + host, + port, + path_start, + query_start, + fragment_start, + }; + if cfg!(debug_assertions) { + url.check_invariants().map_err(|reason| { + let reason: &str = &reason; + Error::invalid_value(Unexpected::Other("value"), &reason) + })? + } + Ok(url) + } + + /// Assuming the URL is in the `file` scheme or similar, + /// convert its path to an absolute `std::path::Path`. + /// + /// **Note:** This does not actually check the URL’s `scheme`, + /// and may give nonsensical results for other schemes. + /// It is the user’s responsibility to check the URL’s scheme before calling this. + /// + /// ``` + /// # use url::Url; + /// # let url = Url::parse("file:///etc/passwd").unwrap(); + /// let path = url.to_file_path(); + /// ``` + /// + /// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where + /// `file:` URLs may have a non-local host), + /// or if `Path::new_opt()` returns `None`. + /// (That is, if the percent-decoded path contains a NUL byte or, + /// for a Windows path, is not UTF-8.) + #[inline] + #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] + #[allow(clippy::result_unit_err)] + pub fn to_file_path(&self) -> Result { + if let Some(segments) = self.path_segments() { + let host = match self.host() { + None | Some(Host::Domain("localhost")) => None, + Some(_) if cfg!(windows) && self.scheme() == "file" => { + Some(&self.serialization[self.host_start as usize..self.host_end as usize]) + } + _ => return Err(()), + }; + + return file_url_segments_to_pathbuf(host, segments); + } + Err(()) + } + + // Private helper methods: + + #[inline] + fn slice(&self, range: R) -> &str + where + R: RangeArg, + { + range.slice_of(&self.serialization) + } + + #[inline] + fn byte_at(&self, i: u32) -> u8 { + self.serialization.as_bytes()[i as usize] + } +} + +/// Parse a string as an URL, without a base URL or encoding override. +impl str::FromStr for Url { + type Err = ParseError; + + #[inline] + fn from_str(input: &str) -> Result { + Url::parse(input) + } +} + +impl<'a> TryFrom<&'a str> for Url { + type Error = ParseError; + + fn try_from(s: &'a str) -> Result { + Url::parse(s) + } +} + +/// Display the serialization of this URL. +impl fmt::Display for Url { + #[inline] + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&self.serialization, formatter) + } +} + +/// String conversion. +impl From for String { + fn from(value: Url) -> String { + value.serialization + } +} + +/// Debug the serialization of this URL. +impl fmt::Debug for Url { + #[inline] + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter + .debug_struct("Url") + .field("scheme", &self.scheme()) + .field("cannot_be_a_base", &self.cannot_be_a_base()) + .field("username", &self.username()) + .field("password", &self.password()) + .field("host", &self.host()) + .field("port", &self.port()) + .field("path", &self.path()) + .field("query", &self.query()) + .field("fragment", &self.fragment()) + .finish() + } +} + +/// URLs compare like their serialization. +impl Eq for Url {} + +/// URLs compare like their serialization. +impl PartialEq for Url { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.serialization == other.serialization + } +} + +/// URLs compare like their serialization. +impl Ord for Url { + #[inline] + fn cmp(&self, other: &Self) -> cmp::Ordering { + self.serialization.cmp(&other.serialization) + } +} + +/// URLs compare like their serialization. +impl PartialOrd for Url { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + self.serialization.partial_cmp(&other.serialization) + } +} + +/// URLs hash like their serialization. +impl hash::Hash for Url { + #[inline] + fn hash(&self, state: &mut H) + where + H: hash::Hasher, + { + hash::Hash::hash(&self.serialization, state) + } +} + +/// Return the serialization of this URL. +impl AsRef for Url { + #[inline] + fn as_ref(&self) -> &str { + &self.serialization + } +} + +trait RangeArg { + fn slice_of<'a>(&self, s: &'a str) -> &'a str; +} + +impl RangeArg for Range { + #[inline] + fn slice_of<'a>(&self, s: &'a str) -> &'a str { + &s[self.start as usize..self.end as usize] + } +} + +impl RangeArg for RangeFrom { + #[inline] + fn slice_of<'a>(&self, s: &'a str) -> &'a str { + &s[self.start as usize..] + } +} + +impl RangeArg for RangeTo { + #[inline] + fn slice_of<'a>(&self, s: &'a str) -> &'a str { + &s[..self.end as usize] + } +} + +/// Serializes this URL into a `serde` stream. +/// +/// This implementation is only available if the `serde` Cargo feature is enabled. +#[cfg(feature = "serde")] +impl serde::Serialize for Url { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + serializer.serialize_str(self.as_str()) + } +} + +/// Deserializes this URL from a `serde` stream. +/// +/// This implementation is only available if the `serde` Cargo feature is enabled. +#[cfg(feature = "serde")] +impl<'de> serde::Deserialize<'de> for Url { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + use serde::de::{Error, Unexpected, Visitor}; + + struct UrlVisitor; + + impl<'de> Visitor<'de> for UrlVisitor { + type Value = Url; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a string representing an URL") + } + + fn visit_str(self, s: &str) -> Result + where + E: Error, + { + Url::parse(s).map_err(|err| { + let err_s = format!("{}", err); + Error::invalid_value(Unexpected::Str(s), &err_s.as_str()) + }) + } + } + + deserializer.deserialize_str(UrlVisitor) + } +} + +#[cfg(any(unix, target_os = "redox", target_os = "wasi"))] +fn path_to_file_url_segments( + path: &Path, + serialization: &mut String, +) -> Result<(u32, HostInternal), ()> { + #[cfg(any(unix, target_os = "redox"))] + use std::os::unix::prelude::OsStrExt; + #[cfg(target_os = "wasi")] + use std::os::wasi::prelude::OsStrExt; + if !path.is_absolute() { + return Err(()); + } + let host_end = to_u32(serialization.len()).unwrap(); + let mut empty = true; + // skip the root component + for component in path.components().skip(1) { + empty = false; + serialization.push('/'); + serialization.extend(percent_encode( + component.as_os_str().as_bytes(), + PATH_SEGMENT, + )); + } + if empty { + // An URL’s path must not be empty. + serialization.push('/'); + } + Ok((host_end, HostInternal::None)) +} + +#[cfg(windows)] +fn path_to_file_url_segments( + path: &Path, + serialization: &mut String, +) -> Result<(u32, HostInternal), ()> { + path_to_file_url_segments_windows(path, serialization) +} + +// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102 +#[cfg_attr(not(windows), allow(dead_code))] +fn path_to_file_url_segments_windows( + path: &Path, + serialization: &mut String, +) -> Result<(u32, HostInternal), ()> { + use std::path::{Component, Prefix}; + if !path.is_absolute() { + return Err(()); + } + let mut components = path.components(); + + let host_start = serialization.len() + 1; + let host_end; + let host_internal; + + match components.next() { + Some(Component::Prefix(ref p)) => match p.kind() { + Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => { + host_end = to_u32(serialization.len()).unwrap(); + host_internal = HostInternal::None; + serialization.push('/'); + serialization.push(letter as char); + serialization.push(':'); + } + Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => { + let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?; + write!(serialization, "{}", host).unwrap(); + host_end = to_u32(serialization.len()).unwrap(); + host_internal = host.into(); + serialization.push('/'); + let share = share.to_str().ok_or(())?; + serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT)); + } + _ => return Err(()), + }, + _ => return Err(()), + } + + let mut path_only_has_prefix = true; + for component in components { + if component == Component::RootDir { + continue; + } + + path_only_has_prefix = false; + // FIXME: somehow work with non-unicode? + let component = component.as_os_str().to_str().ok_or(())?; + + serialization.push('/'); + serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT)); + } + + // A windows drive letter must end with a slash. + if serialization.len() > host_start + && parser::is_windows_drive_letter(&serialization[host_start..]) + && path_only_has_prefix + { + serialization.push('/'); + } + + Ok((host_end, host_internal)) +} + +#[cfg(any(unix, target_os = "redox", target_os = "wasi"))] +fn file_url_segments_to_pathbuf( + host: Option<&str>, + segments: str::Split<'_, char>, +) -> Result { + use std::ffi::OsStr; + #[cfg(any(unix, target_os = "redox"))] + use std::os::unix::prelude::OsStrExt; + #[cfg(target_os = "wasi")] + use std::os::wasi::prelude::OsStrExt; + + if host.is_some() { + return Err(()); + } + + let mut bytes = if cfg!(target_os = "redox") { + b"file:".to_vec() + } else { + Vec::new() + }; + + for segment in segments { + bytes.push(b'/'); + bytes.extend(percent_decode(segment.as_bytes())); + } + + // A windows drive letter must end with a slash. + if bytes.len() > 2 + && matches!(bytes[bytes.len() - 2], b'a'..=b'z' | b'A'..=b'Z') + && matches!(bytes[bytes.len() - 1], b':' | b'|') + { + bytes.push(b'/'); + } + + let os_str = OsStr::from_bytes(&bytes); + let path = PathBuf::from(os_str); + + debug_assert!( + path.is_absolute(), + "to_file_path() failed to produce an absolute Path" + ); + + Ok(path) +} + +#[cfg(windows)] +fn file_url_segments_to_pathbuf( + host: Option<&str>, + segments: str::Split, +) -> Result { + file_url_segments_to_pathbuf_windows(host, segments) +} + +// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102 +#[cfg_attr(not(windows), allow(dead_code))] +fn file_url_segments_to_pathbuf_windows( + host: Option<&str>, + mut segments: str::Split<'_, char>, +) -> Result { + let mut string = if let Some(host) = host { + r"\\".to_owned() + host + } else { + let first = segments.next().ok_or(())?; + + match first.len() { + 2 => { + if !first.starts_with(parser::ascii_alpha) || first.as_bytes()[1] != b':' { + return Err(()); + } + + first.to_owned() + } + + 4 => { + if !first.starts_with(parser::ascii_alpha) { + return Err(()); + } + let bytes = first.as_bytes(); + if bytes[1] != b'%' || bytes[2] != b'3' || (bytes[3] != b'a' && bytes[3] != b'A') { + return Err(()); + } + + first[0..1].to_owned() + ":" + } + + _ => return Err(()), + } + }; + + for segment in segments { + string.push('\\'); + + // Currently non-unicode windows paths cannot be represented + match String::from_utf8(percent_decode(segment.as_bytes()).collect()) { + Ok(s) => string.push_str(&s), + Err(..) => return Err(()), + } + } + let path = PathBuf::from(string); + debug_assert!( + path.is_absolute(), + "to_file_path() failed to produce an absolute Path" + ); + Ok(path) +} + +/// Implementation detail of `Url::query_pairs_mut`. Typically not used directly. +#[derive(Debug)] +pub struct UrlQuery<'a> { + url: Option<&'a mut Url>, + fragment: Option, +} + +// `as_mut_string` string here exposes the internal serialization of an `Url`, +// which should not be exposed to users. +// We achieve that by not giving users direct access to `UrlQuery`: +// * Its fields are private +// (and so can not be constructed with struct literal syntax outside of this crate), +// * It has no constructor +// * It is only visible (on the type level) to users in the return type of +// `Url::query_pairs_mut` which is `Serializer` +// * `Serializer` keeps its target in a private field +// * Unlike in other `Target` impls, `UrlQuery::finished` does not return `Self`. +impl<'a> form_urlencoded::Target for UrlQuery<'a> { + fn as_mut_string(&mut self) -> &mut String { + &mut self.url.as_mut().unwrap().serialization + } + + fn finish(mut self) -> &'a mut Url { + let url = self.url.take().unwrap(); + url.restore_already_parsed_fragment(self.fragment.take()); + url + } + + type Finished = &'a mut Url; +} + +impl<'a> Drop for UrlQuery<'a> { + fn drop(&mut self) { + if let Some(url) = self.url.take() { + url.restore_already_parsed_fragment(self.fragment.take()) + } + } +} diff --git a/src/origin.rs b/src/origin.rs new file mode 100644 index 0000000..81193f5 --- /dev/null +++ b/src/origin.rs @@ -0,0 +1,112 @@ +// Copyright 2016 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use crate::host::Host; +use crate::parser::default_port; +use crate::Url; +use std::sync::atomic::{AtomicUsize, Ordering}; + +pub fn url_origin(url: &Url) -> Origin { + let scheme = url.scheme(); + match scheme { + "blob" => { + let result = Url::parse(url.path()); + match result { + Ok(ref url) => url_origin(url), + Err(_) => Origin::new_opaque(), + } + } + "ftp" | "http" | "https" | "ws" | "wss" => Origin::Tuple( + scheme.to_owned(), + url.host().unwrap().to_owned(), + url.port_or_known_default().unwrap(), + ), + // TODO: Figure out what to do if the scheme is a file + "file" => Origin::new_opaque(), + _ => Origin::new_opaque(), + } +} + +/// The origin of an URL +/// +/// Two URLs with the same origin are considered +/// to originate from the same entity and can therefore trust +/// each other. +/// +/// The origin is determined based on the scheme as follows: +/// +/// - If the scheme is "blob" the origin is the origin of the +/// URL contained in the path component. If parsing fails, +/// it is an opaque origin. +/// - If the scheme is "ftp", "http", "https", "ws", or "wss", +/// then the origin is a tuple of the scheme, host, and port. +/// - If the scheme is anything else, the origin is opaque, meaning +/// the URL does not have the same origin as any other URL. +/// +/// For more information see +#[derive(PartialEq, Eq, Hash, Clone, Debug)] +pub enum Origin { + /// A globally unique identifier + Opaque(OpaqueOrigin), + + /// Consists of the URL's scheme, host and port + Tuple(String, Host, u16), +} + +impl Origin { + /// Creates a new opaque origin that is only equal to itself. + pub fn new_opaque() -> Origin { + static COUNTER: AtomicUsize = AtomicUsize::new(0); + Origin::Opaque(OpaqueOrigin(COUNTER.fetch_add(1, Ordering::SeqCst))) + } + + /// Return whether this origin is a (scheme, host, port) tuple + /// (as opposed to an opaque origin). + pub fn is_tuple(&self) -> bool { + matches!(*self, Origin::Tuple(..)) + } + + /// + pub fn ascii_serialization(&self) -> String { + match *self { + Origin::Opaque(_) => "null".to_owned(), + Origin::Tuple(ref scheme, ref host, port) => { + if default_port(scheme) == Some(port) { + format!("{}://{}", scheme, host) + } else { + format!("{}://{}:{}", scheme, host, port) + } + } + } + } + + /// + pub fn unicode_serialization(&self) -> String { + match *self { + Origin::Opaque(_) => "null".to_owned(), + Origin::Tuple(ref scheme, ref host, port) => { + let host = match *host { + Host::Domain(ref domain) => { + let (domain, _errors) = idna::domain_to_unicode(domain); + Host::Domain(domain) + } + _ => host.clone(), + }; + if default_port(scheme) == Some(port) { + format!("{}://{}", scheme, host) + } else { + format!("{}://{}:{}", scheme, host, port) + } + } + } + } +} + +/// Opaque identifier for URLs that have file or other schemes +#[derive(Eq, PartialEq, Hash, Clone, Debug)] +pub struct OpaqueOrigin(usize); diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..f5438c5 --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,1593 @@ +// Copyright 2013-2016 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use std::error::Error; +use std::fmt::{self, Formatter, Write}; +use std::str; + +use crate::host::{Host, HostInternal}; +use crate::Url; +use form_urlencoded::EncodingOverride; +use percent_encoding::{percent_encode, utf8_percent_encode, AsciiSet, CONTROLS}; + +/// https://url.spec.whatwg.org/#fragment-percent-encode-set +const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`'); + +/// https://url.spec.whatwg.org/#path-percent-encode-set +const PATH: &AsciiSet = &FRAGMENT.add(b'#').add(b'?').add(b'{').add(b'}'); + +/// https://url.spec.whatwg.org/#userinfo-percent-encode-set +pub(crate) const USERINFO: &AsciiSet = &PATH + .add(b'/') + .add(b':') + .add(b';') + .add(b'=') + .add(b'@') + .add(b'[') + .add(b'\\') + .add(b']') + .add(b'^') + .add(b'|'); + +pub(crate) const PATH_SEGMENT: &AsciiSet = &PATH.add(b'/').add(b'%'); + +// The backslash (\) character is treated as a path separator in special URLs +// so it needs to be additionally escaped in that case. +pub(crate) const SPECIAL_PATH_SEGMENT: &AsciiSet = &PATH_SEGMENT.add(b'\\'); + +// https://url.spec.whatwg.org/#query-state +const QUERY: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'#').add(b'<').add(b'>'); +const SPECIAL_QUERY: &AsciiSet = &QUERY.add(b'\''); + +pub type ParseResult = Result; + +macro_rules! simple_enum_error { + ($($name: ident => $description: expr,)+) => { + /// Errors that can occur during parsing. + /// + /// This may be extended in the future so exhaustive matching is + /// discouraged with an unused variant. + #[derive(PartialEq, Eq, Clone, Copy, Debug)] + #[non_exhaustive] + pub enum ParseError { + $( + $name, + )+ + } + + impl fmt::Display for ParseError { + fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result { + match *self { + $( + ParseError::$name => fmt.write_str($description), + )+ + } + } + } + } +} + +impl Error for ParseError {} + +simple_enum_error! { + EmptyHost => "empty host", + IdnaError => "invalid international domain name", + InvalidPort => "invalid port number", + InvalidIpv4Address => "invalid IPv4 address", + InvalidIpv6Address => "invalid IPv6 address", + InvalidDomainCharacter => "invalid domain character", + RelativeUrlWithoutBase => "relative URL without a base", + RelativeUrlWithCannotBeABaseBase => "relative URL with a cannot-be-a-base base", + SetHostOnCannotBeABaseUrl => "a cannot-be-a-base URL doesn’t have a host to set", + Overflow => "URLs more than 4 GB are not supported", +} + +impl From<::idna::Errors> for ParseError { + fn from(_: ::idna::Errors) -> ParseError { + ParseError::IdnaError + } +} + +macro_rules! syntax_violation_enum { + ($($name: ident => $description: expr,)+) => { + /// Non-fatal syntax violations that can occur during parsing. + /// + /// This may be extended in the future so exhaustive matching is + /// discouraged with an unused variant. + #[derive(PartialEq, Eq, Clone, Copy, Debug)] + #[non_exhaustive] + pub enum SyntaxViolation { + $( + $name, + )+ + } + + impl SyntaxViolation { + pub fn description(&self) -> &'static str { + match *self { + $( + SyntaxViolation::$name => $description, + )+ + } + } + } + } +} + +syntax_violation_enum! { + Backslash => "backslash", + C0SpaceIgnored => + "leading or trailing control or space character are ignored in URLs", + EmbeddedCredentials => + "embedding authentication information (username or password) \ + in an URL is not recommended", + ExpectedDoubleSlash => "expected //", + ExpectedFileDoubleSlash => "expected // after file:", + FileWithHostAndWindowsDrive => "file: with host and Windows drive letter", + NonUrlCodePoint => "non-URL code point", + NullInFragment => "NULL characters are ignored in URL fragment identifiers", + PercentDecode => "expected 2 hex digits after %", + TabOrNewlineIgnored => "tabs or newlines are ignored in URLs", + UnencodedAtSign => "unencoded @ sign in username or password", +} + +impl fmt::Display for SyntaxViolation { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self.description(), f) + } +} + +#[derive(Copy, Clone, PartialEq, Eq)] +pub enum SchemeType { + File, + SpecialNotFile, + NotSpecial, +} + +impl SchemeType { + pub fn is_special(&self) -> bool { + !matches!(*self, SchemeType::NotSpecial) + } + + pub fn is_file(&self) -> bool { + matches!(*self, SchemeType::File) + } + + pub fn from(s: &str) -> Self { + match s { + "http" | "https" | "ws" | "wss" | "ftp" => SchemeType::SpecialNotFile, + "file" => SchemeType::File, + _ => SchemeType::NotSpecial, + } + } +} + +pub fn default_port(scheme: &str) -> Option { + match scheme { + "http" | "ws" => Some(80), + "https" | "wss" => Some(443), + "ftp" => Some(21), + _ => None, + } +} + +#[derive(Clone)] +pub struct Input<'i> { + chars: str::Chars<'i>, +} + +impl<'i> Input<'i> { + pub fn new(input: &'i str) -> Self { + Input::with_log(input, None) + } + + pub fn no_trim(input: &'i str) -> Self { + Input { + chars: input.chars(), + } + } + + pub fn trim_tab_and_newlines( + original_input: &'i str, + vfn: Option<&dyn Fn(SyntaxViolation)>, + ) -> Self { + let input = original_input.trim_matches(ascii_tab_or_new_line); + if let Some(vfn) = vfn { + if input.len() < original_input.len() { + vfn(SyntaxViolation::C0SpaceIgnored) + } + if input.chars().any(|c| matches!(c, '\t' | '\n' | '\r')) { + vfn(SyntaxViolation::TabOrNewlineIgnored) + } + } + Input { + chars: input.chars(), + } + } + + pub fn with_log(original_input: &'i str, vfn: Option<&dyn Fn(SyntaxViolation)>) -> Self { + let input = original_input.trim_matches(c0_control_or_space); + if let Some(vfn) = vfn { + if input.len() < original_input.len() { + vfn(SyntaxViolation::C0SpaceIgnored) + } + if input.chars().any(|c| matches!(c, '\t' | '\n' | '\r')) { + vfn(SyntaxViolation::TabOrNewlineIgnored) + } + } + Input { + chars: input.chars(), + } + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.clone().next().is_none() + } + + #[inline] + fn starts_with(&self, p: P) -> bool { + p.split_prefix(&mut self.clone()) + } + + #[inline] + pub fn split_prefix(&self, p: P) -> Option { + let mut remaining = self.clone(); + if p.split_prefix(&mut remaining) { + Some(remaining) + } else { + None + } + } + + #[inline] + fn split_first(&self) -> (Option, Self) { + let mut remaining = self.clone(); + (remaining.next(), remaining) + } + + #[inline] + fn count_matching bool>(&self, f: F) -> (u32, Self) { + let mut count = 0; + let mut remaining = self.clone(); + loop { + let mut input = remaining.clone(); + if matches!(input.next(), Some(c) if f(c)) { + remaining = input; + count += 1; + } else { + return (count, remaining); + } + } + } + + #[inline] + fn next_utf8(&mut self) -> Option<(char, &'i str)> { + loop { + let utf8 = self.chars.as_str(); + match self.chars.next() { + Some(c) => { + if !matches!(c, '\t' | '\n' | '\r') { + return Some((c, &utf8[..c.len_utf8()])); + } + } + None => return None, + } + } + } +} + +pub trait Pattern { + fn split_prefix(self, input: &mut Input) -> bool; +} + +impl Pattern for char { + fn split_prefix(self, input: &mut Input) -> bool { + input.next() == Some(self) + } +} + +impl<'a> Pattern for &'a str { + fn split_prefix(self, input: &mut Input) -> bool { + for c in self.chars() { + if input.next() != Some(c) { + return false; + } + } + true + } +} + +impl bool> Pattern for F { + fn split_prefix(self, input: &mut Input) -> bool { + input.next().map_or(false, self) + } +} + +impl<'i> Iterator for Input<'i> { + type Item = char; + fn next(&mut self) -> Option { + self.chars + .by_ref() + .find(|&c| !matches!(c, '\t' | '\n' | '\r')) + } +} + +pub struct Parser<'a> { + pub serialization: String, + pub base_url: Option<&'a Url>, + pub query_encoding_override: EncodingOverride<'a>, + pub violation_fn: Option<&'a dyn Fn(SyntaxViolation)>, + pub context: Context, +} + +#[derive(PartialEq, Eq, Copy, Clone)] +pub enum Context { + UrlParser, + Setter, + PathSegmentSetter, +} + +impl<'a> Parser<'a> { + fn log_violation(&self, v: SyntaxViolation) { + if let Some(f) = self.violation_fn { + f(v) + } + } + + fn log_violation_if(&self, v: SyntaxViolation, test: impl FnOnce() -> bool) { + if let Some(f) = self.violation_fn { + if test() { + f(v) + } + } + } + + pub fn for_setter(serialization: String) -> Parser<'a> { + Parser { + serialization, + base_url: None, + query_encoding_override: None, + violation_fn: None, + context: Context::Setter, + } + } + + /// https://url.spec.whatwg.org/#concept-basic-url-parser + pub fn parse_url(mut self, input: &str) -> ParseResult { + let input = Input::with_log(input, self.violation_fn); + if let Ok(remaining) = self.parse_scheme(input.clone()) { + return self.parse_with_scheme(remaining); + } + + // No-scheme state + if let Some(base_url) = self.base_url { + if input.starts_with('#') { + self.fragment_only(base_url, input) + } else if base_url.cannot_be_a_base() { + Err(ParseError::RelativeUrlWithCannotBeABaseBase) + } else { + let scheme_type = SchemeType::from(base_url.scheme()); + if scheme_type.is_file() { + self.parse_file(input, scheme_type, Some(base_url)) + } else { + self.parse_relative(input, scheme_type, base_url) + } + } + } else { + Err(ParseError::RelativeUrlWithoutBase) + } + } + + pub fn parse_scheme<'i>(&mut self, mut input: Input<'i>) -> Result, ()> { + if input.is_empty() || !input.starts_with(ascii_alpha) { + return Err(()); + } + debug_assert!(self.serialization.is_empty()); + while let Some(c) = input.next() { + match c { + 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.' => { + self.serialization.push(c.to_ascii_lowercase()) + } + ':' => return Ok(input), + _ => { + self.serialization.clear(); + return Err(()); + } + } + } + // EOF before ':' + if self.context == Context::Setter { + Ok(input) + } else { + self.serialization.clear(); + Err(()) + } + } + + fn parse_with_scheme(mut self, input: Input<'_>) -> ParseResult { + use crate::SyntaxViolation::{ExpectedDoubleSlash, ExpectedFileDoubleSlash}; + let scheme_end = to_u32(self.serialization.len())?; + let scheme_type = SchemeType::from(&self.serialization); + self.serialization.push(':'); + match scheme_type { + SchemeType::File => { + self.log_violation_if(ExpectedFileDoubleSlash, || !input.starts_with("//")); + let base_file_url = self.base_url.and_then(|base| { + if base.scheme() == "file" { + Some(base) + } else { + None + } + }); + self.serialization.clear(); + self.parse_file(input, scheme_type, base_file_url) + } + SchemeType::SpecialNotFile => { + // special relative or authority state + let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\')); + if let Some(base_url) = self.base_url { + if slashes_count < 2 + && base_url.scheme() == &self.serialization[..scheme_end as usize] + { + // "Cannot-be-a-base" URLs only happen with "not special" schemes. + debug_assert!(!base_url.cannot_be_a_base()); + self.serialization.clear(); + return self.parse_relative(input, scheme_type, base_url); + } + } + // special authority slashes state + self.log_violation_if(ExpectedDoubleSlash, || { + input + .clone() + .take_while(|&c| matches!(c, '/' | '\\')) + .collect::() + != "//" + }); + self.after_double_slash(remaining, scheme_type, scheme_end) + } + SchemeType::NotSpecial => self.parse_non_special(input, scheme_type, scheme_end), + } + } + + /// Scheme other than file, http, https, ws, ws, ftp. + fn parse_non_special( + mut self, + input: Input<'_>, + scheme_type: SchemeType, + scheme_end: u32, + ) -> ParseResult { + // path or authority state ( + if let Some(input) = input.split_prefix("//") { + return self.after_double_slash(input, scheme_type, scheme_end); + } + // Anarchist URL (no authority) + let path_start = to_u32(self.serialization.len())?; + let username_end = path_start; + let host_start = path_start; + let host_end = path_start; + let host = HostInternal::None; + let port = None; + let remaining = if let Some(input) = input.split_prefix('/') { + let path_start = self.serialization.len(); + self.serialization.push('/'); + self.parse_path(scheme_type, &mut false, path_start, input) + } else { + self.parse_cannot_be_a_base_path(input) + }; + self.with_query_and_fragment( + scheme_type, + scheme_end, + username_end, + host_start, + host_end, + host, + port, + path_start, + remaining, + ) + } + + fn parse_file( + mut self, + input: Input<'_>, + scheme_type: SchemeType, + base_file_url: Option<&Url>, + ) -> ParseResult { + use crate::SyntaxViolation::Backslash; + // file state + debug_assert!(self.serialization.is_empty()); + let (first_char, input_after_first_char) = input.split_first(); + if matches!(first_char, Some('/') | Some('\\')) { + self.log_violation_if(SyntaxViolation::Backslash, || first_char == Some('\\')); + // file slash state + let (next_char, input_after_next_char) = input_after_first_char.split_first(); + if matches!(next_char, Some('/') | Some('\\')) { + self.log_violation_if(Backslash, || next_char == Some('\\')); + // file host state + self.serialization.push_str("file://"); + let scheme_end = "file".len() as u32; + let host_start = "file://".len() as u32; + let (path_start, mut host, remaining) = + self.parse_file_host(input_after_next_char)?; + let mut host_end = to_u32(self.serialization.len())?; + let mut has_host = !matches!(host, HostInternal::None); + let remaining = if path_start { + self.parse_path_start(SchemeType::File, &mut has_host, remaining) + } else { + let path_start = self.serialization.len(); + self.serialization.push('/'); + self.parse_path(SchemeType::File, &mut has_host, path_start, remaining) + }; + + // For file URLs that have a host and whose path starts + // with the windows drive letter we just remove the host. + if !has_host { + self.serialization + .drain(host_start as usize..host_end as usize); + host_end = host_start; + host = HostInternal::None; + } + let (query_start, fragment_start) = + self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?; + return Ok(Url { + serialization: self.serialization, + scheme_end, + username_end: host_start, + host_start, + host_end, + host, + port: None, + path_start: host_end, + query_start, + fragment_start, + }); + } else { + self.serialization.push_str("file://"); + let scheme_end = "file".len() as u32; + let host_start = "file://".len(); + let mut host_end = host_start; + let mut host = HostInternal::None; + if !starts_with_windows_drive_letter_segment(&input_after_first_char) { + if let Some(base_url) = base_file_url { + let first_segment = base_url.path_segments().unwrap().next().unwrap(); + if is_normalized_windows_drive_letter(first_segment) { + self.serialization.push('/'); + self.serialization.push_str(first_segment); + } else if let Some(host_str) = base_url.host_str() { + self.serialization.push_str(host_str); + host_end = self.serialization.len(); + host = base_url.host; + } + } + } + // If c is the EOF code point, U+002F (/), U+005C (\), U+003F (?), or U+0023 (#), then decrease pointer by one + let parse_path_input = if let Some(c) = first_char { + if c == '/' || c == '\\' || c == '?' || c == '#' { + input + } else { + input_after_first_char + } + } else { + input_after_first_char + }; + + let remaining = + self.parse_path(SchemeType::File, &mut false, host_end, parse_path_input); + + let host_start = host_start as u32; + + let (query_start, fragment_start) = + self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?; + + let host_end = host_end as u32; + return Ok(Url { + serialization: self.serialization, + scheme_end, + username_end: host_start, + host_start, + host_end, + host, + port: None, + path_start: host_end, + query_start, + fragment_start, + }); + } + } + if let Some(base_url) = base_file_url { + match first_char { + None => { + // Copy everything except the fragment + let before_fragment = match base_url.fragment_start { + Some(i) => &base_url.serialization[..i as usize], + None => &*base_url.serialization, + }; + self.serialization.push_str(before_fragment); + Ok(Url { + serialization: self.serialization, + fragment_start: None, + ..*base_url + }) + } + Some('?') => { + // Copy everything up to the query string + let before_query = match (base_url.query_start, base_url.fragment_start) { + (None, None) => &*base_url.serialization, + (Some(i), _) | (None, Some(i)) => base_url.slice(..i), + }; + self.serialization.push_str(before_query); + let (query_start, fragment_start) = + self.parse_query_and_fragment(scheme_type, base_url.scheme_end, input)?; + Ok(Url { + serialization: self.serialization, + query_start, + fragment_start, + ..*base_url + }) + } + Some('#') => self.fragment_only(base_url, input), + _ => { + if !starts_with_windows_drive_letter_segment(&input) { + let before_query = match (base_url.query_start, base_url.fragment_start) { + (None, None) => &*base_url.serialization, + (Some(i), _) | (None, Some(i)) => base_url.slice(..i), + }; + self.serialization.push_str(before_query); + self.shorten_path(SchemeType::File, base_url.path_start as usize); + let remaining = self.parse_path( + SchemeType::File, + &mut true, + base_url.path_start as usize, + input, + ); + self.with_query_and_fragment( + SchemeType::File, + base_url.scheme_end, + base_url.username_end, + base_url.host_start, + base_url.host_end, + base_url.host, + base_url.port, + base_url.path_start, + remaining, + ) + } else { + self.serialization.push_str("file:///"); + let scheme_end = "file".len() as u32; + let path_start = "file://".len(); + let remaining = + self.parse_path(SchemeType::File, &mut false, path_start, input); + let (query_start, fragment_start) = + self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?; + let path_start = path_start as u32; + Ok(Url { + serialization: self.serialization, + scheme_end, + username_end: path_start, + host_start: path_start, + host_end: path_start, + host: HostInternal::None, + port: None, + path_start, + query_start, + fragment_start, + }) + } + } + } + } else { + self.serialization.push_str("file:///"); + let scheme_end = "file".len() as u32; + let path_start = "file://".len(); + let remaining = self.parse_path(SchemeType::File, &mut false, path_start, input); + let (query_start, fragment_start) = + self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?; + let path_start = path_start as u32; + Ok(Url { + serialization: self.serialization, + scheme_end, + username_end: path_start, + host_start: path_start, + host_end: path_start, + host: HostInternal::None, + port: None, + path_start, + query_start, + fragment_start, + }) + } + } + + fn parse_relative( + mut self, + input: Input<'_>, + scheme_type: SchemeType, + base_url: &Url, + ) -> ParseResult { + // relative state + debug_assert!(self.serialization.is_empty()); + let (first_char, input_after_first_char) = input.split_first(); + match first_char { + None => { + // Copy everything except the fragment + let before_fragment = match base_url.fragment_start { + Some(i) => &base_url.serialization[..i as usize], + None => &*base_url.serialization, + }; + self.serialization.push_str(before_fragment); + Ok(Url { + serialization: self.serialization, + fragment_start: None, + ..*base_url + }) + } + Some('?') => { + // Copy everything up to the query string + let before_query = match (base_url.query_start, base_url.fragment_start) { + (None, None) => &*base_url.serialization, + (Some(i), _) | (None, Some(i)) => base_url.slice(..i), + }; + self.serialization.push_str(before_query); + let (query_start, fragment_start) = + self.parse_query_and_fragment(scheme_type, base_url.scheme_end, input)?; + Ok(Url { + serialization: self.serialization, + query_start, + fragment_start, + ..*base_url + }) + } + Some('#') => self.fragment_only(base_url, input), + Some('/') | Some('\\') => { + let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\')); + if slashes_count >= 2 { + self.log_violation_if(SyntaxViolation::ExpectedDoubleSlash, || { + input + .clone() + .take_while(|&c| matches!(c, '/' | '\\')) + .collect::() + != "//" + }); + let scheme_end = base_url.scheme_end; + debug_assert!(base_url.byte_at(scheme_end) == b':'); + self.serialization + .push_str(base_url.slice(..scheme_end + 1)); + if let Some(after_prefix) = input.split_prefix("//") { + return self.after_double_slash(after_prefix, scheme_type, scheme_end); + } + return self.after_double_slash(remaining, scheme_type, scheme_end); + } + let path_start = base_url.path_start; + self.serialization.push_str(base_url.slice(..path_start)); + self.serialization.push('/'); + let remaining = self.parse_path( + scheme_type, + &mut true, + path_start as usize, + input_after_first_char, + ); + self.with_query_and_fragment( + scheme_type, + base_url.scheme_end, + base_url.username_end, + base_url.host_start, + base_url.host_end, + base_url.host, + base_url.port, + base_url.path_start, + remaining, + ) + } + _ => { + let before_query = match (base_url.query_start, base_url.fragment_start) { + (None, None) => &*base_url.serialization, + (Some(i), _) | (None, Some(i)) => base_url.slice(..i), + }; + self.serialization.push_str(before_query); + // FIXME spec says just "remove last entry", not the "pop" algorithm + self.pop_path(scheme_type, base_url.path_start as usize); + // A special url always has a path. + // A path always starts with '/' + if self.serialization.len() == base_url.path_start as usize + && (SchemeType::from(base_url.scheme()).is_special() || !input.is_empty()) + { + self.serialization.push('/'); + } + let remaining = match input.split_first() { + (Some('/'), remaining) => self.parse_path( + scheme_type, + &mut true, + base_url.path_start as usize, + remaining, + ), + _ => { + self.parse_path(scheme_type, &mut true, base_url.path_start as usize, input) + } + }; + self.with_query_and_fragment( + scheme_type, + base_url.scheme_end, + base_url.username_end, + base_url.host_start, + base_url.host_end, + base_url.host, + base_url.port, + base_url.path_start, + remaining, + ) + } + } + } + + fn after_double_slash( + mut self, + input: Input<'_>, + scheme_type: SchemeType, + scheme_end: u32, + ) -> ParseResult { + self.serialization.push('/'); + self.serialization.push('/'); + // authority state + let before_authority = self.serialization.len(); + let (username_end, remaining) = self.parse_userinfo(input, scheme_type)?; + let has_authority = before_authority != self.serialization.len(); + // host state + let host_start = to_u32(self.serialization.len())?; + let (host_end, host, port, remaining) = + self.parse_host_and_port(remaining, scheme_end, scheme_type)?; + if host == HostInternal::None && has_authority { + return Err(ParseError::EmptyHost); + } + // path state + let path_start = to_u32(self.serialization.len())?; + let remaining = self.parse_path_start(scheme_type, &mut true, remaining); + self.with_query_and_fragment( + scheme_type, + scheme_end, + username_end, + host_start, + host_end, + host, + port, + path_start, + remaining, + ) + } + + /// Return (username_end, remaining) + fn parse_userinfo<'i>( + &mut self, + mut input: Input<'i>, + scheme_type: SchemeType, + ) -> ParseResult<(u32, Input<'i>)> { + let mut last_at = None; + let mut remaining = input.clone(); + let mut char_count = 0; + while let Some(c) = remaining.next() { + match c { + '@' => { + if last_at.is_some() { + self.log_violation(SyntaxViolation::UnencodedAtSign) + } else { + self.log_violation(SyntaxViolation::EmbeddedCredentials) + } + last_at = Some((char_count, remaining.clone())) + } + '/' | '?' | '#' => break, + '\\' if scheme_type.is_special() => break, + _ => (), + } + char_count += 1; + } + let (mut userinfo_char_count, remaining) = match last_at { + None => return Ok((to_u32(self.serialization.len())?, input)), + Some((0, remaining)) => { + // Otherwise, if one of the following is true + // c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#) + // url is special and c is U+005C (\) + // If @ flag is set and buffer is the empty string, validation error, return failure. + if let (Some(c), _) = remaining.split_first() { + if c == '/' || c == '?' || c == '#' || (scheme_type.is_special() && c == '\\') { + return Err(ParseError::EmptyHost); + } + } + return Ok((to_u32(self.serialization.len())?, remaining)); + } + Some(x) => x, + }; + + let mut username_end = None; + let mut has_password = false; + let mut has_username = false; + while userinfo_char_count > 0 { + let (c, utf8_c) = input.next_utf8().unwrap(); + userinfo_char_count -= 1; + if c == ':' && username_end.is_none() { + // Start parsing password + username_end = Some(to_u32(self.serialization.len())?); + // We don't add a colon if the password is empty + if userinfo_char_count > 0 { + self.serialization.push(':'); + has_password = true; + } + } else { + if !has_password { + has_username = true; + } + self.check_url_code_point(c, &input); + self.serialization + .extend(utf8_percent_encode(utf8_c, USERINFO)); + } + } + let username_end = match username_end { + Some(i) => i, + None => to_u32(self.serialization.len())?, + }; + if has_username || has_password { + self.serialization.push('@'); + } + Ok((username_end, remaining)) + } + + fn parse_host_and_port<'i>( + &mut self, + input: Input<'i>, + scheme_end: u32, + scheme_type: SchemeType, + ) -> ParseResult<(u32, HostInternal, Option, Input<'i>)> { + let (host, remaining) = Parser::parse_host(input, scheme_type)?; + write!(&mut self.serialization, "{}", host).unwrap(); + let host_end = to_u32(self.serialization.len())?; + if let Host::Domain(h) = &host { + if h.is_empty() { + // Port with an empty host + if remaining.starts_with(":") { + return Err(ParseError::EmptyHost); + } + if scheme_type.is_special() { + return Err(ParseError::EmptyHost); + } + } + }; + + let (port, remaining) = if let Some(remaining) = remaining.split_prefix(':') { + let scheme = || default_port(&self.serialization[..scheme_end as usize]); + Parser::parse_port(remaining, scheme, self.context)? + } else { + (None, remaining) + }; + if let Some(port) = port { + write!(&mut self.serialization, ":{}", port).unwrap() + } + Ok((host_end, host.into(), port, remaining)) + } + + pub fn parse_host( + mut input: Input<'_>, + scheme_type: SchemeType, + ) -> ParseResult<(Host, Input<'_>)> { + if scheme_type.is_file() { + return Parser::get_file_host(input); + } + // Undo the Input abstraction here to avoid allocating in the common case + // where the host part of the input does not contain any tab or newline + let input_str = input.chars.as_str(); + let mut inside_square_brackets = false; + let mut has_ignored_chars = false; + let mut non_ignored_chars = 0; + let mut bytes = 0; + for c in input_str.chars() { + match c { + ':' if !inside_square_brackets => break, + '\\' if scheme_type.is_special() => break, + '/' | '?' | '#' => break, + '\t' | '\n' | '\r' => { + has_ignored_chars = true; + } + '[' => { + inside_square_brackets = true; + non_ignored_chars += 1 + } + ']' => { + inside_square_brackets = false; + non_ignored_chars += 1 + } + _ => non_ignored_chars += 1, + } + bytes += c.len_utf8(); + } + let replaced: String; + let host_str; + { + let host_input = input.by_ref().take(non_ignored_chars); + if has_ignored_chars { + replaced = host_input.collect(); + host_str = &*replaced + } else { + for _ in host_input {} + host_str = &input_str[..bytes] + } + } + if scheme_type == SchemeType::SpecialNotFile && host_str.is_empty() { + return Err(ParseError::EmptyHost); + } + if !scheme_type.is_special() { + let host = Host::parse_opaque(host_str)?; + return Ok((host, input)); + } + let host = Host::parse(host_str)?; + Ok((host, input)) + } + + fn get_file_host(input: Input<'_>) -> ParseResult<(Host, Input<'_>)> { + let (_, host_str, remaining) = Parser::file_host(input)?; + let host = match Host::parse(&host_str)? { + Host::Domain(ref d) if d == "localhost" => Host::Domain("".to_string()), + host => host, + }; + Ok((host, remaining)) + } + + fn parse_file_host<'i>( + &mut self, + input: Input<'i>, + ) -> ParseResult<(bool, HostInternal, Input<'i>)> { + let has_host; + let (_, host_str, remaining) = Parser::file_host(input)?; + let host = if host_str.is_empty() { + has_host = false; + HostInternal::None + } else { + match Host::parse(&host_str)? { + Host::Domain(ref d) if d == "localhost" => { + has_host = false; + HostInternal::None + } + host => { + write!(&mut self.serialization, "{}", host).unwrap(); + has_host = true; + host.into() + } + } + }; + Ok((has_host, host, remaining)) + } + + pub fn file_host(input: Input) -> ParseResult<(bool, String, Input)> { + // Undo the Input abstraction here to avoid allocating in the common case + // where the host part of the input does not contain any tab or newline + let input_str = input.chars.as_str(); + let mut has_ignored_chars = false; + let mut non_ignored_chars = 0; + let mut bytes = 0; + for c in input_str.chars() { + match c { + '/' | '\\' | '?' | '#' => break, + '\t' | '\n' | '\r' => has_ignored_chars = true, + _ => non_ignored_chars += 1, + } + bytes += c.len_utf8(); + } + let replaced: String; + let host_str; + let mut remaining = input.clone(); + { + let host_input = remaining.by_ref().take(non_ignored_chars); + if has_ignored_chars { + replaced = host_input.collect(); + host_str = &*replaced + } else { + for _ in host_input {} + host_str = &input_str[..bytes] + } + } + if is_windows_drive_letter(host_str) { + return Ok((false, "".to_string(), input)); + } + Ok((true, host_str.to_string(), remaining)) + } + + pub fn parse_port

( + mut input: Input<'_>, + default_port: P, + context: Context, + ) -> ParseResult<(Option, Input<'_>)> + where + P: Fn() -> Option, + { + let mut port: u32 = 0; + let mut has_any_digit = false; + while let (Some(c), remaining) = input.split_first() { + if let Some(digit) = c.to_digit(10) { + port = port * 10 + digit; + if port > ::std::u16::MAX as u32 { + return Err(ParseError::InvalidPort); + } + has_any_digit = true; + } else if context == Context::UrlParser && !matches!(c, '/' | '\\' | '?' | '#') { + return Err(ParseError::InvalidPort); + } else { + break; + } + input = remaining; + } + let mut opt_port = Some(port as u16); + if !has_any_digit || opt_port == default_port() { + opt_port = None; + } + Ok((opt_port, input)) + } + + pub fn parse_path_start<'i>( + &mut self, + scheme_type: SchemeType, + has_host: &mut bool, + input: Input<'i>, + ) -> Input<'i> { + let path_start = self.serialization.len(); + let (maybe_c, remaining) = input.split_first(); + // If url is special, then: + if scheme_type.is_special() { + if maybe_c == Some('\\') { + // If c is U+005C (\), validation error. + self.log_violation(SyntaxViolation::Backslash); + } + // A special URL always has a non-empty path. + if !self.serialization.ends_with('/') { + self.serialization.push('/'); + // We have already made sure the forward slash is present. + if maybe_c == Some('/') || maybe_c == Some('\\') { + return self.parse_path(scheme_type, has_host, path_start, remaining); + } + } + return self.parse_path(scheme_type, has_host, path_start, input); + } else if maybe_c == Some('?') || maybe_c == Some('#') { + // Otherwise, if state override is not given and c is U+003F (?), + // set url’s query to the empty string and state to query state. + // Otherwise, if state override is not given and c is U+0023 (#), + // set url’s fragment to the empty string and state to fragment state. + // The query and path states will be handled by the caller. + return input; + } + + if maybe_c != None && maybe_c != Some('/') { + self.serialization.push('/'); + } + // Otherwise, if c is not the EOF code point: + self.parse_path(scheme_type, has_host, path_start, input) + } + + pub fn parse_path<'i>( + &mut self, + scheme_type: SchemeType, + has_host: &mut bool, + path_start: usize, + mut input: Input<'i>, + ) -> Input<'i> { + // Relative path state + loop { + let segment_start = self.serialization.len(); + let mut ends_with_slash = false; + loop { + let input_before_c = input.clone(); + let (c, utf8_c) = if let Some(x) = input.next_utf8() { + x + } else { + break; + }; + match c { + '/' if self.context != Context::PathSegmentSetter => { + self.serialization.push(c); + ends_with_slash = true; + break; + } + '\\' if self.context != Context::PathSegmentSetter + && scheme_type.is_special() => + { + self.log_violation(SyntaxViolation::Backslash); + self.serialization.push('/'); + ends_with_slash = true; + break; + } + '?' | '#' if self.context == Context::UrlParser => { + input = input_before_c; + break; + } + _ => { + self.check_url_code_point(c, &input); + if self.context == Context::PathSegmentSetter { + if scheme_type.is_special() { + self.serialization + .extend(utf8_percent_encode(utf8_c, SPECIAL_PATH_SEGMENT)); + } else { + self.serialization + .extend(utf8_percent_encode(utf8_c, PATH_SEGMENT)); + } + } else { + self.serialization.extend(utf8_percent_encode(utf8_c, PATH)); + } + } + } + } + let segment_before_slash = if ends_with_slash { + &self.serialization[segment_start..self.serialization.len() - 1] + } else { + &self.serialization[segment_start..self.serialization.len()] + }; + match segment_before_slash { + // If buffer is a double-dot path segment, shorten url’s path, + ".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e" + | ".%2E" => { + debug_assert!(self.serialization.as_bytes()[segment_start - 1] == b'/'); + self.serialization.truncate(segment_start); + if self.serialization.ends_with('/') + && Parser::last_slash_can_be_removed(&self.serialization, path_start) + { + self.serialization.pop(); + } + self.shorten_path(scheme_type, path_start); + + // and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path. + if ends_with_slash && !self.serialization.ends_with('/') { + self.serialization.push('/'); + } + } + // Otherwise, if buffer is a single-dot path segment and if neither c is U+002F (/), + // nor url is special and c is U+005C (\), append the empty string to url’s path. + "." | "%2e" | "%2E" => { + self.serialization.truncate(segment_start); + if !self.serialization.ends_with('/') { + self.serialization.push('/'); + } + } + _ => { + // If url’s scheme is "file", url’s path is empty, and buffer is a Windows drive letter, then + if scheme_type.is_file() && is_windows_drive_letter(segment_before_slash) { + // Replace the second code point in buffer with U+003A (:). + if let Some(c) = segment_before_slash.chars().next() { + self.serialization.truncate(segment_start); + self.serialization.push(c); + self.serialization.push(':'); + if ends_with_slash { + self.serialization.push('/'); + } + } + // If url’s host is neither the empty string nor null, + // validation error, set url’s host to the empty string. + if *has_host { + self.log_violation(SyntaxViolation::FileWithHostAndWindowsDrive); + *has_host = false; // FIXME account for this in callers + } + } + } + } + if !ends_with_slash { + break; + } + } + if scheme_type.is_file() { + // while url’s path’s size is greater than 1 + // and url’s path[0] is the empty string, + // validation error, remove the first item from url’s path. + //FIXME: log violation + let path = self.serialization.split_off(path_start); + self.serialization.push('/'); + self.serialization.push_str(path.trim_start_matches('/')); + } + + input + } + + fn last_slash_can_be_removed(serialization: &str, path_start: usize) -> bool { + let url_before_segment = &serialization[..serialization.len() - 1]; + if let Some(segment_before_start) = url_before_segment.rfind('/') { + // Do not remove the root slash + segment_before_start >= path_start + // Or a windows drive letter slash + && !path_starts_with_windows_drive_letter(&serialization[segment_before_start..]) + } else { + false + } + } + + /// https://url.spec.whatwg.org/#shorten-a-urls-path + fn shorten_path(&mut self, scheme_type: SchemeType, path_start: usize) { + // If path is empty, then return. + if self.serialization.len() == path_start { + return; + } + // If url’s scheme is "file", path’s size is 1, and path[0] is a normalized Windows drive letter, then return. + if scheme_type.is_file() + && is_normalized_windows_drive_letter(&self.serialization[path_start..]) + { + return; + } + // Remove path’s last item. + self.pop_path(scheme_type, path_start); + } + + /// https://url.spec.whatwg.org/#pop-a-urls-path + fn pop_path(&mut self, scheme_type: SchemeType, path_start: usize) { + if self.serialization.len() > path_start { + let slash_position = self.serialization[path_start..].rfind('/').unwrap(); + // + 1 since rfind returns the position before the slash. + let segment_start = path_start + slash_position + 1; + // Don’t pop a Windows drive letter + if !(scheme_type.is_file() + && is_normalized_windows_drive_letter(&self.serialization[segment_start..])) + { + self.serialization.truncate(segment_start); + } + } + } + + pub fn parse_cannot_be_a_base_path<'i>(&mut self, mut input: Input<'i>) -> Input<'i> { + loop { + let input_before_c = input.clone(); + match input.next_utf8() { + Some(('?', _)) | Some(('#', _)) if self.context == Context::UrlParser => { + return input_before_c + } + Some((c, utf8_c)) => { + self.check_url_code_point(c, &input); + self.serialization + .extend(utf8_percent_encode(utf8_c, CONTROLS)); + } + None => return input, + } + } + } + + #[allow(clippy::too_many_arguments)] + fn with_query_and_fragment( + mut self, + scheme_type: SchemeType, + scheme_end: u32, + username_end: u32, + host_start: u32, + host_end: u32, + host: HostInternal, + port: Option, + path_start: u32, + remaining: Input<'_>, + ) -> ParseResult { + let (query_start, fragment_start) = + self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?; + Ok(Url { + serialization: self.serialization, + scheme_end, + username_end, + host_start, + host_end, + host, + port, + path_start, + query_start, + fragment_start, + }) + } + + /// Return (query_start, fragment_start) + fn parse_query_and_fragment( + &mut self, + scheme_type: SchemeType, + scheme_end: u32, + mut input: Input<'_>, + ) -> ParseResult<(Option, Option)> { + let mut query_start = None; + match input.next() { + Some('#') => {} + Some('?') => { + query_start = Some(to_u32(self.serialization.len())?); + self.serialization.push('?'); + let remaining = self.parse_query(scheme_type, scheme_end, input); + if let Some(remaining) = remaining { + input = remaining + } else { + return Ok((query_start, None)); + } + } + None => return Ok((None, None)), + _ => panic!("Programming error. parse_query_and_fragment() called without ? or #"), + } + + let fragment_start = to_u32(self.serialization.len())?; + self.serialization.push('#'); + self.parse_fragment(input); + Ok((query_start, Some(fragment_start))) + } + + pub fn parse_query<'i>( + &mut self, + scheme_type: SchemeType, + scheme_end: u32, + mut input: Input<'i>, + ) -> Option> { + let len = input.chars.as_str().len(); + let mut query = String::with_capacity(len); // FIXME: use a streaming decoder instead + let mut remaining = None; + while let Some(c) = input.next() { + if c == '#' && self.context == Context::UrlParser { + remaining = Some(input); + break; + } else { + self.check_url_code_point(c, &input); + query.push(c); + } + } + + let encoding = match &self.serialization[..scheme_end as usize] { + "http" | "https" | "file" | "ftp" => self.query_encoding_override, + _ => None, + }; + let query_bytes = if let Some(o) = encoding { + o(&query) + } else { + query.as_bytes().into() + }; + let set = if scheme_type.is_special() { + SPECIAL_QUERY + } else { + QUERY + }; + self.serialization.extend(percent_encode(&query_bytes, set)); + remaining + } + + fn fragment_only(mut self, base_url: &Url, mut input: Input<'_>) -> ParseResult { + let before_fragment = match base_url.fragment_start { + Some(i) => base_url.slice(..i), + None => &*base_url.serialization, + }; + debug_assert!(self.serialization.is_empty()); + self.serialization + .reserve(before_fragment.len() + input.chars.as_str().len()); + self.serialization.push_str(before_fragment); + self.serialization.push('#'); + let next = input.next(); + debug_assert!(next == Some('#')); + self.parse_fragment(input); + Ok(Url { + serialization: self.serialization, + fragment_start: Some(to_u32(before_fragment.len())?), + ..*base_url + }) + } + + pub fn parse_fragment(&mut self, mut input: Input<'_>) { + while let Some((c, utf8_c)) = input.next_utf8() { + if c == '\0' { + self.log_violation(SyntaxViolation::NullInFragment) + } else { + self.check_url_code_point(c, &input); + } + self.serialization + .extend(utf8_percent_encode(utf8_c, FRAGMENT)); + } + } + + fn check_url_code_point(&self, c: char, input: &Input<'_>) { + if let Some(vfn) = self.violation_fn { + if c == '%' { + let mut input = input.clone(); + if !matches!((input.next(), input.next()), (Some(a), Some(b)) + if is_ascii_hex_digit(a) && is_ascii_hex_digit(b)) + { + vfn(SyntaxViolation::PercentDecode) + } + } else if !is_url_code_point(c) { + vfn(SyntaxViolation::NonUrlCodePoint) + } + } + } +} + +#[inline] +fn is_ascii_hex_digit(c: char) -> bool { + matches!(c, 'a'..='f' | 'A'..='F' | '0'..='9') +} + +// Non URL code points: +// U+0000 to U+0020 (space) +// " # % < > [ \ ] ^ ` { | } +// U+007F to U+009F +// surrogates +// U+FDD0 to U+FDEF +// Last two of each plane: U+__FFFE to U+__FFFF for __ in 00 to 10 hex +#[inline] +fn is_url_code_point(c: char) -> bool { + matches!(c, + 'a'..='z' | + 'A'..='Z' | + '0'..='9' | + '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | '-' | + '.' | '/' | ':' | ';' | '=' | '?' | '@' | '_' | '~' | + '\u{A0}'..='\u{D7FF}' | '\u{E000}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' | + '\u{10000}'..='\u{1FFFD}' | '\u{20000}'..='\u{2FFFD}' | + '\u{30000}'..='\u{3FFFD}' | '\u{40000}'..='\u{4FFFD}' | + '\u{50000}'..='\u{5FFFD}' | '\u{60000}'..='\u{6FFFD}' | + '\u{70000}'..='\u{7FFFD}' | '\u{80000}'..='\u{8FFFD}' | + '\u{90000}'..='\u{9FFFD}' | '\u{A0000}'..='\u{AFFFD}' | + '\u{B0000}'..='\u{BFFFD}' | '\u{C0000}'..='\u{CFFFD}' | + '\u{D0000}'..='\u{DFFFD}' | '\u{E1000}'..='\u{EFFFD}' | + '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}') +} + +/// https://url.spec.whatwg.org/#c0-controls-and-space +#[inline] +fn c0_control_or_space(ch: char) -> bool { + ch <= ' ' // U+0000 to U+0020 +} + +/// https://infra.spec.whatwg.org/#ascii-tab-or-newline +#[inline] +fn ascii_tab_or_new_line(ch: char) -> bool { + matches!(ch, '\t' | '\r' | '\n') +} + +/// https://url.spec.whatwg.org/#ascii-alpha +#[inline] +pub fn ascii_alpha(ch: char) -> bool { + matches!(ch, 'a'..='z' | 'A'..='Z') +} + +#[inline] +pub fn to_u32(i: usize) -> ParseResult { + if i <= ::std::u32::MAX as usize { + Ok(i as u32) + } else { + Err(ParseError::Overflow) + } +} + +fn is_normalized_windows_drive_letter(segment: &str) -> bool { + is_windows_drive_letter(segment) && segment.as_bytes()[1] == b':' +} + +/// Whether the scheme is file:, the path has a single segment, and that segment +/// is a Windows drive letter +#[inline] +pub fn is_windows_drive_letter(segment: &str) -> bool { + segment.len() == 2 && starts_with_windows_drive_letter(segment) +} + +/// Whether path starts with a root slash +/// and a windows drive letter eg: "/c:" or "/a:/" +fn path_starts_with_windows_drive_letter(s: &str) -> bool { + if let Some(c) = s.as_bytes().first() { + matches!(c, b'/' | b'\\' | b'?' | b'#') && starts_with_windows_drive_letter(&s[1..]) + } else { + false + } +} + +fn starts_with_windows_drive_letter(s: &str) -> bool { + s.len() >= 2 + && ascii_alpha(s.as_bytes()[0] as char) + && matches!(s.as_bytes()[1], b':' | b'|') + && (s.len() == 2 || matches!(s.as_bytes()[2], b'/' | b'\\' | b'?' | b'#')) +} + +/// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter +fn starts_with_windows_drive_letter_segment(input: &Input<'_>) -> bool { + let mut input = input.clone(); + match (input.next(), input.next(), input.next()) { + // its first two code points are a Windows drive letter + // its third code point is U+002F (/), U+005C (\), U+003F (?), or U+0023 (#). + (Some(a), Some(b), Some(c)) + if ascii_alpha(a) && matches!(b, ':' | '|') && matches!(c, '/' | '\\' | '?' | '#') => + { + true + } + // its first two code points are a Windows drive letter + // its length is 2 + (Some(a), Some(b), None) if ascii_alpha(a) && matches!(b, ':' | '|') => true, + _ => false, + } +} diff --git a/src/path_segments.rs b/src/path_segments.rs new file mode 100644 index 0000000..29afc1e --- /dev/null +++ b/src/path_segments.rs @@ -0,0 +1,246 @@ +// Copyright 2016 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use crate::parser::{self, to_u32, SchemeType}; +use crate::Url; +use std::str; + +/// Exposes methods to manipulate the path of an URL that is not cannot-be-base. +/// +/// The path always starts with a `/` slash, and is made of slash-separated segments. +/// There is always at least one segment (which may be the empty string). +/// +/// Examples: +/// +/// ```rust +/// use url::Url; +/// # use std::error::Error; +/// +/// # fn run() -> Result<(), Box> { +/// let mut url = Url::parse("mailto:me@example.com")?; +/// assert!(url.path_segments_mut().is_err()); +/// +/// let mut url = Url::parse("http://example.net/foo/index.html")?; +/// url.path_segments_mut().map_err(|_| "cannot be base")? +/// .pop().push("img").push("2/100%.png"); +/// assert_eq!(url.as_str(), "http://example.net/foo/img/2%2F100%25.png"); +/// # Ok(()) +/// # } +/// # run().unwrap(); +/// ``` +#[derive(Debug)] +pub struct PathSegmentsMut<'a> { + url: &'a mut Url, + after_first_slash: usize, + after_path: String, + old_after_path_position: u32, +} + +// Not re-exported outside the crate +pub fn new(url: &mut Url) -> PathSegmentsMut<'_> { + let after_path = url.take_after_path(); + let old_after_path_position = to_u32(url.serialization.len()).unwrap(); + // Special urls always have a non empty path + if SchemeType::from(url.scheme()).is_special() { + debug_assert!(url.byte_at(url.path_start) == b'/'); + } else { + debug_assert!( + url.serialization.len() == url.path_start as usize + || url.byte_at(url.path_start) == b'/' + ); + } + PathSegmentsMut { + after_first_slash: url.path_start as usize + "/".len(), + url, + old_after_path_position, + after_path, + } +} + +impl<'a> Drop for PathSegmentsMut<'a> { + fn drop(&mut self) { + self.url + .restore_after_path(self.old_after_path_position, &self.after_path) + } +} + +impl<'a> PathSegmentsMut<'a> { + /// Remove all segments in the path, leaving the minimal `url.path() == "/"`. + /// + /// Returns `&mut Self` so that method calls can be chained. + /// + /// Example: + /// + /// ```rust + /// use url::Url; + /// # use std::error::Error; + /// + /// # fn run() -> Result<(), Box> { + /// let mut url = Url::parse("https://github.com/servo/rust-url/")?; + /// url.path_segments_mut().map_err(|_| "cannot be base")? + /// .clear().push("logout"); + /// assert_eq!(url.as_str(), "https://github.com/logout"); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + pub fn clear(&mut self) -> &mut Self { + self.url.serialization.truncate(self.after_first_slash); + self + } + + /// Remove the last segment of this URL’s path if it is empty, + /// except if these was only one segment to begin with. + /// + /// In other words, remove one path trailing slash, if any, + /// unless it is also the initial slash (so this does nothing if `url.path() == "/")`. + /// + /// Returns `&mut Self` so that method calls can be chained. + /// + /// Example: + /// + /// ```rust + /// use url::Url; + /// # use std::error::Error; + /// + /// # fn run() -> Result<(), Box> { + /// let mut url = Url::parse("https://github.com/servo/rust-url/")?; + /// url.path_segments_mut().map_err(|_| "cannot be base")? + /// .push("pulls"); + /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url//pulls"); + /// + /// let mut url = Url::parse("https://github.com/servo/rust-url/")?; + /// url.path_segments_mut().map_err(|_| "cannot be base")? + /// .pop_if_empty().push("pulls"); + /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/pulls"); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + pub fn pop_if_empty(&mut self) -> &mut Self { + if self.after_first_slash >= self.url.serialization.len() { + return self; + } + if self.url.serialization[self.after_first_slash..].ends_with('/') { + self.url.serialization.pop(); + } + self + } + + /// Remove the last segment of this URL’s path. + /// + /// If the path only has one segment, make it empty such that `url.path() == "/"`. + /// + /// Returns `&mut Self` so that method calls can be chained. + pub fn pop(&mut self) -> &mut Self { + if self.after_first_slash >= self.url.serialization.len() { + return self; + } + let last_slash = self.url.serialization[self.after_first_slash..] + .rfind('/') + .unwrap_or(0); + self.url + .serialization + .truncate(self.after_first_slash + last_slash); + self + } + + /// Append the given segment at the end of this URL’s path. + /// + /// See the documentation for `.extend()`. + /// + /// Returns `&mut Self` so that method calls can be chained. + pub fn push(&mut self, segment: &str) -> &mut Self { + self.extend(Some(segment)) + } + + /// Append each segment from the given iterator at the end of this URL’s path. + /// + /// Each segment is percent-encoded like in `Url::parse` or `Url::join`, + /// except that `%` and `/` characters are also encoded (to `%25` and `%2F`). + /// This is unlike `Url::parse` where `%` is left as-is in case some of the input + /// is already percent-encoded, and `/` denotes a path segment separator.) + /// + /// Note that, in addition to slashes between new segments, + /// this always adds a slash between the existing path and the new segments + /// *except* if the existing path is `"/"`. + /// If the previous last segment was empty (if the path had a trailing slash) + /// the path after `.extend()` will contain two consecutive slashes. + /// If that is undesired, call `.pop_if_empty()` first. + /// + /// To obtain a behavior similar to `Url::join`, call `.pop()` unconditionally first. + /// + /// Returns `&mut Self` so that method calls can be chained. + /// + /// Example: + /// + /// ```rust + /// use url::Url; + /// # use std::error::Error; + /// + /// # fn run() -> Result<(), Box> { + /// let mut url = Url::parse("https://github.com/")?; + /// let org = "servo"; + /// let repo = "rust-url"; + /// let issue_number = "188"; + /// url.path_segments_mut().map_err(|_| "cannot be base")? + /// .extend(&[org, repo, "issues", issue_number]); + /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/issues/188"); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// + /// In order to make sure that parsing the serialization of an URL gives the same URL, + /// a segment is ignored if it is `"."` or `".."`: + /// + /// ```rust + /// use url::Url; + /// # use std::error::Error; + /// + /// # fn run() -> Result<(), Box> { + /// let mut url = Url::parse("https://github.com/servo")?; + /// url.path_segments_mut().map_err(|_| "cannot be base")? + /// .extend(&["..", "rust-url", ".", "pulls"]); + /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/pulls"); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + pub fn extend(&mut self, segments: I) -> &mut Self + where + I: IntoIterator, + I::Item: AsRef, + { + let scheme_type = SchemeType::from(self.url.scheme()); + let path_start = self.url.path_start as usize; + self.url.mutate(|parser| { + parser.context = parser::Context::PathSegmentSetter; + for segment in segments { + let segment = segment.as_ref(); + if matches!(segment, "." | "..") { + continue; + } + if parser.serialization.len() > path_start + 1 + // Non special url's path might still be empty + || parser.serialization.len() == path_start + { + parser.serialization.push('/'); + } + let mut has_host = true; // FIXME account for this? + parser.parse_path( + scheme_type, + &mut has_host, + path_start, + parser::Input::new(segment), + ); + } + }); + self + } +} diff --git a/src/quirks.rs b/src/quirks.rs new file mode 100644 index 0000000..0674ebb --- /dev/null +++ b/src/quirks.rs @@ -0,0 +1,326 @@ +// Copyright 2016 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Getters and setters for URL components implemented per https://url.spec.whatwg.org/#api +//! +//! Unless you need to be interoperable with web browsers, +//! you probably want to use `Url` method instead. + +use crate::parser::{default_port, Context, Input, Parser, SchemeType}; +use crate::{Host, ParseError, Position, Url}; + +/// Internal components / offsets of a URL. +/// +/// https://user@pass:example.com:1234/foo/bar?baz#quux +/// | | | | ^^^^| | | +/// | | | | | | | `----- fragment_start +/// | | | | | | `--------- query_start +/// | | | | | `----------------- path_start +/// | | | | `--------------------- port +/// | | | `----------------------- host_end +/// | | `---------------------------------- host_start +/// | `--------------------------------------- username_end +/// `---------------------------------------------- scheme_end +#[derive(Copy, Clone)] +#[cfg(feature = "expose_internals")] +pub struct InternalComponents { + pub scheme_end: u32, + pub username_end: u32, + pub host_start: u32, + pub host_end: u32, + pub port: Option, + pub path_start: u32, + pub query_start: Option, + pub fragment_start: Option, +} + +/// Internal component / parsed offsets of the URL. +/// +/// This can be useful for implementing efficient serialization +/// for the URL. +#[cfg(feature = "expose_internals")] +pub fn internal_components(url: &Url) -> InternalComponents { + InternalComponents { + scheme_end: url.scheme_end, + username_end: url.username_end, + host_start: url.host_start, + host_end: url.host_end, + port: url.port, + path_start: url.path_start, + query_start: url.query_start, + fragment_start: url.fragment_start, + } +} + +/// https://url.spec.whatwg.org/#dom-url-domaintoascii +pub fn domain_to_ascii(domain: &str) -> String { + match Host::parse(domain) { + Ok(Host::Domain(domain)) => domain, + _ => String::new(), + } +} + +/// https://url.spec.whatwg.org/#dom-url-domaintounicode +pub fn domain_to_unicode(domain: &str) -> String { + match Host::parse(domain) { + Ok(Host::Domain(ref domain)) => { + let (unicode, _errors) = idna::domain_to_unicode(domain); + unicode + } + _ => String::new(), + } +} + +/// Getter for https://url.spec.whatwg.org/#dom-url-href +pub fn href(url: &Url) -> &str { + url.as_str() +} + +/// Setter for https://url.spec.whatwg.org/#dom-url-href +pub fn set_href(url: &mut Url, value: &str) -> Result<(), ParseError> { + *url = Url::parse(value)?; + Ok(()) +} + +/// Getter for https://url.spec.whatwg.org/#dom-url-origin +pub fn origin(url: &Url) -> String { + url.origin().ascii_serialization() +} + +/// Getter for https://url.spec.whatwg.org/#dom-url-protocol +#[inline] +pub fn protocol(url: &Url) -> &str { + &url.as_str()[..url.scheme().len() + ":".len()] +} + +/// Setter for https://url.spec.whatwg.org/#dom-url-protocol +#[allow(clippy::result_unit_err)] +pub fn set_protocol(url: &mut Url, mut new_protocol: &str) -> Result<(), ()> { + // The scheme state in the spec ignores everything after the first `:`, + // but `set_scheme` errors if there is more. + if let Some(position) = new_protocol.find(':') { + new_protocol = &new_protocol[..position]; + } + url.set_scheme(new_protocol) +} + +/// Getter for https://url.spec.whatwg.org/#dom-url-username +#[inline] +pub fn username(url: &Url) -> &str { + url.username() +} + +/// Setter for https://url.spec.whatwg.org/#dom-url-username +#[allow(clippy::result_unit_err)] +pub fn set_username(url: &mut Url, new_username: &str) -> Result<(), ()> { + url.set_username(new_username) +} + +/// Getter for https://url.spec.whatwg.org/#dom-url-password +#[inline] +pub fn password(url: &Url) -> &str { + url.password().unwrap_or("") +} + +/// Setter for https://url.spec.whatwg.org/#dom-url-password +#[allow(clippy::result_unit_err)] +pub fn set_password(url: &mut Url, new_password: &str) -> Result<(), ()> { + url.set_password(if new_password.is_empty() { + None + } else { + Some(new_password) + }) +} + +/// Getter for https://url.spec.whatwg.org/#dom-url-host +#[inline] +pub fn host(url: &Url) -> &str { + &url[Position::BeforeHost..Position::AfterPort] +} + +/// Setter for https://url.spec.whatwg.org/#dom-url-host +#[allow(clippy::result_unit_err)] +pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> { + // If context object’s url’s cannot-be-a-base-URL flag is set, then return. + if url.cannot_be_a_base() { + return Err(()); + } + // Host parsing rules are strict, + // We don't want to trim the input + let input = Input::no_trim(new_host); + let host; + let opt_port; + { + let scheme = url.scheme(); + let scheme_type = SchemeType::from(scheme); + if scheme_type == SchemeType::File && new_host.is_empty() { + url.set_host_internal(Host::Domain(String::new()), None); + return Ok(()); + } + + if let Ok((h, remaining)) = Parser::parse_host(input, scheme_type) { + host = h; + opt_port = if let Some(remaining) = remaining.split_prefix(':') { + if remaining.is_empty() { + None + } else { + Parser::parse_port(remaining, || default_port(scheme), Context::Setter) + .ok() + .map(|(port, _remaining)| port) + } + } else { + None + }; + } else { + return Err(()); + } + } + // Make sure we won't set an empty host to a url with a username or a port + if host == Host::Domain("".to_string()) + && (!username(url).is_empty() || matches!(opt_port, Some(Some(_))) || url.port().is_some()) + { + return Err(()); + } + url.set_host_internal(host, opt_port); + Ok(()) +} + +/// Getter for https://url.spec.whatwg.org/#dom-url-hostname +#[inline] +pub fn hostname(url: &Url) -> &str { + url.host_str().unwrap_or("") +} + +/// Setter for https://url.spec.whatwg.org/#dom-url-hostname +#[allow(clippy::result_unit_err)] +pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> { + if url.cannot_be_a_base() { + return Err(()); + } + // Host parsing rules are strict we don't want to trim the input + let input = Input::no_trim(new_hostname); + let scheme_type = SchemeType::from(url.scheme()); + if scheme_type == SchemeType::File && new_hostname.is_empty() { + url.set_host_internal(Host::Domain(String::new()), None); + return Ok(()); + } + + if let Ok((host, _remaining)) = Parser::parse_host(input, scheme_type) { + if let Host::Domain(h) = &host { + if h.is_empty() { + // Empty host on special not file url + if SchemeType::from(url.scheme()) == SchemeType::SpecialNotFile + // Port with an empty host + ||!port(url).is_empty() + // Empty host that includes credentials + || !url.username().is_empty() + || !url.password().unwrap_or("").is_empty() + { + return Err(()); + } + } + } + url.set_host_internal(host, None); + Ok(()) + } else { + Err(()) + } +} + +/// Getter for https://url.spec.whatwg.org/#dom-url-port +#[inline] +pub fn port(url: &Url) -> &str { + &url[Position::BeforePort..Position::AfterPort] +} + +/// Setter for https://url.spec.whatwg.org/#dom-url-port +#[allow(clippy::result_unit_err)] +pub fn set_port(url: &mut Url, new_port: &str) -> Result<(), ()> { + let result; + { + // has_host implies !cannot_be_a_base + let scheme = url.scheme(); + if !url.has_host() || url.host() == Some(Host::Domain("")) || scheme == "file" { + return Err(()); + } + result = Parser::parse_port( + Input::new(new_port), + || default_port(scheme), + Context::Setter, + ) + } + if let Ok((new_port, _remaining)) = result { + url.set_port_internal(new_port); + Ok(()) + } else { + Err(()) + } +} + +/// Getter for https://url.spec.whatwg.org/#dom-url-pathname +#[inline] +pub fn pathname(url: &Url) -> &str { + url.path() +} + +/// Setter for https://url.spec.whatwg.org/#dom-url-pathname +pub fn set_pathname(url: &mut Url, new_pathname: &str) { + if url.cannot_be_a_base() { + return; + } + if new_pathname.starts_with('/') + || (SchemeType::from(url.scheme()).is_special() + // \ is a segment delimiter for 'special' URLs" + && new_pathname.starts_with('\\')) + { + url.set_path(new_pathname) + } else { + let mut path_to_set = String::from("/"); + path_to_set.push_str(new_pathname); + url.set_path(&path_to_set) + } +} + +/// Getter for https://url.spec.whatwg.org/#dom-url-search +pub fn search(url: &Url) -> &str { + trim(&url[Position::AfterPath..Position::AfterQuery]) +} + +/// Setter for https://url.spec.whatwg.org/#dom-url-search +pub fn set_search(url: &mut Url, new_search: &str) { + url.set_query(match new_search { + "" => None, + _ if new_search.starts_with('?') => Some(&new_search[1..]), + _ => Some(new_search), + }) +} + +/// Getter for https://url.spec.whatwg.org/#dom-url-hash +pub fn hash(url: &Url) -> &str { + trim(&url[Position::AfterQuery..]) +} + +/// Setter for https://url.spec.whatwg.org/#dom-url-hash +pub fn set_hash(url: &mut Url, new_hash: &str) { + url.set_fragment(match new_hash { + // If the given value is the empty string, + // then set context object’s url’s fragment to null and return. + "" => None, + // Let input be the given value with a single leading U+0023 (#) removed, if any. + _ if new_hash.starts_with('#') => Some(&new_hash[1..]), + _ => Some(new_hash), + }) +} + +fn trim(s: &str) -> &str { + if s.len() == 1 { + "" + } else { + s + } +} diff --git a/src/slicing.rs b/src/slicing.rs new file mode 100644 index 0000000..a90337b --- /dev/null +++ b/src/slicing.rs @@ -0,0 +1,187 @@ +// Copyright 2016 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use crate::Url; +use std::ops::{Index, Range, RangeFrom, RangeFull, RangeTo}; + +impl Index for Url { + type Output = str; + fn index(&self, _: RangeFull) -> &str { + &self.serialization + } +} + +impl Index> for Url { + type Output = str; + fn index(&self, range: RangeFrom) -> &str { + &self.serialization[self.index(range.start)..] + } +} + +impl Index> for Url { + type Output = str; + fn index(&self, range: RangeTo) -> &str { + &self.serialization[..self.index(range.end)] + } +} + +impl Index> for Url { + type Output = str; + fn index(&self, range: Range) -> &str { + &self.serialization[self.index(range.start)..self.index(range.end)] + } +} + +/// Indicates a position within a URL based on its components. +/// +/// A range of positions can be used for slicing `Url`: +/// +/// ```rust +/// # use url::{Url, Position}; +/// # fn something(some_url: Url) { +/// let serialization: &str = &some_url[..]; +/// let serialization_without_fragment: &str = &some_url[..Position::AfterQuery]; +/// let authority: &str = &some_url[Position::BeforeUsername..Position::AfterPort]; +/// let data_url_payload: &str = &some_url[Position::BeforePath..Position::AfterQuery]; +/// let scheme_relative: &str = &some_url[Position::BeforeUsername..]; +/// # } +/// ``` +/// +/// In a pseudo-grammar (where `[`…`]?` makes a sub-sequence optional), +/// URL components and delimiters that separate them are: +/// +/// ```notrust +/// url = +/// scheme ":" +/// [ "//" [ username [ ":" password ]? "@" ]? host [ ":" port ]? ]? +/// path [ "?" query ]? [ "#" fragment ]? +/// ``` +/// +/// When a given component is not present, +/// its "before" and "after" position are the same +/// (so that `&some_url[BeforeFoo..AfterFoo]` is the empty string) +/// and component ordering is preserved +/// (so that a missing query "is between" a path and a fragment). +/// +/// The end of a component and the start of the next are either the same or separate +/// by a delimiter. +/// (Not that the initial `/` of a path is considered part of the path here, not a delimiter.) +/// For example, `&url[..BeforeFragment]` would include a `#` delimiter (if present in `url`), +/// so `&url[..AfterQuery]` might be desired instead. +/// +/// `BeforeScheme` and `AfterFragment` are always the start and end of the entire URL, +/// so `&url[BeforeScheme..X]` is the same as `&url[..X]` +/// and `&url[X..AfterFragment]` is the same as `&url[X..]`. +#[derive(Copy, Clone, Debug)] +pub enum Position { + BeforeScheme, + AfterScheme, + BeforeUsername, + AfterUsername, + BeforePassword, + AfterPassword, + BeforeHost, + AfterHost, + BeforePort, + AfterPort, + BeforePath, + AfterPath, + BeforeQuery, + AfterQuery, + BeforeFragment, + AfterFragment, +} + +impl Url { + #[inline] + fn index(&self, position: Position) -> usize { + match position { + Position::BeforeScheme => 0, + + Position::AfterScheme => self.scheme_end as usize, + + Position::BeforeUsername => { + if self.has_authority() { + self.scheme_end as usize + "://".len() + } else { + debug_assert!(self.byte_at(self.scheme_end) == b':'); + debug_assert!(self.scheme_end + ":".len() as u32 == self.username_end); + self.scheme_end as usize + ":".len() + } + } + + Position::AfterUsername => self.username_end as usize, + + Position::BeforePassword => { + if self.has_authority() && self.byte_at(self.username_end) == b':' { + self.username_end as usize + ":".len() + } else { + debug_assert!(self.username_end == self.host_start); + self.username_end as usize + } + } + + Position::AfterPassword => { + if self.has_authority() && self.byte_at(self.username_end) == b':' { + debug_assert!(self.byte_at(self.host_start - "@".len() as u32) == b'@'); + self.host_start as usize - "@".len() + } else { + debug_assert!(self.username_end == self.host_start); + self.host_start as usize + } + } + + Position::BeforeHost => self.host_start as usize, + + Position::AfterHost => self.host_end as usize, + + Position::BeforePort => { + if self.port.is_some() { + debug_assert!(self.byte_at(self.host_end) == b':'); + self.host_end as usize + ":".len() + } else { + self.host_end as usize + } + } + + Position::AfterPort => self.path_start as usize, + + Position::BeforePath => self.path_start as usize, + + Position::AfterPath => match (self.query_start, self.fragment_start) { + (Some(q), _) => q as usize, + (None, Some(f)) => f as usize, + (None, None) => self.serialization.len(), + }, + + Position::BeforeQuery => match (self.query_start, self.fragment_start) { + (Some(q), _) => { + debug_assert!(self.byte_at(q) == b'?'); + q as usize + "?".len() + } + (None, Some(f)) => f as usize, + (None, None) => self.serialization.len(), + }, + + Position::AfterQuery => match self.fragment_start { + None => self.serialization.len(), + Some(f) => f as usize, + }, + + Position::BeforeFragment => match self.fragment_start { + Some(f) => { + debug_assert!(self.byte_at(f) == b'#'); + f as usize + "#".len() + } + None => self.serialization.len(), + }, + + Position::AfterFragment => self.serialization.len(), + } + } +} diff --git a/tests/data.rs b/tests/data.rs new file mode 100644 index 0000000..f4001ca --- /dev/null +++ b/tests/data.rs @@ -0,0 +1,270 @@ +// Copyright 2013-2014 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Data-driven tests + +use std::str::FromStr; + +use serde_json::Value; +use url::{quirks, Url}; + +#[test] +fn urltestdata() { + let idna_skip_inputs = [ + "http://www.foo。bar.com", + "http://Go.com", + "http://你好你好", + "https://faß.ExAmPlE/", + "http://0Xc0.0250.01", + "ftp://%e2%98%83", + "https://%e2%98%83", + "file://a\u{ad}b/p", + "file://a%C2%ADb/p", + "http://GOO\u{200b}\u{2060}\u{feff}goo.com", + ]; + + // Copied from https://github.com/web-platform-tests/wpt/blob/master/url/ + let mut json = Value::from_str(include_str!("urltestdata.json")) + .expect("JSON parse error in urltestdata.json"); + + let mut passed = true; + for entry in json.as_array_mut().unwrap() { + if entry.is_string() { + continue; // ignore comments + } + + let maybe_base = entry + .take_key("base") + .expect("missing base key") + .maybe_string(); + let input = entry.take_string("input"); + let failure = entry.take_key("failure").is_some(); + + { + if idna_skip_inputs.contains(&input.as_str()) { + continue; + } + } + + let res = if let Some(base) = maybe_base { + let base = match Url::parse(&base) { + Ok(base) => base, + Err(_) if failure => continue, + Err(message) => { + eprint_failure( + format!(" failed: error parsing base {:?}: {}", base, message), + &format!("parse base for {:?}", input), + None, + ); + passed = false; + continue; + } + }; + base.join(&input) + } else { + Url::parse(&input) + }; + + let url = match (res, failure) { + (Ok(url), false) => url, + (Err(_), true) => continue, + (Err(message), false) => { + eprint_failure( + format!(" failed: {}", message), + &format!("parse URL for {:?}", input), + None, + ); + passed = false; + continue; + } + (Ok(_), true) => { + eprint_failure( + format!(" failed: expected parse error for URL {:?}", input), + &format!("parse URL for {:?}", input), + None, + ); + passed = false; + continue; + } + }; + + passed &= check_invariants(&url, &format!("invariants for {:?}", input), None); + + for &attr in ATTRIBS { + passed &= test_eq_eprint( + entry.take_string(attr), + get(&url, attr), + &format!("{:?} - {}", input, attr), + None, + ); + } + + if let Some(expected_origin) = entry.take_key("origin").map(|s| s.string()) { + passed &= test_eq_eprint( + expected_origin, + &quirks::origin(&url), + &format!("origin for {:?}", input), + None, + ); + } + } + + assert!(passed) +} + +#[test] +fn setters_tests() { + let mut json = Value::from_str(include_str!("setters_tests.json")) + .expect("JSON parse error in setters_tests.json"); + + let mut passed = true; + for &attr in ATTRIBS { + if attr == "href" { + continue; + } + + let mut tests = json.take_key(attr).unwrap(); + for mut test in tests.as_array_mut().unwrap().drain(..) { + let comment = test.take_key("comment").map(|s| s.string()); + { + if let Some(comment) = comment.as_ref() { + if comment.starts_with("IDNA Nontransitional_Processing") { + continue; + } + } + } + let href = test.take_string("href"); + let new_value = test.take_string("new_value"); + let name = format!("{:?}.{} = {:?}", href, attr, new_value); + let mut expected = test.take_key("expected").unwrap(); + + let mut url = Url::parse(&href).unwrap(); + let comment_ref = comment.as_deref(); + passed &= check_invariants(&url, &name, comment_ref); + set(&mut url, attr, &new_value); + + for attr in ATTRIBS { + if let Some(value) = expected.take_key(attr) { + passed &= test_eq_eprint(value.string(), get(&url, attr), &name, comment_ref); + }; + } + + passed &= check_invariants(&url, &name, comment_ref); + } + } + + assert!(passed); +} + +fn check_invariants(url: &Url, name: &str, comment: Option<&str>) -> bool { + let mut passed = true; + if let Err(e) = url.check_invariants() { + passed = false; + eprint_failure( + format!(" failed: invariants checked -> {:?}", e), + name, + comment, + ); + } + + #[cfg(feature = "serde")] + { + let bytes = serde_json::to_vec(url).unwrap(); + let new_url: Url = serde_json::from_slice(&bytes).unwrap(); + passed &= test_eq_eprint(url.to_string(), &new_url.to_string(), name, comment); + } + + passed +} + +trait JsonExt { + fn take_key(&mut self, key: &str) -> Option; + fn string(self) -> String; + fn maybe_string(self) -> Option; + fn take_string(&mut self, key: &str) -> String; +} + +impl JsonExt for Value { + fn take_key(&mut self, key: &str) -> Option { + self.as_object_mut().unwrap().remove(key) + } + + fn string(self) -> String { + self.maybe_string().expect("") + } + + fn maybe_string(self) -> Option { + match self { + Value::String(s) => Some(s), + Value::Null => None, + _ => panic!("Not a Value::String or Value::Null"), + } + } + + fn take_string(&mut self, key: &str) -> String { + self.take_key(key).unwrap().string() + } +} + +fn get<'a>(url: &'a Url, attr: &str) -> &'a str { + match attr { + "href" => quirks::href(url), + "protocol" => quirks::protocol(url), + "username" => quirks::username(url), + "password" => quirks::password(url), + "hostname" => quirks::hostname(url), + "host" => quirks::host(url), + "port" => quirks::port(url), + "pathname" => quirks::pathname(url), + "search" => quirks::search(url), + "hash" => quirks::hash(url), + _ => unreachable!(), + } +} + +#[allow(clippy::unit_arg)] +fn set<'a>(url: &'a mut Url, attr: &str, new: &str) { + let _ = match attr { + "protocol" => quirks::set_protocol(url, new), + "username" => quirks::set_username(url, new), + "password" => quirks::set_password(url, new), + "hostname" => quirks::set_hostname(url, new), + "host" => quirks::set_host(url, new), + "port" => quirks::set_port(url, new), + "pathname" => Ok(quirks::set_pathname(url, new)), + "search" => Ok(quirks::set_search(url, new)), + "hash" => Ok(quirks::set_hash(url, new)), + _ => unreachable!(), + }; +} + +fn test_eq_eprint(expected: String, actual: &str, name: &str, comment: Option<&str>) -> bool { + if expected == actual { + return true; + } + eprint_failure( + format!("expected: {}\n actual: {}", expected, actual), + name, + comment, + ); + false +} + +fn eprint_failure(err: String, name: &str, comment: Option<&str>) { + eprintln!(" test: {}\n{}", name, err); + if let Some(comment) = comment { + eprintln!("{}\n", comment); + } else { + eprintln!(); + } +} + +const ATTRIBS: &[&str] = &[ + "href", "protocol", "username", "password", "host", "hostname", "port", "pathname", "search", + "hash", +]; diff --git a/tests/debugger_visualizer.rs b/tests/debugger_visualizer.rs new file mode 100644 index 0000000..4558e07 --- /dev/null +++ b/tests/debugger_visualizer.rs @@ -0,0 +1,102 @@ +use debugger_test::debugger_test; +use url::Url; + +#[inline(never)] +fn __break() {} + +#[debugger_test( + debugger = "cdb", + commands = " + .nvlist + + dx base_url + + dx url_with_non_special_scheme + + dx url_with_user_pass_port_query_fragments + + dx url_blob + + dx url_with_base + + dx url_with_base_replaced + + dx url_with_comma", + expected_statements = r#" + pattern:debugger_visualizer-.*\.exe \(embedded NatVis ".*-[0-9]+\.natvis"\) + + base_url : "http://example.org/foo/bar" [Type: url::Url] + [] [Type: url::Url] + [scheme] : "http" + [host] : "example.org" + [path] : "/foo/bar" + + url_with_non_special_scheme : "non-special://test/x" [Type: url::Url] + [] [Type: url::Url] + [scheme] : "non-special" + [host] : "test" + [path] : "/x" + + url_with_user_pass_port_query_fragments : "http://user:pass@foo:21/bar;par?b#c" [Type: url::Url] + [] [Type: url::Url] + [scheme] : "http" + [username] : "user" + [host] : "foo" + [port] : 21 + [path] : "/bar;par" + [query] : "b" + [fragment] : "c" + + url_blob : "blob:https://example.com:443/" [Type: url::Url] + [] [Type: url::Url] + [scheme] : "blob" + [path] : "https://example.com:443/" + + url_with_base : "http://example.org/a%2fc" [Type: url::Url] + [] [Type: url::Url] + [scheme] : "http" + [host] : "example.org" + [path] : "/a%2fc" + + url_with_base_replaced : "http://[::7f00:1]/" [Type: url::Url] + [] [Type: url::Url] + [scheme] : "http" + [host] : "[::7f00:1]" + [path] : "/" + + url_with_comma : "data:text/html,test#test" [Type: url::Url] + [] [Type: url::Url] + [scheme] : "data" + [path] : "text/html,test" + [fragment] : "test" + "# +)] +fn test_url_visualizer() { + // Copied from https://github.com/web-platform-tests/wpt/blob/master/url/ + let base_url = Url::parse("http://example.org/foo/bar").unwrap(); + assert_eq!(base_url.as_str(), "http://example.org/foo/bar"); + + let url_with_non_special_scheme = Url::parse("non-special://:@test/x").unwrap(); + assert_eq!(url_with_non_special_scheme.as_str(), "non-special://test/x"); + + let url_with_user_pass_port_query_fragments = + Url::parse("http://user:pass@foo:21/bar;par?b#c").unwrap(); + assert_eq!( + url_with_user_pass_port_query_fragments.as_str(), + "http://user:pass@foo:21/bar;par?b#c" + ); + + let url_blob = Url::parse("blob:https://example.com:443/").unwrap(); + assert_eq!(url_blob.as_str(), "blob:https://example.com:443/"); + + let url_with_base = base_url.join("/a%2fc").unwrap(); + assert_eq!(url_with_base.as_str(), "http://example.org/a%2fc"); + + let url_with_base_replaced = base_url.join("http://[::127.0.0.1]").unwrap(); + assert_eq!(url_with_base_replaced.as_str(), "http://[::7f00:1]/"); + + let url_with_comma = base_url.join("data:text/html,test#test").unwrap(); + assert_eq!(url_with_comma.as_str(), "data:text/html,test#test"); + + __break(); +} diff --git a/tests/setters_tests.json b/tests/setters_tests.json new file mode 100644 index 0000000..4280032 --- /dev/null +++ b/tests/setters_tests.json @@ -0,0 +1,1876 @@ +{ + "comment": [ + "AS OF https://github.com/jsdom/whatwg-url/commit/35f04dfd3048cf6362f4398745bb13375c5020c2", + "## Tests for setters of https://url.spec.whatwg.org/#urlutils-members", + "", + "This file contains a JSON object.", + "Other than 'comment', each key is an attribute of the `URL` interface", + "defined in WHATWG’s URL Standard.", + "The values are arrays of test case objects for that attribute.", + "", + "To run a test case for the attribute `attr`:", + "", + "* Create a new `URL` object with the value for the 'href' key", + " the constructor single parameter. (Without a base URL.)", + " This must not throw.", + "* Set the attribute `attr` to (invoke its setter with)", + " with the value of for 'new_value' key.", + "* The value for the 'expected' key is another object.", + " For each `key` / `value` pair of that object,", + " get the attribute `key` (invoke its getter).", + " The returned string must be equal to `value`.", + "", + "Note: the 'href' setter is already covered by urltestdata.json." + ], + "protocol": [ + { + "comment": "The empty string is not a valid scheme. Setter leaves the URL unchanged.", + "href": "a://example.net", + "new_value": "", + "expected": { + "href": "a://example.net", + "protocol": "a:" + } + }, + { + "href": "a://example.net", + "new_value": "b", + "expected": { + "href": "b://example.net", + "protocol": "b:" + } + }, + { + "href": "javascript:alert(1)", + "new_value": "defuse", + "expected": { + "href": "defuse:alert(1)", + "protocol": "defuse:" + } + }, + { + "comment": "Upper-case ASCII is lower-cased", + "href": "a://example.net", + "new_value": "B", + "expected": { + "href": "b://example.net", + "protocol": "b:" + } + }, + { + "comment": "Non-ASCII is rejected", + "href": "a://example.net", + "new_value": "é", + "expected": { + "href": "a://example.net", + "protocol": "a:" + } + }, + { + "comment": "No leading digit", + "href": "a://example.net", + "new_value": "0b", + "expected": { + "href": "a://example.net", + "protocol": "a:" + } + }, + { + "comment": "No leading punctuation", + "href": "a://example.net", + "new_value": "+b", + "expected": { + "href": "a://example.net", + "protocol": "a:" + } + }, + { + "href": "a://example.net", + "new_value": "bC0+-.", + "expected": { + "href": "bc0+-.://example.net", + "protocol": "bc0+-.:" + } + }, + { + "comment": "Only some punctuation is acceptable", + "href": "a://example.net", + "new_value": "b,c", + "expected": { + "href": "a://example.net", + "protocol": "a:" + } + }, + { + "comment": "Non-ASCII is rejected", + "href": "a://example.net", + "new_value": "bé", + "expected": { + "href": "a://example.net", + "protocol": "a:" + } + }, + { + "comment": "Can’t switch from URL containing username/password/port to file", + "href": "http://test@example.net", + "new_value": "file", + "expected": { + "href": "http://test@example.net/", + "protocol": "http:" + } + }, + { + "href": "gopher://example.net:1234", + "new_value": "file", + "expected": { + "href": "gopher://example.net:1234", + "protocol": "gopher:" + } + }, + { + "href": "wss://x:x@example.net:1234", + "new_value": "file", + "expected": { + "href": "wss://x:x@example.net:1234/", + "protocol": "wss:" + } + }, + { + "comment": "Can’t switch from file URL with no host", + "href": "file://localhost/", + "new_value": "http", + "expected": { + "href": "file:///", + "protocol": "file:" + } + }, + { + "href": "file:///test", + "new_value": "gopher", + "expected": { + "href": "file:///test", + "protocol": "file:" + } + }, + { + "href": "file:", + "new_value": "wss", + "expected": { + "href": "file:///", + "protocol": "file:" + } + }, + { + "comment": "Can’t switch from special scheme to non-special", + "href": "http://example.net", + "new_value": "b", + "expected": { + "href": "http://example.net/", + "protocol": "http:" + } + }, + { + "href": "file://hi/path", + "new_value": "s", + "expected": { + "href": "file://hi/path", + "protocol": "file:" + } + }, + { + "href": "https://example.net", + "new_value": "s", + "expected": { + "href": "https://example.net/", + "protocol": "https:" + } + }, + { + "href": "ftp://example.net", + "new_value": "test", + "expected": { + "href": "ftp://example.net/", + "protocol": "ftp:" + } + }, + { + "comment": "Cannot-be-a-base URL doesn’t have a host, but URL in a special scheme must.", + "href": "mailto:me@example.net", + "new_value": "http", + "expected": { + "href": "mailto:me@example.net", + "protocol": "mailto:" + } + }, + { + "comment": "Can’t switch from non-special scheme to special", + "href": "ssh://me@example.net", + "new_value": "http", + "expected": { + "href": "ssh://me@example.net", + "protocol": "ssh:" + } + }, + { + "href": "ssh://me@example.net", + "new_value": "https", + "expected": { + "href": "ssh://me@example.net", + "protocol": "ssh:" + } + }, + { + "href": "ssh://me@example.net", + "new_value": "file", + "expected": { + "href": "ssh://me@example.net", + "protocol": "ssh:" + } + }, + { + "href": "ssh://example.net", + "new_value": "file", + "expected": { + "href": "ssh://example.net", + "protocol": "ssh:" + } + }, + { + "href": "nonsense:///test", + "new_value": "https", + "expected": { + "href": "nonsense:///test", + "protocol": "nonsense:" + } + }, + { + "comment": "Stuff after the first ':' is ignored", + "href": "http://example.net", + "new_value": "https:foo : bar", + "expected": { + "href": "https://example.net/", + "protocol": "https:" + } + }, + { + "comment": "Stuff after the first ':' is ignored", + "href": "data:text/html,

Test", + "new_value": "view-source+data:foo : bar", + "expected": { + "href": "view-source+data:text/html,

Test", + "protocol": "view-source+data:" + } + }, + { + "comment": "Port is set to null if it is the default for new scheme.", + "href": "http://foo.com:443/", + "new_value": "https", + "expected": { + "href": "https://foo.com/", + "protocol": "https:", + "port": "" + } + } + ], + "username": [ + { + "comment": "No host means no username", + "href": "file:///home/you/index.html", + "new_value": "me", + "expected": { + "href": "file:///home/you/index.html", + "username": "" + } + }, + { + "comment": "No host means no username", + "href": "unix:/run/foo.socket", + "new_value": "me", + "expected": { + "href": "unix:/run/foo.socket", + "username": "" + } + }, + { + "comment": "Cannot-be-a-base means no username", + "href": "mailto:you@example.net", + "new_value": "me", + "expected": { + "href": "mailto:you@example.net", + "username": "" + } + }, + { + "href": "javascript:alert(1)", + "new_value": "wario", + "expected": { + "href": "javascript:alert(1)", + "username": "" + } + }, + { + "href": "http://example.net", + "new_value": "me", + "expected": { + "href": "http://me@example.net/", + "username": "me" + } + }, + { + "href": "http://:secret@example.net", + "new_value": "me", + "expected": { + "href": "http://me:secret@example.net/", + "username": "me" + } + }, + { + "href": "http://me@example.net", + "new_value": "", + "expected": { + "href": "http://example.net/", + "username": "" + } + }, + { + "href": "http://me:secret@example.net", + "new_value": "", + "expected": { + "href": "http://:secret@example.net/", + "username": "" + } + }, + { + "comment": "UTF-8 percent encoding with the userinfo encode set.", + "href": "http://example.net", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "expected": { + "href": "http://%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9@example.net/", + "username": "%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9" + } + }, + { + "comment": "Bytes already percent-encoded are left as-is.", + "href": "http://example.net", + "new_value": "%c3%89té", + "expected": { + "href": "http://%c3%89t%C3%A9@example.net/", + "username": "%c3%89t%C3%A9" + } + }, + { + "href": "sc:///", + "new_value": "x", + "expected": { + "href": "sc:///", + "username": "" + } + }, + { + "href": "javascript://x/", + "new_value": "wario", + "expected": { + "href": "javascript://wario@x/", + "username": "wario" + } + }, + { + "href": "file://test/", + "new_value": "test", + "expected": { + "href": "file://test/", + "username": "" + } + } + ], + "password": [ + { + "comment": "No host means no password", + "href": "file:///home/me/index.html", + "new_value": "secret", + "expected": { + "href": "file:///home/me/index.html", + "password": "" + } + }, + { + "comment": "No host means no password", + "href": "unix:/run/foo.socket", + "new_value": "secret", + "expected": { + "href": "unix:/run/foo.socket", + "password": "" + } + }, + { + "comment": "Cannot-be-a-base means no password", + "href": "mailto:me@example.net", + "new_value": "secret", + "expected": { + "href": "mailto:me@example.net", + "password": "" + } + }, + { + "href": "http://example.net", + "new_value": "secret", + "expected": { + "href": "http://:secret@example.net/", + "password": "secret" + } + }, + { + "href": "http://me@example.net", + "new_value": "secret", + "expected": { + "href": "http://me:secret@example.net/", + "password": "secret" + } + }, + { + "href": "http://:secret@example.net", + "new_value": "", + "expected": { + "href": "http://example.net/", + "password": "" + } + }, + { + "href": "http://me:secret@example.net", + "new_value": "", + "expected": { + "href": "http://me@example.net/", + "password": "" + } + }, + { + "comment": "UTF-8 percent encoding with the userinfo encode set.", + "href": "http://example.net", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "expected": { + "href": "http://:%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9@example.net/", + "password": "%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9" + } + }, + { + "comment": "Bytes already percent-encoded are left as-is.", + "href": "http://example.net", + "new_value": "%c3%89té", + "expected": { + "href": "http://:%c3%89t%C3%A9@example.net/", + "password": "%c3%89t%C3%A9" + } + }, + { + "href": "sc:///", + "new_value": "x", + "expected": { + "href": "sc:///", + "password": "" + } + }, + { + "href": "javascript://x/", + "new_value": "bowser", + "expected": { + "href": "javascript://:bowser@x/", + "password": "bowser" + } + }, + { + "href": "file://test/", + "new_value": "test", + "expected": { + "href": "file://test/", + "password": "" + } + } + ], + "host": [ + { + "comment": "Non-special scheme", + "href": "sc://x/", + "new_value": "\u0000", + "expected": { + "href": "sc://x/", + "host": "x", + "hostname": "x" + } + }, + { + "href": "sc://x/", + "new_value": "\u0009", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "\u000A", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "\u000D", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": " ", + "expected": { + "href": "sc://x/", + "host": "x", + "hostname": "x" + } + }, + { + "href": "sc://x/", + "new_value": "#", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "/", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "?", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "@", + "expected": { + "href": "sc://x/", + "host": "x", + "hostname": "x" + } + }, + { + "href": "sc://x/", + "new_value": "ß", + "expected": { + "href": "sc://%C3%9F/", + "host": "%C3%9F", + "hostname": "%C3%9F" + } + }, + { + "comment": "IDNA Nontransitional_Processing", + "href": "https://x/", + "new_value": "ß", + "expected": { + "href": "https://xn--zca/", + "host": "xn--zca", + "hostname": "xn--zca" + } + }, + { + "comment": "Cannot-be-a-base means no host", + "href": "mailto:me@example.net", + "new_value": "example.com", + "expected": { + "href": "mailto:me@example.net", + "host": "" + } + }, + { + "comment": "Cannot-be-a-base means no host", + "href": "data:text/plain,Stuff", + "new_value": "example.net", + "expected": { + "href": "data:text/plain,Stuff", + "host": "" + } + }, + { + "href": "http://example.net", + "new_value": "example.com:8080", + "expected": { + "href": "http://example.com:8080/", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Port number is unchanged if not specified in the new value", + "href": "http://example.net:8080", + "new_value": "example.com", + "expected": { + "href": "http://example.com:8080/", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Port number is unchanged if not specified", + "href": "http://example.net:8080", + "new_value": "example.com:", + "expected": { + "href": "http://example.com:8080/", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "The empty host is not valid for special schemes", + "href": "http://example.net", + "new_value": "", + "expected": { + "href": "http://example.net/", + "host": "example.net" + } + }, + { + "comment": "The empty host is OK for non-special schemes", + "href": "view-source+http://example.net/foo", + "new_value": "", + "expected": { + "href": "view-source+http:///foo", + "host": "" + } + }, + { + "comment": "Path-only URLs can gain a host", + "href": "a:/foo", + "new_value": "example.net", + "expected": { + "href": "a://example.net/foo", + "host": "example.net" + } + }, + { + "comment": "IPv4 address syntax is normalized", + "href": "http://example.net", + "new_value": "0x7F000001:8080", + "expected": { + "href": "http://127.0.0.1:8080/", + "host": "127.0.0.1:8080", + "hostname": "127.0.0.1", + "port": "8080" + } + }, + { + "comment": "IPv6 address syntax is normalized", + "href": "http://example.net", + "new_value": "[::0:01]:2", + "expected": { + "href": "http://[::1]:2/", + "host": "[::1]:2", + "hostname": "[::1]", + "port": "2" + } + }, + { + "comment": "IPv6 literal address with port, crbug.com/1012416", + "href": "http://example.net", + "new_value": "[2001:db8::2]:4002", + "expected": { + "href": "http://[2001:db8::2]:4002/", + "host": "[2001:db8::2]:4002", + "hostname": "[2001:db8::2]", + "port": "4002" + } + }, + { + "comment": "Default port number is removed", + "href": "http://example.net", + "new_value": "example.com:80", + "expected": { + "href": "http://example.com/", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Default port number is removed", + "href": "https://example.net", + "new_value": "example.com:443", + "expected": { + "href": "https://example.com/", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Default port number is only removed for the relevant scheme", + "href": "https://example.net", + "new_value": "example.com:80", + "expected": { + "href": "https://example.com:80/", + "host": "example.com:80", + "hostname": "example.com", + "port": "80" + } + }, + { + "comment": "Port number is removed if new port is scheme default and existing URL has a non-default port", + "href": "http://example.net:8080", + "new_value": "example.com:80", + "expected": { + "href": "http://example.com/", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a / delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com/stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a / delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com:8080/stuff", + "expected": { + "href": "http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Stuff after a ? delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com?stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a ? delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com:8080?stuff", + "expected": { + "href": "http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Stuff after a # delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com#stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a # delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com:8080#stuff", + "expected": { + "href": "http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Stuff after a \\ delimiter is ignored for special schemes", + "href": "http://example.net/path", + "new_value": "example.com\\stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a \\ delimiter is ignored for special schemes", + "href": "http://example.net/path", + "new_value": "example.com:8080\\stuff", + "expected": { + "href": "http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "\\ is not a delimiter for non-special schemes, but still forbidden in hosts", + "href": "view-source+http://example.net/path", + "new_value": "example.com\\stuff", + "expected": { + "href": "view-source+http://example.net/path", + "host": "example.net", + "hostname": "example.net", + "port": "" + } + }, + { + "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", + "href": "view-source+http://example.net/path", + "new_value": "example.com:8080stuff2", + "expected": { + "href": "view-source+http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", + "href": "http://example.net/path", + "new_value": "example.com:8080stuff2", + "expected": { + "href": "http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", + "href": "http://example.net/path", + "new_value": "example.com:8080+2", + "expected": { + "href": "http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Port numbers are 16 bit integers", + "href": "http://example.net/path", + "new_value": "example.com:65535", + "expected": { + "href": "http://example.com:65535/path", + "host": "example.com:65535", + "hostname": "example.com", + "port": "65535" + } + }, + { + "comment": "Port numbers are 16 bit integers, overflowing is an error. Hostname is still set, though.", + "href": "http://example.net/path", + "new_value": "example.com:65536", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Broken IPv6", + "href": "http://example.net/", + "new_value": "[google.com]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.2.3.4x]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.2.3.]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.2.]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "file://y/", + "new_value": "x:123", + "expected": { + "href": "file://y/", + "host": "y", + "hostname": "y", + "port": "" + } + }, + { + "href": "file://y/", + "new_value": "loc%41lhost", + "expected": { + "href": "file:///", + "host": "", + "hostname": "", + "port": "" + } + }, + { + "href": "sc://test@test/", + "new_value": "", + "expected": { + "href": "sc://test@test/", + "host": "test", + "hostname": "test", + "username": "test" + } + }, + { + "href": "sc://test:12/", + "new_value": "", + "expected": { + "href": "sc://test:12/", + "host": "test:12", + "hostname": "test", + "port": "12" + } + } + ], + "hostname": [ + { + "comment": "Non-special scheme", + "href": "sc://x/", + "new_value": "\u0000", + "expected": { + "href": "sc://x/", + "host": "x", + "hostname": "x" + } + }, + { + "href": "sc://x/", + "new_value": "\u0009", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "\u000A", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "\u000D", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": " ", + "expected": { + "href": "sc://x/", + "host": "x", + "hostname": "x" + } + }, + { + "href": "sc://x/", + "new_value": "#", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "/", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "?", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "@", + "expected": { + "href": "sc://x/", + "host": "x", + "hostname": "x" + } + }, + { + "comment": "Cannot-be-a-base means no host", + "href": "mailto:me@example.net", + "new_value": "example.com", + "expected": { + "href": "mailto:me@example.net", + "host": "" + } + }, + { + "comment": "Cannot-be-a-base means no host", + "href": "data:text/plain,Stuff", + "new_value": "example.net", + "expected": { + "href": "data:text/plain,Stuff", + "host": "" + } + }, + { + "href": "http://example.net:8080", + "new_value": "example.com", + "expected": { + "href": "http://example.com:8080/", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "The empty host is not valid for special schemes", + "href": "http://example.net", + "new_value": "", + "expected": { + "href": "http://example.net/", + "host": "example.net" + } + }, + { + "comment": "The empty host is OK for non-special schemes", + "href": "view-source+http://example.net/foo", + "new_value": "", + "expected": { + "href": "view-source+http:///foo", + "host": "" + } + }, + { + "comment": "Path-only URLs can gain a host", + "href": "a:/foo", + "new_value": "example.net", + "expected": { + "href": "a://example.net/foo", + "host": "example.net" + } + }, + { + "comment": "IPv4 address syntax is normalized", + "href": "http://example.net:8080", + "new_value": "0x7F000001", + "expected": { + "href": "http://127.0.0.1:8080/", + "host": "127.0.0.1:8080", + "hostname": "127.0.0.1", + "port": "8080" + } + }, + { + "comment": "IPv6 address syntax is normalized", + "href": "http://example.net", + "new_value": "[::0:01]", + "expected": { + "href": "http://[::1]/", + "host": "[::1]", + "hostname": "[::1]", + "port": "" + } + }, + { + "comment": "Stuff after a : delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com:8080", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a : delimiter is ignored", + "href": "http://example.net:8080/path", + "new_value": "example.com:", + "expected": { + "href": "http://example.com:8080/path", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080" + } + }, + { + "comment": "Stuff after a / delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com/stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a ? delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com?stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a # delimiter is ignored", + "href": "http://example.net/path", + "new_value": "example.com#stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "Stuff after a \\ delimiter is ignored for special schemes", + "href": "http://example.net/path", + "new_value": "example.com\\stuff", + "expected": { + "href": "http://example.com/path", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, + { + "comment": "\\ is not a delimiter for non-special schemes, but still forbidden in hosts", + "href": "view-source+http://example.net/path", + "new_value": "example.com\\stuff", + "expected": { + "href": "view-source+http://example.net/path", + "host": "example.net", + "hostname": "example.net", + "port": "" + } + }, + { + "comment": "Broken IPv6", + "href": "http://example.net/", + "new_value": "[google.com]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.2.3.4x]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.2.3.]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.2.]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "file://y/", + "new_value": "x:123", + "expected": { + "href": "file://y/", + "host": "y", + "hostname": "y", + "port": "" + } + }, + { + "href": "file://y/", + "new_value": "loc%41lhost", + "expected": { + "href": "file:///", + "host": "", + "hostname": "", + "port": "" + } + }, + { + "href": "sc://test@test/", + "new_value": "", + "expected": { + "href": "sc://test@test/", + "host": "test", + "hostname": "test", + "username": "test" + } + }, + { + "href": "sc://test:12/", + "new_value": "", + "expected": { + "href": "sc://test:12/", + "host": "test:12", + "hostname": "test", + "port": "12" + } + } + ], + "port": [ + { + "href": "http://example.net", + "new_value": "8080", + "expected": { + "href": "http://example.net:8080/", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Port number is removed if empty is the new value", + "href": "http://example.net:8080", + "new_value": "", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net", + "port": "" + } + }, + { + "comment": "Default port number is removed", + "href": "http://example.net:8080", + "new_value": "80", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net", + "port": "" + } + }, + { + "comment": "Default port number is removed", + "href": "https://example.net:4433", + "new_value": "443", + "expected": { + "href": "https://example.net/", + "host": "example.net", + "hostname": "example.net", + "port": "" + } + }, + { + "comment": "Default port number is only removed for the relevant scheme", + "href": "https://example.net", + "new_value": "80", + "expected": { + "href": "https://example.net:80/", + "host": "example.net:80", + "hostname": "example.net", + "port": "80" + } + }, + { + "comment": "Stuff after a / delimiter is ignored", + "href": "http://example.net/path", + "new_value": "8080/stuff", + "expected": { + "href": "http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Stuff after a ? delimiter is ignored", + "href": "http://example.net/path", + "new_value": "8080?stuff", + "expected": { + "href": "http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Stuff after a # delimiter is ignored", + "href": "http://example.net/path", + "new_value": "8080#stuff", + "expected": { + "href": "http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Stuff after a \\ delimiter is ignored for special schemes", + "href": "http://example.net/path", + "new_value": "8080\\stuff", + "expected": { + "href": "http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", + "href": "view-source+http://example.net/path", + "new_value": "8080stuff2", + "expected": { + "href": "view-source+http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", + "href": "http://example.net/path", + "new_value": "8080stuff2", + "expected": { + "href": "http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", + "href": "http://example.net/path", + "new_value": "8080+2", + "expected": { + "href": "http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Port numbers are 16 bit integers", + "href": "http://example.net/path", + "new_value": "65535", + "expected": { + "href": "http://example.net:65535/path", + "host": "example.net:65535", + "hostname": "example.net", + "port": "65535" + } + }, + { + "comment": "Port numbers are 16 bit integers, overflowing is an error", + "href": "http://example.net:8080/path", + "new_value": "65536", + "expected": { + "href": "http://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "comment": "Port numbers are 16 bit integers, overflowing is an error", + "href": "non-special://example.net:8080/path", + "new_value": "65536", + "expected": { + "href": "non-special://example.net:8080/path", + "host": "example.net:8080", + "hostname": "example.net", + "port": "8080" + } + }, + { + "href": "file://test/", + "new_value": "12", + "expected": { + "href": "file://test/", + "port": "" + } + }, + { + "href": "file://localhost/", + "new_value": "12", + "expected": { + "href": "file:///", + "port": "" + } + }, + { + "href": "non-base:value", + "new_value": "12", + "expected": { + "href": "non-base:value", + "port": "" + } + }, + { + "href": "sc:///", + "new_value": "12", + "expected": { + "href": "sc:///", + "port": "" + } + }, + { + "href": "sc://x/", + "new_value": "12", + "expected": { + "href": "sc://x:12/", + "port": "12" + } + }, + { + "href": "javascript://x/", + "new_value": "12", + "expected": { + "href": "javascript://x:12/", + "port": "12" + } + } + ], + "pathname": [ + { + "comment": "Cannot-be-a-base don’t have a path", + "href": "mailto:me@example.net", + "new_value": "/foo", + "expected": { + "href": "mailto:me@example.net", + "pathname": "me@example.net" + } + }, + { + "href": "unix:/run/foo.socket?timeout=10", + "new_value": "/var/log/../run/bar.socket", + "expected": { + "href": "unix:/var/run/bar.socket?timeout=10", + "pathname": "/var/run/bar.socket" + } + }, + { + "href": "https://example.net#nav", + "new_value": "home", + "expected": { + "href": "https://example.net/home#nav", + "pathname": "/home" + } + }, + { + "href": "https://example.net#nav", + "new_value": "../home", + "expected": { + "href": "https://example.net/home#nav", + "pathname": "/home" + } + }, + { + "comment": "\\ is a segment delimiter for 'special' URLs", + "href": "http://example.net/home?lang=fr#nav", + "new_value": "\\a\\%2E\\b\\%2e.\\c", + "expected": { + "href": "http://example.net/a/c?lang=fr#nav", + "pathname": "/a/c" + } + }, + { + "comment": "\\ is *not* a segment delimiter for non-'special' URLs", + "href": "view-source+http://example.net/home?lang=fr#nav", + "new_value": "\\a\\%2E\\b\\%2e.\\c", + "expected": { + "href": "view-source+http://example.net/\\a\\%2E\\b\\%2e.\\c?lang=fr#nav", + "pathname": "/\\a\\%2E\\b\\%2e.\\c" + } + }, + { + "comment": "UTF-8 percent encoding with the default encode set. Tabs and newlines are removed.", + "href": "a:/", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "expected": { + "href": "a:/%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9", + "pathname": "/%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9" + } + }, + { + "comment": "Bytes already percent-encoded are left as-is, including %2E outside dotted segments.", + "href": "http://example.net", + "new_value": "%2e%2E%c3%89té", + "expected": { + "href": "http://example.net/%2e%2E%c3%89t%C3%A9", + "pathname": "/%2e%2E%c3%89t%C3%A9" + } + }, + { + "comment": "? needs to be encoded", + "href": "http://example.net", + "new_value": "?", + "expected": { + "href": "http://example.net/%3F", + "pathname": "/%3F" + } + }, + { + "comment": "# needs to be encoded", + "href": "http://example.net", + "new_value": "#", + "expected": { + "href": "http://example.net/%23", + "pathname": "/%23" + } + }, + { + "comment": "? needs to be encoded, non-special scheme", + "href": "sc://example.net", + "new_value": "?", + "expected": { + "href": "sc://example.net/%3F", + "pathname": "/%3F" + } + }, + { + "comment": "# needs to be encoded, non-special scheme", + "href": "sc://example.net", + "new_value": "#", + "expected": { + "href": "sc://example.net/%23", + "pathname": "/%23" + } + }, + { + "comment": "File URLs and (back)slashes", + "href": "file://monkey/", + "new_value": "\\\\", + "expected": { + "href": "file://monkey/", + "pathname": "/" + } + }, + { + "comment": "File URLs and (back)slashes", + "href": "file:///unicorn", + "new_value": "//\\/", + "expected": { + "href": "file:///", + "pathname": "/" + } + }, + { + "comment": "File URLs and (back)slashes", + "href": "file:///unicorn", + "new_value": "//monkey/..//", + "expected": { + "href": "file:///", + "pathname": "/" + } + } + ], + "search": [ + { + "href": "https://example.net#nav", + "new_value": "lang=fr", + "expected": { + "href": "https://example.net/?lang=fr#nav", + "search": "?lang=fr" + } + }, + { + "href": "https://example.net?lang=en-US#nav", + "new_value": "lang=fr", + "expected": { + "href": "https://example.net/?lang=fr#nav", + "search": "?lang=fr" + } + }, + { + "href": "https://example.net?lang=en-US#nav", + "new_value": "?lang=fr", + "expected": { + "href": "https://example.net/?lang=fr#nav", + "search": "?lang=fr" + } + }, + { + "href": "https://example.net?lang=en-US#nav", + "new_value": "??lang=fr", + "expected": { + "href": "https://example.net/??lang=fr#nav", + "search": "??lang=fr" + } + }, + { + "href": "https://example.net?lang=en-US#nav", + "new_value": "?", + "expected": { + "href": "https://example.net/?#nav", + "search": "" + } + }, + { + "href": "https://example.net?lang=en-US#nav", + "new_value": "", + "expected": { + "href": "https://example.net/#nav", + "search": "" + } + }, + { + "href": "https://example.net?lang=en-US", + "new_value": "", + "expected": { + "href": "https://example.net/", + "search": "" + } + }, + { + "href": "https://example.net", + "new_value": "", + "expected": { + "href": "https://example.net/", + "search": "" + } + }, + { + "comment": "UTF-8 percent encoding with the query encode set. Tabs and newlines are removed.", + "href": "a:/", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "expected": { + "href": "a:/?%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9", + "search": "?%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9" + } + }, + { + "comment": "Bytes already percent-encoded are left as-is", + "href": "http://example.net", + "new_value": "%c3%89té", + "expected": { + "href": "http://example.net/?%c3%89t%C3%A9", + "search": "?%c3%89t%C3%A9" + } + } + ], + "hash": [ + { + "href": "https://example.net", + "new_value": "main", + "expected": { + "href": "https://example.net/#main", + "hash": "#main" + } + }, + { + "href": "https://example.net#nav", + "new_value": "main", + "expected": { + "href": "https://example.net/#main", + "hash": "#main" + } + }, + { + "href": "https://example.net?lang=en-US", + "new_value": "##nav", + "expected": { + "href": "https://example.net/?lang=en-US##nav", + "hash": "##nav" + } + }, + { + "href": "https://example.net?lang=en-US#nav", + "new_value": "#main", + "expected": { + "href": "https://example.net/?lang=en-US#main", + "hash": "#main" + } + }, + { + "href": "https://example.net?lang=en-US#nav", + "new_value": "#", + "expected": { + "href": "https://example.net/?lang=en-US#", + "hash": "" + } + }, + { + "href": "https://example.net?lang=en-US#nav", + "new_value": "", + "expected": { + "href": "https://example.net/?lang=en-US", + "hash": "" + } + }, + { + "href": "http://example.net", + "new_value": "#foo bar", + "expected": { + "href": "http://example.net/#foo%20bar", + "hash": "#foo%20bar" + } + }, + { + "href": "http://example.net", + "new_value": "#foo\"bar", + "expected": { + "href": "http://example.net/#foo%22bar", + "hash": "#foo%22bar" + } + }, + { + "href": "http://example.net", + "new_value": "#foobar", + "expected": { + "href": "http://example.net/#foo%3Ebar", + "hash": "#foo%3Ebar" + } + }, + { + "href": "http://example.net", + "new_value": "#foo`bar", + "expected": { + "href": "http://example.net/#foo%60bar", + "hash": "#foo%60bar" + } + }, + { + "comment": "Simple percent-encoding; tabs and newlines are removed", + "href": "a:/", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "expected": { + "href": "a:/#%00%01%1F%20!%22#$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_%60az{|}~%7F%C2%80%C2%81%C3%89%C3%A9", + "hash": "#%00%01%1F%20!%22#$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_%60az{|}~%7F%C2%80%C2%81%C3%89%C3%A9" + } + }, + { + "comment": "Percent-encode NULLs in fragment", + "href": "http://example.net", + "new_value": "a\u0000b", + "expected": { + "href": "http://example.net/#a%00b", + "hash": "#a%00b" + } + }, + { + "comment": "Percent-encode NULLs in fragment", + "href": "non-spec:/", + "new_value": "a\u0000b", + "expected": { + "href": "non-spec:/#a%00b", + "hash": "#a%00b" + } + }, + { + "comment": "Bytes already percent-encoded are left as-is", + "href": "http://example.net", + "new_value": "%c3%89té", + "expected": { + "href": "http://example.net/#%c3%89t%C3%A9", + "hash": "#%c3%89t%C3%A9" + } + }, + { + "href": "javascript:alert(1)", + "new_value": "castle", + "expected": { + "href": "javascript:alert(1)#castle", + "hash": "#castle" + } + } + ] +} diff --git a/tests/unit.rs b/tests/unit.rs new file mode 100644 index 0000000..55ff59a --- /dev/null +++ b/tests/unit.rs @@ -0,0 +1,1164 @@ +// Copyright 2013-2014 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Unit tests + +use std::borrow::Cow; +use std::cell::{Cell, RefCell}; +use std::net::{Ipv4Addr, Ipv6Addr}; +use std::path::{Path, PathBuf}; +use url::{form_urlencoded, Host, Origin, Url}; + +#[test] +fn size() { + use std::mem::size_of; + assert_eq!(size_of::(), size_of::>()); +} + +#[test] +fn test_relative() { + let base: Url = "sc://%C3%B1".parse().unwrap(); + let url = base.join("/resources/testharness.js").unwrap(); + assert_eq!(url.as_str(), "sc://%C3%B1/resources/testharness.js"); +} + +#[test] +fn test_relative_empty() { + let base: Url = "sc://%C3%B1".parse().unwrap(); + let url = base.join("").unwrap(); + assert_eq!(url.as_str(), "sc://%C3%B1"); +} + +#[test] +fn test_set_empty_host() { + let mut base: Url = "moz://foo:bar@servo/baz".parse().unwrap(); + base.set_username("").unwrap(); + assert_eq!(base.as_str(), "moz://:bar@servo/baz"); + base.set_host(None).unwrap(); + assert_eq!(base.as_str(), "moz:/baz"); + base.set_host(Some("servo")).unwrap(); + assert_eq!(base.as_str(), "moz://servo/baz"); + + let mut base: Url = "file://server/share/foo/bar".parse().unwrap(); + base.set_host(None).unwrap(); + assert_eq!(base.as_str(), "file:///share/foo/bar"); + + let mut base: Url = "file://server/share/foo/bar".parse().unwrap(); + base.set_host(Some("foo")).unwrap(); + assert_eq!(base.as_str(), "file://foo/share/foo/bar"); +} + +#[test] +fn test_set_empty_hostname() { + use url::quirks; + let mut base: Url = "moz://foo@servo/baz".parse().unwrap(); + assert!( + quirks::set_hostname(&mut base, "").is_err(), + "setting an empty hostname to a url with a username should fail" + ); + base = "moz://:pass@servo/baz".parse().unwrap(); + assert!( + quirks::set_hostname(&mut base, "").is_err(), + "setting an empty hostname to a url with a password should fail" + ); + base = "moz://servo/baz".parse().unwrap(); + quirks::set_hostname(&mut base, "").unwrap(); + assert_eq!(base.as_str(), "moz:///baz"); +} + +macro_rules! assert_from_file_path { + ($path: expr) => { + assert_from_file_path!($path, $path) + }; + ($path: expr, $url_path: expr) => {{ + let url = Url::from_file_path(Path::new($path)).unwrap(); + assert_eq!(url.host(), None); + assert_eq!(url.path(), $url_path); + assert_eq!(url.to_file_path(), Ok(PathBuf::from($path))); + }}; +} + +#[test] +fn new_file_paths() { + if cfg!(unix) { + assert_eq!(Url::from_file_path(Path::new("relative")), Err(())); + assert_eq!(Url::from_file_path(Path::new("../relative")), Err(())); + } + if cfg!(windows) { + assert_eq!(Url::from_file_path(Path::new("relative")), Err(())); + assert_eq!(Url::from_file_path(Path::new(r"..\relative")), Err(())); + assert_eq!(Url::from_file_path(Path::new(r"\drive-relative")), Err(())); + assert_eq!(Url::from_file_path(Path::new(r"\\ucn\")), Err(())); + } + + if cfg!(unix) { + assert_from_file_path!("/foo/bar"); + assert_from_file_path!("/foo/ba\0r", "/foo/ba%00r"); + assert_from_file_path!("/foo/ba%00r", "/foo/ba%2500r"); + } +} + +#[test] +#[cfg(unix)] +fn new_path_bad_utf8() { + use std::ffi::OsStr; + use std::os::unix::prelude::*; + + let url = Url::from_file_path(Path::new(OsStr::from_bytes(b"/foo/ba\x80r"))).unwrap(); + let os_str = OsStr::from_bytes(b"/foo/ba\x80r"); + assert_eq!(url.to_file_path(), Ok(PathBuf::from(os_str))); +} + +#[test] +fn new_path_windows_fun() { + if cfg!(windows) { + assert_from_file_path!(r"C:\foo\bar", "/C:/foo/bar"); + assert_from_file_path!("C:\\foo\\ba\0r", "/C:/foo/ba%00r"); + + // Invalid UTF-8 + assert!(Url::parse("file:///C:/foo/ba%80r") + .unwrap() + .to_file_path() + .is_err()); + + // test windows canonicalized path + let path = PathBuf::from(r"\\?\C:\foo\bar"); + assert!(Url::from_file_path(path).is_ok()); + + // Percent-encoded drive letter + let url = Url::parse("file:///C%3A/foo/bar").unwrap(); + assert_eq!(url.to_file_path(), Ok(PathBuf::from(r"C:\foo\bar"))); + } +} + +#[test] +fn new_directory_paths() { + if cfg!(unix) { + assert_eq!(Url::from_directory_path(Path::new("relative")), Err(())); + assert_eq!(Url::from_directory_path(Path::new("../relative")), Err(())); + + let url = Url::from_directory_path(Path::new("/foo/bar")).unwrap(); + assert_eq!(url.host(), None); + assert_eq!(url.path(), "/foo/bar/"); + } + if cfg!(windows) { + assert_eq!(Url::from_directory_path(Path::new("relative")), Err(())); + assert_eq!(Url::from_directory_path(Path::new(r"..\relative")), Err(())); + assert_eq!( + Url::from_directory_path(Path::new(r"\drive-relative")), + Err(()) + ); + assert_eq!(Url::from_directory_path(Path::new(r"\\ucn\")), Err(())); + + let url = Url::from_directory_path(Path::new(r"C:\foo\bar")).unwrap(); + assert_eq!(url.host(), None); + assert_eq!(url.path(), "/C:/foo/bar/"); + } +} + +#[test] +fn path_backslash_fun() { + let mut special_url = "http://foobar.com".parse::().unwrap(); + special_url.path_segments_mut().unwrap().push("foo\\bar"); + assert_eq!(special_url.as_str(), "http://foobar.com/foo%5Cbar"); + + let mut nonspecial_url = "thing://foobar.com".parse::().unwrap(); + nonspecial_url.path_segments_mut().unwrap().push("foo\\bar"); + assert_eq!(nonspecial_url.as_str(), "thing://foobar.com/foo\\bar"); +} + +#[test] +fn from_str() { + assert!("http://testing.com/this".parse::().is_ok()); +} + +#[test] +fn parse_with_params() { + let url = Url::parse_with_params( + "http://testing.com/this?dont=clobberme", + &[("lang", "rust")], + ) + .unwrap(); + + assert_eq!( + url.as_str(), + "http://testing.com/this?dont=clobberme&lang=rust" + ); +} + +#[test] +fn issue_124() { + let url: Url = "file:a".parse().unwrap(); + assert_eq!(url.path(), "/a"); + let url: Url = "file:...".parse().unwrap(); + assert_eq!(url.path(), "/..."); + let url: Url = "file:..".parse().unwrap(); + assert_eq!(url.path(), "/"); +} + +#[test] +fn test_equality() { + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + + fn check_eq(a: &Url, b: &Url) { + assert_eq!(a, b); + + let mut h1 = DefaultHasher::new(); + a.hash(&mut h1); + let mut h2 = DefaultHasher::new(); + b.hash(&mut h2); + assert_eq!(h1.finish(), h2.finish()); + } + + fn url(s: &str) -> Url { + let rv = s.parse().unwrap(); + check_eq(&rv, &rv); + rv + } + + // Doesn't care if default port is given. + let a: Url = url("https://example.com/"); + let b: Url = url("https://example.com:443/"); + check_eq(&a, &b); + + // Different ports + let a: Url = url("http://example.com/"); + let b: Url = url("http://example.com:8080/"); + assert!(a != b, "{:?} != {:?}", a, b); + + // Different scheme + let a: Url = url("http://example.com/"); + let b: Url = url("https://example.com/"); + assert_ne!(a, b); + + // Different host + let a: Url = url("http://foo.com/"); + let b: Url = url("http://bar.com/"); + assert_ne!(a, b); + + // Missing path, automatically substituted. Semantically the same. + let a: Url = url("http://foo.com"); + let b: Url = url("http://foo.com/"); + check_eq(&a, &b); +} + +#[test] +fn host() { + fn assert_host(input: &str, host: Host<&str>) { + assert_eq!(Url::parse(input).unwrap().host(), Some(host)); + } + assert_host("http://www.mozilla.org", Host::Domain("www.mozilla.org")); + assert_host( + "http://1.35.33.49", + Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49)), + ); + assert_host( + "http://[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", + Host::Ipv6(Ipv6Addr::new( + 0x2001, 0x0db8, 0x85a3, 0x08d3, 0x1319, 0x8a2e, 0x0370, 0x7344, + )), + ); + assert_host( + "http://[::]", + Host::Ipv6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 0)), + ); + assert_host( + "http://[::1]", + Host::Ipv6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1)), + ); + assert_host( + "http://0x1.0X23.0x21.061", + Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49)), + ); + assert_host("http://0x1232131", Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49))); + assert_host("http://111", Host::Ipv4(Ipv4Addr::new(0, 0, 0, 111))); + assert!(Url::parse("http://1.35.+33.49").is_err()); + assert!(Url::parse("http://2..2.3").is_err()); + assert!(Url::parse("http://42.0x1232131").is_err()); + assert!(Url::parse("http://192.168.0.257").is_err()); + + assert_eq!(Host::Domain("foo"), Host::Domain("foo").to_owned()); + assert_ne!(Host::Domain("foo"), Host::Domain("bar").to_owned()); +} + +#[test] +fn host_serialization() { + // libstd’s `Display for Ipv6Addr` serializes 0:0:0:0:0:0:_:_ and 0:0:0:0:0:ffff:_:_ + // using IPv4-like syntax, as suggested in https://tools.ietf.org/html/rfc5952#section-4 + // but https://url.spec.whatwg.org/#concept-ipv6-serializer specifies not to. + + // Not [::0.0.0.2] / [::ffff:0.0.0.2] + assert_eq!( + Url::parse("http://[0::2]").unwrap().host_str(), + Some("[::2]") + ); + assert_eq!( + Url::parse("http://[0::ffff:0:2]").unwrap().host_str(), + Some("[::ffff:0:2]") + ); +} + +#[test] +fn test_idna() { + assert!("http://goșu.ro".parse::().is_ok()); + assert_eq!( + Url::parse("http://☃.net/").unwrap().host(), + Some(Host::Domain("xn--n3h.net")) + ); + assert!("https://r2---sn-huoa-cvhl.googlevideo.com/crossdomain.xml" + .parse::() + .is_ok()); +} + +#[test] +fn test_serialization() { + let data = [ + ("http://example.com/", "http://example.com/"), + ("http://addslash.com", "http://addslash.com/"), + ("http://@emptyuser.com/", "http://emptyuser.com/"), + ("http://:@emptypass.com/", "http://emptypass.com/"), + ("http://user@user.com/", "http://user@user.com/"), + ( + "http://user:pass@userpass.com/", + "http://user:pass@userpass.com/", + ), + ( + "http://slashquery.com/path/?q=something", + "http://slashquery.com/path/?q=something", + ), + ( + "http://noslashquery.com/path?q=something", + "http://noslashquery.com/path?q=something", + ), + ]; + for &(input, result) in &data { + let url = Url::parse(input).unwrap(); + assert_eq!(url.as_str(), result); + } +} + +#[test] +fn test_form_urlencoded() { + let pairs: &[(Cow<'_, str>, Cow<'_, str>)] = &[ + ("foo".into(), "é&".into()), + ("bar".into(), "".into()), + ("foo".into(), "#".into()), + ]; + let encoded = form_urlencoded::Serializer::new(String::new()) + .extend_pairs(pairs) + .finish(); + assert_eq!(encoded, "foo=%C3%A9%26&bar=&foo=%23"); + assert_eq!( + form_urlencoded::parse(encoded.as_bytes()).collect::>(), + pairs.to_vec() + ); +} + +#[test] +fn test_form_serialize() { + let encoded = form_urlencoded::Serializer::new(String::new()) + .append_pair("foo", "é&") + .append_pair("bar", "") + .append_pair("foo", "#") + .append_key_only("json") + .finish(); + assert_eq!(encoded, "foo=%C3%A9%26&bar=&foo=%23&json"); +} + +#[test] +fn form_urlencoded_encoding_override() { + let encoded = form_urlencoded::Serializer::new(String::new()) + .encoding_override(Some(&|s| s.as_bytes().to_ascii_uppercase().into())) + .append_pair("foo", "bar") + .append_key_only("xml") + .finish(); + assert_eq!(encoded, "FOO=BAR&XML"); +} + +#[test] +/// https://github.com/servo/rust-url/issues/61 +fn issue_61() { + let mut url = Url::parse("http://mozilla.org").unwrap(); + url.set_scheme("https").unwrap(); + assert_eq!(url.port(), None); + assert_eq!(url.port_or_known_default(), Some(443)); + url.check_invariants().unwrap(); +} + +#[test] +#[cfg(not(windows))] +/// https://github.com/servo/rust-url/issues/197 +fn issue_197() { + let mut url = Url::from_file_path("/").expect("Failed to parse path"); + url.check_invariants().unwrap(); + assert_eq!( + url, + Url::parse("file:///").expect("Failed to parse path + protocol") + ); + url.path_segments_mut() + .expect("path_segments_mut") + .pop_if_empty(); +} + +#[test] +fn issue_241() { + Url::parse("mailto:").unwrap().cannot_be_a_base(); +} + +#[test] +/// https://github.com/servo/rust-url/issues/222 +fn append_trailing_slash() { + let mut url: Url = "http://localhost:6767/foo/bar?a=b".parse().unwrap(); + url.check_invariants().unwrap(); + url.path_segments_mut().unwrap().push(""); + url.check_invariants().unwrap(); + assert_eq!(url.to_string(), "http://localhost:6767/foo/bar/?a=b"); +} + +#[test] +/// https://github.com/servo/rust-url/issues/227 +fn extend_query_pairs_then_mutate() { + let mut url: Url = "http://localhost:6767/foo/bar".parse().unwrap(); + url.query_pairs_mut() + .extend_pairs(vec![("auth", "my-token")].into_iter()); + url.check_invariants().unwrap(); + assert_eq!( + url.to_string(), + "http://localhost:6767/foo/bar?auth=my-token" + ); + url.path_segments_mut().unwrap().push("some_other_path"); + url.check_invariants().unwrap(); + assert_eq!( + url.to_string(), + "http://localhost:6767/foo/bar/some_other_path?auth=my-token" + ); +} + +#[test] +/// https://github.com/servo/rust-url/issues/222 +fn append_empty_segment_then_mutate() { + let mut url: Url = "http://localhost:6767/foo/bar?a=b".parse().unwrap(); + url.check_invariants().unwrap(); + url.path_segments_mut().unwrap().push("").pop(); + url.check_invariants().unwrap(); + assert_eq!(url.to_string(), "http://localhost:6767/foo/bar?a=b"); +} + +#[test] +/// https://github.com/servo/rust-url/issues/243 +fn test_set_host() { + let mut url = Url::parse("https://example.net/hello").unwrap(); + url.set_host(Some("foo.com")).unwrap(); + assert_eq!(url.as_str(), "https://foo.com/hello"); + assert!(url.set_host(None).is_err()); + assert_eq!(url.as_str(), "https://foo.com/hello"); + assert!(url.set_host(Some("")).is_err()); + assert_eq!(url.as_str(), "https://foo.com/hello"); + + let mut url = Url::parse("foobar://example.net/hello").unwrap(); + url.set_host(None).unwrap(); + assert_eq!(url.as_str(), "foobar:/hello"); + + let mut url = Url::parse("foo://ș").unwrap(); + assert_eq!(url.as_str(), "foo://%C8%99"); + url.set_host(Some("goșu.ro")).unwrap(); + assert_eq!(url.as_str(), "foo://go%C8%99u.ro"); +} + +#[test] +// https://github.com/servo/rust-url/issues/166 +fn test_leading_dots() { + assert_eq!( + Host::parse(".org").unwrap(), + Host::Domain(".org".to_owned()) + ); + assert_eq!(Url::parse("file://./foo").unwrap().domain(), Some(".")); +} + +#[test] +/// https://github.com/servo/rust-url/issues/302 +fn test_origin_hash() { + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + + fn hash(value: &T) -> u64 { + let mut hasher = DefaultHasher::new(); + value.hash(&mut hasher); + hasher.finish() + } + + let origin = &Url::parse("http://example.net/").unwrap().origin(); + + let origins_to_compare = [ + Url::parse("http://example.net:80/").unwrap().origin(), + Url::parse("http://example.net:81/").unwrap().origin(), + Url::parse("http://example.net").unwrap().origin(), + Url::parse("http://example.net/hello").unwrap().origin(), + Url::parse("https://example.net").unwrap().origin(), + Url::parse("ftp://example.net").unwrap().origin(), + Url::parse("file://example.net").unwrap().origin(), + Url::parse("http://user@example.net/").unwrap().origin(), + Url::parse("http://user:pass@example.net/") + .unwrap() + .origin(), + ]; + + for origin_to_compare in &origins_to_compare { + if origin == origin_to_compare { + assert_eq!(hash(origin), hash(origin_to_compare)); + } else { + assert_ne!(hash(origin), hash(origin_to_compare)); + } + } + + let opaque_origin = Url::parse("file://example.net").unwrap().origin(); + let same_opaque_origin = Url::parse("file://example.net").unwrap().origin(); + let other_opaque_origin = Url::parse("file://other").unwrap().origin(); + + assert_ne!(hash(&opaque_origin), hash(&same_opaque_origin)); + assert_ne!(hash(&opaque_origin), hash(&other_opaque_origin)); +} + +#[test] +fn test_origin_blob_equality() { + let origin = &Url::parse("http://example.net/").unwrap().origin(); + let blob_origin = &Url::parse("blob:http://example.net/").unwrap().origin(); + + assert_eq!(origin, blob_origin); +} + +#[test] +fn test_origin_opaque() { + assert!(!Origin::new_opaque().is_tuple()); + assert!(!&Url::parse("blob:malformed//").unwrap().origin().is_tuple()) +} + +#[test] +fn test_origin_unicode_serialization() { + let data = [ + ("http://😅.com", "http://😅.com"), + ("ftp://😅:🙂@🙂.com", "ftp://🙂.com"), + ("https://user@😅.com", "https://😅.com"), + ("http://😅.🙂:40", "http://😅.🙂:40"), + ]; + for &(unicode_url, expected_serialization) in &data { + let origin = Url::parse(unicode_url).unwrap().origin(); + assert_eq!(origin.unicode_serialization(), *expected_serialization); + } + + let ascii_origins = [ + Url::parse("http://example.net/").unwrap().origin(), + Url::parse("http://example.net:80/").unwrap().origin(), + Url::parse("http://example.net:81/").unwrap().origin(), + Url::parse("http://example.net").unwrap().origin(), + Url::parse("http://example.net/hello").unwrap().origin(), + Url::parse("https://example.net").unwrap().origin(), + Url::parse("ftp://example.net").unwrap().origin(), + Url::parse("file://example.net").unwrap().origin(), + Url::parse("http://user@example.net/").unwrap().origin(), + Url::parse("http://user:pass@example.net/") + .unwrap() + .origin(), + Url::parse("http://127.0.0.1").unwrap().origin(), + ]; + for ascii_origin in &ascii_origins { + assert_eq!( + ascii_origin.ascii_serialization(), + ascii_origin.unicode_serialization() + ); + } +} + +#[test] +fn test_socket_addrs() { + use std::net::ToSocketAddrs; + + let data = [ + ("https://127.0.0.1/", "127.0.0.1", 443), + ("https://127.0.0.1:9742/", "127.0.0.1", 9742), + ("custom-protocol://127.0.0.1:9742/", "127.0.0.1", 9742), + ("custom-protocol://127.0.0.1/", "127.0.0.1", 9743), + ("https://[::1]/", "::1", 443), + ("https://[::1]:9742/", "::1", 9742), + ("custom-protocol://[::1]:9742/", "::1", 9742), + ("custom-protocol://[::1]/", "::1", 9743), + ("https://localhost/", "localhost", 443), + ("https://localhost:9742/", "localhost", 9742), + ("custom-protocol://localhost:9742/", "localhost", 9742), + ("custom-protocol://localhost/", "localhost", 9743), + ]; + + for (url_string, host, port) in &data { + let url = url::Url::parse(url_string).unwrap(); + let addrs = url + .socket_addrs(|| match url.scheme() { + "custom-protocol" => Some(9743), + _ => None, + }) + .unwrap(); + assert_eq!( + Some(addrs[0]), + (*host, *port).to_socket_addrs().unwrap().next() + ); + } +} + +#[test] +fn test_no_base_url() { + let mut no_base_url = Url::parse("mailto:test@example.net").unwrap(); + + assert!(no_base_url.cannot_be_a_base()); + assert!(no_base_url.path_segments().is_none()); + assert!(no_base_url.path_segments_mut().is_err()); + assert!(no_base_url.set_host(Some("foo")).is_err()); + assert!(no_base_url + .set_ip_host("127.0.0.1".parse().unwrap()) + .is_err()); + + no_base_url.set_path("/foo"); + assert_eq!(no_base_url.path(), "%2Ffoo"); +} + +#[test] +fn test_domain() { + let url = Url::parse("https://127.0.0.1/").unwrap(); + assert_eq!(url.domain(), None); + + let url = Url::parse("mailto:test@example.net").unwrap(); + assert_eq!(url.domain(), None); + + let url = Url::parse("https://example.com/").unwrap(); + assert_eq!(url.domain(), Some("example.com")); +} + +#[test] +fn test_query() { + let url = Url::parse("https://example.com/products?page=2#fragment").unwrap(); + assert_eq!(url.query(), Some("page=2")); + assert_eq!( + url.query_pairs().next(), + Some((Cow::Borrowed("page"), Cow::Borrowed("2"))) + ); + + let url = Url::parse("https://example.com/products").unwrap(); + assert!(url.query().is_none()); + assert_eq!(url.query_pairs().count(), 0); + + let url = Url::parse("https://example.com/?country=español").unwrap(); + assert_eq!(url.query(), Some("country=espa%C3%B1ol")); + assert_eq!( + url.query_pairs().next(), + Some((Cow::Borrowed("country"), Cow::Borrowed("español"))) + ); + + let url = Url::parse("https://example.com/products?page=2&sort=desc").unwrap(); + assert_eq!(url.query(), Some("page=2&sort=desc")); + let mut pairs = url.query_pairs(); + assert_eq!(pairs.count(), 2); + assert_eq!( + pairs.next(), + Some((Cow::Borrowed("page"), Cow::Borrowed("2"))) + ); + assert_eq!( + pairs.next(), + Some((Cow::Borrowed("sort"), Cow::Borrowed("desc"))) + ); +} + +#[test] +fn test_fragment() { + let url = Url::parse("https://example.com/#fragment").unwrap(); + assert_eq!(url.fragment(), Some("fragment")); + + let url = Url::parse("https://example.com/").unwrap(); + assert_eq!(url.fragment(), None); +} + +#[test] +fn test_set_ip_host() { + let mut url = Url::parse("http://example.com").unwrap(); + + url.set_ip_host("127.0.0.1".parse().unwrap()).unwrap(); + assert_eq!(url.host_str(), Some("127.0.0.1")); + + url.set_ip_host("::1".parse().unwrap()).unwrap(); + assert_eq!(url.host_str(), Some("[::1]")); +} + +#[test] +fn test_set_href() { + use url::quirks::set_href; + + let mut url = Url::parse("https://existing.url").unwrap(); + + assert!(set_href(&mut url, "mal//formed").is_err()); + + assert!(set_href( + &mut url, + "https://user:pass@domain.com:9742/path/file.ext?key=val&key2=val2#fragment" + ) + .is_ok()); + assert_eq!( + url, + Url::parse("https://user:pass@domain.com:9742/path/file.ext?key=val&key2=val2#fragment") + .unwrap() + ); +} + +#[test] +fn test_domain_encoding_quirks() { + use url::quirks::{domain_to_ascii, domain_to_unicode}; + + let data = [ + ("http://example.com", "", ""), + ("😅.🙂", "xn--j28h.xn--938h", "😅.🙂"), + ("example.com", "example.com", "example.com"), + ("mailto:test@example.net", "", ""), + ]; + + for url in &data { + assert_eq!(domain_to_ascii(url.0), url.1); + assert_eq!(domain_to_unicode(url.0), url.2); + } +} + +#[cfg(feature = "expose_internals")] +#[test] +fn test_expose_internals() { + use url::quirks::internal_components; + use url::quirks::InternalComponents; + + let url = Url::parse("https://example.com/path/file.ext?key=val&key2=val2#fragment").unwrap(); + let InternalComponents { + scheme_end, + username_end, + host_start, + host_end, + port, + path_start, + query_start, + fragment_start, + } = internal_components(&url); + + assert_eq!(scheme_end, 5); + assert_eq!(username_end, 8); + assert_eq!(host_start, 8); + assert_eq!(host_end, 19); + assert_eq!(port, None); + assert_eq!(path_start, 19); + assert_eq!(query_start, Some(33)); + assert_eq!(fragment_start, Some(51)); +} + +#[test] +fn test_windows_unc_path() { + if !cfg!(windows) { + return; + } + + let url = Url::from_file_path(Path::new(r"\\host\share\path\file.txt")).unwrap(); + assert_eq!(url.as_str(), "file://host/share/path/file.txt"); + + let url = Url::from_file_path(Path::new(r"\\höst\share\path\file.txt")).unwrap(); + assert_eq!(url.as_str(), "file://xn--hst-sna/share/path/file.txt"); + + let url = Url::from_file_path(Path::new(r"\\192.168.0.1\share\path\file.txt")).unwrap(); + assert_eq!(url.host(), Some(Host::Ipv4(Ipv4Addr::new(192, 168, 0, 1)))); + + let path = url.to_file_path().unwrap(); + assert_eq!(path.to_str(), Some(r"\\192.168.0.1\share\path\file.txt")); + + // Another way to write these: + let url = Url::from_file_path(Path::new(r"\\?\UNC\host\share\path\file.txt")).unwrap(); + assert_eq!(url.as_str(), "file://host/share/path/file.txt"); + + // Paths starting with "\\.\" (Local Device Paths) are intentionally not supported. + let url = Url::from_file_path(Path::new(r"\\.\some\path\file.txt")); + assert!(url.is_err()); +} + +#[test] +fn test_syntax_violation_callback() { + use url::SyntaxViolation::*; + let violation = Cell::new(None); + let url = Url::options() + .syntax_violation_callback(Some(&|v| violation.set(Some(v)))) + .parse("http:////mozilla.org:42") + .unwrap(); + assert_eq!(url.port(), Some(42)); + + let v = violation.take().unwrap(); + assert_eq!(v, ExpectedDoubleSlash); + assert_eq!(v.description(), "expected //"); + assert_eq!(v.to_string(), "expected //"); +} + +#[test] +fn test_syntax_violation_callback_lifetimes() { + use url::SyntaxViolation::*; + let violation = Cell::new(None); + let vfn = |s| violation.set(Some(s)); + + let url = Url::options() + .syntax_violation_callback(Some(&vfn)) + .parse("http:////mozilla.org:42") + .unwrap(); + assert_eq!(url.port(), Some(42)); + assert_eq!(violation.take(), Some(ExpectedDoubleSlash)); + + let url = Url::options() + .syntax_violation_callback(Some(&vfn)) + .parse("http://mozilla.org\\path") + .unwrap(); + assert_eq!(url.path(), "/path"); + assert_eq!(violation.take(), Some(Backslash)); +} + +#[test] +fn test_syntax_violation_callback_types() { + use url::SyntaxViolation::*; + + let data = [ + ("http://mozilla.org/\\foo", Backslash, "backslash"), + (" http://mozilla.org", C0SpaceIgnored, "leading or trailing control or space character are ignored in URLs"), + ("http://user:pass@mozilla.org", EmbeddedCredentials, "embedding authentication information (username or password) in an URL is not recommended"), + ("http:///mozilla.org", ExpectedDoubleSlash, "expected //"), + ("file:/foo.txt", ExpectedFileDoubleSlash, "expected // after file:"), + ("file://mozilla.org/c:/file.txt", FileWithHostAndWindowsDrive, "file: with host and Windows drive letter"), + ("http://mozilla.org/^", NonUrlCodePoint, "non-URL code point"), + ("http://mozilla.org/#\x000", NullInFragment, "NULL characters are ignored in URL fragment identifiers"), + ("http://mozilla.org/%1", PercentDecode, "expected 2 hex digits after %"), + ("http://mozilla.org\t/foo", TabOrNewlineIgnored, "tabs or newlines are ignored in URLs"), + ("http://user@:pass@mozilla.org", UnencodedAtSign, "unencoded @ sign in username or password") + ]; + + for test_case in &data { + let violation = Cell::new(None); + Url::options() + .syntax_violation_callback(Some(&|v| violation.set(Some(v)))) + .parse(test_case.0) + .unwrap(); + + let v = violation.take(); + assert_eq!(v, Some(test_case.1)); + assert_eq!(v.unwrap().description(), test_case.2); + assert_eq!(v.unwrap().to_string(), test_case.2); + } +} + +#[test] +fn test_options_reuse() { + use url::SyntaxViolation::*; + let violations = RefCell::new(Vec::new()); + let vfn = |v| violations.borrow_mut().push(v); + + let options = Url::options().syntax_violation_callback(Some(&vfn)); + let url = options.parse("http:////mozilla.org").unwrap(); + + let options = options.base_url(Some(&url)); + let url = options.parse("/sub\\path").unwrap(); + assert_eq!(url.as_str(), "http://mozilla.org/sub/path"); + assert_eq!(*violations.borrow(), vec!(ExpectedDoubleSlash, Backslash)); +} + +/// https://github.com/servo/rust-url/issues/505 +#[cfg(windows)] +#[test] +fn test_url_from_file_path() { + use std::path::PathBuf; + use url::Url; + + let p = PathBuf::from("c:///"); + let u = Url::from_file_path(p).unwrap(); + let path = u.to_file_path().unwrap(); + assert_eq!("C:\\", path.to_str().unwrap()); +} + +/// https://github.com/servo/rust-url/issues/505 +#[cfg(not(windows))] +#[test] +fn test_url_from_file_path() { + use std::path::PathBuf; + use url::Url; + + let p = PathBuf::from("/c:/"); + let u = Url::from_file_path(p).unwrap(); + let path = u.to_file_path().unwrap(); + assert_eq!("/c:/", path.to_str().unwrap()); +} + +#[test] +fn test_non_special_path() { + let mut db_url = url::Url::parse("postgres://postgres@localhost/").unwrap(); + assert_eq!(db_url.as_str(), "postgres://postgres@localhost/"); + db_url.set_path("diesel_foo"); + assert_eq!(db_url.as_str(), "postgres://postgres@localhost/diesel_foo"); + assert_eq!(db_url.path(), "/diesel_foo"); +} + +#[test] +fn test_non_special_path2() { + let mut db_url = url::Url::parse("postgres://postgres@localhost/").unwrap(); + assert_eq!(db_url.as_str(), "postgres://postgres@localhost/"); + db_url.set_path(""); + assert_eq!(db_url.path(), ""); + assert_eq!(db_url.as_str(), "postgres://postgres@localhost"); + db_url.set_path("foo"); + assert_eq!(db_url.path(), "/foo"); + assert_eq!(db_url.as_str(), "postgres://postgres@localhost/foo"); + db_url.set_path("/bar"); + assert_eq!(db_url.path(), "/bar"); + assert_eq!(db_url.as_str(), "postgres://postgres@localhost/bar"); +} + +#[test] +fn test_non_special_path3() { + let mut db_url = url::Url::parse("postgres://postgres@localhost/").unwrap(); + assert_eq!(db_url.as_str(), "postgres://postgres@localhost/"); + db_url.set_path("/"); + assert_eq!(db_url.as_str(), "postgres://postgres@localhost/"); + assert_eq!(db_url.path(), "/"); + db_url.set_path("/foo"); + assert_eq!(db_url.as_str(), "postgres://postgres@localhost/foo"); + assert_eq!(db_url.path(), "/foo"); +} + +#[test] +fn test_set_scheme_to_file_with_host() { + let mut url: Url = "http://localhost:6767/foo/bar".parse().unwrap(); + let result = url.set_scheme("file"); + assert_eq!(url.to_string(), "http://localhost:6767/foo/bar"); + assert_eq!(result, Err(())); +} + +#[test] +fn no_panic() { + let mut url = Url::parse("arhttpsps:/.//eom/dae.com/\\\\t\\:").unwrap(); + url::quirks::set_hostname(&mut url, "//eom/datcom/\\\\t\\://eom/data.cs").unwrap(); +} + +#[test] +fn pop_if_empty_in_bounds() { + let mut url = Url::parse("m://").unwrap(); + let mut segments = url.path_segments_mut().unwrap(); + segments.pop_if_empty(); + segments.pop(); +} + +#[test] +fn test_slicing() { + use url::Position::*; + + #[derive(Default)] + struct ExpectedSlices<'a> { + full: &'a str, + scheme: &'a str, + username: &'a str, + password: &'a str, + host: &'a str, + port: &'a str, + path: &'a str, + query: &'a str, + fragment: &'a str, + } + + let data = [ + ExpectedSlices { + full: "https://user:pass@domain.com:9742/path/file.ext?key=val&key2=val2#fragment", + scheme: "https", + username: "user", + password: "pass", + host: "domain.com", + port: "9742", + path: "/path/file.ext", + query: "key=val&key2=val2", + fragment: "fragment", + }, + ExpectedSlices { + full: "https://domain.com:9742/path/file.ext#fragment", + scheme: "https", + host: "domain.com", + port: "9742", + path: "/path/file.ext", + fragment: "fragment", + ..Default::default() + }, + ExpectedSlices { + full: "https://domain.com:9742/path/file.ext", + scheme: "https", + host: "domain.com", + port: "9742", + path: "/path/file.ext", + ..Default::default() + }, + ExpectedSlices { + full: "blob:blob-info", + scheme: "blob", + path: "blob-info", + ..Default::default() + }, + ]; + + for expected_slices in &data { + let url = Url::parse(expected_slices.full).unwrap(); + assert_eq!(&url[..], expected_slices.full); + assert_eq!(&url[BeforeScheme..AfterScheme], expected_slices.scheme); + assert_eq!( + &url[BeforeUsername..AfterUsername], + expected_slices.username + ); + assert_eq!( + &url[BeforePassword..AfterPassword], + expected_slices.password + ); + assert_eq!(&url[BeforeHost..AfterHost], expected_slices.host); + assert_eq!(&url[BeforePort..AfterPort], expected_slices.port); + assert_eq!(&url[BeforePath..AfterPath], expected_slices.path); + assert_eq!(&url[BeforeQuery..AfterQuery], expected_slices.query); + assert_eq!( + &url[BeforeFragment..AfterFragment], + expected_slices.fragment + ); + assert_eq!(&url[..AfterFragment], expected_slices.full); + } +} + +#[test] +fn test_make_relative() { + let tests = [ + ( + "http://127.0.0.1:8080/test", + "http://127.0.0.1:8080/test", + "", + ), + ( + "http://127.0.0.1:8080/test", + "http://127.0.0.1:8080/test/", + "test/", + ), + ( + "http://127.0.0.1:8080/test/", + "http://127.0.0.1:8080/test", + "../test", + ), + ( + "http://127.0.0.1:8080/", + "http://127.0.0.1:8080/?foo=bar#123", + "?foo=bar#123", + ), + ( + "http://127.0.0.1:8080/", + "http://127.0.0.1:8080/test/video", + "test/video", + ), + ( + "http://127.0.0.1:8080/test", + "http://127.0.0.1:8080/test/video", + "test/video", + ), + ( + "http://127.0.0.1:8080/test/", + "http://127.0.0.1:8080/test/video", + "video", + ), + ( + "http://127.0.0.1:8080/test", + "http://127.0.0.1:8080/test2/video", + "test2/video", + ), + ( + "http://127.0.0.1:8080/test/", + "http://127.0.0.1:8080/test2/video", + "../test2/video", + ), + ( + "http://127.0.0.1:8080/test/bla", + "http://127.0.0.1:8080/test2/video", + "../test2/video", + ), + ( + "http://127.0.0.1:8080/test/bla/", + "http://127.0.0.1:8080/test2/video", + "../../test2/video", + ), + ( + "http://127.0.0.1:8080/test/?foo=bar#123", + "http://127.0.0.1:8080/test/video", + "video", + ), + ( + "http://127.0.0.1:8080/test/", + "http://127.0.0.1:8080/test/video?baz=meh#456", + "video?baz=meh#456", + ), + ( + "http://127.0.0.1:8080/test", + "http://127.0.0.1:8080/test?baz=meh#456", + "?baz=meh#456", + ), + ( + "http://127.0.0.1:8080/test/", + "http://127.0.0.1:8080/test?baz=meh#456", + "../test?baz=meh#456", + ), + ( + "http://127.0.0.1:8080/test/", + "http://127.0.0.1:8080/test/?baz=meh#456", + "?baz=meh#456", + ), + ( + "http://127.0.0.1:8080/test/?foo=bar#123", + "http://127.0.0.1:8080/test/video?baz=meh#456", + "video?baz=meh#456", + ), + ( + "http://127.0.0.1:8080/file.txt", + "http://127.0.0.1:8080/test/file.txt", + "test/file.txt", + ), + ( + "http://127.0.0.1:8080/not_equal.txt", + "http://127.0.0.1:8080/test/file.txt", + "test/file.txt", + ), + ]; + + for (base, uri, relative) in &tests { + let base_uri = url::Url::parse(base).unwrap(); + let relative_uri = url::Url::parse(uri).unwrap(); + let make_relative = base_uri.make_relative(&relative_uri).unwrap(); + assert_eq!( + make_relative, *relative, + "base: {}, uri: {}, relative: {}", + base, uri, relative + ); + assert_eq!( + base_uri.join(relative).unwrap().as_str(), + *uri, + "base: {}, uri: {}, relative: {}", + base, + uri, + relative + ); + } + + let error_tests = [ + ("http://127.0.0.1:8080/", "https://127.0.0.1:8080/test/"), + ("http://127.0.0.1:8080/", "http://127.0.0.1:8081/test/"), + ("http://127.0.0.1:8080/", "http://127.0.0.2:8080/test/"), + ("mailto:a@example.com", "mailto:b@example.com"), + ]; + + for (base, uri) in &error_tests { + let base_uri = url::Url::parse(base).unwrap(); + let relative_uri = url::Url::parse(uri).unwrap(); + let make_relative = base_uri.make_relative(&relative_uri); + assert_eq!(make_relative, None, "base: {}, uri: {}", base, uri); + } +} diff --git a/tests/urltestdata.json b/tests/urltestdata.json new file mode 100644 index 0000000..4265f59 --- /dev/null +++ b/tests/urltestdata.json @@ -0,0 +1,7908 @@ +[ + "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/script-tests/segments.js", + "# AS OF https://github.com/web-platform-tests/wpt/commit/2a64dae4641fbd61bd4257df460e188f425b492e", + { + "input": "http://example\t.\norg", + "base": "http://example.org/foo/bar", + "href": "http://example.org/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://user:pass@foo:21/bar;par?b#c", + "base": "http://example.org/foo/bar", + "href": "http://user:pass@foo:21/bar;par?b#c", + "origin": "http://foo:21", + "protocol": "http:", + "username": "user", + "password": "pass", + "host": "foo:21", + "hostname": "foo", + "port": "21", + "pathname": "/bar;par", + "search": "?b", + "hash": "#c" + }, + { + "input": "https://test:@test", + "base": "about:blank", + "href": "https://test@test/", + "origin": "https://test", + "protocol": "https:", + "username": "test", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https://:@test", + "base": "about:blank", + "href": "https://test/", + "origin": "https://test", + "protocol": "https:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "non-special://test:@test/x", + "base": "about:blank", + "href": "non-special://test@test/x", + "origin": "null", + "protocol": "non-special:", + "username": "test", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/x", + "search": "", + "hash": "" + }, + { + "input": "non-special://:@test/x", + "base": "about:blank", + "href": "non-special://test/x", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/x", + "search": "", + "hash": "" + }, + { + "input": "http:foo.com", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/foo.com", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/foo.com", + "search": "", + "hash": "" + }, + { + "input": "\t :foo.com \n", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:foo.com", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:foo.com", + "search": "", + "hash": "" + }, + { + "input": " foo.com ", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/foo.com", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/foo.com", + "search": "", + "hash": "" + }, + { + "input": "a:\t foo.com", + "base": "http://example.org/foo/bar", + "href": "a: foo.com", + "origin": "null", + "protocol": "a:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": " foo.com", + "search": "", + "hash": "" + }, + { + "input": "http://f:21/ b ? d # e ", + "base": "http://example.org/foo/bar", + "href": "http://f:21/%20b%20?%20d%20#%20e", + "origin": "http://f:21", + "protocol": "http:", + "username": "", + "password": "", + "host": "f:21", + "hostname": "f", + "port": "21", + "pathname": "/%20b%20", + "search": "?%20d%20", + "hash": "#%20e" + }, + { + "input": "lolscheme:x x#x x", + "base": "about:blank", + "href": "lolscheme:x x#x%20x", + "protocol": "lolscheme:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "x x", + "search": "", + "hash": "#x%20x" + }, + { + "input": "http://f:/c", + "base": "http://example.org/foo/bar", + "href": "http://f/c", + "origin": "http://f", + "protocol": "http:", + "username": "", + "password": "", + "host": "f", + "hostname": "f", + "port": "", + "pathname": "/c", + "search": "", + "hash": "" + }, + { + "input": "http://f:0/c", + "base": "http://example.org/foo/bar", + "href": "http://f:0/c", + "origin": "http://f:0", + "protocol": "http:", + "username": "", + "password": "", + "host": "f:0", + "hostname": "f", + "port": "0", + "pathname": "/c", + "search": "", + "hash": "" + }, + { + "input": "http://f:00000000000000/c", + "base": "http://example.org/foo/bar", + "href": "http://f:0/c", + "origin": "http://f:0", + "protocol": "http:", + "username": "", + "password": "", + "host": "f:0", + "hostname": "f", + "port": "0", + "pathname": "/c", + "search": "", + "hash": "" + }, + { + "input": "http://f:00000000000000000000080/c", + "base": "http://example.org/foo/bar", + "href": "http://f/c", + "origin": "http://f", + "protocol": "http:", + "username": "", + "password": "", + "host": "f", + "hostname": "f", + "port": "", + "pathname": "/c", + "search": "", + "hash": "" + }, + { + "input": "http://f:b/c", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://f: /c", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://f:\n/c", + "base": "http://example.org/foo/bar", + "href": "http://f/c", + "origin": "http://f", + "protocol": "http:", + "username": "", + "password": "", + "host": "f", + "hostname": "f", + "port": "", + "pathname": "/c", + "search": "", + "hash": "" + }, + { + "input": "http://f:fifty-two/c", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://f:999999/c", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "non-special://f:999999/c", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://f: 21 / b ? d # e ", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "" + }, + { + "input": " \t", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "" + }, + { + "input": ":foo.com/", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:foo.com/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:foo.com/", + "search": "", + "hash": "" + }, + { + "input": ":foo.com\\", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:foo.com/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:foo.com/", + "search": "", + "hash": "" + }, + { + "input": ":", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:", + "search": "", + "hash": "" + }, + { + "input": ":a", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:a", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:a", + "search": "", + "hash": "" + }, + { + "input": ":/", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:/", + "search": "", + "hash": "" + }, + { + "input": ":\\", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:/", + "search": "", + "hash": "" + }, + { + "input": ":#", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:#", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:", + "search": "", + "hash": "" + }, + { + "input": "#", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar#", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "#/", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar#/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "#/" + }, + { + "input": "#\\", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar#\\", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "#\\" + }, + { + "input": "#;?", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar#;?", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "#;?" + }, + { + "input": "?", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar?", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "/", + "base": "http://example.org/foo/bar", + "href": "http://example.org/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": ":23", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:23", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:23", + "search": "", + "hash": "" + }, + { + "input": "/:23", + "base": "http://example.org/foo/bar", + "href": "http://example.org/:23", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/:23", + "search": "", + "hash": "" + }, + { + "input": "\\x", + "base": "http://example.org/foo/bar", + "href": "http://example.org/x", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/x", + "search": "", + "hash": "" + }, + { + "input": "\\\\x\\hello", + "base": "http://example.org/foo/bar", + "href": "http://x/hello", + "origin": "http://x", + "protocol": "http:", + "username": "", + "password": "", + "host": "x", + "hostname": "x", + "port": "", + "pathname": "/hello", + "search": "", + "hash": "" + }, + { + "input": "::", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/::", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/::", + "search": "", + "hash": "" + }, + { + "input": "::23", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/::23", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/::23", + "search": "", + "hash": "" + }, + { + "input": "foo://", + "base": "http://example.org/foo/bar", + "href": "foo://", + "origin": "null", + "protocol": "foo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, + { + "input": "http://a:b@c:29/d", + "base": "http://example.org/foo/bar", + "href": "http://a:b@c:29/d", + "origin": "http://c:29", + "protocol": "http:", + "username": "a", + "password": "b", + "host": "c:29", + "hostname": "c", + "port": "29", + "pathname": "/d", + "search": "", + "hash": "" + }, + { + "input": "http::@c:29", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:@c:29", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:@c:29", + "search": "", + "hash": "" + }, + { + "input": "http://&a:foo(b]c@d:2/", + "base": "http://example.org/foo/bar", + "href": "http://&a:foo(b%5Dc@d:2/", + "origin": "http://d:2", + "protocol": "http:", + "username": "&a", + "password": "foo(b%5Dc", + "host": "d:2", + "hostname": "d", + "port": "2", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://::@c@d:2", + "base": "http://example.org/foo/bar", + "href": "http://:%3A%40c@d:2/", + "origin": "http://d:2", + "protocol": "http:", + "username": "", + "password": "%3A%40c", + "host": "d:2", + "hostname": "d", + "port": "2", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://foo.com:b@d/", + "base": "http://example.org/foo/bar", + "href": "http://foo.com:b@d/", + "origin": "http://d", + "protocol": "http:", + "username": "foo.com", + "password": "b", + "host": "d", + "hostname": "d", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://foo.com/\\@", + "base": "http://example.org/foo/bar", + "href": "http://foo.com//@", + "origin": "http://foo.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.com", + "hostname": "foo.com", + "port": "", + "pathname": "//@", + "search": "", + "hash": "" + }, + { + "input": "http:\\\\foo.com\\", + "base": "http://example.org/foo/bar", + "href": "http://foo.com/", + "origin": "http://foo.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.com", + "hostname": "foo.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:\\\\a\\b:c\\d@foo.com\\", + "base": "http://example.org/foo/bar", + "href": "http://a/b:c/d@foo.com/", + "origin": "http://a", + "protocol": "http:", + "username": "", + "password": "", + "host": "a", + "hostname": "a", + "port": "", + "pathname": "/b:c/d@foo.com/", + "search": "", + "hash": "" + }, + { + "input": "foo:/", + "base": "http://example.org/foo/bar", + "href": "foo:/", + "origin": "null", + "protocol": "foo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "foo:/bar.com/", + "base": "http://example.org/foo/bar", + "href": "foo:/bar.com/", + "origin": "null", + "protocol": "foo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/bar.com/", + "search": "", + "hash": "" + }, + { + "input": "foo://///////", + "base": "http://example.org/foo/bar", + "href": "foo://///////", + "origin": "null", + "protocol": "foo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "///////", + "search": "", + "hash": "" + }, + { + "input": "foo://///////bar.com/", + "base": "http://example.org/foo/bar", + "href": "foo://///////bar.com/", + "origin": "null", + "protocol": "foo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "///////bar.com/", + "search": "", + "hash": "" + }, + { + "input": "foo:////://///", + "base": "http://example.org/foo/bar", + "href": "foo:////://///", + "origin": "null", + "protocol": "foo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//://///", + "search": "", + "hash": "" + }, + { + "input": "c:/foo", + "base": "http://example.org/foo/bar", + "href": "c:/foo", + "origin": "null", + "protocol": "c:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/foo", + "search": "", + "hash": "" + }, + { + "input": "//foo/bar", + "base": "http://example.org/foo/bar", + "href": "http://foo/bar", + "origin": "http://foo", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/bar", + "search": "", + "hash": "" + }, + { + "input": "http://foo/path;a??e#f#g", + "base": "http://example.org/foo/bar", + "href": "http://foo/path;a??e#f#g", + "origin": "http://foo", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/path;a", + "search": "??e", + "hash": "#f#g" + }, + { + "input": "http://foo/abcd?efgh?ijkl", + "base": "http://example.org/foo/bar", + "href": "http://foo/abcd?efgh?ijkl", + "origin": "http://foo", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/abcd", + "search": "?efgh?ijkl", + "hash": "" + }, + { + "input": "http://foo/abcd#foo?bar", + "base": "http://example.org/foo/bar", + "href": "http://foo/abcd#foo?bar", + "origin": "http://foo", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/abcd", + "search": "", + "hash": "#foo?bar" + }, + { + "input": "[61:24:74]:98", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/[61:24:74]:98", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/[61:24:74]:98", + "search": "", + "hash": "" + }, + { + "input": "http:[61:27]/:foo", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/[61:27]/:foo", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/[61:27]/:foo", + "search": "", + "hash": "" + }, + { + "input": "http://[1::2]:3:4", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://2001::1", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://2001::1]", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://2001::1]:80", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://[2001::1]", + "base": "http://example.org/foo/bar", + "href": "http://[2001::1]/", + "origin": "http://[2001::1]", + "protocol": "http:", + "username": "", + "password": "", + "host": "[2001::1]", + "hostname": "[2001::1]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://[::127.0.0.1]", + "base": "http://example.org/foo/bar", + "href": "http://[::7f00:1]/", + "origin": "http://[::7f00:1]", + "protocol": "http:", + "username": "", + "password": "", + "host": "[::7f00:1]", + "hostname": "[::7f00:1]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://[0:0:0:0:0:0:13.1.68.3]", + "base": "http://example.org/foo/bar", + "href": "http://[::d01:4403]/", + "origin": "http://[::d01:4403]", + "protocol": "http:", + "username": "", + "password": "", + "host": "[::d01:4403]", + "hostname": "[::d01:4403]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://[2001::1]:80", + "base": "http://example.org/foo/bar", + "href": "http://[2001::1]/", + "origin": "http://[2001::1]", + "protocol": "http:", + "username": "", + "password": "", + "host": "[2001::1]", + "hostname": "[2001::1]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:/example.com/", + "base": "http://example.org/foo/bar", + "href": "http://example.org/example.com/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "ftp:/example.com/", + "base": "http://example.org/foo/bar", + "href": "ftp://example.com/", + "origin": "ftp://example.com", + "protocol": "ftp:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https:/example.com/", + "base": "http://example.org/foo/bar", + "href": "https://example.com/", + "origin": "https://example.com", + "protocol": "https:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "madeupscheme:/example.com/", + "base": "http://example.org/foo/bar", + "href": "madeupscheme:/example.com/", + "origin": "null", + "protocol": "madeupscheme:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "file:/example.com/", + "base": "http://example.org/foo/bar", + "href": "file:///example.com/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "file://example:1/", + "base": "about:blank", + "failure": true + }, + { + "input": "file://example:test/", + "base": "about:blank", + "failure": true + }, + { + "input": "file://example%/", + "base": "about:blank", + "failure": true + }, + { + "input": "file://[example]/", + "base": "about:blank", + "failure": true + }, + { + "input": "ftps:/example.com/", + "base": "http://example.org/foo/bar", + "href": "ftps:/example.com/", + "origin": "null", + "protocol": "ftps:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "gopher:/example.com/", + "base": "http://example.org/foo/bar", + "href": "gopher:/example.com/", + "origin": "null", + "protocol": "gopher:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "ws:/example.com/", + "base": "http://example.org/foo/bar", + "href": "ws://example.com/", + "origin": "ws://example.com", + "protocol": "ws:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss:/example.com/", + "base": "http://example.org/foo/bar", + "href": "wss://example.com/", + "origin": "wss://example.com", + "protocol": "wss:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "data:/example.com/", + "base": "http://example.org/foo/bar", + "href": "data:/example.com/", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "javascript:/example.com/", + "base": "http://example.org/foo/bar", + "href": "javascript:/example.com/", + "origin": "null", + "protocol": "javascript:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "mailto:/example.com/", + "base": "http://example.org/foo/bar", + "href": "mailto:/example.com/", + "origin": "null", + "protocol": "mailto:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "http:example.com/", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/example.com/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/example.com/", + "search": "", + "hash": "" + }, + { + "input": "ftp:example.com/", + "base": "http://example.org/foo/bar", + "href": "ftp://example.com/", + "origin": "ftp://example.com", + "protocol": "ftp:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https:example.com/", + "base": "http://example.org/foo/bar", + "href": "https://example.com/", + "origin": "https://example.com", + "protocol": "https:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "madeupscheme:example.com/", + "base": "http://example.org/foo/bar", + "href": "madeupscheme:example.com/", + "origin": "null", + "protocol": "madeupscheme:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "ftps:example.com/", + "base": "http://example.org/foo/bar", + "href": "ftps:example.com/", + "origin": "null", + "protocol": "ftps:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "gopher:example.com/", + "base": "http://example.org/foo/bar", + "href": "gopher:example.com/", + "origin": "null", + "protocol": "gopher:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "ws:example.com/", + "base": "http://example.org/foo/bar", + "href": "ws://example.com/", + "origin": "ws://example.com", + "protocol": "ws:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss:example.com/", + "base": "http://example.org/foo/bar", + "href": "wss://example.com/", + "origin": "wss://example.com", + "protocol": "wss:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "data:example.com/", + "base": "http://example.org/foo/bar", + "href": "data:example.com/", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "javascript:example.com/", + "base": "http://example.org/foo/bar", + "href": "javascript:example.com/", + "origin": "null", + "protocol": "javascript:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "mailto:example.com/", + "base": "http://example.org/foo/bar", + "href": "mailto:example.com/", + "origin": "null", + "protocol": "mailto:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "/a/b/c", + "base": "http://example.org/foo/bar", + "href": "http://example.org/a/b/c", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/a/b/c", + "search": "", + "hash": "" + }, + { + "input": "/a/ /c", + "base": "http://example.org/foo/bar", + "href": "http://example.org/a/%20/c", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/a/%20/c", + "search": "", + "hash": "" + }, + { + "input": "/a%2fc", + "base": "http://example.org/foo/bar", + "href": "http://example.org/a%2fc", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/a%2fc", + "search": "", + "hash": "" + }, + { + "input": "/a/%2f/c", + "base": "http://example.org/foo/bar", + "href": "http://example.org/a/%2f/c", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/a/%2f/c", + "search": "", + "hash": "" + }, + { + "input": "#β", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar#%CE%B2", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "#%CE%B2" + }, + { + "input": "data:text/html,test#test", + "base": "http://example.org/foo/bar", + "href": "data:text/html,test#test", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "text/html,test", + "search": "", + "hash": "#test" + }, + { + "input": "tel:1234567890", + "base": "http://example.org/foo/bar", + "href": "tel:1234567890", + "origin": "null", + "protocol": "tel:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "1234567890", + "search": "", + "hash": "" + }, + "# Based on https://felixfbecker.github.io/whatwg-url-custom-host-repro/", + { + "input": "ssh://example.com/foo/bar.git", + "base": "http://example.org/", + "href": "ssh://example.com/foo/bar.git", + "origin": "null", + "protocol": "ssh:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/bar.git", + "search": "", + "hash": "" + }, + "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/file.html", + { + "input": "file:c:\\foo\\bar.html", + "base": "file:///tmp/mock/path", + "href": "file:///c:/foo/bar.html", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/c:/foo/bar.html", + "search": "", + "hash": "" + }, + { + "input": " File:c|////foo\\bar.html", + "base": "file:///tmp/mock/path", + "href": "file:///c:////foo/bar.html", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/c:////foo/bar.html", + "search": "", + "hash": "" + }, + { + "input": "C|/foo/bar", + "base": "file:///tmp/mock/path", + "href": "file:///C:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "/C|\\foo\\bar", + "base": "file:///tmp/mock/path", + "href": "file:///C:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "//C|/foo/bar", + "base": "file:///tmp/mock/path", + "href": "file:///C:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "//server/file", + "base": "file:///tmp/mock/path", + "href": "file://server/file", + "protocol": "file:", + "username": "", + "password": "", + "host": "server", + "hostname": "server", + "port": "", + "pathname": "/file", + "search": "", + "hash": "" + }, + { + "input": "\\\\server\\file", + "base": "file:///tmp/mock/path", + "href": "file://server/file", + "protocol": "file:", + "username": "", + "password": "", + "host": "server", + "hostname": "server", + "port": "", + "pathname": "/file", + "search": "", + "hash": "" + }, + { + "input": "/\\server/file", + "base": "file:///tmp/mock/path", + "href": "file://server/file", + "protocol": "file:", + "username": "", + "password": "", + "host": "server", + "hostname": "server", + "port": "", + "pathname": "/file", + "search": "", + "hash": "" + }, + { + "input": "file:///foo/bar.txt", + "base": "file:///tmp/mock/path", + "href": "file:///foo/bar.txt", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/foo/bar.txt", + "search": "", + "hash": "" + }, + { + "input": "file:///home/me", + "base": "file:///tmp/mock/path", + "href": "file:///home/me", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/home/me", + "search": "", + "hash": "" + }, + { + "input": "//", + "base": "file:///tmp/mock/path", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "///", + "base": "file:///tmp/mock/path", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "///test", + "base": "file:///tmp/mock/path", + "href": "file:///test", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "", + "hash": "" + }, + { + "input": "file://test", + "base": "file:///tmp/mock/path", + "href": "file://test/", + "protocol": "file:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file://localhost", + "base": "file:///tmp/mock/path", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file://localhost/", + "base": "file:///tmp/mock/path", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file://localhost/test", + "base": "file:///tmp/mock/path", + "href": "file:///test", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "", + "hash": "" + }, + { + "input": "test", + "base": "file:///tmp/mock/path", + "href": "file:///tmp/mock/test", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/tmp/mock/test", + "search": "", + "hash": "" + }, + { + "input": "file:test", + "base": "file:///tmp/mock/path", + "href": "file:///tmp/mock/test", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/tmp/mock/test", + "search": "", + "hash": "" + }, + "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/script-tests/path.js", + { + "input": "http://example.com/././foo", + "base": "about:blank", + "href": "http://example.com/foo", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/./.foo", + "base": "about:blank", + "href": "http://example.com/.foo", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/.foo", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/.", + "base": "about:blank", + "href": "http://example.com/foo/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/./", + "base": "about:blank", + "href": "http://example.com/foo/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/bar/..", + "base": "about:blank", + "href": "http://example.com/foo/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/bar/../", + "base": "about:blank", + "href": "http://example.com/foo/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/..bar", + "base": "about:blank", + "href": "http://example.com/foo/..bar", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/..bar", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/bar/../ton", + "base": "about:blank", + "href": "http://example.com/foo/ton", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/ton", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/bar/../ton/../../a", + "base": "about:blank", + "href": "http://example.com/a", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/a", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/../../..", + "base": "about:blank", + "href": "http://example.com/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/../../../ton", + "base": "about:blank", + "href": "http://example.com/ton", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/ton", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/%2e", + "base": "about:blank", + "href": "http://example.com/foo/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/%2e%2", + "base": "about:blank", + "href": "http://example.com/foo/%2e%2", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/%2e%2", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/%2e./%2e%2e/.%2e/%2e.bar", + "base": "about:blank", + "href": "http://example.com/%2e.bar", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%2e.bar", + "search": "", + "hash": "" + }, + { + "input": "http://example.com////../..", + "base": "about:blank", + "href": "http://example.com//", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/bar//../..", + "base": "about:blank", + "href": "http://example.com/foo/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/bar//..", + "base": "about:blank", + "href": "http://example.com/foo/bar/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/bar/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo", + "base": "about:blank", + "href": "http://example.com/foo", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/%20foo", + "base": "about:blank", + "href": "http://example.com/%20foo", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%20foo", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo%", + "base": "about:blank", + "href": "http://example.com/foo%", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo%", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo%2", + "base": "about:blank", + "href": "http://example.com/foo%2", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo%2", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo%2zbar", + "base": "about:blank", + "href": "http://example.com/foo%2zbar", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo%2zbar", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo%2©zbar", + "base": "about:blank", + "href": "http://example.com/foo%2%C3%82%C2%A9zbar", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo%2%C3%82%C2%A9zbar", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo%41%7a", + "base": "about:blank", + "href": "http://example.com/foo%41%7a", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo%41%7a", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo\t\u0091%91", + "base": "about:blank", + "href": "http://example.com/foo%C2%91%91", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo%C2%91%91", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo%00%51", + "base": "about:blank", + "href": "http://example.com/foo%00%51", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo%00%51", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/(%28:%3A%29)", + "base": "about:blank", + "href": "http://example.com/(%28:%3A%29)", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/(%28:%3A%29)", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/%3A%3a%3C%3c", + "base": "about:blank", + "href": "http://example.com/%3A%3a%3C%3c", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%3A%3a%3C%3c", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo\tbar", + "base": "about:blank", + "href": "http://example.com/foobar", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foobar", + "search": "", + "hash": "" + }, + { + "input": "http://example.com\\\\foo\\\\bar", + "base": "about:blank", + "href": "http://example.com//foo//bar", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "//foo//bar", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/%7Ffp3%3Eju%3Dduvgw%3Dd", + "base": "about:blank", + "href": "http://example.com/%7Ffp3%3Eju%3Dduvgw%3Dd", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%7Ffp3%3Eju%3Dduvgw%3Dd", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/@asdf%40", + "base": "about:blank", + "href": "http://example.com/@asdf%40", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/@asdf%40", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/你好你好", + "base": "about:blank", + "href": "http://example.com/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/‥/foo", + "base": "about:blank", + "href": "http://example.com/%E2%80%A5/foo", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%E2%80%A5/foo", + "search": "", + "hash": "" + }, + { + "input": "http://example.com//foo", + "base": "about:blank", + "href": "http://example.com/%EF%BB%BF/foo", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%EF%BB%BF/foo", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/‮/foo/‭/bar", + "base": "about:blank", + "href": "http://example.com/%E2%80%AE/foo/%E2%80%AD/bar", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%E2%80%AE/foo/%E2%80%AD/bar", + "search": "", + "hash": "" + }, + "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/script-tests/relative.js", + { + "input": "http://www.google.com/foo?bar=baz#", + "base": "about:blank", + "href": "http://www.google.com/foo?bar=baz#", + "origin": "http://www.google.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.google.com", + "hostname": "www.google.com", + "port": "", + "pathname": "/foo", + "search": "?bar=baz", + "hash": "" + }, + { + "input": "http://www.google.com/foo?bar=baz# »", + "base": "about:blank", + "href": "http://www.google.com/foo?bar=baz#%20%C2%BB", + "origin": "http://www.google.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.google.com", + "hostname": "www.google.com", + "port": "", + "pathname": "/foo", + "search": "?bar=baz", + "hash": "#%20%C2%BB" + }, + { + "input": "data:test# »", + "base": "about:blank", + "href": "data:test#%20%C2%BB", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "test", + "search": "", + "hash": "#%20%C2%BB" + }, + { + "input": "http://www.google.com", + "base": "about:blank", + "href": "http://www.google.com/", + "origin": "http://www.google.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.google.com", + "hostname": "www.google.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://192.0x00A80001", + "base": "about:blank", + "href": "http://192.168.0.1/", + "origin": "http://192.168.0.1", + "protocol": "http:", + "username": "", + "password": "", + "host": "192.168.0.1", + "hostname": "192.168.0.1", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://www/foo%2Ehtml", + "base": "about:blank", + "href": "http://www/foo%2Ehtml", + "origin": "http://www", + "protocol": "http:", + "username": "", + "password": "", + "host": "www", + "hostname": "www", + "port": "", + "pathname": "/foo%2Ehtml", + "search": "", + "hash": "" + }, + { + "input": "http://www/foo/%2E/html", + "base": "about:blank", + "href": "http://www/foo/html", + "origin": "http://www", + "protocol": "http:", + "username": "", + "password": "", + "host": "www", + "hostname": "www", + "port": "", + "pathname": "/foo/html", + "search": "", + "hash": "" + }, + { + "input": "http://user:pass@/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://%25DOMAIN:foobar@foodomain.com/", + "base": "about:blank", + "href": "http://%25DOMAIN:foobar@foodomain.com/", + "origin": "http://foodomain.com", + "protocol": "http:", + "username": "%25DOMAIN", + "password": "foobar", + "host": "foodomain.com", + "hostname": "foodomain.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:\\\\www.google.com\\foo", + "base": "about:blank", + "href": "http://www.google.com/foo", + "origin": "http://www.google.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.google.com", + "hostname": "www.google.com", + "port": "", + "pathname": "/foo", + "search": "", + "hash": "" + }, + { + "input": "http://foo:80/", + "base": "about:blank", + "href": "http://foo/", + "origin": "http://foo", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://foo:81/", + "base": "about:blank", + "href": "http://foo:81/", + "origin": "http://foo:81", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo:81", + "hostname": "foo", + "port": "81", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "httpa://foo:80/", + "base": "about:blank", + "href": "httpa://foo:80/", + "origin": "null", + "protocol": "httpa:", + "username": "", + "password": "", + "host": "foo:80", + "hostname": "foo", + "port": "80", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://foo:-80/", + "base": "about:blank", + "failure": true + }, + { + "input": "https://foo:443/", + "base": "about:blank", + "href": "https://foo/", + "origin": "https://foo", + "protocol": "https:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https://foo:80/", + "base": "about:blank", + "href": "https://foo:80/", + "origin": "https://foo:80", + "protocol": "https:", + "username": "", + "password": "", + "host": "foo:80", + "hostname": "foo", + "port": "80", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ftp://foo:21/", + "base": "about:blank", + "href": "ftp://foo/", + "origin": "ftp://foo", + "protocol": "ftp:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ftp://foo:80/", + "base": "about:blank", + "href": "ftp://foo:80/", + "origin": "ftp://foo:80", + "protocol": "ftp:", + "username": "", + "password": "", + "host": "foo:80", + "hostname": "foo", + "port": "80", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "gopher://foo:70/", + "base": "about:blank", + "href": "gopher://foo:70/", + "origin": "null", + "protocol": "gopher:", + "username": "", + "password": "", + "host": "foo:70", + "hostname": "foo", + "port": "70", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "gopher://foo:443/", + "base": "about:blank", + "href": "gopher://foo:443/", + "origin": "null", + "protocol": "gopher:", + "username": "", + "password": "", + "host": "foo:443", + "hostname": "foo", + "port": "443", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ws://foo:80/", + "base": "about:blank", + "href": "ws://foo/", + "origin": "ws://foo", + "protocol": "ws:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ws://foo:81/", + "base": "about:blank", + "href": "ws://foo:81/", + "origin": "ws://foo:81", + "protocol": "ws:", + "username": "", + "password": "", + "host": "foo:81", + "hostname": "foo", + "port": "81", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ws://foo:443/", + "base": "about:blank", + "href": "ws://foo:443/", + "origin": "ws://foo:443", + "protocol": "ws:", + "username": "", + "password": "", + "host": "foo:443", + "hostname": "foo", + "port": "443", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ws://foo:815/", + "base": "about:blank", + "href": "ws://foo:815/", + "origin": "ws://foo:815", + "protocol": "ws:", + "username": "", + "password": "", + "host": "foo:815", + "hostname": "foo", + "port": "815", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss://foo:80/", + "base": "about:blank", + "href": "wss://foo:80/", + "origin": "wss://foo:80", + "protocol": "wss:", + "username": "", + "password": "", + "host": "foo:80", + "hostname": "foo", + "port": "80", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss://foo:81/", + "base": "about:blank", + "href": "wss://foo:81/", + "origin": "wss://foo:81", + "protocol": "wss:", + "username": "", + "password": "", + "host": "foo:81", + "hostname": "foo", + "port": "81", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss://foo:443/", + "base": "about:blank", + "href": "wss://foo/", + "origin": "wss://foo", + "protocol": "wss:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss://foo:815/", + "base": "about:blank", + "href": "wss://foo:815/", + "origin": "wss://foo:815", + "protocol": "wss:", + "username": "", + "password": "", + "host": "foo:815", + "hostname": "foo", + "port": "815", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:/example.com/", + "base": "about:blank", + "href": "http://example.com/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ftp:/example.com/", + "base": "about:blank", + "href": "ftp://example.com/", + "origin": "ftp://example.com", + "protocol": "ftp:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https:/example.com/", + "base": "about:blank", + "href": "https://example.com/", + "origin": "https://example.com", + "protocol": "https:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "madeupscheme:/example.com/", + "base": "about:blank", + "href": "madeupscheme:/example.com/", + "origin": "null", + "protocol": "madeupscheme:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "file:/example.com/", + "base": "about:blank", + "href": "file:///example.com/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "ftps:/example.com/", + "base": "about:blank", + "href": "ftps:/example.com/", + "origin": "null", + "protocol": "ftps:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "gopher:/example.com/", + "base": "about:blank", + "href": "gopher:/example.com/", + "origin": "null", + "protocol": "gopher:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "ws:/example.com/", + "base": "about:blank", + "href": "ws://example.com/", + "origin": "ws://example.com", + "protocol": "ws:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss:/example.com/", + "base": "about:blank", + "href": "wss://example.com/", + "origin": "wss://example.com", + "protocol": "wss:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "data:/example.com/", + "base": "about:blank", + "href": "data:/example.com/", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "javascript:/example.com/", + "base": "about:blank", + "href": "javascript:/example.com/", + "origin": "null", + "protocol": "javascript:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "mailto:/example.com/", + "base": "about:blank", + "href": "mailto:/example.com/", + "origin": "null", + "protocol": "mailto:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "http:example.com/", + "base": "about:blank", + "href": "http://example.com/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ftp:example.com/", + "base": "about:blank", + "href": "ftp://example.com/", + "origin": "ftp://example.com", + "protocol": "ftp:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https:example.com/", + "base": "about:blank", + "href": "https://example.com/", + "origin": "https://example.com", + "protocol": "https:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "madeupscheme:example.com/", + "base": "about:blank", + "href": "madeupscheme:example.com/", + "origin": "null", + "protocol": "madeupscheme:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "ftps:example.com/", + "base": "about:blank", + "href": "ftps:example.com/", + "origin": "null", + "protocol": "ftps:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "gopher:example.com/", + "base": "about:blank", + "href": "gopher:example.com/", + "origin": "null", + "protocol": "gopher:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "ws:example.com/", + "base": "about:blank", + "href": "ws://example.com/", + "origin": "ws://example.com", + "protocol": "ws:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss:example.com/", + "base": "about:blank", + "href": "wss://example.com/", + "origin": "wss://example.com", + "protocol": "wss:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "data:example.com/", + "base": "about:blank", + "href": "data:example.com/", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "javascript:example.com/", + "base": "about:blank", + "href": "javascript:example.com/", + "origin": "null", + "protocol": "javascript:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "mailto:example.com/", + "base": "about:blank", + "href": "mailto:example.com/", + "origin": "null", + "protocol": "mailto:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/segments-userinfo-vs-host.html", + { + "input": "http:@www.example.com", + "base": "about:blank", + "href": "http://www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:/@www.example.com", + "base": "about:blank", + "href": "http://www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://@www.example.com", + "base": "about:blank", + "href": "http://www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:a:b@www.example.com", + "base": "about:blank", + "href": "http://a:b@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "a", + "password": "b", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:/a:b@www.example.com", + "base": "about:blank", + "href": "http://a:b@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "a", + "password": "b", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://a:b@www.example.com", + "base": "about:blank", + "href": "http://a:b@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "a", + "password": "b", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://@pple.com", + "base": "about:blank", + "href": "http://pple.com/", + "origin": "http://pple.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "pple.com", + "hostname": "pple.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http::b@www.example.com", + "base": "about:blank", + "href": "http://:b@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "b", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:/:b@www.example.com", + "base": "about:blank", + "href": "http://:b@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "b", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://:b@www.example.com", + "base": "about:blank", + "href": "http://:b@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "b", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:/:@/www.example.com", + "base": "about:blank", + "failure": true, + "inputCanBeRelative": true + }, + { + "input": "http://user@/www.example.com", + "base": "about:blank", + "failure": true + }, + { + "input": "http:@/www.example.com", + "base": "about:blank", + "failure": true, + "inputCanBeRelative": true + }, + { + "input": "http:/@/www.example.com", + "base": "about:blank", + "failure": true, + "inputCanBeRelative": true + }, + { + "input": "http://@/www.example.com", + "base": "about:blank", + "failure": true + }, + { + "input": "https:@/www.example.com", + "base": "about:blank", + "failure": true, + "inputCanBeRelative": true + }, + { + "input": "http:a:b@/www.example.com", + "base": "about:blank", + "failure": true, + "inputCanBeRelative": true + }, + { + "input": "http:/a:b@/www.example.com", + "base": "about:blank", + "failure": true, + "inputCanBeRelative": true + }, + { + "input": "http://a:b@/www.example.com", + "base": "about:blank", + "failure": true + }, + { + "input": "http::@/www.example.com", + "base": "about:blank", + "failure": true, + "inputCanBeRelative": true + }, + { + "input": "http:a:@www.example.com", + "base": "about:blank", + "href": "http://a@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "a", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:/a:@www.example.com", + "base": "about:blank", + "href": "http://a@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "a", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://a:@www.example.com", + "base": "about:blank", + "href": "http://a@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "a", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://www.@pple.com", + "base": "about:blank", + "href": "http://www.@pple.com/", + "origin": "http://pple.com", + "protocol": "http:", + "username": "www.", + "password": "", + "host": "pple.com", + "hostname": "pple.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:@:www.example.com", + "base": "about:blank", + "failure": true, + "inputCanBeRelative": true + }, + { + "input": "http:/@:www.example.com", + "base": "about:blank", + "failure": true, + "inputCanBeRelative": true + }, + { + "input": "http://@:www.example.com", + "base": "about:blank", + "failure": true + }, + { + "input": "http://:@www.example.com", + "base": "about:blank", + "href": "http://www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# Others", + { + "input": "/", + "base": "http://www.example.com/test", + "href": "http://www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "/test.txt", + "base": "http://www.example.com/test", + "href": "http://www.example.com/test.txt", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/test.txt", + "search": "", + "hash": "" + }, + { + "input": ".", + "base": "http://www.example.com/test", + "href": "http://www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "..", + "base": "http://www.example.com/test", + "href": "http://www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "test.txt", + "base": "http://www.example.com/test", + "href": "http://www.example.com/test.txt", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/test.txt", + "search": "", + "hash": "" + }, + { + "input": "./test.txt", + "base": "http://www.example.com/test", + "href": "http://www.example.com/test.txt", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/test.txt", + "search": "", + "hash": "" + }, + { + "input": "../test.txt", + "base": "http://www.example.com/test", + "href": "http://www.example.com/test.txt", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/test.txt", + "search": "", + "hash": "" + }, + { + "input": "../aaa/test.txt", + "base": "http://www.example.com/test", + "href": "http://www.example.com/aaa/test.txt", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/aaa/test.txt", + "search": "", + "hash": "" + }, + { + "input": "../../test.txt", + "base": "http://www.example.com/test", + "href": "http://www.example.com/test.txt", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/test.txt", + "search": "", + "hash": "" + }, + { + "input": "中/test.txt", + "base": "http://www.example.com/test", + "href": "http://www.example.com/%E4%B8%AD/test.txt", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/%E4%B8%AD/test.txt", + "search": "", + "hash": "" + }, + { + "input": "http://www.example2.com", + "base": "http://www.example.com/test", + "href": "http://www.example2.com/", + "origin": "http://www.example2.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example2.com", + "hostname": "www.example2.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "//www.example2.com", + "base": "http://www.example.com/test", + "href": "http://www.example2.com/", + "origin": "http://www.example2.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example2.com", + "hostname": "www.example2.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:...", + "base": "http://www.example.com/test", + "href": "file:///...", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/...", + "search": "", + "hash": "" + }, + { + "input": "file:..", + "base": "http://www.example.com/test", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:a", + "base": "http://www.example.com/test", + "href": "file:///a", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/a", + "search": "", + "hash": "" + }, + "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/host.html", + "Basic canonicalization, uppercase should be converted to lowercase", + { + "input": "http://ExAmPlE.CoM", + "base": "http://other.com/", + "href": "http://example.com/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://example example.com", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://Goo%20 goo%7C|.com", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[:]", + "base": "http://other.com/", + "failure": true + }, + "U+3000 is mapped to U+0020 (space) which is disallowed", + { + "input": "http://GOO\u00a0\u3000goo.com", + "base": "http://other.com/", + "failure": true + }, + "Other types of space (no-break, zero-width, zero-width-no-break) are name-prepped away to nothing. U+200B, U+2060, and U+FEFF, are ignored", + { + "input": "http://GOO\u200b\u2060\ufeffgoo.com", + "base": "http://other.com/", + "href": "http://googoo.com/", + "origin": "http://googoo.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "googoo.com", + "hostname": "googoo.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Leading and trailing C0 control or space", + { + "input": "\u0000\u001b\u0004\u0012 http://example.com/\u001f \u000d ", + "base": "about:blank", + "href": "http://example.com/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Ideographic full stop (full-width period for Chinese, etc.) should be treated as a dot. U+3002 is mapped to U+002E (dot)", + { + "input": "http://www.foo。bar.com", + "base": "http://other.com/", + "href": "http://www.foo.bar.com/", + "origin": "http://www.foo.bar.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.foo.bar.com", + "hostname": "www.foo.bar.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Invalid unicode characters should fail... U+FDD0 is disallowed; %ef%b7%90 is U+FDD0", + { + "input": "http://\ufdd0zyx.com", + "base": "http://other.com/", + "failure": true + }, + "This is the same as previous but escaped", + { + "input": "http://%ef%b7%90zyx.com", + "base": "http://other.com/", + "failure": true + }, + "U+FFFD", + { + "input": "https://\ufffd", + "base": "about:blank", + "failure": true + }, + { + "input": "https://%EF%BF%BD", + "base": "about:blank", + "failure": true + }, + { + "input": "https://x/\ufffd?\ufffd#\ufffd", + "base": "about:blank", + "href": "https://x/%EF%BF%BD?%EF%BF%BD#%EF%BF%BD", + "origin": "https://x", + "protocol": "https:", + "username": "", + "password": "", + "host": "x", + "hostname": "x", + "port": "", + "pathname": "/%EF%BF%BD", + "search": "?%EF%BF%BD", + "hash": "#%EF%BF%BD" + }, + "Domain is ASCII, but a label is invalid IDNA", + { + "input": "http://a.b.c.xn--pokxncvks", + "base": "about:blank", + "failure": true + }, + { + "input": "http://10.0.0.xn--pokxncvks", + "base": "about:blank", + "failure": true + }, + "IDNA labels should be matched case-insensitively", + { + "input": "http://a.b.c.XN--pokxncvks", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a.b.c.Xn--pokxncvks", + "base": "about:blank", + "failure": true + }, + { + "input": "http://10.0.0.XN--pokxncvks", + "base": "about:blank", + "failure": true + }, + { + "input": "http://10.0.0.xN--pokxncvks", + "base": "about:blank", + "failure": true + }, + "Test name prepping, fullwidth input should be converted to ASCII and NOT IDN-ized. This is 'Go' in fullwidth UTF-8/UTF-16.", + { + "input": "http://Go.com", + "base": "http://other.com/", + "href": "http://go.com/", + "origin": "http://go.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "go.com", + "hostname": "go.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "URL spec forbids the following. https://www.w3.org/Bugs/Public/show_bug.cgi?id=24257", + { + "input": "http://%41.com", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://%ef%bc%85%ef%bc%94%ef%bc%91.com", + "base": "http://other.com/", + "failure": true + }, + "...%00 in fullwidth should fail (also as escaped UTF-8 input)", + { + "input": "http://%00.com", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://%ef%bc%85%ef%bc%90%ef%bc%90.com", + "base": "http://other.com/", + "failure": true + }, + "Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN", + { + "input": "http://你好你好", + "base": "http://other.com/", + "href": "http://xn--6qqa088eba/", + "origin": "http://xn--6qqa088eba", + "protocol": "http:", + "username": "", + "password": "", + "host": "xn--6qqa088eba", + "hostname": "xn--6qqa088eba", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https://faß.ExAmPlE/", + "base": "about:blank", + "href": "https://xn--fa-hia.example/", + "origin": "https://xn--fa-hia.example", + "protocol": "https:", + "username": "", + "password": "", + "host": "xn--fa-hia.example", + "hostname": "xn--fa-hia.example", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "sc://faß.ExAmPlE/", + "base": "about:blank", + "href": "sc://fa%C3%9F.ExAmPlE/", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "fa%C3%9F.ExAmPlE", + "hostname": "fa%C3%9F.ExAmPlE", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Invalid escaped characters should fail and the percents should be escaped. https://www.w3.org/Bugs/Public/show_bug.cgi?id=24191", + { + "input": "http://%zz%66%a.com", + "base": "http://other.com/", + "failure": true + }, + "If we get an invalid character that has been escaped.", + { + "input": "http://%25", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://hello%00", + "base": "http://other.com/", + "failure": true + }, + "Escaped numbers should be treated like IP addresses if they are.", + { + "input": "http://%30%78%63%30%2e%30%32%35%30.01", + "base": "http://other.com/", + "href": "http://192.168.0.1/", + "origin": "http://192.168.0.1", + "protocol": "http:", + "username": "", + "password": "", + "host": "192.168.0.1", + "hostname": "192.168.0.1", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://%30%78%63%30%2e%30%32%35%30.01%2e", + "base": "http://other.com/", + "href": "http://192.168.0.1/", + "origin": "http://192.168.0.1", + "protocol": "http:", + "username": "", + "password": "", + "host": "192.168.0.1", + "hostname": "192.168.0.1", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://192.168.0.257", + "base": "http://other.com/", + "failure": true + }, + "Invalid escaping in hosts causes failure", + { + "input": "http://%3g%78%63%30%2e%30%32%35%30%2E.01", + "base": "http://other.com/", + "failure": true + }, + "A space in a host causes failure", + { + "input": "http://192.168.0.1 hello", + "base": "http://other.com/", + "failure": true + }, + { + "input": "https://x x:12", + "base": "about:blank", + "failure": true + }, + "Fullwidth and escaped UTF-8 fullwidth should still be treated as IP", + { + "input": "http://0Xc0.0250.01", + "base": "http://other.com/", + "href": "http://192.168.0.1/", + "origin": "http://192.168.0.1", + "protocol": "http:", + "username": "", + "password": "", + "host": "192.168.0.1", + "hostname": "192.168.0.1", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Domains with empty labels", + { + "input": "http://./", + "base": "about:blank", + "href": "http://./", + "origin": "http://.", + "protocol": "http:", + "username": "", + "password": "", + "host": ".", + "hostname": ".", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://../", + "base": "about:blank", + "href": "http://../", + "origin": "http://..", + "protocol": "http:", + "username": "", + "password": "", + "host": "..", + "hostname": "..", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Broken IPv6", + { + "input": "http://[www.google.com]/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://[google.com]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[::1.2.3.4x]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[::1.2.3.]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[::1.2.]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[::1.]", + "base": "http://other.com/", + "failure": true + }, + "Misc Unicode", + { + "input": "http://foo:💩@example.com/bar", + "base": "http://other.com/", + "href": "http://foo:%F0%9F%92%A9@example.com/bar", + "origin": "http://example.com", + "protocol": "http:", + "username": "foo", + "password": "%F0%9F%92%A9", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/bar", + "search": "", + "hash": "" + }, + "# resolving a fragment against any scheme succeeds", + { + "input": "#", + "base": "test:test", + "href": "test:test#", + "origin": "null", + "protocol": "test:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "test", + "search": "", + "hash": "" + }, + { + "input": "#x", + "base": "mailto:x@x.com", + "href": "mailto:x@x.com#x", + "origin": "null", + "protocol": "mailto:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "x@x.com", + "search": "", + "hash": "#x" + }, + { + "input": "#x", + "base": "data:,", + "href": "data:,#x", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": ",", + "search": "", + "hash": "#x" + }, + { + "input": "#x", + "base": "about:blank", + "href": "about:blank#x", + "origin": "null", + "protocol": "about:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "blank", + "search": "", + "hash": "#x" + }, + { + "input": "#", + "base": "test:test?test", + "href": "test:test?test#", + "origin": "null", + "protocol": "test:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "test", + "search": "?test", + "hash": "" + }, + "# multiple @ in authority state", + { + "input": "https://@test@test@example:800/", + "base": "http://doesnotmatter/", + "href": "https://%40test%40test@example:800/", + "origin": "https://example:800", + "protocol": "https:", + "username": "%40test%40test", + "password": "", + "host": "example:800", + "hostname": "example", + "port": "800", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https://@@@example", + "base": "http://doesnotmatter/", + "href": "https://%40%40@example/", + "origin": "https://example", + "protocol": "https:", + "username": "%40%40", + "password": "", + "host": "example", + "hostname": "example", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "non-az-09 characters", + { + "input": "http://`{}:`{}@h/`{}?`{}", + "base": "http://doesnotmatter/", + "href": "http://%60%7B%7D:%60%7B%7D@h/%60%7B%7D?`{}", + "origin": "http://h", + "protocol": "http:", + "username": "%60%7B%7D", + "password": "%60%7B%7D", + "host": "h", + "hostname": "h", + "port": "", + "pathname": "/%60%7B%7D", + "search": "?`{}", + "hash": "" + }, + "byte is ' and url is special", + { + "input": "http://host/?'", + "base": "about:blank", + "href": "http://host/?%27", + "origin": "http://host", + "protocol": "http:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/", + "search": "?%27", + "hash": "" + }, + { + "input": "notspecial://host/?'", + "base": "about:blank", + "href": "notspecial://host/?'", + "origin": "null", + "protocol": "notspecial:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/", + "search": "?'", + "hash": "" + }, + "# Credentials in base", + { + "input": "/some/path", + "base": "http://user@example.org/smth", + "href": "http://user@example.org/some/path", + "origin": "http://example.org", + "protocol": "http:", + "username": "user", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/some/path", + "search": "", + "hash": "" + }, + { + "input": "", + "base": "http://user:pass@example.org:21/smth", + "href": "http://user:pass@example.org:21/smth", + "origin": "http://example.org:21", + "protocol": "http:", + "username": "user", + "password": "pass", + "host": "example.org:21", + "hostname": "example.org", + "port": "21", + "pathname": "/smth", + "search": "", + "hash": "" + }, + { + "input": "/some/path", + "base": "http://user:pass@example.org:21/smth", + "href": "http://user:pass@example.org:21/some/path", + "origin": "http://example.org:21", + "protocol": "http:", + "username": "user", + "password": "pass", + "host": "example.org:21", + "hostname": "example.org", + "port": "21", + "pathname": "/some/path", + "search": "", + "hash": "" + }, + "# a set of tests designed by zcorpan for relative URLs with unknown schemes", + { + "input": "i", + "base": "sc:sd", + "failure": true + }, + { + "input": "i", + "base": "sc:sd/sd", + "failure": true + }, + { + "input": "i", + "base": "sc:/pa/pa", + "href": "sc:/pa/i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pa/i", + "search": "", + "hash": "" + }, + { + "input": "i", + "base": "sc://ho/pa", + "href": "sc://ho/i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "ho", + "hostname": "ho", + "port": "", + "pathname": "/i", + "search": "", + "hash": "" + }, + { + "input": "i", + "base": "sc:///pa/pa", + "href": "sc:///pa/i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pa/i", + "search": "", + "hash": "" + }, + { + "input": "../i", + "base": "sc:sd", + "failure": true + }, + { + "input": "../i", + "base": "sc:sd/sd", + "failure": true + }, + { + "input": "../i", + "base": "sc:/pa/pa", + "href": "sc:/i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/i", + "search": "", + "hash": "" + }, + { + "input": "../i", + "base": "sc://ho/pa", + "href": "sc://ho/i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "ho", + "hostname": "ho", + "port": "", + "pathname": "/i", + "search": "", + "hash": "" + }, + { + "input": "../i", + "base": "sc:///pa/pa", + "href": "sc:///i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/i", + "search": "", + "hash": "" + }, + { + "input": "/i", + "base": "sc:sd", + "failure": true + }, + { + "input": "/i", + "base": "sc:sd/sd", + "failure": true + }, + { + "input": "/i", + "base": "sc:/pa/pa", + "href": "sc:/i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/i", + "search": "", + "hash": "" + }, + { + "input": "/i", + "base": "sc://ho/pa", + "href": "sc://ho/i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "ho", + "hostname": "ho", + "port": "", + "pathname": "/i", + "search": "", + "hash": "" + }, + { + "input": "/i", + "base": "sc:///pa/pa", + "href": "sc:///i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/i", + "search": "", + "hash": "" + }, + { + "input": "?i", + "base": "sc:sd", + "failure": true + }, + { + "input": "?i", + "base": "sc:sd/sd", + "failure": true + }, + { + "input": "?i", + "base": "sc:/pa/pa", + "href": "sc:/pa/pa?i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pa/pa", + "search": "?i", + "hash": "" + }, + { + "input": "?i", + "base": "sc://ho/pa", + "href": "sc://ho/pa?i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "ho", + "hostname": "ho", + "port": "", + "pathname": "/pa", + "search": "?i", + "hash": "" + }, + { + "input": "?i", + "base": "sc:///pa/pa", + "href": "sc:///pa/pa?i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pa/pa", + "search": "?i", + "hash": "" + }, + { + "input": "#i", + "base": "sc:sd", + "href": "sc:sd#i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "sd", + "search": "", + "hash": "#i" + }, + { + "input": "#i", + "base": "sc:sd/sd", + "href": "sc:sd/sd#i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "sd/sd", + "search": "", + "hash": "#i" + }, + { + "input": "#i", + "base": "sc:/pa/pa", + "href": "sc:/pa/pa#i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pa/pa", + "search": "", + "hash": "#i" + }, + { + "input": "#i", + "base": "sc://ho/pa", + "href": "sc://ho/pa#i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "ho", + "hostname": "ho", + "port": "", + "pathname": "/pa", + "search": "", + "hash": "#i" + }, + { + "input": "#i", + "base": "sc:///pa/pa", + "href": "sc:///pa/pa#i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pa/pa", + "search": "", + "hash": "#i" + }, + "# make sure that relative URL logic works on known typically non-relative schemes too", + { + "input": "about:/../", + "base": "about:blank", + "href": "about:/", + "origin": "null", + "protocol": "about:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "data:/../", + "base": "about:blank", + "href": "data:/", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "javascript:/../", + "base": "about:blank", + "href": "javascript:/", + "origin": "null", + "protocol": "javascript:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "mailto:/../", + "base": "about:blank", + "href": "mailto:/", + "origin": "null", + "protocol": "mailto:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# unknown schemes and their hosts", + { + "input": "sc://ñ.test/", + "base": "about:blank", + "href": "sc://%C3%B1.test/", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1.test", + "hostname": "%C3%B1.test", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "sc://%/", + "base": "about:blank", + "href": "sc://%/", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%", + "hostname": "%", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "sc://@/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://te@s:t@/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://:/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://:12/", + "base": "about:blank", + "failure": true + }, + { + "input": "x", + "base": "sc://ñ", + "href": "sc://%C3%B1/x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "/x", + "search": "", + "hash": "" + }, + "# unknown schemes and backslashes", + { + "input": "sc:\\../", + "base": "about:blank", + "href": "sc:\\../", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "\\../", + "search": "", + "hash": "" + }, + "# unknown scheme with path looking like a password", + { + "input": "sc::a@example.net", + "base": "about:blank", + "href": "sc::a@example.net", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": ":a@example.net", + "search": "", + "hash": "" + }, + "# unknown scheme with bogus percent-encoding", + { + "input": "wow:%NBD", + "base": "about:blank", + "href": "wow:%NBD", + "origin": "null", + "protocol": "wow:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "%NBD", + "search": "", + "hash": "" + }, + { + "input": "wow:%1G", + "base": "about:blank", + "href": "wow:%1G", + "origin": "null", + "protocol": "wow:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "%1G", + "search": "", + "hash": "" + }, + "# unknown scheme with non-URL characters", + { + "input": "wow:\uFFFF", + "base": "about:blank", + "href": "wow:%EF%BF%BF", + "origin": "null", + "protocol": "wow:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "%EF%BF%BF", + "search": "", + "hash": "" + }, + "Forbidden host code points", + { + "input": "sc://a\u0000b/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://a b/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://ab", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://a[b/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://a\\b/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://a]b/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://a^b", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://a|b/", + "base": "about:blank", + "failure": true + }, + "Forbidden host codepoints: tabs and newlines are removed during preprocessing", + { + "input": "foo://ho\u0009st/", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href":"foo://host/", + "password": "", + "pathname": "/", + "port":"", + "protocol": "foo:", + "search": "", + "username": "" + }, + { + "input": "foo://ho\u000Ast/", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href":"foo://host/", + "password": "", + "pathname": "/", + "port":"", + "protocol": "foo:", + "search": "", + "username": "" + }, + { + "input": "foo://ho\u000Dst/", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href":"foo://host/", + "password": "", + "pathname": "/", + "port":"", + "protocol": "foo:", + "search": "", + "username": "" + }, + "Forbidden domain code-points", + { + "input": "http://a\u0000b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u0001b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u0002b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u0003b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u0004b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u0005b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u0006b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u0007b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u0008b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u000Bb/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u000Cb/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u000Eb/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u000Fb/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u0010b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u0011b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u0012b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u0013b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u0014b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u0015b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u0016b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u0017b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u0018b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u0019b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u001Ab/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u001Bb/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u001Cb/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u001Db/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u001Eb/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u001Fb/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a%b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ab", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a[b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a]b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a^b", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a|b/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://a\u007Fb/", + "base": "about:blank", + "failure": true + }, + "Forbidden domain codepoints: tabs and newlines are removed during preprocessing", + { + "input": "http://ho\u0009st/", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href":"http://host/", + "password": "", + "pathname": "/", + "port":"", + "protocol": "http:", + "search": "", + "username": "" + }, + { + "input": "http://ho\u000Ast/", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href":"http://host/", + "password": "", + "pathname": "/", + "port":"", + "protocol": "http:", + "search": "", + "username": "" + }, + { + "input": "http://ho\u000Dst/", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href":"http://host/", + "password": "", + "pathname": "/", + "port":"", + "protocol": "http:", + "search": "", + "username": "" + }, + "Encoded forbidden domain codepoints in special URLs", + { + "input": "http://ho%00st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%01st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%02st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%03st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%04st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%05st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%06st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%07st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%08st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%09st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%0Ast/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%0Bst/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%0Cst/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%0Dst/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%0Est/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%0Fst/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%10st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%11st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%12st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%13st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%14st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%15st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%16st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%17st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%18st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%19st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%1Ast/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%1Bst/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%1Cst/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%1Dst/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%1Est/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%1Fst/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%20st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%23st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%25st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%2Fst/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%3Ast/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%3Cst/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%3Est/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%3Fst/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%40st/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%5Bst/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%5Cst/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%5Dst/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%7Cst/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://ho%7Fst/", + "base": "about:blank", + "failure": true + }, + "Allowed host/domain code points", + { + "input": "http://!\"$&'()*+,-.;=_`{}~/", + "base": "about:blank", + "href": "http://!\"$&'()*+,-.;=_`{}~/", + "origin": "http://!\"$&'()*+,-.;=_`{}~", + "protocol": "http:", + "username": "", + "password": "", + "host": "!\"$&'()*+,-.;=_`{}~", + "hostname": "!\"$&'()*+,-.;=_`{}~", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "sc://\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u000B\u000C\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F\u007F!\"$%&'()*+,-.;=_`{}~/", + "base": "about:blank", + "href": "sc://%01%02%03%04%05%06%07%08%0B%0C%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%7F!\"$%&'()*+,-.;=_`{}~/", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%01%02%03%04%05%06%07%08%0B%0C%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%7F!\"$%&'()*+,-.;=_`{}~", + "hostname": "%01%02%03%04%05%06%07%08%0B%0C%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%7F!\"$%&'()*+,-.;=_`{}~", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# Hosts and percent-encoding", + { + "input": "ftp://example.com%80/", + "base": "about:blank", + "failure": true + }, + { + "input": "ftp://example.com%A0/", + "base": "about:blank", + "failure": true + }, + { + "input": "https://example.com%80/", + "base": "about:blank", + "failure": true + }, + { + "input": "https://example.com%A0/", + "base": "about:blank", + "failure": true + }, + { + "input": "ftp://%e2%98%83", + "base": "about:blank", + "href": "ftp://xn--n3h/", + "origin": "ftp://xn--n3h", + "protocol": "ftp:", + "username": "", + "password": "", + "host": "xn--n3h", + "hostname": "xn--n3h", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https://%e2%98%83", + "base": "about:blank", + "href": "https://xn--n3h/", + "origin": "https://xn--n3h", + "protocol": "https:", + "username": "", + "password": "", + "host": "xn--n3h", + "hostname": "xn--n3h", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# tests from jsdom/whatwg-url designed for code coverage", + { + "input": "http://127.0.0.1:10100/relative_import.html", + "base": "about:blank", + "href": "http://127.0.0.1:10100/relative_import.html", + "origin": "http://127.0.0.1:10100", + "protocol": "http:", + "username": "", + "password": "", + "host": "127.0.0.1:10100", + "hostname": "127.0.0.1", + "port": "10100", + "pathname": "/relative_import.html", + "search": "", + "hash": "" + }, + { + "input": "http://facebook.com/?foo=%7B%22abc%22", + "base": "about:blank", + "href": "http://facebook.com/?foo=%7B%22abc%22", + "origin": "http://facebook.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "facebook.com", + "hostname": "facebook.com", + "port": "", + "pathname": "/", + "search": "?foo=%7B%22abc%22", + "hash": "" + }, + { + "input": "https://localhost:3000/jqueryui@1.2.3", + "base": "about:blank", + "href": "https://localhost:3000/jqueryui@1.2.3", + "origin": "https://localhost:3000", + "protocol": "https:", + "username": "", + "password": "", + "host": "localhost:3000", + "hostname": "localhost", + "port": "3000", + "pathname": "/jqueryui@1.2.3", + "search": "", + "hash": "" + }, + "# tab/LF/CR", + { + "input": "h\tt\nt\rp://h\to\ns\rt:9\t0\n0\r0/p\ta\nt\rh?q\tu\ne\rry#f\tr\na\rg", + "base": "about:blank", + "href": "http://host:9000/path?query#frag", + "origin": "http://host:9000", + "protocol": "http:", + "username": "", + "password": "", + "host": "host:9000", + "hostname": "host", + "port": "9000", + "pathname": "/path", + "search": "?query", + "hash": "#frag" + }, + "# Stringification of URL.searchParams", + { + "input": "?a=b&c=d", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar?a=b&c=d", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "?a=b&c=d", + "searchParams": "a=b&c=d", + "hash": "" + }, + { + "input": "??a=b&c=d", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar??a=b&c=d", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "??a=b&c=d", + "searchParams": "%3Fa=b&c=d", + "hash": "" + }, + "# Scheme only", + { + "input": "http:", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "searchParams": "", + "hash": "" + }, + { + "input": "http:", + "base": "https://example.org/foo/bar", + "failure": true + }, + { + "input": "sc:", + "base": "https://example.org/foo/bar", + "href": "sc:", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "searchParams": "", + "hash": "" + }, + "# Percent encoding of fragments", + { + "input": "http://foo.bar/baz?qux#foo\bbar", + "base": "about:blank", + "href": "http://foo.bar/baz?qux#foo%08bar", + "origin": "http://foo.bar", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.bar", + "hostname": "foo.bar", + "port": "", + "pathname": "/baz", + "search": "?qux", + "searchParams": "qux=", + "hash": "#foo%08bar" + }, + { + "input": "http://foo.bar/baz?qux#foo\"bar", + "base": "about:blank", + "href": "http://foo.bar/baz?qux#foo%22bar", + "origin": "http://foo.bar", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.bar", + "hostname": "foo.bar", + "port": "", + "pathname": "/baz", + "search": "?qux", + "searchParams": "qux=", + "hash": "#foo%22bar" + }, + { + "input": "http://foo.bar/baz?qux#foobar", + "base": "about:blank", + "href": "http://foo.bar/baz?qux#foo%3Ebar", + "origin": "http://foo.bar", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.bar", + "hostname": "foo.bar", + "port": "", + "pathname": "/baz", + "search": "?qux", + "searchParams": "qux=", + "hash": "#foo%3Ebar" + }, + { + "input": "http://foo.bar/baz?qux#foo`bar", + "base": "about:blank", + "href": "http://foo.bar/baz?qux#foo%60bar", + "origin": "http://foo.bar", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.bar", + "hostname": "foo.bar", + "port": "", + "pathname": "/baz", + "search": "?qux", + "searchParams": "qux=", + "hash": "#foo%60bar" + }, + "# IPv4 parsing (via https://github.com/nodejs/node/pull/10317)", + { + "input": "http://1.2.3.4/", + "base": "http://other.com/", + "href": "http://1.2.3.4/", + "origin": "http://1.2.3.4", + "protocol": "http:", + "username": "", + "password": "", + "host": "1.2.3.4", + "hostname": "1.2.3.4", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://1.2.3.4./", + "base": "http://other.com/", + "href": "http://1.2.3.4/", + "origin": "http://1.2.3.4", + "protocol": "http:", + "username": "", + "password": "", + "host": "1.2.3.4", + "hostname": "1.2.3.4", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://192.168.257", + "base": "http://other.com/", + "href": "http://192.168.1.1/", + "origin": "http://192.168.1.1", + "protocol": "http:", + "username": "", + "password": "", + "host": "192.168.1.1", + "hostname": "192.168.1.1", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://192.168.257.", + "base": "http://other.com/", + "href": "http://192.168.1.1/", + "origin": "http://192.168.1.1", + "protocol": "http:", + "username": "", + "password": "", + "host": "192.168.1.1", + "hostname": "192.168.1.1", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://192.168.257.com", + "base": "http://other.com/", + "href": "http://192.168.257.com/", + "origin": "http://192.168.257.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "192.168.257.com", + "hostname": "192.168.257.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://256", + "base": "http://other.com/", + "href": "http://0.0.1.0/", + "origin": "http://0.0.1.0", + "protocol": "http:", + "username": "", + "password": "", + "host": "0.0.1.0", + "hostname": "0.0.1.0", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://256.com", + "base": "http://other.com/", + "href": "http://256.com/", + "origin": "http://256.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "256.com", + "hostname": "256.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://999999999", + "base": "http://other.com/", + "href": "http://59.154.201.255/", + "origin": "http://59.154.201.255", + "protocol": "http:", + "username": "", + "password": "", + "host": "59.154.201.255", + "hostname": "59.154.201.255", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://999999999.", + "base": "http://other.com/", + "href": "http://59.154.201.255/", + "origin": "http://59.154.201.255", + "protocol": "http:", + "username": "", + "password": "", + "host": "59.154.201.255", + "hostname": "59.154.201.255", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://999999999.com", + "base": "http://other.com/", + "href": "http://999999999.com/", + "origin": "http://999999999.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "999999999.com", + "hostname": "999999999.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://10000000000", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://10000000000.com", + "base": "http://other.com/", + "href": "http://10000000000.com/", + "origin": "http://10000000000.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "10000000000.com", + "hostname": "10000000000.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://4294967295", + "base": "http://other.com/", + "href": "http://255.255.255.255/", + "origin": "http://255.255.255.255", + "protocol": "http:", + "username": "", + "password": "", + "host": "255.255.255.255", + "hostname": "255.255.255.255", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://4294967296", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://0xffffffff", + "base": "http://other.com/", + "href": "http://255.255.255.255/", + "origin": "http://255.255.255.255", + "protocol": "http:", + "username": "", + "password": "", + "host": "255.255.255.255", + "hostname": "255.255.255.255", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://0xffffffff1", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://256.256.256.256", + "base": "http://other.com/", + "failure": true + }, + { + "input": "https://0x.0x.0", + "base": "about:blank", + "href": "https://0.0.0.0/", + "origin": "https://0.0.0.0", + "protocol": "https:", + "username": "", + "password": "", + "host": "0.0.0.0", + "hostname": "0.0.0.0", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "More IPv4 parsing (via https://github.com/jsdom/whatwg-url/issues/92)", + { + "input": "https://0x100000000/test", + "base": "about:blank", + "failure": true + }, + { + "input": "https://256.0.0.1/test", + "base": "about:blank", + "failure": true + }, + "# file URLs containing percent-encoded Windows drive letters (shouldn't work)", + { + "input": "file:///C%3A/", + "base": "about:blank", + "href": "file:///C%3A/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C%3A/", + "search": "", + "hash": "" + }, + { + "input": "file:///C%7C/", + "base": "about:blank", + "href": "file:///C%7C/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C%7C/", + "search": "", + "hash": "" + }, + { + "input": "file://%43%3A", + "base": "about:blank", + "failure": true + }, + { + "input": "file://%43%7C", + "base": "about:blank", + "failure": true + }, + { + "input": "file://%43|", + "base": "about:blank", + "failure": true + }, + { + "input": "file://C%7C", + "base": "about:blank", + "failure": true + }, + { + "input": "file://%43%7C/", + "base": "about:blank", + "failure": true + }, + { + "input": "https://%43%7C/", + "base": "about:blank", + "failure": true + }, + { + "input": "asdf://%43|/", + "base": "about:blank", + "failure": true + }, + { + "input": "asdf://%43%7C/", + "base": "about:blank", + "href": "asdf://%43%7C/", + "origin": "null", + "protocol": "asdf:", + "username": "", + "password": "", + "host": "%43%7C", + "hostname": "%43%7C", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# file URLs relative to other file URLs (via https://github.com/jsdom/whatwg-url/pull/60)", + { + "input": "pix/submit.gif", + "base": "file:///C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/anchor.html", + "href": "file:///C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/pix/submit.gif", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/pix/submit.gif", + "search": "", + "hash": "" + }, + { + "input": "..", + "base": "file:///C:/", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "..", + "base": "file:///", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# More file URL tests by zcorpan and annevk", + { + "input": "/", + "base": "file:///C:/a/b", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "/", + "base": "file://h/a/b", + "href": "file://h/", + "protocol": "file:", + "username": "", + "password": "", + "host": "h", + "hostname": "h", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "//d:", + "base": "file:///C:/a/b", + "href": "file:///d:", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/d:", + "search": "", + "hash": "" + }, + { + "input": "//d:/..", + "base": "file:///C:/a/b", + "href": "file:///d:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/d:/", + "search": "", + "hash": "" + }, + { + "input": "..", + "base": "file:///ab:/", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "..", + "base": "file:///1:/", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "", + "base": "file:///test?test#test", + "href": "file:///test?test", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "?test", + "hash": "" + }, + { + "input": "file:", + "base": "file:///test?test#test", + "href": "file:///test?test", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "?test", + "hash": "" + }, + { + "input": "?x", + "base": "file:///test?test#test", + "href": "file:///test?x", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "?x", + "hash": "" + }, + { + "input": "file:?x", + "base": "file:///test?test#test", + "href": "file:///test?x", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "?x", + "hash": "" + }, + { + "input": "#x", + "base": "file:///test?test#test", + "href": "file:///test?test#x", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "?test", + "hash": "#x" + }, + { + "input": "file:#x", + "base": "file:///test?test#test", + "href": "file:///test?test#x", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "?test", + "hash": "#x" + }, + "# File URLs and many (back)slashes", + { + "input": "file:///localhost//cat", + "base": "about:blank", + "href": "file:///localhost//cat", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/localhost//cat", + "search": "", + "hash": "" + }, + { + "input": "\\//pig", + "base": "file://lion/", + "href": "file:///pig", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pig", + "search": "", + "hash": "" + }, + { + "input": "file://", + "base": "file://ape/", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# File URLs with non-empty hosts", + { + "input": "/rooibos", + "base": "file://tea/", + "href": "file://tea/rooibos", + "protocol": "file:", + "username": "", + "password": "", + "host": "tea", + "hostname": "tea", + "port": "", + "pathname": "/rooibos", + "search": "", + "hash": "" + }, + { + "input": "/?chai", + "base": "file://tea/", + "href": "file://tea/?chai", + "protocol": "file:", + "username": "", + "password": "", + "host": "tea", + "hostname": "tea", + "port": "", + "pathname": "/", + "search": "?chai", + "hash": "" + }, + "# Windows drive letter handling with the 'file:' base URL", + { + "input": "C", + "base": "file://host/dir/file", + "href": "file://host/dir/C", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/dir/C", + "search": "", + "hash": "" + }, + { + "input": "C|a", + "base": "file://host/dir/file", + "href": "file://host/dir/C|a", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/dir/C|a", + "search": "", + "hash": "" + }, + "# Windows drive letter quirk in the file slash state", + { + "input": "/c:/foo/bar", + "base": "file:///c:/baz/qux", + "href": "file:///c:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/c:/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "/c|/foo/bar", + "base": "file:///c:/baz/qux", + "href": "file:///c:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/c:/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "file:\\c:\\foo\\bar", + "base": "file:///c:/baz/qux", + "href": "file:///c:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/c:/foo/bar", + "search": "", + "hash": "" + }, + "# Copy the empty host from the input in the following cases", + { + "input": "//C:/", + "base": "file://host/", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "file://C:/", + "base": "file://host/", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "///C:/", + "base": "file://host/", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "file:///C:/", + "base": "file://host/", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + "# Windows drive letter quirk (no host)", + { + "input": "file:/C|/", + "base": "about:blank", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "file://C|/", + "base": "about:blank", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + "# file URLs without base URL by Rimas Misevičius", + { + "input": "file:", + "base": "about:blank", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:?q=v", + "base": "about:blank", + "href": "file:///?q=v", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "?q=v", + "hash": "" + }, + { + "input": "file:#frag", + "base": "about:blank", + "href": "file:///#frag", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "#frag" + }, + "# file: drive letter cases from https://crbug.com/1078698", + { + "input": "file:///Y:", + "base": "about:blank", + "href": "file:///Y:", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/Y:", + "search": "", + "hash": "" + }, + { + "input": "file:///Y:/", + "base": "about:blank", + "href": "file:///Y:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/Y:/", + "search": "", + "hash": "" + }, + { + "input": "file:///./Y", + "base": "about:blank", + "href": "file:///Y", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/Y", + "search": "", + "hash": "" + }, + { + "input": "file:///./Y:", + "base": "about:blank", + "href": "file:///Y:", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/Y:", + "search": "", + "hash": "" + }, + { + "input": "\\\\\\.\\Y:", + "base": "about:blank", + "failure": true, + "inputCanBeRelative": true + }, + "# file: drive letter cases from https://crbug.com/1078698 but lowercased", + { + "input": "file:///y:", + "base": "about:blank", + "href": "file:///y:", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/y:", + "search": "", + "hash": "" + }, + { + "input": "file:///y:/", + "base": "about:blank", + "href": "file:///y:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/y:/", + "search": "", + "hash": "" + }, + { + "input": "file:///./y", + "base": "about:blank", + "href": "file:///y", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/y", + "search": "", + "hash": "" + }, + { + "input": "file:///./y:", + "base": "about:blank", + "href": "file:///y:", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/y:", + "search": "", + "hash": "" + }, + { + "input": "\\\\\\.\\y:", + "base": "about:blank", + "failure": true, + "inputCanBeRelative": true + }, + "# Additional file URL tests for (https://github.com/whatwg/url/issues/405)", + { + "input": "file:///one/two", + "base": "file:///", + "href": "file:///one/two", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/one/two", + "search": "", + "hash": "" + }, + { + "input": "//one/two", + "base": "file:///", + "href": "file://one/two", + "protocol": "file:", + "username": "", + "password": "", + "host": "one", + "hostname": "one", + "port": "", + "pathname": "/two", + "search": "", + "hash": "" + }, + { + "input": "///one/two", + "base": "file:///", + "href": "file:///one/two", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/one/two", + "search": "", + "hash": "" + }, + "# IPv6 tests", + { + "input": "http://[1:0::]", + "base": "http://example.net/", + "href": "http://[1::]/", + "origin": "http://[1::]", + "protocol": "http:", + "username": "", + "password": "", + "host": "[1::]", + "hostname": "[1::]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://[0:1:2:3:4:5:6:7:8]", + "base": "http://example.net/", + "failure": true + }, + { + "input": "https://[0::0::0]", + "base": "about:blank", + "failure": true + }, + { + "input": "https://[0:.0]", + "base": "about:blank", + "failure": true + }, + { + "input": "https://[0:0:]", + "base": "about:blank", + "failure": true + }, + { + "input": "https://[0:1:2:3:4:5:6:7.0.0.0.1]", + "base": "about:blank", + "failure": true + }, + { + "input": "https://[0:1.00.0.0.0]", + "base": "about:blank", + "failure": true + }, + { + "input": "https://[0:1.290.0.0.0]", + "base": "about:blank", + "failure": true + }, + { + "input": "https://[0:1.23.23]", + "base": "about:blank", + "failure": true + }, + "# Empty host", + { + "input": "http://?", + "base": "about:blank", + "failure": true + }, + { + "input": "http://#", + "base": "about:blank", + "failure": true + }, + "Port overflow (2^32 + 81)", + { + "input": "http://f:4294967377/c", + "base": "http://example.org/", + "failure": true + }, + "Port overflow (2^64 + 81)", + { + "input": "http://f:18446744073709551697/c", + "base": "http://example.org/", + "failure": true + }, + "Port overflow (2^128 + 81)", + { + "input": "http://f:340282366920938463463374607431768211537/c", + "base": "http://example.org/", + "failure": true + }, + "# Non-special-URL path tests", + { + "input": "sc://ñ", + "base": "about:blank", + "href": "sc://%C3%B1", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, + { + "input": "sc://ñ?x", + "base": "about:blank", + "href": "sc://%C3%B1?x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "?x", + "hash": "" + }, + { + "input": "sc://ñ#x", + "base": "about:blank", + "href": "sc://%C3%B1#x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "", + "hash": "#x" + }, + { + "input": "#x", + "base": "sc://ñ", + "href": "sc://%C3%B1#x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "", + "hash": "#x" + }, + { + "input": "?x", + "base": "sc://ñ", + "href": "sc://%C3%B1?x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "?x", + "hash": "" + }, + { + "input": "sc://?", + "base": "about:blank", + "href": "sc://?", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, + { + "input": "sc://#", + "base": "about:blank", + "href": "sc://#", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, + { + "input": "///", + "base": "sc://x/", + "href": "sc:///", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "////", + "base": "sc://x/", + "href": "sc:////", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "////x/", + "base": "sc://x/", + "href": "sc:////x/", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//x/", + "search": "", + "hash": "" + }, + { + "input": "tftp://foobar.com/someconfig;mode=netascii", + "base": "about:blank", + "href": "tftp://foobar.com/someconfig;mode=netascii", + "origin": "null", + "protocol": "tftp:", + "username": "", + "password": "", + "host": "foobar.com", + "hostname": "foobar.com", + "port": "", + "pathname": "/someconfig;mode=netascii", + "search": "", + "hash": "" + }, + { + "input": "telnet://user:pass@foobar.com:23/", + "base": "about:blank", + "href": "telnet://user:pass@foobar.com:23/", + "origin": "null", + "protocol": "telnet:", + "username": "user", + "password": "pass", + "host": "foobar.com:23", + "hostname": "foobar.com", + "port": "23", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ut2004://10.10.10.10:7777/Index.ut2", + "base": "about:blank", + "href": "ut2004://10.10.10.10:7777/Index.ut2", + "origin": "null", + "protocol": "ut2004:", + "username": "", + "password": "", + "host": "10.10.10.10:7777", + "hostname": "10.10.10.10", + "port": "7777", + "pathname": "/Index.ut2", + "search": "", + "hash": "" + }, + { + "input": "redis://foo:bar@somehost:6379/0?baz=bam&qux=baz", + "base": "about:blank", + "href": "redis://foo:bar@somehost:6379/0?baz=bam&qux=baz", + "origin": "null", + "protocol": "redis:", + "username": "foo", + "password": "bar", + "host": "somehost:6379", + "hostname": "somehost", + "port": "6379", + "pathname": "/0", + "search": "?baz=bam&qux=baz", + "hash": "" + }, + { + "input": "rsync://foo@host:911/sup", + "base": "about:blank", + "href": "rsync://foo@host:911/sup", + "origin": "null", + "protocol": "rsync:", + "username": "foo", + "password": "", + "host": "host:911", + "hostname": "host", + "port": "911", + "pathname": "/sup", + "search": "", + "hash": "" + }, + { + "input": "git://github.com/foo/bar.git", + "base": "about:blank", + "href": "git://github.com/foo/bar.git", + "origin": "null", + "protocol": "git:", + "username": "", + "password": "", + "host": "github.com", + "hostname": "github.com", + "port": "", + "pathname": "/foo/bar.git", + "search": "", + "hash": "" + }, + { + "input": "irc://myserver.com:6999/channel?passwd", + "base": "about:blank", + "href": "irc://myserver.com:6999/channel?passwd", + "origin": "null", + "protocol": "irc:", + "username": "", + "password": "", + "host": "myserver.com:6999", + "hostname": "myserver.com", + "port": "6999", + "pathname": "/channel", + "search": "?passwd", + "hash": "" + }, + { + "input": "dns://fw.example.org:9999/foo.bar.org?type=TXT", + "base": "about:blank", + "href": "dns://fw.example.org:9999/foo.bar.org?type=TXT", + "origin": "null", + "protocol": "dns:", + "username": "", + "password": "", + "host": "fw.example.org:9999", + "hostname": "fw.example.org", + "port": "9999", + "pathname": "/foo.bar.org", + "search": "?type=TXT", + "hash": "" + }, + { + "input": "ldap://localhost:389/ou=People,o=JNDITutorial", + "base": "about:blank", + "href": "ldap://localhost:389/ou=People,o=JNDITutorial", + "origin": "null", + "protocol": "ldap:", + "username": "", + "password": "", + "host": "localhost:389", + "hostname": "localhost", + "port": "389", + "pathname": "/ou=People,o=JNDITutorial", + "search": "", + "hash": "" + }, + { + "input": "git+https://github.com/foo/bar", + "base": "about:blank", + "href": "git+https://github.com/foo/bar", + "origin": "null", + "protocol": "git+https:", + "username": "", + "password": "", + "host": "github.com", + "hostname": "github.com", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "urn:ietf:rfc:2648", + "base": "about:blank", + "href": "urn:ietf:rfc:2648", + "origin": "null", + "protocol": "urn:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "ietf:rfc:2648", + "search": "", + "hash": "" + }, + { + "input": "tag:joe@example.org,2001:foo/bar", + "base": "about:blank", + "href": "tag:joe@example.org,2001:foo/bar", + "origin": "null", + "protocol": "tag:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "joe@example.org,2001:foo/bar", + "search": "", + "hash": "" + }, + "# percent encoded hosts in non-special-URLs", + { + "input": "non-special://%E2%80%A0/", + "base": "about:blank", + "href": "non-special://%E2%80%A0/", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "%E2%80%A0", + "hostname": "%E2%80%A0", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "non-special://H%4fSt/path", + "base": "about:blank", + "href": "non-special://H%4fSt/path", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "H%4fSt", + "hostname": "H%4fSt", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, + "# IPv6 in non-special-URLs", + { + "input": "non-special://[1:2:0:0:5:0:0:0]/", + "base": "about:blank", + "href": "non-special://[1:2:0:0:5::]/", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "[1:2:0:0:5::]", + "hostname": "[1:2:0:0:5::]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "non-special://[1:2:0:0:0:0:0:3]/", + "base": "about:blank", + "href": "non-special://[1:2::3]/", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "[1:2::3]", + "hostname": "[1:2::3]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "non-special://[1:2::3]:80/", + "base": "about:blank", + "href": "non-special://[1:2::3]:80/", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "[1:2::3]:80", + "hostname": "[1:2::3]", + "port": "80", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "non-special://[:80/", + "base": "about:blank", + "failure": true + }, + { + "input": "blob:https://example.com:443/", + "base": "about:blank", + "href": "blob:https://example.com:443/", + "origin": "https://example.com", + "protocol": "blob:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "https://example.com:443/", + "search": "", + "hash": "" + }, + { + "input": "blob:d3958f5c-0777-0845-9dcf-2cb28783acaf", + "base": "about:blank", + "href": "blob:d3958f5c-0777-0845-9dcf-2cb28783acaf", + "origin": "null", + "protocol": "blob:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "d3958f5c-0777-0845-9dcf-2cb28783acaf", + "search": "", + "hash": "" + }, + { + "input": "blob:", + "base": "about:blank", + "href": "blob:", + "origin": "null", + "protocol": "blob:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, + "Invalid IPv4 radix digits", + { + "input": "http://0x7f.0.0.0x7g", + "base": "about:blank", + "href": "http://0x7f.0.0.0x7g/", + "protocol": "http:", + "username": "", + "password": "", + "host": "0x7f.0.0.0x7g", + "hostname": "0x7f.0.0.0x7g", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://0X7F.0.0.0X7G", + "base": "about:blank", + "href": "http://0x7f.0.0.0x7g/", + "protocol": "http:", + "username": "", + "password": "", + "host": "0x7f.0.0.0x7g", + "hostname": "0x7f.0.0.0x7g", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Invalid IPv4 portion of IPv6 address", + { + "input": "http://[::127.0.0.0.1]", + "base": "about:blank", + "failure": true + }, + "Uncompressed IPv6 addresses with 0", + { + "input": "http://[0:1:0:1:0:1:0:1]", + "base": "about:blank", + "href": "http://[0:1:0:1:0:1:0:1]/", + "protocol": "http:", + "username": "", + "password": "", + "host": "[0:1:0:1:0:1:0:1]", + "hostname": "[0:1:0:1:0:1:0:1]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://[1:0:1:0:1:0:1:0]", + "base": "about:blank", + "href": "http://[1:0:1:0:1:0:1:0]/", + "protocol": "http:", + "username": "", + "password": "", + "host": "[1:0:1:0:1:0:1:0]", + "hostname": "[1:0:1:0:1:0:1:0]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Percent-encoded query and fragment", + { + "input": "http://example.org/test?\u0022", + "base": "about:blank", + "href": "http://example.org/test?%22", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?%22", + "hash": "" + }, + { + "input": "http://example.org/test?\u0023", + "base": "about:blank", + "href": "http://example.org/test?#", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "", + "hash": "" + }, + { + "input": "http://example.org/test?\u003C", + "base": "about:blank", + "href": "http://example.org/test?%3C", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?%3C", + "hash": "" + }, + { + "input": "http://example.org/test?\u003E", + "base": "about:blank", + "href": "http://example.org/test?%3E", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?%3E", + "hash": "" + }, + { + "input": "http://example.org/test?\u2323", + "base": "about:blank", + "href": "http://example.org/test?%E2%8C%A3", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?%E2%8C%A3", + "hash": "" + }, + { + "input": "http://example.org/test?%23%23", + "base": "about:blank", + "href": "http://example.org/test?%23%23", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?%23%23", + "hash": "" + }, + { + "input": "http://example.org/test?%GH", + "base": "about:blank", + "href": "http://example.org/test?%GH", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?%GH", + "hash": "" + }, + { + "input": "http://example.org/test?a#%EF", + "base": "about:blank", + "href": "http://example.org/test?a#%EF", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?a", + "hash": "#%EF" + }, + { + "input": "http://example.org/test?a#%GH", + "base": "about:blank", + "href": "http://example.org/test?a#%GH", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?a", + "hash": "#%GH" + }, + "URLs that require a non-about:blank base. (Also serve as invalid base tests.)", + { + "input": "a", + "base": "about:blank", + "failure": true, + "inputCanBeRelative": true + }, + { + "input": "a/", + "base": "about:blank", + "failure": true, + "inputCanBeRelative": true + }, + { + "input": "a//", + "base": "about:blank", + "failure": true, + "inputCanBeRelative": true + }, + "Bases that don't fail to parse but fail to be bases", + { + "input": "test-a-colon.html", + "base": "a:", + "failure": true + }, + { + "input": "test-a-colon-b.html", + "base": "a:b", + "failure": true + }, + "Other base URL tests, that must succeed", + { + "input": "test-a-colon-slash.html", + "base": "a:/", + "href": "a:/test-a-colon-slash.html", + "protocol": "a:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test-a-colon-slash.html", + "search": "", + "hash": "" + }, + { + "input": "test-a-colon-slash-slash.html", + "base": "a://", + "href": "a:///test-a-colon-slash-slash.html", + "protocol": "a:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test-a-colon-slash-slash.html", + "search": "", + "hash": "" + }, + { + "input": "test-a-colon-slash-b.html", + "base": "a:/b", + "href": "a:/test-a-colon-slash-b.html", + "protocol": "a:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test-a-colon-slash-b.html", + "search": "", + "hash": "" + }, + { + "input": "test-a-colon-slash-slash-b.html", + "base": "a://b", + "href": "a://b/test-a-colon-slash-slash-b.html", + "protocol": "a:", + "username": "", + "password": "", + "host": "b", + "hostname": "b", + "port": "", + "pathname": "/test-a-colon-slash-slash-b.html", + "search": "", + "hash": "" + }, + "Null code point in fragment", + { + "input": "http://example.org/test?a#b\u0000c", + "base": "about:blank", + "href": "http://example.org/test?a#b%00c", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?a", + "hash": "#b%00c" + }, + { + "input": "non-spec://example.org/test?a#b\u0000c", + "base": "about:blank", + "href": "non-spec://example.org/test?a#b%00c", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?a", + "hash": "#b%00c" + }, + { + "input": "non-spec:/test?a#b\u0000c", + "base": "about:blank", + "href": "non-spec:/test?a#b%00c", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "?a", + "hash": "#b%00c" + }, + "First scheme char - not allowed: https://github.com/whatwg/url/issues/464", + { + "input": "10.0.0.7:8080/foo.html", + "base": "file:///some/dir/bar.html", + "href": "file:///some/dir/10.0.0.7:8080/foo.html", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/some/dir/10.0.0.7:8080/foo.html", + "search": "", + "hash": "" + }, + "Subsequent scheme chars - not allowed", + { + "input": "a!@$*=/foo.html", + "base": "file:///some/dir/bar.html", + "href": "file:///some/dir/a!@$*=/foo.html", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/some/dir/a!@$*=/foo.html", + "search": "", + "hash": "" + }, + "First and subsequent scheme chars - allowed", + { + "input": "a1234567890-+.:foo/bar", + "base": "http://example.com/dir/file", + "href": "a1234567890-+.:foo/bar", + "protocol": "a1234567890-+.:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "foo/bar", + "search": "", + "hash": "" + }, + "IDNA ignored code points in file URLs hosts", + { + "input": "file://a\u00ADb/p", + "base": "about:blank", + "href": "file://ab/p", + "protocol": "file:", + "username": "", + "password": "", + "host": "ab", + "hostname": "ab", + "port": "", + "pathname": "/p", + "search": "", + "hash": "" + }, + { + "input": "file://a%C2%ADb/p", + "base": "about:blank", + "href": "file://ab/p", + "protocol": "file:", + "username": "", + "password": "", + "host": "ab", + "hostname": "ab", + "port": "", + "pathname": "/p", + "search": "", + "hash": "" + }, + "Empty host after the domain to ASCII", + { + "input": "file://\u00ad/p", + "base": "about:blank", + "failure": true + }, + { + "input": "file://%C2%AD/p", + "base": "about:blank", + "failure": true + }, + { + "input": "file://xn--/p", + "base": "about:blank", + "failure": true + }, + "https://bugzilla.mozilla.org/show_bug.cgi?id=1647058", + { + "input": "#link", + "base": "https://example.org/##link", + "href": "https://example.org/#link", + "protocol": "https:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/", + "search": "", + "hash": "#link" + }, + "UTF-8 percent-encode of C0 control percent-encode set and supersets", + { + "input": "non-special:cannot-be-a-base-url-\u0000\u0001\u001F\u001E\u007E\u007F\u0080", + "base": "about:blank", + "hash": "", + "host": "", + "hostname": "", + "href": "non-special:cannot-be-a-base-url-%00%01%1F%1E~%7F%C2%80", + "origin": "null", + "password": "", + "pathname": "cannot-be-a-base-url-%00%01%1F%1E~%7F%C2%80", + "port": "", + "protocol": "non-special:", + "search": "", + "username": "" + }, + { + "input": "https://www.example.com/path{\u007Fpath.html?query'\u007F=query#fragment<\u007Ffragment", + "base": "about:blank", + "hash": "#fragment%3C%7Ffragment", + "host": "www.example.com", + "hostname": "www.example.com", + "href": "https://www.example.com/path%7B%7Fpath.html?query%27%7F=query#fragment%3C%7Ffragment", + "origin": "https://www.example.com", + "password": "", + "pathname": "/path%7B%7Fpath.html", + "port": "", + "protocol": "https:", + "search": "?query%27%7F=query", + "username": "" + }, + { + "input": "https://user:pass[\u007F@foo/bar", + "base": "http://example.org", + "hash": "", + "host": "foo", + "hostname": "foo", + "href": "https://user:pass%5B%7F@foo/bar", + "origin": "https://foo", + "password": "pass%5B%7F", + "pathname": "/bar", + "port": "", + "protocol": "https:", + "search": "", + "username": "user" + }, + "Tests for the distinct percent-encode sets", + { + "input": "foo:// !\"$%&'()*+,-.;<=>@[\\]^_`{|}~@host/", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "foo://%20!%22$%&'()*+,-.%3B%3C%3D%3E%40%5B%5C%5D%5E_%60%7B%7C%7D~@host/", + "origin": "null", + "password": "", + "pathname": "/", + "port":"", + "protocol": "foo:", + "search": "", + "username": "%20!%22$%&'()*+,-.%3B%3C%3D%3E%40%5B%5C%5D%5E_%60%7B%7C%7D~" + }, + { + "input": "wss:// !\"$%&'()*+,-.;<=>@[]^_`{|}~@host/", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "wss://%20!%22$%&'()*+,-.%3B%3C%3D%3E%40%5B%5D%5E_%60%7B%7C%7D~@host/", + "origin": "wss://host", + "password": "", + "pathname": "/", + "port":"", + "protocol": "wss:", + "search": "", + "username": "%20!%22$%&'()*+,-.%3B%3C%3D%3E%40%5B%5D%5E_%60%7B%7C%7D~" + }, + { + "input": "foo://joe: !\"$%&'()*+,-.:;<=>@[\\]^_`{|}~@host/", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "foo://joe:%20!%22$%&'()*+,-.%3A%3B%3C%3D%3E%40%5B%5C%5D%5E_%60%7B%7C%7D~@host/", + "origin": "null", + "password": "%20!%22$%&'()*+,-.%3A%3B%3C%3D%3E%40%5B%5C%5D%5E_%60%7B%7C%7D~", + "pathname": "/", + "port":"", + "protocol": "foo:", + "search": "", + "username": "joe" + }, + { + "input": "wss://joe: !\"$%&'()*+,-.:;<=>@[]^_`{|}~@host/", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "wss://joe:%20!%22$%&'()*+,-.%3A%3B%3C%3D%3E%40%5B%5D%5E_%60%7B%7C%7D~@host/", + "origin": "wss://host", + "password": "%20!%22$%&'()*+,-.%3A%3B%3C%3D%3E%40%5B%5D%5E_%60%7B%7C%7D~", + "pathname": "/", + "port":"", + "protocol": "wss:", + "search": "", + "username": "joe" + }, + { + "input": "foo://!\"$%&'()*+,-.;=_`{}~/", + "base": "about:blank", + "hash": "", + "host": "!\"$%&'()*+,-.;=_`{}~", + "hostname": "!\"$%&'()*+,-.;=_`{}~", + "href":"foo://!\"$%&'()*+,-.;=_`{}~/", + "origin": "null", + "password": "", + "pathname": "/", + "port":"", + "protocol": "foo:", + "search": "", + "username": "" + }, + { + "input": "wss://!\"$&'()*+,-.;=_`{}~/", + "base": "about:blank", + "hash": "", + "host": "!\"$&'()*+,-.;=_`{}~", + "hostname": "!\"$&'()*+,-.;=_`{}~", + "href":"wss://!\"$&'()*+,-.;=_`{}~/", + "origin": "wss://!\"$&'()*+,-.;=_`{}~", + "password": "", + "pathname": "/", + "port":"", + "protocol": "wss:", + "search": "", + "username": "" + }, + { + "input": "foo://host/ !\"$%&'()*+,-./:;<=>@[\\]^_`{|}~", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "foo://host/%20!%22$%&'()*+,-./:;%3C=%3E@[\\]^_%60%7B|%7D~", + "origin": "null", + "password": "", + "pathname": "/%20!%22$%&'()*+,-./:;%3C=%3E@[\\]^_%60%7B|%7D~", + "port":"", + "protocol": "foo:", + "search": "", + "username": "" + }, + { + "input": "wss://host/ !\"$%&'()*+,-./:;<=>@[\\]^_`{|}~", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "wss://host/%20!%22$%&'()*+,-./:;%3C=%3E@[/]^_%60%7B|%7D~", + "origin": "wss://host", + "password": "", + "pathname": "/%20!%22$%&'()*+,-./:;%3C=%3E@[/]^_%60%7B|%7D~", + "port":"", + "protocol": "wss:", + "search": "", + "username": "" + }, + { + "input": "foo://host/dir/? !\"$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "foo://host/dir/?%20!%22$%&'()*+,-./:;%3C=%3E?@[\\]^_`{|}~", + "origin": "null", + "password": "", + "pathname": "/dir/", + "port":"", + "protocol": "foo:", + "search": "?%20!%22$%&'()*+,-./:;%3C=%3E?@[\\]^_`{|}~", + "username": "" + }, + { + "input": "wss://host/dir/? !\"$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + "base": "about:blank", + "hash": "", + "host": "host", + "hostname": "host", + "href": "wss://host/dir/?%20!%22$%&%27()*+,-./:;%3C=%3E?@[\\]^_`{|}~", + "origin": "wss://host", + "password": "", + "pathname": "/dir/", + "port":"", + "protocol": "wss:", + "search": "?%20!%22$%&%27()*+,-./:;%3C=%3E?@[\\]^_`{|}~", + "username": "" + }, + { + "input": "foo://host/dir/# !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + "base": "about:blank", + "hash": "#%20!%22#$%&'()*+,-./:;%3C=%3E?@[\\]^_%60{|}~", + "host": "host", + "hostname": "host", + "href": "foo://host/dir/#%20!%22#$%&'()*+,-./:;%3C=%3E?@[\\]^_%60{|}~", + "origin": "null", + "password": "", + "pathname": "/dir/", + "port":"", + "protocol": "foo:", + "search": "", + "username": "" + }, + { + "input": "wss://host/dir/# !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + "base": "about:blank", + "hash": "#%20!%22#$%&'()*+,-./:;%3C=%3E?@[\\]^_%60{|}~", + "host": "host", + "hostname": "host", + "href": "wss://host/dir/#%20!%22#$%&'()*+,-./:;%3C=%3E?@[\\]^_%60{|}~", + "origin": "wss://host", + "password": "", + "pathname": "/dir/", + "port":"", + "protocol": "wss:", + "search": "", + "username": "" + }, + "Ensure that input schemes are not ignored when resolving non-special URLs", + { + "input": "abc:rootless", + "base": "abc://host/path", + "hash": "", + "host": "", + "hostname": "", + "href":"abc:rootless", + "password": "", + "pathname": "rootless", + "port":"", + "protocol": "abc:", + "search": "", + "username": "" + }, + { + "input": "abc:rootless", + "base": "abc:/path", + "hash": "", + "host": "", + "hostname": "", + "href":"abc:rootless", + "password": "", + "pathname": "rootless", + "port":"", + "protocol": "abc:", + "search": "", + "username": "" + }, + { + "input": "abc:rootless", + "base": "abc:path", + "hash": "", + "host": "", + "hostname": "", + "href":"abc:rootless", + "password": "", + "pathname": "rootless", + "port":"", + "protocol": "abc:", + "search": "", + "username": "" + }, + { + "input": "abc:/rooted", + "base": "abc://host/path", + "hash": "", + "host": "", + "hostname": "", + "href":"abc:/rooted", + "password": "", + "pathname": "/rooted", + "port":"", + "protocol": "abc:", + "search": "", + "username": "" + }, + "Empty query and fragment with blank should throw an error", + { + "input": "#", + "base": null, + "failure": true + }, + { + "input": "?", + "base": null, + "failure": true + }, + "Last component looks like a number, but not valid IPv4", + { + "input": "http://1.2.3.4.5", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://1.2.3.4.5.", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://0..0x300/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://0..0x300./", + "base": "about:blank", + "failure": true + }, + { + "input": "http://256.256.256.256.256", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://256.256.256.256.256.", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://1.2.3.08", + "base": "about:blank", + "failure": true + }, + { + "input": "http://1.2.3.08.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://1.2.3.09", + "base": "about:blank", + "failure": true + }, + { + "input": "http://09.2.3.4", + "base": "about:blank", + "failure": true + }, + { + "input": "http://09.2.3.4.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://01.2.3.4.5", + "base": "about:blank", + "failure": true + }, + { + "input": "http://01.2.3.4.5.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://0x100.2.3.4", + "base": "about:blank", + "failure": true + }, + { + "input": "http://0x100.2.3.4.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://0x1.2.3.4.5", + "base": "about:blank", + "failure": true + }, + { + "input": "http://0x1.2.3.4.5.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.1.2.3.4", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.1.2.3.4.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.2.3.4", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.2.3.4.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.09", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.09.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.0x4", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.0x4.", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.09..", + "base": "about:blank", + "hash": "", + "host": "foo.09..", + "hostname": "foo.09..", + "href":"http://foo.09../", + "password": "", + "pathname": "/", + "port":"", + "protocol": "http:", + "search": "", + "username": "" + }, + { + "input": "http://0999999999999999999/", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.0x", + "base": "about:blank", + "failure": true + }, + { + "input": "http://foo.0XFfFfFfFfFfFfFfFfFfAcE123", + "base": "about:blank", + "failure": true + }, + { + "input": "http://💩.123/", + "base": "about:blank", + "failure": true + } +] -- 2.7.4