1 // Copyright 2015 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "url/scheme_host_port.h"
11 #include <string_view>
14 #include "base/check_op.h"
15 #include "base/containers/contains.h"
16 #include "base/notreached.h"
17 #include "base/numerics/safe_conversions.h"
18 #include "base/strings/string_number_conversions.h"
19 #include "base/trace_event/memory_usage_estimator.h"
21 #include "url/third_party/mozilla/url_parse.h"
22 #include "url/url_canon.h"
23 #include "url/url_canon_stdstring.h"
24 #include "url/url_constants.h"
25 #include "url/url_util.h"
// Returns true when |host| is already in canonical form. This is decided by
// running |host| through CanonicalizeHostVerbose() and comparing the
// canonicalizer's output with the input, byte for byte.
31 bool IsCanonicalHost(const std::string_view& host) {
32 std::string canon_host;
34 // Try to canonicalize the host (copy/pasted from net/base. :( ).
// Component takes an int length, so the size_t length of |host| goes through
// a checked conversion. The component spans the whole of |host|.
35 const Component raw_host_component(0,
36 base::checked_cast<int>(host.length()));
// The canonicalizer writes into |canon_host| through this output adapter.
37 StdStringCanonOutput canon_host_output(&canon_host);
38 CanonHostInfo host_info;
39 CanonicalizeHostVerbose(host.data(), raw_host_component,
40 &canon_host_output, &host_info);
// Only a non-empty result whose family is not BROKEN counts as a successful
// canonicalization.
42 if (host_info.out_host.is_nonempty() &&
43 host_info.family != CanonHostInfo::BROKEN) {
44 // Success! Assert that there's no extra garbage.
45 canon_host_output.Complete();
46 DCHECK_EQ(host_info.out_host.len, static_cast<int>(canon_host.length()));
48 // Empty host, or canonicalization failed.
// NOTE(review): on the failure path |canon_host| is presumably reset to
// empty so that the comparison below rejects any non-empty |host| - confirm.
// Canonical iff canonicalization left the text unchanged.
52 return host == canon_host;
55 // Note: When changing IsValidInput, consider also updating
56 // ShouldTreatAsOpaqueOrigin in Blink (there might be existing differences in
57 // behavior between these 2 layers, but we should avoid introducing new
// differences).
//
// Reports whether (|scheme|, |host|, |port|) is an acceptable tuple for a
// SchemeHostPort. |policy| selects how canonicalization of |host| is
// enforced: with CHECK_CANONICALIZATION the host is verified at runtime via
// IsCanonicalHost(); with any other policy canonical form is only DCHECKed.
59 bool IsValidInput(const std::string_view& scheme,
60 const std::string_view& host,
62 SchemeHostPort::ConstructPolicy policy) {
63 // Empty schemes are never valid.
67 // about:blank and other no-access schemes translate into an opaque origin.
68 // This helps consistency with ShouldTreatAsOpaqueOrigin in Blink.
69 if (base::Contains(GetNoAccessSchemes(), scheme))
// Look up whether |scheme| is a standard scheme. |scheme_type| starts from a
// default value and is switched on further down; it is presumably filled in
// by this lookup - confirm. The Component spans the whole of |scheme|.
72 SchemeType scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
73 bool is_standard = GetStandardSchemeType(
75 Component(0, base::checked_cast<int>(scheme.length())),
78 // To be consistent with ShouldTreatAsOpaqueOrigin in Blink, local
79 // non-standard schemes are currently allowed to be tuple origins.
80 // Nonstandard schemes don't have hostnames, so their tuple is just
81 // ("protocol", "", 0).
83 // TODO: Migrate "content:" and "externalfile:" to be standard schemes, and
84 // remove this local scheme exception.
// The local-scheme exception only applies to the exact (scheme, "", 0) shape.
85 if (base::Contains(GetLocalSchemes(), scheme) && host.empty() && port == 0)
88 // Otherwise, allow non-standard schemes only if the Android WebView
89 // workaround is enabled.
90 return AllowNonStandardSchemesForAndroidWebView();
// Tizen TV builds special-case the DVB, OpApp, TVKey and HbbTV carousel
// schemes.
93 #if BUILDFLAG(IS_TIZEN_TV)
94 if (scheme == kDvbScheme || scheme == kOpAppScheme ||
95 scheme == kTVKeyScheme || scheme == kHbbTVCarouselScheme ||
// From here on the rules depend on what kind of authority the scheme carries.
100 switch (scheme_type) {
101 case SCHEME_WITH_HOST_AND_PORT:
102 case SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION:
103 // A URL with |scheme| is required to have the host and port, so return an
104 // invalid instance if host is not given. Note that a valid port is
105 // always provided by SchemeHostPort(const GURL&) constructor (a missing
106 // port is replaced with a default port if needed by
107 // GURL::EffectiveIntPort()).
111 // Don't do an expensive canonicalization if the host is already
// canonicalized.
// When the caller vouches for the host (policy is not
// CHECK_CANONICALIZATION), that claim is still verified in DCHECK builds.
113 DCHECK(policy == SchemeHostPort::CHECK_CANONICALIZATION ||
114 IsCanonicalHost(host));
115 if (policy == SchemeHostPort::CHECK_CANONICALIZATION &&
116 !IsCanonicalHost(host)) {
122 case SCHEME_WITH_HOST:
124 // Return an invalid object if a URL with the scheme never represents
125 // the port data but the given |port| is non-zero.
129 // Don't do an expensive canonicalization if the host is already
// canonicalized.
// Same DCHECK-then-runtime-check pattern as the host-and-port case above.
131 DCHECK(policy == SchemeHostPort::CHECK_CANONICALIZATION ||
132 IsCanonicalHost(host));
133 if (policy == SchemeHostPort::CHECK_CANONICALIZATION &&
134 !IsCanonicalHost(host)) {
140 case SCHEME_WITHOUT_AUTHORITY:
// Defaulted default constructor. |scheme_| presumably starts out empty, in
// which case IsValid() reports false for a default-constructed object.
151 SchemeHostPort::SchemeHostPort() = default;
// Builds a SchemeHostPort from already-split components. The inputs are
// screened by IsValidInput() under |policy|; when accepted, |scheme| and
// |host| are moved into the corresponding members.
153 SchemeHostPort::SchemeHostPort(std::string scheme,
156 ConstructPolicy policy) {
157 if (!IsValidInput(scheme, host, port, policy)) {
// NOTE(review): rejected input presumably leaves the object in its invalid
// (default) state - confirm.
162 scheme_ = std::move(scheme);
163 host_ = std::move(host);
// An accepted tuple must yield a valid object; the streamed text identifies
// the offending tuple if this ever fires.
165 DCHECK(IsValid()) << "Scheme: " << scheme_ << " Host: " << host_
166 << " Port: " << port;
// Convenience overload for string_view inputs. It copies |scheme| into an
// owned std::string and delegates to the constructor that takes a
// ConstructPolicy, always passing CHECK_CANONICALIZATION so that |host| is
// verified rather than trusted.
169 SchemeHostPort::SchemeHostPort(std::string_view scheme,
170 std::string_view host,
172 : SchemeHostPort(std::string(scheme),
175 ConstructPolicy::CHECK_CANONICALIZATION) {}
// Builds a SchemeHostPort from |url|, using its scheme, host and effective
// port. The components are validated with ALREADY_CANONICALIZED, so the host
// is not re-canonicalized here (IsValidInput only DCHECKs it).
177 SchemeHostPort::SchemeHostPort(const GURL& url) {
// Views into |url|; they are copied into owned strings only once the input
// has been accepted.
181 std::string_view scheme = url.scheme_piece();
182 std::string_view host = url.host_piece();
184 // A valid GURL never returns PORT_INVALID.
185 int port = url.EffectiveIntPort();
186 if (port == PORT_UNSPECIFIED) {
// Debug-only check that the port does not exceed 65535.
190 DCHECK_LE(port, 65535);
193 if (!IsValidInput(scheme, host, port, ALREADY_CANONICALIZED))
// Input accepted: take owned copies of the views.
196 scheme_ = std::string(scheme);
197 host_ = std::string(host);
// Defaulted destructor, defined out of line.
201 SchemeHostPort::~SchemeHostPort() = default;
// Returns true when this object holds a usable tuple, which is signalled
// solely by a non-empty |scheme_|.
203 bool SchemeHostPort::IsValid() const {
204 // It suffices to just check |scheme_| for emptiness; the other fields are
205 // never present without it.
// Debug-only enforcement of that invariant: an empty scheme implies an empty
// host and a zero port.
206 DCHECK(!scheme_.empty() || host_.empty());
207 DCHECK(!scheme_.empty() || port_ == 0);
208 return !scheme_.empty();
// Returns the string form of this tuple as produced by SerializeInternal().
// The Parsed structure that SerializeInternal() fills in is not used here.
211 std::string SchemeHostPort::Serialize() const {
212 // Null checking for |parsed| in SerializeInternal is probably slower than
213 // just filling it in and discarding it here.
215 return SerializeInternal(&parsed);
// Converts this tuple into a GURL. Three return paths are visible:
//  1. a GURL built from the serialization with `false` as its last argument
//     (presumably the validity flag - confirm);
//  2. a GURL obtained by re-parsing the serialized string;
//  3. a GURL built from the serialization plus a trailing "/" path, reusing
//     the component offsets recorded in |parsed|.
218 GURL SchemeHostPort::GetURL() const {
// |parsed| receives the component offsets that match |serialized|.
220 std::string serialized = SerializeInternal(&parsed);
223 return GURL(std::move(serialized), parsed, false);
225 // SchemeHostPort does not have enough information to determine if an empty
226 // host is valid or not for the given scheme. Force re-parsing.
227 DCHECK(!scheme_.empty());
229 return GURL(serialized);
231 // If the serialized string is passed to GURL for parsing, it will append an
232 // empty path "/". Add that here. Note: per RFC 6454 we cannot do this for
233 // normal Origin serialization.
234 DCHECK(!parsed.path.is_valid());
// The path component is the single '/' about to be appended, so it starts at
// the current end of |serialized| and has length 1.
235 parsed.path = Component(serialized.length(), 1);
236 serialized.append("/");
237 return GURL(std::move(serialized), parsed, true);
// Returns the sum of the estimated memory usage of |scheme_| and |host_|.
// |port_| is not part of the sum.
240 size_t SchemeHostPort::EstimateMemoryUsage() const {
241 return base::trace_event::EstimateMemoryUsage(scheme_) +
242 base::trace_event::EstimateMemoryUsage(host_);
// Strict ordering over the tuple. Comparison is lexicographic on
// (port_, scheme_, host_): the port is compared first, then the scheme, then
// the host.
245 bool SchemeHostPort::operator<(const SchemeHostPort& other) const {
246 return std::tie(port_, scheme_, host_) <
247 std::tie(other.port_, other.scheme_, other.host_);
// Builds the serialized form: scheme, then the standard scheme separator,
// then host, then an optional ":port". As each component is appended, its
// offset and length are recorded in |parsed|. |parsed| is dereferenced
// without a null check in the code shown, so callers must pass a valid
// pointer.
250 std::string SchemeHostPort::SerializeInternal(url::Parsed* parsed) const {
255 // Reserve enough space for the "normal" case of scheme://host/.
256 result.reserve(scheme_.size() + host_.size() + 4);
258 if (!scheme_.empty()) {
// The scheme always starts at offset 0 of the output.
259 parsed->scheme = Component(0, scheme_.length());
260 result.append(scheme_);
263 result.append(kStandardSchemeSeparator);
265 if (!host_.empty()) {
// The offset is taken before the append, so it points at the first character
// of the host.
266 parsed->host = Component(result.length(), host_.length());
267 result.append(host_);
270 // Omit the port component if the port matches with the default port
271 // defined for the scheme, if any.
272 int default_port = DefaultPortForScheme(scheme_.data(),
273 static_cast<int>(scheme_.length()));
274 if (default_port == PORT_UNSPECIFIED)
276 if (port_ != default_port) {
// The ':' delimiter is pushed first so that the recorded port component
// covers only the digits.
277 result.push_back(':');
278 std::string port(base::NumberToString(port_));
279 parsed->port = Component(result.length(), port.length());
280 result.append(std::move(port));
// Streams the Serialize() form of |scheme_host_port| into |out| and returns
// the stream so that insertions can be chained.
286 std::ostream& operator<<(std::ostream& out,
287 const SchemeHostPort& scheme_host_port) {
288 return out << scheme_host_port.Serialize();