#include "extensions/common/url_pattern.h"
+#include <ostream>
+
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
+#include "base/strings/stringprintf.h"
#include "content/public/common/url_constants.h"
#include "extensions/common/constants.h"
+#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "url/gurl.h"
#include "url/url_util.h"
// TODO(aa): What about more obscure schemes like data: and javascript: ?
// Note: keep this array in sync with kValidSchemeMasks.
const char* kValidSchemes[] = {
- content::kHttpScheme,
- content::kHttpsScheme,
- chrome::kFileScheme,
- chrome::kFtpScheme,
- chrome::kChromeUIScheme,
- extensions::kExtensionScheme,
- chrome::kFileSystemScheme,
+ url::kHttpScheme,
+ url::kHttpsScheme,
+ url::kFileScheme,
+ url::kFtpScheme,
+ content::kChromeUIScheme,
+ extensions::kExtensionScheme,
+ url::kFileSystemScheme,
};
const int kValidSchemeMasks[] = {
const char kParseErrorInvalidHostWildcard[] = "Invalid host wildcard.";
const char kParseErrorEmptyPath[] = "Empty path.";
const char kParseErrorInvalidPort[] = "Invalid port.";
+const char kParseErrorInvalidHost[] = "Invalid host.";
// Message explaining each URLPattern::ParseResult.
const char* const kParseResultMessages[] = {
kParseErrorInvalidHostWildcard,
kParseErrorEmptyPath,
kParseErrorInvalidPort,
+ kParseErrorInvalidHost,
};
COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages),
if (scheme == "*")
return true;
- return url_util::IsStandard(scheme.c_str(),
- url_parse::Component(0, static_cast<int>(scheme.length())));
+ return url::IsStandard(scheme.c_str(),
+ url::Component(0, static_cast<int>(scheme.length())));
}
bool IsValidPortForScheme(const std::string& scheme, const std::string& port) {
return true;
// Only accept non-wildcard ports if the scheme uses ports.
- if (url_canon::DefaultPortForScheme(scheme.c_str(), scheme.length()) ==
- url_parse::PORT_UNSPECIFIED) {
+ if (url::DefaultPortForScheme(scheme.c_str(), scheme.length()) ==
+ url::PORT_UNSPECIFIED) {
return false;
}
- int parsed_port = url_parse::PORT_UNSPECIFIED;
+ int parsed_port = url::PORT_UNSPECIFIED;
if (!base::StringToInt(port, &parsed_port))
return false;
return (parsed_port >= 0) && (parsed_port < 65536);
} // namespace
+// static
+// Returns true when |scheme| exactly equals one of the entries of
+// kValidSchemes, and false otherwise.
+bool URLPattern::IsValidSchemeForExtensions(const std::string& scheme) {
+  const char* const* const schemes_end =
+      kValidSchemes + arraysize(kValidSchemes);
+  for (const char* const* candidate = kValidSchemes; candidate != schemes_end;
+       ++candidate) {
+    if (scheme == *candidate)
+      return true;
+  }
+  return false;
+}
+
URLPattern::URLPattern()
: valid_schemes_(SCHEME_NONE),
match_all_urls_(false),
match_all_urls_(false),
match_subdomains_(false),
port_("*") {
- if (PARSE_SUCCESS != Parse(pattern))
- NOTREACHED() << "URLPattern is invalid: " << pattern;
+ ParseResult result = Parse(pattern);
+ if (PARSE_SUCCESS != result)
+ NOTREACHED() << "URLPattern invalid: " << pattern << " result " << result;
}
URLPattern::~URLPattern() {
return GetAsString() == other.GetAsString();
}
+std::ostream& operator<<(std::ostream& out, const URLPattern& url_pattern) {
+  // Write the pattern's string form surrounded by double quotes.
+  out << '"';
+  out << url_pattern.GetAsString();
+  out << '"';
+  return out;
+}
+
URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
spec_.clear();
SetMatchAllURLs(false);
}
// Parse out the scheme.
- size_t scheme_end_pos = pattern.find(content::kStandardSchemeSeparator);
+ size_t scheme_end_pos = pattern.find(url::kStandardSchemeSeparator);
bool has_standard_scheme_separator = true;
// Some urls also use ':' alone as the scheme separator.
// Advance past the scheme separator.
scheme_end_pos +=
- (standard_scheme ? strlen(content::kStandardSchemeSeparator) : 1);
+ (standard_scheme ? strlen(url::kStandardSchemeSeparator) : 1);
if (scheme_end_pos >= pattern.size())
return PARSE_ERROR_EMPTY_HOST;
if (!standard_scheme) {
path_start_pos = host_start_pos;
- } else if (scheme_ == chrome::kFileScheme) {
+ } else if (scheme_ == url::kFileScheme) {
size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
if (host_end_pos == std::string::npos) {
// Allow hostname omission.
// The first component can optionally be '*' to match all subdomains.
std::vector<std::string> host_components;
base::SplitString(host_, '.', &host_components);
+
+ // Could be empty if the host only consists of whitespace characters.
+ if (host_components.empty())
+ return PARSE_ERROR_EMPTY_HOST;
+
if (host_components[0] == "*") {
match_subdomains_ = true;
host_components.erase(host_components.begin(),
if (host_.find('*') != std::string::npos)
return PARSE_ERROR_INVALID_HOST_WILDCARD;
+ // Null characters are not allowed in hosts.
+ if (host_.find('\0') != std::string::npos)
+ return PARSE_ERROR_INVALID_HOST;
+
return PARSE_SUCCESS;
}
}
bool URLPattern::MatchesHost(const std::string& host) const {
- std::string test(content::kHttpScheme);
- test += content::kStandardSchemeSeparator;
+ std::string test(url::kHttpScheme);
+ test += url::kStandardSchemeSeparator;
test += host;
test += "/";
return MatchesHost(GURL(test));
return test.host()[test.host().length() - host_.length() - 1] == '.';
}
+bool URLPattern::ImpliesAllHosts() const {
+  namespace rcd = net::registry_controlled_domains;
+
+  // A match-all pattern, or one shaped like http://*/*, covers every host.
+  const bool is_bare_wildcard =
+      match_subdomains_ && host_.empty() && port_ == "*" && path_ == "/*";
+  if (match_all_urls_ || is_bare_wildcard)
+    return true;
+
+  // A pattern that does not match subdomains cannot imply all hosts.
+  if (!match_subdomains_)
+    return false;
+
+  // The registry length is 0 when |host_| is itself a recognized TLD. Private
+  // TLDs are not included so that, e.g., *.appspot.com does not imply all
+  // hosts. A non-zero length means the host holds more than just a TLD
+  // (e.g., *.foobar.com), so the pattern does not imply all hosts.
+  const size_t host_registry_length = rcd::GetRegistryLength(
+      host_,
+      rcd::EXCLUDE_UNKNOWN_REGISTRIES,
+      rcd::EXCLUDE_PRIVATE_REGISTRIES);
+  if (host_registry_length != 0)
+    return false;
+
+  // |host_| is now either just a TLD ("com") or some unknown TLD-like string
+  // ("notatld"). Build a fake host in front of it and check the registry
+  // again: the result is 0 for an unrecognized TLD, or the length of the
+  // recognized TLD. Only a recognized TLD (a pattern like *.com) implies all
+  // hosts.
+  const std::string probe_host = base::StringPrintf("foo.%s", host_.c_str());
+  const size_t probe_registry_length = rcd::GetRegistryLength(
+      probe_host,
+      rcd::EXCLUDE_UNKNOWN_REGISTRIES,
+      rcd::EXCLUDE_PRIVATE_REGISTRIES);
+  return probe_registry_length != 0;
+}
+
+bool URLPattern::MatchesSingleOrigin() const {
+  // The port is strictly part of an origin, but URLPattern defaults it to *
+  // and it is not very interesting here, so it is left out of the check.
+  if (ImpliesAllHosts())
+    return false;
+  if (scheme_ == "*")
+    return false;
+  return !match_subdomains_;
+}
+
bool URLPattern::MatchesPath(const std::string& test) const {
// Make the behaviour of OverlapsWith consistent with MatchesURL, which is
// needed so that hosted apps on e.g. 'google.com' also run on 'google.com/'.
bool standard_scheme = IsStandardScheme(scheme_);
std::string spec = scheme_ +
- (standard_scheme ? content::kStandardSchemeSeparator : ":");
+ (standard_scheme ? url::kStandardSchemeSeparator : ":");
- if (scheme_ != chrome::kFileScheme && standard_scheme) {
+ if (scheme_ != url::kFileScheme && standard_scheme) {
if (match_subdomains_) {
spec += "*";
if (!host_.empty())
bool URLPattern::MatchesSecurityOriginHelper(const GURL& test) const {
// Ignore hostname if scheme is file://.
- if (scheme_ != chrome::kFileScheme && !MatchesHost(test))
+ if (scheme_ != url::kFileScheme && !MatchesHost(test))
return false;
if (!MatchesPortPattern(base::IntToString(test.EffectiveIntPort())))