Upstream version 9.38.198.0

[platform/framework/web/crosswalk.git] / src / extensions / common / url_pattern.cc
diff --git a/src/extensions/common/url_pattern.cc b/src/extensions/common/url_pattern.cc

index 78aa920..71b522e 100644 (file)
--- a/src/extensions/common/url_pattern.cc
+++ b/src/extensions/common/url_pattern.cc
@@ -4,12 +4,16 @@
  
  #include "extensions/common/url_pattern.h"
  
+#include <ostream>
+
  #include "base/strings/string_number_conversions.h"
  #include "base/strings/string_piece.h"
  #include "base/strings/string_split.h"
  #include "base/strings/string_util.h"
+#include "base/strings/stringprintf.h"
  #include "content/public/common/url_constants.h"
  #include "extensions/common/constants.h"
+#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
  #include "url/gurl.h"
  #include "url/url_util.h"
  
@@ -20,13 +24,13 @@ namespace {
  // TODO(aa): What about more obscure schemes like data: and javascript: ?
  // Note: keep this array in sync with kValidSchemeMasks.
  const char* kValidSchemes[] = {
-  content::kHttpScheme,
-  content::kHttpsScheme,
-  chrome::kFileScheme,
-  chrome::kFtpScheme,
-  chrome::kChromeUIScheme,
-  extensions::kExtensionScheme,
-  chrome::kFileSystemScheme,
+    url::kHttpScheme,
+    url::kHttpsScheme,
+    url::kFileScheme,
+    url::kFtpScheme,
+    content::kChromeUIScheme,
+    extensions::kExtensionScheme,
+    url::kFileSystemScheme,
  };
  
  const int kValidSchemeMasks[] = {
@@ -50,6 +54,7 @@ const char kParseErrorEmptyHost[] = "Host can not be empty.";
  const char kParseErrorInvalidHostWildcard[] = "Invalid host wildcard.";
  const char kParseErrorEmptyPath[] = "Empty path.";
  const char kParseErrorInvalidPort[] = "Invalid port.";
+const char kParseErrorInvalidHost[] = "Invalid host.";
  
  // Message explaining each URLPattern::ParseResult.
  const char* const kParseResultMessages[] = {
@@ -61,6 +66,7 @@ const char* const kParseResultMessages[] = {
    kParseErrorInvalidHostWildcard,
    kParseErrorEmptyPath,
    kParseErrorInvalidPort,
+  kParseErrorInvalidHost,
  };
  
  COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages),
@@ -73,8 +79,8 @@ bool IsStandardScheme(const std::string& scheme) {
    if (scheme == "*")
      return true;
  
-  return url_util::IsStandard(scheme.c_str(),
-      url_parse::Component(0, static_cast<int>(scheme.length())));
+  return url::IsStandard(scheme.c_str(),
+                         url::Component(0, static_cast<int>(scheme.length())));
  }
  
  bool IsValidPortForScheme(const std::string& scheme, const std::string& port) {
@@ -82,12 +88,12 @@ bool IsValidPortForScheme(const std::string& scheme, const std::string& port) {
      return true;
  
    // Only accept non-wildcard ports if the scheme uses ports.
-  if (url_canon::DefaultPortForScheme(scheme.c_str(), scheme.length()) ==
-      url_parse::PORT_UNSPECIFIED) {
+  if (url::DefaultPortForScheme(scheme.c_str(), scheme.length()) ==
+      url::PORT_UNSPECIFIED) {
      return false;
    }
  
-  int parsed_port = url_parse::PORT_UNSPECIFIED;
+  int parsed_port = url::PORT_UNSPECIFIED;
    if (!base::StringToInt(port, &parsed_port))
      return false;
    return (parsed_port >= 0) && (parsed_port < 65536);
@@ -109,6 +115,15 @@ std::string StripTrailingWildcard(const std::string& path) {
  
  }  // namespace
  
+// static
+bool URLPattern::IsValidSchemeForExtensions(const std::string& scheme) {
+  // Walk kValidSchemes and stop at the first entry equal to |scheme|.
+  size_t index = 0;
+  while (index < arraysize(kValidSchemes) && scheme != kValidSchemes[index])
+    ++index;
+  return index < arraysize(kValidSchemes);
+}
+
  URLPattern::URLPattern()
      : valid_schemes_(SCHEME_NONE),
        match_all_urls_(false),
@@ -128,8 +143,9 @@ URLPattern::URLPattern(int valid_schemes, const std::string& pattern)
        match_all_urls_(false),
        match_subdomains_(false),
        port_("*") {
-  if (PARSE_SUCCESS != Parse(pattern))
-    NOTREACHED() << "URLPattern is invalid: " << pattern;
+  ParseResult result = Parse(pattern);
+  if (PARSE_SUCCESS != result)
+    NOTREACHED() << "URLPattern invalid: " << pattern << " result " << result;
  }
  
  URLPattern::~URLPattern() {
@@ -147,6 +163,10 @@ bool URLPattern::operator==(const URLPattern& other) const {
    return GetAsString() == other.GetAsString();
  }
  
+std::ostream& operator<<(std::ostream& out, const URLPattern& url_pattern) {
+  // Writes the pattern's string form surrounded by double quotes.
+  out << '"';
+  out << url_pattern.GetAsString();
+  out << '"';
+  return out;
+}
+
  URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
    spec_.clear();
    SetMatchAllURLs(false);
@@ -160,7 +180,7 @@ URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
    }
  
    // Parse out the scheme.
-  size_t scheme_end_pos = pattern.find(content::kStandardSchemeSeparator);
+  size_t scheme_end_pos = pattern.find(url::kStandardSchemeSeparator);
    bool has_standard_scheme_separator = true;
  
    // Some urls also use ':' alone as the scheme separator.
@@ -181,7 +201,7 @@ URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
  
    // Advance past the scheme separator.
    scheme_end_pos +=
-      (standard_scheme ? strlen(content::kStandardSchemeSeparator) : 1);
+      (standard_scheme ? strlen(url::kStandardSchemeSeparator) : 1);
    if (scheme_end_pos >= pattern.size())
      return PARSE_ERROR_EMPTY_HOST;
  
@@ -191,7 +211,7 @@ URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
  
    if (!standard_scheme) {
      path_start_pos = host_start_pos;
-  } else if (scheme_ == chrome::kFileScheme) {
+  } else if (scheme_ == url::kFileScheme) {
      size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
      if (host_end_pos == std::string::npos) {
        // Allow hostname omission.
@@ -218,6 +238,11 @@ URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
      // The first component can optionally be '*' to match all subdomains.
      std::vector<std::string> host_components;
      base::SplitString(host_, '.', &host_components);
+
+    // Could be empty if the host only consists of whitespace characters.
+    if (host_components.empty())
+      return PARSE_ERROR_EMPTY_HOST;
+
      if (host_components[0] == "*") {
        match_subdomains_ = true;
        host_components.erase(host_components.begin(),
@@ -243,6 +268,10 @@ URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
    if (host_.find('*') != std::string::npos)
      return PARSE_ERROR_INVALID_HOST_WILDCARD;
  
+  // Null characters are not allowed in hosts.
+  if (host_.find('\0') != std::string::npos)
+    return PARSE_ERROR_INVALID_HOST;
+
    return PARSE_SUCCESS;
  }
  
@@ -364,8 +393,8 @@ bool URLPattern::MatchesScheme(const std::string& test) const {
  }
  
  bool URLPattern::MatchesHost(const std::string& host) const {
-  std::string test(content::kHttpScheme);
-  test += content::kStandardSchemeSeparator;
+  std::string test(url::kHttpScheme);
+  test += url::kStandardSchemeSeparator;
    test += host;
    test += "/";
    return MatchesHost(GURL(test));
@@ -402,6 +431,47 @@ bool URLPattern::MatchesHost(const GURL& test) const {
    return test.host()[test.host().length() - host_.length() - 1] == '.';
  }
  
+bool URLPattern::ImpliesAllHosts() const {
+  namespace rcd = net::registry_controlled_domains;
+  // A match-all pattern covers every host by definition.
+  if (match_all_urls_)
+    return true;
+
+  // A pattern that does not match subdomains cannot imply all hosts.
+  if (!match_subdomains_)
+    return false;
+
+  // Patterns such as http://*/*: wildcard host, any port, any path.
+  if (host_.empty() && port_ == "*" && path_ == "/*")
+    return true;
+
+  // The registry length is 0 when |host_| is itself a recognized TLD. Private
+  // registries are excluded so that, e.g., *.appspot.com does not imply all
+  // hosts. A non-zero length means the host holds more than just a TLD
+  // (e.g., *.foobar.com), so the pattern is limited to that domain.
+  const size_t host_registry_length =
+      rcd::GetRegistryLength(host_,
+                             rcd::EXCLUDE_UNKNOWN_REGISTRIES,
+                             rcd::EXCLUDE_PRIVATE_REGISTRIES);
+  if (host_registry_length != 0)
+    return false;
+
+  // |host_| is now either a bare TLD ("com") or an unknown TLD-like string
+  // ("notatld"). Prefix a fake label and query the registry again: the result
+  // is 0 for an unrecognized TLD, or the length of the recognized one. Only a
+  // recognized TLD (a pattern like *.com) implies all hosts.
+  const std::string fake_host = base::StringPrintf("foo.%s", host_.c_str());
+  return rcd::GetRegistryLength(fake_host,
+                                rcd::EXCLUDE_UNKNOWN_REGISTRIES,
+                                rcd::EXCLUDE_PRIVATE_REGISTRIES) != 0;
+}
+
+bool URLPattern::MatchesSingleOrigin() const {
+  // The port is part of an origin too, but URLPattern defaults it to "*", so
+  // it is deliberately left out of this check.
+  return !(ImpliesAllHosts() || scheme_ == "*" || match_subdomains_);
+}
+
  bool URLPattern::MatchesPath(const std::string& test) const {
    // Make the behaviour of OverlapsWith consistent with MatchesURL, which is
    // need to match hosted apps on e.g. 'google.com' also run on 'google.com/'.
@@ -423,9 +493,9 @@ const std::string& URLPattern::GetAsString() const {
    bool standard_scheme = IsStandardScheme(scheme_);
  
    std::string spec = scheme_ +
-      (standard_scheme ? content::kStandardSchemeSeparator : ":");
+      (standard_scheme ? url::kStandardSchemeSeparator : ":");
  
-  if (scheme_ != chrome::kFileScheme && standard_scheme) {
+  if (scheme_ != url::kFileScheme && standard_scheme) {
      if (match_subdomains_) {
        spec += "*";
        if (!host_.empty())
@@ -492,7 +562,7 @@ bool URLPattern::MatchesAllSchemes(
  
  bool URLPattern::MatchesSecurityOriginHelper(const GURL& test) const {
    // Ignore hostname if scheme is file://.
-  if (scheme_ != chrome::kFileScheme && !MatchesHost(test))
+  if (scheme_ != url::kFileScheme && !MatchesHost(test))
      return false;
  
    if (!MatchesPortPattern(base::IntToString(test.EffectiveIntPort())))