url/gurl_unittest.cc

   1 // Copyright 2013 The Chromium Authors
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "url/gurl.h"
   6
   7 #include <stddef.h>
   8
   9 #include "base/strings/string_number_conversions.h"
  10 #include "base/strings/utf_string_conversions.h"
  11 #include "testing/gtest/include/gtest/gtest.h"
  12 #include "url/gurl_abstract_tests.h"
  13 #include "url/origin.h"
  14 #include "url/url_canon.h"
  15 #include "url/url_test_utils.h"
  16
  17 namespace url {
  18
  19 namespace {
  20
  21 // Returns the canonicalized string for the given URL string for the
  22 // GURLTest.Types test.
  23 std::string TypesTestCase(const char* src) {
  24   GURL gurl(src);
  25   return gurl.possibly_invalid_spec();
  26 }
  27
  28 }  // namespace
  29
  30 // Different types of URLs should be handled differently, and handed off to
  31 // different canonicalizers.
  32 TEST(GURLTest, Types) {
  33   // URLs with unknown schemes should be treated as path URLs, even when they
  34   // have things like "://".
  35   EXPECT_EQ("something:///HOSTNAME.com/",
  36             TypesTestCase("something:///HOSTNAME.com/"));
  37
  38   // Conversely, URLs with known schemes should always trigger standard URL
  39   // handling.
  40   EXPECT_EQ("http://hostname.com/", TypesTestCase("http:HOSTNAME.com"));
  41   EXPECT_EQ("http://hostname.com/", TypesTestCase("http:/HOSTNAME.com"));
  42   EXPECT_EQ("http://hostname.com/", TypesTestCase("http://HOSTNAME.com"));
  43   EXPECT_EQ("http://hostname.com/", TypesTestCase("http:///HOSTNAME.com"));
  44
  45 #ifdef WIN32
  46   // URLs that look like Windows absolute path specs.
  47   EXPECT_EQ("file:///C:/foo.txt", TypesTestCase("c:\\foo.txt"));
  48   EXPECT_EQ("file:///Z:/foo.txt", TypesTestCase("Z|foo.txt"));
  49   EXPECT_EQ("file://server/foo.txt", TypesTestCase("\\\\server\\foo.txt"));
  50   EXPECT_EQ("file://server/foo.txt", TypesTestCase("//server/foo.txt"));
  51 #endif
  52 }
  53
  54 // Test the basic creation and querying of components in a GURL. We assume that
  55 // the parser is already tested and works, so we are mostly interested if the
  56 // object does the right thing with the results.
  57 TEST(GURLTest, Components) {
  58   GURL empty_url(u"");
  59   EXPECT_TRUE(empty_url.is_empty());
  60   EXPECT_FALSE(empty_url.is_valid());
  61
  62   GURL url(u"http://user:pass@google.com:99/foo;bar?q=a#ref");
  63   EXPECT_FALSE(url.is_empty());
  64   EXPECT_TRUE(url.is_valid());
  65   EXPECT_TRUE(url.SchemeIs("http"));
  66   EXPECT_FALSE(url.SchemeIsFile());
  67
  68   // This is the narrow version of the URL, which should match the wide input.
  69   EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url.spec());
  70
  71   EXPECT_EQ("http", url.scheme());
  72   EXPECT_EQ("user", url.username());
  73   EXPECT_EQ("pass", url.password());
  74   EXPECT_EQ("google.com", url.host());
  75   EXPECT_EQ("99", url.port());
  76   EXPECT_EQ(99, url.IntPort());
  77   EXPECT_EQ("/foo;bar", url.path());
  78   EXPECT_EQ("q=a", url.query());
  79   EXPECT_EQ("ref", url.ref());
  80
  81   // Test parsing userinfo with special characters.
  82   GURL url_special_pass("http://user:%40!$&'()*+,;=:@google.com:12345");
  83   EXPECT_TRUE(url_special_pass.is_valid());
  84   // GURL canonicalizes some delimiters.
  85   EXPECT_EQ("%40!$&%27()*+,%3B%3D%3A", url_special_pass.password());
  86   EXPECT_EQ("google.com", url_special_pass.host());
  87   EXPECT_EQ("12345", url_special_pass.port());
  88 }
  89
  90 TEST(GURLTest, Empty) {
  91   GURL url;
  92   EXPECT_FALSE(url.is_valid());
  93   EXPECT_EQ("", url.spec());
  94
  95   EXPECT_EQ("", url.scheme());
  96   EXPECT_EQ("", url.username());
  97   EXPECT_EQ("", url.password());
  98   EXPECT_EQ("", url.host());
  99   EXPECT_EQ("", url.port());
 100   EXPECT_EQ(PORT_UNSPECIFIED, url.IntPort());
 101   EXPECT_EQ("", url.path());
 102   EXPECT_EQ("", url.query());
 103   EXPECT_EQ("", url.ref());
 104 }
 105
 106 TEST(GURLTest, Copy) {
 107   GURL url(u"http://user:pass@google.com:99/foo;bar?q=a#ref");
 108
 109   GURL url2(url);
 110   EXPECT_TRUE(url2.is_valid());
 111
 112   EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url2.spec());
 113   EXPECT_EQ("http", url2.scheme());
 114   EXPECT_EQ("user", url2.username());
 115   EXPECT_EQ("pass", url2.password());
 116   EXPECT_EQ("google.com", url2.host());
 117   EXPECT_EQ("99", url2.port());
 118   EXPECT_EQ(99, url2.IntPort());
 119   EXPECT_EQ("/foo;bar", url2.path());
 120   EXPECT_EQ("q=a", url2.query());
 121   EXPECT_EQ("ref", url2.ref());
 122
 123   // Copying of invalid URL should be invalid
 124   GURL invalid;
 125   GURL invalid2(invalid);
 126   EXPECT_FALSE(invalid2.is_valid());
 127   EXPECT_EQ("", invalid2.spec());
 128   EXPECT_EQ("", invalid2.scheme());
 129   EXPECT_EQ("", invalid2.username());
 130   EXPECT_EQ("", invalid2.password());
 131   EXPECT_EQ("", invalid2.host());
 132   EXPECT_EQ("", invalid2.port());
 133   EXPECT_EQ(PORT_UNSPECIFIED, invalid2.IntPort());
 134   EXPECT_EQ("", invalid2.path());
 135   EXPECT_EQ("", invalid2.query());
 136   EXPECT_EQ("", invalid2.ref());
 137 }
 138
 139 TEST(GURLTest, Assign) {
 140   GURL url(u"http://user:pass@google.com:99/foo;bar?q=a#ref");
 141
 142   GURL url2;
 143   url2 = url;
 144   EXPECT_TRUE(url2.is_valid());
 145
 146   EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url2.spec());
 147   EXPECT_EQ("http", url2.scheme());
 148   EXPECT_EQ("user", url2.username());
 149   EXPECT_EQ("pass", url2.password());
 150   EXPECT_EQ("google.com", url2.host());
 151   EXPECT_EQ("99", url2.port());
 152   EXPECT_EQ(99, url2.IntPort());
 153   EXPECT_EQ("/foo;bar", url2.path());
 154   EXPECT_EQ("q=a", url2.query());
 155   EXPECT_EQ("ref", url2.ref());
 156
 157   // Assignment of invalid URL should be invalid
 158   GURL invalid;
 159   GURL invalid2;
 160   invalid2 = invalid;
 161   EXPECT_FALSE(invalid2.is_valid());
 162   EXPECT_EQ("", invalid2.spec());
 163   EXPECT_EQ("", invalid2.scheme());
 164   EXPECT_EQ("", invalid2.username());
 165   EXPECT_EQ("", invalid2.password());
 166   EXPECT_EQ("", invalid2.host());
 167   EXPECT_EQ("", invalid2.port());
 168   EXPECT_EQ(PORT_UNSPECIFIED, invalid2.IntPort());
 169   EXPECT_EQ("", invalid2.path());
 170   EXPECT_EQ("", invalid2.query());
 171   EXPECT_EQ("", invalid2.ref());
 172 }
 173
 174 // This is a regression test for http://crbug.com/309975.
 175 TEST(GURLTest, SelfAssign) {
 176   GURL a("filesystem:http://example.com/temporary/");
 177   // This should not crash.
 178   a = *&a;  // The *& defeats Clang's -Wself-assign warning.
 179 }
 180
 181 TEST(GURLTest, CopyFileSystem) {
 182   GURL url(u"filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref");
 183
 184   GURL url2(url);
 185   EXPECT_TRUE(url2.is_valid());
 186
 187   EXPECT_EQ("filesystem:https://google.com:99/t/foo;bar?q=a#ref", url2.spec());
 188   EXPECT_EQ("filesystem", url2.scheme());
 189   EXPECT_EQ("", url2.username());
 190   EXPECT_EQ("", url2.password());
 191   EXPECT_EQ("", url2.host());
 192   EXPECT_EQ("", url2.port());
 193   EXPECT_EQ(PORT_UNSPECIFIED, url2.IntPort());
 194   EXPECT_EQ("/foo;bar", url2.path());
 195   EXPECT_EQ("q=a", url2.query());
 196   EXPECT_EQ("ref", url2.ref());
 197
 198   const GURL* inner = url2.inner_url();
 199   ASSERT_TRUE(inner);
 200   EXPECT_EQ("https", inner->scheme());
 201   EXPECT_EQ("", inner->username());
 202   EXPECT_EQ("", inner->password());
 203   EXPECT_EQ("google.com", inner->host());
 204   EXPECT_EQ("99", inner->port());
 205   EXPECT_EQ(99, inner->IntPort());
 206   EXPECT_EQ("/t", inner->path());
 207   EXPECT_EQ("", inner->query());
 208   EXPECT_EQ("", inner->ref());
 209 }
 210
 211 TEST(GURLTest, IsValid) {
 212   const char* valid_cases[] = {
 213       "http://google.com",
 214       "unknown://google.com",
 215       "http://user:pass@google.com",
 216       "http://google.com:12345",
 217       "http://google.com:0",  // 0 is a valid port
 218       "http://google.com/path",
 219       "http://google.com//path",
 220       "http://google.com?k=v#fragment",
 221       "http://user:pass@google.com:12345/path?k=v#fragment",
 222       "http:/path",
 223       "http:path",
 224   };
 225   for (size_t i = 0; i < std::size(valid_cases); i++) {
 226     EXPECT_TRUE(GURL(valid_cases[i]).is_valid())
 227         << "Case: " << valid_cases[i];
 228   }
 229
 230   const char* invalid_cases[] = {
 231       "http://?k=v",
 232       "http:://google.com",
 233       "http//google.com",
 234       "http://google.com:12three45",
 235       "file://server:123",  // file: URLs cannot have a port
 236       "file://server:0",
 237       "://google.com",
 238       "path",
 239   };
 240   for (size_t i = 0; i < std::size(invalid_cases); i++) {
 241     EXPECT_FALSE(GURL(invalid_cases[i]).is_valid())
 242         << "Case: " << invalid_cases[i];
 243   }
 244 }
 245
 246 TEST(GURLTest, ExtraSlashesBeforeAuthority) {
 247   // According to RFC3986, the hierarchical part for URI with an authority
 248   // must use only two slashes; GURL intentionally just ignores extra slashes
 249   // if there are more than 2, and parses the following part as an authority.
 250   GURL url("http:///host");
 251   EXPECT_EQ("host", url.host());
 252   EXPECT_EQ("/", url.path());
 253 }
 254
 255 // Given invalid URLs, we should still get most of the components.
 256 TEST(GURLTest, ComponentGettersWorkEvenForInvalidURL) {
 257   constexpr struct InvalidURLTestExpectations {
 258     const char* url;
 259     const char* spec;
 260     const char* scheme;
 261     const char* host;
 262     const char* port;
 263     const char* path;
 264     // Extend as needed...
 265   } expectations[] = {
 266       {
 267           "http:google.com:foo",
 268           "http://google.com:foo/",
 269           "http",
 270           "google.com",
 271           "foo",
 272           "/",
 273       },
 274       {
 275           "https:google.com:foo",
 276           "https://google.com:foo/",
 277           "https",
 278           "google.com",
 279           "foo",
 280           "/",
 281       },
 282   };
 283
 284   for (const auto& e : expectations) {
 285     const GURL url(e.url);
 286     EXPECT_FALSE(url.is_valid());
 287     EXPECT_EQ(e.spec, url.possibly_invalid_spec());
 288     EXPECT_EQ(e.scheme, url.scheme());
 289     EXPECT_EQ("", url.username());
 290     EXPECT_EQ("", url.password());
 291     EXPECT_EQ(e.host, url.host());
 292     EXPECT_EQ(e.port, url.port());
 293     EXPECT_EQ(PORT_INVALID, url.IntPort());
 294     EXPECT_EQ(e.path, url.path());
 295     EXPECT_EQ("", url.query());
 296     EXPECT_EQ("", url.ref());
 297   }
 298 }
 299
 300 TEST(GURLTest, Resolve) {
 301   // The tricky cases for relative URL resolving are tested in the
 302   // canonicalizer unit test. Here, we just test that the GURL integration
 303   // works properly.
 304   struct ResolveCase {
 305     const char* base;
 306     const char* relative;
 307     bool expected_valid;
 308     const char* expected;
 309   } resolve_cases[] = {
 310       {"http://www.google.com/", "foo.html", true,
 311        "http://www.google.com/foo.html"},
 312       {"http://www.google.com/foo/", "bar", true,
 313        "http://www.google.com/foo/bar"},
 314       {"http://www.google.com/foo/", "/bar", true, "http://www.google.com/bar"},
 315       {"http://www.google.com/foo", "bar", true, "http://www.google.com/bar"},
 316       {"http://www.google.com/", "http://images.google.com/foo.html", true,
 317        "http://images.google.com/foo.html"},
 318       {"http://www.google.com/", "http://images.\tgoogle.\ncom/\rfoo.html",
 319        true, "http://images.google.com/foo.html"},
 320       {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b",
 321        true, "http://www.google.com/hello/world.html?a#b"},
 322       {"http://www.google.com/foo#bar", "#com", true,
 323        "http://www.google.com/foo#com"},
 324       {"http://www.google.com/", "Https:images.google.com", true,
 325        "https://images.google.com/"},
 326       // A non-standard base can be replaced with a standard absolute URL.
 327       {"data:blahblah", "http://google.com/", true, "http://google.com/"},
 328       {"data:blahblah", "http:google.com", true, "http://google.com/"},
 329       {"data:blahblah", "https:google.com", true, "https://google.com/"},
 330       // Filesystem URLs have different paths to test.
 331       {"filesystem:http://www.google.com/type/", "foo.html", true,
 332        "filesystem:http://www.google.com/type/foo.html"},
 333       {"filesystem:http://www.google.com/type/", "../foo.html", true,
 334        "filesystem:http://www.google.com/type/foo.html"},
 335       // https://crbug.com/530123 - scheme validation (e.g. are "10.0.0.7:"
 336       // or "x1:" valid schemes) when deciding if |relative| is an absolute url.
 337       {"file:///some/dir/ip-relative.html", "10.0.0.7:8080/foo.html", true,
 338        "file:///some/dir/10.0.0.7:8080/foo.html"},
 339       {"file:///some/dir/", "1://host", true, "file:///some/dir/1://host"},
 340       {"file:///some/dir/", "x1://host", true, "x1://host"},
 341       {"file:///some/dir/", "X1://host", true, "x1://host"},
 342       {"file:///some/dir/", "x.://host", true, "x.://host"},
 343       {"file:///some/dir/", "x+://host", true, "x+://host"},
 344       {"file:///some/dir/", "x-://host", true, "x-://host"},
 345       {"file:///some/dir/", "x!://host", true, "file:///some/dir/x!://host"},
 346       {"file:///some/dir/", "://host", true, "file:///some/dir/://host"},
 347   };
 348
 349   for (size_t i = 0; i < std::size(resolve_cases); i++) {
 350     // 8-bit code path.
 351     GURL input(resolve_cases[i].base);
 352     GURL output = input.Resolve(resolve_cases[i].relative);
 353     EXPECT_EQ(resolve_cases[i].expected_valid, output.is_valid()) << i;
 354     EXPECT_EQ(resolve_cases[i].expected, output.spec()) << i;
 355     EXPECT_EQ(output.SchemeIsFileSystem(), output.inner_url() != NULL);
 356
 357     // Wide code path.
 358     GURL inputw(base::UTF8ToUTF16(resolve_cases[i].base));
 359     GURL outputw =
 360         input.Resolve(base::UTF8ToUTF16(resolve_cases[i].relative));
 361     EXPECT_EQ(resolve_cases[i].expected_valid, outputw.is_valid()) << i;
 362     EXPECT_EQ(resolve_cases[i].expected, outputw.spec()) << i;
 363     EXPECT_EQ(outputw.SchemeIsFileSystem(), outputw.inner_url() != NULL);
 364   }
 365 }
 366
 367 TEST(GURLTest, GetOrigin) {
 368   struct TestCase {
 369     const char* input;
 370     const char* expected;
 371   } cases[] = {
 372       {"http://www.google.com", "http://www.google.com/"},
 373       {"javascript:window.alert(\"hello,world\");", ""},
 374       {"http://user:pass@www.google.com:21/blah#baz",
 375        "http://www.google.com:21/"},
 376       {"http://user@www.google.com", "http://www.google.com/"},
 377       {"http://:pass@www.google.com", "http://www.google.com/"},
 378       {"http://:@www.google.com", "http://www.google.com/"},
 379       {"filesystem:http://www.google.com/temp/foo?q#b",
 380        "http://www.google.com/"},
 381       {"filesystem:http://user:pass@google.com:21/blah#baz",
 382        "http://google.com:21/"},
 383       {"blob:null/guid-goes-here", ""},
 384       {"blob:http://origin/guid-goes-here", "" /* should be http://origin/ */},
 385   };
 386   for (size_t i = 0; i < std::size(cases); i++) {
 387     GURL url(cases[i].input);
 388     GURL origin = url.DeprecatedGetOriginAsURL();
 389     EXPECT_EQ(cases[i].expected, origin.spec());
 390   }
 391 }
 392
 393 TEST(GURLTest, GetAsReferrer) {
 394   struct TestCase {
 395     const char* input;
 396     const char* expected;
 397   } cases[] = {
 398     {"http://www.google.com", "http://www.google.com/"},
 399     {"http://user:pass@www.google.com:21/blah#baz", "http://www.google.com:21/blah"},
 400     {"http://user@www.google.com", "http://www.google.com/"},
 401     {"http://:pass@www.google.com", "http://www.google.com/"},
 402     {"http://:@www.google.com", "http://www.google.com/"},
 403     {"http://www.google.com/temp/foo?q#b", "http://www.google.com/temp/foo?q"},
 404     {"not a url", ""},
 405     {"unknown-scheme://foo.html", ""},
 406     {"file:///tmp/test.html", ""},
 407     {"https://www.google.com", "https://www.google.com/"},
 408   };
 409   for (size_t i = 0; i < std::size(cases); i++) {
 410     GURL url(cases[i].input);
 411     GURL origin = url.GetAsReferrer();
 412     EXPECT_EQ(cases[i].expected, origin.spec());
 413   }
 414 }
 415
 416 TEST(GURLTest, GetWithEmptyPath) {
 417   struct TestCase {
 418     const char* input;
 419     const char* expected;
 420   } cases[] = {
 421     {"http://www.google.com", "http://www.google.com/"},
 422     {"javascript:window.alert(\"hello, world\");", ""},
 423     {"http://www.google.com/foo/bar.html?baz=22", "http://www.google.com/"},
 424     {"filesystem:http://www.google.com/temporary/bar.html?baz=22", "filesystem:http://www.google.com/temporary/"},
 425     {"filesystem:file:///temporary/bar.html?baz=22", "filesystem:file:///temporary/"},
 426   };
 427
 428   for (size_t i = 0; i < std::size(cases); i++) {
 429     GURL url(cases[i].input);
 430     GURL empty_path = url.GetWithEmptyPath();
 431     EXPECT_EQ(cases[i].expected, empty_path.spec());
 432   }
 433 }
 434
 435 TEST(GURLTest, GetWithoutFilename) {
 436   struct TestCase {
 437     const char* input;
 438     const char* expected;
 439   } cases[] = {
 440     // Common Standard URLs.
 441     {"https://www.google.com",                    "https://www.google.com/"},
 442     {"https://www.google.com/",                   "https://www.google.com/"},
 443     {"https://www.google.com/maps.htm",           "https://www.google.com/"},
 444     {"https://www.google.com/maps/",              "https://www.google.com/maps/"},
 445     {"https://www.google.com/index.html",         "https://www.google.com/"},
 446     {"https://www.google.com/index.html?q=maps",  "https://www.google.com/"},
 447     {"https://www.google.com/index.html#maps/",   "https://www.google.com/"},
 448     {"https://foo:bar@www.google.com/maps.htm",   "https://foo:bar@www.google.com/"},
 449     {"https://www.google.com/maps/au/index.html", "https://www.google.com/maps/au/"},
 450     {"https://www.google.com/maps/au/north",      "https://www.google.com/maps/au/"},
 451     {"https://www.google.com/maps/au/north/",     "https://www.google.com/maps/au/north/"},
 452     {"https://www.google.com/maps/au/index.html?q=maps#fragment/",     "https://www.google.com/maps/au/"},
 453     {"http://www.google.com:8000/maps/au/index.html?q=maps#fragment/", "http://www.google.com:8000/maps/au/"},
 454     {"https://www.google.com/maps/au/north/?q=maps#fragment",          "https://www.google.com/maps/au/north/"},
 455     {"https://www.google.com/maps/au/north?q=maps#fragment",           "https://www.google.com/maps/au/"},
 456     // Less common standard URLs.
 457     {"filesystem:http://www.google.com/temporary/bar.html?baz=22", "filesystem:http://www.google.com/temporary/"},
 458     {"file:///temporary/bar.html?baz=22","file:///temporary/"},
 459     {"ftp://foo/test/index.html",        "ftp://foo/test/"},
 460     {"gopher://foo/test/index.html",     "gopher://foo/test/"},
 461     {"ws://foo/test/index.html",         "ws://foo/test/"},
 462     // Non-standard, hierarchical URLs.
 463     {"chrome://foo/bar.html", "chrome://foo/"},
 464     {"httpa://foo/test/index.html", "httpa://foo/test/"},
 465     // Non-standard, non-hierarchical URLs.
 466     {"blob:https://foo.bar/test/index.html", ""},
 467     {"about:blank", ""},
 468     {"data:foobar", ""},
 469     {"scheme:opaque_data", ""},
 470     // Invalid URLs.
 471     {"foobar", ""},
 472   };
 473
 474   for (size_t i = 0; i < std::size(cases); i++) {
 475     GURL url(cases[i].input);
 476     GURL without_filename = url.GetWithoutFilename();
 477     EXPECT_EQ(cases[i].expected, without_filename.spec()) << i;
 478   }
 479 }
 480
 481 TEST(GURLTest, GetWithoutRef) {
 482   struct TestCase {
 483     const char* input;
 484     const char* expected;
 485   } cases[] = {
 486       // Common Standard URLs.
 487       {"https://www.google.com/index.html",
 488        "https://www.google.com/index.html"},
 489       {"https://www.google.com/index.html#maps/",
 490        "https://www.google.com/index.html"},
 491
 492       {"https://foo:bar@www.google.com/maps.htm",
 493        "https://foo:bar@www.google.com/maps.htm"},
 494       {"https://foo:bar@www.google.com/maps.htm#fragment",
 495        "https://foo:bar@www.google.com/maps.htm"},
 496
 497       {"https://www.google.com/maps/au/index.html?q=maps",
 498        "https://www.google.com/maps/au/index.html?q=maps"},
 499       {"https://www.google.com/maps/au/index.html?q=maps#fragment/",
 500        "https://www.google.com/maps/au/index.html?q=maps"},
 501
 502       {"http://www.google.com:8000/maps/au/index.html?q=maps",
 503        "http://www.google.com:8000/maps/au/index.html?q=maps"},
 504       {"http://www.google.com:8000/maps/au/index.html?q=maps#fragment/",
 505        "http://www.google.com:8000/maps/au/index.html?q=maps"},
 506
 507       {"https://www.google.com/maps/au/north/?q=maps",
 508        "https://www.google.com/maps/au/north/?q=maps"},
 509       {"https://www.google.com/maps/au/north?q=maps#fragment",
 510        "https://www.google.com/maps/au/north?q=maps"},
 511
 512       // Less common standard URLs.
 513       {"filesystem:http://www.google.com/temporary/bar.html?baz=22",
 514        "filesystem:http://www.google.com/temporary/bar.html?baz=22"},
 515       {"file:///temporary/bar.html?baz=22#fragment",
 516        "file:///temporary/bar.html?baz=22"},
 517
 518       {"ftp://foo/test/index.html", "ftp://foo/test/index.html"},
 519       {"ftp://foo/test/index.html#fragment", "ftp://foo/test/index.html"},
 520
 521       {"gopher://foo/test/index.html", "gopher://foo/test/index.html"},
 522       {"gopher://foo/test/index.html#fragment", "gopher://foo/test/index.html"},
 523
 524       {"ws://foo/test/index.html", "ws://foo/test/index.html"},
 525       {"ws://foo/test/index.html#fragment", "ws://foo/test/index.html"},
 526
 527       // Non-standard, hierarchical URLs.
 528       {"chrome://foo/bar.html", "chrome://foo/bar.html"},
 529       {"chrome://foo/bar.html#fragment", "chrome://foo/bar.html"},
 530
 531       {"httpa://foo/test/index.html", "httpa://foo/test/index.html"},
 532       {"httpa://foo/test/index.html#fragment", "httpa://foo/test/index.html"},
 533
 534       // Non-standard, non-hierarchical URLs.
 535       {"blob:https://foo.bar/test/index.html",
 536        "blob:https://foo.bar/test/index.html"},
 537       {"blob:https://foo.bar/test/index.html#fragment",
 538        "blob:https://foo.bar/test/index.html"},
 539
 540       {"about:blank", "about:blank"},
 541       {"about:blank#ref", "about:blank"},
 542
 543       {"data:foobar", "data:foobar"},
 544       {"scheme:opaque_data", "scheme:opaque_data"},
 545       // Invalid URLs.
 546       {"foobar", ""},
 547   };
 548
 549   for (size_t i = 0; i < std::size(cases); i++) {
 550     GURL url(cases[i].input);
 551     GURL without_ref = url.GetWithoutRef();
 552     EXPECT_EQ(cases[i].expected, without_ref.spec());
 553   }
 554 }
 555
 556 TEST(GURLTest, Replacements) {
 557   // The URL canonicalizer replacement test will handle most of these case.
 558   // The most important thing to do here is to check that the proper
 559   // canonicalizer gets called based on the scheme of the input.
 560   struct ReplaceCase {
 561     using ApplyReplacementsFunc = GURL(const GURL&);
 562
 563     const char* base;
 564     ApplyReplacementsFunc* apply_replacements;
 565     const char* expected;
 566   } replace_cases[] = {
 567       {.base = "http://www.google.com/foo/bar.html?foo#bar",
 568        .apply_replacements =
 569            +[](const GURL& url) {
 570              GURL::Replacements replacements;
 571              replacements.SetPathStr("/");
 572              replacements.ClearQuery();
 573              replacements.ClearRef();
 574              return url.ReplaceComponents(replacements);
 575            },
 576        .expected = "http://www.google.com/"},
 577       {.base = "http://www.google.com/foo/bar.html?foo#bar",
 578        .apply_replacements =
 579            +[](const GURL& url) {
 580              GURL::Replacements replacements;
 581              replacements.SetSchemeStr("javascript");
 582              replacements.ClearUsername();
 583              replacements.ClearPassword();
 584              replacements.ClearHost();
 585              replacements.ClearPort();
 586              replacements.SetPathStr("window.open('foo');");
 587              replacements.ClearQuery();
 588              replacements.ClearRef();
 589              return url.ReplaceComponents(replacements);
 590            },
 591        .expected = "javascript:window.open('foo');"},
 592       {.base = "file:///C:/foo/bar.txt",
 593        .apply_replacements =
 594            +[](const GURL& url) {
 595              GURL::Replacements replacements;
 596              replacements.SetSchemeStr("http");
 597              replacements.SetHostStr("www.google.com");
 598              replacements.SetPortStr("99");
 599              replacements.SetPathStr("/foo");
 600              replacements.SetQueryStr("search");
 601              replacements.SetRefStr("ref");
 602              return url.ReplaceComponents(replacements);
 603            },
 604        .expected = "http://www.google.com:99/foo?search#ref"},
 605 #ifdef WIN32
 606       {.base = "http://www.google.com/foo/bar.html?foo#bar",
 607        .apply_replacements =
 608            +[](const GURL& url) {
 609              GURL::Replacements replacements;
 610              replacements.SetSchemeStr("file");
 611              replacements.ClearUsername();
 612              replacements.ClearPassword();
 613              replacements.ClearHost();
 614              replacements.ClearPort();
 615              replacements.SetPathStr("c:\\");
 616              replacements.ClearQuery();
 617              replacements.ClearRef();
 618              return url.ReplaceComponents(replacements);
 619            },
 620        .expected = "file:///C:/"},
 621 #endif
 622       {.base = "filesystem:http://www.google.com/foo/bar.html?foo#bar",
 623        .apply_replacements =
 624            +[](const GURL& url) {
 625              GURL::Replacements replacements;
 626              replacements.SetPathStr("/");
 627              replacements.ClearQuery();
 628              replacements.ClearRef();
 629              return url.ReplaceComponents(replacements);
 630            },
 631        .expected = "filesystem:http://www.google.com/foo/"},
 632       // Lengthen the URL instead of shortening it, to test creation of
 633       // inner_url.
 634       {.base = "filesystem:http://www.google.com/foo/",
 635        .apply_replacements =
 636            +[](const GURL& url) {
 637              GURL::Replacements replacements;
 638              replacements.SetPathStr("bar.html");
 639              replacements.SetQueryStr("foo");
 640              replacements.SetRefStr("bar");
 641              return url.ReplaceComponents(replacements);
 642            },
 643        .expected = "filesystem:http://www.google.com/foo/bar.html?foo#bar"},
 644   };
 645
 646   for (const ReplaceCase& c : replace_cases) {
 647     GURL output = c.apply_replacements(GURL(c.base));
 648
 649     EXPECT_EQ(c.expected, output.spec());
 650
 651     EXPECT_EQ(output.SchemeIsFileSystem(), output.inner_url() != NULL);
 652     if (output.SchemeIsFileSystem()) {
 653       // TODO(mmenke): inner_url()->spec() is currently the same as the spec()
 654       // for the GURL itself.  This should be fixed.
 655       // See https://crbug.com/619596
 656       EXPECT_EQ(c.expected, output.inner_url()->spec());
 657     }
 658   }
 659 }
 660
 661 TEST(GURLTest, ClearFragmentOnDataUrl) {
 662   // http://crbug.com/291747 - a data URL may legitimately have trailing
 663   // whitespace in the spec after the ref is cleared. Test this does not trigger
 664   // the Parsed importing validation DCHECK in GURL.
 665   GURL url(" data: one # two ");
 666   EXPECT_TRUE(url.is_valid());
 667
 668   // By default the trailing whitespace will have been stripped.
 669   EXPECT_EQ("data: one #%20two", url.spec());
 670
 671   // Clear the URL's ref and observe the trailing whitespace.
 672   GURL::Replacements repl;
 673   repl.ClearRef();
 674   GURL url_no_ref = url.ReplaceComponents(repl);
 675   EXPECT_TRUE(url_no_ref.is_valid());
 676   EXPECT_EQ("data: one ", url_no_ref.spec());
 677
 678   // Importing a parsed URL via this constructor overload will retain trailing
 679   // whitespace.
 680   GURL import_url(url_no_ref.spec(),
 681                   url_no_ref.parsed_for_possibly_invalid_spec(),
 682                   url_no_ref.is_valid());
 683   EXPECT_TRUE(import_url.is_valid());
 684   EXPECT_EQ(url_no_ref, import_url);
 685   EXPECT_EQ("data: one ", import_url.spec());
 686   EXPECT_EQ(" one ", import_url.path());
 687
 688   // For completeness, test that re-parsing the same URL rather than importing
 689   // it trims the trailing whitespace.
 690   GURL reparsed_url(url_no_ref.spec());
 691   EXPECT_TRUE(reparsed_url.is_valid());
 692   EXPECT_EQ("data: one", reparsed_url.spec());
 693 }
 694
 695 TEST(GURLTest, PathForRequest) {
 696   struct TestCase {
 697     const char* input;
 698     const char* expected;
 699     const char* inner_expected;
 700   } cases[] = {
 701       {"http://www.google.com", "/", nullptr},
 702       {"http://www.google.com/", "/", nullptr},
 703       {"http://www.google.com/foo/bar.html?baz=22", "/foo/bar.html?baz=22",
 704        nullptr},
 705       {"http://www.google.com/foo/bar.html#ref", "/foo/bar.html", nullptr},
 706       {"http://www.google.com/foo/bar.html?query#ref", "/foo/bar.html?query",
 707        nullptr},
 708       {"filesystem:http://www.google.com/temporary/foo/bar.html?query#ref",
 709        "/foo/bar.html?query", "/temporary"},
 710       {"filesystem:http://www.google.com/temporary/foo/bar.html?query",
 711        "/foo/bar.html?query", "/temporary"},
 712   };
 713
 714   for (size_t i = 0; i < std::size(cases); i++) {
 715     GURL url(cases[i].input);
 716     EXPECT_EQ(cases[i].expected, url.PathForRequest());
 717     EXPECT_EQ(cases[i].expected, url.PathForRequestPiece());
 718     EXPECT_EQ(cases[i].inner_expected == NULL, url.inner_url() == NULL);
 719     if (url.inner_url() && cases[i].inner_expected) {
 720       EXPECT_EQ(cases[i].inner_expected, url.inner_url()->PathForRequest());
 721       EXPECT_EQ(cases[i].inner_expected,
 722                 url.inner_url()->PathForRequestPiece());
 723     }
 724   }
 725 }
 726
 727 TEST(GURLTest, EffectiveIntPort) {
 728   struct PortTest {
 729     const char* spec;
 730     int expected_int_port;
 731   } port_tests[] = {
 732     // http
 733     {"http://www.google.com/", 80},
 734     {"http://www.google.com:80/", 80},
 735     {"http://www.google.com:443/", 443},
 736
 737     // https
 738     {"https://www.google.com/", 443},
 739     {"https://www.google.com:443/", 443},
 740     {"https://www.google.com:80/", 80},
 741
 742     // ftp
 743     {"ftp://www.google.com/", 21},
 744     {"ftp://www.google.com:21/", 21},
 745     {"ftp://www.google.com:80/", 80},
 746
 747     // file - no port
 748     {"file://www.google.com/", PORT_UNSPECIFIED},
 749     {"file://www.google.com:443/", PORT_UNSPECIFIED},
 750
 751     // data - no port
 752     {"data:www.google.com:90", PORT_UNSPECIFIED},
 753     {"data:www.google.com", PORT_UNSPECIFIED},
 754
 755     // filesystem - no port
 756     {"filesystem:http://www.google.com:90/t/foo", PORT_UNSPECIFIED},
 757     {"filesystem:file:///t/foo", PORT_UNSPECIFIED},
 758   };
 759
 760   for (size_t i = 0; i < std::size(port_tests); i++) {
 761     GURL url(port_tests[i].spec);
 762     EXPECT_EQ(port_tests[i].expected_int_port, url.EffectiveIntPort());
 763   }
 764 }
 765
 766 TEST(GURLTest, IPAddress) {
 767   struct IPTest {
 768     const char* spec;
 769     bool expected_ip;
 770   } ip_tests[] = {
 771     {"http://www.google.com/", false},
 772     {"http://192.168.9.1/", true},
 773     {"http://192.168.9.1.2/", false},
 774     {"http://192.168.m.1/", false},
 775     {"http://2001:db8::1/", false},
 776     {"http://[2001:db8::1]/", true},
 777     {"", false},
 778     {"some random input!", false},
 779   };
 780
 781   for (size_t i = 0; i < std::size(ip_tests); i++) {
 782     GURL url(ip_tests[i].spec);
 783     EXPECT_EQ(ip_tests[i].expected_ip, url.HostIsIPAddress());
 784   }
 785 }
 786
 787 TEST(GURLTest, HostNoBrackets) {
 788   struct TestCase {
 789     const char* input;
 790     const char* expected_host;
 791     const char* expected_plainhost;
 792   } cases[] = {
 793     {"http://www.google.com", "www.google.com", "www.google.com"},
 794     {"http://[2001:db8::1]/", "[2001:db8::1]", "2001:db8::1"},
 795     {"http://[::]/", "[::]", "::"},
 796
 797     // Don't require a valid URL, but don't crash either.
 798     {"http://[]/", "[]", ""},
 799     {"http://[x]/", "[x]", "x"},
 800     {"http://[x/", "[x", "[x"},
 801     {"http://x]/", "x]", "x]"},
 802     {"http://[/", "[", "["},
 803     {"http://]/", "]", "]"},
 804     {"", "", ""},
 805   };
 806   for (size_t i = 0; i < std::size(cases); i++) {
 807     GURL url(cases[i].input);
 808     EXPECT_EQ(cases[i].expected_host, url.host());
 809     EXPECT_EQ(cases[i].expected_plainhost, url.HostNoBrackets());
 810     EXPECT_EQ(cases[i].expected_plainhost, url.HostNoBracketsPiece());
 811   }
 812 }
 813
 814 TEST(GURLTest, DomainIs) {
 815   GURL url_1("http://google.com/foo");
 816   EXPECT_TRUE(url_1.DomainIs("google.com"));
 817
 818   // Subdomain and port are ignored.
 819   GURL url_2("http://www.google.com:99/foo");
 820   EXPECT_TRUE(url_2.DomainIs("google.com"));
 821
 822   // Different top-level domain.
 823   GURL url_3("http://www.google.com.cn/foo");
 824   EXPECT_FALSE(url_3.DomainIs("google.com"));
 825
 826   // Different host name.
 827   GURL url_4("http://www.iamnotgoogle.com/foo");
 828   EXPECT_FALSE(url_4.DomainIs("google.com"));
 829
 830   // The input must be lower-cased otherwise DomainIs returns false.
 831   GURL url_5("http://www.google.com/foo");
 832   EXPECT_FALSE(url_5.DomainIs("Google.com"));
 833
 834   // If the URL is invalid, DomainIs returns false.
 835   GURL invalid_url("google.com");
 836   EXPECT_FALSE(invalid_url.is_valid());
 837   EXPECT_FALSE(invalid_url.DomainIs("google.com"));
 838
 839   GURL url_with_escape_chars("https://www.,.test");
 840   EXPECT_TRUE(url_with_escape_chars.is_valid());
 841   EXPECT_EQ(url_with_escape_chars.host(), "www.,.test");
 842   EXPECT_TRUE(url_with_escape_chars.DomainIs(",.test"));
 843 }
 844
 845 TEST(GURLTest, DomainIsTerminatingDotBehavior) {
 846   // If the host part ends with a dot, it matches input domains
 847   // with or without a dot.
 848   GURL url_with_dot("http://www.google.com./foo");
 849   EXPECT_TRUE(url_with_dot.DomainIs("google.com"));
 850   EXPECT_TRUE(url_with_dot.DomainIs("google.com."));
 851   EXPECT_TRUE(url_with_dot.DomainIs(".com"));
 852   EXPECT_TRUE(url_with_dot.DomainIs(".com."));
 853
 854   // But, if the host name doesn't end with a dot and the input
 855   // domain does, then it's considered to not match.
 856   GURL url_without_dot("http://google.com/foo");
 857   EXPECT_FALSE(url_without_dot.DomainIs("google.com."));
 858
 859   // If the URL ends with two dots, it doesn't match.
 860   GURL url_with_two_dots("http://www.google.com../foo");
 861   EXPECT_FALSE(url_with_two_dots.DomainIs("google.com"));
 862 }
 863
 864 TEST(GURLTest, DomainIsWithFilesystemScheme) {
 865   GURL url_1("filesystem:http://www.google.com:99/foo/");
 866   EXPECT_TRUE(url_1.DomainIs("google.com"));
 867
 868   GURL url_2("filesystem:http://www.iamnotgoogle.com/foo/");
 869   EXPECT_FALSE(url_2.DomainIs("google.com"));
 870 }
 871
 872 // Newlines should be stripped from inputs.
 873 TEST(GURLTest, Newlines) {
 874   // Constructor.
 875   GURL url_1(" \t ht\ntp://\twww.goo\rgle.com/as\ndf \n ");
 876   EXPECT_EQ("http://www.google.com/asdf", url_1.spec());
 877   EXPECT_FALSE(
 878       url_1.parsed_for_possibly_invalid_spec().potentially_dangling_markup);
 879
 880   // Relative path resolver.
 881   GURL url_2 = url_1.Resolve(" \n /fo\to\r ");
 882   EXPECT_EQ("http://www.google.com/foo", url_2.spec());
 883   EXPECT_FALSE(
 884       url_2.parsed_for_possibly_invalid_spec().potentially_dangling_markup);
 885
 886   // Constructor.
 887   GURL url_3(" \t ht\ntp://\twww.goo\rgle.com/as\ndf< \n ");
 888   EXPECT_EQ("http://www.google.com/asdf%3C", url_3.spec());
 889   EXPECT_TRUE(
 890       url_3.parsed_for_possibly_invalid_spec().potentially_dangling_markup);
 891
 892   // Relative path resolver.
 893   GURL url_4 = url_1.Resolve(" \n /fo\to<\r ");
 894   EXPECT_EQ("http://www.google.com/foo%3C", url_4.spec());
 895   EXPECT_TRUE(
 896       url_4.parsed_for_possibly_invalid_spec().potentially_dangling_markup);
 897
 898   // Note that newlines are NOT stripped from ReplaceComponents.
 899 }
 900
 901 TEST(GURLTest, IsStandard) {
 902   GURL a("http:foo/bar");
 903   EXPECT_TRUE(a.IsStandard());
 904
 905   GURL b("foo:bar/baz");
 906   EXPECT_FALSE(b.IsStandard());
 907
 908   GURL c("foo://bar/baz");
 909   EXPECT_FALSE(c.IsStandard());
 910
 911   GURL d("cid:bar@baz");
 912   EXPECT_FALSE(d.IsStandard());
 913 }
 914
 915 TEST(GURLTest, SchemeIsHTTPOrHTTPS) {
 916   EXPECT_TRUE(GURL("http://bar/").SchemeIsHTTPOrHTTPS());
 917   EXPECT_TRUE(GURL("HTTPS://BAR").SchemeIsHTTPOrHTTPS());
 918   EXPECT_FALSE(GURL("ftp://bar/").SchemeIsHTTPOrHTTPS());
 919 }
 920
 921 TEST(GURLTest, SchemeIsWSOrWSS) {
 922   EXPECT_TRUE(GURL("WS://BAR/").SchemeIsWSOrWSS());
 923   EXPECT_TRUE(GURL("wss://bar/").SchemeIsWSOrWSS());
 924   EXPECT_FALSE(GURL("http://bar/").SchemeIsWSOrWSS());
 925 }
 926
 927 TEST(GURLTest, SchemeIsCryptographic) {
 928   EXPECT_TRUE(GURL("https://foo.bar.com/").SchemeIsCryptographic());
 929   EXPECT_TRUE(GURL("HTTPS://foo.bar.com/").SchemeIsCryptographic());
 930   EXPECT_TRUE(GURL("HtTpS://foo.bar.com/").SchemeIsCryptographic());
 931
 932   EXPECT_TRUE(GURL("wss://foo.bar.com/").SchemeIsCryptographic());
 933   EXPECT_TRUE(GURL("WSS://foo.bar.com/").SchemeIsCryptographic());
 934   EXPECT_TRUE(GURL("WsS://foo.bar.com/").SchemeIsCryptographic());
 935
 936   EXPECT_FALSE(GURL("http://foo.bar.com/").SchemeIsCryptographic());
 937   EXPECT_FALSE(GURL("ws://foo.bar.com/").SchemeIsCryptographic());
 938 }
 939
 940 TEST(GURLTest, SchemeIsCryptographicStatic) {
 941   EXPECT_TRUE(GURL::SchemeIsCryptographic("https"));
 942   EXPECT_TRUE(GURL::SchemeIsCryptographic("wss"));
 943   EXPECT_FALSE(GURL::SchemeIsCryptographic("http"));
 944   EXPECT_FALSE(GURL::SchemeIsCryptographic("ws"));
 945   EXPECT_FALSE(GURL::SchemeIsCryptographic("ftp"));
 946 }
 947
 948 TEST(GURLTest, SchemeIsBlob) {
 949   EXPECT_TRUE(GURL("BLOB://BAR/").SchemeIsBlob());
 950   EXPECT_TRUE(GURL("blob://bar/").SchemeIsBlob());
 951   EXPECT_FALSE(GURL("http://bar/").SchemeIsBlob());
 952 }
 953
 954 TEST(GURLTest, SchemeIsLocal) {
 955   EXPECT_TRUE(GURL("BLOB://BAR/").SchemeIsLocal());
 956   EXPECT_TRUE(GURL("blob://bar/").SchemeIsLocal());
 957   EXPECT_TRUE(GURL("DATA:TEXT/HTML,BAR").SchemeIsLocal());
 958   EXPECT_TRUE(GURL("data:text/html,bar").SchemeIsLocal());
 959   EXPECT_TRUE(GURL("ABOUT:BAR").SchemeIsLocal());
 960   EXPECT_TRUE(GURL("about:bar").SchemeIsLocal());
 961   EXPECT_TRUE(GURL("FILESYSTEM:HTTP://FOO.EXAMPLE/BAR").SchemeIsLocal());
 962   EXPECT_TRUE(GURL("filesystem:http://foo.example/bar").SchemeIsLocal());
 963
 964   EXPECT_FALSE(GURL("http://bar/").SchemeIsLocal());
 965   EXPECT_FALSE(GURL("file:///bar").SchemeIsLocal());
 966 }
 967
 968 // Tests that the 'content' of the URL is properly extracted. This can be
 969 // complex in cases such as multiple schemes (view-source:http:) or for
 970 // javascript URLs. See GURL::GetContent for more details.
 971 TEST(GURLTest, ContentForNonStandardURLs) {
 972   struct TestCase {
 973     const char* url;
 974     const char* expected;
 975   } cases[] = {
 976       {"null", ""},
 977       {"not-a-standard-scheme:this is arbitrary content",
 978        "this is arbitrary content"},
 979
 980       // When there are multiple schemes, only the first is excluded from the
 981       // content. Note also that for e.g. 'http://', the '//' is part of the
 982       // content not the scheme.
 983       {"view-source:http://example.com/path", "http://example.com/path"},
 984       {"blob:http://example.com/GUID", "http://example.com/GUID"},
 985       {"blob://http://example.com/GUID", "//http://example.com/GUID"},
 986       {"blob:http://user:password@example.com/GUID",
 987        "http://user:password@example.com/GUID"},
 988
 989       // The octothorpe character ('#') marks the end of the URL content, and
 990       // the start of the fragment. It should not be included in the content.
 991       {"http://www.example.com/GUID#ref", "www.example.com/GUID"},
 992       {"http://me:secret@example.com/GUID/#ref", "me:secret@example.com/GUID/"},
 993       {"data:text/html,Question?<div style=\"color: #bad\">idea</div>",
 994        "text/html,Question?%3Cdiv%20style=%22color:%20"},
 995
 996       // TODO(mkwst): This seems like a bug. https://crbug.com/513600
 997       {"filesystem:http://example.com/path", "/"},
 998
 999       // Javascript URLs include '#' symbols in their content.
1000       {"javascript:#", "#"},
1001       {"javascript:alert('#');", "alert('#');"},
1002   };
1003
1004   for (const auto& test : cases) {
1005     GURL url(test.url);
1006     EXPECT_EQ(test.expected, url.GetContent()) << test.url;
1007     EXPECT_EQ(test.expected, url.GetContentPiece()) << test.url;
1008   }
1009 }
1010
1011 // Tests that the URL path is properly extracted for unusual URLs. This can be
1012 // complex in cases such as multiple schemes (view-source:http:) or when
1013 // octothorpes ('#') are involved.
1014 TEST(GURLTest, PathForNonStandardURLs) {
1015   struct TestCase {
1016     const char* url;
1017     const char* expected;
1018   } cases[] = {
1019       {"null", ""},
1020       {"not-a-standard-scheme:this is arbitrary content",
1021        "this is arbitrary content"},
1022       {"view-source:http://example.com/path", "http://example.com/path"},
1023       {"blob:http://example.com/GUID", "http://example.com/GUID"},
1024       {"blob://http://example.com/GUID", "//http://example.com/GUID"},
1025       {"blob:http://user:password@example.com/GUID",
1026        "http://user:password@example.com/GUID"},
1027
1028       {"http://www.example.com/GUID#ref", "/GUID"},
1029       {"http://me:secret@example.com/GUID/#ref", "/GUID/"},
1030       {"data:text/html,Question?<div style=\"color: #bad\">idea</div>",
1031        "text/html,Question"},
1032
1033       // TODO(mkwst): This seems like a bug. https://crbug.com/513600
1034       {"filesystem:http://example.com/path", "/"},
1035   };
1036
1037   for (const auto& test : cases) {
1038     GURL url(test.url);
1039     EXPECT_EQ(test.expected, url.path()) << test.url;
1040   }
1041 }
1042
1043 TEST(GURLTest, EqualsIgnoringRef) {
1044   const struct {
1045     const char* url_a;
1046     const char* url_b;
1047     bool are_equals;
1048   } kTestCases[] = {
1049       // No ref.
1050       {"http://a.com", "http://a.com", true},
1051       {"http://a.com", "http://b.com", false},
1052
1053       // Same Ref.
1054       {"http://a.com#foo", "http://a.com#foo", true},
1055       {"http://a.com#foo", "http://b.com#foo", false},
1056
1057       // Different Refs.
1058       {"http://a.com#foo", "http://a.com#bar", true},
1059       {"http://a.com#foo", "http://b.com#bar", false},
1060
1061       // One has a ref, the other doesn't.
1062       {"http://a.com#foo", "http://a.com", true},
1063       {"http://a.com#foo", "http://b.com", false},
1064
1065       // Empty refs.
1066       {"http://a.com#", "http://a.com#", true},
1067       {"http://a.com#", "http://a.com", true},
1068
1069       // URLs that differ only by their last character.
1070       {"http://aaa", "http://aab", false},
1071       {"http://aaa#foo", "http://aab#foo", false},
1072
1073       // Different size of the part before the ref.
1074       {"http://123#a", "http://123456#a", false},
1075
1076       // Blob URLs
1077       {"blob:http://a.com#foo", "blob:http://a.com#foo", true},
1078       {"blob:http://a.com#foo", "blob:http://a.com#bar", true},
1079       {"blob:http://a.com#foo", "blob:http://b.com#bar", false},
1080
1081       // Filesystem URLs
1082       {"filesystem:http://a.com#foo", "filesystem:http://a.com#foo", true},
1083       {"filesystem:http://a.com#foo", "filesystem:http://a.com#bar", true},
1084       {"filesystem:http://a.com#foo", "filesystem:http://b.com#bar", false},
1085
1086       // Data URLs
1087       {"data:text/html,a#foo", "data:text/html,a#bar", true},
1088       {"data:text/html,a#foo", "data:text/html,a#foo", true},
1089       {"data:text/html,a#foo", "data:text/html,b#foo", false},
1090   };
1091
1092   for (const auto& test_case : kTestCases) {
1093     SCOPED_TRACE(testing::Message()
1094                  << std::endl
1095                  << "url_a = " << test_case.url_a << std::endl
1096                  << "url_b = " << test_case.url_b << std::endl);
1097     // A versus B.
1098     EXPECT_EQ(test_case.are_equals,
1099               GURL(test_case.url_a).EqualsIgnoringRef(GURL(test_case.url_b)));
1100     // B versus A.
1101     EXPECT_EQ(test_case.are_equals,
1102               GURL(test_case.url_b).EqualsIgnoringRef(GURL(test_case.url_a)));
1103   }
1104 }
1105
1106 TEST(GURLTest, DebugAlias) {
1107   GURL url("https://foo.com/bar");
1108   DEBUG_ALIAS_FOR_GURL(url_debug_alias, url);
1109   EXPECT_STREQ("https://foo.com/bar", url_debug_alias);
1110 }
1111
1112 TEST(GURLTest, InvalidHost) {
1113   // This contains an invalid percent escape (%T%) and also a valid
1114   // percent escape that's not 7-bit ascii (%ae), so that the unescaped
1115   // host contains both an invalid percent escape and invalid UTF-8.
1116   GURL url("http://%T%Ae");
1117
1118   EXPECT_FALSE(url.is_valid());
1119   EXPECT_TRUE(url.SchemeIs(url::kHttpScheme));
1120
1121   // The invalid percent escape becomes an escaped percent sign (%25), and the
1122   // invalid UTF-8 character becomes REPLACEMENT CHARACTER' (U+FFFD) encoded as
1123   // UTF-8.
1124   EXPECT_EQ(url.host_piece(), "%25t%EF%BF%BD");
1125 }
1126
1127 TEST(GURLTest, PortZero) {
1128   GURL port_zero_url("http://127.0.0.1:0/blah");
1129
1130   // https://url.spec.whatwg.org/#port-state says that the port 1) consists of
1131   // ASCII digits (this excludes negative numbers) and 2) cannot be greater than
1132   // 2^16-1.  This means that port=0 should be valid.
1133   EXPECT_TRUE(port_zero_url.is_valid());
1134   EXPECT_EQ("0", port_zero_url.port());
1135   EXPECT_EQ("127.0.0.1", port_zero_url.host());
1136   EXPECT_EQ("http", port_zero_url.scheme());
1137
1138   // https://crbug.com/1065532: SchemeHostPort would previously incorrectly
1139   // consider port=0 to be invalid.
1140   SchemeHostPort scheme_host_port(port_zero_url);
1141   EXPECT_TRUE(scheme_host_port.IsValid());
1142   EXPECT_EQ(port_zero_url.scheme(), scheme_host_port.scheme());
1143   EXPECT_EQ(port_zero_url.host(), scheme_host_port.host());
1144   EXPECT_EQ(port_zero_url.port(),
1145             base::NumberToString(scheme_host_port.port()));
1146
1147   // https://crbug.com/1065532: The SchemeHostPort problem above would lead to
1148   // bizarre results below - resolved origin would incorrectly be returned as an
1149   // opaque origin derived from |another_origin|.
1150   url::Origin another_origin = url::Origin::Create(GURL("http://other.com"));
1151   url::Origin resolved_origin =
1152       url::Origin::Resolve(port_zero_url, another_origin);
1153   EXPECT_FALSE(resolved_origin.opaque());
1154   EXPECT_EQ(port_zero_url.scheme(), resolved_origin.scheme());
1155   EXPECT_EQ(port_zero_url.host(), resolved_origin.host());
1156   EXPECT_EQ(port_zero_url.port(), base::NumberToString(resolved_origin.port()));
1157
1158   // port=0 and default HTTP port are different.
1159   GURL default_port("http://127.0.0.1/foo");
1160   EXPECT_EQ(0, SchemeHostPort(port_zero_url).port());
1161   EXPECT_EQ(80, SchemeHostPort(default_port).port());
1162   url::Origin default_port_origin = url::Origin::Create(default_port);
1163   EXPECT_FALSE(default_port_origin.IsSameOriginWith(resolved_origin));
1164 }
1165
1166 class GURLTestTraits {
1167  public:
1168   using UrlType = GURL;
1169
1170   static UrlType CreateUrlFromString(std::string_view s) { return GURL(s); }
1171   static bool IsAboutBlank(const UrlType& url) { return url.IsAboutBlank(); }
1172   static bool IsAboutSrcdoc(const UrlType& url) { return url.IsAboutSrcdoc(); }
1173
1174   // Only static members.
1175   GURLTestTraits() = delete;
1176 };
1177
1178 INSTANTIATE_TYPED_TEST_SUITE_P(GURL, AbstractUrlTest, GURLTestTraits);
1179
1180 }  // namespace url