1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/history/top_sites_cache.h"
9 #include "base/basictypes.h"
10 #include "base/logging.h"
11 #include "base/strings/string16.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "testing/gtest/include/gtest/gtest.h"
// Test fixture shared by the TopSitesCache tests in this file. It owns the
// MostVisitedURLList that the helpers below build from a textual spec.
20 class TopSitesCacheTest : public testing::Test {
26 // Initializes |top_sites_| on |spec|, which is a list of URL strings with
27 // optional indents: indented URLs redirect to the last non-indented URL.
28 // Titles are assigned as "Title 1", "Title 2", etc., in the order of
29 // appearance. See |kTopSitesSpecBasic| for an example. This function does not
30 // update |cache_| so you can manipulate |top_sites_| before you update it.
// |size| is the number of entries in |spec|.
31 void BuildTopSites(const char** spec, size_t size);
33 // Initializes |top_sites_| and |cache_| based on |spec|.
// |size| is the number of entries in |spec|.
34 void InitTopSiteCache(const char** spec, size_t size);
// List populated by BuildTopSites() and handed to |cache_| through
// SetTopSites().
36 MostVisitedURLList top_sites_;
40 DISALLOW_COPY_AND_ASSIGN(TopSitesCacheTest);
// Walks the |size| entries of |spec|. Each entry without leading spaces
// appends a new MostVisitedURL (titled "Title N") to |top_sites_|; every
// entry, indented or not, is then appended to the redirect list of the most
// recently added MostVisitedURL.
43 void TopSitesCacheTest::BuildTopSites(const char** spec, size_t size) {
// URLs encountered so far (with indent removed), used to flag duplicates.
44 std::set<std::string> urls_seen;
45 for (size_t i = 0; i < size; ++i) {
46 const char* spec_item = spec[i];
47 while (*spec_item && *spec_item == ' ') // Eat indent.
49 if (urls_seen.find(spec_item) != urls_seen.end())
50 NOTREACHED() << "Duplicate URL found: " << spec_item;
51 urls_seen.insert(spec_item);
// If |spec_item| still points at the start of |spec[i]|, no indent was
// skipped, so this entry is a canonical URL rather than a redirect source.
52 if (spec_item == spec[i]) { // No indent: add new MostVisitedURL.
53 base::string16 title(base::ASCIIToUTF16("Title ") +
54 base::Uint64ToString16(top_sites_.size() + 1));
55 top_sites_.push_back(MostVisitedURL(GURL(spec_item), title));
// The redirect below is attached to top_sites_.back(), so an entry must
// already exist; a spec that starts with an indented URL fails here.
57 ASSERT_TRUE(!top_sites_.empty());
58 // Set up redirect to canonical URL. Canonical URL redirects to itself, too.
59 top_sites_.back().redirects.push_back(GURL(spec_item));
// Convenience wrapper: builds |top_sites_| from |spec| and immediately loads
// the result into |cache_|.
63 void TopSitesCacheTest::InitTopSiteCache(const char** spec, size_t size) {
64 BuildTopSites(spec, size);
65 cache_.SetTopSites(top_sites_);
// Basic spec in the format consumed by BuildTopSites(): entries without a
// leading space are canonical URLs, entries with a leading space are redirect
// sources for the canonical URL above them.
68 const char* kTopSitesSpecBasic[] = {
69 "http://www.google.com",
70 " http://www.gogle.com", // Redirects.
71 " http://www.gooogle.com", // Redirects.
72 "http://www.youtube.com/a/b",
73 " http://www.youtube.com/a/b?test=1", // Redirects.
74 "https://www.google.com/",
75 " https://www.gogle.com", // Redirects.
76 "http://www.example.com:3141/",
// Checks GetCanonicalURL() against a cache loaded from |kTopSitesSpecBasic|.
// Each test case appears to be an {expected spec, query URL} pair (see the
// |expected| / |query| reads in the loop below).
79 TEST_F(TopSitesCacheTest, GetCanonicalURL) {
80 InitTopSiteCache(kTopSitesSpecBasic, arraysize(kTopSitesSpecBasic));
85 // Already is canonical: redirects.
86 {"http://www.google.com/", "http://www.google.com"},
87 // Exact match with stored URL: redirects.
88 {"http://www.google.com/", "http://www.gooogle.com"},
89 // Recognizes despite trailing "/": redirects
90 {"http://www.google.com/", "http://www.gooogle.com/"},
91 // Exact match with URL with query: redirects.
92 {"http://www.youtube.com/a/b", "http://www.youtube.com/a/b?test=1"},
93 // No match with URL with query: as-is.
94 {"http://www.youtube.com/a/b?test", "http://www.youtube.com/a/b?test"},
95 // Never-seen-before URL: as-is.
96 {"http://maps.google.com/", "http://maps.google.com/"},
97 // Changing port number, does not match: as-is.
98 {"http://www.example.com:1234/", "http://www.example.com:1234"},
99 // Smart enough to know that port 80 is HTTP: redirects.
100 {"http://www.google.com/", "http://www.gooogle.com:80"},
101 // Prefix should not work: as-is.
102 {"http://www.youtube.com/a", "http://www.youtube.com/a"},
// Compare the spec() of the returned GURL with the expected string; the
// streamed index identifies the failing case.
104 for (size_t i = 0; i < arraysize(test_cases); ++i) {
105 std::string expected(test_cases[i].expected);
106 std::string query(test_cases[i].query);
107 EXPECT_EQ(expected, cache_.GetCanonicalURL(GURL(query)).spec())
108 << " for test_case[" << i << "]";
// Checks IsKnownURL() against a cache loaded from |kTopSitesSpecBasic|.
112 TEST_F(TopSitesCacheTest, IsKnownUrl) {
113 InitTopSiteCache(kTopSitesSpecBasic, arraysize(kTopSitesSpecBasic));
// URLs listed in the spec (canonical or redirect source), with or without a
// trailing "/", are expected to be known.
115 EXPECT_TRUE(cache_.IsKnownURL(GURL("http://www.google.com")));
116 EXPECT_TRUE(cache_.IsKnownURL(GURL("http://www.gooogle.com")));
117 EXPECT_TRUE(cache_.IsKnownURL(GURL("http://www.google.com/")));
// Near misses (extra "?", different TLD, extra path, different scheme, path
// prefix of a stored URL) are expected to be unknown.
120 EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.google.com?")));
121 EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.google.net")));
122 EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.google.com/stuff")));
123 EXPECT_FALSE(cache_.IsKnownURL(GURL("https://www.gooogle.com")));
124 EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.youtube.com/a")));
// Spec (same format as |kTopSitesSpecBasic|) whose redirect sources share
// path prefixes and differ by query; used by the GetGeneralizedCanonicalURL()
// tests below.
127 const char* kTopSitesSpecPrefix[] = {
128 "http://www.google.com/",
129 " http://www.google.com/test?q=3", // Redirects.
130 " http://www.google.com/test/y?d", // Redirects.
131 " http://www.chromium.org/a/b", // Redirects.
132 "http://www.google.com/2",
133 " http://www.google.com/test/q", // Redirects.
134 " http://www.google.com/test/y?b", // Redirects.
135 "http://www.google.com/3",
136 " http://www.google.com/testing", // Redirects.
137 "http://www.google.com/test-hyphen",
138 "http://www.google.com/sh",
139 " http://www.google.com/sh/1/2/3", // Redirects.
140 "http://www.google.com/sh/1",
// For every URL stored from |kTopSitesSpecPrefix|, the generalized lookup
// must return the same result as the direct GetCanonicalURL() lookup.
143 TEST_F(TopSitesCacheTest, GetCanonicalURLExactMatch) {
144 InitTopSiteCache(kTopSitesSpecPrefix, arraysize(kTopSitesSpecPrefix));
145 for (size_t i = 0; i < arraysize(kTopSitesSpecPrefix); ++i) {
146 // Go through each entry in kTopSitesSpecPrefix, trimming space.
147 const char* s = kTopSitesSpecPrefix[i];
148 while (*s && *s == ' ')
150 // Get the answer from direct lookup.
152 GURL expected(cache_.GetCanonicalURL(stored_url));
153 // Test generalization.
154 GURL result(cache_.GetGeneralizedCanonicalURL(stored_url));
155 EXPECT_EQ(expected, result) << " for kTopSitesSpecPrefix[" << i << "]";
// Checks GetGeneralizedCanonicalURL() against a cache loaded from
// |kTopSitesSpecPrefix|. Each test case is an {expected spec, query URL}
// pair; an empty expected string means the returned GURL's spec() is empty.
159 TEST_F(TopSitesCacheTest, GetGeneralizedCanonicalURL) {
160 InitTopSiteCache(kTopSitesSpecPrefix, arraysize(kTopSitesSpecPrefix));
162 const char* expected;
165 // Exact match after trimming "?query": redirects.
166 {"http://www.google.com/", "http://www.google.com/test"},
167 // Same, but different code path: redirects.
168 {"http://www.google.com/", "http://www.google.com/test/y?e"},
169 {"http://www.google.com/", "http://www.google.com/test/y?c"},
170 // Same, but code path leads to different result: redirects.
171 {"http://www.google.com/2", "http://www.google.com/test/y?a"},
172 // Generalized match: redirects.
173 {"http://www.google.com/3", "http://www.google.com/3/1/4/1/5/9"},
174 // Generalized match with trailing "/": redirects.
175 {"http://www.google.com/3", "http://www.google.com/3/1/4/1/5/9/"},
176 // Unique generalization match: redirects.
177 {"http://www.google.com/", "http://www.chromium.org/a/b/c"},
178 // Multiple exact matches after trimming: redirects to first.
179 {"http://www.google.com/2", "http://www.google.com/test/y"},
180 // Multiple generalized matches: redirects to least general.
181 {"http://www.google.com/sh", "http://www.google.com/sh/1/2/3/4/"},
// NOTE(review): the next case repeats the previous one verbatim (same
// expected value and query) - confirm whether a different query was intended.
182 // Multiple generalized matches: redirects to least general.
183 {"http://www.google.com/sh", "http://www.google.com/sh/1/2/3/4/"},
184 // Competing generalized match: take the most specialized.
185 {"http://www.google.com/2", "http://www.google.com/test/q"},
186 // No generalized match, early element: fails.
187 {"", "http://www.a.com/"},
188 // No generalized match, intermediate element: fails.
189 {"", "http://www.e-is-between-chromium-and-google.com/"},
190 // No generalized match, late element: fails.
191 {"", "http://www.zzzzzzz.com/"},
192 // String prefix match but not URL-prefix match: fails.
193 {"", "http://www.chromium.org/a/beeswax"},
194 // String prefix match and URL-prefix match: redirects.
195 {"http://www.google.com/", "http://www.google.com/shhhhhh"},
196 // Different protocol: fails.
197 {"", "https://www.google.com/test"},
198 // Smart enough to know that port 80 is HTTP: redirects.
199 {"http://www.google.com/", "http://www.google.com:80/test"},
200 // Specialized match only: fails.
201 {"", "http://www.chromium.org/a"},
// Compare the spec() of the returned GURL with the expected string; the
// streamed index identifies the failing case.
203 for (size_t i = 0; i < arraysize(test_cases); ++i) {
204 std::string expected(test_cases[i].expected);
205 std::string query(test_cases[i].query);
206 GURL result(cache_.GetGeneralizedCanonicalURL(GURL(query)));
207 EXPECT_EQ(expected, result.spec()) << " for test_case[" << i << "]";
211 // This tests a special case where there are 2 generalized matches, and both
212 // should be checked to find the correct match.
// The two redirect sources below share the path prefix "/a" and carry
// different queries; the test queries use queries that sort just before and
// just after each stored one.
213 TEST_F(TopSitesCacheTest, GetPrefixCanonicalURLDiffByQuery) {
214 const char* top_sites_spec[] = {
215 "http://www.dest.com/1",
216 " http://www.source.com/a?m=5", // Redirects.
217 "http://www.dest.com/2",
218 " http://www.source.com/a/t?q=3", // Redirects.
220 InitTopSiteCache(top_sites_spec, arraysize(top_sites_spec));
223 const char* expected;
226 // Slightly before "http://www.source.com/a?m=5".
227 {"http://www.dest.com/1", "http://www.source.com/a?l=5"},
228 // Slightly after "http://www.source.com/a?m=5".
229 {"http://www.dest.com/1", "http://www.source.com/a?n=5"},
230 // Slightly before "http://www.source.com/a/t?q=3".
231 {"http://www.dest.com/2", "http://www.source.com/a/t?q=2"},
232 // Slightly after "http://www.source.com/a/t?q=3".
233 {"http://www.dest.com/2", "http://www.source.com/a/t?q=4"},
// Compare the spec() of the returned GURL with the expected string; the
// streamed index identifies the failing case.
236 for (size_t i = 0; i < arraysize(test_cases); ++i) {
237 std::string expected(test_cases[i].expected);
238 std::string query(test_cases[i].query);
239 GURL result(cache_.GetGeneralizedCanonicalURL(GURL(query)));
240 EXPECT_EQ(expected, result.spec()) << " for test_case[" << i << "]";
244 // This test ensures forced URLs behave in the expected way.
245 TEST_F(TopSitesCacheTest, CacheForcedURLs) {
246 // Forced URLs must always appear at the beginning of the list.
// BuildTopSites() is used instead of InitTopSiteCache() so that
// |last_forced_time| can be set on the first two entries before the list is
// loaded into |cache_|.
247 BuildTopSites(kTopSitesSpecBasic, arraysize(kTopSitesSpecBasic));
248 top_sites_[0].last_forced_time = base::Time::FromJsTime(1000);
249 top_sites_[1].last_forced_time = base::Time::FromJsTime(2000);
250 cache_.SetTopSites(top_sites_);
// Two entries were marked forced above; the test expects the cache to report
// two forced and two non-forced URLs.
252 EXPECT_EQ(2u, cache_.GetNumForcedURLs());
253 EXPECT_EQ(2u, cache_.GetNumNonForcedURLs());
258 } // namespace history