1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/history/top_sites_cache.h"
9 #include "base/basictypes.h"
10 #include "base/logging.h"
11 #include "base/strings/string16.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "testing/gtest/include/gtest/gtest.h"
// Test fixture (derived from testing::Test) used by the TEST_F cases in this
// file that exercise the top sites cache.
20 class TopSitesCacheTest : public testing::Test {
26 // Initializes |top_sites_| and |cache_| based on |spec|, which is a list of
27 // URL strings with optional indents: indented URLs redirect to the last
28 // non-indented URL. Titles are assigned as "Title 1", "Title 2", etc., in the
29 // order of appearance. See |kTopSitesSpecBasic| for an example.
30 void InitTopSiteCache(const char** spec, size_t size);
// List of canonical sites filled in by InitTopSiteCache(); each entry also
// accumulates the URLs that redirect to it.
32 MostVisitedURLList top_sites_;
// Copying and assigning the fixture is disallowed.
36 DISALLOW_COPY_AND_ASSIGN(TopSitesCacheTest);
// Populates |top_sites_| from the |size| entries of |spec| and then passes the
// resulting list to |cache_| via SetTopSites().
//
// An entry without indent appends a new MostVisitedURL whose title is
// Title N, where N is the 1-based position of that site in |top_sites_|.
// Every entry, indented or not, is appended to the redirect list of the most
// recently added MostVisitedURL. A URL that appears twice in |spec| triggers
// NOTREACHED().
39 void TopSitesCacheTest::InitTopSiteCache(const char** spec, size_t size) {
// Remembers each URL already processed so duplicates can be reported.
40 std::set<std::string> urls_seen;
41 for (size_t i = 0; i < size; ++i) {
42 const char* spec_item = spec[i];
43 while (*spec_item && *spec_item == ' ') // Eat indent.
45 if (urls_seen.find(spec_item) != urls_seen.end())
46 NOTREACHED() << "Duplicate URL found: " << spec_item;
47 urls_seen.insert(spec_item);
// Pointer comparison, not string comparison: true only when |spec_item| still
// points at the first character of spec[i].
48 if (spec_item == spec[i]) { // No indent: add new MostVisitedURL.
49 string16 title(ASCIIToUTF16("Title ") +
50 base::Uint64ToString16(top_sites_.size() + 1));
51 top_sites_.push_back(MostVisitedURL(GURL(spec_item), title));
// A redirect entry needs an existing site to attach to, so |spec| must not
// start with an indented entry.
53 ASSERT_TRUE(!top_sites_.empty());
54 // Set up redirect to canonical URL. Canonical URL redirects to itself, too.
55 top_sites_.back().redirects.push_back(GURL(spec_item));
// Hand the assembled list to the cache under test.
57 cache_.SetTopSites(top_sites_);
// Spec consumed by InitTopSiteCache() for the basic lookup tests. Entries with
// no leading space are canonical URLs; entries with a leading space are
// recorded as redirects of the closest preceding canonical URL.
60 const char* kTopSitesSpecBasic[] = {
61 "http://www.google.com",
62 " http://www.gogle.com", // Redirects.
63 " http://www.gooogle.com", // Redirects.
64 "http://www.youtube.com/a/b",
65 " http://www.youtube.com/a/b?test=1", // Redirects.
66 "https://www.google.com/",
67 " https://www.gogle.com", // Redirects.
68 "http://www.example.com:3141/",
// Checks GetCanonicalURL() against a cache built from |kTopSitesSpecBasic|.
// Each case below lists the expected canonical URL spec first and the queried
// URL second. A case marked as-is expects the spec of the queried URL itself.
71 TEST_F(TopSitesCacheTest, GetCanonicalURL) {
72 InitTopSiteCache(kTopSitesSpecBasic, arraysize(kTopSitesSpecBasic));
77 // Already is canonical: redirects.
78 {"http://www.google.com/", "http://www.google.com"},
79 // Exact match with stored URL: redirects.
80 {"http://www.google.com/", "http://www.gooogle.com"},
81 // Recognizes despite trailing "/": redirects
82 {"http://www.google.com/", "http://www.gooogle.com/"},
83 // Exact match with URL with query: redirects.
84 {"http://www.youtube.com/a/b", "http://www.youtube.com/a/b?test=1"},
85 // No match with URL with query: as-is.
86 {"http://www.youtube.com/a/b?test", "http://www.youtube.com/a/b?test"},
87 // Never-seen-before URL: as-is.
88 {"http://maps.google.com/", "http://maps.google.com/"},
89 // Changing port number, does not match: as-is.
90 {"http://www.example.com:1234/", "http://www.example.com:1234"},
91 // Smart enough to know that port 80 is HTTP: redirects.
92 {"http://www.google.com/", "http://www.gooogle.com:80"},
93 // Prefix should not work: as-is.
94 {"http://www.youtube.com/a", "http://www.youtube.com/a"},
// Compares the spec() string of each lookup result with the expected string;
// the failure message identifies the index of the offending case.
96 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
97 std::string expected(test_cases[i].expected);
98 std::string query(test_cases[i].query);
99 EXPECT_EQ(expected, cache_.GetCanonicalURL(GURL(query)).spec())
100 << " for test_case[" << i << "]";
// Checks IsKnownURL() against a cache built from |kTopSitesSpecBasic|, using
// both URLs listed in the spec and near misses.
104 TEST_F(TopSitesCacheTest, IsKnownUrl) {
105 InitTopSiteCache(kTopSitesSpecBasic, arraysize(kTopSitesSpecBasic));
// Known: a canonical URL, a redirect source, and the canonical URL written
// with a trailing slash.
107 EXPECT_TRUE(cache_.IsKnownURL(GURL("http://www.google.com")));
108 EXPECT_TRUE(cache_.IsKnownURL(GURL("http://www.gooogle.com")));
109 EXPECT_TRUE(cache_.IsKnownURL(GURL("http://www.google.com/")));
// Not known: variants that add an empty query, change the host, add a path,
// change the scheme of a redirect source, or use only a prefix of a stored
// path.
112 EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.google.com?")));
113 EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.google.net")));
114 EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.google.com/stuff")));
115 EXPECT_FALSE(cache_.IsKnownURL(GURL("https://www.gooogle.com")));
116 EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.youtube.com/a")));
// Spec consumed by InitTopSiteCache() for the specialized and generalized
// lookup tests. Same format as |kTopSitesSpecBasic|: entries with a leading
// space are redirects of the closest preceding entry without one. The entries
// share hosts and path prefixes so that prefix-based lookups have several
// candidates to choose from.
119 const char* kTopSitesSpecPrefix[] = {
120 "http://www.google.com/",
121 " http://www.google.com/test?q=3", // Redirects.
122 " http://www.google.com/test/y?d", // Redirects.
123 " http://www.chromium.org/a/b", // Redirects.
124 "http://www.google.com/2",
125 " http://www.google.com/test/q", // Redirects.
126 " http://www.google.com/test/y?b", // Redirects.
127 "http://www.google.com/3",
128 " http://www.google.com/testing", // Redirects.
129 "http://www.google.com/test-hyphen",
130 "http://www.google.com/sh",
131 " http://www.google.com/sh/1/2/3", // Redirects.
132 "http://www.google.com/sh/1",
// For every URL stored in |kTopSitesSpecPrefix|, expects
// GetSpecializedCanonicalURL() and GetGeneralizedCanonicalURL() to return the
// same URL as the direct GetCanonicalURL() lookup.
135 TEST_F(TopSitesCacheTest, GetCanonicalURLExactMatch) {
136 InitTopSiteCache(kTopSitesSpecPrefix, arraysize(kTopSitesSpecPrefix));
137 for (size_t i = 0; i < arraysize(kTopSitesSpecPrefix); ++i) {
138 // Go through each entry in kTopSitesSpecPrefix, trimming space.
139 const char* s = kTopSitesSpecPrefix[i];
140 while (*s && *s == ' ')
142 // Get the answer from direct lookup.
144 GURL expected(cache_.GetCanonicalURL(stored_url));
145 // Test specialization.
146 GURL result1(cache_.GetSpecializedCanonicalURL(stored_url));
147 EXPECT_EQ(expected, result1) << " for kTopSitesSpecPrefix[" << i << "]";
148 // Test generalization.
149 GURL result2(cache_.GetGeneralizedCanonicalURL(stored_url));
150 EXPECT_EQ(expected, result2) << " for kTopSitesSpecPrefix[" << i << "]";
// Checks GetSpecializedCanonicalURL() against a cache built from
// |kTopSitesSpecPrefix|. Each case lists the expected spec first and the
// queried URL second; an empty expected string means the returned GURL is
// expected to have an empty spec().
154 TEST_F(TopSitesCacheTest, GetSpecializedCanonicalURL) {
155 InitTopSiteCache(kTopSitesSpecPrefix, arraysize(kTopSitesSpecPrefix));
157 const char* expected;
160 // Exact match after trimming "?query": redirects.
161 {"http://www.google.com/", "http://www.google.com/test"},
162 // Specialized match: redirects.
163 {"http://www.google.com/sh", "http://www.google.com/sh/1/2"},
164 // Specialized match with trailing "/": redirects.
165 {"http://www.google.com/sh", "http://www.google.com/sh/1/2/"},
166 // Unique specialization match: redirects.
167 {"http://www.google.com/", "http://www.chromium.org/a"},
168 // Multiple exact matches after trimming: redirects to first.
169 {"http://www.google.com/2", "http://www.google.com/test/y"},
170 // Multiple specialized matches: redirects to least specialized.
171 {"http://www.google.com/2", "http://www.google.com/test/q"},
172 // No specialized match: fails.
173 {"", "http://www.google.com/no-match"},
174 // String prefix match but not URL-prefix match: fails.
175 {"", "http://www.google.com/t"},
176 // Different protocol: fails.
177 {"", "https://www.google.com/test"},
178 // Smart enough to know that port 80 is HTTP: redirects.
179 {"http://www.google.com/", "http://www.google.com:80/test"},
180 // Generalization match only: fails.
181 {"", "http://www.google.com/sh/1/2/3/4"},
// Runs every case and compares the spec() of the result with the expected
// string; the failure message identifies the index of the offending case.
183 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
184 std::string expected(test_cases[i].expected);
185 std::string query(test_cases[i].query);
186 GURL result(cache_.GetSpecializedCanonicalURL(GURL(query)));
187 EXPECT_EQ(expected, result.spec()) << " for test_case[" << i << "]";
// Checks GetGeneralizedCanonicalURL() against a cache built from
// |kTopSitesSpecPrefix|. Each case lists the expected spec first and the
// queried URL second; an empty expected string means the returned GURL is
// expected to have an empty spec().
191 TEST_F(TopSitesCacheTest, GetGeneralizedCanonicalURL) {
192 InitTopSiteCache(kTopSitesSpecPrefix, arraysize(kTopSitesSpecPrefix));
194 const char* expected;
197 // Exact match after trimming "?query": redirects.
198 {"http://www.google.com/", "http://www.google.com/test"},
199 // Same, but different code path: redirects.
200 {"http://www.google.com/", "http://www.google.com/test/y?e"},
201 {"http://www.google.com/", "http://www.google.com/test/y?c"},
202 // Same, but code path leads to different result: redirects.
203 {"http://www.google.com/2", "http://www.google.com/test/y?a"},
204 // Generalized match: redirects.
205 {"http://www.google.com/3", "http://www.google.com/3/1/4/1/5/9"},
206 // Generalized match with trailing "/": redirects.
207 {"http://www.google.com/3", "http://www.google.com/3/1/4/1/5/9/"},
208 // Unique generalization match: redirects.
209 {"http://www.google.com/", "http://www.chromium.org/a/b/c"},
210 // Multiple exact matches after trimming: redirects to first.
211 {"http://www.google.com/2", "http://www.google.com/test/y"},
212 // Multiple generalized matches: redirects to least general.
213 {"http://www.google.com/sh", "http://www.google.com/sh/1/2/3/4/"},
214 // Multiple generalized matches: redirects to least general.
// NOTE(review): this case repeats the previous one verbatim; confirm whether a
// different query URL was intended here.
215 {"http://www.google.com/sh", "http://www.google.com/sh/1/2/3/4/"},
216 // Competing generalized match: take the most specialized.
217 {"http://www.google.com/2", "http://www.google.com/test/q"},
218 // No generalized match, early element: fails.
219 {"", "http://www.a.com/"},
220 // No generalized match, intermediate element: fails.
221 {"", "http://www.e-is-between-chromium-and-google.com/"},
222 // No generalized match, late element: fails.
223 {"", "http://www.zzzzzzz.com/"},
224 // String prefix match but not URL-prefix match: fails.
225 {"", "http://www.chromium.org/a/beeswax"},
226 // String prefix match and URL-prefix match: redirects.
227 {"http://www.google.com/", "http://www.google.com/shhhhhh"},
228 // Different protocol: fails.
229 {"", "https://www.google.com/test"},
230 // Smart enough to know that port 80 is HTTP: redirects.
231 {"http://www.google.com/", "http://www.google.com:80/test"},
232 // Specialized match only: fails.
233 {"", "http://www.chromium.org/a"},
// Runs every case and compares the spec() of the result with the expected
// string; the failure message identifies the index of the offending case.
235 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
236 std::string expected(test_cases[i].expected);
237 std::string query(test_cases[i].query);
238 GURL result(cache_.GetGeneralizedCanonicalURL(GURL(query)));
239 EXPECT_EQ(expected, result.spec()) << " for test_case[" << i << "]";
243 // This tests a special case where there are 2 specialized and generalized
244 // matches, and both should be checked to find the correct match.
// The two redirect sources below differ in path, and each query differs from
// a stored source only in its query string.
245 TEST_F(TopSitesCacheTest, GetPrefixCanonicalURLDiffByQuery) {
246 const char* top_sites_spec[] = {
247 "http://www.dest.com/1",
248 " http://www.source.com/a?m=5", // Redirects.
249 "http://www.dest.com/2",
250 " http://www.source.com/a/t?q=3", // Redirects.
252 InitTopSiteCache(top_sites_spec, arraysize(top_sites_spec));
254 // Shared by GetSpecializedCanonicalURL() and GetGeneralizedCanonicalURL().
256 const char* expected;
259 // Slightly before "http://www.source.com/a?m=5".
260 {"http://www.dest.com/1", "http://www.source.com/a?l=5"},
261 // Slightly after "http://www.source.com/a?m=5".
262 {"http://www.dest.com/1", "http://www.source.com/a?n=5"},
263 // Slightly before "http://www.source.com/a/t?q=3".
264 {"http://www.dest.com/2", "http://www.source.com/a/t?q=2"},
265 // Slightly after "http://www.source.com/a/t?q=3".
266 {"http://www.dest.com/2", "http://www.source.com/a/t?q=4"},
// Both lookups are expected to return the same destination for every case.
269 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
270 std::string expected(test_cases[i].expected);
271 std::string query(test_cases[i].query);
272 GURL result1(cache_.GetSpecializedCanonicalURL(GURL(query)));
273 EXPECT_EQ(expected, result1.spec()) << " for test_case[" << i << "]";
274 GURL result2(cache_.GetGeneralizedCanonicalURL(GURL(query)));
275 EXPECT_EQ(expected, result2.spec()) << " for test_case[" << i << "]";
281 } // namespace history