1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_
6 #define CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_
11 #include "base/memory/ref_counted.h"
12 #include "chrome/browser/history/url_utils.h"
13 #include "components/history/core/browser/history_types.h"
20 // TopSitesCache caches thumbnails for visited pages. Retrieving thumbnails from
21 // a given input URL is a two-stage process:
23 // input URL --(map 1)--> canonical URL --(map 2)--> image.
25 // (map 1) searches for a URL in |canonical_urls_| that "matches" (see below)
26 // the input URL. If found, the canonical URL is assigned to the result.
27 // Otherwise the input URL is considered to already be a canonical URL.
29 // (map 2) simply looks up canonical URL in |images_|.
31 // The rule to "match" a URL in |canonical_urls_| always favors an exact match.
32 // - In GetCanonicalURL(), exact match is the only case examined.
33 // - In GetGeneralizedCanonicalURL(), we also perform "generalized" URL matches,
34 // i.e., stored URLs in |canonical_urls_| that are prefixes of the input URL,
35 // ignoring "?query#ref".
36 // For these generalized "URL prefix matches", we prefer the match that is
37 // closest to the input URL, w.r.t. path hierarchy.
39 // TopSitesCache caches the top sites and thumbnails for TopSites.
45 // Sets the top sites. In |top_sites| all forced URLs must appear before
46 // non-forced URLs. This ordering is only checked in debug.
47 void SetTopSites(const MostVisitedURLList& top_sites);
// Returns a const reference to the stored list of top sites, |top_sites_|.
48 const MostVisitedURLList& top_sites() const { return top_sites_; }
// Sets the thumbnails from |images|.
// NOTE(review): presumably replaces the contents of |images_| wholesale --
// confirm in the implementation.
51 void SetThumbnails(const URLToImagesMap& images);
// Returns a const reference to the stored thumbnail map, |images_|.
52 const URLToImagesMap& images() const { return images_; }
54 // Returns the thumbnail for the specified |url| as a pointer to Images.
55 // Non-const: this adds an entry for |url| if one has not yet been added.
56 Images* GetImage(const GURL& url);
58 // Fetches the thumbnail for the specified |url|. Returns true if there is a
59 // thumbnail for the specified |url|. It is possible for a URL to be in
60 // TopSites but not have a thumbnail.
// NOTE(review): |bytes| looks like an out-parameter that receives the
// thumbnail data when true is returned -- confirm in the implementation.
61 bool GetPageThumbnail(const GURL& url,
62 scoped_refptr<base::RefCountedMemory>* bytes) const;
64 // Fetches the thumbnail score for the specified |url|. Returns true if
65 // there is a thumbnail score for the specified |url|.
// NOTE(review): |score| looks like an out-parameter that is filled in when
// true is returned -- confirm in the implementation.
66 bool GetPageThumbnailScore(const GURL& url, ThumbnailScore* score) const;
68 // Returns the canonical URL for |url|. Per the class-level comment, only an
// exact match in |canonical_urls_| is examined; if none is found, |url|
// itself is considered to already be a canonical URL.
69 const GURL& GetCanonicalURL(const GURL& url) const;
71 // Searches for a URL in |canonical_urls_| that is a URL prefix of |url|.
72 // Prefers an exact match if it exists, or the least generalized match while
73 // ignoring "?query#ref". Returns the resulting canonical URL if a match is
74 // found, otherwise returns an empty GURL. Unlike GetCanonicalURL(), the
// result is returned by value rather than by const reference.
75 GURL GetGeneralizedCanonicalURL(const GURL& url) const;
77 // Returns true if |url| is known to this cache.
// NOTE(review): "known" presumably means |url| has an entry in
// |canonical_urls_| -- confirm against the implementation.
78 bool IsKnownURL(const GURL& url) const;
80 // Returns the index into |top_sites_| for |url|.
// NOTE(review): the behavior for a |url| that is not known is not visible
// here; callers presumably check IsKnownURL() first -- confirm.
81 size_t GetURLIndex(const GURL& url) const;
83 // Returns the number of non-forced URLs in the cache.
// NOTE(review): presumably the size of |top_sites_| minus |num_forced_urls_|
// -- confirm in the implementation.
84 size_t GetNumNonForcedURLs() const;
86 // Returns the number of forced URLs in the cache.
// NOTE(review): presumably returns |num_forced_urls_| -- confirm.
87 size_t GetNumForcedURLs() const;
90 // The key type of CanonicalURLs; see CanonicalURLs for details. The first
91 // member points to a MostVisitedURL and the second member is the index of
// the URL within that MostVisitedURL's |redirects|.
92 typedef std::pair<MostVisitedURL*, size_t> CanonicalURLEntry;
94 // Comparator used for CanonicalURLs. Compares two entries by passing the
// spec() of the redirect URL each one refers to (first->redirects[second])
// to CanonicalURLStringCompare().
95 class CanonicalURLComparator {
97 bool operator()(const CanonicalURLEntry& e1,
98 const CanonicalURLEntry& e2) const {
99 return CanonicalURLStringCompare(e1.first->redirects[e1.second].spec(),
100 e2.first->redirects[e2.second].spec());
104 // Creates the object needed to form std::map queries into |canonical_urls_|,
105 // wrapping all required temporary data to allow inlining. The query key is
// exposed through entry().
106 class CanonicalURLQuery {
108 explicit CanonicalURLQuery(const GURL& url);
109 ~CanonicalURLQuery();
// Returns the CanonicalURLEntry stored in |entry_|.
// NOTE(review): this accessor only returns a member by const reference and
// could be const-qualified.
110 const CanonicalURLEntry& entry() { return entry_; }
// NOTE(review): presumably |most_visited_url_| holds the queried URL in its
// redirects and |entry_| points at it -- confirm in the constructor.
113 MostVisitedURL most_visited_url_;
114 CanonicalURLEntry entry_;
117 // Maps from a redirect url to the MostVisitedURL the redirect is from.
118 // Ideally this would be map<GURL, size_t> (second param indexing into
119 // top_sites_), but this results in duplicating all redirect urls. As some
120 // sites have a lot of redirects, we instead use the MostVisitedURL* and the
121 // index of the redirect as the key, and the index into top_sites_ as the
122 // value. This way we aren't duplicating GURLs. CanonicalURLComparator
123 // enforces the ordering as if we were using GURLs.
124 typedef std::map<CanonicalURLEntry, size_t,
125 CanonicalURLComparator> CanonicalURLs;
127 // Counts the number of forced URLs.
// NOTE(review): presumably scans |top_sites_| and stores the result in
// |num_forced_urls_| -- confirm in the implementation.
128 void CountForcedURLs();
130 // Generates the set of canonical urls from |top_sites_|.
// NOTE(review): presumably (re)populates |canonical_urls_| -- confirm in the
// implementation.
131 void GenerateCanonicalURLs();
133 // Stores a set of redirects. This is used by GenerateCanonicalURLs().
// NOTE(review): |destination| is presumably the index into |top_sites_| of
// the MostVisitedURL that |redirects| belongs to -- confirm.
134 void StoreRedirectChain(const RedirectList& redirects, size_t destination);
136 // Returns the iterator into |canonical_urls_| for the |url|.
// NOTE(review): presumably returns canonical_urls_.end() when |url| has no
// entry -- confirm in the implementation.
137 CanonicalURLs::const_iterator GetCanonicalURLsIterator(const GURL& url) const;
139 // Returns the GURL corresponding to an iterator in |canonical_urls_|.
// NOTE(review): presumably the redirect URL that the iterator's key refers
// to, so |it| must not be the end iterator -- confirm.
140 const GURL& GetURLFromIterator(CanonicalURLs::const_iterator it) const;
142 // The number of top sites with forced URLs.
// NOTE(review): no in-class initializer is visible here; presumably
// initialized in the constructor -- confirm.
143 size_t num_forced_urls_;
145 // The top sites. This list must always contain the forced URLs first followed
146 // by the non-forced URLs. This is not strictly enforced but is checked in
// debug.
148 MostVisitedURLList top_sites_;
150 // The thumbnail images. This maps from canonical url to image.
151 URLToImagesMap images_;
153 // Generated from the redirects to and from the most visited pages. See the
154 // description above the CanonicalURLs typedef for details.
155 CanonicalURLs canonical_urls_;
157 // Helper to clear "?query#ref" from any GURL. This is set in the constructor
158 // and never modified afterwards.
159 GURL::Replacements clear_query_ref_;
161 // Helper to clear "/path?query#ref" from any GURL. This is set in the
162 // constructor and never modified afterwards.
163 GURL::Replacements clear_path_query_ref_;
// NOTE(review): going by the macro name, this makes TopSitesCache
// non-copyable and non-assignable; the macro definition is not visible here.
165 DISALLOW_COPY_AND_ASSIGN(TopSitesCache);
168 } // namespace history
170 #endif // CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_