f27229a3f223569fa60e75c8d9860ef549067b6e
[platform/framework/web/crosswalk.git] / src / chrome / browser / history / top_sites_impl.cc
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/history/top_sites_impl.h"
6
7 #include <algorithm>
8 #include <set>
9
10 #include "base/bind.h"
11 #include "base/bind_helpers.h"
12 #include "base/logging.h"
13 #include "base/md5.h"
14 #include "base/memory/ref_counted_memory.h"
15 #include "base/message_loop/message_loop_proxy.h"
16 #include "base/metrics/histogram.h"
17 #include "base/prefs/pref_service.h"
18 #include "base/prefs/scoped_user_pref_update.h"
19 #include "base/strings/string_util.h"
20 #include "base/strings/utf_string_conversions.h"
21 #include "base/task_runner.h"
22 #include "base/values.h"
23 #include "chrome/browser/chrome_notification_types.h"
24 #include "chrome/browser/history/history_backend.h"
25 #include "chrome/browser/history/history_db_task.h"
26 #include "chrome/browser/history/history_notifications.h"
27 #include "chrome/browser/history/history_service_factory.h"
28 #include "chrome/browser/history/page_usage_data.h"
29 #include "chrome/browser/history/top_sites_cache.h"
30 #include "chrome/browser/history/url_utils.h"
31 #include "chrome/browser/profiles/profile.h"
32 #include "chrome/common/pref_names.h"
33 #include "chrome/common/thumbnail_score.h"
34 #include "content/public/browser/browser_thread.h"
35 #include "content/public/browser/navigation_controller.h"
36 #include "content/public/browser/navigation_details.h"
37 #include "content/public/browser/navigation_entry.h"
38 #include "content/public/browser/notification_service.h"
39 #include "content/public/browser/web_contents.h"
40 #include "ui/base/l10n/l10n_util.h"
41 #include "ui/base/layout.h"
42 #include "ui/base/resource/resource_bundle.h"
43 #include "ui/gfx/image/image_util.h"
44
45 using base::DictionaryValue;
46 using content::BrowserThread;
47 using content::NavigationController;
48
49 namespace history {
50
51 namespace {
52
53 void RunOrPostGetMostVisitedURLsCallback(
54     base::TaskRunner* task_runner,
55     bool include_forced_urls,
56     const TopSitesImpl::GetMostVisitedURLsCallback& callback,
57     const MostVisitedURLList& all_urls,
58     const MostVisitedURLList& nonforced_urls) {
59   const MostVisitedURLList* urls =
60       include_forced_urls ? &all_urls : &nonforced_urls;
61   if (task_runner->RunsTasksOnCurrentThread())
62     callback.Run(*urls);
63   else
64     task_runner->PostTask(FROM_HERE, base::Bind(callback, *urls));
65 }
66
67 // Compares two MostVisitedURL having a non-null |last_forced_time|.
68 bool ForcedURLComparator(const MostVisitedURL& first,
69                          const MostVisitedURL& second) {
70   DCHECK(!first.last_forced_time.is_null() &&
71          !second.last_forced_time.is_null());
72   return first.last_forced_time < second.last_forced_time;
73 }
74
75 }  // namespace
76
77 // How many non-forced top sites to store in the cache.
78 static const size_t kNonForcedTopSitesNumber = 20;
79
80 // How many forced top sites to store in the cache.
81 static const size_t kForcedTopSitesNumber = 20;
82
83 // Max number of temporary images we'll cache. See comment above
84 // temp_images_ for details.
85 static const size_t kMaxTempTopImages = 8;
86
87 static const int kDaysOfHistory = 90;
88 // Time from startup to first HistoryService query.
89 static const int64 kUpdateIntervalSecs = 15;
90 // Intervals between requests to HistoryService.
91 static const int64 kMinUpdateIntervalMinutes = 1;
92 static const int64 kMaxUpdateIntervalMinutes = 60;
93
94 // Use 100 quality (highest quality) because we're very sensitive to
95 // artifacts for these small sized, highly detailed images.
96 static const int kTopSitesImageQuality = 100;
97
98 TopSitesImpl::TopSitesImpl(Profile* profile)
99     : backend_(NULL),
100       cache_(new TopSitesCache()),
101       thread_safe_cache_(new TopSitesCache()),
102       profile_(profile),
103       last_num_urls_changed_(0),
104       loaded_(false) {
105   if (!profile_)
106     return;
107
108   if (content::NotificationService::current()) {
109     registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URLS_DELETED,
110                    content::Source<Profile>(profile_));
111     // Listen for any nav commits. We'll ignore those not related to this
112     // profile when we get the notification.
113     registrar_.Add(this, content::NOTIFICATION_NAV_ENTRY_COMMITTED,
114                    content::NotificationService::AllSources());
115   }
116   for (size_t i = 0; i < arraysize(kPrepopulatedPages); i++) {
117     int url_id = kPrepopulatedPages[i].url_id;
118     prepopulated_page_urls_.push_back(
119         GURL(l10n_util::GetStringUTF8(url_id)));
120   }
121 }
122
123 void TopSitesImpl::Init(const base::FilePath& db_name) {
124   // Create the backend here, rather than in the constructor, so that
125   // unit tests that do not need the backend can run without a problem.
126   backend_ = new TopSitesBackend;
127   backend_->Init(db_name);
128   backend_->GetMostVisitedThumbnails(
129       base::Bind(&TopSitesImpl::OnGotMostVisitedThumbnails,
130                  base::Unretained(this)),
131       &cancelable_task_tracker_);
132 }
133
134 bool TopSitesImpl::SetPageThumbnail(const GURL& url,
135                                     const gfx::Image& thumbnail,
136                                     const ThumbnailScore& score) {
137   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
138
139   if (!loaded_) {
140     // TODO(sky): I need to cache these and apply them after the load
141     // completes.
142     return false;
143   }
144
145   bool add_temp_thumbnail = false;
146   if (!IsKnownURL(url)) {
147     if (!IsNonForcedFull()) {
148       add_temp_thumbnail = true;
149     } else {
150       return false;  // This URL is not known to us.
151     }
152   }
153
154   if (!HistoryService::CanAddURL(url))
155     return false;  // It's not a real webpage.
156
157   scoped_refptr<base::RefCountedBytes> thumbnail_data;
158   if (!EncodeBitmap(thumbnail, &thumbnail_data))
159     return false;
160
161   if (add_temp_thumbnail) {
162     // Always remove the existing entry and then add it back. That way if we end
163     // up with too many temp thumbnails we'll prune the oldest first.
164     RemoveTemporaryThumbnailByURL(url);
165     AddTemporaryThumbnail(url, thumbnail_data.get(), score);
166     return true;
167   }
168
169   return SetPageThumbnailEncoded(url, thumbnail_data.get(), score);
170 }
171
172 bool TopSitesImpl::SetPageThumbnailToJPEGBytes(
173     const GURL& url,
174     const base::RefCountedMemory* memory,
175     const ThumbnailScore& score) {
176   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
177
178   if (!loaded_) {
179     // TODO(sky): I need to cache these and apply them after the load
180     // completes.
181     return false;
182   }
183
184   bool add_temp_thumbnail = false;
185   if (!IsKnownURL(url)) {
186     if (!IsNonForcedFull()) {
187       add_temp_thumbnail = true;
188     } else {
189       return false;  // This URL is not known to us.
190     }
191   }
192
193   if (!HistoryService::CanAddURL(url))
194     return false;  // It's not a real webpage.
195
196   if (add_temp_thumbnail) {
197     // Always remove the existing entry and then add it back. That way if we end
198     // up with too many temp thumbnails we'll prune the oldest first.
199     RemoveTemporaryThumbnailByURL(url);
200     AddTemporaryThumbnail(url, memory, score);
201     return true;
202   }
203
204   return SetPageThumbnailEncoded(url, memory, score);
205 }
206
207 // WARNING: this function may be invoked on any thread.
208 void TopSitesImpl::GetMostVisitedURLs(
209     const GetMostVisitedURLsCallback& callback,
210     bool include_forced_urls) {
211   MostVisitedURLList filtered_urls;
212   {
213     base::AutoLock lock(lock_);
214     if (!loaded_) {
215       // A request came in before we finished loading. Store the callback and
216       // we'll run it on current thread when we finish loading.
217       pending_callbacks_.push_back(
218           base::Bind(&RunOrPostGetMostVisitedURLsCallback,
219                      base::MessageLoopProxy::current(),
220                      include_forced_urls,
221                      callback));
222       return;
223     }
224     if (include_forced_urls) {
225       filtered_urls = thread_safe_cache_->top_sites();
226     } else {
227       filtered_urls.assign(thread_safe_cache_->top_sites().begin() +
228                               thread_safe_cache_->GetNumForcedURLs(),
229                            thread_safe_cache_->top_sites().end());
230     }
231   }
232   callback.Run(filtered_urls);
233 }
234
235 bool TopSitesImpl::GetPageThumbnail(
236     const GURL& url,
237     bool prefix_match,
238     scoped_refptr<base::RefCountedMemory>* bytes) {
239   // WARNING: this may be invoked on any thread.
240   // Perform exact match.
241   {
242     base::AutoLock lock(lock_);
243     if (thread_safe_cache_->GetPageThumbnail(url, bytes))
244       return true;
245   }
246
247   // Resource bundle is thread safe.
248   for (size_t i = 0; i < arraysize(kPrepopulatedPages); i++) {
249     if (url == prepopulated_page_urls_[i]) {
250       *bytes = ResourceBundle::GetSharedInstance().
251           LoadDataResourceBytesForScale(
252               kPrepopulatedPages[i].thumbnail_id,
253               ui::SCALE_FACTOR_100P);
254       return true;
255     }
256   }
257
258   if (prefix_match) {
259     // If http or https, search with |url| first, then try the other one.
260     std::vector<GURL> url_list;
261     url_list.push_back(url);
262     if (url.SchemeIsHTTPOrHTTPS())
263       url_list.push_back(ToggleHTTPAndHTTPS(url));
264
265     for (std::vector<GURL>::iterator it = url_list.begin();
266          it != url_list.end(); ++it) {
267       base::AutoLock lock(lock_);
268
269       GURL canonical_url;
270       // Test whether any stored URL is a prefix of |url|.
271       canonical_url = thread_safe_cache_->GetGeneralizedCanonicalURL(*it);
272       if (!canonical_url.is_empty() &&
273           thread_safe_cache_->GetPageThumbnail(canonical_url, bytes)) {
274         return true;
275       }
276     }
277   }
278
279   return false;
280 }
281
282 bool TopSitesImpl::GetPageThumbnailScore(const GURL& url,
283                                          ThumbnailScore* score) {
284   // WARNING: this may be invoked on any thread.
285   base::AutoLock lock(lock_);
286   return thread_safe_cache_->GetPageThumbnailScore(url, score);
287 }
288
289 bool TopSitesImpl::GetTemporaryPageThumbnailScore(const GURL& url,
290                                                   ThumbnailScore* score) {
291   for (TempImages::iterator i = temp_images_.begin(); i != temp_images_.end();
292        ++i) {
293     if (i->first == url) {
294       *score = i->second.thumbnail_score;
295       return true;
296     }
297   }
298   return false;
299 }
300
301
302 // Returns the index of |url| in |urls|, or -1 if not found.
303 static int IndexOf(const MostVisitedURLList& urls, const GURL& url) {
304   for (size_t i = 0; i < urls.size(); i++) {
305     if (urls[i].url == url)
306       return i;
307   }
308   return -1;
309 }
310
311 void TopSitesImpl::SyncWithHistory() {
312   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
313   if (loaded_ && temp_images_.size()) {
314     // If we have temporary thumbnails it means there isn't much data, and most
315     // likely the user is first running Chrome. During this time we throttle
316     // updating from history by 30 seconds. If the user creates a new tab page
317     // during this window of time we force updating from history so that the new
318     // tab page isn't so far out of date.
319     timer_.Stop();
320     StartQueryForMostVisited();
321   }
322 }
323
324 bool TopSitesImpl::HasBlacklistedItems() const {
325   const base::DictionaryValue* blacklist =
326       profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist);
327   return blacklist && !blacklist->empty();
328 }
329
330 void TopSitesImpl::AddBlacklistedURL(const GURL& url) {
331   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
332
333   base::Value* dummy = base::Value::CreateNullValue();
334   {
335     DictionaryPrefUpdate update(profile_->GetPrefs(),
336                                 prefs::kNtpMostVisitedURLsBlacklist);
337     base::DictionaryValue* blacklist = update.Get();
338     blacklist->SetWithoutPathExpansion(GetURLHash(url), dummy);
339   }
340
341   ResetThreadSafeCache();
342   NotifyTopSitesChanged();
343 }
344
345 void TopSitesImpl::RemoveBlacklistedURL(const GURL& url) {
346   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
347   {
348     DictionaryPrefUpdate update(profile_->GetPrefs(),
349                                 prefs::kNtpMostVisitedURLsBlacklist);
350     base::DictionaryValue* blacklist = update.Get();
351     blacklist->RemoveWithoutPathExpansion(GetURLHash(url), NULL);
352   }
353   ResetThreadSafeCache();
354   NotifyTopSitesChanged();
355 }
356
357 bool TopSitesImpl::IsBlacklisted(const GURL& url) {
358   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
359   const base::DictionaryValue* blacklist =
360       profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist);
361   return blacklist && blacklist->HasKey(GetURLHash(url));
362 }
363
364 void TopSitesImpl::ClearBlacklistedURLs() {
365   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
366   {
367     DictionaryPrefUpdate update(profile_->GetPrefs(),
368                                 prefs::kNtpMostVisitedURLsBlacklist);
369     base::DictionaryValue* blacklist = update.Get();
370     blacklist->Clear();
371   }
372   ResetThreadSafeCache();
373   NotifyTopSitesChanged();
374 }
375
376 void TopSitesImpl::Shutdown() {
377   profile_ = NULL;
378   // Cancel all requests so that the service doesn't callback to us after we've
379   // invoked Shutdown (this could happen if we have a pending request and
380   // Shutdown is invoked).
381   history_consumer_.CancelAllRequests();
382   backend_->Shutdown();
383 }
384
385 // static
386 void TopSitesImpl::DiffMostVisited(const MostVisitedURLList& old_list,
387                                    const MostVisitedURLList& new_list,
388                                    TopSitesDelta* delta) {
389
390   // Add all the old URLs for quick lookup. This maps URLs to the corresponding
391   // index in the input.
392   std::map<GURL, size_t> all_old_urls;
393   size_t num_old_forced = 0;
394   for (size_t i = 0; i < old_list.size(); i++) {
395     if (!old_list[i].last_forced_time.is_null())
396       num_old_forced++;
397     DCHECK(old_list[i].last_forced_time.is_null() || i < num_old_forced)
398         << "Forced URLs must all appear before non-forced URLs.";
399     all_old_urls[old_list[i].url] = i;
400   }
401
402   // Check all the URLs in the new set to see which ones are new or just moved.
403   // When we find a match in the old set, we'll reset its index to our special
404   // marker. This allows us to quickly identify the deleted ones in a later
405   // pass.
406   const size_t kAlreadyFoundMarker = static_cast<size_t>(-1);
407   int rank = -1;  // Forced URLs have a rank of -1.
408   for (size_t i = 0; i < new_list.size(); i++) {
409     // Increase the rank if we're going through forced URLs. This works because
410     // non-forced URLs all come after forced URLs.
411     if (new_list[i].last_forced_time.is_null())
412       rank++;
413     DCHECK(new_list[i].last_forced_time.is_null() == (rank != -1))
414         << "Forced URLs must all appear before non-forced URLs.";
415     std::map<GURL, size_t>::iterator found = all_old_urls.find(new_list[i].url);
416     if (found == all_old_urls.end()) {
417       MostVisitedURLWithRank added;
418       added.url = new_list[i];
419       added.rank = rank;
420       delta->added.push_back(added);
421     } else {
422       DCHECK(found->second != kAlreadyFoundMarker)
423           << "Same URL appears twice in the new list.";
424       int old_rank = found->second >= num_old_forced ?
425           found->second - num_old_forced : -1;
426       if (old_rank != rank ||
427           old_list[found->second].last_forced_time !=
428               new_list[i].last_forced_time) {
429         MostVisitedURLWithRank moved;
430         moved.url = new_list[i];
431         moved.rank = rank;
432         delta->moved.push_back(moved);
433       }
434       found->second = kAlreadyFoundMarker;
435     }
436   }
437
438   // Any member without the special marker in the all_old_urls list means that
439   // there wasn't a "new" URL that mapped to it, so it was deleted.
440   for (std::map<GURL, size_t>::const_iterator i = all_old_urls.begin();
441        i != all_old_urls.end(); ++i) {
442     if (i->second != kAlreadyFoundMarker)
443       delta->deleted.push_back(old_list[i->second]);
444   }
445 }
446
447 CancelableRequestProvider::Handle TopSitesImpl::StartQueryForMostVisited() {
448   DCHECK(loaded_);
449   if (!profile_)
450     return 0;
451
452   HistoryService* hs = HistoryServiceFactory::GetForProfile(
453       profile_, Profile::EXPLICIT_ACCESS);
454   // |hs| may be null during unit tests.
455   if (hs) {
456     return hs->QueryMostVisitedURLs(
457         num_results_to_request_from_history(),
458         kDaysOfHistory,
459         &history_consumer_,
460         base::Bind(&TopSitesImpl::OnTopSitesAvailableFromHistory,
461                    base::Unretained(this)));
462   }
463   return 0;
464 }
465
466 bool TopSitesImpl::IsKnownURL(const GURL& url) {
467   return loaded_ && cache_->IsKnownURL(url);
468 }
469
470 const std::string& TopSitesImpl::GetCanonicalURLString(const GURL& url) const {
471   return cache_->GetCanonicalURL(url).spec();
472 }
473
474 bool TopSitesImpl::IsNonForcedFull() {
475   return loaded_ && cache_->GetNumNonForcedURLs() >= kNonForcedTopSitesNumber;
476 }
477
478 bool TopSitesImpl::IsForcedFull() {
479   return loaded_ && cache_->GetNumForcedURLs() >= kForcedTopSitesNumber;
480 }
481
482 TopSitesImpl::~TopSitesImpl() {
483 }
484
485 bool TopSitesImpl::SetPageThumbnailNoDB(
486     const GURL& url,
487     const base::RefCountedMemory* thumbnail_data,
488     const ThumbnailScore& score) {
489   // This should only be invoked when we know about the url.
490   DCHECK(cache_->IsKnownURL(url));
491
492   const MostVisitedURL& most_visited =
493       cache_->top_sites()[cache_->GetURLIndex(url)];
494   Images* image = cache_->GetImage(url);
495
496   // When comparing the thumbnail scores, we need to take into account the
497   // redirect hops, which are not generated when the thumbnail is because the
498   // redirects weren't known. We fill that in here since we know the redirects.
499   ThumbnailScore new_score_with_redirects(score);
500   new_score_with_redirects.redirect_hops_from_dest =
501       GetRedirectDistanceForURL(most_visited, url);
502
503   if (!ShouldReplaceThumbnailWith(image->thumbnail_score,
504                                   new_score_with_redirects) &&
505       image->thumbnail.get())
506     return false;  // The one we already have is better.
507
508   image->thumbnail = const_cast<base::RefCountedMemory*>(thumbnail_data);
509   image->thumbnail_score = new_score_with_redirects;
510
511   ResetThreadSafeImageCache();
512   return true;
513 }
514
515 bool TopSitesImpl::SetPageThumbnailEncoded(
516     const GURL& url,
517     const base::RefCountedMemory* thumbnail,
518     const ThumbnailScore& score) {
519   if (!SetPageThumbnailNoDB(url, thumbnail, score))
520     return false;
521
522   // Update the database.
523   if (!cache_->IsKnownURL(url))
524     return false;
525
526   size_t index = cache_->GetURLIndex(url);
527   int url_rank = index - cache_->GetNumForcedURLs();
528   const MostVisitedURL& most_visited = cache_->top_sites()[index];
529   backend_->SetPageThumbnail(most_visited,
530                              url_rank < 0 ? -1 : url_rank,
531                              *(cache_->GetImage(most_visited.url)));
532   return true;
533 }
534
535 // static
536 bool TopSitesImpl::EncodeBitmap(const gfx::Image& bitmap,
537                                 scoped_refptr<base::RefCountedBytes>* bytes) {
538   if (bitmap.IsEmpty())
539     return false;
540   *bytes = new base::RefCountedBytes();
541   std::vector<unsigned char> data;
542   if (!gfx::JPEG1xEncodedDataFromImage(bitmap, kTopSitesImageQuality, &data))
543     return false;
544
545   // As we're going to cache this data, make sure the vector is only as big as
546   // it needs to be, as JPEGCodec::Encode() over-allocates data.capacity().
547   // (In a C++0x future, we can just call shrink_to_fit() in Encode())
548   (*bytes)->data() = data;
549   return true;
550 }
551
552 void TopSitesImpl::RemoveTemporaryThumbnailByURL(const GURL& url) {
553   for (TempImages::iterator i = temp_images_.begin(); i != temp_images_.end();
554        ++i) {
555     if (i->first == url) {
556       temp_images_.erase(i);
557       return;
558     }
559   }
560 }
561
562 void TopSitesImpl::AddTemporaryThumbnail(
563     const GURL& url,
564     const base::RefCountedMemory* thumbnail,
565     const ThumbnailScore& score) {
566   if (temp_images_.size() == kMaxTempTopImages)
567     temp_images_.erase(temp_images_.begin());
568
569   TempImage image;
570   image.first = url;
571   image.second.thumbnail = const_cast<base::RefCountedMemory*>(thumbnail);
572   image.second.thumbnail_score = score;
573   temp_images_.push_back(image);
574 }
575
576 void TopSitesImpl::TimerFired() {
577   StartQueryForMostVisited();
578 }
579
580 // static
581 int TopSitesImpl::GetRedirectDistanceForURL(const MostVisitedURL& most_visited,
582                                             const GURL& url) {
583   for (size_t i = 0; i < most_visited.redirects.size(); i++) {
584     if (most_visited.redirects[i] == url)
585       return static_cast<int>(most_visited.redirects.size() - i - 1);
586   }
587   NOTREACHED() << "URL should always be found.";
588   return 0;
589 }
590
591 MostVisitedURLList TopSitesImpl::GetPrepopulatePages() {
592   MostVisitedURLList urls;
593   urls.resize(arraysize(kPrepopulatedPages));
594   for (size_t i = 0; i < urls.size(); ++i) {
595     MostVisitedURL& url = urls[i];
596     url.url = GURL(prepopulated_page_urls_[i]);
597     url.redirects.push_back(url.url);
598     url.title = l10n_util::GetStringUTF16(kPrepopulatedPages[i].title_id);
599   }
600   return urls;
601 }
602
603 bool TopSitesImpl::loaded() const {
604   return loaded_;
605 }
606
607 bool TopSitesImpl::AddForcedURL(const GURL& url, const base::Time& time) {
608   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
609   size_t num_forced = cache_->GetNumForcedURLs();
610   MostVisitedURLList new_list(cache_->top_sites());
611   MostVisitedURL new_url;
612
613   if (cache_->IsKnownURL(url)) {
614     size_t index = cache_->GetURLIndex(url);
615     // Do nothing if we currently have that URL as non-forced.
616     if (new_list[index].last_forced_time.is_null())
617       return false;
618
619     // Update the |last_forced_time| of the already existing URL. Delete it and
620     // reinsert it at the right location.
621     new_url = new_list[index];
622     new_list.erase(new_list.begin() + index);
623     num_forced--;
624   } else {
625     new_url.url = url;
626     new_url.redirects.push_back(url);
627   }
628   new_url.last_forced_time = time;
629   // Add forced URLs and sort. Added to the end of the list of forced URLs
630   // since this is almost always where it needs to go, unless the user's local
631   // clock is fiddled with.
632   MostVisitedURLList::iterator mid = new_list.begin() + num_forced;
633   new_list.insert(mid, new_url);
634   mid = new_list.begin() + num_forced;  // Mid was invalidated.
635   std::inplace_merge(new_list.begin(), mid, mid + 1, ForcedURLComparator);
636   SetTopSites(new_list);
637   return true;
638 }
639
640 bool TopSitesImpl::AddPrepopulatedPages(MostVisitedURLList* urls,
641                                         size_t num_forced_urls) {
642   bool added = false;
643   MostVisitedURLList prepopulate_urls = GetPrepopulatePages();
644   for (size_t i = 0; i < prepopulate_urls.size(); ++i) {
645     if (urls->size() - num_forced_urls < kNonForcedTopSitesNumber &&
646         IndexOf(*urls, prepopulate_urls[i].url) == -1) {
647       urls->push_back(prepopulate_urls[i]);
648       added = true;
649     }
650   }
651   return added;
652 }
653
654 size_t TopSitesImpl::MergeCachedForcedURLs(MostVisitedURLList* new_list) {
655   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
656   // Add all the new URLs for quick lookup. Take that opportunity to count the
657   // number of forced URLs in |new_list|.
658   std::set<GURL> all_new_urls;
659   size_t num_forced = 0;
660   for (size_t i = 0; i < new_list->size(); ++i) {
661     for (size_t j = 0; j < (*new_list)[i].redirects.size(); j++) {
662       all_new_urls.insert((*new_list)[i].redirects[j]);
663     }
664     if (!(*new_list)[i].last_forced_time.is_null())
665       ++num_forced;
666   }
667
668   // Keep the forced URLs from |cache_| that are not found in |new_list|.
669   MostVisitedURLList filtered_forced_urls;
670   for (size_t i = 0; i < cache_->GetNumForcedURLs(); ++i) {
671     if (all_new_urls.find(cache_->top_sites()[i].url) == all_new_urls.end())
672       filtered_forced_urls.push_back(cache_->top_sites()[i]);
673   }
674   num_forced += filtered_forced_urls.size();
675
676   // Prepend forced URLs and sort in order of ascending |last_forced_time|.
677   new_list->insert(new_list->begin(), filtered_forced_urls.begin(),
678                    filtered_forced_urls.end());
679   std::inplace_merge(
680       new_list->begin(), new_list->begin() + filtered_forced_urls.size(),
681       new_list->begin() + num_forced, ForcedURLComparator);
682
683   // Drop older forced URLs if the list overflows. Since forced URLs are always
684   // sort in increasing order of |last_forced_time|, drop the first ones.
685   if (num_forced > kForcedTopSitesNumber) {
686     new_list->erase(new_list->begin(),
687                     new_list->begin() + (num_forced - kForcedTopSitesNumber));
688     num_forced = kForcedTopSitesNumber;
689   }
690
691   return num_forced;
692 }
693
694 void TopSitesImpl::ApplyBlacklist(const MostVisitedURLList& urls,
695                                   MostVisitedURLList* out) {
696   // Log the number of times ApplyBlacklist is called so we can compute the
697   // average number of blacklisted items per user.
698   const base::DictionaryValue* blacklist =
699       profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist);
700   UMA_HISTOGRAM_BOOLEAN("TopSites.NumberOfApplyBlacklist", true);
701   UMA_HISTOGRAM_COUNTS_100("TopSites.NumberOfBlacklistedItems",
702       (blacklist ? blacklist->size() : 0));
703   size_t num_non_forced_urls = 0;
704   size_t num_forced_urls = 0;
705   for (size_t i = 0; i < urls.size(); ++i) {
706     if (!IsBlacklisted(urls[i].url)) {
707       if (urls[i].last_forced_time.is_null()) {
708         // Non-forced URL.
709         if (num_non_forced_urls >= kNonForcedTopSitesNumber)
710           continue;
711         num_non_forced_urls++;
712       } else {
713         // Forced URL.
714         if (num_forced_urls >= kForcedTopSitesNumber)
715           continue;
716         num_forced_urls++;
717       }
718       out->push_back(urls[i]);
719     }
720   }
721 }
722
723 std::string TopSitesImpl::GetURLHash(const GURL& url) {
724   // We don't use canonical URLs here to be able to blacklist only one of
725   // the two 'duplicate' sites, e.g. 'gmail.com' and 'mail.google.com'.
726   return base::MD5String(url.spec());
727 }
728
729 base::TimeDelta TopSitesImpl::GetUpdateDelay() {
730   if (cache_->top_sites().size() <= arraysize(kPrepopulatedPages))
731     return base::TimeDelta::FromSeconds(30);
732
733   int64 range = kMaxUpdateIntervalMinutes - kMinUpdateIntervalMinutes;
734   int64 minutes = kMaxUpdateIntervalMinutes -
735       last_num_urls_changed_ * range / cache_->top_sites().size();
736   return base::TimeDelta::FromMinutes(minutes);
737 }
738
739 void TopSitesImpl::Observe(int type,
740                            const content::NotificationSource& source,
741                            const content::NotificationDetails& details) {
742   if (!loaded_)
743     return;
744
745   if (type == chrome::NOTIFICATION_HISTORY_URLS_DELETED) {
746     content::Details<history::URLsDeletedDetails> deleted_details(details);
747     if (deleted_details->all_history) {
748       SetTopSites(MostVisitedURLList());
749       backend_->ResetDatabase();
750     } else {
751       std::set<size_t> indices_to_delete;  // Indices into top_sites_.
752       for (URLRows::const_iterator i = deleted_details->rows.begin();
753            i != deleted_details->rows.end(); ++i) {
754         if (cache_->IsKnownURL(i->url()))
755           indices_to_delete.insert(cache_->GetURLIndex(i->url()));
756       }
757
758       if (indices_to_delete.empty())
759         return;
760
761       MostVisitedURLList new_top_sites(cache_->top_sites());
762       for (std::set<size_t>::reverse_iterator i = indices_to_delete.rbegin();
763            i != indices_to_delete.rend(); i++) {
764         new_top_sites.erase(new_top_sites.begin() + *i);
765       }
766       SetTopSites(new_top_sites);
767     }
768     StartQueryForMostVisited();
769   } else if (type == content::NOTIFICATION_NAV_ENTRY_COMMITTED) {
770     NavigationController* controller =
771         content::Source<NavigationController>(source).ptr();
772     Profile* profile = Profile::FromBrowserContext(
773         controller->GetWebContents()->GetBrowserContext());
774     if (profile == profile_ && !IsNonForcedFull()) {
775       content::LoadCommittedDetails* load_details =
776           content::Details<content::LoadCommittedDetails>(details).ptr();
777       if (!load_details)
778         return;
779       const GURL& url = load_details->entry->GetURL();
780       if (!cache_->IsKnownURL(url) && HistoryService::CanAddURL(url)) {
781         // To avoid slamming history we throttle requests when the url updates.
782         // To do otherwise negatively impacts perf tests.
783         RestartQueryForTopSitesTimer(GetUpdateDelay());
784       }
785     }
786   }
787 }
788
789 void TopSitesImpl::SetTopSites(const MostVisitedURLList& new_top_sites) {
790   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
791
792   MostVisitedURLList top_sites(new_top_sites);
793   size_t num_forced_urls = MergeCachedForcedURLs(&top_sites);
794   AddPrepopulatedPages(&top_sites, num_forced_urls);
795
796   TopSitesDelta delta;
797   DiffMostVisited(cache_->top_sites(), top_sites, &delta);
798   if (!delta.deleted.empty() || !delta.added.empty() || !delta.moved.empty()) {
799     backend_->UpdateTopSites(delta);
800   }
801
802   last_num_urls_changed_ = delta.added.size() + delta.moved.size();
803
804   // We always do the following steps (setting top sites in cache, and resetting
805   // thread safe cache ...) as this method is invoked during startup at which
806   // point the caches haven't been updated yet.
807   cache_->SetTopSites(top_sites);
808
809   // See if we have any tmp thumbnails for the new sites.
810   if (!temp_images_.empty()) {
811     for (size_t i = 0; i < top_sites.size(); ++i) {
812       const MostVisitedURL& mv = top_sites[i];
813       GURL canonical_url = cache_->GetCanonicalURL(mv.url);
814       // At the time we get the thumbnail redirects aren't known, so we have to
815       // iterate through all the images.
816       for (TempImages::iterator it = temp_images_.begin();
817            it != temp_images_.end(); ++it) {
818         if (canonical_url == cache_->GetCanonicalURL(it->first)) {
819           SetPageThumbnailEncoded(
820               mv.url, it->second.thumbnail.get(), it->second.thumbnail_score);
821           temp_images_.erase(it);
822           break;
823         }
824       }
825     }
826   }
827
828   if (top_sites.size() - num_forced_urls >= kNonForcedTopSitesNumber)
829     temp_images_.clear();
830
831   ResetThreadSafeCache();
832   ResetThreadSafeImageCache();
833   NotifyTopSitesChanged();
834
835   // Restart the timer that queries history for top sites. This is done to
836   // ensure we stay in sync with history.
837   RestartQueryForTopSitesTimer(GetUpdateDelay());
838 }
839
840 int TopSitesImpl::num_results_to_request_from_history() const {
841   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
842
843   const base::DictionaryValue* blacklist =
844       profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist);
845   return kNonForcedTopSitesNumber + (blacklist ? blacklist->size() : 0);
846 }
847
848 void TopSitesImpl::MoveStateToLoaded() {
849   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
850
851   MostVisitedURLList filtered_urls_all;
852   MostVisitedURLList filtered_urls_nonforced;
853   PendingCallbacks pending_callbacks;
854   {
855     base::AutoLock lock(lock_);
856
857     if (loaded_)
858       return;  // Don't do anything if we're already loaded.
859     loaded_ = true;
860
861     // Now that we're loaded we can service the queued up callbacks. Copy them
862     // here and service them outside the lock.
863     if (!pending_callbacks_.empty()) {
864       // We always filter out forced URLs because callers of GetMostVisitedURLs
865       // are not interested in them.
866       filtered_urls_all = thread_safe_cache_->top_sites();
867       filtered_urls_nonforced.assign(thread_safe_cache_->top_sites().begin() +
868                                        thread_safe_cache_->GetNumForcedURLs(),
869                                      thread_safe_cache_->top_sites().end());
870       pending_callbacks.swap(pending_callbacks_);
871     }
872   }
873
874   for (size_t i = 0; i < pending_callbacks.size(); i++)
875     pending_callbacks[i].Run(filtered_urls_all, filtered_urls_nonforced);
876
877   content::NotificationService::current()->Notify(
878       chrome::NOTIFICATION_TOP_SITES_LOADED,
879       content::Source<Profile>(profile_),
880       content::Details<TopSites>(this));
881 }
882
883 void TopSitesImpl::ResetThreadSafeCache() {
884   base::AutoLock lock(lock_);
885   MostVisitedURLList cached;
886   ApplyBlacklist(cache_->top_sites(), &cached);
887   thread_safe_cache_->SetTopSites(cached);
888 }
889
890 void TopSitesImpl::ResetThreadSafeImageCache() {
891   base::AutoLock lock(lock_);
892   thread_safe_cache_->SetThumbnails(cache_->images());
893 }
894
895 void TopSitesImpl::NotifyTopSitesChanged() {
896   content::NotificationService::current()->Notify(
897       chrome::NOTIFICATION_TOP_SITES_CHANGED,
898       content::Source<TopSites>(this),
899       content::NotificationService::NoDetails());
900 }
901
902 void TopSitesImpl::RestartQueryForTopSitesTimer(base::TimeDelta delta) {
903   if (timer_.IsRunning() && ((timer_start_time_ + timer_.GetCurrentDelay()) <
904                              (base::TimeTicks::Now() + delta))) {
905     return;
906   }
907
908   timer_start_time_ = base::TimeTicks::Now();
909   timer_.Stop();
910   timer_.Start(FROM_HERE, delta, this, &TopSitesImpl::TimerFired);
911 }
912
913 void TopSitesImpl::OnGotMostVisitedThumbnails(
914     const scoped_refptr<MostVisitedThumbnails>& thumbnails) {
915   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
916
917   // Set the top sites directly in the cache so that SetTopSites diffs
918   // correctly.
919   cache_->SetTopSites(thumbnails->most_visited);
920   SetTopSites(thumbnails->most_visited);
921   cache_->SetThumbnails(thumbnails->url_to_images_map);
922
923   ResetThreadSafeImageCache();
924
925   MoveStateToLoaded();
926
927   // Start a timer that refreshes top sites from history.
928   RestartQueryForTopSitesTimer(
929       base::TimeDelta::FromSeconds(kUpdateIntervalSecs));
930 }
931
932 void TopSitesImpl::OnTopSitesAvailableFromHistory(
933     CancelableRequestProvider::Handle handle,
934     MostVisitedURLList pages) {
935   SetTopSites(pages);
936 }
937
938 }  // namespace history