f76d0b6c554df1a4675e800b9eb4efc9f0953de1
[platform/framework/web/crosswalk.git] / src / chrome / browser / history / top_sites_impl.cc
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/history/top_sites_impl.h"
6
7 #include <algorithm>
8 #include <set>
9
10 #include "base/bind.h"
11 #include "base/bind_helpers.h"
12 #include "base/logging.h"
13 #include "base/md5.h"
14 #include "base/memory/ref_counted_memory.h"
15 #include "base/message_loop/message_loop_proxy.h"
16 #include "base/metrics/histogram.h"
17 #include "base/prefs/pref_service.h"
18 #include "base/prefs/scoped_user_pref_update.h"
19 #include "base/strings/string_util.h"
20 #include "base/strings/utf_string_conversions.h"
21 #include "base/task_runner.h"
22 #include "base/values.h"
23 #include "chrome/browser/chrome_notification_types.h"
24 #include "chrome/browser/history/history_backend.h"
25 #include "chrome/browser/history/history_db_task.h"
26 #include "chrome/browser/history/history_notifications.h"
27 #include "chrome/browser/history/history_service_factory.h"
28 #include "chrome/browser/history/page_usage_data.h"
29 #include "chrome/browser/history/top_sites_cache.h"
30 #include "chrome/browser/history/url_utils.h"
31 #include "chrome/browser/profiles/profile.h"
32 #include "chrome/browser/ui/webui/ntp/most_visited_handler.h"
33 #include "chrome/browser/ui/webui/ntp/new_tab_ui.h"
34 #include "chrome/common/pref_names.h"
35 #include "chrome/common/thumbnail_score.h"
36 #include "content/public/browser/browser_thread.h"
37 #include "content/public/browser/navigation_controller.h"
38 #include "content/public/browser/navigation_details.h"
39 #include "content/public/browser/navigation_entry.h"
40 #include "content/public/browser/notification_service.h"
41 #include "content/public/browser/web_contents.h"
42 #include "grit/locale_settings.h"
43 #include "ui/base/l10n/l10n_util.h"
44 #include "ui/base/layout.h"
45 #include "ui/base/resource/resource_bundle.h"
46 #include "ui/gfx/image/image_util.h"
47
48 using base::DictionaryValue;
49 using content::BrowserThread;
50 using content::NavigationController;
51
52 namespace history {
53
54 namespace {
55
56 void RunOrPostGetMostVisitedURLsCallback(
57     base::TaskRunner* task_runner,
58     bool include_forced_urls,
59     const TopSitesImpl::GetMostVisitedURLsCallback& callback,
60     const MostVisitedURLList& all_urls,
61     const MostVisitedURLList& nonforced_urls) {
62   const MostVisitedURLList* urls =
63       include_forced_urls ? &all_urls : &nonforced_urls;
64   if (task_runner->RunsTasksOnCurrentThread())
65     callback.Run(*urls);
66   else
67     task_runner->PostTask(FROM_HERE, base::Bind(callback, *urls));
68 }
69
70 // Compares two MostVisitedURL having a non-null |last_forced_time|.
71 bool ForcedURLComparator(const MostVisitedURL& first,
72                          const MostVisitedURL& second) {
73   DCHECK(!first.last_forced_time.is_null() &&
74          !second.last_forced_time.is_null());
75   return first.last_forced_time < second.last_forced_time;
76 }
77
78 }  // namespace
79
80 // How many non-forced top sites to store in the cache.
81 static const size_t kNonForcedTopSitesNumber = 20;
82
83 // How many forced top sites to store in the cache.
84 static const size_t kForcedTopSitesNumber = 20;
85
86 // Max number of temporary images we'll cache. See comment above
87 // temp_images_ for details.
88 static const size_t kMaxTempTopImages = 8;
89
90 static const int kDaysOfHistory = 90;
91 // Time from startup to first HistoryService query.
92 static const int64 kUpdateIntervalSecs = 15;
93 // Intervals between requests to HistoryService.
94 static const int64 kMinUpdateIntervalMinutes = 1;
95 static const int64 kMaxUpdateIntervalMinutes = 60;
96
97 // Use 100 quality (highest quality) because we're very sensitive to
98 // artifacts for these small sized, highly detailed images.
99 static const int kTopSitesImageQuality = 100;
100
101 TopSitesImpl::TopSitesImpl(Profile* profile)
102     : backend_(NULL),
103       cache_(new TopSitesCache()),
104       thread_safe_cache_(new TopSitesCache()),
105       profile_(profile),
106       last_num_urls_changed_(0),
107       loaded_(false) {
108   if (!profile_)
109     return;
110
111   if (content::NotificationService::current()) {
112     registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URLS_DELETED,
113                    content::Source<Profile>(profile_));
114     // Listen for any nav commits. We'll ignore those not related to this
115     // profile when we get the notification.
116     registrar_.Add(this, content::NOTIFICATION_NAV_ENTRY_COMMITTED,
117                    content::NotificationService::AllSources());
118   }
119   for (size_t i = 0; i < arraysize(kPrepopulatedPages); i++) {
120     int url_id = kPrepopulatedPages[i].url_id;
121     prepopulated_page_urls_.push_back(
122         GURL(l10n_util::GetStringUTF8(url_id)));
123   }
124 }
125
126 void TopSitesImpl::Init(const base::FilePath& db_name) {
127   // Create the backend here, rather than in the constructor, so that
128   // unit tests that do not need the backend can run without a problem.
129   backend_ = new TopSitesBackend;
130   backend_->Init(db_name);
131   backend_->GetMostVisitedThumbnails(
132       base::Bind(&TopSitesImpl::OnGotMostVisitedThumbnails,
133                  base::Unretained(this)),
134       &cancelable_task_tracker_);
135 }
136
137 bool TopSitesImpl::SetPageThumbnail(const GURL& url,
138                                     const gfx::Image& thumbnail,
139                                     const ThumbnailScore& score) {
140   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
141
142   if (!loaded_) {
143     // TODO(sky): I need to cache these and apply them after the load
144     // completes.
145     return false;
146   }
147
148   bool add_temp_thumbnail = false;
149   if (!IsKnownURL(url)) {
150     if (!IsNonForcedFull()) {
151       add_temp_thumbnail = true;
152     } else {
153       return false;  // This URL is not known to us.
154     }
155   }
156
157   if (!HistoryService::CanAddURL(url))
158     return false;  // It's not a real webpage.
159
160   scoped_refptr<base::RefCountedBytes> thumbnail_data;
161   if (!EncodeBitmap(thumbnail, &thumbnail_data))
162     return false;
163
164   if (add_temp_thumbnail) {
165     // Always remove the existing entry and then add it back. That way if we end
166     // up with too many temp thumbnails we'll prune the oldest first.
167     RemoveTemporaryThumbnailByURL(url);
168     AddTemporaryThumbnail(url, thumbnail_data.get(), score);
169     return true;
170   }
171
172   return SetPageThumbnailEncoded(url, thumbnail_data.get(), score);
173 }
174
175 bool TopSitesImpl::SetPageThumbnailToJPEGBytes(
176     const GURL& url,
177     const base::RefCountedMemory* memory,
178     const ThumbnailScore& score) {
179   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
180
181   if (!loaded_) {
182     // TODO(sky): I need to cache these and apply them after the load
183     // completes.
184     return false;
185   }
186
187   bool add_temp_thumbnail = false;
188   if (!IsKnownURL(url)) {
189     if (!IsNonForcedFull()) {
190       add_temp_thumbnail = true;
191     } else {
192       return false;  // This URL is not known to us.
193     }
194   }
195
196   if (!HistoryService::CanAddURL(url))
197     return false;  // It's not a real webpage.
198
199   if (add_temp_thumbnail) {
200     // Always remove the existing entry and then add it back. That way if we end
201     // up with too many temp thumbnails we'll prune the oldest first.
202     RemoveTemporaryThumbnailByURL(url);
203     AddTemporaryThumbnail(url, memory, score);
204     return true;
205   }
206
207   return SetPageThumbnailEncoded(url, memory, score);
208 }
209
210 // WARNING: this function may be invoked on any thread.
211 void TopSitesImpl::GetMostVisitedURLs(
212     const GetMostVisitedURLsCallback& callback,
213     bool include_forced_urls) {
214   MostVisitedURLList filtered_urls;
215   {
216     base::AutoLock lock(lock_);
217     if (!loaded_) {
218       // A request came in before we finished loading. Store the callback and
219       // we'll run it on current thread when we finish loading.
220       pending_callbacks_.push_back(
221           base::Bind(&RunOrPostGetMostVisitedURLsCallback,
222                      base::MessageLoopProxy::current(),
223                      include_forced_urls,
224                      callback));
225       return;
226     }
227     if (include_forced_urls) {
228       filtered_urls = thread_safe_cache_->top_sites();
229     } else {
230       filtered_urls.assign(thread_safe_cache_->top_sites().begin() +
231                               thread_safe_cache_->GetNumForcedURLs(),
232                            thread_safe_cache_->top_sites().end());
233     }
234   }
235   callback.Run(filtered_urls);
236 }
237
238 bool TopSitesImpl::GetPageThumbnail(
239     const GURL& url,
240     bool prefix_match,
241     scoped_refptr<base::RefCountedMemory>* bytes) {
242   // WARNING: this may be invoked on any thread.
243   // Perform exact match.
244   {
245     base::AutoLock lock(lock_);
246     if (thread_safe_cache_->GetPageThumbnail(url, bytes))
247       return true;
248   }
249
250   // Resource bundle is thread safe.
251   for (size_t i = 0; i < arraysize(kPrepopulatedPages); i++) {
252     if (url == prepopulated_page_urls_[i]) {
253       *bytes = ResourceBundle::GetSharedInstance().
254           LoadDataResourceBytesForScale(
255               kPrepopulatedPages[i].thumbnail_id,
256               ui::SCALE_FACTOR_100P);
257       return true;
258     }
259   }
260
261   if (prefix_match) {
262     // If http or https, search with |url| first, then try the other one.
263     std::vector<GURL> url_list;
264     url_list.push_back(url);
265     if (url.SchemeIsHTTPOrHTTPS())
266       url_list.push_back(ToggleHTTPAndHTTPS(url));
267
268     for (std::vector<GURL>::iterator it = url_list.begin();
269          it != url_list.end(); ++it) {
270       base::AutoLock lock(lock_);
271
272       GURL canonical_url;
273       // Test whether any stored URL is a prefix of |url|.
274       canonical_url = thread_safe_cache_->GetGeneralizedCanonicalURL(*it);
275       if (!canonical_url.is_empty() &&
276           thread_safe_cache_->GetPageThumbnail(canonical_url, bytes)) {
277         return true;
278       }
279     }
280   }
281
282   return false;
283 }
284
285 bool TopSitesImpl::GetPageThumbnailScore(const GURL& url,
286                                          ThumbnailScore* score) {
287   // WARNING: this may be invoked on any thread.
288   base::AutoLock lock(lock_);
289   return thread_safe_cache_->GetPageThumbnailScore(url, score);
290 }
291
292 bool TopSitesImpl::GetTemporaryPageThumbnailScore(const GURL& url,
293                                                   ThumbnailScore* score) {
294   for (TempImages::iterator i = temp_images_.begin(); i != temp_images_.end();
295        ++i) {
296     if (i->first == url) {
297       *score = i->second.thumbnail_score;
298       return true;
299     }
300   }
301   return false;
302 }
303
304
305 // Returns the index of |url| in |urls|, or -1 if not found.
306 static int IndexOf(const MostVisitedURLList& urls, const GURL& url) {
307   for (size_t i = 0; i < urls.size(); i++) {
308     if (urls[i].url == url)
309       return i;
310   }
311   return -1;
312 }
313
314 void TopSitesImpl::SyncWithHistory() {
315   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
316   if (loaded_ && temp_images_.size()) {
317     // If we have temporary thumbnails it means there isn't much data, and most
318     // likely the user is first running Chrome. During this time we throttle
319     // updating from history by 30 seconds. If the user creates a new tab page
320     // during this window of time we force updating from history so that the new
321     // tab page isn't so far out of date.
322     timer_.Stop();
323     StartQueryForMostVisited();
324   }
325 }
326
327 bool TopSitesImpl::HasBlacklistedItems() const {
328   const base::DictionaryValue* blacklist =
329       profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist);
330   return blacklist && !blacklist->empty();
331 }
332
333 void TopSitesImpl::AddBlacklistedURL(const GURL& url) {
334   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
335
336   base::Value* dummy = base::Value::CreateNullValue();
337   {
338     DictionaryPrefUpdate update(profile_->GetPrefs(),
339                                 prefs::kNtpMostVisitedURLsBlacklist);
340     base::DictionaryValue* blacklist = update.Get();
341     blacklist->SetWithoutPathExpansion(GetURLHash(url), dummy);
342   }
343
344   ResetThreadSafeCache();
345   NotifyTopSitesChanged();
346 }
347
348 void TopSitesImpl::RemoveBlacklistedURL(const GURL& url) {
349   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
350   {
351     DictionaryPrefUpdate update(profile_->GetPrefs(),
352                                 prefs::kNtpMostVisitedURLsBlacklist);
353     base::DictionaryValue* blacklist = update.Get();
354     blacklist->RemoveWithoutPathExpansion(GetURLHash(url), NULL);
355   }
356   ResetThreadSafeCache();
357   NotifyTopSitesChanged();
358 }
359
360 bool TopSitesImpl::IsBlacklisted(const GURL& url) {
361   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
362   const base::DictionaryValue* blacklist =
363       profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist);
364   return blacklist && blacklist->HasKey(GetURLHash(url));
365 }
366
367 void TopSitesImpl::ClearBlacklistedURLs() {
368   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
369   {
370     DictionaryPrefUpdate update(profile_->GetPrefs(),
371                                 prefs::kNtpMostVisitedURLsBlacklist);
372     base::DictionaryValue* blacklist = update.Get();
373     blacklist->Clear();
374   }
375   ResetThreadSafeCache();
376   NotifyTopSitesChanged();
377 }
378
379 void TopSitesImpl::Shutdown() {
380   profile_ = NULL;
381   // Cancel all requests so that the service doesn't callback to us after we've
382   // invoked Shutdown (this could happen if we have a pending request and
383   // Shutdown is invoked).
384   history_consumer_.CancelAllRequests();
385   backend_->Shutdown();
386 }
387
388 // static
389 void TopSitesImpl::DiffMostVisited(const MostVisitedURLList& old_list,
390                                    const MostVisitedURLList& new_list,
391                                    TopSitesDelta* delta) {
392
393   // Add all the old URLs for quick lookup. This maps URLs to the corresponding
394   // index in the input.
395   std::map<GURL, size_t> all_old_urls;
396   size_t num_old_forced = 0;
397   for (size_t i = 0; i < old_list.size(); i++) {
398     if (!old_list[i].last_forced_time.is_null())
399       num_old_forced++;
400     DCHECK(old_list[i].last_forced_time.is_null() || i < num_old_forced)
401         << "Forced URLs must all appear before non-forced URLs.";
402     all_old_urls[old_list[i].url] = i;
403   }
404
405   // Check all the URLs in the new set to see which ones are new or just moved.
406   // When we find a match in the old set, we'll reset its index to our special
407   // marker. This allows us to quickly identify the deleted ones in a later
408   // pass.
409   const size_t kAlreadyFoundMarker = static_cast<size_t>(-1);
410   int rank = -1;  // Forced URLs have a rank of -1.
411   for (size_t i = 0; i < new_list.size(); i++) {
412     // Increase the rank if we're going through forced URLs. This works because
413     // non-forced URLs all come after forced URLs.
414     if (new_list[i].last_forced_time.is_null())
415       rank++;
416     DCHECK(new_list[i].last_forced_time.is_null() == (rank != -1))
417         << "Forced URLs must all appear before non-forced URLs.";
418     std::map<GURL, size_t>::iterator found = all_old_urls.find(new_list[i].url);
419     if (found == all_old_urls.end()) {
420       MostVisitedURLWithRank added;
421       added.url = new_list[i];
422       added.rank = rank;
423       delta->added.push_back(added);
424     } else {
425       DCHECK(found->second != kAlreadyFoundMarker)
426           << "Same URL appears twice in the new list.";
427       int old_rank = found->second >= num_old_forced ?
428           found->second - num_old_forced : -1;
429       if (old_rank != rank ||
430           old_list[found->second].last_forced_time !=
431               new_list[i].last_forced_time) {
432         MostVisitedURLWithRank moved;
433         moved.url = new_list[i];
434         moved.rank = rank;
435         delta->moved.push_back(moved);
436       }
437       found->second = kAlreadyFoundMarker;
438     }
439   }
440
441   // Any member without the special marker in the all_old_urls list means that
442   // there wasn't a "new" URL that mapped to it, so it was deleted.
443   for (std::map<GURL, size_t>::const_iterator i = all_old_urls.begin();
444        i != all_old_urls.end(); ++i) {
445     if (i->second != kAlreadyFoundMarker)
446       delta->deleted.push_back(old_list[i->second]);
447   }
448 }
449
450 CancelableRequestProvider::Handle TopSitesImpl::StartQueryForMostVisited() {
451   DCHECK(loaded_);
452   if (!profile_)
453     return 0;
454
455   HistoryService* hs = HistoryServiceFactory::GetForProfile(
456       profile_, Profile::EXPLICIT_ACCESS);
457   // |hs| may be null during unit tests.
458   if (hs) {
459     return hs->QueryMostVisitedURLs(
460         num_results_to_request_from_history(),
461         kDaysOfHistory,
462         &history_consumer_,
463         base::Bind(&TopSitesImpl::OnTopSitesAvailableFromHistory,
464                    base::Unretained(this)));
465   }
466   return 0;
467 }
468
469 bool TopSitesImpl::IsKnownURL(const GURL& url) {
470   return loaded_ && cache_->IsKnownURL(url);
471 }
472
473 const std::string& TopSitesImpl::GetCanonicalURLString(const GURL& url) const {
474   return cache_->GetCanonicalURL(url).spec();
475 }
476
477 bool TopSitesImpl::IsNonForcedFull() {
478   return loaded_ && cache_->GetNumNonForcedURLs() >= kNonForcedTopSitesNumber;
479 }
480
481 bool TopSitesImpl::IsForcedFull() {
482   return loaded_ && cache_->GetNumForcedURLs() >= kForcedTopSitesNumber;
483 }
484
485 TopSitesImpl::~TopSitesImpl() {
486 }
487
488 bool TopSitesImpl::SetPageThumbnailNoDB(
489     const GURL& url,
490     const base::RefCountedMemory* thumbnail_data,
491     const ThumbnailScore& score) {
492   // This should only be invoked when we know about the url.
493   DCHECK(cache_->IsKnownURL(url));
494
495   const MostVisitedURL& most_visited =
496       cache_->top_sites()[cache_->GetURLIndex(url)];
497   Images* image = cache_->GetImage(url);
498
499   // When comparing the thumbnail scores, we need to take into account the
500   // redirect hops, which are not generated when the thumbnail is because the
501   // redirects weren't known. We fill that in here since we know the redirects.
502   ThumbnailScore new_score_with_redirects(score);
503   new_score_with_redirects.redirect_hops_from_dest =
504       GetRedirectDistanceForURL(most_visited, url);
505
506   if (!ShouldReplaceThumbnailWith(image->thumbnail_score,
507                                   new_score_with_redirects) &&
508       image->thumbnail.get())
509     return false;  // The one we already have is better.
510
511   image->thumbnail = const_cast<base::RefCountedMemory*>(thumbnail_data);
512   image->thumbnail_score = new_score_with_redirects;
513
514   ResetThreadSafeImageCache();
515   return true;
516 }
517
518 bool TopSitesImpl::SetPageThumbnailEncoded(
519     const GURL& url,
520     const base::RefCountedMemory* thumbnail,
521     const ThumbnailScore& score) {
522   if (!SetPageThumbnailNoDB(url, thumbnail, score))
523     return false;
524
525   // Update the database.
526   if (!cache_->IsKnownURL(url))
527     return false;
528
529   size_t index = cache_->GetURLIndex(url);
530   int url_rank = index - cache_->GetNumForcedURLs();
531   const MostVisitedURL& most_visited = cache_->top_sites()[index];
532   backend_->SetPageThumbnail(most_visited,
533                              url_rank < 0 ? -1 : url_rank,
534                              *(cache_->GetImage(most_visited.url)));
535   return true;
536 }
537
538 // static
539 bool TopSitesImpl::EncodeBitmap(const gfx::Image& bitmap,
540                                 scoped_refptr<base::RefCountedBytes>* bytes) {
541   if (bitmap.IsEmpty())
542     return false;
543   *bytes = new base::RefCountedBytes();
544   std::vector<unsigned char> data;
545   if (!gfx::JPEG1xEncodedDataFromImage(bitmap, kTopSitesImageQuality, &data))
546     return false;
547
548   // As we're going to cache this data, make sure the vector is only as big as
549   // it needs to be, as JPEGCodec::Encode() over-allocates data.capacity().
550   // (In a C++0x future, we can just call shrink_to_fit() in Encode())
551   (*bytes)->data() = data;
552   return true;
553 }
554
555 void TopSitesImpl::RemoveTemporaryThumbnailByURL(const GURL& url) {
556   for (TempImages::iterator i = temp_images_.begin(); i != temp_images_.end();
557        ++i) {
558     if (i->first == url) {
559       temp_images_.erase(i);
560       return;
561     }
562   }
563 }
564
565 void TopSitesImpl::AddTemporaryThumbnail(
566     const GURL& url,
567     const base::RefCountedMemory* thumbnail,
568     const ThumbnailScore& score) {
569   if (temp_images_.size() == kMaxTempTopImages)
570     temp_images_.erase(temp_images_.begin());
571
572   TempImage image;
573   image.first = url;
574   image.second.thumbnail = const_cast<base::RefCountedMemory*>(thumbnail);
575   image.second.thumbnail_score = score;
576   temp_images_.push_back(image);
577 }
578
579 void TopSitesImpl::TimerFired() {
580   StartQueryForMostVisited();
581 }
582
583 // static
584 int TopSitesImpl::GetRedirectDistanceForURL(const MostVisitedURL& most_visited,
585                                             const GURL& url) {
586   for (size_t i = 0; i < most_visited.redirects.size(); i++) {
587     if (most_visited.redirects[i] == url)
588       return static_cast<int>(most_visited.redirects.size() - i - 1);
589   }
590   NOTREACHED() << "URL should always be found.";
591   return 0;
592 }
593
594 MostVisitedURLList TopSitesImpl::GetPrepopulatePages() {
595   MostVisitedURLList urls;
596   urls.resize(arraysize(kPrepopulatedPages));
597   for (size_t i = 0; i < urls.size(); ++i) {
598     MostVisitedURL& url = urls[i];
599     url.url = GURL(prepopulated_page_urls_[i]);
600     url.redirects.push_back(url.url);
601     url.title = l10n_util::GetStringUTF16(kPrepopulatedPages[i].title_id);
602   }
603   return urls;
604 }
605
606 bool TopSitesImpl::loaded() const {
607   return loaded_;
608 }
609
610 bool TopSitesImpl::AddForcedURL(const GURL& url, const base::Time& time) {
611   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
612   size_t num_forced = cache_->GetNumForcedURLs();
613   MostVisitedURLList new_list(cache_->top_sites());
614   MostVisitedURL new_url;
615
616   if (cache_->IsKnownURL(url)) {
617     size_t index = cache_->GetURLIndex(url);
618     // Do nothing if we currently have that URL as non-forced.
619     if (new_list[index].last_forced_time.is_null())
620       return false;
621
622     // Update the |last_forced_time| of the already existing URL. Delete it and
623     // reinsert it at the right location.
624     new_url = new_list[index];
625     new_list.erase(new_list.begin() + index);
626     num_forced--;
627   } else {
628     new_url.url = url;
629     new_url.redirects.push_back(url);
630   }
631   new_url.last_forced_time = time;
632   // Add forced URLs and sort. Added to the end of the list of forced URLs
633   // since this is almost always where it needs to go, unless the user's local
634   // clock is fiddled with.
635   MostVisitedURLList::iterator mid = new_list.begin() + num_forced;
636   new_list.insert(mid, new_url);
637   mid = new_list.begin() + num_forced;  // Mid was invalidated.
638   std::inplace_merge(new_list.begin(), mid, mid + 1, ForcedURLComparator);
639   SetTopSites(new_list);
640   return true;
641 }
642
643 bool TopSitesImpl::AddPrepopulatedPages(MostVisitedURLList* urls,
644                                         size_t num_forced_urls) {
645   bool added = false;
646   MostVisitedURLList prepopulate_urls = GetPrepopulatePages();
647   for (size_t i = 0; i < prepopulate_urls.size(); ++i) {
648     if (urls->size() - num_forced_urls < kNonForcedTopSitesNumber &&
649         IndexOf(*urls, prepopulate_urls[i].url) == -1) {
650       urls->push_back(prepopulate_urls[i]);
651       added = true;
652     }
653   }
654   return added;
655 }
656
657 size_t TopSitesImpl::MergeCachedForcedURLs(MostVisitedURLList* new_list) {
658   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
659   // Add all the new URLs for quick lookup. Take that opportunity to count the
660   // number of forced URLs in |new_list|.
661   std::set<GURL> all_new_urls;
662   size_t num_forced = 0;
663   for (size_t i = 0; i < new_list->size(); ++i) {
664     all_new_urls.insert((*new_list)[i].url);
665     if (!(*new_list)[i].last_forced_time.is_null())
666       ++num_forced;
667   }
668
669   // Keep the forced URLs from |cache_| that are not found in |new_list|.
670   MostVisitedURLList filtered_forced_urls;
671   for (size_t i = 0; i < cache_->GetNumForcedURLs(); ++i) {
672     if (all_new_urls.find(cache_->top_sites()[i].url) == all_new_urls.end())
673       filtered_forced_urls.push_back(cache_->top_sites()[i]);
674   }
675   num_forced += filtered_forced_urls.size();
676
677   // Prepend forced URLs and sort in order of ascending |last_forced_time|.
678   new_list->insert(new_list->begin(), filtered_forced_urls.begin(),
679                    filtered_forced_urls.end());
680   std::inplace_merge(
681       new_list->begin(), new_list->begin() + filtered_forced_urls.size(),
682       new_list->begin() + num_forced, ForcedURLComparator);
683
684   // Drop older forced URLs if the list overflows. Since forced URLs are always
685   // sort in increasing order of |last_forced_time|, drop the first ones.
686   if (num_forced > kForcedTopSitesNumber) {
687     new_list->erase(new_list->begin(),
688                     new_list->begin() + (num_forced - kForcedTopSitesNumber));
689     num_forced = kForcedTopSitesNumber;
690   }
691
692   return num_forced;
693 }
694
695 void TopSitesImpl::ApplyBlacklist(const MostVisitedURLList& urls,
696                                   MostVisitedURLList* out) {
697   // Log the number of times ApplyBlacklist is called so we can compute the
698   // average number of blacklisted items per user.
699   const base::DictionaryValue* blacklist =
700       profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist);
701   UMA_HISTOGRAM_BOOLEAN("TopSites.NumberOfApplyBlacklist", true);
702   UMA_HISTOGRAM_COUNTS_100("TopSites.NumberOfBlacklistedItems",
703       (blacklist ? blacklist->size() : 0));
704   size_t num_non_forced_urls = 0;
705   size_t num_forced_urls = 0;
706   for (size_t i = 0; i < urls.size(); ++i) {
707     if (!IsBlacklisted(urls[i].url)) {
708       if (urls[i].last_forced_time.is_null()) {
709         // Non-forced URL.
710         if (num_non_forced_urls >= kNonForcedTopSitesNumber)
711           continue;
712         num_non_forced_urls++;
713       } else {
714         // Forced URL.
715         if (num_forced_urls >= kForcedTopSitesNumber)
716           continue;
717         num_forced_urls++;
718       }
719       out->push_back(urls[i]);
720     }
721   }
722 }
723
724 std::string TopSitesImpl::GetURLHash(const GURL& url) {
725   // We don't use canonical URLs here to be able to blacklist only one of
726   // the two 'duplicate' sites, e.g. 'gmail.com' and 'mail.google.com'.
727   return base::MD5String(url.spec());
728 }
729
730 base::TimeDelta TopSitesImpl::GetUpdateDelay() {
731   if (cache_->top_sites().size() <= arraysize(kPrepopulatedPages))
732     return base::TimeDelta::FromSeconds(30);
733
734   int64 range = kMaxUpdateIntervalMinutes - kMinUpdateIntervalMinutes;
735   int64 minutes = kMaxUpdateIntervalMinutes -
736       last_num_urls_changed_ * range / cache_->top_sites().size();
737   return base::TimeDelta::FromMinutes(minutes);
738 }
739
740 void TopSitesImpl::Observe(int type,
741                            const content::NotificationSource& source,
742                            const content::NotificationDetails& details) {
743   if (!loaded_)
744     return;
745
746   if (type == chrome::NOTIFICATION_HISTORY_URLS_DELETED) {
747     content::Details<history::URLsDeletedDetails> deleted_details(details);
748     if (deleted_details->all_history) {
749       SetTopSites(MostVisitedURLList());
750       backend_->ResetDatabase();
751     } else {
752       std::set<size_t> indices_to_delete;  // Indices into top_sites_.
753       for (URLRows::const_iterator i = deleted_details->rows.begin();
754            i != deleted_details->rows.end(); ++i) {
755         if (cache_->IsKnownURL(i->url()))
756           indices_to_delete.insert(cache_->GetURLIndex(i->url()));
757       }
758
759       if (indices_to_delete.empty())
760         return;
761
762       MostVisitedURLList new_top_sites(cache_->top_sites());
763       for (std::set<size_t>::reverse_iterator i = indices_to_delete.rbegin();
764            i != indices_to_delete.rend(); i++) {
765         new_top_sites.erase(new_top_sites.begin() + *i);
766       }
767       SetTopSites(new_top_sites);
768     }
769     StartQueryForMostVisited();
770   } else if (type == content::NOTIFICATION_NAV_ENTRY_COMMITTED) {
771     NavigationController* controller =
772         content::Source<NavigationController>(source).ptr();
773     Profile* profile = Profile::FromBrowserContext(
774         controller->GetWebContents()->GetBrowserContext());
775     if (profile == profile_ && !IsNonForcedFull()) {
776       content::LoadCommittedDetails* load_details =
777           content::Details<content::LoadCommittedDetails>(details).ptr();
778       if (!load_details)
779         return;
780       const GURL& url = load_details->entry->GetURL();
781       if (!cache_->IsKnownURL(url) && HistoryService::CanAddURL(url)) {
782         // To avoid slamming history we throttle requests when the url updates.
783         // To do otherwise negatively impacts perf tests.
784         RestartQueryForTopSitesTimer(GetUpdateDelay());
785       }
786     }
787   }
788 }
789
790 void TopSitesImpl::SetTopSites(const MostVisitedURLList& new_top_sites) {
791   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
792
793   MostVisitedURLList top_sites(new_top_sites);
794   size_t num_forced_urls = MergeCachedForcedURLs(&top_sites);
795   AddPrepopulatedPages(&top_sites, num_forced_urls);
796
797   TopSitesDelta delta;
798   DiffMostVisited(cache_->top_sites(), top_sites, &delta);
799   if (!delta.deleted.empty() || !delta.added.empty() || !delta.moved.empty()) {
800     backend_->UpdateTopSites(delta);
801   }
802
803   last_num_urls_changed_ = delta.added.size() + delta.moved.size();
804
805   // We always do the following steps (setting top sites in cache, and resetting
806   // thread safe cache ...) as this method is invoked during startup at which
807   // point the caches haven't been updated yet.
808   cache_->SetTopSites(top_sites);
809
810   // See if we have any tmp thumbnails for the new sites.
811   if (!temp_images_.empty()) {
812     for (size_t i = 0; i < top_sites.size(); ++i) {
813       const MostVisitedURL& mv = top_sites[i];
814       GURL canonical_url = cache_->GetCanonicalURL(mv.url);
815       // At the time we get the thumbnail redirects aren't known, so we have to
816       // iterate through all the images.
817       for (TempImages::iterator it = temp_images_.begin();
818            it != temp_images_.end(); ++it) {
819         if (canonical_url == cache_->GetCanonicalURL(it->first)) {
820           SetPageThumbnailEncoded(
821               mv.url, it->second.thumbnail.get(), it->second.thumbnail_score);
822           temp_images_.erase(it);
823           break;
824         }
825       }
826     }
827   }
828
829   if (top_sites.size() - num_forced_urls >= kNonForcedTopSitesNumber)
830     temp_images_.clear();
831
832   ResetThreadSafeCache();
833   ResetThreadSafeImageCache();
834   NotifyTopSitesChanged();
835
836   // Restart the timer that queries history for top sites. This is done to
837   // ensure we stay in sync with history.
838   RestartQueryForTopSitesTimer(GetUpdateDelay());
839 }
840
841 int TopSitesImpl::num_results_to_request_from_history() const {
842   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
843
844   const base::DictionaryValue* blacklist =
845       profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist);
846   return kNonForcedTopSitesNumber + (blacklist ? blacklist->size() : 0);
847 }
848
849 void TopSitesImpl::MoveStateToLoaded() {
850   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
851
852   MostVisitedURLList filtered_urls_all;
853   MostVisitedURLList filtered_urls_nonforced;
854   PendingCallbacks pending_callbacks;
855   {
856     base::AutoLock lock(lock_);
857
858     if (loaded_)
859       return;  // Don't do anything if we're already loaded.
860     loaded_ = true;
861
862     // Now that we're loaded we can service the queued up callbacks. Copy them
863     // here and service them outside the lock.
864     if (!pending_callbacks_.empty()) {
865       // We always filter out forced URLs because callers of GetMostVisitedURLs
866       // are not interested in them.
867       filtered_urls_all = thread_safe_cache_->top_sites();
868       filtered_urls_nonforced.assign(thread_safe_cache_->top_sites().begin() +
869                                        thread_safe_cache_->GetNumForcedURLs(),
870                                      thread_safe_cache_->top_sites().end());
871       pending_callbacks.swap(pending_callbacks_);
872     }
873   }
874
875   for (size_t i = 0; i < pending_callbacks.size(); i++)
876     pending_callbacks[i].Run(filtered_urls_all, filtered_urls_nonforced);
877
878   content::NotificationService::current()->Notify(
879       chrome::NOTIFICATION_TOP_SITES_LOADED,
880       content::Source<Profile>(profile_),
881       content::Details<TopSites>(this));
882 }
883
884 void TopSitesImpl::ResetThreadSafeCache() {
885   base::AutoLock lock(lock_);
886   MostVisitedURLList cached;
887   ApplyBlacklist(cache_->top_sites(), &cached);
888   thread_safe_cache_->SetTopSites(cached);
889 }
890
891 void TopSitesImpl::ResetThreadSafeImageCache() {
892   base::AutoLock lock(lock_);
893   thread_safe_cache_->SetThumbnails(cache_->images());
894 }
895
896 void TopSitesImpl::NotifyTopSitesChanged() {
897   content::NotificationService::current()->Notify(
898       chrome::NOTIFICATION_TOP_SITES_CHANGED,
899       content::Source<TopSites>(this),
900       content::NotificationService::NoDetails());
901 }
902
903 void TopSitesImpl::RestartQueryForTopSitesTimer(base::TimeDelta delta) {
904   if (timer_.IsRunning() && ((timer_start_time_ + timer_.GetCurrentDelay()) <
905                              (base::TimeTicks::Now() + delta))) {
906     return;
907   }
908
909   timer_start_time_ = base::TimeTicks::Now();
910   timer_.Stop();
911   timer_.Start(FROM_HERE, delta, this, &TopSitesImpl::TimerFired);
912 }
913
914 void TopSitesImpl::OnGotMostVisitedThumbnails(
915     const scoped_refptr<MostVisitedThumbnails>& thumbnails) {
916   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
917
918   // Set the top sites directly in the cache so that SetTopSites diffs
919   // correctly.
920   cache_->SetTopSites(thumbnails->most_visited);
921   SetTopSites(thumbnails->most_visited);
922   cache_->SetThumbnails(thumbnails->url_to_images_map);
923
924   ResetThreadSafeImageCache();
925
926   MoveStateToLoaded();
927
928   // Start a timer that refreshes top sites from history.
929   RestartQueryForTopSitesTimer(
930       base::TimeDelta::FromSeconds(kUpdateIntervalSecs));
931 }
932
933 void TopSitesImpl::OnTopSitesAvailableFromHistory(
934     CancelableRequestProvider::Handle handle,
935     MostVisitedURLList pages) {
936   SetTopSites(pages);
937
938   // Used only in testing.
939   content::NotificationService::current()->Notify(
940       chrome::NOTIFICATION_TOP_SITES_UPDATED,
941       content::Source<TopSitesImpl>(this),
942       content::Details<CancelableRequestProvider::Handle>(&handle));
943 }
944
945 }  // namespace history