- add sources.
[platform/framework/web/crosswalk.git] / src / chrome / browser / predictors / autocomplete_action_predictor.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/predictors/autocomplete_action_predictor.h"
6
7 #include <math.h>
8
9 #include <vector>
10
11 #include "base/bind.h"
12 #include "base/guid.h"
13 #include "base/i18n/case_conversion.h"
14 #include "base/metrics/histogram.h"
15 #include "base/strings/string_util.h"
16 #include "base/strings/stringprintf.h"
17 #include "base/strings/utf_string_conversions.h"
18 #include "chrome/browser/autocomplete/autocomplete_match.h"
19 #include "chrome/browser/autocomplete/autocomplete_result.h"
20 #include "chrome/browser/chrome_notification_types.h"
21 #include "chrome/browser/history/history_notifications.h"
22 #include "chrome/browser/history/history_service.h"
23 #include "chrome/browser/history/history_service_factory.h"
24 #include "chrome/browser/history/in_memory_database.h"
25 #include "chrome/browser/omnibox/omnibox_log.h"
26 #include "chrome/browser/predictors/autocomplete_action_predictor_factory.h"
27 #include "chrome/browser/predictors/predictor_database.h"
28 #include "chrome/browser/predictors/predictor_database_factory.h"
29 #include "chrome/browser/prerender/prerender_field_trial.h"
30 #include "chrome/browser/prerender/prerender_handle.h"
31 #include "chrome/browser/prerender/prerender_manager.h"
32 #include "chrome/browser/prerender/prerender_manager_factory.h"
33 #include "chrome/browser/profiles/profile.h"
34 #include "content/public/browser/browser_thread.h"
35 #include "content/public/browser/notification_details.h"
36 #include "content/public/browser/notification_service.h"
37 #include "content/public/browser/notification_source.h"
38
39 namespace {
40
41 const float kConfidenceCutoff[] = {
42   0.8f,
43   0.5f
44 };
45
46 COMPILE_ASSERT(arraysize(kConfidenceCutoff) ==
47                predictors::AutocompleteActionPredictor::LAST_PREDICT_ACTION,
48                ConfidenceCutoff_count_mismatch);
49
50 const size_t kMinimumUserTextLength = 1;
51 const int kMinimumNumberOfHits = 3;
52
53 enum DatabaseAction {
54   DATABASE_ACTION_ADD,
55   DATABASE_ACTION_UPDATE,
56   DATABASE_ACTION_DELETE_SOME,
57   DATABASE_ACTION_DELETE_ALL,
58   DATABASE_ACTION_COUNT
59 };
60
61 }  // namespace
62
63 namespace predictors {
64
65 const int AutocompleteActionPredictor::kMaximumDaysToKeepEntry = 14;
66
67 AutocompleteActionPredictor::AutocompleteActionPredictor(Profile* profile)
68     : profile_(profile),
69       main_profile_predictor_(NULL),
70       incognito_predictor_(NULL),
71       initialized_(false) {
72   if (profile_->IsOffTheRecord()) {
73     main_profile_predictor_ = AutocompleteActionPredictorFactory::GetForProfile(
74         profile_->GetOriginalProfile());
75     DCHECK(main_profile_predictor_);
76     main_profile_predictor_->incognito_predictor_ = this;
77     if (main_profile_predictor_->initialized_)
78       CopyFromMainProfile();
79   } else {
80     // Request the in-memory database from the history to force it to load so
81     // it's available as soon as possible.
82     HistoryService* history_service = HistoryServiceFactory::GetForProfile(
83         profile_, Profile::EXPLICIT_ACCESS);
84     if (history_service)
85       history_service->InMemoryDatabase();
86
87     table_ =
88         PredictorDatabaseFactory::GetForProfile(profile_)->autocomplete_table();
89
90     // Observe all main frame loads so we can wait for the first to complete
91     // before accessing DB and IO threads to build the local cache.
92     notification_registrar_.Add(this,
93                                 content::NOTIFICATION_LOAD_COMPLETED_MAIN_FRAME,
94                                 content::NotificationService::AllSources());
95   }
96 }
97
98 AutocompleteActionPredictor::~AutocompleteActionPredictor() {
99   if (main_profile_predictor_)
100     main_profile_predictor_->incognito_predictor_ = NULL;
101   else if (incognito_predictor_)
102     incognito_predictor_->main_profile_predictor_ = NULL;
103   if (prerender_handle_.get())
104     prerender_handle_->OnCancel();
105 }
106
107 void AutocompleteActionPredictor::RegisterTransitionalMatches(
108     const string16& user_text,
109     const AutocompleteResult& result) {
110   if (user_text.length() < kMinimumUserTextLength)
111     return;
112   const string16 lower_user_text(base::i18n::ToLower(user_text));
113
114   // Merge this in to an existing match if we already saw |user_text|
115   std::vector<TransitionalMatch>::iterator match_it =
116       std::find(transitional_matches_.begin(), transitional_matches_.end(),
117                 lower_user_text);
118
119   if (match_it == transitional_matches_.end()) {
120     TransitionalMatch transitional_match;
121     transitional_match.user_text = lower_user_text;
122     match_it = transitional_matches_.insert(transitional_matches_.end(),
123                                             transitional_match);
124   }
125
126   for (AutocompleteResult::const_iterator i(result.begin()); i != result.end();
127        ++i) {
128     if (std::find(match_it->urls.begin(), match_it->urls.end(),
129                   i->destination_url) == match_it->urls.end()) {
130       match_it->urls.push_back(i->destination_url);
131     }
132   }
133 }
134
135 void AutocompleteActionPredictor::ClearTransitionalMatches() {
136   transitional_matches_.clear();
137 }
138
139 void AutocompleteActionPredictor::StartPrerendering(
140     const GURL& url,
141     const content::SessionStorageNamespaceMap& session_storage_namespace_map,
142     const gfx::Size& size) {
143   // Only cancel the old prerender after starting the new one, so if the URLs
144   // are the same, the underlying prerender will be reused.
145   scoped_ptr<prerender::PrerenderHandle> old_prerender_handle(
146       prerender_handle_.release());
147   if (prerender::PrerenderManager* prerender_manager =
148           prerender::PrerenderManagerFactory::GetForProfile(profile_)) {
149     content::SessionStorageNamespace* session_storage_namespace = NULL;
150     content::SessionStorageNamespaceMap::const_iterator it =
151         session_storage_namespace_map.find(std::string());
152     if (it != session_storage_namespace_map.end())
153       session_storage_namespace = it->second.get();
154     prerender_handle_.reset(prerender_manager->AddPrerenderFromOmnibox(
155         url, session_storage_namespace, size));
156   }
157   if (old_prerender_handle)
158     old_prerender_handle->OnCancel();
159 }
160
161 // Given a match, return a recommended action.
162 AutocompleteActionPredictor::Action
163     AutocompleteActionPredictor::RecommendAction(
164         const string16& user_text,
165         const AutocompleteMatch& match) const {
166   bool is_in_db = false;
167   const double confidence = CalculateConfidence(user_text, match, &is_in_db);
168   DCHECK(confidence >= 0.0 && confidence <= 1.0);
169
170   UMA_HISTOGRAM_BOOLEAN("AutocompleteActionPredictor.MatchIsInDb", is_in_db);
171
172   if (is_in_db) {
173     // Multiple enties with the same URL are fine as the confidence may be
174     // different.
175     tracked_urls_.push_back(std::make_pair(match.destination_url, confidence));
176     UMA_HISTOGRAM_COUNTS_100("AutocompleteActionPredictor.Confidence",
177                              confidence * 100);
178   }
179
180   // Map the confidence to an action.
181   Action action = ACTION_NONE;
182   for (int i = 0; i < LAST_PREDICT_ACTION; ++i) {
183     if (confidence >= kConfidenceCutoff[i]) {
184       action = static_cast<Action>(i);
185       break;
186     }
187   }
188
189   // Downgrade prerender to preconnect if this is a search match or if omnibox
190   // prerendering is disabled. There are cases when Instant will not handle a
191   // search suggestion and in those cases it would be good to prerender the
192   // search results, however search engines have not been set up to correctly
193   // handle being prerendered and until they are we should avoid it.
194   // http://crbug.com/117495
195   if (action == ACTION_PRERENDER &&
196       (AutocompleteMatch::IsSearchType(match.type) ||
197        !prerender::IsOmniboxEnabled(profile_))) {
198     action = ACTION_PRECONNECT;
199   }
200
201   return action;
202 }
203
204 // Return true if the suggestion type warrants a TCP/IP preconnection.
205 // i.e., it is now quite likely that the user will select the related domain.
206 // static
207 bool AutocompleteActionPredictor::IsPreconnectable(
208     const AutocompleteMatch& match) {
209   return AutocompleteMatch::IsSearchType(match.type);
210 }
211
212 void AutocompleteActionPredictor::Observe(
213     int type,
214     const content::NotificationSource& source,
215     const content::NotificationDetails& details) {
216   switch (type) {
217     case content::NOTIFICATION_LOAD_COMPLETED_MAIN_FRAME:
218       CreateLocalCachesFromDatabase();
219       notification_registrar_.Remove(
220           this,
221           content::NOTIFICATION_LOAD_COMPLETED_MAIN_FRAME,
222           content::NotificationService::AllSources());
223       break;
224
225     case chrome::NOTIFICATION_HISTORY_URLS_DELETED: {
226       DCHECK(initialized_);
227       const content::Details<const history::URLsDeletedDetails>
228           urls_deleted_details =
229               content::Details<const history::URLsDeletedDetails>(details);
230       if (urls_deleted_details->all_history)
231         DeleteAllRows();
232       else
233         DeleteRowsWithURLs(urls_deleted_details->rows);
234       break;
235     }
236
237     // This notification does not catch all instances of the user navigating
238     // from the Omnibox, but it does catch the cases where the dropdown is open
239     // and those are the events we're most interested in.
240     case chrome::NOTIFICATION_OMNIBOX_OPENED_URL: {
241       DCHECK(initialized_);
242
243       // TODO(dominich): This doesn't need to be synchronous. Investigate
244       // posting it as a task to be run later.
245       OnOmniboxOpenedUrl(*content::Details<OmniboxLog>(details).ptr());
246       break;
247     }
248
249     case chrome::NOTIFICATION_HISTORY_LOADED: {
250       TryDeleteOldEntries(content::Details<HistoryService>(details).ptr());
251
252       notification_registrar_.Remove(this,
253                                      chrome::NOTIFICATION_HISTORY_LOADED,
254                                      content::Source<Profile>(profile_));
255       break;
256     }
257
258     default:
259       NOTREACHED() << "Unexpected notification observed.";
260       break;
261   }
262 }
263
264 void AutocompleteActionPredictor::CreateLocalCachesFromDatabase() {
265   // Create local caches using the database as loaded. We will garbage collect
266   // rows from the caches and the database once the history service is
267   // available.
268   std::vector<AutocompleteActionPredictorTable::Row>* rows =
269       new std::vector<AutocompleteActionPredictorTable::Row>();
270   content::BrowserThread::PostTaskAndReply(content::BrowserThread::DB,
271       FROM_HERE,
272       base::Bind(&AutocompleteActionPredictorTable::GetAllRows, table_, rows),
273       base::Bind(&AutocompleteActionPredictor::CreateCaches, AsWeakPtr(),
274                  base::Owned(rows)));
275 }
276
277 void AutocompleteActionPredictor::DeleteAllRows() {
278   if (!initialized_)
279     return;
280
281   db_cache_.clear();
282   db_id_cache_.clear();
283
284   if (table_.get()) {
285     content::BrowserThread::PostTask(content::BrowserThread::DB, FROM_HERE,
286         base::Bind(&AutocompleteActionPredictorTable::DeleteAllRows,
287                    table_));
288   }
289
290   UMA_HISTOGRAM_ENUMERATION("AutocompleteActionPredictor.DatabaseAction",
291                             DATABASE_ACTION_DELETE_ALL, DATABASE_ACTION_COUNT);
292 }
293
294 void AutocompleteActionPredictor::DeleteRowsWithURLs(
295     const history::URLRows& rows) {
296   if (!initialized_)
297     return;
298
299   std::vector<AutocompleteActionPredictorTable::Row::Id> id_list;
300
301   for (DBCacheMap::iterator it = db_cache_.begin(); it != db_cache_.end();) {
302     if (std::find_if(rows.begin(), rows.end(),
303         history::URLRow::URLRowHasURL(it->first.url)) != rows.end()) {
304       const DBIdCacheMap::iterator id_it = db_id_cache_.find(it->first);
305       DCHECK(id_it != db_id_cache_.end());
306       id_list.push_back(id_it->second);
307       db_id_cache_.erase(id_it);
308       db_cache_.erase(it++);
309     } else {
310       ++it;
311     }
312   }
313
314   if (table_.get()) {
315     content::BrowserThread::PostTask(content::BrowserThread::DB, FROM_HERE,
316         base::Bind(&AutocompleteActionPredictorTable::DeleteRows, table_,
317                    id_list));
318   }
319
320   UMA_HISTOGRAM_ENUMERATION("AutocompleteActionPredictor.DatabaseAction",
321                             DATABASE_ACTION_DELETE_SOME, DATABASE_ACTION_COUNT);
322 }
323
324 void AutocompleteActionPredictor::OnOmniboxOpenedUrl(const OmniboxLog& log) {
325   if (log.text.length() < kMinimumUserTextLength)
326     return;
327
328   const AutocompleteMatch& match = log.result.match_at(log.selected_index);
329
330   UMA_HISTOGRAM_BOOLEAN(
331       base::StringPrintf("Prerender.OmniboxNavigationsCouldPrerender%s",
332                          prerender::PrerenderManager::GetModeString()).c_str(),
333       prerender::IsOmniboxEnabled(profile_));
334
335   const GURL& opened_url = match.destination_url;
336   const string16 lower_user_text(base::i18n::ToLower(log.text));
337
338   // Traverse transitional matches for those that have a user_text that is a
339   // prefix of |lower_user_text|.
340   std::vector<AutocompleteActionPredictorTable::Row> rows_to_add;
341   std::vector<AutocompleteActionPredictorTable::Row> rows_to_update;
342
343   for (std::vector<TransitionalMatch>::const_iterator it =
344         transitional_matches_.begin(); it != transitional_matches_.end();
345         ++it) {
346     if (!StartsWith(lower_user_text, it->user_text, true))
347       continue;
348
349     // Add entries to the database for those matches.
350     for (std::vector<GURL>::const_iterator url_it = it->urls.begin();
351           url_it != it->urls.end(); ++url_it) {
352       DCHECK(it->user_text.length() >= kMinimumUserTextLength);
353       const DBCacheKey key = { it->user_text, *url_it };
354       const bool is_hit = (*url_it == opened_url);
355
356       AutocompleteActionPredictorTable::Row row;
357       row.user_text = key.user_text;
358       row.url = key.url;
359
360       DBCacheMap::iterator it = db_cache_.find(key);
361       if (it == db_cache_.end()) {
362         row.id = base::GenerateGUID();
363         row.number_of_hits = is_hit ? 1 : 0;
364         row.number_of_misses = is_hit ? 0 : 1;
365
366         rows_to_add.push_back(row);
367       } else {
368         DCHECK(db_id_cache_.find(key) != db_id_cache_.end());
369         row.id = db_id_cache_.find(key)->second;
370         row.number_of_hits = it->second.number_of_hits + (is_hit ? 1 : 0);
371         row.number_of_misses = it->second.number_of_misses + (is_hit ? 0 : 1);
372
373         rows_to_update.push_back(row);
374       }
375     }
376   }
377   if (rows_to_add.size() > 0 || rows_to_update.size() > 0)
378     AddAndUpdateRows(rows_to_add, rows_to_update);
379
380   ClearTransitionalMatches();
381
382   // Check against tracked urls and log accuracy for the confidence we
383   // predicted.
384   for (std::vector<std::pair<GURL, double> >::const_iterator it =
385        tracked_urls_.begin(); it != tracked_urls_.end();
386        ++it) {
387     if (opened_url == it->first) {
388       UMA_HISTOGRAM_COUNTS_100("AutocompleteActionPredictor.AccurateCount",
389                                it->second * 100);
390     }
391   }
392   tracked_urls_.clear();
393 }
394
395 void AutocompleteActionPredictor::AddAndUpdateRows(
396     const AutocompleteActionPredictorTable::Rows& rows_to_add,
397     const AutocompleteActionPredictorTable::Rows& rows_to_update) {
398   if (!initialized_)
399     return;
400
401   for (AutocompleteActionPredictorTable::Rows::const_iterator it =
402        rows_to_add.begin(); it != rows_to_add.end(); ++it) {
403     const DBCacheKey key = { it->user_text, it->url };
404     DBCacheValue value = { it->number_of_hits, it->number_of_misses };
405
406     DCHECK(db_cache_.find(key) == db_cache_.end());
407
408     db_cache_[key] = value;
409     db_id_cache_[key] = it->id;
410     UMA_HISTOGRAM_ENUMERATION("AutocompleteActionPredictor.DatabaseAction",
411                               DATABASE_ACTION_ADD, DATABASE_ACTION_COUNT);
412   }
413   for (AutocompleteActionPredictorTable::Rows::const_iterator it =
414        rows_to_update.begin(); it != rows_to_update.end(); ++it) {
415     const DBCacheKey key = { it->user_text, it->url };
416
417     DBCacheMap::iterator db_it = db_cache_.find(key);
418     DCHECK(db_it != db_cache_.end());
419     DCHECK(db_id_cache_.find(key) != db_id_cache_.end());
420
421     db_it->second.number_of_hits = it->number_of_hits;
422     db_it->second.number_of_misses = it->number_of_misses;
423     UMA_HISTOGRAM_ENUMERATION("AutocompleteActionPredictor.DatabaseAction",
424                               DATABASE_ACTION_UPDATE, DATABASE_ACTION_COUNT);
425   }
426
427   if (table_.get()) {
428     content::BrowserThread::PostTask(content::BrowserThread::DB, FROM_HERE,
429         base::Bind(&AutocompleteActionPredictorTable::AddAndUpdateRows,
430                    table_, rows_to_add, rows_to_update));
431   }
432 }
433
434 void AutocompleteActionPredictor::CreateCaches(
435     std::vector<AutocompleteActionPredictorTable::Row>* rows) {
436   CHECK(content::BrowserThread::CurrentlyOn(content::BrowserThread::UI));
437   DCHECK(!profile_->IsOffTheRecord());
438   DCHECK(!initialized_);
439   DCHECK(db_cache_.empty());
440   DCHECK(db_id_cache_.empty());
441
442   for (std::vector<AutocompleteActionPredictorTable::Row>::const_iterator it =
443        rows->begin(); it != rows->end(); ++it) {
444     const DBCacheKey key = { it->user_text, it->url };
445     const DBCacheValue value = { it->number_of_hits, it->number_of_misses };
446     db_cache_[key] = value;
447     db_id_cache_[key] = it->id;
448   }
449
450   // If the history service is ready, delete any old or invalid entries.
451   HistoryService* history_service =
452       HistoryServiceFactory::GetForProfile(profile_, Profile::EXPLICIT_ACCESS);
453   if (!TryDeleteOldEntries(history_service)) {
454     // Wait for the notification that the history service is ready and the URL
455     // DB is loaded.
456     notification_registrar_.Add(this, chrome::NOTIFICATION_HISTORY_LOADED,
457                                 content::Source<Profile>(profile_));
458   }
459 }
460
461 bool AutocompleteActionPredictor::TryDeleteOldEntries(HistoryService* service) {
462   CHECK(content::BrowserThread::CurrentlyOn(content::BrowserThread::UI));
463   DCHECK(!profile_->IsOffTheRecord());
464   DCHECK(!initialized_);
465
466   if (!service)
467     return false;
468
469   history::URLDatabase* url_db = service->InMemoryDatabase();
470   if (!url_db)
471     return false;
472
473   DeleteOldEntries(url_db);
474   return true;
475 }
476
477 void AutocompleteActionPredictor::DeleteOldEntries(
478     history::URLDatabase* url_db) {
479   CHECK(content::BrowserThread::CurrentlyOn(content::BrowserThread::UI));
480   DCHECK(!profile_->IsOffTheRecord());
481   DCHECK(!initialized_);
482   DCHECK(table_.get());
483
484   std::vector<AutocompleteActionPredictorTable::Row::Id> ids_to_delete;
485   DeleteOldIdsFromCaches(url_db, &ids_to_delete);
486
487   content::BrowserThread::PostTask(content::BrowserThread::DB, FROM_HERE,
488       base::Bind(&AutocompleteActionPredictorTable::DeleteRows, table_,
489                  ids_to_delete));
490
491   FinishInitialization();
492   if (incognito_predictor_)
493     incognito_predictor_->CopyFromMainProfile();
494 }
495
496 void AutocompleteActionPredictor::DeleteOldIdsFromCaches(
497     history::URLDatabase* url_db,
498     std::vector<AutocompleteActionPredictorTable::Row::Id>* id_list) {
499   CHECK(content::BrowserThread::CurrentlyOn(content::BrowserThread::UI));
500   DCHECK(!profile_->IsOffTheRecord());
501   DCHECK(!initialized_);
502   DCHECK(url_db);
503   DCHECK(id_list);
504
505   id_list->clear();
506   for (DBCacheMap::iterator it = db_cache_.begin(); it != db_cache_.end();) {
507     history::URLRow url_row;
508
509     if ((url_db->GetRowForURL(it->first.url, &url_row) == 0) ||
510         ((base::Time::Now() - url_row.last_visit()).InDays() >
511          kMaximumDaysToKeepEntry)) {
512       const DBIdCacheMap::iterator id_it = db_id_cache_.find(it->first);
513       DCHECK(id_it != db_id_cache_.end());
514       id_list->push_back(id_it->second);
515       db_id_cache_.erase(id_it);
516       db_cache_.erase(it++);
517     } else {
518       ++it;
519     }
520   }
521 }
522
523 void AutocompleteActionPredictor::CopyFromMainProfile() {
524   CHECK(content::BrowserThread::CurrentlyOn(content::BrowserThread::UI));
525   DCHECK(profile_->IsOffTheRecord());
526   DCHECK(!initialized_);
527   DCHECK(main_profile_predictor_);
528   DCHECK(main_profile_predictor_->initialized_);
529
530   db_cache_ = main_profile_predictor_->db_cache_;
531   db_id_cache_ = main_profile_predictor_->db_id_cache_;
532   FinishInitialization();
533 }
534
535 void AutocompleteActionPredictor::FinishInitialization() {
536   CHECK(content::BrowserThread::CurrentlyOn(content::BrowserThread::UI));
537   DCHECK(!initialized_);
538
539   // Incognito and normal profiles should listen only to omnibox notifications
540   // from their own profile, but both should listen to history deletions from
541   // the main profile, since opening the history page in either case actually
542   // opens the non-incognito history (and lets users delete from there).
543   notification_registrar_.Add(this, chrome::NOTIFICATION_OMNIBOX_OPENED_URL,
544                               content::Source<Profile>(profile_));
545   notification_registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URLS_DELETED,
546       content::Source<Profile>(profile_->GetOriginalProfile()));
547   initialized_ = true;
548 }
549
550 double AutocompleteActionPredictor::CalculateConfidence(
551     const string16& user_text,
552     const AutocompleteMatch& match,
553     bool* is_in_db) const {
554   const DBCacheKey key = { user_text, match.destination_url };
555
556   *is_in_db = false;
557   if (user_text.length() < kMinimumUserTextLength)
558     return 0.0;
559
560   const DBCacheMap::const_iterator iter = db_cache_.find(key);
561   if (iter == db_cache_.end())
562     return 0.0;
563
564   *is_in_db = true;
565   return CalculateConfidenceForDbEntry(iter);
566 }
567
568 double AutocompleteActionPredictor::CalculateConfidenceForDbEntry(
569     DBCacheMap::const_iterator iter) const {
570   const DBCacheValue& value = iter->second;
571   if (value.number_of_hits < kMinimumNumberOfHits)
572     return 0.0;
573
574   const double number_of_hits = static_cast<double>(value.number_of_hits);
575   return number_of_hits / (number_of_hits + value.number_of_misses);
576 }
577
578 AutocompleteActionPredictor::TransitionalMatch::TransitionalMatch() {
579 }
580
581 AutocompleteActionPredictor::TransitionalMatch::~TransitionalMatch() {
582 }
583
584 }  // namespace predictors