Upstream version 5.34.104.0
[platform/framework/web/crosswalk.git] / src / chrome / browser / autocomplete / history_quick_provider.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/autocomplete/history_quick_provider.h"
6
7 #include <vector>
8
9 #include "base/basictypes.h"
10 #include "base/command_line.h"
11 #include "base/i18n/break_iterator.h"
12 #include "base/logging.h"
13 #include "base/metrics/field_trial.h"
14 #include "base/metrics/histogram.h"
15 #include "base/prefs/pref_service.h"
16 #include "base/strings/string_number_conversions.h"
17 #include "base/strings/string_util.h"
18 #include "base/strings/utf_string_conversions.h"
19 #include "base/time/time.h"
20 #include "chrome/browser/autocomplete/autocomplete_result.h"
21 #include "chrome/browser/autocomplete/history_url_provider.h"
22 #include "chrome/browser/history/history_database.h"
23 #include "chrome/browser/history/history_service.h"
24 #include "chrome/browser/history/history_service_factory.h"
25 #include "chrome/browser/history/in_memory_url_index.h"
26 #include "chrome/browser/history/in_memory_url_index_types.h"
27 #include "chrome/browser/history/scored_history_match.h"
28 #include "chrome/browser/omnibox/omnibox_field_trial.h"
29 #include "chrome/browser/profiles/profile.h"
30 #include "chrome/browser/search/search.h"
31 #include "chrome/browser/search_engines/template_url.h"
32 #include "chrome/browser/search_engines/template_url_service.h"
33 #include "chrome/browser/search_engines/template_url_service_factory.h"
34 #include "chrome/common/autocomplete_match_type.h"
35 #include "chrome/common/chrome_switches.h"
36 #include "chrome/common/net/url_fixer_upper.h"
37 #include "chrome/common/pref_names.h"
38 #include "chrome/common/url_constants.h"
39 #include "content/public/browser/notification_source.h"
40 #include "content/public/browser/notification_types.h"
41 #include "net/base/escape.h"
42 #include "net/base/net_util.h"
43 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
44 #include "url/url_parse.h"
45 #include "url/url_util.h"
46
47 using history::InMemoryURLIndex;
48 using history::ScoredHistoryMatch;
49 using history::ScoredHistoryMatches;
50
51 bool HistoryQuickProvider::disabled_ = false;
52
53 HistoryQuickProvider::HistoryQuickProvider(
54     AutocompleteProviderListener* listener,
55     Profile* profile)
56     : HistoryProvider(listener, profile,
57           AutocompleteProvider::TYPE_HISTORY_QUICK),
58       languages_(profile_->GetPrefs()->GetString(prefs::kAcceptLanguages)) {
59 }
60
61 void HistoryQuickProvider::Start(const AutocompleteInput& input,
62                                  bool minimal_changes) {
63   matches_.clear();
64   if (disabled_)
65     return;
66
67   // Don't bother with INVALID and FORCED_QUERY.  Also pass when looking for
68   // BEST_MATCH and there is no inline autocompletion because none of the HQP
69   // matches can score highly enough to qualify.
70   if ((input.type() == AutocompleteInput::INVALID) ||
71       (input.type() == AutocompleteInput::FORCED_QUERY) ||
72       (input.matches_requested() == AutocompleteInput::BEST_MATCH &&
73        input.prevent_inline_autocomplete()))
74     return;
75
76   autocomplete_input_ = input;
77
78   // TODO(pkasting): We should just block here until this loads.  Any time
79   // someone unloads the history backend, we'll get inconsistent inline
80   // autocomplete behavior here.
81   if (GetIndex()) {
82     base::TimeTicks start_time = base::TimeTicks::Now();
83     DoAutocomplete();
84     if (input.text().length() < 6) {
85       base::TimeTicks end_time = base::TimeTicks::Now();
86       std::string name = "HistoryQuickProvider.QueryIndexTime." +
87           base::IntToString(input.text().length());
88       base::HistogramBase* counter = base::Histogram::FactoryGet(
89           name, 1, 1000, 50, base::Histogram::kUmaTargetedHistogramFlag);
90       counter->Add(static_cast<int>((end_time - start_time).InMilliseconds()));
91     }
92     UpdateStarredStateOfMatches();
93   }
94 }
95
96 void HistoryQuickProvider::DeleteMatch(const AutocompleteMatch& match) {
97   DCHECK(match.deletable);
98   DCHECK(match.destination_url.is_valid());
99   // Delete the match from the InMemoryURLIndex.
100   GetIndex()->DeleteURL(match.destination_url);
101   DeleteMatchFromMatches(match);
102 }
103
104 HistoryQuickProvider::~HistoryQuickProvider() {}
105
106 void HistoryQuickProvider::DoAutocomplete() {
107   // Get the matching URLs from the DB.
108   ScoredHistoryMatches matches = GetIndex()->HistoryItemsForTerms(
109       autocomplete_input_.text(),
110       autocomplete_input_.cursor_position());
111   if (matches.empty())
112     return;
113
114   // Figure out if HistoryURL provider has a URL-what-you-typed match
115   // that ought to go first and what its score will be.
116   bool will_have_url_what_you_typed_match_first = false;
117   int url_what_you_typed_match_score = -1;  // undefined
118   // These are necessary (but not sufficient) conditions for the omnibox
119   // input to be a URL-what-you-typed match.  The username test checks that
120   // either the username does not exist (a regular URL such as http://site/)
121   // or, if the username exists (http://user@site/), there must be either
122   // a password or a port.  Together these exclude pure username@site
123   // inputs because these are likely to be an e-mail address.  HistoryURL
124   // provider won't promote the URL-what-you-typed match to first
125   // for these inputs.
126   const bool can_have_url_what_you_typed_match_first =
127       autocomplete_input_.canonicalized_url().is_valid() &&
128       (autocomplete_input_.type() != AutocompleteInput::QUERY) &&
129       (autocomplete_input_.type() != AutocompleteInput::FORCED_QUERY) &&
130       (!autocomplete_input_.parts().username.is_nonempty() ||
131        autocomplete_input_.parts().password.is_nonempty() ||
132        autocomplete_input_.parts().path.is_nonempty());
133   if (can_have_url_what_you_typed_match_first) {
134     HistoryService* const history_service =
135         HistoryServiceFactory::GetForProfile(profile_,
136                                              Profile::EXPLICIT_ACCESS);
137     // We expect HistoryService to be available.  In case it's not,
138     // (e.g., due to Profile corruption) we let HistoryQuick provider
139     // completions (which may be available because it's a different
140     // data structure) compete with the URL-what-you-typed match as
141     // normal.
142     if (history_service) {
143       history::URLDatabase* url_db = history_service->InMemoryDatabase();
144       // url_db can be NULL if it hasn't finished initializing (or
145       // failed to to initialize).  In this case, we let HistoryQuick
146       // provider completions compete with the URL-what-you-typed
147       // match as normal.
148       if (url_db) {
149         const std::string host(base::UTF16ToUTF8(
150             autocomplete_input_.text().substr(
151                 autocomplete_input_.parts().host.begin,
152                 autocomplete_input_.parts().host.len)));
153         // We want to put the URL-what-you-typed match first if either
154         // * the user visited the URL before (intranet or internet).
155         // * it's a URL on a host that user visited before and this
156         //   is the root path of the host.  (If the user types some
157         //   of a path--more than a simple "/"--we let autocomplete compete
158         //   normally with the URL-what-you-typed match.)
159         // TODO(mpearson): Remove this hacky code and simply score URL-what-
160         // you-typed in some sane way relative to possible completions:
161         // URL-what-you-typed should get some sort of a boost relative
162         // to completions, but completions should naturally win if
163         // they're a lot more popular.  In this process, if the input
164         // is a bare intranet hostname that has been visited before, we
165         // may want to enforce that the only completions that can outscore
166         // the URL-what-you-typed match are on the same host (i.e., aren't
167         // from a longer internet hostname for which the omnibox input is
168         // a prefix).
169         if (url_db->GetRowForURL(
170             autocomplete_input_.canonicalized_url(), NULL) != 0) {
171           // We visited this URL before.
172           will_have_url_what_you_typed_match_first = true;
173           // HistoryURLProvider gives visited what-you-typed URLs a high score.
174           url_what_you_typed_match_score =
175               HistoryURLProvider::kScoreForBestInlineableResult;
176         } else if (url_db->IsTypedHost(host) &&
177              (!autocomplete_input_.parts().path.is_nonempty() ||
178               ((autocomplete_input_.parts().path.len == 1) &&
179                (autocomplete_input_.text()[
180                    autocomplete_input_.parts().path.begin] == '/'))) &&
181              !autocomplete_input_.parts().query.is_nonempty() &&
182              !autocomplete_input_.parts().ref.is_nonempty()) {
183           // Not visited, but we've seen the host before.
184           will_have_url_what_you_typed_match_first = true;
185           const size_t registry_length =
186               net::registry_controlled_domains::GetRegistryLength(
187                   host,
188                   net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
189                   net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
190           if (registry_length == 0) {
191             // Known intranet hosts get one score.
192             url_what_you_typed_match_score =
193                 HistoryURLProvider::kScoreForUnvisitedIntranetResult;
194           } else {
195             // Known internet hosts get another.
196             url_what_you_typed_match_score =
197                 HistoryURLProvider::kScoreForWhatYouTypedResult;
198           }
199         }
200       }
201     }
202   }
203
204   // Loop over every result and add it to matches_.  In the process,
205   // guarantee that scores are decreasing.  |max_match_score| keeps
206   // track of the highest score we can assign to any later results we
207   // see.  Also, if we're not allowing inline autocompletions in
208   // general or the current best suggestion isn't inlineable,
209   // artificially reduce the starting |max_match_score| (which
210   // therefore applies to all results) to something low enough that
211   // guarantees no result will be offered as an inline autocomplete
212   // suggestion.  Also do a similar reduction if we think there will be
213   // a URL-what-you-typed match.  (We want URL-what-you-typed matches for
214   // visited URLs to beat out any longer URLs, no matter how frequently
215   // they're visited.)  The strength of this last reduction depends on the
216   // likely score for the URL-what-you-typed result.
217
218   // |template_url_service| or |template_url| can be NULL in unit tests.
219   TemplateURLService* template_url_service =
220       TemplateURLServiceFactory::GetForProfile(profile_);
221   TemplateURL* template_url = template_url_service ?
222       template_url_service->GetDefaultSearchProvider() : NULL;
223   int max_match_score =
224       (OmniboxFieldTrial::ReorderForLegalDefaultMatch(
225          autocomplete_input_.current_page_classification()) ||
226        (!PreventInlineAutocomplete(autocomplete_input_) &&
227         matches.begin()->can_inline())) ?
228       matches.begin()->raw_score() :
229       (AutocompleteResult::kLowestDefaultScore - 1);
230   if (will_have_url_what_you_typed_match_first) {
231     max_match_score = std::min(max_match_score,
232         url_what_you_typed_match_score - 1);
233   }
234   for (ScoredHistoryMatches::const_iterator match_iter = matches.begin();
235        match_iter != matches.end(); ++match_iter) {
236     const ScoredHistoryMatch& history_match(*match_iter);
237     // Culls results corresponding to queries from the default search engine.
238     // These are low-quality, difficult-to-understand matches for users, and the
239     // SearchProvider should surface past queries in a better way anyway.
240     if (!template_url ||
241         !template_url->IsSearchURL(history_match.url_info.url())) {
242       // Set max_match_score to the score we'll assign this result:
243       max_match_score = std::min(max_match_score, history_match.raw_score());
244       matches_.push_back(QuickMatchToACMatch(history_match, max_match_score));
245       // Mark this max_match_score as being used:
246       max_match_score--;
247     }
248   }
249 }
250
251 AutocompleteMatch HistoryQuickProvider::QuickMatchToACMatch(
252     const ScoredHistoryMatch& history_match,
253     int score) {
254   const history::URLRow& info = history_match.url_info;
255   AutocompleteMatch match(
256       this, score, !!info.visit_count(),
257       history_match.url_matches().empty() ?
258           AutocompleteMatchType::HISTORY_TITLE :
259           AutocompleteMatchType::HISTORY_URL);
260   match.typed_count = info.typed_count();
261   match.destination_url = info.url();
262   DCHECK(match.destination_url.is_valid());
263
264   // Format the URL autocomplete presentation.
265   std::vector<size_t> offsets =
266       OffsetsFromTermMatches(history_match.url_matches());
267   const net::FormatUrlTypes format_types = net::kFormatUrlOmitAll &
268       ~(!history_match.match_in_scheme ? 0 : net::kFormatUrlOmitHTTP);
269   match.fill_into_edit =
270       AutocompleteInput::FormattedStringWithEquivalentMeaning(info.url(),
271           net::FormatUrlWithOffsets(info.url(), languages_, format_types,
272               net::UnescapeRule::SPACES, NULL, NULL, &offsets));
273   history::TermMatches new_matches =
274       ReplaceOffsetsInTermMatches(history_match.url_matches(), offsets);
275   match.contents = net::FormatUrl(info.url(), languages_, format_types,
276               net::UnescapeRule::SPACES, NULL, NULL, NULL);
277   match.contents_class =
278       SpansFromTermMatch(new_matches, match.contents.length(), true);
279
280   if (history_match.can_inline()) {
281     DCHECK(!new_matches.empty());
282     size_t inline_autocomplete_offset = new_matches[0].offset +
283         new_matches[0].length;
284     // |inline_autocomplete_offset| may be beyond the end of the
285     // |fill_into_edit| if the user has typed an URL with a scheme and the
286     // last character typed is a slash.  That slash is removed by the
287     // FormatURLWithOffsets call above.
288     if (inline_autocomplete_offset < match.fill_into_edit.length()) {
289       match.inline_autocompletion =
290           match.fill_into_edit.substr(inline_autocomplete_offset);
291     }
292     match.allowed_to_be_default_match = match.inline_autocompletion.empty() ||
293         !PreventInlineAutocomplete(autocomplete_input_);
294   }
295
296   // Format the description autocomplete presentation.
297   match.description = info.title();
298   match.description_class = SpansFromTermMatch(
299       history_match.title_matches(), match.description.length(), false);
300
301   match.RecordAdditionalInfo("typed count", info.typed_count());
302   match.RecordAdditionalInfo("visit count", info.visit_count());
303   match.RecordAdditionalInfo("last visit", info.last_visit());
304
305   return match;
306 }
307
308 history::InMemoryURLIndex* HistoryQuickProvider::GetIndex() {
309   if (index_for_testing_.get())
310     return index_for_testing_.get();
311
312   HistoryService* const history_service =
313       HistoryServiceFactory::GetForProfile(profile_, Profile::EXPLICIT_ACCESS);
314   if (!history_service)
315     return NULL;
316
317   return history_service->InMemoryIndex();
318 }