1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/autocomplete/history_quick_provider.h"
9 #include "base/basictypes.h"
10 #include "base/command_line.h"
11 #include "base/i18n/break_iterator.h"
12 #include "base/logging.h"
13 #include "base/metrics/field_trial.h"
14 #include "base/metrics/histogram.h"
15 #include "base/prefs/pref_service.h"
16 #include "base/strings/string_number_conversions.h"
17 #include "base/strings/string_util.h"
18 #include "base/strings/utf_string_conversions.h"
19 #include "base/time/time.h"
20 #include "chrome/browser/autocomplete/autocomplete_result.h"
21 #include "chrome/browser/autocomplete/history_url_provider.h"
22 #include "chrome/browser/history/history_database.h"
23 #include "chrome/browser/history/history_service.h"
24 #include "chrome/browser/history/history_service_factory.h"
25 #include "chrome/browser/history/in_memory_url_index.h"
26 #include "chrome/browser/history/in_memory_url_index_types.h"
27 #include "chrome/browser/history/scored_history_match.h"
28 #include "chrome/browser/omnibox/omnibox_field_trial.h"
29 #include "chrome/browser/profiles/profile.h"
30 #include "chrome/browser/search/search.h"
31 #include "chrome/browser/search_engines/template_url.h"
32 #include "chrome/browser/search_engines/template_url_service.h"
33 #include "chrome/browser/search_engines/template_url_service_factory.h"
34 #include "chrome/common/autocomplete_match_type.h"
35 #include "chrome/common/chrome_switches.h"
36 #include "chrome/common/net/url_fixer_upper.h"
37 #include "chrome/common/pref_names.h"
38 #include "chrome/common/url_constants.h"
39 #include "content/public/browser/browser_thread.h"
40 #include "content/public/browser/notification_source.h"
41 #include "content/public/browser/notification_types.h"
42 #include "net/base/escape.h"
43 #include "net/base/net_util.h"
44 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
45 #include "url/url_parse.h"
46 #include "url/url_util.h"
48 using history::InMemoryURLIndex;
49 using history::ScoredHistoryMatch;
50 using history::ScoredHistoryMatches;
52 bool HistoryQuickProvider::disabled_ = false;
54 HistoryQuickProvider::HistoryQuickProvider(
55 AutocompleteProviderListener* listener,
57 : HistoryProvider(listener, profile,
58 AutocompleteProvider::TYPE_HISTORY_QUICK),
59 languages_(profile_->GetPrefs()->GetString(prefs::kAcceptLanguages)) {
62 void HistoryQuickProvider::Start(const AutocompleteInput& input,
63 bool minimal_changes) {
68 // Don't bother with INVALID and FORCED_QUERY. Also pass when looking for
69 // BEST_MATCH and there is no inline autocompletion because none of the HQP
70 // matches can score highly enough to qualify.
71 if ((input.type() == AutocompleteInput::INVALID) ||
72 (input.type() == AutocompleteInput::FORCED_QUERY) ||
73 (input.matches_requested() == AutocompleteInput::BEST_MATCH &&
74 input.prevent_inline_autocomplete()))
77 autocomplete_input_ = input;
79 // TODO(pkasting): We should just block here until this loads. Any time
80 // someone unloads the history backend, we'll get inconsistent inline
81 // autocomplete behavior here.
83 base::TimeTicks start_time = base::TimeTicks::Now();
85 if (input.text().length() < 6) {
86 base::TimeTicks end_time = base::TimeTicks::Now();
87 std::string name = "HistoryQuickProvider.QueryIndexTime." +
88 base::IntToString(input.text().length());
89 base::HistogramBase* counter = base::Histogram::FactoryGet(
90 name, 1, 1000, 50, base::Histogram::kUmaTargetedHistogramFlag);
91 counter->Add(static_cast<int>((end_time - start_time).InMilliseconds()));
93 UpdateStarredStateOfMatches();
97 void HistoryQuickProvider::DeleteMatch(const AutocompleteMatch& match) {
98 DCHECK(match.deletable);
99 DCHECK(match.destination_url.is_valid());
100 // Delete the match from the InMemoryURLIndex.
101 GetIndex()->DeleteURL(match.destination_url);
102 DeleteMatchFromMatches(match);
105 HistoryQuickProvider::~HistoryQuickProvider() {}
107 void HistoryQuickProvider::DoAutocomplete() {
108 // Get the matching URLs from the DB.
109 ScoredHistoryMatches matches = GetIndex()->HistoryItemsForTerms(
110 autocomplete_input_.text(),
111 autocomplete_input_.cursor_position());
115 // Figure out if HistoryURL provider has a URL-what-you-typed match
116 // that ought to go first and what its score will be.
117 bool will_have_url_what_you_typed_match_first = false;
118 int url_what_you_typed_match_score = -1; // undefined
119 // These are necessary (but not sufficient) conditions for the omnibox
120 // input to be a URL-what-you-typed match. The username test checks that
121 // either the username does not exist (a regular URL such as http://site/)
122 // or, if the username exists (http://user@site/), there must be either
123 // a password or a port. Together these exclude pure username@site
124 // inputs because these are likely to be an e-mail address. HistoryURL
125 // provider won't promote the URL-what-you-typed match to first
127 const bool can_have_url_what_you_typed_match_first =
128 autocomplete_input_.canonicalized_url().is_valid() &&
129 (autocomplete_input_.type() != AutocompleteInput::QUERY) &&
130 (autocomplete_input_.type() != AutocompleteInput::FORCED_QUERY) &&
131 (!autocomplete_input_.parts().username.is_nonempty() ||
132 autocomplete_input_.parts().password.is_nonempty() ||
133 autocomplete_input_.parts().path.is_nonempty());
134 if (can_have_url_what_you_typed_match_first) {
135 HistoryService* const history_service =
136 HistoryServiceFactory::GetForProfile(profile_,
137 Profile::EXPLICIT_ACCESS);
138 // We expect HistoryService to be available. In case it's not,
139 // (e.g., due to Profile corruption) we let HistoryQuick provider
140 // completions (which may be available because it's a different
141 // data structure) compete with the URL-what-you-typed match as
143 if (history_service) {
144 history::URLDatabase* url_db = history_service->InMemoryDatabase();
145 // url_db can be NULL if it hasn't finished initializing (or
146 // failed to to initialize). In this case, we let HistoryQuick
147 // provider completions compete with the URL-what-you-typed
150 const std::string host(UTF16ToUTF8(autocomplete_input_.text().substr(
151 autocomplete_input_.parts().host.begin,
152 autocomplete_input_.parts().host.len)));
153 // We want to put the URL-what-you-typed match first if either
154 // * the user visited the URL before (intranet or internet).
155 // * it's a URL on a host that user visited before and this
156 // is the root path of the host. (If the user types some
157 // of a path--more than a simple "/"--we let autocomplete compete
158 // normally with the URL-what-you-typed match.)
159 // TODO(mpearson): Remove this hacky code and simply score URL-what-
160 // you-typed in some sane way relative to possible completions:
161 // URL-what-you-typed should get some sort of a boost relative
162 // to completions, but completions should naturally win if
163 // they're a lot more popular. In this process, if the input
164 // is a bare intranet hostname that has been visited before, we
165 // may want to enforce that the only completions that can outscore
166 // the URL-what-you-typed match are on the same host (i.e., aren't
167 // from a longer internet hostname for which the omnibox input is
169 if (url_db->GetRowForURL(
170 autocomplete_input_.canonicalized_url(), NULL) != 0) {
171 // We visited this URL before.
172 will_have_url_what_you_typed_match_first = true;
173 // HistoryURLProvider gives visited what-you-typed URLs a high score.
174 url_what_you_typed_match_score =
175 HistoryURLProvider::kScoreForBestInlineableResult;
176 } else if (url_db->IsTypedHost(host) &&
177 (!autocomplete_input_.parts().path.is_nonempty() ||
178 ((autocomplete_input_.parts().path.len == 1) &&
179 (autocomplete_input_.text()[
180 autocomplete_input_.parts().path.begin] == '/'))) &&
181 !autocomplete_input_.parts().query.is_nonempty() &&
182 !autocomplete_input_.parts().ref.is_nonempty()) {
183 // Not visited, but we've seen the host before.
184 will_have_url_what_you_typed_match_first = true;
185 const size_t registry_length =
186 net::registry_controlled_domains::GetRegistryLength(
188 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
189 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
190 if (registry_length == 0) {
191 // Known intranet hosts get one score.
192 url_what_you_typed_match_score =
193 HistoryURLProvider::kScoreForUnvisitedIntranetResult;
195 // Known internet hosts get another.
196 url_what_you_typed_match_score =
197 HistoryURLProvider::kScoreForWhatYouTypedResult;
204 // Loop over every result and add it to matches_. In the process,
205 // guarantee that scores are decreasing. |max_match_score| keeps
206 // track of the highest score we can assign to any later results we
207 // see. Also, if we're not allowing inline autocompletions in
208 // general or the current best suggestion isn't inlineable,
209 // artificially reduce the starting |max_match_score| (which
210 // therefore applies to all results) to something low enough that
211 // guarantees no result will be offered as an inline autocomplete
212 // suggestion. Also do a similar reduction if we think there will be
213 // a URL-what-you-typed match. (We want URL-what-you-typed matches for
214 // visited URLs to beat out any longer URLs, no matter how frequently
215 // they're visited.) The strength of this last reduction depends on the
216 // likely score for the URL-what-you-typed result.
218 // |template_url_service| or |template_url| can be NULL in unit tests.
219 TemplateURLService* template_url_service =
220 TemplateURLServiceFactory::GetForProfile(profile_);
221 TemplateURL* template_url = template_url_service ?
222 template_url_service->GetDefaultSearchProvider() : NULL;
223 int max_match_score =
224 (OmniboxFieldTrial::ReorderForLegalDefaultMatch(
225 autocomplete_input_.current_page_classification()) ||
226 (!PreventInlineAutocomplete(autocomplete_input_) &&
227 matches.begin()->can_inline)) ?
228 matches.begin()->raw_score :
229 (AutocompleteResult::kLowestDefaultScore - 1);
230 if (will_have_url_what_you_typed_match_first) {
231 max_match_score = std::min(max_match_score,
232 url_what_you_typed_match_score - 1);
234 for (ScoredHistoryMatches::const_iterator match_iter = matches.begin();
235 match_iter != matches.end(); ++match_iter) {
236 const ScoredHistoryMatch& history_match(*match_iter);
237 // Culls results corresponding to queries from the default search engine.
238 // These are low-quality, difficult-to-understand matches for users, and the
239 // SearchProvider should surface past queries in a better way anyway.
241 !template_url->IsSearchURL(history_match.url_info.url())) {
242 // Set max_match_score to the score we'll assign this result:
243 max_match_score = std::min(max_match_score, history_match.raw_score);
244 matches_.push_back(QuickMatchToACMatch(history_match, max_match_score));
245 // Mark this max_match_score as being used:
251 AutocompleteMatch HistoryQuickProvider::QuickMatchToACMatch(
252 const ScoredHistoryMatch& history_match,
254 const history::URLRow& info = history_match.url_info;
255 AutocompleteMatch match(this, score, !!info.visit_count(),
256 history_match.url_matches.empty() ? AutocompleteMatchType::HISTORY_TITLE :
257 AutocompleteMatchType::HISTORY_URL);
258 match.typed_count = info.typed_count();
259 match.destination_url = info.url();
260 DCHECK(match.destination_url.is_valid());
262 // Format the URL autocomplete presentation.
263 std::vector<size_t> offsets =
264 OffsetsFromTermMatches(history_match.url_matches);
265 const net::FormatUrlTypes format_types = net::kFormatUrlOmitAll &
266 ~(!history_match.match_in_scheme ? 0 : net::kFormatUrlOmitHTTP);
267 match.fill_into_edit =
268 AutocompleteInput::FormattedStringWithEquivalentMeaning(info.url(),
269 net::FormatUrlWithOffsets(info.url(), languages_, format_types,
270 net::UnescapeRule::SPACES, NULL, NULL, &offsets));
271 history::TermMatches new_matches =
272 ReplaceOffsetsInTermMatches(history_match.url_matches, offsets);
273 match.contents = net::FormatUrl(info.url(), languages_, format_types,
274 net::UnescapeRule::SPACES, NULL, NULL, NULL);
275 match.contents_class =
276 SpansFromTermMatch(new_matches, match.contents.length(), true);
278 match.allowed_to_be_default_match = history_match.can_inline &&
279 !PreventInlineAutocomplete(autocomplete_input_);
280 if (match.allowed_to_be_default_match) {
281 DCHECK(!new_matches.empty());
282 size_t inline_autocomplete_offset = new_matches[0].offset +
283 new_matches[0].length;
284 // |inline_autocomplete_offset| may be beyond the end of the
285 // |fill_into_edit| if the user has typed an URL with a scheme and the
286 // last character typed is a slash. That slash is removed by the
287 // FormatURLWithOffsets call above.
288 if (inline_autocomplete_offset < match.fill_into_edit.length()) {
289 match.inline_autocompletion =
290 match.fill_into_edit.substr(inline_autocomplete_offset);
294 // Format the description autocomplete presentation.
295 match.description = info.title();
296 match.description_class = SpansFromTermMatch(
297 history_match.title_matches, match.description.length(), false);
299 match.RecordAdditionalInfo("typed count", info.typed_count());
300 match.RecordAdditionalInfo("visit count", info.visit_count());
301 match.RecordAdditionalInfo("last visit", info.last_visit());
306 history::InMemoryURLIndex* HistoryQuickProvider::GetIndex() {
307 if (index_for_testing_.get())
308 return index_for_testing_.get();
310 HistoryService* const history_service =
311 HistoryServiceFactory::GetForProfile(profile_, Profile::EXPLICIT_ACCESS);
312 if (!history_service)
315 return history_service->InMemoryIndex();
319 ACMatchClassifications HistoryQuickProvider::SpansFromTermMatch(
320 const history::TermMatches& matches,
323 ACMatchClassification::Style url_style =
324 is_url ? ACMatchClassification::URL : ACMatchClassification::NONE;
325 ACMatchClassifications spans;
326 if (matches.empty()) {
328 spans.push_back(ACMatchClassification(0, url_style));
331 if (matches[0].offset)
332 spans.push_back(ACMatchClassification(0, url_style));
333 size_t match_count = matches.size();
334 for (size_t i = 0; i < match_count;) {
335 size_t offset = matches[i].offset;
336 spans.push_back(ACMatchClassification(offset,
337 ACMatchClassification::MATCH | url_style));
338 // Skip all adjacent matches.
340 offset += matches[i].length;
342 } while ((i < match_count) && (offset == matches[i].offset));
343 if (offset < text_length)
344 spans.push_back(ACMatchClassification(offset, url_style));