#include <math.h>
+#include "base/logging.h"
#include "base/metrics/histogram.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "chrome/browser/autocomplete/history_url_provider.h"
#include "chrome/browser/autocomplete/url_prefix.h"
-#include "chrome/browser/bookmarks/bookmark_service.h"
#include "chrome/browser/omnibox/omnibox_field_trial.h"
-#include "chrome/common/chrome_switches.h"
+#include "components/bookmarks/core/browser/bookmark_service.h"
+#include "components/bookmarks/core/browser/bookmark_utils.h"
#include "content/public/browser/browser_thread.h"
namespace history {
// Static scoring configuration for ScoredHistoryMatch; populated by Init()
// from OmniboxFieldTrial values (see the Init() fragment at the end of this
// hunk).
float* ScoredHistoryMatch::days_ago_to_recency_score_ = NULL;
bool ScoredHistoryMatch::initialized_ = false;
int ScoredHistoryMatch::bookmark_value_ = 1;
// Removed by this patch: the "discount frecency when few visits" experiment
// flag (matching removals appear in GetFrecency() and Init() below).
-bool ScoredHistoryMatch::discount_frecency_when_few_visits_ = false;
bool ScoredHistoryMatch::allow_tld_matches_ = false;
bool ScoredHistoryMatch::allow_scheme_matches_ = false;
bool ScoredHistoryMatch::also_do_hup_like_scoring_ = false;
// NOTE(review): tail of a constructor/function whose opening is outside this
// hunk; presumably it ensures the statics above are initialized before use —
// confirm against the full file.
Init();
}
// NOTE(review): this span is a diff hunk ('-' = removed, '+' = added) with
// elided context lines, so the constructor body is incomplete here.
// The change: (1) adds |terms_to_word_starts_offsets| — per search term, the
// offset of the first word character within that term — which is threaded
// through to GetTopicalityScore(); (2) switches URL/title cleanup to the
// bookmark_utils variants, capturing OffsetAdjuster::Adjustments so that at
// the end the offsets in |url_matches_| can be mapped back from the
// cleaned-up matching string to positions in the original URL spec.
-ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row,
- const VisitInfoVector& visits,
- const std::string& languages,
- const base::string16& lower_string,
- const String16Vector& terms,
- const RowWordStarts& word_starts,
- const base::Time now,
- BookmarkService* bookmark_service)
+ScoredHistoryMatch::ScoredHistoryMatch(
+ const URLRow& row,
+ const VisitInfoVector& visits,
+ const std::string& languages,
+ const base::string16& lower_string,
+ const String16Vector& terms,
+ const WordStarts& terms_to_word_starts_offsets,
+ const RowWordStarts& word_starts,
+ const base::Time now,
+ BookmarkService* bookmark_service)
    : HistoryMatch(row, 0, false, false),
      raw_score_(0),
      can_inline_(false) {
  // Figure out where each search term appears in the URL and/or page title
  // so that we can score as well as provide autocomplete highlighting.
- base::string16 url = CleanUpUrlForMatching(gurl, languages);
- base::string16 title = CleanUpTitleForMatching(row.title());
+ base::OffsetAdjuster::Adjustments adjustments;
+ base::string16 url =
+ bookmark_utils::CleanUpUrlForMatching(gurl, languages, &adjustments);
+ base::string16 title = bookmark_utils::CleanUpTitleForMatching(row.title());
  int term_num = 0;
  for (String16Vector::const_iterator iter = terms.begin(); iter != terms.end();
       ++iter, ++term_num) {
      // NOTE(review): loop body elided in this hunk.
      num_components_in_best_prefix);
  }
- const float topicality_score =
- GetTopicalityScore(terms.size(), url, word_starts);
+ const float topicality_score = GetTopicalityScore(
+ terms.size(), url, terms_to_word_starts_offsets, word_starts);
  const float frecency_score = GetFrecency(
      now, (bookmark_service && bookmark_service->IsBookmarked(gurl)), visits);
  raw_score_ = GetFinalRelevancyScore(topicality_score, frecency_score);
    raw_score_ = std::min(max_assigned_score_for_non_inlineable_matches_,
                          raw_score_);
  }
+
+ // Now that we're done processing this entry, correct the offsets of the
+ // matches in |url_matches_| so they point to offsets in the original URL
+ // spec, not the cleaned-up URL string that we used for matching.
+ std::vector<size_t> offsets = OffsetsFromTermMatches(url_matches_);
+ base::OffsetAdjuster::UnadjustOffsets(adjustments, &offsets);
+ url_matches_ = ReplaceOffsetsInTermMatches(url_matches_, offsets);
}
// Destructor: intentionally empty — members release their own resources.
ScoredHistoryMatch::~ScoredHistoryMatch() {
}
// static
// Returns the subset of |term_matches| that either fall outside the
// [start_pos, end_pos) filtering region or begin at a word boundary. With
// this patch, the boundary check is adjusted by
// |terms_to_word_starts_offsets[term_num]| — the offset of the first word
// character within each term — so a term like "+foo" matches the word start
// of "foo".
// NOTE(review): diff hunk — the declarations of next_word_starts and
// filtered_matches, plus the closing brace, live in elided context lines.
TermMatches ScoredHistoryMatch::FilterTermMatchesByWordStarts(
    const TermMatches& term_matches,
+ const WordStarts& terms_to_word_starts_offsets,
    const WordStarts& word_starts,
    size_t start_pos,
    size_t end_pos) {
  WordStarts::const_iterator end_word_starts = word_starts.end();
  for (TermMatches::const_iterator iter = term_matches.begin();
       iter != term_matches.end(); ++iter) {
- // Advance next_word_starts until it's >= the position of the term
- // we're considering.
+ const size_t term_offset = terms_to_word_starts_offsets[iter->term_num];
+ // Advance next_word_starts until it's >= the position of the term we're
+ // considering (adjusted for where the word begins within the term).
    while ((next_word_starts != end_word_starts) &&
- (*next_word_starts < iter->offset))
+ (*next_word_starts < (iter->offset + term_offset)))
      ++next_word_starts;
    // Add the match if it's before the position we start filtering at or
    // after the position we stop filtering at (assuming we have a position
    if ((iter->offset < start_pos) ||
        ((end_pos != std::string::npos) && (iter->offset >= end_pos)) ||
        ((next_word_starts != end_word_starts) &&
- (*next_word_starts == iter->offset)))
+ (*next_word_starts == iter->offset + term_offset)))
      filtered_matches.push_back(*iter);
  }
  return filtered_matches;
// Computes the topicality component of the relevance score from where the
// search terms hit the URL and title. This hunk threads the new
// |terms_to_word_starts_offsets| through the word-boundary filtering and
// comparisons, so each term's position is adjusted by the offset of its
// first word character.
// NOTE(review): diff hunk with elided context — declarations of
// next_word_starts, end_word_starts, term_scores, end_of_hostname_pos,
// colon_pos and question_mark_pos are outside this view.
float ScoredHistoryMatch::GetTopicalityScore(
    const int num_terms,
    const base::string16& url,
+ const WordStarts& terms_to_word_starts_offsets,
    const RowWordStarts& word_starts) {
  // Because the below code is not thread safe, we check that we're
  // only calling it from one thread: the UI thread. Specifically,
  // First, filter all matches not at a word boundary and in the path (or
  // later).
  url_matches_ = FilterTermMatchesByWordStarts(
- url_matches_, word_starts.url_word_starts_, end_of_hostname_pos,
+ url_matches_, terms_to_word_starts_offsets, word_starts.url_word_starts_,
+ end_of_hostname_pos,
      std::string::npos);
  if (colon_pos != std::string::npos) {
    // Also filter matches not at a word boundary and in the scheme.
    url_matches_ = FilterTermMatchesByWordStarts(
- url_matches_, word_starts.url_word_starts_, 0, colon_pos);
+ url_matches_, terms_to_word_starts_offsets,
+ word_starts.url_word_starts_, 0, colon_pos);
  }
  for (TermMatches::const_iterator iter = url_matches_.begin();
       iter != url_matches_.end(); ++iter) {
- // Advance next_word_starts until it's >= the position of the term
- // we're considering.
+ const size_t term_offset = terms_to_word_starts_offsets[iter->term_num];
+ // Advance next_word_starts until it's >= the position of the term we're
+ // considering (adjusted for where the word begins within the term).
    while ((next_word_starts != end_word_starts) &&
- (*next_word_starts < iter->offset)) {
+ (*next_word_starts < (iter->offset + term_offset))) {
      ++next_word_starts;
    }
    const bool at_word_boundary = (next_word_starts != end_word_starts) &&
- (*next_word_starts == iter->offset);
+ (*next_word_starts == iter->offset + term_offset);
    if ((question_mark_pos != std::string::npos) &&
        (iter->offset > question_mark_pos)) {
      // The match is in a CGI ?... fragment.
  end_word_starts = word_starts.title_word_starts_.end();
  int word_num = 0;
  title_matches_ = FilterTermMatchesByWordStarts(
- title_matches_, word_starts.title_word_starts_, 0, std::string::npos);
+ title_matches_, terms_to_word_starts_offsets,
+ word_starts.title_word_starts_, 0, std::string::npos);
  for (TermMatches::const_iterator iter = title_matches_.begin();
       iter != title_matches_.end(); ++iter) {
- // Advance next_word_starts until it's >= the position of the term
- // we're considering.
+ const size_t term_offset = terms_to_word_starts_offsets[iter->term_num];
+ // Advance next_word_starts until it's >= the position of the term we're
+ // considering (adjusted for where the word begins within the term).
    while ((next_word_starts != end_word_starts) &&
- (*next_word_starts < iter->offset)) {
+ (*next_word_starts < (iter->offset + term_offset))) {
      ++next_word_starts;
      ++word_num;
    }
    if (word_num >= 10) break; // only count the first ten words
    // Splitting the compound DCHECK lets DCHECK_EQ report both values on
    // failure (the old form only printed "not at word boundary").
- DCHECK((next_word_starts != end_word_starts) &&
- (*next_word_starts == iter->offset)) << "not at word boundary";
+ DCHECK(next_word_starts != end_word_starts);
+ DCHECK_EQ(*next_word_starts, iter->offset + term_offset)
+ << "not at word boundary";
    term_scores[iter->term_num] += 8;
  }
  // NOTE(review): fragment of GetFrecency(); its opening lines are outside
  // this hunk. The patch removes the discount_frecency_when_few_visits_
  // experiment: the average now always divides by kMaxVisitsToScore,
  // penalizing URLs with fewer than kMaxVisitsToScore visits.
  // TODO(mpearson): Restore logic for penalizing out-of-order matches.
  // kMaxVisitsToScore as the denominator for the average regardless of
  // how many visits there were in order to penalize a match that has
  // fewer visits than kMaxVisitsToScore.
- const int total_sampled_visits = std::min(visits.size(), kMaxVisitsToScore);
- if (total_sampled_visits == 0)
- return 0.0f;
  float summed_visit_points = 0;
  // NOTE(review): std::min(...) is re-evaluated on every iteration here;
  // hoisting it into a local (as the removed code did) would read better.
- for (int i = 0; i < total_sampled_visits; ++i) {
+ for (size_t i = 0; i < std::min(visits.size(), kMaxVisitsToScore); ++i) {
    int value_of_transition =
        (visits[i].second == content::PAGE_TRANSITION_TYPED) ? 20 : 1;
    if (bookmarked)
        // NOTE(review): surrounding lines elided — presumably the bookmark
        // bonus and the bucket_weight computation live here; confirm against
        // the full file.
        GetRecencyScore((now - visits[i].first).InDays());
    summed_visit_points += (value_of_transition * bucket_weight);
  }
- return visits.size() * summed_visit_points /
- (discount_frecency_when_few_visits_ ?
- kMaxVisitsToScore : total_sampled_visits);
+ return visits.size() * summed_visit_points / kMaxVisitsToScore;
}
// static
// NOTE(review): the lines below are the tail of a score-cap computation
// whose beginning is outside this hunk.
      HistoryURLProvider::kScoreForBestInlineableResult - 1;
}
// Fragment of Init(): loads field-trial parameters into the static scoring
// configuration. The removed lines drop the frecency-discount experiment
// flag, matching its removal from GetFrecency() and the static definitions.
  bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue();
- discount_frecency_when_few_visits_ =
- OmniboxFieldTrial::HQPDiscountFrecencyWhenFewVisits();
  allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue();
  allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue();
  initialized_ = true;