Upstream version 7.36.149.0
[platform/framework/web/crosswalk.git] / src / chrome / browser / history / scored_history_match.cc
index c462df9..e44ea87 100644 (file)
 
 #include <math.h>
 
+#include "base/logging.h"
 #include "base/metrics/histogram.h"
 #include "base/strings/string_util.h"
 #include "base/strings/utf_string_conversions.h"
 #include "chrome/browser/autocomplete/history_url_provider.h"
 #include "chrome/browser/autocomplete/url_prefix.h"
-#include "chrome/browser/bookmarks/bookmark_service.h"
 #include "chrome/browser/omnibox/omnibox_field_trial.h"
-#include "chrome/common/chrome_switches.h"
+#include "components/bookmarks/core/browser/bookmark_service.h"
+#include "components/bookmarks/core/browser/bookmark_utils.h"
 #include "content/public/browser/browser_thread.h"
 
 namespace history {
@@ -34,7 +35,6 @@ float* ScoredHistoryMatch::raw_term_score_to_topicality_score_ = NULL;
 float* ScoredHistoryMatch::days_ago_to_recency_score_ = NULL;
 bool ScoredHistoryMatch::initialized_ = false;
 int ScoredHistoryMatch::bookmark_value_ = 1;
-bool ScoredHistoryMatch::discount_frecency_when_few_visits_ = false;
 bool ScoredHistoryMatch::allow_tld_matches_ = false;
 bool ScoredHistoryMatch::allow_scheme_matches_ = false;
 bool ScoredHistoryMatch::also_do_hup_like_scoring_ = false;
@@ -46,14 +46,16 @@ ScoredHistoryMatch::ScoredHistoryMatch()
   Init();
 }
 
-ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row,
-                                       const VisitInfoVector& visits,
-                                       const std::string& languages,
-                                       const base::string16& lower_string,
-                                       const String16Vector& terms,
-                                       const RowWordStarts& word_starts,
-                                       const base::Time now,
-                                       BookmarkService* bookmark_service)
+ScoredHistoryMatch::ScoredHistoryMatch(
+    const URLRow& row,
+    const VisitInfoVector& visits,
+    const std::string& languages,
+    const base::string16& lower_string,
+    const String16Vector& terms,
+    const WordStarts& terms_to_word_starts_offsets,
+    const RowWordStarts& word_starts,
+    const base::Time now,
+    BookmarkService* bookmark_service)
     : HistoryMatch(row, 0, false, false),
       raw_score_(0),
       can_inline_(false) {
@@ -65,8 +67,10 @@ ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row,
 
   // Figure out where each search term appears in the URL and/or page title
   // so that we can score as well as provide autocomplete highlighting.
-  base::string16 url = CleanUpUrlForMatching(gurl, languages);
-  base::string16 title = CleanUpTitleForMatching(row.title());
+  base::OffsetAdjuster::Adjustments adjustments;
+  base::string16 url =
+      bookmark_utils::CleanUpUrlForMatching(gurl, languages, &adjustments);
+  base::string16 title = bookmark_utils::CleanUpTitleForMatching(row.title());
   int term_num = 0;
   for (String16Vector::const_iterator iter = terms.begin(); iter != terms.end();
        ++iter, ++term_num) {
@@ -146,8 +150,8 @@ ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row,
         num_components_in_best_prefix);
   }
 
-  const float topicality_score =
-      GetTopicalityScore(terms.size(), url, word_starts);
+  const float topicality_score = GetTopicalityScore(
+      terms.size(), url, terms_to_word_starts_offsets, word_starts);
   const float frecency_score = GetFrecency(
       now, (bookmark_service && bookmark_service->IsBookmarked(gurl)), visits);
   raw_score_ = GetFinalRelevancyScore(topicality_score, frecency_score);
@@ -204,6 +208,13 @@ ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row,
     raw_score_ = std::min(max_assigned_score_for_non_inlineable_matches_,
                           raw_score_);
   }
+
+  // Now that we're done processing this entry, correct the offsets of the
+  // matches in |url_matches_| so they point to offsets in the original URL
+  // spec, not the cleaned-up URL string that we used for matching.
+  std::vector<size_t> offsets = OffsetsFromTermMatches(url_matches_);
+  base::OffsetAdjuster::UnadjustOffsets(adjustments, &offsets);
+  url_matches_ = ReplaceOffsetsInTermMatches(url_matches_, offsets);
 }
 
 ScoredHistoryMatch::~ScoredHistoryMatch() {}
@@ -250,6 +261,7 @@ bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1,
 // static
 TermMatches ScoredHistoryMatch::FilterTermMatchesByWordStarts(
     const TermMatches& term_matches,
+    const WordStarts& terms_to_word_starts_offsets,
     const WordStarts& word_starts,
     size_t start_pos,
     size_t end_pos) {
@@ -261,10 +273,11 @@ TermMatches ScoredHistoryMatch::FilterTermMatchesByWordStarts(
   WordStarts::const_iterator end_word_starts = word_starts.end();
   for (TermMatches::const_iterator iter = term_matches.begin();
        iter != term_matches.end(); ++iter) {
-    // Advance next_word_starts until it's >= the position of the term
-    // we're considering.
+    const size_t term_offset = terms_to_word_starts_offsets[iter->term_num];
+    // Advance next_word_starts until it's >= the position of the term we're
+    // considering (adjusted for where the word begins within the term).
     while ((next_word_starts != end_word_starts) &&
-           (*next_word_starts < iter->offset))
+           (*next_word_starts < (iter->offset + term_offset)))
       ++next_word_starts;
     // Add the match if it's before the position we start filtering at or
     // after the position we stop filtering at (assuming we have a position
@@ -272,7 +285,7 @@ TermMatches ScoredHistoryMatch::FilterTermMatchesByWordStarts(
     if ((iter->offset < start_pos) ||
         ((end_pos != std::string::npos) && (iter->offset >= end_pos)) ||
         ((next_word_starts != end_word_starts) &&
-         (*next_word_starts == iter->offset)))
+         (*next_word_starts == iter->offset + term_offset)))
       filtered_matches.push_back(*iter);
   }
   return filtered_matches;
@@ -281,6 +294,7 @@ TermMatches ScoredHistoryMatch::FilterTermMatchesByWordStarts(
 float ScoredHistoryMatch::GetTopicalityScore(
     const int num_terms,
     const base::string16& url,
+    const WordStarts& terms_to_word_starts_offsets,
     const RowWordStarts& word_starts) {
   // Because the below thread is not thread safe, we check that we're
   // only calling it from one thread: the UI thread.  Specifically,
@@ -324,23 +338,26 @@ float ScoredHistoryMatch::GetTopicalityScore(
   // First, filter all matches not at a word boundary and in the path (or
   // later).
   url_matches_ = FilterTermMatchesByWordStarts(
-      url_matches_, word_starts.url_word_starts_, end_of_hostname_pos,
+      url_matches_, terms_to_word_starts_offsets, word_starts.url_word_starts_,
+      end_of_hostname_pos,
       std::string::npos);
   if (colon_pos != std::string::npos) {
     // Also filter matches not at a word boundary and in the scheme.
     url_matches_ = FilterTermMatchesByWordStarts(
-        url_matches_, word_starts.url_word_starts_, 0, colon_pos);
+        url_matches_, terms_to_word_starts_offsets,
+        word_starts.url_word_starts_, 0, colon_pos);
   }
   for (TermMatches::const_iterator iter = url_matches_.begin();
        iter != url_matches_.end(); ++iter) {
-    // Advance next_word_starts until it's >= the position of the term
-    // we're considering.
+    const size_t term_offset = terms_to_word_starts_offsets[iter->term_num];
+    // Advance next_word_starts until it's >= the position of the term we're
+    // considering (adjusted for where the word begins within the term).
     while ((next_word_starts != end_word_starts) &&
-           (*next_word_starts < iter->offset)) {
+           (*next_word_starts < (iter->offset + term_offset))) {
       ++next_word_starts;
     }
     const bool at_word_boundary = (next_word_starts != end_word_starts) &&
-        (*next_word_starts == iter->offset);
+        (*next_word_starts == iter->offset + term_offset);
     if ((question_mark_pos != std::string::npos) &&
         (iter->offset > question_mark_pos)) {
       // The match is in a CGI ?... fragment.
@@ -379,19 +396,22 @@ float ScoredHistoryMatch::GetTopicalityScore(
   end_word_starts = word_starts.title_word_starts_.end();
   int word_num = 0;
   title_matches_ = FilterTermMatchesByWordStarts(
-      title_matches_, word_starts.title_word_starts_, 0, std::string::npos);
+      title_matches_, terms_to_word_starts_offsets,
+      word_starts.title_word_starts_, 0, std::string::npos);
   for (TermMatches::const_iterator iter = title_matches_.begin();
        iter != title_matches_.end(); ++iter) {
-    // Advance next_word_starts until it's >= the position of the term
-    // we're considering.
+    const size_t term_offset = terms_to_word_starts_offsets[iter->term_num];
+    // Advance next_word_starts until it's >= the position of the term we're
+    // considering (adjusted for where the word begins within the term).
     while ((next_word_starts != end_word_starts) &&
-           (*next_word_starts < iter->offset)) {
+           (*next_word_starts < (iter->offset + term_offset))) {
       ++next_word_starts;
       ++word_num;
     }
     if (word_num >= 10) break;  // only count the first ten words
-    DCHECK((next_word_starts != end_word_starts) &&
-        (*next_word_starts == iter->offset)) << "not at word boundary";
+    DCHECK(next_word_starts != end_word_starts);
+    DCHECK_EQ(*next_word_starts, iter->offset + term_offset)
+        << "not at word boundary";
     term_scores[iter->term_num] += 8;
   }
   // TODO(mpearson): Restore logic for penalizing out-of-order matches.
@@ -508,11 +528,8 @@ float ScoredHistoryMatch::GetFrecency(const base::Time& now,
   // kMaxVisitsToScore as the denominator for the average regardless of
   // how many visits there were in order to penalize a match that has
   // fewer visits than kMaxVisitsToScore.
-  const int total_sampled_visits = std::min(visits.size(), kMaxVisitsToScore);
-  if (total_sampled_visits == 0)
-    return 0.0f;
   float summed_visit_points = 0;
-  for (int i = 0; i < total_sampled_visits; ++i) {
+  for (size_t i = 0; i < std::min(visits.size(), kMaxVisitsToScore); ++i) {
     int value_of_transition =
         (visits[i].second == content::PAGE_TRANSITION_TYPED) ? 20 : 1;
     if (bookmarked)
@@ -521,9 +538,7 @@ float ScoredHistoryMatch::GetFrecency(const base::Time& now,
         GetRecencyScore((now - visits[i].first).InDays());
     summed_visit_points += (value_of_transition * bucket_weight);
   }
-  return visits.size() * summed_visit_points /
-      (discount_frecency_when_few_visits_ ?
-          kMaxVisitsToScore : total_sampled_visits);
+  return visits.size() * summed_visit_points / kMaxVisitsToScore;
 }
 
 // static
@@ -585,8 +600,6 @@ void ScoredHistoryMatch::Init() {
         HistoryURLProvider::kScoreForBestInlineableResult - 1;
   }
   bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue();
-  discount_frecency_when_few_visits_ =
-      OmniboxFieldTrial::HQPDiscountFrecencyWhenFewVisits();
   allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue();
   allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue();
   initialized_ = true;