src/chrome/browser/bookmarks/bookmark_index.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "chrome/browser/bookmarks/bookmark_index.h"
   6
   7 #include <algorithm>
   8 #include <iterator>
   9 #include <list>
  10
  11 #include "base/i18n/case_conversion.h"
  12 #include "base/strings/string16.h"
  13 #include "chrome/browser/bookmarks/bookmark_model.h"
  14 #include "chrome/browser/bookmarks/bookmark_title_match.h"
  15 #include "chrome/browser/history/history_service.h"
  16 #include "chrome/browser/history/history_service_factory.h"
  17 #include "chrome/browser/history/query_parser.h"
  18 #include "chrome/browser/history/url_database.h"
  19
  20 // Used when finding the set of bookmarks that match a query. Each match
  21 // represents a set of terms (as an interator into the Index) matching the
  22 // query as well as the set of nodes that contain those terms in their titles.
  23 struct BookmarkIndex::Match {
  24   // List of terms matching the query.
  25   std::list<Index::const_iterator> terms;
  26
  27   // The set of nodes matching the terms. As an optimization this is empty
  28   // when we match only one term, and is filled in when we get more than one
  29   // term. We can do this as when we have only one matching term we know
  30   // the set of matching nodes is terms.front()->second.
  31   //
  32   // Use nodes_begin() and nodes_end() to get an iterator over the set as
  33   // it handles the necessary switching between nodes and terms.front().
  34   NodeSet nodes;
  35
  36   // Returns an iterator to the beginning of the matching nodes. See
  37   // description of nodes for why this should be used over nodes.begin().
  38   NodeSet::const_iterator nodes_begin() const;
  39
  40   // Returns an iterator to the beginning of the matching nodes. See
  41   // description of nodes for why this should be used over nodes.end().
  42   NodeSet::const_iterator nodes_end() const;
  43 };
  44
  45 BookmarkIndex::NodeSet::const_iterator
  46     BookmarkIndex::Match::nodes_begin() const {
  47   return nodes.empty() ? terms.front()->second.begin() : nodes.begin();
  48 }
  49
  50 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const {
  51   return nodes.empty() ? terms.front()->second.end() : nodes.end();
  52 }
  53
  54 BookmarkIndex::BookmarkIndex(content::BrowserContext* browser_context)
  55     : browser_context_(browser_context) {
  56 }
  57
  58 BookmarkIndex::~BookmarkIndex() {
  59 }
  60
  61 void BookmarkIndex::Add(const BookmarkNode* node) {
  62   if (!node->is_url())
  63     return;
  64   std::vector<base::string16> terms = ExtractQueryWords(node->GetTitle());
  65   for (size_t i = 0; i < terms.size(); ++i)
  66     RegisterNode(terms[i], node);
  67 }
  68
  69 void BookmarkIndex::Remove(const BookmarkNode* node) {
  70   if (!node->is_url())
  71     return;
  72
  73   std::vector<base::string16> terms = ExtractQueryWords(node->GetTitle());
  74   for (size_t i = 0; i < terms.size(); ++i)
  75     UnregisterNode(terms[i], node);
  76 }
  77
  78 void BookmarkIndex::GetBookmarksWithTitlesMatching(
  79     const base::string16& query,
  80     size_t max_count,
  81     std::vector<BookmarkTitleMatch>* results) {
  82   std::vector<base::string16> terms = ExtractQueryWords(query);
  83   if (terms.empty())
  84     return;
  85
  86   Matches matches;
  87   for (size_t i = 0; i < terms.size(); ++i) {
  88     if (!GetBookmarksWithTitleMatchingTerm(terms[i], i == 0, &matches))
  89       return;
  90   }
  91
  92   NodeTypedCountPairs node_typed_counts;
  93   SortMatches(matches, &node_typed_counts);
  94
  95   // We use a QueryParser to fill in match positions for us. It's not the most
  96   // efficient way to go about this, but by the time we get here we know what
  97   // matches and so this shouldn't be performance critical.
  98   QueryParser parser;
  99   ScopedVector<QueryNode> query_nodes;
 100   parser.ParseQueryNodes(query, &query_nodes.get());
 101
 102   // The highest typed counts should be at the beginning of the results vector
 103   // so that the best matches will always be included in the results. The loop
 104   // that calculates result relevance in HistoryContentsProvider::ConvertResults
 105   // will run backwards to assure higher relevance will be attributed to the
 106   // best matches.
 107   for (NodeTypedCountPairs::const_iterator i = node_typed_counts.begin();
 108        i != node_typed_counts.end() && results->size() < max_count; ++i)
 109     AddMatchToResults(i->first, &parser, query_nodes.get(), results);
 110 }
 111
 112 void BookmarkIndex::SortMatches(const Matches& matches,
 113                                 NodeTypedCountPairs* node_typed_counts) const {
 114   HistoryService* const history_service = browser_context_ ?
 115       HistoryServiceFactory::GetForProfile(
 116           Profile::FromBrowserContext(browser_context_),
 117           Profile::EXPLICIT_ACCESS) : NULL;
 118
 119   history::URLDatabase* url_db = history_service ?
 120       history_service->InMemoryDatabase() : NULL;
 121
 122   for (Matches::const_iterator i = matches.begin(); i != matches.end(); ++i)
 123     ExtractBookmarkNodePairs(url_db, *i, node_typed_counts);
 124
 125   std::sort(node_typed_counts->begin(), node_typed_counts->end(),
 126             &NodeTypedCountPairSortFunc);
 127   // Eliminate duplicates.
 128   node_typed_counts->erase(std::unique(node_typed_counts->begin(),
 129                                        node_typed_counts->end()),
 130                            node_typed_counts->end());
 131 }
 132
 133 void BookmarkIndex::ExtractBookmarkNodePairs(
 134     history::URLDatabase* url_db,
 135     const Match& match,
 136     NodeTypedCountPairs* node_typed_counts) const {
 137
 138   for (NodeSet::const_iterator i = match.nodes_begin();
 139        i != match.nodes_end(); ++i) {
 140     history::URLRow url;
 141     if (url_db)
 142       url_db->GetRowForURL((*i)->url(), &url);
 143     NodeTypedCountPair pair(*i, url.typed_count());
 144     node_typed_counts->push_back(pair);
 145   }
 146 }
 147
 148 void BookmarkIndex::AddMatchToResults(
 149     const BookmarkNode* node,
 150     QueryParser* parser,
 151     const std::vector<QueryNode*>& query_nodes,
 152     std::vector<BookmarkTitleMatch>* results) {
 153   BookmarkTitleMatch title_match;
 154   // Check that the result matches the query.  The previous search
 155   // was a simple per-word search, while the more complex matching
 156   // of QueryParser may filter it out.  For example, the query
 157   // ["thi"] will match the bookmark titled [Thinking], but since
 158   // ["thi"] is quoted we don't want to do a prefix match.
 159   if (parser->DoesQueryMatch(node->GetTitle(), query_nodes,
 160                              &(title_match.match_positions))) {
 161     title_match.node = node;
 162     results->push_back(title_match);
 163   }
 164 }
 165
 166 bool BookmarkIndex::GetBookmarksWithTitleMatchingTerm(const base::string16& term,
 167                                                       bool first_term,
 168                                                       Matches* matches) {
 169   Index::const_iterator i = index_.lower_bound(term);
 170   if (i == index_.end())
 171     return false;
 172
 173   if (!QueryParser::IsWordLongEnoughForPrefixSearch(term)) {
 174     // Term is too short for prefix match, compare using exact match.
 175     if (i->first != term)
 176       return false;  // No bookmarks with this term.
 177
 178     if (first_term) {
 179       Match match;
 180       match.terms.push_back(i);
 181       matches->push_back(match);
 182       return true;
 183     }
 184     CombineMatchesInPlace(i, matches);
 185   } else if (first_term) {
 186     // This is the first term and we're doing a prefix match. Loop through
 187     // index adding all entries that start with term to matches.
 188     while (i != index_.end() &&
 189            i->first.size() >= term.size() &&
 190            term.compare(0, term.size(), i->first, 0, term.size()) == 0) {
 191       Match match;
 192       match.terms.push_back(i);
 193       matches->push_back(match);
 194       ++i;
 195     }
 196   } else {
 197     // Prefix match and not the first term. Loop through index combining
 198     // current matches in matches with term, placing result in result.
 199     Matches result;
 200     while (i != index_.end() &&
 201            i->first.size() >= term.size() &&
 202            term.compare(0, term.size(), i->first, 0, term.size()) == 0) {
 203       CombineMatches(i, *matches, &result);
 204       ++i;
 205     }
 206     matches->swap(result);
 207   }
 208   return !matches->empty();
 209 }
 210
 211 void BookmarkIndex::CombineMatchesInPlace(const Index::const_iterator& index_i,
 212                                           Matches* matches) {
 213   for (size_t i = 0; i < matches->size(); ) {
 214     Match* match = &((*matches)[i]);
 215     NodeSet intersection;
 216     std::set_intersection(match->nodes_begin(), match->nodes_end(),
 217                           index_i->second.begin(), index_i->second.end(),
 218                           std::inserter(intersection, intersection.begin()));
 219     if (intersection.empty()) {
 220       matches->erase(matches->begin() + i);
 221     } else {
 222       match->terms.push_back(index_i);
 223       match->nodes.swap(intersection);
 224       ++i;
 225     }
 226   }
 227 }
 228
 229 void BookmarkIndex::CombineMatches(const Index::const_iterator& index_i,
 230                                    const Matches& current_matches,
 231                                    Matches* result) {
 232   for (size_t i = 0; i < current_matches.size(); ++i) {
 233     const Match& match = current_matches[i];
 234     NodeSet intersection;
 235     std::set_intersection(match.nodes_begin(), match.nodes_end(),
 236                           index_i->second.begin(), index_i->second.end(),
 237                           std::inserter(intersection, intersection.begin()));
 238     if (!intersection.empty()) {
 239       result->push_back(Match());
 240       Match& combined_match = result->back();
 241       combined_match.terms = match.terms;
 242       combined_match.terms.push_back(index_i);
 243       combined_match.nodes.swap(intersection);
 244     }
 245   }
 246 }
 247
 248 std::vector<base::string16> BookmarkIndex::ExtractQueryWords(
 249     const base::string16& query) {
 250   std::vector<base::string16> terms;
 251   if (query.empty())
 252     return std::vector<base::string16>();
 253   QueryParser parser;
 254   // TODO(brettw): use ICU normalization:
 255   // http://userguide.icu-project.org/transforms/normalization
 256   parser.ParseQueryWords(base::i18n::ToLower(query), &terms);
 257   return terms;
 258 }
 259
 260 void BookmarkIndex::RegisterNode(const base::string16& term,
 261                                  const BookmarkNode* node) {
 262   index_[term].insert(node);
 263 }
 264
 265 void BookmarkIndex::UnregisterNode(const base::string16& term,
 266                                    const BookmarkNode* node) {
 267   Index::iterator i = index_.find(term);
 268   if (i == index_.end()) {
 269     // We can get here if the node has the same term more than once. For
 270     // example, a bookmark with the title 'foo foo' would end up here.
 271     return;
 272   }
 273   i->second.erase(node);
 274   if (i->second.empty())
 275     index_.erase(i);
 276 }