1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_
6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_
14 #include "base/basictypes.h"
15 #include "base/files/file_path.h"
16 #include "base/gtest_prod_util.h"
17 #include "base/memory/ref_counted.h"
18 #include "base/memory/weak_ptr.h"
19 #include "base/strings/string16.h"
20 #include "chrome/browser/autocomplete/autocomplete_match.h"
21 #include "chrome/browser/autocomplete/history_provider_util.h"
22 #include "chrome/browser/common/cancelable_request.h"
23 #include "chrome/browser/history/history_db_task.h"
24 #include "chrome/browser/history/history_types.h"
25 #include "chrome/browser/history/in_memory_url_index_types.h"
26 #include "chrome/browser/history/scored_history_match.h"
27 #include "content/public/browser/notification_observer.h"
28 #include "content/public/browser/notification_registrar.h"
29 #include "sql/connection.h"
31 class HistoryQuickProviderTest;
38 namespace in_memory_url_index {
39 class InMemoryURLIndexCacheItem;
44 namespace imui = in_memory_url_index;
46 class HistoryDatabase;
47 class URLIndexPrivateData;
48 struct URLsDeletedDetails;
49 struct URLsModifiedDetails;
50 struct URLVisitedDetails;
52 // The URL history source.
53 // Holds portions of the URL database in memory in an indexed form. Used to
54 // quickly look up matching URLs for a given query string. Used by
55 // the HistoryURLProvider for inline autocomplete and to provide URL
56 // matches to the omnibox.
58 // Note about multi-byte codepoints and the data structures in the
59 // InMemoryURLIndex class: One will quickly notice that no effort is made to
60 // ensure that multi-byte character boundaries are detected when indexing the
61 // words and characters in the URL history database, except when converting
62 // URL strings to lowercase. Multi-byte-ness makes no difference when
63 // indexing or when searching the index, as the final filtering of results
64 // depends on the comparison of a string of bytes, not of individual
65 // characters. While the lookup of those bytes during a search in the
66 // |char_word_map_| could serve up words in which the individual char16
67 // occurs as a portion of a composite character, the next filtering step
68 // will eliminate such words, except in the case where a single character
69 // is being searched for and that character occurs as the second char16 of a
70 // multi-char16 sequence.
71 class InMemoryURLIndex : public content::NotificationObserver,
72 public base::SupportsWeakPtr<InMemoryURLIndex> {
74 // Defines an abstract class which is notified upon completion of restoring
75 // the index's private data, either by reading from the cache file or by
76 // rebuilding from the history database.
// An implementation is registered with set_restore_cache_observer().
77 class RestoreCacheObserver {
// Virtual so that an implementation can be destroyed through a pointer to
// this interface.
79 virtual ~RestoreCacheObserver();
81 // Callback that lets the observer know that the restore operation has
82 // completed. |succeeded| indicates if the restore was successful. This is
83 // called on the UI thread.
84 virtual void OnCacheRestoreFinished(bool succeeded) = 0;
87 // Defines an abstract class which is notified upon completion of saving
88 // the index's private data to the cache file.
89 class SaveCacheObserver {
91 virtual ~SaveCacheObserver();
93 // Callback that lets the observer know that the save operation has completed.
94 // |succeeded| indicates if the save was successful. Called on the UI thread.
95 virtual void OnCacheSaveFinished(bool succeeded) = 0;
98 // |profile|, which may be NULL during unit testing, is used to register for
99 // history changes. |history_dir| is a path to the directory containing the
100 // history database within the profile wherein the cache and transaction
101 // journals will be stored. |languages| gives a list of language encodings by
102 // which URLs and omnibox searches are broken down into words and characters.
103 InMemoryURLIndex(Profile* profile,
104 const base::FilePath& history_dir,
105 const std::string& languages);
106 virtual ~InMemoryURLIndex();
108 // Opens and prepares the index of historical URL visits. If the index's
109 // private data cannot be restored from its cache file, it is rebuilt from the history database.
113 // Signals that any outstanding initialization should be canceled and
114 // flushes the cache to disk.
117 // Scans the history index and returns a vector with all scored, matching
118 // history items. This entry point simply forwards the call on to the
119 // URLIndexPrivateData class. For a complete description of this function
120 // refer to that class. If |cursor_position| is string16::npos, the
121 // function doesn't do anything special with the cursor; this is equivalent
122 // to the cursor being at the end.
123 ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string,
124 size_t cursor_position);
126 // Deletes the index entry, if any, for the given |url|.
127 void DeleteURL(const GURL& url);
129 // Sets the optional observers for completion of restoration and saving of
130 // the index's private data. Only the raw pointer is stored.
// NOTE(review): the index presumably does not take ownership, so the observer
// should outlive the index or be reset beforehand -- confirm against callers.
131 void set_restore_cache_observer(
132 RestoreCacheObserver* restore_cache_observer) {
133 restore_cache_observer_ = restore_cache_observer;
135 void set_save_cache_observer(SaveCacheObserver* save_cache_observer) {
136 save_cache_observer_ = save_cache_observer;
139 // Indicates that the index restoration is complete.
140 bool restored() const {
145 friend class ::HistoryQuickProviderTest;
146 friend class InMemoryURLIndexTest;
147 friend class InMemoryURLIndexCacheTest;
148 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization);
150 // Creating an instance without a history path is not allowed (tests excepted).
153 // HistoryDBTask used to rebuild our private data from the history database.
// Completion is signalled back to the index through
// InMemoryURLIndex::DoneRebuidingPrivateDataFromHistoryDB().
154 class RebuildPrivateDataFromHistoryDBTask : public HistoryDBTask {
// |index| is the index to call back at completion, |languages| gives the
// languages used for word-breaking and |scheme_whitelist| gives the schemes
// to be indexed.
156 explicit RebuildPrivateDataFromHistoryDBTask(
157 InMemoryURLIndex* index,
158 const std::string& languages,
159 const std::set<std::string>& scheme_whitelist);
// HistoryDBTask overrides.
// NOTE(review): judging by the names, RunOnDBThread() runs on the history
// database thread and DoneRunOnMainThread() runs afterwards on the main
// thread -- confirm against the HistoryDBTask declaration.
161 virtual bool RunOnDBThread(HistoryBackend* backend,
162 history::HistoryDatabase* db) OVERRIDE;
163 virtual void DoneRunOnMainThread() OVERRIDE;
166 virtual ~RebuildPrivateDataFromHistoryDBTask();
168 InMemoryURLIndex* index_; // Call back to this index at completion.
169 std::string languages_; // Languages for word-breaking.
170 std::set<std::string> scheme_whitelist_; // Schemes to be indexed.
171 bool succeeded_; // Indicates if the rebuild was successful.
172 scoped_refptr<URLIndexPrivateData> data_; // The rebuilt private data.
174 DISALLOW_COPY_AND_ASSIGN(RebuildPrivateDataFromHistoryDBTask);
177 // Initializes all index data members in preparation for restoring the index
178 // from the cache or a complete rebuild from the history database.
179 void ClearPrivateData();
181 // Constructs a file path for the cache file within the same directory where
182 // the history database is kept and saves that path to |file_path|. Returns
183 // true if |file_path| can be successfully constructed. (This function
184 // provided as a hook for unit testing.)
185 bool GetCacheFilePath(base::FilePath* file_path);
187 // Restores the index's private data from the cache file stored in the
188 // profile directory.
189 void PostRestoreFromCacheFileTask();
191 // Schedules a history task to rebuild our private data from the history database.
193 void ScheduleRebuildFromHistory();
195 // Callback used by RebuildPrivateDataFromHistoryDBTask to signal completion
196 // of rebuilding our private data from the history database. |succeeded|
197 // will be true if the rebuild was successful. |private_data| will point to a
198 // new instance of the private data just rebuilt.
199 void DoneRebuidingPrivateDataFromHistoryDB(
201 scoped_refptr<URLIndexPrivateData> private_data);
203 // Rebuilds the history index from the history database in |history_db|.
204 // Used for unit testing only.
205 void RebuildFromHistory(HistoryDatabase* history_db);
207 // Determines if the private data was successfully reloaded from the cache
208 // file or if the private data must be rebuilt from the history database.
209 // |private_data_ptr|'s data will be NULL if the cache file load failed. If
210 // successful, sets the private data and notifies any
211 // |restore_cache_observer_|. Otherwise, kicks off a rebuild from the history database.
213 void OnCacheLoadDone(scoped_refptr<URLIndexPrivateData> private_data_ptr);
215 // Callback function that sets the private data from the just-restored-from-
216 // file |private_data|. Notifies any |restore_cache_observer_| that the
217 // restore has succeeded.
218 void OnCacheRestored(URLIndexPrivateData* private_data);
220 // Posts a task to cache the index private data and write the cache file to
221 // the profile directory.
222 void PostSaveToCacheFileTask();
224 // Saves private_data_ to the given |path|. Runs on the UI thread.
225 // Provided for unit testing so that a test cache file can be used.
226 void DoSaveToCacheFile(const base::FilePath& path);
228 // Notifies the observer, if any, of the success of the private data caching.
229 // |succeeded| is true on a successful save.
230 void OnCacheSaveDone(bool succeeded);
232 // Handles notifications of history changes. Overrides content::NotificationObserver.
233 virtual void Observe(int notification_type,
234 const content::NotificationSource& source,
235 const content::NotificationDetails& details) OVERRIDE;
237 // Notification handlers, one per kind of history-change details struct.
// NOTE(review): presumably dispatched from Observe() -- confirm in the .cc file.
238 void OnURLVisited(const URLVisitedDetails* details);
239 void OnURLsModified(const URLsModifiedDetails* details);
240 void OnURLsDeleted(const URLsDeletedDetails* details);
242 // Sets the directory wherein the cache file will be maintained.
243 // For unit test usage only.
244 void set_history_dir(const base::FilePath& dir_path) {
245 history_dir_ = dir_path;
248 // Returns a pointer to our private data. For unit testing only.
249 URLIndexPrivateData* private_data() { return private_data_.get(); }
251 // Returns the set of whitelisted schemes (read-only, hence const). For unit testing only.
252 const std::set<std::string>& scheme_whitelist() const { return scheme_whitelist_; }
254 // The profile, may be null when testing.
257 // Directory where cache file resides. This is, except when unit testing,
258 // the same directory in which the profile's history database is found. It
259 // should never be empty.
260 base::FilePath history_dir_;
262 // Languages used during the word-breaking process during indexing.
263 std::string languages_;
265 // Only URLs with a whitelisted scheme are indexed.
266 std::set<std::string> scheme_whitelist_;
268 // The index's durable private data.
269 scoped_refptr<URLIndexPrivateData> private_data_;
271 // Observers to notify upon restoration or save of the private data cache.
272 RestoreCacheObserver* restore_cache_observer_;
273 SaveCacheObserver* save_cache_observer_;
275 CancelableRequestConsumer cache_reader_consumer_;
276 content::NotificationRegistrar registrar_;
278 // Set to true once the shutdown process has begun.
281 // Set to true once the index restoration is complete.
284 // Set to true when changes to the index have been made and the index needs
285 // to be cached. Set to false when the index has been cached. Used as a
286 // temporary safety check to ensure that the cache is saved before the
287 // index has been destructed.
288 // TODO(mrossetti): Eliminate once the transition to SQLite has been done.
289 // http://crbug.com/83659
290 bool needs_to_be_cached_;
292 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex);
295 } // namespace history
297 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_