Upstream version 9.38.198.0
[platform/framework/web/crosswalk.git] / src / chrome / browser / history / in_memory_url_index_unittest.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <algorithm>
6 #include <fstream>
7
8 #include "base/auto_reset.h"
9 #include "base/file_util.h"
10 #include "base/files/file_path.h"
11 #include "base/files/scoped_temp_dir.h"
12 #include "base/path_service.h"
13 #include "base/run_loop.h"
14 #include "base/strings/string16.h"
15 #include "base/strings/string_util.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "chrome/browser/bookmarks/bookmark_model_factory.h"
18 #include "chrome/browser/chrome_notification_types.h"
19 #include "chrome/browser/history/history_backend.h"
20 #include "chrome/browser/history/history_database.h"
21 #include "chrome/browser/history/history_notifications.h"
22 #include "chrome/browser/history/history_service.h"
23 #include "chrome/browser/history/history_service_factory.h"
24 #include "chrome/browser/history/in_memory_url_index.h"
25 #include "chrome/browser/history/in_memory_url_index_types.h"
26 #include "chrome/browser/history/url_index_private_data.h"
27 #include "chrome/common/chrome_paths.h"
28 #include "chrome/test/base/history_index_restore_observer.h"
29 #include "chrome/test/base/testing_profile.h"
30 #include "components/bookmarks/test/bookmark_test_helpers.h"
31 #include "components/history/core/browser/history_client.h"
32 #include "content/public/browser/notification_details.h"
33 #include "content/public/browser/notification_source.h"
34 #include "content/public/test/test_browser_thread_bundle.h"
35 #include "sql/transaction.h"
36 #include "testing/gtest/include/gtest/gtest.h"
37
38 using base::ASCIIToUTF16;
39
namespace {
// Match limit handed to InMemoryURLIndex::HistoryItemsForTerms() by the tests
// in this file.
const size_t kMaxMatches = 3U;
}  // namespace
43
// The test version of the history URL database table ('urls') is contained in
// a database file created from a text file
// ('url_history_provider_test.db.txt').
// The only difference between this table and a live 'urls' table from a
// profile is that the last_visit_time column in the test table contains a
// number specifying the number of days relative to 'today' to which the
// absolute time should be set during the test setup stage.
//
// The format of the test database text file is that of a SQLite .dump file.
// Note that only lines whose first character is an upper-case letter are
// processed when creating the test database.
54
55 namespace history {
56
57 // -----------------------------------------------------------------------------
58
59 // Observer class so the unit tests can wait while the cache is being saved.
60 class CacheFileSaverObserver : public InMemoryURLIndex::SaveCacheObserver {
61  public:
62   explicit CacheFileSaverObserver(const base::Closure& task);
63
64   bool succeeded() { return succeeded_; }
65
66  private:
67   // SaveCacheObserver implementation.
68   virtual void OnCacheSaveFinished(bool succeeded) OVERRIDE;
69
70   base::Closure task_;
71   bool succeeded_;
72
73   DISALLOW_COPY_AND_ASSIGN(CacheFileSaverObserver);
74 };
75
76 CacheFileSaverObserver::CacheFileSaverObserver(const base::Closure& task)
77     : task_(task),
78       succeeded_(false) {
79 }
80
81 void CacheFileSaverObserver::OnCacheSaveFinished(bool succeeded) {
82   succeeded_ = succeeded;
83   task_.Run();
84 }
85
86 // -----------------------------------------------------------------------------
87
88 class InMemoryURLIndexTest : public testing::Test {
89  public:
90   InMemoryURLIndexTest();
91
92  protected:
93   // Test setup.
94   virtual void SetUp();
95
96   // Allows the database containing the test data to be customized by
97   // subclasses.
98   virtual base::FilePath::StringType TestDBName() const;
99
100   // Validates that the given |term| is contained in |cache| and that it is
101   // marked as in-use.
102   void CheckTerm(const URLIndexPrivateData::SearchTermCacheMap& cache,
103                  base::string16 term) const;
104
105   // Pass-through function to simplify our friendship with HistoryService.
106   sql::Connection& GetDB();
107
108   // Pass-through functions to simplify our friendship with InMemoryURLIndex.
109   URLIndexPrivateData* GetPrivateData() const;
110   base::CancelableTaskTracker* GetPrivateDataTracker() const;
111   void ClearPrivateData();
112   void set_history_dir(const base::FilePath& dir_path);
113   bool GetCacheFilePath(base::FilePath* file_path) const;
114   void PostRestoreFromCacheFileTask();
115   void PostSaveToCacheFileTask();
116   void Observe(int notification_type,
117                const content::NotificationSource& source,
118                const content::NotificationDetails& details);
119   const std::set<std::string>& scheme_whitelist();
120
121
122   // Pass-through functions to simplify our friendship with URLIndexPrivateData.
123   bool UpdateURL(const URLRow& row);
124   bool DeleteURL(const GURL& url);
125
126   // Data verification helper functions.
127   void ExpectPrivateDataNotEmpty(const URLIndexPrivateData& data);
128   void ExpectPrivateDataEmpty(const URLIndexPrivateData& data);
129   void ExpectPrivateDataEqual(const URLIndexPrivateData& expected,
130                               const URLIndexPrivateData& actual);
131
132   content::TestBrowserThreadBundle thread_bundle_;
133   TestingProfile profile_;
134   HistoryService* history_service_;
135
136   scoped_ptr<InMemoryURLIndex> url_index_;
137   HistoryDatabase* history_database_;
138 };
139
140 InMemoryURLIndexTest::InMemoryURLIndexTest() {
141 }
142
143 sql::Connection& InMemoryURLIndexTest::GetDB() {
144   return history_database_->GetDB();
145 }
146
147 URLIndexPrivateData* InMemoryURLIndexTest::GetPrivateData() const {
148   DCHECK(url_index_->private_data());
149   return url_index_->private_data();
150 }
151
152 base::CancelableTaskTracker* InMemoryURLIndexTest::GetPrivateDataTracker()
153     const {
154   DCHECK(url_index_->private_data_tracker());
155   return url_index_->private_data_tracker();
156 }
157
158 void InMemoryURLIndexTest::ClearPrivateData() {
159   return url_index_->ClearPrivateData();
160 }
161
162 void InMemoryURLIndexTest::set_history_dir(const base::FilePath& dir_path) {
163   return url_index_->set_history_dir(dir_path);
164 }
165
166 bool InMemoryURLIndexTest::GetCacheFilePath(base::FilePath* file_path) const {
167   DCHECK(file_path);
168   return url_index_->GetCacheFilePath(file_path);
169 }
170
171 void InMemoryURLIndexTest::PostRestoreFromCacheFileTask() {
172   url_index_->PostRestoreFromCacheFileTask();
173 }
174
175 void InMemoryURLIndexTest::PostSaveToCacheFileTask() {
176   url_index_->PostSaveToCacheFileTask();
177 }
178
179 void InMemoryURLIndexTest::Observe(
180     int notification_type,
181     const content::NotificationSource& source,
182     const content::NotificationDetails& details) {
183   url_index_->Observe(notification_type, source, details);
184 }
185
186 const std::set<std::string>& InMemoryURLIndexTest::scheme_whitelist() {
187   return url_index_->scheme_whitelist();
188 }
189
190 bool InMemoryURLIndexTest::UpdateURL(const URLRow& row) {
191   return GetPrivateData()->UpdateURL(history_service_,
192                                      row,
193                                      url_index_->languages_,
194                                      url_index_->scheme_whitelist_,
195                                      GetPrivateDataTracker());
196 }
197
198 bool InMemoryURLIndexTest::DeleteURL(const GURL& url) {
199   return GetPrivateData()->DeleteURL(url);
200 }
201
202 void InMemoryURLIndexTest::SetUp() {
203   // We cannot access the database until the backend has been loaded.
204   ASSERT_TRUE(profile_.CreateHistoryService(true, false));
205   profile_.CreateBookmarkModel(true);
206   test::WaitForBookmarkModelToLoad(
207       BookmarkModelFactory::GetForProfile(&profile_));
208   profile_.BlockUntilHistoryProcessesPendingRequests();
209   profile_.BlockUntilHistoryIndexIsRefreshed();
210   history_service_ = HistoryServiceFactory::GetForProfile(
211       &profile_, Profile::EXPLICIT_ACCESS);
212   ASSERT_TRUE(history_service_);
213   HistoryBackend* backend = history_service_->history_backend_.get();
214   history_database_ = backend->db();
215
216   // Create and populate a working copy of the URL history database.
217   base::FilePath history_proto_path;
218   PathService::Get(chrome::DIR_TEST_DATA, &history_proto_path);
219   history_proto_path = history_proto_path.Append(
220       FILE_PATH_LITERAL("History"));
221   history_proto_path = history_proto_path.Append(TestDBName());
222   EXPECT_TRUE(base::PathExists(history_proto_path));
223
224   std::ifstream proto_file(history_proto_path.value().c_str());
225   static const size_t kCommandBufferMaxSize = 2048;
226   char sql_cmd_line[kCommandBufferMaxSize];
227
228   sql::Connection& db(GetDB());
229   ASSERT_TRUE(db.is_open());
230   {
231     sql::Transaction transaction(&db);
232     transaction.Begin();
233     while (!proto_file.eof()) {
234       proto_file.getline(sql_cmd_line, kCommandBufferMaxSize);
235       if (!proto_file.eof()) {
236         // We only process lines which begin with a upper-case letter.
237         // TODO(mrossetti): Can iswupper() be used here?
238         if (sql_cmd_line[0] >= 'A' && sql_cmd_line[0] <= 'Z') {
239           std::string sql_cmd(sql_cmd_line);
240           sql::Statement sql_stmt(db.GetUniqueStatement(sql_cmd_line));
241           EXPECT_TRUE(sql_stmt.Run());
242         }
243       }
244     }
245     transaction.Commit();
246   }
247
248   // Update the last_visit_time table column in the "urls" table
249   // such that it represents a time relative to 'now'.
250   sql::Statement statement(db.GetUniqueStatement(
251       "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls;"));
252   ASSERT_TRUE(statement.is_valid());
253   base::Time time_right_now = base::Time::NowFromSystemTime();
254   base::TimeDelta day_delta = base::TimeDelta::FromDays(1);
255   {
256     sql::Transaction transaction(&db);
257     transaction.Begin();
258     while (statement.Step()) {
259       URLRow row;
260       history_database_->FillURLRow(statement, &row);
261       base::Time last_visit = time_right_now;
262       for (int64 i = row.last_visit().ToInternalValue(); i > 0; --i)
263         last_visit -= day_delta;
264       row.set_last_visit(last_visit);
265       history_database_->UpdateURLRow(row.id(), row);
266     }
267     transaction.Commit();
268   }
269
270   // Update the visit_time table column in the "visits" table
271   // such that it represents a time relative to 'now'.
272   statement.Assign(db.GetUniqueStatement(
273       "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits;"));
274   ASSERT_TRUE(statement.is_valid());
275   {
276     sql::Transaction transaction(&db);
277     transaction.Begin();
278     while (statement.Step()) {
279       VisitRow row;
280       history_database_->FillVisitRow(statement, &row);
281       base::Time last_visit = time_right_now;
282       for (int64 i = row.visit_time.ToInternalValue(); i > 0; --i)
283         last_visit -= day_delta;
284       row.visit_time = last_visit;
285       history_database_->UpdateVisitRow(row);
286     }
287     transaction.Commit();
288   }
289
290   url_index_.reset(new InMemoryURLIndex(
291       &profile_, base::FilePath(), "en,ja,hi,zh",
292       history_service_->history_client()));
293   url_index_->Init();
294   url_index_->RebuildFromHistory(history_database_);
295 }
296
297 base::FilePath::StringType InMemoryURLIndexTest::TestDBName() const {
298     return FILE_PATH_LITERAL("url_history_provider_test.db.txt");
299 }
300
301 void InMemoryURLIndexTest::CheckTerm(
302     const URLIndexPrivateData::SearchTermCacheMap& cache,
303     base::string16 term) const {
304   URLIndexPrivateData::SearchTermCacheMap::const_iterator cache_iter(
305       cache.find(term));
306   ASSERT_TRUE(cache.end() != cache_iter)
307       << "Cache does not contain '" << term << "' but should.";
308   URLIndexPrivateData::SearchTermCacheItem cache_item = cache_iter->second;
309   EXPECT_TRUE(cache_item.used_)
310       << "Cache item '" << term << "' should be marked as being in use.";
311 }
312
313 void InMemoryURLIndexTest::ExpectPrivateDataNotEmpty(
314     const URLIndexPrivateData& data) {
315   EXPECT_FALSE(data.word_list_.empty());
316   // available_words_ will be empty since we have freshly built the
317   // data set for these tests.
318   EXPECT_TRUE(data.available_words_.empty());
319   EXPECT_FALSE(data.word_map_.empty());
320   EXPECT_FALSE(data.char_word_map_.empty());
321   EXPECT_FALSE(data.word_id_history_map_.empty());
322   EXPECT_FALSE(data.history_id_word_map_.empty());
323   EXPECT_FALSE(data.history_info_map_.empty());
324 }
325
326 void InMemoryURLIndexTest::ExpectPrivateDataEmpty(
327     const URLIndexPrivateData& data) {
328   EXPECT_TRUE(data.word_list_.empty());
329   EXPECT_TRUE(data.available_words_.empty());
330   EXPECT_TRUE(data.word_map_.empty());
331   EXPECT_TRUE(data.char_word_map_.empty());
332   EXPECT_TRUE(data.word_id_history_map_.empty());
333   EXPECT_TRUE(data.history_id_word_map_.empty());
334   EXPECT_TRUE(data.history_info_map_.empty());
335 }
336
// Helper function which compares two maps for equivalence. The maps' values
// are associative containers and their contents are compared as well.
// A size mismatch or a key missing from |actual| is a fatal failure for this
// helper; a mismatch in an individual element is reported non-fatally.
template<typename T>
void ExpectMapOfContainersIdentical(const T& expected, const T& actual) {
  ASSERT_EQ(expected.size(), actual.size());
  for (typename T::const_iterator expected_iter = expected.begin();
       expected_iter != expected.end(); ++expected_iter) {
    typename T::const_iterator actual_iter = actual.find(expected_iter->first);
    ASSERT_TRUE(actual.end() != actual_iter);
    typename T::mapped_type const& expected_values(expected_iter->second);
    typename T::mapped_type const& actual_values(actual_iter->second);
    ASSERT_EQ(expected_values.size(), actual_values.size());
    for (typename T::mapped_type::const_iterator set_iter =
         expected_values.begin(); set_iter != expected_values.end();
         ++set_iter) {
      // gtest takes the expected value first; keep that order so failure
      // messages label the two counts correctly.
      EXPECT_EQ(expected_values.count(*set_iter),
                actual_values.count(*set_iter));
    }
  }
}
355
356 void InMemoryURLIndexTest::ExpectPrivateDataEqual(
357     const URLIndexPrivateData& expected,
358     const URLIndexPrivateData& actual) {
359   EXPECT_EQ(expected.word_list_.size(), actual.word_list_.size());
360   EXPECT_EQ(expected.word_map_.size(), actual.word_map_.size());
361   EXPECT_EQ(expected.char_word_map_.size(), actual.char_word_map_.size());
362   EXPECT_EQ(expected.word_id_history_map_.size(),
363             actual.word_id_history_map_.size());
364   EXPECT_EQ(expected.history_id_word_map_.size(),
365             actual.history_id_word_map_.size());
366   EXPECT_EQ(expected.history_info_map_.size(), actual.history_info_map_.size());
367   EXPECT_EQ(expected.word_starts_map_.size(), actual.word_starts_map_.size());
368   // WordList must be index-by-index equal.
369   size_t count = expected.word_list_.size();
370   for (size_t i = 0; i < count; ++i)
371     EXPECT_EQ(expected.word_list_[i], actual.word_list_[i]);
372
373   ExpectMapOfContainersIdentical(expected.char_word_map_,
374                                  actual.char_word_map_);
375   ExpectMapOfContainersIdentical(expected.word_id_history_map_,
376                                  actual.word_id_history_map_);
377   ExpectMapOfContainersIdentical(expected.history_id_word_map_,
378                                  actual.history_id_word_map_);
379
380   for (HistoryInfoMap::const_iterator expected_info =
381       expected.history_info_map_.begin();
382       expected_info != expected.history_info_map_.end(); ++expected_info) {
383     HistoryInfoMap::const_iterator actual_info =
384         actual.history_info_map_.find(expected_info->first);
385     // NOTE(yfriedman): ASSERT_NE can't be used due to incompatibility between
386     // gtest and STLPort in the Android build. See
387     // http://code.google.com/p/googletest/issues/detail?id=359
388     ASSERT_TRUE(actual_info != actual.history_info_map_.end());
389     const URLRow& expected_row(expected_info->second.url_row);
390     const URLRow& actual_row(actual_info->second.url_row);
391     EXPECT_EQ(expected_row.visit_count(), actual_row.visit_count());
392     EXPECT_EQ(expected_row.typed_count(), actual_row.typed_count());
393     EXPECT_EQ(expected_row.last_visit(), actual_row.last_visit());
394     EXPECT_EQ(expected_row.url(), actual_row.url());
395     const VisitInfoVector& expected_visits(expected_info->second.visits);
396     const VisitInfoVector& actual_visits(actual_info->second.visits);
397     EXPECT_EQ(expected_visits.size(), actual_visits.size());
398     for (size_t i = 0;
399          i < std::min(expected_visits.size(), actual_visits.size()); ++i) {
400       EXPECT_EQ(expected_visits[i].first, actual_visits[i].first);
401       EXPECT_EQ(expected_visits[i].second, actual_visits[i].second);
402     }
403   }
404
405   for (WordStartsMap::const_iterator expected_starts =
406       expected.word_starts_map_.begin();
407       expected_starts != expected.word_starts_map_.end();
408       ++expected_starts) {
409     WordStartsMap::const_iterator actual_starts =
410         actual.word_starts_map_.find(expected_starts->first);
411     // NOTE(yfriedman): ASSERT_NE can't be used due to incompatibility between
412     // gtest and STLPort in the Android build. See
413     // http://code.google.com/p/googletest/issues/detail?id=359
414     ASSERT_TRUE(actual_starts != actual.word_starts_map_.end());
415     const RowWordStarts& expected_word_starts(expected_starts->second);
416     const RowWordStarts& actual_word_starts(actual_starts->second);
417     EXPECT_EQ(expected_word_starts.url_word_starts_.size(),
418               actual_word_starts.url_word_starts_.size());
419     EXPECT_TRUE(std::equal(expected_word_starts.url_word_starts_.begin(),
420                            expected_word_starts.url_word_starts_.end(),
421                            actual_word_starts.url_word_starts_.begin()));
422     EXPECT_EQ(expected_word_starts.title_word_starts_.size(),
423               actual_word_starts.title_word_starts_.size());
424     EXPECT_TRUE(std::equal(expected_word_starts.title_word_starts_.begin(),
425                            expected_word_starts.title_word_starts_.end(),
426                            actual_word_starts.title_word_starts_.begin()));
427   }
428 }
429
430 //------------------------------------------------------------------------------
431
432 class LimitedInMemoryURLIndexTest : public InMemoryURLIndexTest {
433  protected:
434   virtual base::FilePath::StringType TestDBName() const OVERRIDE;
435 };
436
437 base::FilePath::StringType LimitedInMemoryURLIndexTest::TestDBName() const {
438   return FILE_PATH_LITERAL("url_history_provider_test_limited.db.txt");
439 }
440
441 TEST_F(LimitedInMemoryURLIndexTest, Initialization) {
442   // Verify that the database contains the expected number of items, which
443   // is the pre-filtered count, i.e. all of the items.
444   sql::Statement statement(GetDB().GetUniqueStatement("SELECT * FROM urls;"));
445   ASSERT_TRUE(statement.is_valid());
446   uint64 row_count = 0;
447   while (statement.Step()) ++row_count;
448   EXPECT_EQ(1U, row_count);
449   url_index_.reset(new InMemoryURLIndex(
450       &profile_, base::FilePath(), "en,ja,hi,zh",
451       history_service_->history_client()));
452   url_index_->Init();
453   url_index_->RebuildFromHistory(history_database_);
454   URLIndexPrivateData& private_data(*GetPrivateData());
455
456   // history_info_map_ should have the same number of items as were filtered.
457   EXPECT_EQ(1U, private_data.history_info_map_.size());
458   EXPECT_EQ(35U, private_data.char_word_map_.size());
459   EXPECT_EQ(17U, private_data.word_map_.size());
460 }
461
462 #if defined(OS_WIN)
463 // Flaky on windows trybots: http://crbug.com/351500
464 #define MAYBE_Retrieval DISABLED_Retrieval
465 #else
466 #define MAYBE_Retrieval Retrieval
467 #endif
468 TEST_F(InMemoryURLIndexTest, MAYBE_Retrieval) {
469   // See if a very specific term gives a single result.
470   ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(
471       ASCIIToUTF16("DrudgeReport"), base::string16::npos, kMaxMatches);
472   ASSERT_EQ(1U, matches.size());
473
474   // Verify that we got back the result we expected.
475   EXPECT_EQ(5, matches[0].url_info.id());
476   EXPECT_EQ("http://drudgereport.com/", matches[0].url_info.url().spec());
477   EXPECT_EQ(ASCIIToUTF16("DRUDGE REPORT 2010"), matches[0].url_info.title());
478   EXPECT_TRUE(matches[0].can_inline());
479
480   // Make sure a trailing space prevents inline-ability but still results
481   // in the expected result.
482   matches = url_index_->HistoryItemsForTerms(
483       ASCIIToUTF16("DrudgeReport "), base::string16::npos, kMaxMatches);
484   ASSERT_EQ(1U, matches.size());
485   EXPECT_EQ(5, matches[0].url_info.id());
486   EXPECT_EQ("http://drudgereport.com/", matches[0].url_info.url().spec());
487   EXPECT_EQ(ASCIIToUTF16("DRUDGE REPORT 2010"), matches[0].url_info.title());
488   EXPECT_FALSE(matches[0].can_inline());
489
490   // Search which should result in multiple results.
491   matches = url_index_->HistoryItemsForTerms(
492       ASCIIToUTF16("drudge"), base::string16::npos, kMaxMatches);
493   ASSERT_EQ(2U, matches.size());
494   // The results should be in descending score order.
495   EXPECT_GE(matches[0].raw_score(), matches[1].raw_score());
496
497   // Search which should result in nearly perfect result.
498   matches = url_index_->HistoryItemsForTerms(
499       ASCIIToUTF16("Nearly Perfect Result"), base::string16::npos, kMaxMatches);
500   ASSERT_EQ(1U, matches.size());
501   // The results should have a very high score.
502   EXPECT_GT(matches[0].raw_score(), 900);
503   EXPECT_EQ(32, matches[0].url_info.id());
504   EXPECT_EQ("https://nearlyperfectresult.com/",
505             matches[0].url_info.url().spec());  // Note: URL gets lowercased.
506   EXPECT_EQ(ASCIIToUTF16("Practically Perfect Search Result"),
507             matches[0].url_info.title());
508   EXPECT_FALSE(matches[0].can_inline());
509
510   // Search which should result in very poor result.
511   matches = url_index_->HistoryItemsForTerms(
512       ASCIIToUTF16("qui c"), base::string16::npos, kMaxMatches);
513   ASSERT_EQ(1U, matches.size());
514   // The results should have a poor score.
515   EXPECT_LT(matches[0].raw_score(), 500);
516   EXPECT_EQ(33, matches[0].url_info.id());
517   EXPECT_EQ("http://quiteuselesssearchresultxyz.com/",
518             matches[0].url_info.url().spec());  // Note: URL gets lowercased.
519   EXPECT_EQ(ASCIIToUTF16("Practically Useless Search Result"),
520             matches[0].url_info.title());
521   EXPECT_FALSE(matches[0].can_inline());
522
523   // Search which will match at the end of an URL with encoded characters.
524   matches = url_index_->HistoryItemsForTerms(
525       ASCIIToUTF16("Mice"), base::string16::npos, kMaxMatches);
526   ASSERT_EQ(1U, matches.size());
527   EXPECT_EQ(30, matches[0].url_info.id());
528   EXPECT_FALSE(matches[0].can_inline());
529
530   // Check that URLs are not escaped an escape time.
531   matches = url_index_->HistoryItemsForTerms(
532        ASCIIToUTF16("1% wikipedia"), base::string16::npos, kMaxMatches);
533   ASSERT_EQ(1U, matches.size());
534   EXPECT_EQ(35, matches[0].url_info.id());
535   EXPECT_EQ("http://en.wikipedia.org/wiki/1%25_rule_(Internet_culture)",
536             matches[0].url_info.url().spec());
537
538   // Verify that a single term can appear multiple times in the URL and as long
539   // as one starts the URL it is still inlined.
540   matches = url_index_->HistoryItemsForTerms(
541       ASCIIToUTF16("fubar"), base::string16::npos, kMaxMatches);
542   ASSERT_EQ(1U, matches.size());
543   EXPECT_EQ(34, matches[0].url_info.id());
544   EXPECT_EQ("http://fubarfubarandfubar.com/", matches[0].url_info.url().spec());
545   EXPECT_EQ(ASCIIToUTF16("Situation Normal -- FUBARED"),
546             matches[0].url_info.title());
547   EXPECT_TRUE(matches[0].can_inline());
548 }
549
550 TEST_F(InMemoryURLIndexTest, CursorPositionRetrieval) {
551   // See if a very specific term with no cursor gives an empty result.
552   ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(
553       ASCIIToUTF16("DrudReport"), base::string16::npos, kMaxMatches);
554   ASSERT_EQ(0U, matches.size());
555
556   // The same test with the cursor at the end should give an empty result.
557   matches = url_index_->HistoryItemsForTerms(
558       ASCIIToUTF16("DrudReport"), 10u, kMaxMatches);
559   ASSERT_EQ(0U, matches.size());
560
561   // If the cursor is between Drud and Report, we should find the desired
562   // result.
563   matches = url_index_->HistoryItemsForTerms(
564       ASCIIToUTF16("DrudReport"), 4u, kMaxMatches);
565   ASSERT_EQ(1U, matches.size());
566   EXPECT_EQ("http://drudgereport.com/", matches[0].url_info.url().spec());
567   EXPECT_EQ(ASCIIToUTF16("DRUDGE REPORT 2010"), matches[0].url_info.title());
568
569   // Now check multi-word inputs.  No cursor should fail to find a
570   // result on this input.
571   matches = url_index_->HistoryItemsForTerms(
572       ASCIIToUTF16("MORTGAGERATE DROPS"), base::string16::npos, kMaxMatches);
573   ASSERT_EQ(0U, matches.size());
574
575   // Ditto with cursor at end.
576   matches = url_index_->HistoryItemsForTerms(
577       ASCIIToUTF16("MORTGAGERATE DROPS"), 18u, kMaxMatches);
578   ASSERT_EQ(0U, matches.size());
579
580   // If the cursor is between MORTAGE And RATE, we should find the
581   // desired result.
582   matches = url_index_->HistoryItemsForTerms(
583       ASCIIToUTF16("MORTGAGERATE DROPS"), 8u, kMaxMatches);
584   ASSERT_EQ(1U, matches.size());
585   EXPECT_EQ("http://www.reuters.com/article/idUSN0839880620100708",
586             matches[0].url_info.url().spec());
587   EXPECT_EQ(ASCIIToUTF16(
588       "UPDATE 1-US 30-yr mortgage rate drops to new record low | Reuters"),
589             matches[0].url_info.title());
590 }
591
592 TEST_F(InMemoryURLIndexTest, URLPrefixMatching) {
593   // "drudgere" - found, can inline
594   ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(
595       ASCIIToUTF16("drudgere"), base::string16::npos, kMaxMatches);
596   ASSERT_EQ(1U, matches.size());
597   EXPECT_TRUE(matches[0].can_inline());
598
599   // "drudgere" - found, can inline
600   matches = url_index_->HistoryItemsForTerms(
601       ASCIIToUTF16("drudgere"), base::string16::npos, kMaxMatches);
602   ASSERT_EQ(1U, matches.size());
603   EXPECT_TRUE(matches[0].can_inline());
604
605   // "www.atdmt" - not found
606   matches = url_index_->HistoryItemsForTerms(
607       ASCIIToUTF16("www.atdmt"), base::string16::npos, kMaxMatches);
608   EXPECT_EQ(0U, matches.size());
609
610   // "atdmt" - found, cannot inline
611   matches = url_index_->HistoryItemsForTerms(
612       ASCIIToUTF16("atdmt"), base::string16::npos, kMaxMatches);
613   ASSERT_EQ(1U, matches.size());
614   EXPECT_FALSE(matches[0].can_inline());
615
616   // "view.atdmt" - found, can inline
617   matches = url_index_->HistoryItemsForTerms(
618       ASCIIToUTF16("view.atdmt"), base::string16::npos, kMaxMatches);
619   ASSERT_EQ(1U, matches.size());
620   EXPECT_TRUE(matches[0].can_inline());
621
622   // "view.atdmt" - found, can inline
623   matches = url_index_->HistoryItemsForTerms(
624       ASCIIToUTF16("view.atdmt"), base::string16::npos, kMaxMatches);
625   ASSERT_EQ(1U, matches.size());
626   EXPECT_TRUE(matches[0].can_inline());
627
628   // "cnn.com" - found, can inline
629   matches = url_index_->HistoryItemsForTerms(
630       ASCIIToUTF16("cnn.com"), base::string16::npos, kMaxMatches);
631   ASSERT_EQ(2U, matches.size());
632   // One match should be inline-able, the other not.
633   EXPECT_TRUE(matches[0].can_inline() != matches[1].can_inline());
634
635   // "www.cnn.com" - found, can inline
636   matches = url_index_->HistoryItemsForTerms(
637       ASCIIToUTF16("www.cnn.com"), base::string16::npos, kMaxMatches);
638   ASSERT_EQ(1U, matches.size());
639   EXPECT_TRUE(matches[0].can_inline());
640
641   // "ww.cnn.com" - found because we allow mid-term matches in hostnames
642   matches = url_index_->HistoryItemsForTerms(
643       ASCIIToUTF16("ww.cnn.com"), base::string16::npos, kMaxMatches);
644   ASSERT_EQ(1U, matches.size());
645
646   // "www.cnn.com" - found, can inline
647   matches = url_index_->HistoryItemsForTerms(
648       ASCIIToUTF16("www.cnn.com"), base::string16::npos, kMaxMatches);
649   ASSERT_EQ(1U, matches.size());
650   EXPECT_TRUE(matches[0].can_inline());
651
652   // "tp://www.cnn.com" - not found because we don't allow tp as a mid-term
653   // match
654   matches = url_index_->HistoryItemsForTerms(
655       ASCIIToUTF16("tp://www.cnn.com"), base::string16::npos, kMaxMatches);
656   ASSERT_EQ(0U, matches.size());
657 }
658
659 TEST_F(InMemoryURLIndexTest, ProperStringMatching) {
660   // Search for the following with the expected results:
661   // "atdmt view" - found
662   // "atdmt.view" - not found
663   // "view.atdmt" - found
664   ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(
665       ASCIIToUTF16("atdmt view"), base::string16::npos, kMaxMatches);
666   ASSERT_EQ(1U, matches.size());
667   matches = url_index_->HistoryItemsForTerms(
668        ASCIIToUTF16("atdmt.view"), base::string16::npos, kMaxMatches);
669   ASSERT_EQ(0U, matches.size());
670   matches = url_index_->HistoryItemsForTerms(
671       ASCIIToUTF16("view.atdmt"), base::string16::npos, kMaxMatches);
672   ASSERT_EQ(1U, matches.size());
673 }
674
675 TEST_F(InMemoryURLIndexTest, HugeResultSet) {
676   // Create a huge set of qualifying history items.
677   for (URLID row_id = 5000; row_id < 6000; ++row_id) {
678     URLRow new_row(GURL("http://www.brokeandaloneinmanitoba.com/"), row_id);
679     new_row.set_last_visit(base::Time::Now());
680     EXPECT_TRUE(UpdateURL(new_row));
681   }
682
683   ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(
684       ASCIIToUTF16("b"), base::string16::npos, kMaxMatches);
685   URLIndexPrivateData& private_data(*GetPrivateData());
686   ASSERT_EQ(kMaxMatches, matches.size());
687   // There are 7 matches already in the database.
688   ASSERT_EQ(1008U, private_data.pre_filter_item_count_);
689   ASSERT_EQ(500U, private_data.post_filter_item_count_);
690   ASSERT_EQ(kMaxMatches, private_data.post_scoring_item_count_);
691 }
692
693 #if defined(OS_WIN)
694 // Flaky on windows trybots: http://crbug.com/351500
695 #define MAYBE_TitleSearch DISABLED_TitleSearch
696 #else
697 #define MAYBE_TitleSearch TitleSearch
698 #endif
699 TEST_F(InMemoryURLIndexTest, MAYBE_TitleSearch) {
700   // Signal if someone has changed the test DB.
701   EXPECT_EQ(29U, GetPrivateData()->history_info_map_.size());
702
703   // Ensure title is being searched.
704   ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(
705       ASCIIToUTF16("MORTGAGE RATE DROPS"), base::string16::npos, kMaxMatches);
706   ASSERT_EQ(1U, matches.size());
707
708   // Verify that we got back the result we expected.
709   EXPECT_EQ(1, matches[0].url_info.id());
710   EXPECT_EQ("http://www.reuters.com/article/idUSN0839880620100708",
711             matches[0].url_info.url().spec());
712   EXPECT_EQ(ASCIIToUTF16(
713       "UPDATE 1-US 30-yr mortgage rate drops to new record low | Reuters"),
714       matches[0].url_info.title());
715 }
716
717 TEST_F(InMemoryURLIndexTest, TitleChange) {
718   // Verify current title terms retrieves desired item.
719   base::string16 original_terms =
720       ASCIIToUTF16("lebronomics could high taxes influence");
721   ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(
722       original_terms, base::string16::npos, kMaxMatches);
723   ASSERT_EQ(1U, matches.size());
724
725   // Verify that we got back the result we expected.
726   const URLID expected_id = 3;
727   EXPECT_EQ(expected_id, matches[0].url_info.id());
728   EXPECT_EQ("http://www.businessandmedia.org/articles/2010/20100708120415.aspx",
729             matches[0].url_info.url().spec());
730   EXPECT_EQ(ASCIIToUTF16(
731       "LeBronomics: Could High Taxes Influence James' Team Decision?"),
732       matches[0].url_info.title());
733   URLRow old_row(matches[0].url_info);
734
735   // Verify new title terms retrieves nothing.
736   base::string16 new_terms = ASCIIToUTF16("does eat oats little lambs ivy");
737   matches = url_index_->HistoryItemsForTerms(
738       new_terms, base::string16::npos, kMaxMatches);
739   ASSERT_EQ(0U, matches.size());
740
741   // Update the row.
742   old_row.set_title(ASCIIToUTF16("Does eat oats and little lambs eat ivy"));
743   EXPECT_TRUE(UpdateURL(old_row));
744
745   // Verify we get the row using the new terms but not the original terms.
746   matches = url_index_->HistoryItemsForTerms(
747       new_terms, base::string16::npos, kMaxMatches);
748   ASSERT_EQ(1U, matches.size());
749   EXPECT_EQ(expected_id, matches[0].url_info.id());
750   matches = url_index_->HistoryItemsForTerms(
751       original_terms, base::string16::npos, kMaxMatches);
752   ASSERT_EQ(0U, matches.size());
753 }
754
755 TEST_F(InMemoryURLIndexTest, NonUniqueTermCharacterSets) {
756   // The presence of duplicate characters should succeed. Exercise by cycling
757   // through a string with several duplicate characters.
758   ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(
759       ASCIIToUTF16("ABRA"), base::string16::npos, kMaxMatches);
760   ASSERT_EQ(1U, matches.size());
761   EXPECT_EQ(28, matches[0].url_info.id());
762   EXPECT_EQ("http://www.ddj.com/windows/184416623",
763             matches[0].url_info.url().spec());
764
765   matches = url_index_->HistoryItemsForTerms(
766       ASCIIToUTF16("ABRACAD"), base::string16::npos, kMaxMatches);
767   ASSERT_EQ(1U, matches.size());
768   EXPECT_EQ(28, matches[0].url_info.id());
769
770   matches = url_index_->HistoryItemsForTerms(
771       ASCIIToUTF16("ABRACADABRA"), base::string16::npos, kMaxMatches);
772   ASSERT_EQ(1U, matches.size());
773   EXPECT_EQ(28, matches[0].url_info.id());
774
775   matches = url_index_->HistoryItemsForTerms(
776       ASCIIToUTF16("ABRACADABR"), base::string16::npos, kMaxMatches);
777   ASSERT_EQ(1U, matches.size());
778   EXPECT_EQ(28, matches[0].url_info.id());
779
780   matches = url_index_->HistoryItemsForTerms(
781       ASCIIToUTF16("ABRACA"), base::string16::npos, kMaxMatches);
782   ASSERT_EQ(1U, matches.size());
783   EXPECT_EQ(28, matches[0].url_info.id());
784 }
785
786 TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) {
787   // Verify that match results for previously typed characters are retained
788   // (in the term_char_word_set_cache_) and reused, if possible, in future
789   // autocompletes.
790
791   URLIndexPrivateData::SearchTermCacheMap& cache(
792       GetPrivateData()->search_term_cache_);
793
794   // The cache should be empty at this point.
795   EXPECT_EQ(0U, cache.size());
796
797   // Now simulate typing search terms into the omnibox and check the state of
798   // the cache as each item is 'typed'.
799
800   // Simulate typing "r" giving "r" in the simulated omnibox. The results for
801   // 'r' will be not cached because it is only 1 character long.
802   url_index_->HistoryItemsForTerms(
803       ASCIIToUTF16("r"), base::string16::npos, kMaxMatches);
804   EXPECT_EQ(0U, cache.size());
805
806   // Simulate typing "re" giving "r re" in the simulated omnibox.
807   // 're' should be cached at this point but not 'r' as it is a single
808   // character.
809   url_index_->HistoryItemsForTerms(
810       ASCIIToUTF16("r re"), base::string16::npos, kMaxMatches);
811   ASSERT_EQ(1U, cache.size());
812   CheckTerm(cache, ASCIIToUTF16("re"));
813
814   // Simulate typing "reco" giving "r re reco" in the simulated omnibox.
815   // 're' and 'reco' should be cached at this point but not 'r' as it is a
816   // single character.
817   url_index_->HistoryItemsForTerms(
818       ASCIIToUTF16("r re reco"), base::string16::npos, kMaxMatches);
819   ASSERT_EQ(2U, cache.size());
820   CheckTerm(cache, ASCIIToUTF16("re"));
821   CheckTerm(cache, ASCIIToUTF16("reco"));
822
823   // Simulate typing "mort".
824   // Since we now have only one search term, the cached results for 're' and
825   // 'reco' should be purged, giving us only 1 item in the cache (for 'mort').
826   url_index_->HistoryItemsForTerms(
827       ASCIIToUTF16("mort"), base::string16::npos, kMaxMatches);
828   ASSERT_EQ(1U, cache.size());
829   CheckTerm(cache, ASCIIToUTF16("mort"));
830
831   // Simulate typing "reco" giving "mort reco" in the simulated omnibox.
832   url_index_->HistoryItemsForTerms(
833       ASCIIToUTF16("mort reco"), base::string16::npos, kMaxMatches);
834   ASSERT_EQ(2U, cache.size());
835   CheckTerm(cache, ASCIIToUTF16("mort"));
836   CheckTerm(cache, ASCIIToUTF16("reco"));
837
838   // Simulate a <DELETE> by removing the 'reco' and adding back the 'rec'.
839   url_index_->HistoryItemsForTerms(
840       ASCIIToUTF16("mort rec"), base::string16::npos, kMaxMatches);
841   ASSERT_EQ(2U, cache.size());
842   CheckTerm(cache, ASCIIToUTF16("mort"));
843   CheckTerm(cache, ASCIIToUTF16("rec"));
844 }
845
846 TEST_F(InMemoryURLIndexTest, AddNewRows) {
847   // Verify that the row we're going to add does not already exist.
848   URLID new_row_id = 87654321;
849   // Newly created URLRows get a last_visit time of 'right now' so it should
850   // qualify as a quick result candidate.
851   EXPECT_TRUE(url_index_->HistoryItemsForTerms(
852       ASCIIToUTF16("brokeandalone"), base::string16::npos, kMaxMatches)
853           .empty());
854
855   // Add a new row.
856   URLRow new_row(GURL("http://www.brokeandaloneinmanitoba.com/"), new_row_id++);
857   new_row.set_last_visit(base::Time::Now());
858   EXPECT_TRUE(UpdateURL(new_row));
859
860   // Verify that we can retrieve it.
861   EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(
862       ASCIIToUTF16("brokeandalone"), base::string16::npos, kMaxMatches).size());
863
864   // Add it again just to be sure that is harmless and that it does not update
865   // the index.
866   EXPECT_FALSE(UpdateURL(new_row));
867   EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(
868       ASCIIToUTF16("brokeandalone"), base::string16::npos, kMaxMatches).size());
869
870   // Make up an URL that does not qualify and try to add it.
871   URLRow unqualified_row(GURL("http://www.brokeandaloneinmanitoba.com/"),
872                          new_row_id++);
873   EXPECT_FALSE(UpdateURL(new_row));
874 }
875
876 TEST_F(InMemoryURLIndexTest, DeleteRows) {
877   ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(
878       ASCIIToUTF16("DrudgeReport"), base::string16::npos, kMaxMatches);
879   ASSERT_EQ(1U, matches.size());
880
881   // Delete the URL then search again.
882   EXPECT_TRUE(DeleteURL(matches[0].url_info.url()));
883   EXPECT_TRUE(url_index_->HistoryItemsForTerms(
884       ASCIIToUTF16("DrudgeReport"), base::string16::npos, kMaxMatches).empty());
885
886   // Make up an URL that does not exist in the database and delete it.
887   GURL url("http://www.hokeypokey.com/putyourrightfootin.html");
888   EXPECT_FALSE(DeleteURL(url));
889 }
890
891 TEST_F(InMemoryURLIndexTest, ExpireRow) {
892   ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(
893       ASCIIToUTF16("DrudgeReport"), base::string16::npos, kMaxMatches);
894   ASSERT_EQ(1U, matches.size());
895
896   // Determine the row id for the result, remember that id, broadcast a
897   // delete notification, then ensure that the row has been deleted.
898   URLsDeletedDetails deleted_details;
899   deleted_details.all_history = false;
900   deleted_details.rows.push_back(matches[0].url_info);
901   Observe(chrome::NOTIFICATION_HISTORY_URLS_DELETED,
902           content::Source<InMemoryURLIndexTest>(this),
903           content::Details<history::HistoryDetails>(&deleted_details));
904   EXPECT_TRUE(url_index_->HistoryItemsForTerms(
905       ASCIIToUTF16("DrudgeReport"), base::string16::npos, kMaxMatches).empty());
906 }
907
908 TEST_F(InMemoryURLIndexTest, WhitelistedURLs) {
909   struct TestData {
910     const std::string url_spec;
911     const bool expected_is_whitelisted;
912   } data[] = {
913     // URLs with whitelisted schemes.
914     { "about:histograms", true },
915     { "chrome://settings", true },
916     { "file://localhost/Users/joeschmoe/sekrets", true },
917     { "ftp://public.mycompany.com/myfile.txt", true },
918     { "http://www.google.com/translate", true },
919     { "https://www.gmail.com/", true },
920     { "mailto:support@google.com", true },
921     // URLs with unacceptable schemes.
922     { "aaa://www.dummyhost.com;frammy", false },
923     { "aaas://www.dummyhost.com;frammy", false },
924     { "acap://suzie@somebody.com", false },
925     { "cap://cal.example.com/Company/Holidays", false },
926     { "cid:foo4*foo1@bar.net", false },
927     { "crid://example.com/foobar", false },
928     { "data:image/png;base64,iVBORw0KGgoAAAANSUhE=", false },
929     { "dict://dict.org/d:shortcake:", false },
930     { "dns://192.168.1.1/ftp.example.org?type=A", false },
931     { "fax:+358.555.1234567", false },
932     { "geo:13.4125,103.8667", false },
933     { "go:Mercedes%20Benz", false },
934     { "gopher://farnsworth.ca:666/gopher", false },
935     { "h323:farmer-john;sixpence", false },
936     { "iax:johnQ@example.com/12022561414", false },
937     { "icap://icap.net/service?mode=translate&lang=french", false },
938     { "im:fred@example.com", false },
939     { "imap://michael@minbari.org/users.*", false },
940     { "info:ddc/22/eng//004.678", false },
941     { "ipp://example.com/printer/fox", false },
942     { "iris:dreg1//example.com/local/myhosts", false },
943     { "iris.beep:dreg1//example.com/local/myhosts", false },
944     { "iris.lws:dreg1//example.com/local/myhosts", false },
945     { "iris.xpc:dreg1//example.com/local/myhosts", false },
946     { "iris.xpcs:dreg1//example.com/local/myhosts", false },
947     { "ldap://ldap.itd.umich.edu/o=University%20of%20Michigan,c=US", false },
948     { "mid:foo4%25foo1@bar.net", false },
949     { "modem:+3585551234567;type=v32b?7e1;type=v110", false },
950     { "msrp://atlanta.example.com:7654/jshA7weztas;tcp", false },
951     { "msrps://atlanta.example.com:7654/jshA7weztas;tcp", false },
952     { "news:colorectal.info.banned", false },
953     { "nfs://server/d/e/f", false },
954     { "nntp://www.example.com:6543/info.comp.lies/1234", false },
955     { "pop://rg;AUTH=+APOP@mail.mycompany.com:8110", false },
956     { "pres:fred@example.com", false },
957     { "prospero://host.dom//pros/name", false },
958     { "rsync://syler@lost.com/Source", false },
959     { "rtsp://media.example.com:554/twister/audiotrack", false },
960     { "service:acap://some.where.net;authentication=KERBEROSV4", false },
961     { "shttp://www.terces.com/secret", false },
962     { "sieve://example.com//script", false },
963     { "sip:+1-212-555-1212:1234@gateway.com;user=phone", false },
964     { "sips:+1-212-555-1212:1234@gateway.com;user=phone", false },
965     { "sms:+15105551212?body=hello%20there", false },
966     { "snmp://tester5@example.com:8161/bridge1;800002b804616263", false },
967     { "soap.beep://stockquoteserver.example.com/StockQuote", false },
968     { "soap.beeps://stockquoteserver.example.com/StockQuote", false },
969     { "tag:blogger.com,1999:blog-555", false },
970     { "tel:+358-555-1234567;postd=pp22", false },
971     { "telnet://mayor_margie:one2rule4All@www.mycity.com:6789/", false },
972     { "tftp://example.com/mystartupfile", false },
973     { "tip://123.123.123.123/?urn:xopen:xid", false },
974     { "tv:nbc.com", false },
975     { "urn:foo:A123,456", false },
976     { "vemmi://zeus.mctel.fr/demo", false },
977     { "wais://www.mydomain.net:8765/mydatabase", false },
978     { "xmpp:node@example.com", false },
979     { "xmpp://guest@example.com", false },
980   };
981
982   URLIndexPrivateData& private_data(*GetPrivateData());
983   const std::set<std::string>& whitelist(scheme_whitelist());
984   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) {
985     GURL url(data[i].url_spec);
986     EXPECT_EQ(data[i].expected_is_whitelisted,
987               private_data.URLSchemeIsWhitelisted(url, whitelist));
988   }
989 }
990
991 TEST_F(InMemoryURLIndexTest, ReadVisitsFromHistory) {
992   const HistoryInfoMap& history_info_map = GetPrivateData()->history_info_map_;
993
994   // Check (for URL with id 1) that the number of visits and their
995   // transition types are what we expect.  We don't bother checking
996   // the timestamps because it's too much trouble.  (The timestamps go
997   // through a transformation in InMemoryURLIndexTest::SetUp().  We
998   // assume that if the count and transitions show up with the right
999   // information, we're getting the right information from the history
1000   // database file.)
1001   HistoryInfoMap::const_iterator entry = history_info_map.find(1);
1002   ASSERT_TRUE(entry != history_info_map.end());
1003   {
1004     const VisitInfoVector& visits = entry->second.visits;
1005     EXPECT_EQ(3u, visits.size());
1006     EXPECT_EQ(0u, visits[0].second);
1007     EXPECT_EQ(1u, visits[1].second);
1008     EXPECT_EQ(0u, visits[2].second);
1009   }
1010
1011   // Ditto but for URL with id 35.
1012   entry = history_info_map.find(35);
1013   ASSERT_TRUE(entry != history_info_map.end());
1014   {
1015     const VisitInfoVector& visits = entry->second.visits;
1016     EXPECT_EQ(2u, visits.size());
1017     EXPECT_EQ(1u, visits[0].second);
1018     EXPECT_EQ(1u, visits[1].second);
1019   }
1020
1021   // The URL with id 32 has many visits listed in the database, but we
1022   // should only read the most recent 10 (which are all transition type 0).
1023   entry = history_info_map.find(32);
1024   ASSERT_TRUE(entry != history_info_map.end());
1025   {
1026     const VisitInfoVector& visits = entry->second.visits;
1027     EXPECT_EQ(10u, visits.size());
1028     for (size_t i = 0; i < visits.size(); ++i)
1029       EXPECT_EQ(0u, visits[i].second);
1030   }
1031 }
1032
1033 TEST_F(InMemoryURLIndexTest, CacheSaveRestore) {
1034   base::ScopedTempDir temp_directory;
1035   ASSERT_TRUE(temp_directory.CreateUniqueTempDir());
1036   set_history_dir(temp_directory.path());
1037
1038   URLIndexPrivateData& private_data(*GetPrivateData());
1039
1040   // Ensure that there is really something there to be saved.
1041   EXPECT_FALSE(private_data.word_list_.empty());
1042   // available_words_ will already be empty since we have freshly built the
1043   // data set for this test.
1044   EXPECT_TRUE(private_data.available_words_.empty());
1045   EXPECT_FALSE(private_data.word_map_.empty());
1046   EXPECT_FALSE(private_data.char_word_map_.empty());
1047   EXPECT_FALSE(private_data.word_id_history_map_.empty());
1048   EXPECT_FALSE(private_data.history_id_word_map_.empty());
1049   EXPECT_FALSE(private_data.history_info_map_.empty());
1050   EXPECT_FALSE(private_data.word_starts_map_.empty());
1051
1052   // Make sure the data we have was built from history.  (Version 0
1053   // means rebuilt from history.)
1054   EXPECT_EQ(0, private_data.restored_cache_version_);
1055
1056   // Capture the current private data for later comparison to restored data.
1057   scoped_refptr<URLIndexPrivateData> old_data(private_data.Duplicate());
1058   const base::Time rebuild_time = private_data.last_time_rebuilt_from_history_;
1059
1060   {
1061     // Save then restore our private data.
1062     base::RunLoop run_loop;
1063     CacheFileSaverObserver save_observer(run_loop.QuitClosure());
1064     url_index_->set_save_cache_observer(&save_observer);
1065     PostSaveToCacheFileTask();
1066     run_loop.Run();
1067     EXPECT_TRUE(save_observer.succeeded());
1068   }
1069
1070   // Clear and then prove it's clear before restoring.
1071   ClearPrivateData();
1072   EXPECT_TRUE(private_data.word_list_.empty());
1073   EXPECT_TRUE(private_data.available_words_.empty());
1074   EXPECT_TRUE(private_data.word_map_.empty());
1075   EXPECT_TRUE(private_data.char_word_map_.empty());
1076   EXPECT_TRUE(private_data.word_id_history_map_.empty());
1077   EXPECT_TRUE(private_data.history_id_word_map_.empty());
1078   EXPECT_TRUE(private_data.history_info_map_.empty());
1079   EXPECT_TRUE(private_data.word_starts_map_.empty());
1080
1081   {
1082     base::RunLoop run_loop;
1083     HistoryIndexRestoreObserver restore_observer(run_loop.QuitClosure());
1084     url_index_->set_restore_cache_observer(&restore_observer);
1085     PostRestoreFromCacheFileTask();
1086     run_loop.Run();
1087     EXPECT_TRUE(restore_observer.succeeded());
1088   }
1089
1090   URLIndexPrivateData& new_data(*GetPrivateData());
1091
1092   // Make sure the data we have was reloaded from cache.  (Version 0
1093   // means rebuilt from history; anything else means restored from
1094   // a cache version.)  Also, the rebuild time should not have changed.
1095   EXPECT_GT(new_data.restored_cache_version_, 0);
1096   EXPECT_EQ(rebuild_time, new_data.last_time_rebuilt_from_history_);
1097
1098   // Compare the captured and restored for equality.
1099   ExpectPrivateDataEqual(*old_data.get(), new_data);
1100 }
1101
1102 #if defined(OS_WIN)
1103 // http://crbug.com/351500
1104 #define MAYBE_RebuildFromHistoryIfCacheOld DISABLED_RebuildFromHistoryIfCacheOld
1105 #else
1106 #define MAYBE_RebuildFromHistoryIfCacheOld RebuildFromHistoryIfCacheOld
1107 #endif
1108 TEST_F(InMemoryURLIndexTest, MAYBE_RebuildFromHistoryIfCacheOld) {
1109   base::ScopedTempDir temp_directory;
1110   ASSERT_TRUE(temp_directory.CreateUniqueTempDir());
1111   set_history_dir(temp_directory.path());
1112
1113   URLIndexPrivateData& private_data(*GetPrivateData());
1114
1115   // Ensure that there is really something there to be saved.
1116   EXPECT_FALSE(private_data.word_list_.empty());
1117   // available_words_ will already be empty since we have freshly built the
1118   // data set for this test.
1119   EXPECT_TRUE(private_data.available_words_.empty());
1120   EXPECT_FALSE(private_data.word_map_.empty());
1121   EXPECT_FALSE(private_data.char_word_map_.empty());
1122   EXPECT_FALSE(private_data.word_id_history_map_.empty());
1123   EXPECT_FALSE(private_data.history_id_word_map_.empty());
1124   EXPECT_FALSE(private_data.history_info_map_.empty());
1125   EXPECT_FALSE(private_data.word_starts_map_.empty());
1126
1127   // Make sure the data we have was built from history.  (Version 0
1128   // means rebuilt from history.)
1129   EXPECT_EQ(0, private_data.restored_cache_version_);
1130
1131   // Overwrite the build time so that we'll think the data is too old
1132   // and rebuild the cache from history.
1133   const base::Time fake_rebuild_time =
1134       private_data.last_time_rebuilt_from_history_ -
1135       base::TimeDelta::FromDays(30);
1136   private_data.last_time_rebuilt_from_history_ = fake_rebuild_time;
1137
1138   // Capture the current private data for later comparison to restored data.
1139   scoped_refptr<URLIndexPrivateData> old_data(private_data.Duplicate());
1140
1141   {
1142     // Save then restore our private data.
1143     base::RunLoop run_loop;
1144     CacheFileSaverObserver save_observer(run_loop.QuitClosure());
1145     url_index_->set_save_cache_observer(&save_observer);
1146     PostSaveToCacheFileTask();
1147     run_loop.Run();
1148     EXPECT_TRUE(save_observer.succeeded());
1149   }
1150
1151   // Clear and then prove it's clear before restoring.
1152   ClearPrivateData();
1153   EXPECT_TRUE(private_data.word_list_.empty());
1154   EXPECT_TRUE(private_data.available_words_.empty());
1155   EXPECT_TRUE(private_data.word_map_.empty());
1156   EXPECT_TRUE(private_data.char_word_map_.empty());
1157   EXPECT_TRUE(private_data.word_id_history_map_.empty());
1158   EXPECT_TRUE(private_data.history_id_word_map_.empty());
1159   EXPECT_TRUE(private_data.history_info_map_.empty());
1160   EXPECT_TRUE(private_data.word_starts_map_.empty());
1161
1162   {
1163     base::RunLoop run_loop;
1164     HistoryIndexRestoreObserver restore_observer(run_loop.QuitClosure());
1165     url_index_->set_restore_cache_observer(&restore_observer);
1166     PostRestoreFromCacheFileTask();
1167     run_loop.Run();
1168     EXPECT_TRUE(restore_observer.succeeded());
1169   }
1170
1171   URLIndexPrivateData& new_data(*GetPrivateData());
1172
1173   // Make sure the data we have was rebuilt from history.  (Version 0
1174   // means rebuilt from history; anything else means restored from
1175   // a cache version.)
1176   EXPECT_EQ(0, new_data.restored_cache_version_);
1177   EXPECT_NE(fake_rebuild_time, new_data.last_time_rebuilt_from_history_);
1178
1179   // Compare the captured and restored for equality.
1180   ExpectPrivateDataEqual(*old_data.get(), new_data);
1181 }
1182
1183 class InMemoryURLIndexCacheTest : public testing::Test {
1184  public:
1185   InMemoryURLIndexCacheTest() {}
1186
1187  protected:
1188   virtual void SetUp() OVERRIDE;
1189
1190   // Pass-through functions to simplify our friendship with InMemoryURLIndex.
1191   void set_history_dir(const base::FilePath& dir_path);
1192   bool GetCacheFilePath(base::FilePath* file_path) const;
1193
1194   base::ScopedTempDir temp_dir_;
1195   scoped_ptr<InMemoryURLIndex> url_index_;
1196 };
1197
1198 void InMemoryURLIndexCacheTest::SetUp() {
1199   ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
1200   HistoryClient history_client;
1201   base::FilePath path(temp_dir_.path());
1202   url_index_.reset(new InMemoryURLIndex(
1203       NULL, path, "en,ja,hi,zh", &history_client));
1204 }
1205
1206 void InMemoryURLIndexCacheTest::set_history_dir(
1207     const base::FilePath& dir_path) {
1208   return url_index_->set_history_dir(dir_path);
1209 }
1210
1211 bool InMemoryURLIndexCacheTest::GetCacheFilePath(
1212     base::FilePath* file_path) const {
1213   DCHECK(file_path);
1214   return url_index_->GetCacheFilePath(file_path);
1215 }
1216
1217 TEST_F(InMemoryURLIndexCacheTest, CacheFilePath) {
1218   base::FilePath expectedPath =
1219       temp_dir_.path().Append(FILE_PATH_LITERAL("History Provider Cache"));
1220   std::vector<base::FilePath::StringType> expected_parts;
1221   expectedPath.GetComponents(&expected_parts);
1222   base::FilePath full_file_path;
1223   ASSERT_TRUE(GetCacheFilePath(&full_file_path));
1224   std::vector<base::FilePath::StringType> actual_parts;
1225   full_file_path.GetComponents(&actual_parts);
1226   ASSERT_EQ(expected_parts.size(), actual_parts.size());
1227   size_t count = expected_parts.size();
1228   for (size_t i = 0; i < count; ++i)
1229     EXPECT_EQ(expected_parts[i], actual_parts[i]);
1230   // Must clear the history_dir_ to satisfy the dtor's DCHECK.
1231   set_history_dir(base::FilePath());
1232 }
1233
1234 }  // namespace history