1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/history/url_database.h"
12 #include "base/i18n/case_conversion.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "chrome/common/url_constants.h"
15 #include "net/base/net_util.h"
16 #include "sql/statement.h"
17 #include "ui/base/l10n/l10n_util.h"
22 const char URLDatabase::kURLRowFields[] = HISTORY_URL_ROW_FIELDS;
23 const int URLDatabase::kNumURLRowFields = 9;
25 URLDatabase::URLEnumeratorBase::URLEnumeratorBase()
26 : initialized_(false) {
29 URLDatabase::URLEnumeratorBase::~URLEnumeratorBase() {
32 URLDatabase::URLEnumerator::URLEnumerator() {
35 bool URLDatabase::URLEnumerator::GetNextURL(URLRow* r) {
36 if (statement_.Step()) {
37 FillURLRow(statement_, r);
43 URLDatabase::URLDatabase()
44 : has_keyword_search_terms_(false) {
47 URLDatabase::~URLDatabase() {
51 std::string URLDatabase::GURLToDatabaseURL(const GURL& gurl) {
52 // TODO(brettw): do something fancy here with encoding, etc.
54 // Strip username and password from URL before sending to DB.
55 GURL::Replacements replacements;
56 replacements.ClearUsername();
57 replacements.ClearPassword();
59 return (gurl.ReplaceComponents(replacements)).spec();
62 // Convenience to fill a history::URLRow. Must be in sync with the fields in
64 void URLDatabase::FillURLRow(sql::Statement& s, history::URLRow* i) {
66 i->id_ = s.ColumnInt64(0);
67 i->url_ = GURL(s.ColumnString(1));
68 i->title_ = s.ColumnString16(2);
69 i->visit_count_ = s.ColumnInt(3);
70 i->typed_count_ = s.ColumnInt(4);
71 i->last_visit_ = base::Time::FromInternalValue(s.ColumnInt64(5));
72 i->hidden_ = s.ColumnInt(6) != 0;
75 bool URLDatabase::GetURLRow(URLID url_id, URLRow* info) {
76 // TODO(brettw) We need check for empty URLs to handle the case where
77 // there are old URLs in the database that are empty that got in before
78 // we added any checks. We should eventually be able to remove it
79 // when all inputs are using GURL (which prohibit empty input).
80 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
81 "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE id=?"));
82 statement.BindInt64(0, url_id);
84 if (statement.Step()) {
85 FillURLRow(statement, info);
91 bool URLDatabase::GetAllTypedUrls(URLRows* urls) {
92 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
93 "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE typed_count > 0"));
95 while (statement.Step()) {
97 FillURLRow(statement, &info);
98 urls->push_back(info);
103 URLID URLDatabase::GetRowForURL(const GURL& url, history::URLRow* info) {
104 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
105 "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE url=?"));
106 std::string url_string = GURLToDatabaseURL(url);
107 statement.BindString(0, url_string);
109 if (!statement.Step())
113 FillURLRow(statement, info);
114 return statement.ColumnInt64(0);
117 bool URLDatabase::UpdateURLRow(URLID url_id,
118 const history::URLRow& info) {
119 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
120 "UPDATE urls SET title=?,visit_count=?,typed_count=?,last_visit_time=?,"
123 statement.BindString16(0, info.title());
124 statement.BindInt(1, info.visit_count());
125 statement.BindInt(2, info.typed_count());
126 statement.BindInt64(3, info.last_visit().ToInternalValue());
127 statement.BindInt(4, info.hidden() ? 1 : 0);
128 statement.BindInt64(5, url_id);
130 return statement.Run();
133 URLID URLDatabase::AddURLInternal(const history::URLRow& info,
135 // This function is used to insert into two different tables, so we have to
136 // do some shuffling. Unfortinately, we can't use the macro
137 // HISTORY_URL_ROW_FIELDS because that specifies the table name which is
138 // invalid in the insert syntax.
139 #define ADDURL_COMMON_SUFFIX \
140 " (url, title, visit_count, typed_count, "\
141 "last_visit_time, hidden) "\
142 "VALUES (?,?,?,?,?,?)"
143 const char* statement_name;
144 const char* statement_sql;
146 statement_name = "AddURLTemporary";
147 statement_sql = "INSERT INTO temp_urls" ADDURL_COMMON_SUFFIX;
149 statement_name = "AddURL";
150 statement_sql = "INSERT INTO urls" ADDURL_COMMON_SUFFIX;
152 #undef ADDURL_COMMON_SUFFIX
154 sql::Statement statement(GetDB().GetCachedStatement(
155 sql::StatementID(statement_name), statement_sql));
156 statement.BindString(0, GURLToDatabaseURL(info.url()));
157 statement.BindString16(1, info.title());
158 statement.BindInt(2, info.visit_count());
159 statement.BindInt(3, info.typed_count());
160 statement.BindInt64(4, info.last_visit().ToInternalValue());
161 statement.BindInt(5, info.hidden() ? 1 : 0);
163 if (!statement.Run()) {
164 VLOG(0) << "Failed to add url " << info.url().possibly_invalid_spec()
165 << " to table history.urls.";
168 return GetDB().GetLastInsertRowId();
171 bool URLDatabase::DeleteURLRow(URLID id) {
172 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
173 "DELETE FROM urls WHERE id = ?"));
174 statement.BindInt64(0, id);
176 if (!statement.Run())
179 // And delete any keyword visits.
180 return !has_keyword_search_terms_ || DeleteKeywordSearchTermForURL(id);
183 bool URLDatabase::CreateTemporaryURLTable() {
184 return CreateURLTable(true);
187 bool URLDatabase::CommitTemporaryURLTable() {
188 // See the comments in the header file as well as
189 // HistoryBackend::DeleteAllHistory() for more information on how this works
190 // and why it does what it does.
192 // Note that the main database overrides this to additionally create the
193 // supplimentary indices that the archived database doesn't need.
195 // Swap the url table out and replace it with the temporary one.
196 if (!GetDB().Execute("DROP TABLE urls")) {
197 NOTREACHED() << GetDB().GetErrorMessage();
200 if (!GetDB().Execute("ALTER TABLE temp_urls RENAME TO urls")) {
201 NOTREACHED() << GetDB().GetErrorMessage();
205 // Create the index over URLs. This is needed for the main, in-memory, and
206 // archived databases, so we always do it. The supplimentary indices used by
207 // the main database are not created here. When deleting all history, they
208 // are created by HistoryDatabase::RecreateAllButStarAndURLTables().
209 CreateMainURLIndex();
214 bool URLDatabase::InitURLEnumeratorForEverything(URLEnumerator* enumerator) {
215 DCHECK(!enumerator->initialized_);
216 std::string sql("SELECT ");
217 sql.append(kURLRowFields);
218 sql.append(" FROM urls");
219 enumerator->statement_.Assign(GetDB().GetUniqueStatement(sql.c_str()));
220 enumerator->initialized_ = enumerator->statement_.is_valid();
221 return enumerator->statement_.is_valid();
224 bool URLDatabase::InitURLEnumeratorForSignificant(URLEnumerator* enumerator) {
225 DCHECK(!enumerator->initialized_);
226 std::string sql("SELECT ");
227 sql.append(kURLRowFields);
228 sql.append(" FROM urls WHERE last_visit_time >= ? OR visit_count >= ? OR "
230 enumerator->statement_.Assign(GetDB().GetUniqueStatement(sql.c_str()));
231 enumerator->statement_.BindInt64(
232 0, AutocompleteAgeThreshold().ToInternalValue());
233 enumerator->statement_.BindInt(1, kLowQualityMatchVisitLimit);
234 enumerator->statement_.BindInt(2, kLowQualityMatchTypedLimit);
235 enumerator->initialized_ = enumerator->statement_.is_valid();
236 return enumerator->statement_.is_valid();
239 bool URLDatabase::AutocompleteForPrefix(const std::string& prefix,
243 // NOTE: this query originally sorted by starred as the second parameter. But
244 // as bookmarks is no longer part of the db we no longer include the order
250 sql = "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls "
251 "WHERE url >= ? AND url < ? AND hidden = 0 AND typed_count > 0 "
252 "ORDER BY typed_count DESC, visit_count DESC, last_visit_time DESC "
256 sql = "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls "
257 "WHERE url >= ? AND url < ? AND hidden = 0 "
258 "ORDER BY typed_count DESC, visit_count DESC, last_visit_time DESC "
262 sql::Statement statement(
263 GetDB().GetCachedStatement(sql::StatementID(__FILE__, line), sql));
265 // We will find all strings between "prefix" and this string, which is prefix
266 // followed by the maximum character size. Use 8-bit strings for everything
267 // so we can be sure sqlite is comparing everything in 8-bit mode. Otherwise,
268 // it will have to convert strings either to UTF-8 or UTF-16 for comparison.
269 std::string end_query(prefix);
270 end_query.push_back(std::numeric_limits<unsigned char>::max());
272 statement.BindString(0, prefix);
273 statement.BindString(1, end_query);
274 statement.BindInt(2, static_cast<int>(max_results));
276 while (statement.Step()) {
277 history::URLRow info;
278 FillURLRow(statement, &info);
279 if (info.url().is_valid())
280 results->push_back(info);
282 return !results->empty();
285 bool URLDatabase::IsTypedHost(const std::string& host) {
286 const char* schemes[] = {
287 content::kHttpScheme,
288 content::kHttpsScheme,
292 for (size_t i = 0; i < arraysize(schemes); ++i) {
293 std::string scheme_and_host(schemes[i]);
294 scheme_and_host += content::kStandardSchemeSeparator + host;
295 if (AutocompleteForPrefix(scheme_and_host + '/', 1, true, &dummy) ||
296 AutocompleteForPrefix(scheme_and_host + ':', 1, true, &dummy))
302 bool URLDatabase::FindShortestURLFromBase(const std::string& base,
303 const std::string& url,
307 history::URLRow* info) {
308 // Select URLs that start with |base| and are prefixes of |url|. All parts
309 // of this query except the substr() call can be done using the index. We
310 // could do this query with a couple of LIKE or GLOB statements as well, but
311 // those wouldn't use the index, and would run into problems with "wildcard"
312 // characters that appear in URLs (% for LIKE, or *, ? for GLOB).
313 std::string sql("SELECT ");
314 sql.append(kURLRowFields);
315 sql.append(" FROM urls WHERE url ");
316 sql.append(allow_base ? ">=" : ">");
317 sql.append(" ? AND url < :end AND url = substr(:end, 1, length(url)) "
318 "AND hidden = 0 AND visit_count >= ? AND typed_count >= ? "
319 "ORDER BY url LIMIT 1");
320 sql::Statement statement(GetDB().GetUniqueStatement(sql.c_str()));
321 statement.BindString(0, base);
322 statement.BindString(1, url); // :end
323 statement.BindInt(2, min_visits);
324 statement.BindInt(3, min_typed);
326 if (!statement.Step())
330 FillURLRow(statement, info);
334 bool URLDatabase::GetTextMatches(const string16& query,
336 ScopedVector<QueryNode> query_nodes;
337 query_parser_.ParseQueryNodes(query, &query_nodes.get());
340 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
341 "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE hidden = 0"));
343 while (statement.Step()) {
344 std::vector<QueryWord> query_words;
345 string16 url = base::i18n::ToLower(statement.ColumnString16(1));
346 query_parser_.ExtractQueryWords(url, &query_words);
348 if (gurl.is_valid()) {
349 // Decode punycode to match IDN.
350 // |query_words| won't be shown to user - therefore we can use empty
351 // |languages| to reduce dependency (no need to call PrefService).
352 string16 ascii = base::ASCIIToUTF16(gurl.host());
353 string16 utf = net::IDNToUnicode(gurl.host(), std::string());
355 query_parser_.ExtractQueryWords(utf, &query_words);
357 string16 title = base::i18n::ToLower(statement.ColumnString16(2));
358 query_parser_.ExtractQueryWords(title, &query_words);
360 if (query_parser_.DoesQueryMatch(query_words, query_nodes.get())) {
361 history::URLResult info;
362 FillURLRow(statement, &info);
363 if (info.url().is_valid())
364 results->push_back(info);
367 return !results->empty();
370 bool URLDatabase::InitKeywordSearchTermsTable() {
371 has_keyword_search_terms_ = true;
372 if (!GetDB().DoesTableExist("keyword_search_terms")) {
373 if (!GetDB().Execute("CREATE TABLE keyword_search_terms ("
374 "keyword_id INTEGER NOT NULL," // ID of the TemplateURL.
375 "url_id INTEGER NOT NULL," // ID of the url.
376 "lower_term LONGVARCHAR NOT NULL," // The search term, in lower case.
377 "term LONGVARCHAR NOT NULL)")) // The actual search term.
383 bool URLDatabase::CreateKeywordSearchTermsIndices() {
385 if (!GetDB().Execute(
386 "CREATE INDEX IF NOT EXISTS keyword_search_terms_index1 ON "
387 "keyword_search_terms (keyword_id, lower_term)")) {
392 if (!GetDB().Execute(
393 "CREATE INDEX IF NOT EXISTS keyword_search_terms_index2 ON "
394 "keyword_search_terms (url_id)")) {
398 // For query or deletion by term.
399 if (!GetDB().Execute(
400 "CREATE INDEX IF NOT EXISTS keyword_search_terms_index3 ON "
401 "keyword_search_terms (term)")) {
407 bool URLDatabase::DropKeywordSearchTermsTable() {
408 // This will implicitly delete the indices over the table.
409 return GetDB().Execute("DROP TABLE keyword_search_terms");
412 bool URLDatabase::SetKeywordSearchTermsForURL(URLID url_id,
413 TemplateURLID keyword_id,
414 const string16& term) {
415 DCHECK(url_id && keyword_id && !term.empty());
417 sql::Statement exist_statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
418 "SELECT term FROM keyword_search_terms "
419 "WHERE keyword_id = ? AND url_id = ?"));
420 exist_statement.BindInt64(0, keyword_id);
421 exist_statement.BindInt64(1, url_id);
423 if (exist_statement.Step())
424 return true; // Term already exists, no need to add it.
426 if (!exist_statement.Succeeded())
429 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
430 "INSERT INTO keyword_search_terms (keyword_id, url_id, lower_term, term) "
431 "VALUES (?,?,?,?)"));
432 statement.BindInt64(0, keyword_id);
433 statement.BindInt64(1, url_id);
434 statement.BindString16(2, base::i18n::ToLower(term));
435 statement.BindString16(3, term);
436 return statement.Run();
439 bool URLDatabase::GetKeywordSearchTermRow(URLID url_id,
440 KeywordSearchTermRow* row) {
442 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
443 "SELECT keyword_id, term FROM keyword_search_terms WHERE url_id=?"));
444 statement.BindInt64(0, url_id);
446 if (!statement.Step())
450 row->url_id = url_id;
451 row->keyword_id = statement.ColumnInt64(0);
452 row->term = statement.ColumnString16(1);
457 bool URLDatabase::GetKeywordSearchTermRows(
458 const string16& term,
459 std::vector<KeywordSearchTermRow>* rows) {
460 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
461 "SELECT keyword_id, url_id FROM keyword_search_terms WHERE term=?"));
462 statement.BindString16(0, term);
464 if (!statement.is_valid())
467 while (statement.Step()) {
468 KeywordSearchTermRow row;
469 row.url_id = statement.ColumnInt64(1);
470 row.keyword_id = statement.ColumnInt64(0);
472 rows->push_back(row);
477 void URLDatabase::DeleteAllSearchTermsForKeyword(
478 TemplateURLID keyword_id) {
480 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
481 "DELETE FROM keyword_search_terms WHERE keyword_id=?"));
482 statement.BindInt64(0, keyword_id);
487 void URLDatabase::GetMostRecentKeywordSearchTerms(
488 TemplateURLID keyword_id,
489 const string16& prefix,
491 std::vector<KeywordSearchTermVisit>* matches) {
492 // NOTE: the keyword_id can be zero if on first run the user does a query
493 // before the TemplateURLService has finished loading. As the chances of this
494 // occurring are small, we ignore it.
498 DCHECK(!prefix.empty());
499 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
500 "SELECT DISTINCT kv.term, u.visit_count, u.last_visit_time "
501 "FROM keyword_search_terms kv "
502 "JOIN urls u ON kv.url_id = u.id "
503 "WHERE kv.keyword_id = ? AND kv.lower_term >= ? AND kv.lower_term < ? "
504 "ORDER BY u.last_visit_time DESC LIMIT ?"));
506 // NOTE: Keep this ToLower() call in sync with search_provider.cc.
507 string16 lower_prefix = base::i18n::ToLower(prefix);
508 // This magic gives us a prefix search.
509 string16 next_prefix = lower_prefix;
510 next_prefix[next_prefix.size() - 1] =
511 next_prefix[next_prefix.size() - 1] + 1;
512 statement.BindInt64(0, keyword_id);
513 statement.BindString16(1, lower_prefix);
514 statement.BindString16(2, next_prefix);
515 statement.BindInt(3, max_count);
517 KeywordSearchTermVisit visit;
518 while (statement.Step()) {
519 visit.term = statement.ColumnString16(0);
520 visit.visits = statement.ColumnInt(1);
521 visit.time = base::Time::FromInternalValue(statement.ColumnInt64(2));
522 matches->push_back(visit);
526 bool URLDatabase::DeleteKeywordSearchTerm(const string16& term) {
527 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
528 "DELETE FROM keyword_search_terms WHERE term=?"));
529 statement.BindString16(0, term);
531 return statement.Run();
534 bool URLDatabase::DeleteKeywordSearchTermForURL(URLID url_id) {
535 sql::Statement statement(GetDB().GetCachedStatement(
536 SQL_FROM_HERE, "DELETE FROM keyword_search_terms WHERE url_id=?"));
537 statement.BindInt64(0, url_id);
538 return statement.Run();
541 bool URLDatabase::DropStarredIDFromURLs() {
542 if (!GetDB().DoesColumnExist("urls", "starred_id"))
543 return true; // urls is already updated, no need to continue.
545 // Create a temporary table to contain the new URLs table.
546 if (!CreateTemporaryURLTable()) {
551 // Copy the contents.
552 if (!GetDB().Execute(
553 "INSERT INTO temp_urls (id, url, title, visit_count, typed_count, "
554 "last_visit_time, hidden, favicon_id) "
555 "SELECT id, url, title, visit_count, typed_count, last_visit_time, "
556 "hidden, favicon_id FROM urls")) {
557 NOTREACHED() << GetDB().GetErrorMessage();
561 // Rename/commit the tmp table.
562 CommitTemporaryURLTable();
567 bool URLDatabase::CreateURLTable(bool is_temporary) {
568 const char* name = is_temporary ? "temp_urls" : "urls";
569 if (GetDB().DoesTableExist(name))
573 sql.append("CREATE TABLE ");
576 "id INTEGER PRIMARY KEY,"
579 "visit_count INTEGER DEFAULT 0 NOT NULL,"
580 "typed_count INTEGER DEFAULT 0 NOT NULL,"
581 "last_visit_time INTEGER NOT NULL,"
582 "hidden INTEGER DEFAULT 0 NOT NULL,"
583 "favicon_id INTEGER DEFAULT 0 NOT NULL)"); // favicon_id is not used now.
585 return GetDB().Execute(sql.c_str());
588 bool URLDatabase::CreateMainURLIndex() {
589 // Index over URLs so we can quickly look up based on URL.
590 return GetDB().Execute(
591 "CREATE INDEX IF NOT EXISTS urls_url_index ON urls (url)");
594 } // namespace history