1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/history/visit_database.h"
12 #include "base/logging.h"
13 #include "base/strings/string_number_conversions.h"
14 #include "chrome/browser/history/url_database.h"
15 #include "chrome/browser/history/visit_filter.h"
16 #include "chrome/common/url_constants.h"
17 #include "content/public/common/page_transition_types.h"
18 #include "sql/statement.h"
// Constructor. No statements of its body are visible in this view.
22 VisitDatabase::VisitDatabase() {
// Destructor. No statements of its body are visible in this view.
25 VisitDatabase::~VisitDatabase() {
// Creates the "visits" and "visit_source" tables when DoesTableExist() reports
// them missing, then issues CREATE INDEX IF NOT EXISTS statements over the
// url, from_visit and visit_time columns of "visits".
// NOTE(review): the branches taken when an Execute() call fails, and the final
// return, are not visible in this view -- confirm them before relying on the
// return value.
// NOTE(review): the column list visible below does not mention from_visit or
// segment_id, although the from_visit index below and AddVisit() use them --
// confirm they are declared on lines outside this view.
28 bool VisitDatabase::InitVisitTable() {
29 if (!GetDB().DoesTableExist("visits")) {
30 if (!GetDB().Execute("CREATE TABLE visits("
31 "id INTEGER PRIMARY KEY,"
32 "url INTEGER NOT NULL," // key of the URL this corresponds to
33 "visit_time INTEGER NOT NULL,"
35 "transition INTEGER DEFAULT 0 NOT NULL,"
37 // Some old DBs may have an "is_indexed" field here, but this is no
38 // longer used and should NOT be read or written from any longer.
39 "visit_duration INTEGER DEFAULT 0 NOT NULL)"))
43 // Visit source table contains the source information for all the visits. To
44 // save space, we do not record those user browsed visits which would be the
45 // majority in this table. Only other sources are recorded.
46 // Due to the tight relationship between visit_source and visits table, they
47 // should be created and dropped at the same time.
48 if (!GetDB().DoesTableExist("visit_source")) {
49 if (!GetDB().Execute("CREATE TABLE visit_source("
50 "id INTEGER PRIMARY KEY,source INTEGER NOT NULL)"))
54 // Index over url so we can quickly find visits for a page.
56 "CREATE INDEX IF NOT EXISTS visits_url_index ON visits (url)"))
59 // Create an index over from visits so that we can efficiently find
60 // referrers and redirects.
62 "CREATE INDEX IF NOT EXISTS visits_from_index ON "
63 "visits (from_visit)"))
66 // Create an index over time so that we can efficiently find the visits in a
67 // given time range (most history views are time-based).
69 "CREATE INDEX IF NOT EXISTS visits_time_index ON "
70 "visits (visit_time)"))
// Drops the "visit_source" table (tolerating its absence via IF EXISTS) and
// then the "visits" table. Because the two Execute() calls are joined with &&,
// "visits" is only dropped when the first statement succeeds. Note that the
// second statement has no IF EXISTS clause.
76 bool VisitDatabase::DropVisitTable() {
77 // This will also drop the indices over the table.
79 GetDB().Execute("DROP TABLE IF EXISTS visit_source") &&
80 GetDB().Execute("DROP TABLE visits");
// Copies the current result row of |statement| into |visit|. Columns are read
// by position: 0 = visit id, 1 = URL id, 2 = visit time, 3 = referring visit,
// 4 = transition, 5 = segment id, 6 = visit duration. The time and duration
// columns hold int64 values that are converted with FromInternalValue().
83 // Must be in sync with HISTORY_VISIT_ROW_FIELDS.
85 void VisitDatabase::FillVisitRow(sql::Statement& statement, VisitRow* visit) {
86 visit->visit_id = statement.ColumnInt64(0);
87 visit->url_id = statement.ColumnInt64(1);
88 visit->visit_time = base::Time::FromInternalValue(statement.ColumnInt64(2));
89 visit->referring_visit = statement.ColumnInt64(3);
90 visit->transition = content::PageTransitionFromInt(statement.ColumnInt(4));
91 visit->segment_id = statement.ColumnInt64(5);
92 visit->visit_duration =
93 base::TimeDelta::FromInternalValue(statement.ColumnInt64(6));
// Steps |statement| until it yields no more rows, converting each row with
// FillVisitRow() and appending it to |visits|. Returns statement.Succeeded()
// once the loop ends.
// NOTE(review): the action taken when statement.is_valid() is false is not
// visible in this view -- presumably an early failure return; confirm.
97 bool VisitDatabase::FillVisitVector(sql::Statement& statement,
98 VisitVector* visits) {
99 if (!statement.is_valid())
102 while (statement.Step()) {
103 history::VisitRow visit;
104 FillVisitRow(statement, &visit);
105 visits->push_back(visit);
108 return statement.Succeeded();
// Steps |statement| and appends rows to |visits| while honouring the duplicate
// policy and result cap carried by |options|:
//  - unless the policy is KEEP_ALL_DUPLICATES, |found_urls| records the URL
//    ids already seen and each row's url_id is looked up in it;
//  - for REMOVE_DUPLICATES_PER_DAY, |found_urls_midnight| is updated whenever
//    a row's LocalMidnight() differs from the tracked value;
//  - the size of |visits| is compared against options.EffectiveMaxCount()
//    before each row is appended.
// NOTE(review): the statements executed when the day changes, when a duplicate
// is found and when the cap is reached, as well as the declaration of |visit|
// and the function's return value, are not visible in this view -- presumably
// reset the set / skip the row / stop; confirm against the full file.
112 bool VisitDatabase::FillVisitVectorWithOptions(sql::Statement& statement,
113 const QueryOptions& options,
114 VisitVector* visits) {
115 std::set<URLID> found_urls;
117 // Keeps track of the day that |found_urls| is holding the URLs for, in order
118 // to handle removing per-day duplicates.
119 base::Time found_urls_midnight;
121 while (statement.Step()) {
123 FillVisitRow(statement, &visit);
125 if (options.duplicate_policy != QueryOptions::KEEP_ALL_DUPLICATES) {
126 if (options.duplicate_policy == QueryOptions::REMOVE_DUPLICATES_PER_DAY &&
127 found_urls_midnight != visit.visit_time.LocalMidnight()) {
129 found_urls_midnight = visit.visit_time.LocalMidnight();
131 // Make sure the URL this visit corresponds to is unique.
132 if (found_urls.find(visit.url_id) != found_urls.end())
134 found_urls.insert(visit.url_id);
137 if (static_cast<int>(visits->size()) >= options.EffectiveMaxCount())
139 visits->push_back(visit);
// Inserts |visit| into the "visits" table and stores the row id reported by
// GetLastInsertRowId() back into visit->visit_id. When |source| is anything
// other than SOURCE_BROWSED, a matching (id, source) row is also inserted into
// "visit_source". Returns visit->visit_id on the path visible here.
// NOTE(review): the statements that follow each VLOG on failure are not
// visible in this view -- presumably an early return; confirm.
144 VisitID VisitDatabase::AddVisit(VisitRow* visit, VisitSource source) {
145 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
146 "INSERT INTO visits "
147 "(url, visit_time, from_visit, transition, segment_id, "
148 "visit_duration) VALUES (?,?,?,?,?,?)"));
// Parameters are bound in the same order as the column list above.
149 statement.BindInt64(0, visit->url_id);
150 statement.BindInt64(1, visit->visit_time.ToInternalValue());
151 statement.BindInt64(2, visit->referring_visit);
152 statement.BindInt64(3, visit->transition);
153 statement.BindInt64(4, visit->segment_id);
154 statement.BindInt64(5, visit->visit_duration.ToInternalValue());
156 if (!statement.Run()) {
157 VLOG(0) << "Failed to execute visit insert statement: "
158 << "url_id = " << visit->url_id;
162 visit->visit_id = GetDB().GetLastInsertRowId();
164 if (source != SOURCE_BROWSED) {
165 // Record the source of this visit when it is not browsed.
166 sql::Statement statement1(GetDB().GetCachedStatement(SQL_FROM_HERE,
167 "INSERT INTO visit_source (id, source) VALUES (?,?)"));
168 statement1.BindInt64(0, visit->visit_id);
169 statement1.BindInt64(1, source);
171 if (!statement1.Run()) {
172 VLOG(0) << "Failed to execute visit_source insert statement: "
173 << "id = " << visit->visit_id;
178 return visit->visit_id;
// Removes |visit| from the database in three steps: re-points every row whose
// from_visit equals the deleted visit's id at the deleted visit's own
// referring_visit, deletes the row from "visits", and finally deletes any row
// with the same id from "visit_source".
// NOTE(review): the handling of a failed update_chain.Run() and the calls that
// execute the two DELETE statements are not visible in this view; confirm.
181 void VisitDatabase::DeleteVisit(const VisitRow& visit) {
182 // Patch around this visit. Any visits that this went to will now have their
183 // "source" be the deleted visit's source.
184 sql::Statement update_chain(GetDB().GetCachedStatement(SQL_FROM_HERE,
185 "UPDATE visits SET from_visit=? WHERE from_visit=?"));
186 update_chain.BindInt64(0, visit.referring_visit);
187 update_chain.BindInt64(1, visit.visit_id);
188 if (!update_chain.Run())
191 // Now delete the actual visit.
192 sql::Statement del(GetDB().GetCachedStatement(SQL_FROM_HERE,
193 "DELETE FROM visits WHERE id=?"));
194 del.BindInt64(0, visit.visit_id);
198 // Try to delete the entry in visit_source table as well.
199 // If the visit was browsed, there is no corresponding entry in visit_source
200 // table, and nothing will be deleted.
201 del.Assign(GetDB().GetCachedStatement(SQL_FROM_HERE,
202 "DELETE FROM visit_source WHERE id=?"));
203 del.BindInt64(0, visit.visit_id);
// Looks up the visit whose id equals |visit_id| and, when a row is returned,
// fills |out_visit| from it via FillVisitRow(). The id read back is then
// compared with the requested one, both with a DCHECK and with a regular
// runtime check.
// NOTE(review): the return statements (no row, id mismatch, success) are not
// visible in this view; confirm.
207 bool VisitDatabase::GetRowForVisit(VisitID visit_id, VisitRow* out_visit) {
208 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
209 "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits WHERE id=?"));
210 statement.BindInt64(0, visit_id);
212 if (!statement.Step())
215 FillVisitRow(statement, out_visit);
217 // We got a different visit than we asked for, something is wrong.
218 DCHECK_EQ(visit_id, out_visit->visit_id);
219 if (visit_id != out_visit->visit_id)
// Writes every field of |visit| back to the row whose id equals
// visit.visit_id and returns the result of statement.Run(). A visit that names
// itself as its own referrer is rejected up front (DCHECK plus runtime check).
// NOTE(review): the statement executed when the self-reference check fires and
// the leading part of the SQL text (before "url=?") are not visible in this
// view; confirm.
225 bool VisitDatabase::UpdateVisitRow(const VisitRow& visit) {
226 // Don't store inconsistent data to the database.
227 DCHECK_NE(visit.visit_id, visit.referring_visit);
228 if (visit.visit_id == visit.referring_visit)
231 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
233 "url=?,visit_time=?,from_visit=?,transition=?,segment_id=?,"
234 "visit_duration=? WHERE id=?"));
// Parameters 0-5 are the assigned columns in order; parameter 6 is the id in
// the WHERE clause.
235 statement.BindInt64(0, visit.url_id);
236 statement.BindInt64(1, visit.visit_time.ToInternalValue());
237 statement.BindInt64(2, visit.referring_visit);
238 statement.BindInt64(3, visit.transition);
239 statement.BindInt64(4, visit.segment_id);
240 statement.BindInt64(5, visit.visit_duration.ToInternalValue());
241 statement.BindInt64(6, visit.visit_id);
243 return statement.Run();
// Collects visit rows for |url_id| (bound as parameter 0), ordered by
// ascending visit_time, by delegating to FillVisitVector() and returning its
// result.
// NOTE(review): the FROM/WHERE portion of the query and any statements before
// the query is built are not visible in this view; confirm.
246 bool VisitDatabase::GetVisitsForURL(URLID url_id, VisitVector* visits) {
249 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
250 "SELECT" HISTORY_VISIT_ROW_FIELDS
253 "ORDER BY visit_time ASC"));
254 statement.BindInt64(0, url_id);
255 return FillVisitVector(statement, visits);
// Returns the visits to |url_id| that fall inside the effective time range of
// |options| and pass the transition filter in the query below (CHAIN_END bit
// set; core transition not AUTO_SUBFRAME, MANUAL_SUBFRAME or
// KEYWORD_GENERATED), newest first. Rows are collected through
// FillVisitVectorWithOptions(), whose result is returned.
//
// A separate branch uses GetMostRecentVisitForURL() to obtain a single row.
// NOTE(review): the condition guarding that branch, options.REMOVE_ALL_DUPLICATES,
// names an enumerator through the object rather than comparing
// options.duplicate_policy against it (as FillVisitVectorWithOptions() does).
// Its truth value is therefore a compile-time constant that depends only on
// the enumerator's numeric value -- this looks unintended; confirm and, if so,
// compare options.duplicate_policy == QueryOptions::REMOVE_ALL_DUPLICATES.
// NOTE(review): the return expression of that branch is true only when
// EffectiveMaxCount() == 0, in which case nothing was appended -- verify the
// intended meaning of the return value before enabling the branch.
// NOTE(review): the declaration of |visit_row|, the "FROM visits" part of the
// query and the block delimiters are not visible in this view.
258 bool VisitDatabase::GetVisibleVisitsForURL(URLID url_id,
259 const QueryOptions& options,
260 VisitVector* visits) {
263 if (options.REMOVE_ALL_DUPLICATES) {
265 VisitID visit_id = GetMostRecentVisitForURL(url_id, &visit_row);
266 if (visit_id && options.EffectiveMaxCount() != 0) {
267 visits->push_back(visit_row);
269 return options.EffectiveMaxCount() == 0 && visit_id;
271 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
272 "SELECT" HISTORY_VISIT_ROW_FIELDS
274 "WHERE url=? AND visit_time >= ? AND visit_time < ? "
275 "AND (transition & ?) != 0 " // CHAIN_END
276 "AND (transition & ?) NOT IN (?, ?, ?) " // NO SUBFRAME or
278 "ORDER BY visit_time DESC"));
// Parameter order: url id, begin time, end time, CHAIN_END mask, core mask,
// then the three excluded core transition types.
279 statement.BindInt64(0, url_id);
280 statement.BindInt64(1, options.EffectiveBeginTime());
281 statement.BindInt64(2, options.EffectiveEndTime());
282 statement.BindInt(3, content::PAGE_TRANSITION_CHAIN_END);
283 statement.BindInt(4, content::PAGE_TRANSITION_CORE_MASK);
284 statement.BindInt(5, content::PAGE_TRANSITION_AUTO_SUBFRAME);
285 statement.BindInt(6, content::PAGE_TRANSITION_MANUAL_SUBFRAME);
286 statement.BindInt(7, content::PAGE_TRANSITION_KEYWORD_GENERATED);
288 return FillVisitVectorWithOptions(statement, options, visits);
// For each entry of |times|, queries the visits whose visit_time equals that
// entry's internal value exactly and appends the rows to |visits| through
// FillVisitVector(). The cached statement is re-obtained on every iteration.
// NOTE(review): the statement executed when FillVisitVector() fails and the
// final return are not visible in this view; confirm.
292 bool VisitDatabase::GetVisitsForTimes(const std::vector<base::Time>& times,
293 VisitVector* visits) {
296 for (std::vector<base::Time>::const_iterator it = times.begin();
297 it != times.end(); ++it) {
298 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
299 "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
300 "WHERE visit_time == ?"));
302 statement.BindInt64(0, it->ToInternalValue());
304 if (!FillVisitVector(statement, visits))
// Collects every visit with begin_time <= visit_time < end_time, ordered by
// visit_time and limited to max_results rows, via FillVisitVector(). A zero
// end time and a zero max_results are each replaced by the maximum int64, so
// zero leaves that bound open.
// NOTE(review): the declarations of |end_time| and |max_results| are on
// signature lines not visible in this view.
// NOTE(review): the two adjacent string literals below concatenate to
// "... visit_time < ?ORDER BY ..." with no separating space -- confirm the SQL
// parser accepts this, or add a trailing space to the first literal.
310 bool VisitDatabase::GetAllVisitsInRange(base::Time begin_time,
313 VisitVector* visits) {
316 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
317 "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
318 "WHERE visit_time >= ? AND visit_time < ?"
319 "ORDER BY visit_time LIMIT ?"));
321 // See GetVisibleVisitsInRange for more info on how these times are bound.
322 int64 end = end_time.ToInternalValue();
323 statement.BindInt64(0, begin_time.ToInternalValue());
324 statement.BindInt64(1, end ? end : std::numeric_limits<int64>::max());
325 statement.BindInt64(2,
326 max_results ? max_results : std::numeric_limits<int64>::max());
328 return FillVisitVector(statement, visits);
// Collects visits with begin_time <= visit_time < end_time whose core
// transition (transition masked with PAGE_TRANSITION_CORE_MASK) equals
// |transition|, ordered by visit_time and limited to max_results rows, via
// FillVisitVector(). A zero end time and a zero max_results are each replaced
// by the maximum int64.
// NOTE(review): the declarations of |end_time| and |max_results| are on
// signature lines not visible in this view.
// NOTE(review): the literals "... == ?" and "ORDER BY ..." below concatenate
// without a separating space ("?ORDER BY") -- confirm the SQL parser accepts
// this, or add a trailing space to the first literal.
331 bool VisitDatabase::GetVisitsInRangeForTransition(
332 base::Time begin_time,
335 content::PageTransition transition,
336 VisitVector* visits) {
340 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
341 "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
342 "WHERE visit_time >= ? AND visit_time < ? "
343 "AND (transition & ?) == ?"
344 "ORDER BY visit_time LIMIT ?"));
346 // See GetVisibleVisitsInRange for more info on how these times are bound.
347 int64 end = end_time.ToInternalValue();
348 statement.BindInt64(0, begin_time.ToInternalValue());
349 statement.BindInt64(1, end ? end : std::numeric_limits<int64>::max());
350 statement.BindInt(2, content::PAGE_TRANSITION_CORE_MASK);
351 statement.BindInt(3, transition);
352 statement.BindInt64(4,
353 max_results ? max_results : std::numeric_limits<int64>::max());
355 return FillVisitVector(statement, visits);
// Collects the visits inside the effective time range of |options| that pass
// the transition filter in the query below (CHAIN_END bit set; core transition
// not AUTO_SUBFRAME, MANUAL_SUBFRAME or KEYWORD_GENERATED), newest first with
// id as a tie-breaker. Duplicate handling and the result cap are applied by
// FillVisitVectorWithOptions(), whose result is returned.
358 bool VisitDatabase::GetVisibleVisitsInRange(const QueryOptions& options,
359 VisitVector* visits) {
361 // The visit_time values can be duplicated in a redirect chain, so we sort
362 // by id too, to ensure a consistent ordering just in case.
363 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
364 "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
365 "WHERE visit_time >= ? AND visit_time < ? "
366 "AND (transition & ?) != 0 " // CHAIN_END
367 "AND (transition & ?) NOT IN (?, ?, ?) " // NO SUBFRAME or
369 "ORDER BY visit_time DESC, id DESC"));
// Parameter order: begin time, end time, CHAIN_END mask, core mask, then the
// three excluded core transition types.
371 statement.BindInt64(0, options.EffectiveBeginTime());
372 statement.BindInt64(1, options.EffectiveEndTime());
373 statement.BindInt(2, content::PAGE_TRANSITION_CHAIN_END);
374 statement.BindInt(3, content::PAGE_TRANSITION_CORE_MASK);
375 statement.BindInt(4, content::PAGE_TRANSITION_AUTO_SUBFRAME);
376 statement.BindInt(5, content::PAGE_TRANSITION_MANUAL_SUBFRAME);
377 statement.BindInt(6, content::PAGE_TRANSITION_KEYWORD_GENERATED);
379 return FillVisitVectorWithOptions(statement, options, visits);
// For every (begin, end) pair in time_filter.times(), queries the visits in
// that half-open interval that have the CHAIN_START bit set and whose core
// transition is TYPED or AUTO_BOOKMARK, newest first, and appends them to
// |visits|. After each append the size of |visits| is compared with
// max_results (only when max_results > 0).
// NOTE(review): the declaration of |max_results| and of the loop-local
// |visit|, and the statement executed when the cap is reached, are not visible
// in this view -- presumably an early return; confirm.
// NOTE(review): visits->reserve(max_results) is called unconditionally, while
// the cap check below treats max_results <= 0 as "no limit"; confirm that a
// non-positive value is handled before this call.
382 void VisitDatabase::GetDirectVisitsDuringTimes(const VisitFilter& time_filter,
384 VisitVector* visits) {
387 visits->reserve(max_results);
388 for (VisitFilter::TimeVector::const_iterator it = time_filter.times().begin();
389 it != time_filter.times().end(); ++it) {
390 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
391 "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
392 "WHERE visit_time >= ? AND visit_time < ? "
393 "AND (transition & ?) != 0 " // CHAIN_START
394 "AND (transition & ?) IN (?, ?) " // TYPED or AUTO_BOOKMARK only
395 "ORDER BY visit_time DESC, id DESC"));
397 statement.BindInt64(0, it->first.ToInternalValue());
398 statement.BindInt64(1, it->second.ToInternalValue());
399 statement.BindInt(2, content::PAGE_TRANSITION_CHAIN_START);
400 statement.BindInt(3, content::PAGE_TRANSITION_CORE_MASK);
401 statement.BindInt(4, content::PAGE_TRANSITION_TYPED);
402 statement.BindInt(5, content::PAGE_TRANSITION_AUTO_BOOKMARK);
404 while (statement.Step()) {
406 FillVisitRow(statement, &visit);
407 visits->push_back(visit);
409 if (max_results > 0 && static_cast<int>(visits->size()) >= max_results)
// Finds the newest visit for |url_id| (ordered by visit_time, then id, both
// descending). Returns 0 when the statement yields no row. Otherwise returns
// the visit id, either taken from |visit_row| after filling it or read
// directly from column 0.
// NOTE(review): the condition that selects between the two trailing returns
// (presumably a null check on |visit_row|), the WHERE clause and the LIMIT
// part of the query are not visible in this view; confirm.
415 VisitID VisitDatabase::GetMostRecentVisitForURL(URLID url_id,
416 VisitRow* visit_row) {
417 // The visit_time values can be duplicated in a redirect chain, so we sort
418 // by id too, to ensure a consistent ordering just in case.
419 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
420 "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
422 "ORDER BY visit_time DESC, id DESC "
424 statement.BindInt64(0, url_id);
425 if (!statement.Step())
426 return 0; // No visits for this URL.
429 FillVisitRow(statement, visit_row);
430 return visit_row->visit_id;
432 return statement.ColumnInt64(0);
// Collects the newest visits for |url_id| (ordered by visit_time, then id,
// both descending) via FillVisitVector(). |url_id| is bound as parameter 0 and
// max_results as parameter 1.
// NOTE(review): the declaration of |max_results| and the FROM/WHERE/LIMIT
// parts of the query are not visible in this view; confirm that parameter 1
// feeds a LIMIT clause.
435 bool VisitDatabase::GetMostRecentVisitsForURL(URLID url_id,
437 VisitVector* visits) {
440 // The visit_time values can be duplicated in a redirect chain, so we sort
441 // by id too, to ensure a consistent ordering just in case.
442 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
443 "SELECT" HISTORY_VISIT_ROW_FIELDS
446 "ORDER BY visit_time DESC, id DESC "
448 statement.BindInt64(0, url_id);
449 statement.BindInt(1, max_results);
451 return FillVisitVector(statement, visits);
// Looks for a visit whose from_visit equals |from_visit| and whose transition
// has any bit of PAGE_TRANSITION_IS_REDIRECT_MASK set, joining against "urls"
// to obtain the destination. Returns false when no row is found. Otherwise
// column 0 is written to |*to_visit| and column 1, wrapped in a GURL, to
// |*to_url|.
// NOTE(review): the declarations of |to_visit| and |to_url|, the SELECT column
// list, any null checks on the out-parameters and the success return are not
// visible in this view; confirm.
454 bool VisitDatabase::GetRedirectFromVisit(VisitID from_visit,
457 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
459 "FROM visits v JOIN urls u ON v.url = u.id "
460 "WHERE v.from_visit = ? "
461 "AND (v.transition & ?) != 0")); // IS_REDIRECT_MASK
462 statement.BindInt64(0, from_visit);
463 statement.BindInt(1, content::PAGE_TRANSITION_IS_REDIRECT_MASK);
465 if (!statement.Step())
466 return false; // No redirect from this visit. (Or SQL error)
468 *to_visit = statement.ColumnInt64(0);
470 *to_url = GURL(statement.ColumnString(1));
// Loads the row for |to_visit| with GetRowForVisit(), reports its
// referring_visit through |*from_visit|, and then queries the URL of that
// referring visit (joined against "urls") to report it through |*from_url|.
// NOTE(review): the declarations of |from_visit|, |from_url| and |row|, the
// SELECT column list and WHERE clause, any null checks on the out-parameters,
// and every return statement are not visible in this view; confirm.
474 bool VisitDatabase::GetRedirectToVisit(VisitID to_visit,
478 if (!GetRowForVisit(to_visit, &row))
482 *from_visit = row.referring_visit;
485 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
487 "FROM visits v JOIN urls u ON v.url = u.id "
489 statement.BindInt64(0, row.referring_visit);
491 if (!statement.Step())
494 *from_url = GURL(statement.ColumnString(0));
// Computes, for the origin of |url|, the earliest visit time and the number of
// visits that pass the transition filter in the query below, writing them to
// |*first_visit| and |*count|. Only http and https URLs proceed past the
// scheme check.
// The host match is expressed as a string range on urls.url: the lower bound
// is the origin spec and the upper bound is the same string with its last
// character replaced by '0'.
// NOTE(review): the upper bound is only correct if the origin spec ends in
// '/', as the comment below describes -- the code does not check this.
// NOTE(review): the declaration of |count|, the statements executed when the
// scheme check, the empty-origin check, Step() or Succeeded() fail, and the
// final return are not visible in this view; confirm.
499 bool VisitDatabase::GetVisibleVisitCountToHost(const GURL& url,
501 base::Time* first_visit) {
502 if (!url.SchemeIs(content::kHttpScheme) &&
503 !url.SchemeIs(content::kHttpsScheme))
506 // We need to search for URLs with a matching host/port. One way to query for
507 // this is to use the LIKE operator, eg 'url LIKE http://google.com/%'. This
508 // is inefficient though in that it doesn't use the index and each entry must
509 // be visited. The same query can be executed by using >= and < operator.
510 // The query becomes:
511 // 'url >= http://google.com/' and url < http://google.com0'.
512 // 0 is used as it is one character greater than '/'.
513 const std::string host_query_min = url.GetOrigin().spec();
514 if (host_query_min.empty())
517 // We also want to restrict ourselves to main frame navigations that are not
518 // in the middle of redirect chains, hence the transition checks.
519 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
520 "SELECT MIN(v.visit_time), COUNT(*) "
521 "FROM visits v INNER JOIN urls u ON v.url = u.id "
522 "WHERE u.url >= ? AND u.url < ? "
523 "AND (transition & ?) != 0 "
524 "AND (transition & ?) NOT IN (?, ?, ?)"));
525 statement.BindString(0, host_query_min);
// Upper bound: drop the final character of the origin spec and append '0'.
526 statement.BindString(1,
527 host_query_min.substr(0, host_query_min.size() - 1) + '0');
528 statement.BindInt(2, content::PAGE_TRANSITION_CHAIN_END);
529 statement.BindInt(3, content::PAGE_TRANSITION_CORE_MASK);
530 statement.BindInt(4, content::PAGE_TRANSITION_AUTO_SUBFRAME);
531 statement.BindInt(5, content::PAGE_TRANSITION_MANUAL_SUBFRAME);
532 statement.BindInt(6, content::PAGE_TRANSITION_KEYWORD_GENERATED);
534 if (!statement.Step()) {
535 // We've never been to this page before.
540 if (!statement.Succeeded())
543 *first_visit = base::Time::FromInternalValue(statement.ColumnInt64(0));
544 *count = statement.ColumnInt(1);
// Reports the smallest non-zero visit_time in "visits" through |*first_visit|.
// When the statement yields no row, or the minimum reads back as 0,
// |*first_visit| is set to base::Time::Now() instead.
// NOTE(review): the return values of both paths are not visible in this view;
// confirm.
548 bool VisitDatabase::GetStartDate(base::Time* first_visit) {
549 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
550 "SELECT MIN(visit_time) FROM visits WHERE visit_time != 0"));
551 if (!statement.Step() || statement.ColumnInt64(0) == 0) {
552 *first_visit = base::Time::Now();
555 *first_visit = base::Time::FromInternalValue(statement.ColumnInt64(0));
// Looks up the recorded source for each visit in |visits| and inserts the
// (visit id, source) pairs found in "visit_source" into |sources|.
// The ids are queried in batches of up to |batch_size|. Each batch builds an
// "id IN (...)" list from the numeric visit ids with base::Int64ToString and
// runs it through GetUniqueStatement(), so the SQL text differs per batch.
// NOTE(review): the else-arm of the end_index computation (presumably
// visits_size), the separator appended between ids, and any statements before
// the batch constant are not visible in this view; confirm.
559 void VisitDatabase::GetVisitsSource(const VisitVector& visits,
560 VisitSourceMap* sources) {
564 // We query the source in batch. Here defines the batch size.
565 const size_t batch_size = 500;
566 size_t visits_size = visits.size();
568 size_t start_index = 0, end_index = 0;
569 while (end_index < visits_size) {
570 start_index = end_index;
571 end_index = end_index + batch_size < visits_size ? end_index + batch_size
574 // Compose the sql statement with a list of ids.
575 std::string sql = "SELECT id,source FROM visit_source ";
576 sql.append("WHERE id IN (");
577 // Append all the ids in the statement.
578 for (size_t j = start_index; j < end_index; j++) {
579 if (j != start_index)
581 sql.append(base::Int64ToString(visits[j].visit_id));
583 sql.append(") ORDER BY id");
584 sql::Statement statement(GetDB().GetUniqueStatement(sql.c_str()));
586 // Get the source entries out of the query result.
587 while (statement.Step()) {
588 std::pair<VisitID, VisitSource> source_entry(statement.ColumnInt64(0),
589 static_cast<VisitSource>(statement.ColumnInt(1)));
590 sources->insert(source_entry);
// Migration step that adds the visit_duration column (INTEGER DEFAULT 0 NOT
// NULL) to "visits" when DoesColumnExist() reports it missing. A missing
// "visits" table is treated as a programming error (NOTREACHED).
// NOTE(review): the statements following NOTREACHED and a failed ALTER TABLE,
// and the final return, are not visible in this view; confirm.
595 bool VisitDatabase::MigrateVisitsWithoutDuration() {
596 if (!GetDB().DoesTableExist("visits")) {
597 NOTREACHED() << " Visits table should exist before migration";
601 if (!GetDB().DoesColumnExist("visits", "visit_duration")) {
602 // Old versions don't have the visit_duration column, we modify the table
603 // to add that field.
604 if (!GetDB().Execute("ALTER TABLE visits "
605 "ADD COLUMN visit_duration INTEGER DEFAULT 0 NOT NULL"))
// Clears |result_vector| and refills it with (url id, visit time, transition)
// triples for the rows with the highest ids in "visits", limited to max_visits
// rows. Rows are ordered by id, not by visit_time.
// NOTE(review): the declaration of |max_visits| and of the loop-local |info|,
// and the statement executed when the statement is invalid, are not visible in
// this view; confirm.
// NOTE(review): BindInt64() is called before the is_valid() check below --
// confirm that binding on an invalid statement is harmless, or move the check
// ahead of the bind.
611 void VisitDatabase::GetBriefVisitInfoOfMostRecentVisits(
613 std::vector<BriefVisitInfo>* result_vector) {
614 result_vector->clear();
616 sql::Statement statement(GetDB().GetUniqueStatement(
617 "SELECT url,visit_time,transition FROM visits "
618 "ORDER BY id DESC LIMIT ?"));
620 statement.BindInt64(0, max_visits);
622 if (!statement.is_valid())
625 while (statement.Step()) {
627 info.url_id = statement.ColumnInt64(0);
628 info.time = base::Time::FromInternalValue(statement.ColumnInt64(1));
629 info.transition = content::PageTransitionFromInt(statement.ColumnInt(2));
630 result_vector->push_back(info);
634 } // namespace history