1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/history/typed_url_syncable_service.h"
7 #include "base/auto_reset.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram.h"
10 #include "base/strings/utf_string_conversions.h"
11 #include "chrome/browser/history/history_backend.h"
12 #include "net/base/net_util.h"
13 #include "sync/protocol/sync.pb.h"
14 #include "sync/protocol/typed_url_specifics.pb.h"
18 // The server backend can't handle arbitrarily large node sizes, so to keep
19 // the size under control we limit the visit array.
// WriteToTypedUrlSpecifics() CHECKs that the visit array it produces never
// exceeds this value.
20 static const int kMaxTypedUrlVisits = 100;
22 // There's no limit on how many visits the history DB could have for a given
23 // typed URL, so we limit how many we fetch from the DB to avoid crashes due to
24 // running out of memory (http://crbug.com/89793). This value is different
25 // from kMaxTypedUrlVisits, as some of the visits fetched from the DB may be
26 // RELOAD visits, which will be stripped.
27 static const int kMaxVisitsToFetch = 1000;
29 // This is the threshold at which we start throttling sync updates for typed
30 // URLs - any URLs with a typed_count >= this threshold will be throttled.
31 static const int kTypedUrlVisitThrottleThreshold = 10;
33 // This is the multiple we use when throttling sync updates. If the multiple is
34 // N, we sync up every Nth update (i.e. when typed_count % N == 0).
35 static const int kTypedUrlVisitThrottleMultiple = 10;
// Tag passed as the first argument to syncer::SyncData::CreateLocalData() for
// every typed-URL change built by AddTypedUrlToChangeList().
41 const char kTypedUrlTag[] = "google_chrome_typed_urls";
// Walks |visits| front to back, comparing each visit_time (as an internal
// int64 value) with that of the preceding visit. The first element is never
// compared because there is nothing before it. In the code visible here this
// helper is only called from inside DCHECK()s.
// NOTE(review): the handling of an out-of-order pair and the function's return
// statements are not visible in this view of the file - confirm against the
// full source.
43 static bool CheckVisitOrdering(const VisitVector& visits) {
44 int64 previous_visit_time = 0;
45 for (VisitVector::const_iterator visit = visits.begin();
46 visit != visits.end(); ++visit) {
47 if (visit != visits.begin()) {
48 // We allow duplicate visits here - they shouldn't really be allowed, but
49 // they still seem to show up sometimes and we haven't figured out the
50 // source, so we just log an error instead of failing an assertion.
51 // (http://crbug.com/91473).
52 if (previous_visit_time == visit->visit_time.ToInternalValue())
53 DVLOG(1) << "Duplicate visit time encountered";
54 else if (previous_visit_time > visit->visit_time.ToInternalValue())
// Remember this visit's timestamp for the comparison on the next iteration.
58 previous_visit_time = visit->visit_time.ToInternalValue();
// Stores |history_backend| (DCHECKed to be non-NULL), starts with
// |processing_syncer_changes_| cleared, and records the MessageLoop that is
// current at construction time in |expected_loop_|. Other methods in this file
// DCHECK that they are running on that same loop.
63 TypedUrlSyncableService::TypedUrlSyncableService(
64 HistoryBackend* history_backend)
65 : history_backend_(history_backend),
66 processing_syncer_changes_(false),
67 expected_loop_(base::MessageLoop::current()) {
68 DCHECK(history_backend_);
69 DCHECK(expected_loop_ == base::MessageLoop::current());
// The destructor only asserts (in debug builds) that it runs on the same
// MessageLoop the object was constructed on.
72 TypedUrlSyncableService::~TypedUrlSyncableService() {
73 DCHECK(expected_loop_ == base::MessageLoop::current());
// Takes ownership of |sync_processor| and |error_handler| and stores them in
// |sync_processor_| / |sync_error_handler_|. DCHECKs that no processor is
// already installed, that both arguments are non-NULL, and that |type| is
// syncer::TYPED_URLS. |initial_sync_data| is not used by the visible code; the
// actual merge is still a TODO.
// NOTE(review): the return statement is not visible in this view - presumably
// |merge_result| is returned; confirm against the full source.
76 syncer::SyncMergeResult TypedUrlSyncableService::MergeDataAndStartSyncing(
77 syncer::ModelType type,
78 const syncer::SyncDataList& initial_sync_data,
79 scoped_ptr<syncer::SyncChangeProcessor> sync_processor,
80 scoped_ptr<syncer::SyncErrorFactory> error_handler) {
81 DCHECK(expected_loop_ == base::MessageLoop::current());
82 DCHECK(!sync_processor_.get());
83 DCHECK(sync_processor.get());
84 DCHECK(error_handler.get());
85 DCHECK_EQ(type, syncer::TYPED_URLS);
87 syncer::SyncMergeResult merge_result(type);
// Having a non-NULL |sync_processor_| is what lets the OnUrl*() handlers
// forward local changes; they return early while it is unset.
88 sync_processor_ = sync_processor.Pass();
89 sync_error_handler_ = error_handler.Pass();
91 // TODO(mgist): Add implementation
// Releases the change processor and error factory obtained in
// MergeDataAndStartSyncing(). Once |sync_processor_| is reset, the OnUrl*()
// handlers return early without sending anything. |type| must be
// syncer::TYPED_URLS (DCHECKed).
96 void TypedUrlSyncableService::StopSyncing(syncer::ModelType type) {
97 DCHECK(expected_loop_ == base::MessageLoop::current());
98 DCHECK_EQ(type, syncer::TYPED_URLS);
100 sync_processor_.reset();
101 sync_error_handler_.reset();
// Not implemented yet: the visible code only declares an empty SyncDataList
// and never consults |type|.
// NOTE(review): the return statement is not visible in this view - presumably
// the empty |list| is returned; confirm against the full source.
104 syncer::SyncDataList TypedUrlSyncableService::GetAllSyncData(
105 syncer::ModelType type) const {
106 DCHECK(expected_loop_ == base::MessageLoop::current());
107 syncer::SyncDataList list;
109 // TODO(mgist): Add implementation
// Not implemented yet: neither |from_here| nor |change_list| is used by the
// visible code, which returns a DATATYPE_ERROR SyncError saying the service is
// not implemented.
// NOTE(review): the final argument(s) of the SyncError constructor call are
// not visible in this view of the file.
114 syncer::SyncError TypedUrlSyncableService::ProcessSyncChanges(
115 const tracked_objects::Location& from_here,
116 const syncer::SyncChangeList& change_list) {
117 DCHECK(expected_loop_ == base::MessageLoop::current());
119 // TODO(mgist): Add implementation
121 return syncer::SyncError(FROM_HERE,
122 syncer::SyncError::DATATYPE_ERROR,
123 "Typed url syncable service is not implemented.",
// Handles a batch of modified URL rows. For every row in |changed_urls| whose
// typed_count() is positive, CreateOrUpdateSyncNode() is asked to append a
// change; the collected changes are then handed to |sync_processor_| in a
// single ProcessSyncChanges() call. |changed_urls| must be non-NULL (DCHECKed).
// The SyncError returned by ProcessSyncChanges() is not inspected here.
127 void TypedUrlSyncableService::OnUrlsModified(URLRows* changed_urls) {
128 DCHECK(expected_loop_ == base::MessageLoop::current());
129 DCHECK(changed_urls);
131 if (processing_syncer_changes_)
132 return; // These are changes originating from us, ignore.
133 if (!sync_processor_.get())
134 return; // Sync processor not yet initialized, don't sync.
136 // Create SyncChangeList.
137 syncer::SyncChangeList changes;
139 for (URLRows::iterator url = changed_urls->begin();
140 url != changed_urls->end(); ++url) {
141 // Only care if the modified URL is typed.
142 if (url->typed_count() > 0) {
143 // If there were any errors updating the sync node, just ignore them and
144 // continue on to process the next URL.
145 CreateOrUpdateSyncNode(*url, &changes);
149 // Send SyncChangeList to server if there are any changes.
150 if (changes.size() > 0)
151 sync_processor_->ProcessSyncChanges(FROM_HERE, changes);
// Handles a single visit notification. After the usual early-outs, the visit
// is filtered through ShouldSyncVisit(); if it passes, a change for |row| is
// built with CreateOrUpdateSyncNode() and forwarded to |sync_processor_|.
// NOTE(review): the line declaring the second parameter (|row|) and the body
// of the ShouldSyncVisit() check are not visible in this view - presumably the
// latter is an early return; confirm against the full source.
154 void TypedUrlSyncableService::OnUrlVisited(content::PageTransition transition,
156 DCHECK(expected_loop_ == base::MessageLoop::current());
159 if (processing_syncer_changes_)
160 return; // These are changes originating from us, ignore.
161 if (!sync_processor_.get())
162 return; // Sync processor not yet initialized, don't sync.
163 if (!ShouldSyncVisit(transition, row))
166 // Create SyncChangeList.
167 syncer::SyncChangeList changes;
// The bool result is ignored; a failure simply leaves |changes| empty.
169 CreateOrUpdateSyncNode(*row, &changes);
171 // Send SyncChangeList to server if there are any changes.
172 if (changes.size() > 0)
173 sync_processor_->ProcessSyncChanges(FROM_HERE, changes);
// Translates local history deletions into ACTION_DELETE sync changes and
// forwards them to |sync_processor_|. Two loops are visible:
//   * one over every entry of |synced_typed_urls_|, after which that cache is
//     cleared;
//   * one over |rows|, which emits a delete only for URLs found in
//     |synced_typed_urls_| and then erases each of them from the cache.
// NOTE(review): the remaining parameters, the archival early-out, the
// condition choosing between the two loops (presumably |all_history|) and the
// declarations of the |row| / |visits| locals are not visible in this view of
// the file - confirm against the full source.
176 void TypedUrlSyncableService::OnUrlsDeleted(bool all_history,
179 DCHECK(expected_loop_ == base::MessageLoop::current());
181 if (processing_syncer_changes_)
182 return; // These are changes originating from us, ignore.
183 if (!sync_processor_.get())
184 return; // Sync processor not yet initialized, don't sync.
186 // Ignore archivals (we don't want to sync them as deletions, to avoid
187 // extra traffic up to the server, and also to make sure that a client with
188 // a bad clock setting won't go on an archival rampage and delete all
189 // history from every client). The server will gracefully age out the sync DB
190 // entries when they've been idle for long enough.
194 // Create SyncChangeList.
195 syncer::SyncChangeList changes;
198 // Delete all synced typed urls.
199 for (std::set<GURL>::const_iterator url = synced_typed_urls_.begin();
200 url != synced_typed_urls_.end(); ++url) {
// For ACTION_DELETE, AddTypedUrlToChangeList() only reads row.url(); the
// visit vector is not consulted.
203 AddTypedUrlToChangeList(syncer::SyncChange::ACTION_DELETE,
204 row, visits, url->spec(), &changes);
206 // Clear cache of server state.
207 synced_typed_urls_.clear();
211 for (URLRows::const_iterator row = rows->begin();
212 row != rows->end(); ++row) {
213 // Add specifics to change list for all synced urls that were deleted.
214 if (synced_typed_urls_.find(row->url()) != synced_typed_urls_.end()) {
216 AddTypedUrlToChangeList(syncer::SyncChange::ACTION_DELETE,
217 *row, visits, row->url().spec(), &changes);
218 // Delete typed url from cache.
219 synced_typed_urls_.erase(row->url());
224 // Send SyncChangeList to server if there are any changes.
225 if (changes.size() > 0)
226 sync_processor_->ProcessSyncChanges(FROM_HERE, changes);
// Tests |url| against three exclusion rules: empty spec, file scheme, and a
// host that net::IsLocalhost() recognizes. CreateOrUpdateSyncNode() consults
// this before building a change for a URL.
// NOTE(review): the return statements are not visible in this view -
// presumably each matching rule returns true and the fall-through returns
// false; confirm against the full source.
229 bool TypedUrlSyncableService::ShouldIgnoreUrl(const GURL& url) {
230 // Ignore empty URLs. Not sure how this can happen (maybe import from other
231 // busted browsers, or misuse of the history API, or just plain bugs) but we
232 // can't deal with them.
233 if (url.spec().empty())
236 // Ignore local file URLs.
237 if (url.SchemeIsFile())
240 // Ignore localhost URLs.
241 if (net::IsLocalhost(url.host()))
// Decides whether a visit notification should produce a sync update. In the
// visible part of the return expression the visit qualifies when its core
// transition (qualifier bits masked off) is PAGE_TRANSITION_TYPED and the
// row's typed_count is either below kTypedUrlVisitThrottleThreshold or an
// exact multiple of kTypedUrlVisitThrottleMultiple.
// NOTE(review): the second parameter line and one line in the middle of the
// return expression are not visible in this view, so there may be an
// additional condition; confirm against the full source.
247 bool TypedUrlSyncableService::ShouldSyncVisit(
248 content::PageTransition page_transition,
252 int typed_count = row->typed_count();
253 content::PageTransition transition = static_cast<content::PageTransition>(
254 page_transition & content::PAGE_TRANSITION_CORE_MASK);
256 // Just use an ad-hoc criterion to determine whether to ignore this
257 // notification. For most users, the distribution of visits is roughly a bell
258 // curve with a long tail - there are lots of URLs with < 5 visits so we want
259 // to make sure we sync up every visit to ensure the proper ordering of
260 // suggestions. But there are relatively few URLs with > 10 visits, and those
261 // tend to be more broadly distributed such that there's no need to sync up
262 // every visit to preserve their relative ordering.
263 return (transition == content::PAGE_TRANSITION_TYPED &&
265 (typed_count < kTypedUrlVisitThrottleThreshold ||
266 (typed_count % kTypedUrlVisitThrottleMultiple) == 0));
// Appends to |changes| an ACTION_ADD or ACTION_UPDATE change for |url|.
// ACTION_UPDATE is chosen when the URL is already in |synced_typed_urls_|,
// ACTION_ADD otherwise; either way the URL is in that cache afterwards.
// Requires url.typed_count() > 0 (DCHECKed). URLs rejected by
// ShouldIgnoreUrl(), or whose visits cannot be loaded, are handled before any
// change is built.
// NOTE(review): the |url| parameter line, the return statements and the
// left-hand side of the |change_type| assignment are not visible in this view
// of the file. |url| is passed to FixupURLAndGetVisits() by address, so it is
// presumably taken by value - confirm against the full source.
269 bool TypedUrlSyncableService::CreateOrUpdateSyncNode(
271 syncer::SyncChangeList* changes) {
272 DCHECK_GT(url.typed_count(), 0);
274 if (ShouldIgnoreUrl(url.url()))
277 // Get the visits for this node.
278 VisitVector visit_vector;
279 if (!FixupURLAndGetVisits(&url, &visit_vector)) {
280 DLOG(ERROR) << "Could not load visits for url: " << url.url();
// FixupURLAndGetVisits() inserts a placeholder visit when the DB returns
// none, so the vector is expected to be non-empty here.
283 DCHECK(!visit_vector.empty());
// The URL spec is what gets passed as the |title| argument of
// AddTypedUrlToChangeList() below.
285 std::string title = url.url().spec();
286 syncer::SyncChange::SyncChangeType change_type;
288 // If server already has URL, then send a sync update, else add it.
290 (synced_typed_urls_.find(url.url()) != synced_typed_urls_.end()) ?
291 syncer::SyncChange::ACTION_UPDATE :
292 syncer::SyncChange::ACTION_ADD;
294 // Ensure cache of server state is up to date.
295 synced_typed_urls_.insert(url.url());
297 AddTypedUrlToChangeList(change_type, url, visit_vector, title, changes);
// Builds an EntitySpecifics carrying a TypedUrlSpecifics for |row| and appends
// a SyncChange of |change_type| to |change_list|. The SyncData is created with
// CreateLocalData() using kTypedUrlTag and |title|.
// NOTE(review): the |row| and |title| parameter lines and the else keyword
// separating the two branches below are not visible in this view of the file;
// presumably WriteToTypedUrlSpecifics() runs only for non-delete changes -
// confirm against the full source.
302 void TypedUrlSyncableService::AddTypedUrlToChangeList(
303 syncer::SyncChange::SyncChangeType change_type,
305 const VisitVector& visits,
307 syncer::SyncChangeList* change_list) {
308 sync_pb::EntitySpecifics entity_specifics;
309 sync_pb::TypedUrlSpecifics* typed_url = entity_specifics.mutable_typed_url();
// A deletion only sets the URL; |visits| is not read on this path.
311 if (change_type == syncer::SyncChange::ACTION_DELETE) {
312 typed_url->set_url(row.url().spec());
314 WriteToTypedUrlSpecifics(row, visits, typed_url);
317 change_list->push_back(
318 syncer::SyncChange(FROM_HERE, change_type,
319 syncer::SyncData::CreateLocalData(
320 kTypedUrlTag, title, entity_specifics)));
// Fills |typed_url| from |url| (spec, title converted to UTF-8, hidden flag)
// and from |visits| (parallel arrays of visit timestamps and transitions).
// On exit the visit array is CHECKed to hold between 1 and kMaxTypedUrlVisits
// entries, with one transition per visit.
// Preconditions (DCHECKed): |url| has a non-null last_visit(), |visits| is
// non-empty, its last element's time equals url.last_visit(), and it passes
// CheckVisitOrdering().
// NOTE(review): several lines of this function are not visible in this view of
// the file (the |url| parameter line, the declarations of |typed_count|,
// |total| and |skip_count|, and the statements controlled by some of the
// conditions below, presumably counter updates and continue statements).
// Comments below are limited to what the visible lines show.
323 void TypedUrlSyncableService::WriteToTypedUrlSpecifics(
325 const VisitVector& visits,
326 sync_pb::TypedUrlSpecifics* typed_url) {
328 DCHECK(!url.last_visit().is_null());
329 DCHECK(!visits.empty());
330 DCHECK_EQ(url.last_visit().ToInternalValue(),
331 visits.back().visit_time.ToInternalValue());
333 typed_url->set_url(url.url().spec());
334 typed_url->set_title(UTF16ToUTF8(url.title()));
335 typed_url->set_hidden(url.hidden());
337 DCHECK(CheckVisitOrdering(visits));
// When set, the copy loop below drops every visit that is not TYPED.
339 bool only_typed = false;
// The counting pass appears to be needed only when the input has more entries
// than the output may hold.
342 if (visits.size() > static_cast<size_t>(kMaxTypedUrlVisits)) {
345 // Walk the passed-in visit vector and count the # of typed visits.
346 for (VisitVector::const_iterator visit = visits.begin();
347 visit != visits.end(); ++visit) {
348 content::PageTransition transition = content::PageTransitionFromInt(
349 visit->transition & content::PAGE_TRANSITION_CORE_MASK);
350 // We ignore reload visits.
351 if (transition == content::PAGE_TRANSITION_RELOAD)
354 if (transition == content::PAGE_TRANSITION_TYPED)
357 // We should have at least one typed visit. This can sometimes happen if
358 // the history DB has an inaccurate count for some reason (there have been
359 // bugs in the history code in the past which have left users in the wild
360 // with incorrect counts - http://crbug.com/84258).
361 DCHECK(typed_count > 0);
// |skip_count| is the number of entries that have to be dropped to fit: the
// typed-visit excess when typed visits alone exceed the cap, otherwise the
// excess of |total| over the cap.
363 if (typed_count > kMaxTypedUrlVisits) {
365 skip_count = typed_count - kMaxTypedUrlVisits;
366 } else if (total > kMaxTypedUrlVisits) {
367 skip_count = total - kMaxTypedUrlVisits;
// Copy pass: append the surviving visits to |typed_url|.
371 for (VisitVector::const_iterator visit = visits.begin();
372 visit != visits.end(); ++visit) {
373 content::PageTransition transition = content::PageTransitionFromInt(
374 visit->transition & content::PAGE_TRANSITION_CORE_MASK);
375 // Skip reload visits.
376 if (transition == content::PAGE_TRANSITION_RELOAD)
379 // If we only have room for typed visits, then only add typed visits.
380 if (only_typed && transition != content::PAGE_TRANSITION_TYPED)
383 if (skip_count > 0) {
384 // We have too many entries to fit, so we need to skip the oldest ones.
385 // Only skip typed URLs if there are too many typed URLs to fit.
386 if (only_typed || transition != content::PAGE_TRANSITION_TYPED) {
// The stored transition is the full value, including any qualifier bits, not
// the masked core transition used for the comparisons above.
391 typed_url->add_visits(visit->visit_time.ToInternalValue());
392 typed_url->add_visit_transitions(visit->transition);
// Every entry that had to be skipped must have been skipped by now.
394 DCHECK_EQ(skip_count, 0);
396 if (typed_url->visits_size() == 0) {
397 // If we get here, it's because we don't actually have any TYPED visits
398 // even though the visit's typed_count > 0 (corrupted typed_count). So
399 // let's go ahead and add a RELOAD visit at the most recent visit since
400 // it's not legal to have an empty visit array (yet another workaround
401 // for http://crbug.com/84258).
402 typed_url->add_visits(url.last_visit().ToInternalValue());
403 typed_url->add_visit_transitions(content::PAGE_TRANSITION_RELOAD);
// These are CHECKs rather than DCHECKs, so a violation is fatal in release
// builds as well.
405 CHECK_GT(typed_url->visits_size(), 0);
406 CHECK_LE(typed_url->visits_size(), kMaxTypedUrlVisits);
407 CHECK_EQ(typed_url->visits_size(), typed_url->visit_transitions_size());
// Loads up to kMaxVisitsToFetch of the most recent visits for |url| from
// |history_backend_| into |visits|, reverses them, and overwrites
// url->last_visit() with the time of the final element so the row and the
// visit list agree. If the backend returns no visits, a single placeholder
// visit with a TYPED transition at url->last_visit() is inserted.
// NOTE(review): the |url| parameter line, the body of the failure branch
// after GetMostRecentVisitsForURL(), the declaration line of the placeholder
// |visit| and the final return are not visible in this view of the file -
// confirm against the full source.
410 bool TypedUrlSyncableService::FixupURLAndGetVisits(
412 VisitVector* visits) {
414 CHECK(history_backend_);
415 if (!history_backend_->GetMostRecentVisitsForURL(
416 url->id(), kMaxVisitsToFetch, visits)) {
421 // Sometimes (due to a bug elsewhere in the history or sync code, or due to
422 // a crash between adding a URL to the history database and updating the
423 // visit DB) the visit vector for a URL can be empty. If this happens, just
424 // create a new visit whose timestamp is the same as the last_visit time.
425 // This is a workaround for http://crbug.com/84258.
426 if (visits->empty()) {
427 DVLOG(1) << "Found empty visits for URL: " << url->url();
429 url->id(), url->last_visit(), 0, content::PAGE_TRANSITION_TYPED, 0);
430 visits->push_back(visit);
433 // GetMostRecentVisitsForURL() returns the data in the opposite order that
434 // we need it, so reverse it.
435 std::reverse(visits->begin(), visits->end());
437 // Sometimes, the last_visit field in the URL doesn't match the timestamp of
438 // the last visit in our visit array (they come from different tables, so
439 // crashes/bugs can cause them to mismatch), so just set it here.
440 url->set_last_visit(visits->back().visit_time);
441 DCHECK(CheckVisitOrdering(*visits));
445 } // namespace history