src/chrome/browser/sync/glue/typed_url_model_associator.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "chrome/browser/sync/glue/typed_url_model_associator.h"
   6
   7 #include <algorithm>
   8 #include <set>
   9
  10 #include "base/location.h"
  11 #include "base/logging.h"
  12 #include "base/metrics/histogram.h"
  13 #include "base/strings/utf_string_conversions.h"
  14 #include "chrome/browser/history/history_backend.h"
  15 #include "chrome/browser/sync/profile_sync_service.h"
  16 #include "content/public/browser/browser_thread.h"
  17 #include "net/base/net_util.h"
  18 #include "sync/api/sync_error.h"
  19 #include "sync/internal_api/public/read_node.h"
  20 #include "sync/internal_api/public/read_transaction.h"
  21 #include "sync/internal_api/public/write_node.h"
  22 #include "sync/internal_api/public/write_transaction.h"
  23 #include "sync/protocol/typed_url_specifics.pb.h"
  24
  25 using content::BrowserThread;
  26
  27 namespace browser_sync {
  28
// The server backend can't handle arbitrarily large node sizes, so to keep
// the size under control we limit the visit array. This is the upper bound on
// the number of visits written into a single TypedUrlSpecifics.
static const int kMaxTypedUrlVisits = 100;

// There's no limit on how many visits the history DB could have for a given
// typed URL, so we limit how many we fetch from the DB to avoid crashes due to
// running out of memory (http://crbug.com/89793). This value is different
// from kMaxTypedUrlVisits, as some of the visits fetched from the DB may be
// RELOAD visits, which will be stripped.
static const int kMaxVisitsToFetch = 1000;

// Tag passed to InitByTagLookup() to locate the top-level typed URL node in
// the sync model.
const char kTypedUrlTag[] = "google_chrome_typed_urls";
  41
  42 static bool CheckVisitOrdering(const history::VisitVector& visits) {
  43   int64 previous_visit_time = 0;
  44   for (history::VisitVector::const_iterator visit = visits.begin();
  45        visit != visits.end(); ++visit) {
  46     if (visit != visits.begin()) {
  47       // We allow duplicate visits here - they shouldn't really be allowed, but
  48       // they still seem to show up sometimes and we haven't figured out the
  49       // source, so we just log an error instead of failing an assertion.
  50       // (http://crbug.com/91473).
  51       if (previous_visit_time == visit->visit_time.ToInternalValue())
  52         DVLOG(1) << "Duplicate visit time encountered";
  53       else if (previous_visit_time > visit->visit_time.ToInternalValue())
  54         return false;
  55     }
  56
  57     previous_visit_time = visit->visit_time.ToInternalValue();
  58   }
  59   return true;
  60 }
  61
// Stores the (non-owned, as far as this file shows) collaborators and resets
// the DB error statistics.
//
// |sync_service|    - must be non-null (DCHECKed).
// |history_backend| - may be null in unit tests.
// |error_handler|   - used to create and upload errors during association.
//
// The message loop that is current at construction time is remembered in
// |expected_loop_| so later calls can verify they run on the same loop.
TypedUrlModelAssociator::TypedUrlModelAssociator(
    ProfileSyncService* sync_service,
    history::HistoryBackend* history_backend,
    DataTypeErrorHandler* error_handler)
    : sync_service_(sync_service),
      history_backend_(history_backend),
      expected_loop_(base::MessageLoop::current()),
      abort_requested_(false),
      error_handler_(error_handler),
      num_db_accesses_(0),
      num_db_errors_(0) {
  DCHECK(sync_service_);
  // history_backend_ may be null for unit tests (since it's not mockable).
  // This object must not be created on the UI thread.
  DCHECK(!BrowserThread::CurrentlyOn(BrowserThread::UI));
}
  77
// The destructor body is empty; nothing is released explicitly here.
TypedUrlModelAssociator::~TypedUrlModelAssociator() {}
  79
  80
  81 bool TypedUrlModelAssociator::FixupURLAndGetVisits(
  82     history::URLRow* url,
  83     history::VisitVector* visits) {
  84   ++num_db_accesses_;
  85   CHECK(history_backend_);
  86   if (!history_backend_->GetMostRecentVisitsForURL(
  87           url->id(), kMaxVisitsToFetch, visits)) {
  88     ++num_db_errors_;
  89     return false;
  90   }
  91
  92   // Sometimes (due to a bug elsewhere in the history or sync code, or due to
  93   // a crash between adding a URL to the history database and updating the
  94   // visit DB) the visit vector for a URL can be empty. If this happens, just
  95   // create a new visit whose timestamp is the same as the last_visit time.
  96   // This is a workaround for http://crbug.com/84258.
  97   if (visits->empty()) {
  98     DVLOG(1) << "Found empty visits for URL: " << url->url();
  99     history::VisitRow visit(
 100         url->id(), url->last_visit(), 0, content::PAGE_TRANSITION_TYPED, 0);
 101     visits->push_back(visit);
 102   }
 103
 104   // GetMostRecentVisitsForURL() returns the data in the opposite order that
 105   // we need it, so reverse it.
 106   std::reverse(visits->begin(), visits->end());
 107
 108   // Sometimes, the last_visit field in the URL doesn't match the timestamp of
 109   // the last visit in our visit array (they come from different tables, so
 110   // crashes/bugs can cause them to mismatch), so just set it here.
 111   url->set_last_visit(visits->back().visit_time);
 112   DCHECK(CheckVisitOrdering(*visits));
 113   return true;
 114 }
 115
 116 bool TypedUrlModelAssociator::ShouldIgnoreUrl(const GURL& url) {
 117   // Ignore empty URLs. Not sure how this can happen (maybe import from other
 118   // busted browsers, or misuse of the history API, or just plain bugs) but we
 119   // can't deal with them.
 120   if (url.spec().empty())
 121     return true;
 122
 123   // Ignore local file URLs.
 124   if (url.SchemeIsFile())
 125     return true;
 126
 127   // Ignore localhost URLs.
 128   if (net::IsLocalhost(url.host()))
 129     return true;
 130
 131   return false;
 132 }
 133
 134 bool TypedUrlModelAssociator::ShouldIgnoreVisits(
 135     const history::VisitVector& visits) {
 136   // We ignore URLs that were imported, but have never been visited by
 137   // chromium.
 138   static const int kLastImportedSource = history::SOURCE_EXTENSION;
 139   history::VisitSourceMap map;
 140   if (!history_backend_->GetVisitsSource(visits, &map))
 141     return false;  // If we can't read the visit, assume it's not imported.
 142
 143   // Walk the list of visits and look for a non-imported item.
 144   for (history::VisitVector::const_iterator it = visits.begin();
 145        it != visits.end(); ++it) {
 146     if (map.count(it->visit_id) == 0 ||
 147         map[it->visit_id] <= kLastImportedSource) {
 148       return false;
 149     }
 150   }
 151   // We only saw imported visits, so tell the caller to ignore them.
 152   return true;
 153 }
 154
 155 syncer::SyncError TypedUrlModelAssociator::AssociateModels(
 156     syncer::SyncMergeResult* local_merge_result,
 157     syncer::SyncMergeResult* syncer_merge_result) {
 158   ClearErrorStats();
 159   syncer::SyncError error = DoAssociateModels();
 160   UMA_HISTOGRAM_PERCENTAGE("Sync.TypedUrlModelAssociationErrors",
 161                            GetErrorPercentage());
 162   ClearErrorStats();
 163   return error;
 164 }
 165
 166 void TypedUrlModelAssociator::ClearErrorStats() {
 167   num_db_accesses_ = 0;
 168   num_db_errors_ = 0;
 169 }
 170
 171 int TypedUrlModelAssociator::GetErrorPercentage() const {
 172   return num_db_accesses_ ? (100 * num_db_errors_ / num_db_accesses_) : 0;
 173 }
 174
// Performs the initial merge between the typed URLs in the local history DB
// and the typed URL nodes in the sync model.
//
// Steps, in order:
//   1. Read all typed URLs (and their visits) from |history_backend_|,
//      dropping URLs whose visits cannot be loaded or that should be ignored.
//   2. Under a sync WriteTransaction, merge each remaining local URL with the
//      sync node that has the same client tag, or create a node if there is
//      none.
//   3. Walk the children of the typed URL root, queue local changes for URLs
//      that were not handled in step 2, and tombstone obsolete nodes.
//   4. After the transaction has been closed, apply the queued changes to the
//      history backend.
//
// Returns a default-constructed SyncError on success. Returns a
// DATATYPE_ERROR if an abort was requested, or an error created through
// |error_handler_| if the history query or a sync node operation fails.
syncer::SyncError TypedUrlModelAssociator::DoAssociateModels() {
  DVLOG(1) << "Associating TypedUrl Models";
  DCHECK(expected_loop_ == base::MessageLoop::current());

  // The query counts as a DB access even when there is no backend.
  history::URLRows typed_urls;
  ++num_db_accesses_;
  bool query_succeeded =
      history_backend_ && history_backend_->GetAllTypedURLs(&typed_urls);

  // Changes to apply to the local history DB once the sync transaction below
  // has been closed.
  history::URLRows new_urls;
  TypedUrlVisitVector new_visits;
  TypedUrlUpdateVector updated_urls;
  {
    // |abort_lock_| is held for the whole of this scope, including the sync
    // transaction.
    base::AutoLock au(abort_lock_);
    if (abort_requested_) {
      return syncer::SyncError(FROM_HERE,
                               syncer::SyncError::DATATYPE_ERROR,
                               "Association was aborted.",
                               model_type());
    }

    // Must lock and check first to make sure |error_handler_| is valid.
    if (!query_succeeded) {
      ++num_db_errors_;
      return error_handler_->CreateAndUploadError(
          FROM_HERE,
          "Could not get the typed_url entries.",
          model_type());
    }

    // Get all the visits.
    // URLs that fail any of the checks are erased from |typed_urls|; their
    // (possibly partially filled) entry in |visit_vectors| is simply left
    // behind and never read again.
    std::map<history::URLID, history::VisitVector> visit_vectors;
    for (history::URLRows::iterator ix = typed_urls.begin();
         ix != typed_urls.end();) {
      DCHECK_EQ(0U, visit_vectors.count(ix->id()));
      if (!FixupURLAndGetVisits(&(*ix), &(visit_vectors[ix->id()])) ||
          ShouldIgnoreUrl(ix->url()) ||
          ShouldIgnoreVisits(visit_vectors[ix->id()])) {
        // Ignore this URL if we couldn't load the visits or if there's some
        // other problem with it (it was empty, or imported and never visited).
        ix = typed_urls.erase(ix);
      } else {
        ++ix;
      }
    }

    syncer::WriteTransaction trans(FROM_HERE, sync_service_->GetUserShare());
    syncer::ReadNode typed_url_root(&trans);
    if (typed_url_root.InitByTagLookup(kTypedUrlTag) !=
            syncer::BaseNode::INIT_OK) {
      return error_handler_->CreateAndUploadError(
          FROM_HERE,
          "Server did not create the top-level typed_url node. We "
          "might be running against an out-of-date server.",
          model_type());
    }

    // URL specs of every local typed URL processed in the loop below; used
    // afterwards to tell which sync nodes have no local counterpart.
    std::set<std::string> current_urls;
    for (history::URLRows::iterator ix = typed_urls.begin();
         ix != typed_urls.end(); ++ix) {
      // The URL spec doubles as the client tag of the sync node.
      std::string tag = ix->url().spec();
      // Empty URLs should be filtered out by ShouldIgnoreUrl() previously.
      DCHECK(!tag.empty());
      history::VisitVector& visits = visit_vectors[ix->id()];

      syncer::ReadNode node(&trans);
      if (node.InitByClientTagLookup(syncer::TYPED_URLS, tag) ==
              syncer::BaseNode::INIT_OK) {
        // Same URL exists in sync data and in history data - compare the
        // entries to see if there's any difference.
        sync_pb::TypedUrlSpecifics typed_url(
            FilterExpiredVisits(node.GetTypedUrlSpecifics()));
        DCHECK_EQ(tag, typed_url.url());

        // Initialize fields in |new_url| to the same values as the fields in
        // the existing URLRow in the history DB. This is needed because we
        // overwrite the existing value below in WriteToHistoryBackend(), but
        // some of the values in that structure are not synced (like
        // typed_count).
        history::URLRow new_url(*ix);

        std::vector<history::VisitInfo> added_visits;
        MergeResult difference =
            MergeUrls(typed_url, *ix, &visits, &new_url, &added_visits);
        if (difference & DIFF_UPDATE_NODE) {
          // The sync node is out of date - rewrite it from the merged data.
          syncer::WriteNode write_node(&trans);
          if (write_node.InitByClientTagLookup(syncer::TYPED_URLS, tag) !=
                  syncer::BaseNode::INIT_OK) {
            return error_handler_->CreateAndUploadError(
                FROM_HERE,
                "Failed to edit typed_url sync node.",
                model_type());
          }
          // We don't want to resurrect old visits that have been aged out by
          // other clients, so remove all visits that are older than the
          // earliest existing visit in the sync node.
          if (typed_url.visits_size() > 0) {
            base::Time earliest_visit =
                base::Time::FromInternalValue(typed_url.visits(0));
            for (history::VisitVector::iterator it = visits.begin();
                 it != visits.end() && it->visit_time < earliest_visit; ) {
              it = visits.erase(it);
            }
            // Should never be possible to delete all the items, since the
            // visit vector contains all the items in typed_url.visits.
            DCHECK(visits.size() > 0);
          }
          DCHECK_EQ(new_url.last_visit().ToInternalValue(),
                    visits.back().visit_time.ToInternalValue());
          WriteToSyncNode(new_url, visits, &write_node);
        }
        if (difference & DIFF_LOCAL_ROW_CHANGED) {
          // The local URL row must be rewritten with the merged values.
          updated_urls.push_back(
              std::pair<history::URLID, history::URLRow>(ix->id(), new_url));
        }
        if (difference & DIFF_LOCAL_VISITS_ADDED) {
          // Sync had visits that the local history DB lacks.
          new_visits.push_back(
              std::pair<GURL, std::vector<history::VisitInfo> >(ix->url(),
                                                                added_visits));
        }
      } else {
        // Sync has never seen this URL before.
        syncer::WriteNode node(&trans);
        syncer::WriteNode::InitUniqueByCreationResult result =
            node.InitUniqueByCreation(syncer::TYPED_URLS,
                                      typed_url_root, tag);
        if (result != syncer::WriteNode::INIT_SUCCESS) {
          return error_handler_->CreateAndUploadError(
              FROM_HERE,
              "Failed to create typed_url sync node: " + tag,
              model_type());
        }

        node.SetTitle(base::UTF8ToWide(tag));
        WriteToSyncNode(*ix, visits, &node);
      }

      current_urls.insert(tag);
    }

    // Now walk the sync nodes and detect any URLs that exist there, but not in
    // the history DB, so we can add them to our local history DB.
    std::vector<int64> obsolete_nodes;
    int64 sync_child_id = typed_url_root.GetFirstChildId();
    while (sync_child_id != syncer::kInvalidId) {
      syncer::ReadNode sync_child_node(&trans);
      if (sync_child_node.InitByIdLookup(sync_child_id) !=
              syncer::BaseNode::INIT_OK) {
        return error_handler_->CreateAndUploadError(
            FROM_HERE,
            "Failed to fetch child node.",
            model_type());
      }
      const sync_pb::TypedUrlSpecifics& typed_url(
          sync_child_node.GetTypedUrlSpecifics());

      // Advance to the next sibling up front so that every "continue" below
      // still makes progress.
      sync_child_id = sync_child_node.GetSuccessorId();

      // Ignore old sync nodes that don't have any transition data stored with
      // them, or transition data that does not match the visit data (will be
      // deleted below).
      if (typed_url.visit_transitions_size() == 0 ||
          typed_url.visit_transitions_size() != typed_url.visits_size()) {
        // Generate a debug assertion to help track down http://crbug.com/91473,
        // even though we gracefully handle this case by throwing away this
        // node.
        DCHECK_EQ(typed_url.visits_size(), typed_url.visit_transitions_size());
        DVLOG(1) << "Deleting obsolete sync node with no visit "
                 << "transition info.";
        obsolete_nodes.push_back(sync_child_node.GetId());
        continue;
      }

      if (typed_url.url().empty()) {
        DVLOG(1) << "Ignoring empty URL in sync DB";
        continue;
      }

      // Now, get rid of the expired visits, and if there are no un-expired
      // visits left, just ignore this node.
      sync_pb::TypedUrlSpecifics filtered_url = FilterExpiredVisits(typed_url);
      if (filtered_url.visits_size() == 0) {
        DVLOG(1) << "Ignoring expired URL in sync DB: " << filtered_url.url();
        continue;
      }

      if (current_urls.find(filtered_url.url()) == current_urls.end()) {
        // Update the local DB from the sync DB. Since we are doing our
        // initial model association, we don't want to remove any of the
        // existing visits (pass NULL as |visits_to_remove|).
        UpdateFromSyncDB(filtered_url,
                         &new_visits,
                         NULL,
                         &updated_urls,
                         &new_urls);
      }
    }

    // If we encountered any obsolete nodes, remove them so they don't hang
    // around and confuse people looking at the sync node browser.
    if (!obsolete_nodes.empty()) {
      for (std::vector<int64>::const_iterator it = obsolete_nodes.begin();
           it != obsolete_nodes.end();
           ++it) {
        syncer::WriteNode sync_node(&trans);
        if (sync_node.InitByIdLookup(*it) != syncer::BaseNode::INIT_OK) {
          return error_handler_->CreateAndUploadError(
              FROM_HERE,
              "Failed to fetch obsolete node.",
              model_type());
        }
        sync_node.Tombstone();
      }
    }
  }

  // Since we're on the history thread, we don't have to worry about updating
  // the history database after closing the write transaction, since
  // this is the only thread that writes to the database.  We also don't have
  // to worry about the sync model getting out of sync, because changes are
  // propagated to the ChangeProcessor on this thread.
  WriteToHistoryBackend(&new_urls, &updated_urls, &new_visits, NULL);
  return syncer::SyncError();
}
 399
 400 void TypedUrlModelAssociator::UpdateFromSyncDB(
 401     const sync_pb::TypedUrlSpecifics& typed_url,
 402     TypedUrlVisitVector* visits_to_add,
 403     history::VisitVector* visits_to_remove,
 404     TypedUrlUpdateVector* updated_urls,
 405     history::URLRows* new_urls) {
 406   history::URLRow new_url(GURL(typed_url.url()));
 407   history::VisitVector existing_visits;
 408   bool existing_url = history_backend_->GetURL(new_url.url(), &new_url);
 409   if (existing_url) {
 410     // This URL already exists locally - fetch the visits so we can
 411     // merge them below.
 412     if (!FixupURLAndGetVisits(&new_url, &existing_visits)) {
 413       // Couldn't load the visits for this URL due to some kind of DB error.
 414       // Don't bother writing this URL to the history DB (if we ignore the
 415       // error and continue, we might end up duplicating existing visits).
 416       DLOG(ERROR) << "Could not load visits for url: " << new_url.url();
 417       return;
 418     }
 419   }
 420   visits_to_add->push_back(std::pair<GURL, std::vector<history::VisitInfo> >(
 421       new_url.url(), std::vector<history::VisitInfo>()));
 422
 423   // Update the URL with information from the typed URL.
 424   UpdateURLRowFromTypedUrlSpecifics(typed_url, &new_url);
 425
 426   // Figure out which visits we need to add.
 427   DiffVisits(existing_visits, typed_url, &visits_to_add->back().second,
 428              visits_to_remove);
 429
 430   if (existing_url) {
 431     updated_urls->push_back(
 432         std::pair<history::URLID, history::URLRow>(new_url.id(), new_url));
 433   } else {
 434     new_urls->push_back(new_url);
 435   }
 436 }
 437
 438 sync_pb::TypedUrlSpecifics TypedUrlModelAssociator::FilterExpiredVisits(
 439     const sync_pb::TypedUrlSpecifics& source) {
 440   // Make a copy of the source, then regenerate the visits.
 441   sync_pb::TypedUrlSpecifics specifics(source);
 442   specifics.clear_visits();
 443   specifics.clear_visit_transitions();
 444   for (int i = 0; i < source.visits_size(); ++i) {
 445     base::Time time = base::Time::FromInternalValue(source.visits(i));
 446     if (!history_backend_->IsExpiredVisitTime(time)) {
 447       specifics.add_visits(source.visits(i));
 448       specifics.add_visit_transitions(source.visit_transitions(i));
 449     }
 450   }
 451   DCHECK(specifics.visits_size() == specifics.visit_transitions_size());
 452   return specifics;
 453 }
 454
 455 bool TypedUrlModelAssociator::DeleteAllNodes(
 456     syncer::WriteTransaction* trans) {
 457   DCHECK(expected_loop_ == base::MessageLoop::current());
 458
 459   // Just walk through all our child nodes and delete them.
 460   syncer::ReadNode typed_url_root(trans);
 461   if (typed_url_root.InitByTagLookup(kTypedUrlTag) !=
 462           syncer::BaseNode::INIT_OK) {
 463     LOG(ERROR) << "Could not lookup root node";
 464     return false;
 465   }
 466   int64 sync_child_id = typed_url_root.GetFirstChildId();
 467   while (sync_child_id != syncer::kInvalidId) {
 468     syncer::WriteNode sync_child_node(trans);
 469     if (sync_child_node.InitByIdLookup(sync_child_id) !=
 470             syncer::BaseNode::INIT_OK) {
 471       LOG(ERROR) << "Typed url node lookup failed.";
 472       return false;
 473     }
 474     sync_child_id = sync_child_node.GetSuccessorId();
 475     sync_child_node.Tombstone();
 476   }
 477   return true;
 478 }
 479
// Does no work: always returns a default-constructed SyncError.
syncer::SyncError TypedUrlModelAssociator::DisassociateModels() {
  return syncer::SyncError();
}
 483
// Sets |abort_requested_| while holding |abort_lock_|. DoAssociateModels()
// checks the flag under the same lock and returns an error if it is set.
void TypedUrlModelAssociator::AbortAssociation() {
  base::AutoLock lock(abort_lock_);
  abort_requested_ = true;
}
 488
 489 bool TypedUrlModelAssociator::SyncModelHasUserCreatedNodes(bool* has_nodes) {
 490   DCHECK(has_nodes);
 491   *has_nodes = false;
 492   syncer::ReadTransaction trans(FROM_HERE, sync_service_->GetUserShare());
 493   syncer::ReadNode sync_node(&trans);
 494   if (sync_node.InitByTagLookup(kTypedUrlTag) != syncer::BaseNode::INIT_OK) {
 495     LOG(ERROR) << "Server did not create the top-level typed_url node. We "
 496                << "might be running against an out-of-date server.";
 497     return false;
 498   }
 499
 500   // The sync model has user created nodes if the typed_url folder has any
 501   // children.
 502   *has_nodes = sync_node.HasChildren();
 503   return true;
 504 }
 505
 506 void TypedUrlModelAssociator::WriteToHistoryBackend(
 507     const history::URLRows* new_urls,
 508     const TypedUrlUpdateVector* updated_urls,
 509     const TypedUrlVisitVector* new_visits,
 510     const history::VisitVector* deleted_visits) {
 511   if (new_urls) {
 512     history_backend_->AddPagesWithDetails(*new_urls, history::SOURCE_SYNCED);
 513   }
 514   if (updated_urls) {
 515     for (TypedUrlUpdateVector::const_iterator url = updated_urls->begin();
 516          url != updated_urls->end(); ++url) {
 517       // This is an existing entry in the URL database. We don't verify the
 518       // visit_count or typed_count values here, because either one (or both)
 519       // could be zero in the case of bookmarks, or in the case of a URL
 520       // transitioning from non-typed to typed as a result of this sync.
 521       ++num_db_accesses_;
 522       if (!history_backend_->UpdateURL(url->first, url->second)) {
 523         // In the field we sometimes run into errors on specific URLs. It's OK
 524         // to just continue on (we can try writing again on the next model
 525         // association).
 526         ++num_db_errors_;
 527         DLOG(ERROR) << "Could not update page: " << url->second.url().spec();
 528       }
 529     }
 530   }
 531   if (new_visits) {
 532     for (TypedUrlVisitVector::const_iterator visits = new_visits->begin();
 533          visits != new_visits->end(); ++visits) {
 534       // If there are no visits to add, just skip this.
 535       if (visits->second.empty())
 536         continue;
 537       ++num_db_accesses_;
 538       if (!history_backend_->AddVisits(visits->first, visits->second,
 539                                        history::SOURCE_SYNCED)) {
 540         ++num_db_errors_;
 541         DLOG(ERROR) << "Could not add visits.";
 542       }
 543     }
 544   }
 545   if (deleted_visits) {
 546     ++num_db_accesses_;
 547     if (!history_backend_->RemoveVisits(*deleted_visits)) {
 548       ++num_db_errors_;
 549       DLOG(ERROR) << "Could not remove visits.";
 550       // This is bad news, since it means we may end up resurrecting history
 551       // entries on the next reload. It's unavoidable so we'll just keep on
 552       // syncing.
 553     }
 554   }
 555 }
 556
// static
// Merges the typed URL data stored in a sync node with the matching local
// history row and its visits.
//
// |node|       - typed URL specifics from sync; its visits and
//                visit_transitions arrays must have the same size (CHECKed).
// |url|        - the local history row; must have the same URL as |node|.
// |visits|     - in/out: the local visits, non-empty and ordered by
//                non-decreasing visit time. Visits that exist only in |node|
//                and are newer than the earliest local visit are inserted at
//                the matching position.
// |new_url|    - out: receives the merged title, hidden flag and last visit.
// |new_visits| - out: the visits from |node| that are missing locally.
//
// Returns a bitfield built from:
//   DIFF_UPDATE_NODE        - the sync node needs to be rewritten.
//   DIFF_LOCAL_ROW_CHANGED  - the local URL row needs to be updated.
//   DIFF_LOCAL_VISITS_ADDED - |new_visits| must be added to the local DB.
// If |node| contains no visits, DIFF_UPDATE_NODE is returned immediately and
// none of the output arguments are modified.
TypedUrlModelAssociator::MergeResult TypedUrlModelAssociator::MergeUrls(
    const sync_pb::TypedUrlSpecifics& node,
    const history::URLRow& url,
    history::VisitVector* visits,
    history::URLRow* new_url,
    std::vector<history::VisitInfo>* new_visits) {
  DCHECK(new_url);
  DCHECK(!node.url().compare(url.url().spec()));
  DCHECK(!node.url().compare(new_url->url().spec()));
  DCHECK(visits->size());
  CHECK_EQ(node.visits_size(), node.visit_transitions_size());

  // If this is an old-format node (from before the visits and
  // visit_transitions arrays were added to the protobuf), or the node only
  // contained expired visits, just overwrite it with our local history data.
  if (node.visits_size() == 0)
    return DIFF_UPDATE_NODE;

  // Convert these values only once.
  base::string16 node_title(base::UTF8ToUTF16(node.title()));
  base::Time node_last_visit = base::Time::FromInternalValue(
      node.visits(node.visits_size() - 1));

  // This is a bitfield representing what we'll need to update with the output
  // value.
  MergeResult different = DIFF_NONE;

  // Check if the non-incremented values changed.
  if ((node_title.compare(url.title()) != 0) ||
      (node.hidden() != url.hidden())) {
    // Use the values from the most recent visit.
    // On a tie the sync node wins.
    if (node_last_visit >= url.last_visit()) {
      new_url->set_title(node_title);
      new_url->set_hidden(node.hidden());
      different |= DIFF_LOCAL_ROW_CHANGED;
    } else {
      new_url->set_title(url.title());
      new_url->set_hidden(url.hidden());
      different |= DIFF_UPDATE_NODE;
    }
  } else {
    // No difference.
    new_url->set_title(url.title());
    new_url->set_hidden(url.hidden());
  }

  size_t node_num_visits = node.visits_size();
  size_t history_num_visits = visits->size();
  size_t node_visit_index = 0;
  size_t history_visit_index = 0;
  base::Time earliest_history_time = (*visits)[0].visit_time;
  // Walk through the two sets of visits and figure out if any new visits were
  // added on either side.
  // Each iteration advances at least one of the two indices.
  while (node_visit_index < node_num_visits ||
         history_visit_index < history_num_visits) {
    // Time objects are initialized to "earliest possible time".
    base::Time node_time, history_time;
    if (node_visit_index < node_num_visits)
      node_time = base::Time::FromInternalValue(node.visits(node_visit_index));
    if (history_visit_index < history_num_visits)
      history_time = (*visits)[history_visit_index].visit_time;
    if (node_visit_index >= node_num_visits ||
        (history_visit_index < history_num_visits &&
         node_time > history_time)) {
      // We found a visit in the history DB that doesn't exist in the sync DB,
      // so mark the node as modified so the caller will update the sync node.
      different |= DIFF_UPDATE_NODE;
      ++history_visit_index;
    } else if (history_visit_index >= history_num_visits ||
               node_time < history_time) {
      // Found a visit in the sync node that doesn't exist in the history DB, so
      // add it to our list of new visits and set the appropriate flag so the
      // caller will update the history DB.
      // If the node visit is older than any existing visit in the history DB,
      // don't re-add it - this keeps us from resurrecting visits that were
      // aged out locally.
      if (node_time > earliest_history_time) {
        different |= DIFF_LOCAL_VISITS_ADDED;
        new_visits->push_back(history::VisitInfo(
            node_time,
            content::PageTransitionFromInt(
                node.visit_transitions(node_visit_index))));
      }
      // This visit is added to visits below.
      ++node_visit_index;
    } else {
      // Same (already synced) entry found in both DBs - no need to do anything.
      ++node_visit_index;
      ++history_visit_index;
    }
  }

  DCHECK(CheckVisitOrdering(*visits));
  if (different & DIFF_LOCAL_VISITS_ADDED) {
    // Insert new visits into the appropriate place in the visits vector.
    // |visit_ix| is never reset between insertions: it only moves forward
    // through |visits| as |new_visits| is walked.
    history::VisitVector::iterator visit_ix = visits->begin();
    for (std::vector<history::VisitInfo>::iterator new_visit =
             new_visits->begin();
         new_visit != new_visits->end(); ++new_visit) {
      while (visit_ix != visits->end() &&
             new_visit->first > visit_ix->visit_time) {
        ++visit_ix;
      }
      visit_ix = visits->insert(visit_ix,
                                history::VisitRow(url.id(), new_visit->first,
                                                  0, new_visit->second, 0));
      ++visit_ix;
    }
  }
  DCHECK(CheckVisitOrdering(*visits));

  // The merged row's last visit is the newest entry of the merged visits.
  new_url->set_last_visit(visits->back().visit_time);
  return different;
}
 672
// static
// Builds a fresh TypedUrlSpecifics from |url| and |visits| via
// WriteToTypedUrlSpecifics() and stores it in |node|.
void TypedUrlModelAssociator::WriteToSyncNode(
    const history::URLRow& url,
    const history::VisitVector& visits,
    syncer::WriteNode* node) {
  sync_pb::TypedUrlSpecifics typed_url;
  WriteToTypedUrlSpecifics(url, visits, &typed_url);
  node->SetTypedUrlSpecifics(typed_url);
}
 682
 683 void TypedUrlModelAssociator::WriteToTypedUrlSpecifics(
 684     const history::URLRow& url,
 685     const history::VisitVector& visits,
 686     sync_pb::TypedUrlSpecifics* typed_url) {
 687
 688   DCHECK(!url.last_visit().is_null());
 689   DCHECK(!visits.empty());
 690   DCHECK_EQ(url.last_visit().ToInternalValue(),
 691             visits.back().visit_time.ToInternalValue());
 692
 693   typed_url->set_url(url.url().spec());
 694   typed_url->set_title(base::UTF16ToUTF8(url.title()));
 695   typed_url->set_hidden(url.hidden());
 696
 697   DCHECK(CheckVisitOrdering(visits));
 698
 699   bool only_typed = false;
 700   int skip_count = 0;
 701
 702   if (visits.size() > static_cast<size_t>(kMaxTypedUrlVisits)) {
 703     int typed_count = 0;
 704     int total = 0;
 705     // Walk the passed-in visit vector and count the # of typed visits.
 706     for (history::VisitVector::const_iterator visit = visits.begin();
 707          visit != visits.end(); ++visit) {
 708       content::PageTransition transition = content::PageTransitionFromInt(
 709           visit->transition & content::PAGE_TRANSITION_CORE_MASK);
 710       // We ignore reload visits.
 711       if (transition == content::PAGE_TRANSITION_RELOAD)
 712         continue;
 713       ++total;
 714       if (transition == content::PAGE_TRANSITION_TYPED)
 715         ++typed_count;
 716     }
 717     // We should have at least one typed visit. This can sometimes happen if
 718     // the history DB has an inaccurate count for some reason (there's been
 719     // bugs in the history code in the past which has left users in the wild
 720     // with incorrect counts - http://crbug.com/84258).
 721     DCHECK(typed_count > 0);
 722
 723     if (typed_count > kMaxTypedUrlVisits) {
 724       only_typed = true;
 725       skip_count = typed_count - kMaxTypedUrlVisits;
 726     } else if (total > kMaxTypedUrlVisits) {
 727       skip_count = total - kMaxTypedUrlVisits;
 728     }
 729   }
 730
 731
 732   for (history::VisitVector::const_iterator visit = visits.begin();
 733        visit != visits.end(); ++visit) {
 734     content::PageTransition transition = content::PageTransitionFromInt(
 735         visit->transition & content::PAGE_TRANSITION_CORE_MASK);
 736     // Skip reload visits.
 737     if (transition == content::PAGE_TRANSITION_RELOAD)
 738       continue;
 739
 740     // If we only have room for typed visits, then only add typed visits.
 741     if (only_typed && transition != content::PAGE_TRANSITION_TYPED)
 742       continue;
 743
 744     if (skip_count > 0) {
 745       // We have too many entries to fit, so we need to skip the oldest ones.
 746       // Only skip typed URLs if there are too many typed URLs to fit.
 747       if (only_typed || transition != content::PAGE_TRANSITION_TYPED) {
 748         --skip_count;
 749         continue;
 750       }
 751     }
 752     typed_url->add_visits(visit->visit_time.ToInternalValue());
 753     typed_url->add_visit_transitions(visit->transition);
 754   }
 755   DCHECK_EQ(skip_count, 0);
 756
 757   if (typed_url->visits_size() == 0) {
 758     // If we get here, it's because we don't actually have any TYPED visits
 759     // even though the visit's typed_count > 0 (corrupted typed_count). So
 760     // let's go ahead and add a RELOAD visit at the most recent visit since
 761     // it's not legal to have an empty visit array (yet another workaround
 762     // for http://crbug.com/84258).
 763     typed_url->add_visits(url.last_visit().ToInternalValue());
 764     typed_url->add_visit_transitions(content::PAGE_TRANSITION_RELOAD);
 765   }
 766   CHECK_GT(typed_url->visits_size(), 0);
 767   CHECK_LE(typed_url->visits_size(), kMaxTypedUrlVisits);
 768   CHECK_EQ(typed_url->visits_size(), typed_url->visit_transitions_size());
 769 }
 770
 771 // static
 772 void TypedUrlModelAssociator::DiffVisits(
 773     const history::VisitVector& old_visits,
 774     const sync_pb::TypedUrlSpecifics& new_url,
 775     std::vector<history::VisitInfo>* new_visits,
 776     history::VisitVector* removed_visits) {
 777   DCHECK(new_visits);
 778   size_t old_visit_count = old_visits.size();
 779   size_t new_visit_count = new_url.visits_size();
 780   size_t old_index = 0;
 781   size_t new_index = 0;
 782   while (old_index < old_visit_count && new_index < new_visit_count) {
 783     base::Time new_visit_time =
 784         base::Time::FromInternalValue(new_url.visits(new_index));
 785     if (old_visits[old_index].visit_time < new_visit_time) {
 786       if (new_index > 0 && removed_visits) {
 787         // If there are visits missing from the start of the node, that
 788         // means that they were probably clipped off due to our code that
 789         // limits the size of the sync nodes - don't delete them from our
 790         // local history.
 791         removed_visits->push_back(old_visits[old_index]);
 792       }
 793       ++old_index;
 794     } else if (old_visits[old_index].visit_time > new_visit_time) {
 795       new_visits->push_back(history::VisitInfo(
 796           new_visit_time,
 797           content::PageTransitionFromInt(
 798               new_url.visit_transitions(new_index))));
 799       ++new_index;
 800     } else {
 801       ++old_index;
 802       ++new_index;
 803     }
 804   }
 805
 806   if (removed_visits) {
 807     for ( ; old_index < old_visit_count; ++old_index) {
 808       removed_visits->push_back(old_visits[old_index]);
 809     }
 810   }
 811
 812   for ( ; new_index < new_visit_count; ++new_index) {
 813     new_visits->push_back(history::VisitInfo(
 814         base::Time::FromInternalValue(new_url.visits(new_index)),
 815         content::PageTransitionFromInt(new_url.visit_transitions(new_index))));
 816   }
 817 }
 818
 819
 820 // static
 821 void TypedUrlModelAssociator::UpdateURLRowFromTypedUrlSpecifics(
 822     const sync_pb::TypedUrlSpecifics& typed_url, history::URLRow* new_url) {
 823   DCHECK_GT(typed_url.visits_size(), 0);
 824   CHECK_EQ(typed_url.visit_transitions_size(), typed_url.visits_size());
 825   new_url->set_title(base::UTF8ToUTF16(typed_url.title()));
 826   new_url->set_hidden(typed_url.hidden());
 827   // Only provide the initial value for the last_visit field - after that, let
 828   // the history code update the last_visit field on its own.
 829   if (new_url->last_visit().is_null()) {
 830     new_url->set_last_visit(base::Time::FromInternalValue(
 831         typed_url.visits(typed_url.visits_size() - 1)));
 832   }
 833 }
 834
 835 bool TypedUrlModelAssociator::CryptoReadyIfNecessary() {
 836   // We only access the cryptographer while holding a transaction.
 837   syncer::ReadTransaction trans(FROM_HERE, sync_service_->GetUserShare());
 838   const syncer::ModelTypeSet encrypted_types = trans.GetEncryptedTypes();
 839   return !encrypted_types.Has(syncer::TYPED_URLS) ||
 840          sync_service_->IsCryptographerReady(&trans);
 841 }
 842
 843 }  // namespace browser_sync