src/chrome/browser/sync/glue/typed_url_model_associator.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "chrome/browser/sync/glue/typed_url_model_associator.h"
   6
   7 #include <algorithm>
   8 #include <set>
   9
  10 #include "base/location.h"
  11 #include "base/logging.h"
  12 #include "base/metrics/histogram.h"
  13 #include "base/strings/utf_string_conversions.h"
  14 #include "chrome/browser/history/history_backend.h"
  15 #include "chrome/browser/sync/profile_sync_service.h"
  16 #include "content/public/browser/browser_thread.h"
  17 #include "net/base/net_util.h"
  18 #include "sync/api/sync_error.h"
  19 #include "sync/internal_api/public/read_node.h"
  20 #include "sync/internal_api/public/read_transaction.h"
  21 #include "sync/internal_api/public/write_node.h"
  22 #include "sync/internal_api/public/write_transaction.h"
  23 #include "sync/protocol/typed_url_specifics.pb.h"
  24
  25 using content::BrowserThread;
  26
  27 namespace browser_sync {
  28
  29 // The server backend can't handle arbitrarily large node sizes, so to keep
  30 // the size under control we limit the visit array.
  31 static const int kMaxTypedUrlVisits = 100;
  32
  33 // There's no limit on how many visits the history DB could have for a given
  34 // typed URL, so we limit how many we fetch from the DB to avoid crashes due to
  35 // running out of memory (http://crbug.com/89793). This value is different
  36 // from kMaxTypedUrlVisits, as some of the visits fetched from the DB may be
  37 // RELOAD visits, which will be stripped.
  38 static const int kMaxVisitsToFetch = 1000;
  39
  40 static bool CheckVisitOrdering(const history::VisitVector& visits) {
  41   int64 previous_visit_time = 0;
  42   for (history::VisitVector::const_iterator visit = visits.begin();
  43        visit != visits.end(); ++visit) {
  44     if (visit != visits.begin()) {
  45       // We allow duplicate visits here - they shouldn't really be allowed, but
  46       // they still seem to show up sometimes and we haven't figured out the
  47       // source, so we just log an error instead of failing an assertion.
  48       // (http://crbug.com/91473).
  49       if (previous_visit_time == visit->visit_time.ToInternalValue())
  50         DVLOG(1) << "Duplicate visit time encountered";
  51       else if (previous_visit_time > visit->visit_time.ToInternalValue())
  52         return false;
  53     }
  54
  55     previous_visit_time = visit->visit_time.ToInternalValue();
  56   }
  57   return true;
  58 }
  59
// Creates an associator that remembers the message loop of the constructing
// thread in |expected_loop_| and starts with zeroed DB access/error counters
// and no abort requested.
//   |sync_service|    - must be non-NULL (DCHECKed).
//   |history_backend| - may be NULL in unit tests.
//   |error_handler|   - used to create and upload errors during association.
// Must not be constructed on the UI thread (DCHECKed).
TypedUrlModelAssociator::TypedUrlModelAssociator(
    ProfileSyncService* sync_service,
    history::HistoryBackend* history_backend,
    sync_driver::DataTypeErrorHandler* error_handler)
    : sync_service_(sync_service),
      history_backend_(history_backend),
      expected_loop_(base::MessageLoop::current()),
      abort_requested_(false),
      error_handler_(error_handler),
      num_db_accesses_(0),
      num_db_errors_(0) {
  DCHECK(sync_service_);
  // history_backend_ may be null for unit tests (since it's not mockable).
  DCHECK(!BrowserThread::CurrentlyOn(BrowserThread::UI));
}
  75
// Nothing to release explicitly; the destructor body is intentionally empty.
TypedUrlModelAssociator::~TypedUrlModelAssociator() {}
  77
  78
  79 bool TypedUrlModelAssociator::FixupURLAndGetVisits(
  80     history::URLRow* url,
  81     history::VisitVector* visits) {
  82   ++num_db_accesses_;
  83   CHECK(history_backend_);
  84   if (!history_backend_->GetMostRecentVisitsForURL(
  85           url->id(), kMaxVisitsToFetch, visits)) {
  86     ++num_db_errors_;
  87     return false;
  88   }
  89
  90   // Sometimes (due to a bug elsewhere in the history or sync code, or due to
  91   // a crash between adding a URL to the history database and updating the
  92   // visit DB) the visit vector for a URL can be empty. If this happens, just
  93   // create a new visit whose timestamp is the same as the last_visit time.
  94   // This is a workaround for http://crbug.com/84258.
  95   if (visits->empty()) {
  96     DVLOG(1) << "Found empty visits for URL: " << url->url();
  97
  98     if (url->last_visit().is_null()) {
  99       // If modified URL is bookmarked, history backend treats it as modified
 100       // even if all its visits are deleted. Return false to stop further
 101       // processing because sync expects valid visit time for modified entry.
 102       return false;
 103     }
 104
 105     history::VisitRow visit(
 106         url->id(), url->last_visit(), 0, ui::PAGE_TRANSITION_TYPED, 0);
 107     visits->push_back(visit);
 108   }
 109
 110   // GetMostRecentVisitsForURL() returns the data in the opposite order that
 111   // we need it, so reverse it.
 112   std::reverse(visits->begin(), visits->end());
 113
 114   // Sometimes, the last_visit field in the URL doesn't match the timestamp of
 115   // the last visit in our visit array (they come from different tables, so
 116   // crashes/bugs can cause them to mismatch), so just set it here.
 117   url->set_last_visit(visits->back().visit_time);
 118   DCHECK(CheckVisitOrdering(*visits));
 119   return true;
 120 }
 121
 122 bool TypedUrlModelAssociator::ShouldIgnoreUrl(const GURL& url) {
 123   // Ignore empty URLs. Not sure how this can happen (maybe import from other
 124   // busted browsers, or misuse of the history API, or just plain bugs) but we
 125   // can't deal with them.
 126   if (url.spec().empty())
 127     return true;
 128
 129   // Ignore local file URLs.
 130   if (url.SchemeIsFile())
 131     return true;
 132
 133   // Ignore localhost URLs.
 134   if (net::IsLocalhost(url.host()))
 135     return true;
 136
 137   return false;
 138 }
 139
 140 bool TypedUrlModelAssociator::ShouldIgnoreVisits(
 141     const history::VisitVector& visits) {
 142   // We ignore URLs that were imported, but have never been visited by
 143   // chromium.
 144   static const int kLastImportedSource = history::SOURCE_EXTENSION;
 145   history::VisitSourceMap map;
 146   if (!history_backend_->GetVisitsSource(visits, &map))
 147     return false;  // If we can't read the visit, assume it's not imported.
 148
 149   // Walk the list of visits and look for a non-imported item.
 150   for (history::VisitVector::const_iterator it = visits.begin();
 151        it != visits.end(); ++it) {
 152     if (map.count(it->visit_id) == 0 ||
 153         map[it->visit_id] <= kLastImportedSource) {
 154       return false;
 155     }
 156   }
 157   // We only saw imported visits, so tell the caller to ignore them.
 158   return true;
 159 }
 160
 161 syncer::SyncError TypedUrlModelAssociator::AssociateModels(
 162     syncer::SyncMergeResult* local_merge_result,
 163     syncer::SyncMergeResult* syncer_merge_result) {
 164   ClearErrorStats();
 165   syncer::SyncError error = DoAssociateModels();
 166   UMA_HISTOGRAM_PERCENTAGE("Sync.TypedUrlModelAssociationErrors",
 167                            GetErrorPercentage());
 168   ClearErrorStats();
 169   return error;
 170 }
 171
 172 void TypedUrlModelAssociator::ClearErrorStats() {
 173   num_db_accesses_ = 0;
 174   num_db_errors_ = 0;
 175 }
 176
 177 int TypedUrlModelAssociator::GetErrorPercentage() const {
 178   return num_db_accesses_ ? (100 * num_db_errors_ / num_db_accesses_) : 0;
 179 }
 180
 181 syncer::SyncError TypedUrlModelAssociator::DoAssociateModels() {
 182   DVLOG(1) << "Associating TypedUrl Models";
 183   DCHECK(expected_loop_ == base::MessageLoop::current());
 184
 185   history::URLRows typed_urls;
 186   ++num_db_accesses_;
 187   bool query_succeeded =
 188       history_backend_ && history_backend_->GetAllTypedURLs(&typed_urls);
 189
 190   history::URLRows new_urls;
 191   history::URLRows updated_urls;
 192   TypedUrlVisitVector new_visits;
 193   {
 194     base::AutoLock au(abort_lock_);
 195     if (abort_requested_) {
 196       return syncer::SyncError(FROM_HERE,
 197                                syncer::SyncError::DATATYPE_ERROR,
 198                                "Association was aborted.",
 199                                model_type());
 200     }
 201
 202     // Must lock and check first to make sure |error_handler_| is valid.
 203     if (!query_succeeded) {
 204       ++num_db_errors_;
 205       return error_handler_->CreateAndUploadError(
 206           FROM_HERE,
 207           "Could not get the typed_url entries.",
 208           model_type());
 209     }
 210
 211     // Get all the visits.
 212     std::map<history::URLID, history::VisitVector> visit_vectors;
 213     for (history::URLRows::iterator ix = typed_urls.begin();
 214          ix != typed_urls.end();) {
 215       DCHECK_EQ(0U, visit_vectors.count(ix->id()));
 216       if (!FixupURLAndGetVisits(&(*ix), &(visit_vectors[ix->id()])) ||
 217           ShouldIgnoreUrl(ix->url()) ||
 218           ShouldIgnoreVisits(visit_vectors[ix->id()])) {
 219         // Ignore this URL if we couldn't load the visits or if there's some
 220         // other problem with it (it was empty, or imported and never visited).
 221         ix = typed_urls.erase(ix);
 222       } else {
 223         ++ix;
 224       }
 225     }
 226
 227     syncer::WriteTransaction trans(FROM_HERE, sync_service_->GetUserShare());
 228     syncer::ReadNode typed_url_root(&trans);
 229     if (typed_url_root.InitTypeRoot(syncer::TYPED_URLS) !=
 230         syncer::BaseNode::INIT_OK) {
 231       return error_handler_->CreateAndUploadError(
 232           FROM_HERE,
 233           "Server did not create the top-level typed_url node. We "
 234           "might be running against an out-of-date server.",
 235           model_type());
 236     }
 237
 238     std::set<std::string> current_urls;
 239     for (history::URLRows::iterator ix = typed_urls.begin();
 240          ix != typed_urls.end(); ++ix) {
 241       std::string tag = ix->url().spec();
 242       // Empty URLs should be filtered out by ShouldIgnoreUrl() previously.
 243       DCHECK(!tag.empty());
 244       history::VisitVector& visits = visit_vectors[ix->id()];
 245
 246       syncer::ReadNode node(&trans);
 247       if (node.InitByClientTagLookup(syncer::TYPED_URLS, tag) ==
 248               syncer::BaseNode::INIT_OK) {
 249         // Same URL exists in sync data and in history data - compare the
 250         // entries to see if there's any difference.
 251         sync_pb::TypedUrlSpecifics typed_url(
 252             FilterExpiredVisits(node.GetTypedUrlSpecifics()));
 253         DCHECK_EQ(tag, typed_url.url());
 254
 255         // Initialize fields in |new_url| to the same values as the fields in
 256         // the existing URLRow in the history DB. This is needed because we
 257         // overwrite the existing value below in WriteToHistoryBackend(), but
 258         // some of the values in that structure are not synced (like
 259         // typed_count).
 260         history::URLRow new_url(*ix);
 261
 262         std::vector<history::VisitInfo> added_visits;
 263         MergeResult difference =
 264             MergeUrls(typed_url, *ix, &visits, &new_url, &added_visits);
 265         if (difference & DIFF_UPDATE_NODE) {
 266           syncer::WriteNode write_node(&trans);
 267           if (write_node.InitByClientTagLookup(syncer::TYPED_URLS, tag) !=
 268                   syncer::BaseNode::INIT_OK) {
 269             return error_handler_->CreateAndUploadError(
 270                 FROM_HERE,
 271                 "Failed to edit typed_url sync node.",
 272                 model_type());
 273           }
 274           // We don't want to resurrect old visits that have been aged out by
 275           // other clients, so remove all visits that are older than the
 276           // earliest existing visit in the sync node.
 277           if (typed_url.visits_size() > 0) {
 278             base::Time earliest_visit =
 279                 base::Time::FromInternalValue(typed_url.visits(0));
 280             for (history::VisitVector::iterator it = visits.begin();
 281                  it != visits.end() && it->visit_time < earliest_visit; ) {
 282               it = visits.erase(it);
 283             }
 284             // Should never be possible to delete all the items, since the
 285             // visit vector contains all the items in typed_url.visits.
 286             DCHECK(visits.size() > 0);
 287           }
 288           DCHECK_EQ(new_url.last_visit().ToInternalValue(),
 289                     visits.back().visit_time.ToInternalValue());
 290           WriteToSyncNode(new_url, visits, &write_node);
 291         }
 292         if (difference & DIFF_LOCAL_ROW_CHANGED) {
 293           DCHECK_EQ(ix->id(), new_url.id());
 294           updated_urls.push_back(new_url);
 295         }
 296         if (difference & DIFF_LOCAL_VISITS_ADDED) {
 297           new_visits.push_back(
 298               std::pair<GURL, std::vector<history::VisitInfo> >(ix->url(),
 299                                                                 added_visits));
 300         }
 301       } else {
 302         // Sync has never seen this URL before.
 303         syncer::WriteNode node(&trans);
 304         syncer::WriteNode::InitUniqueByCreationResult result =
 305             node.InitUniqueByCreation(syncer::TYPED_URLS,
 306                                       typed_url_root, tag);
 307         if (result != syncer::WriteNode::INIT_SUCCESS) {
 308           return error_handler_->CreateAndUploadError(
 309               FROM_HERE,
 310               "Failed to create typed_url sync node: " + tag,
 311               model_type());
 312         }
 313
 314         node.SetTitle(tag);
 315         WriteToSyncNode(*ix, visits, &node);
 316       }
 317
 318       current_urls.insert(tag);
 319     }
 320
 321     // Now walk the sync nodes and detect any URLs that exist there, but not in
 322     // the history DB, so we can add them to our local history DB.
 323     std::vector<int64> obsolete_nodes;
 324     int64 sync_child_id = typed_url_root.GetFirstChildId();
 325     while (sync_child_id != syncer::kInvalidId) {
 326       syncer::ReadNode sync_child_node(&trans);
 327       if (sync_child_node.InitByIdLookup(sync_child_id) !=
 328               syncer::BaseNode::INIT_OK) {
 329         return error_handler_->CreateAndUploadError(
 330             FROM_HERE,
 331             "Failed to fetch child node.",
 332             model_type());
 333       }
 334       const sync_pb::TypedUrlSpecifics& typed_url(
 335           sync_child_node.GetTypedUrlSpecifics());
 336
 337       sync_child_id = sync_child_node.GetSuccessorId();
 338
 339       // Ignore old sync nodes that don't have any transition data stored with
 340       // them, or transition data that does not match the visit data (will be
 341       // deleted below).
 342       if (typed_url.visit_transitions_size() == 0 ||
 343           typed_url.visit_transitions_size() != typed_url.visits_size()) {
 344         // Generate a debug assertion to help track down http://crbug.com/91473,
 345         // even though we gracefully handle this case by throwing away this
 346         // node.
 347         DCHECK_EQ(typed_url.visits_size(), typed_url.visit_transitions_size());
 348         DVLOG(1) << "Deleting obsolete sync node with no visit "
 349                  << "transition info.";
 350         obsolete_nodes.push_back(sync_child_node.GetId());
 351         continue;
 352       }
 353
 354       if (typed_url.url().empty()) {
 355         DVLOG(1) << "Ignoring empty URL in sync DB";
 356         continue;
 357       }
 358
 359       // Now, get rid of the expired visits, and if there are no un-expired
 360       // visits left, just ignore this node.
 361       sync_pb::TypedUrlSpecifics filtered_url = FilterExpiredVisits(typed_url);
 362       if (filtered_url.visits_size() == 0) {
 363         DVLOG(1) << "Ignoring expired URL in sync DB: " << filtered_url.url();
 364         continue;
 365       }
 366
 367       if (current_urls.find(filtered_url.url()) == current_urls.end()) {
 368         // Update the local DB from the sync DB. Since we are doing our
 369         // initial model association, we don't want to remove any of the
 370         // existing visits (pass NULL as |visits_to_remove|).
 371         UpdateFromSyncDB(filtered_url,
 372                          &new_visits,
 373                          NULL,
 374                          &updated_urls,
 375                          &new_urls);
 376       }
 377     }
 378
 379     // If we encountered any obsolete nodes, remove them so they don't hang
 380     // around and confuse people looking at the sync node browser.
 381     if (!obsolete_nodes.empty()) {
 382       for (std::vector<int64>::const_iterator it = obsolete_nodes.begin();
 383            it != obsolete_nodes.end();
 384            ++it) {
 385         syncer::WriteNode sync_node(&trans);
 386         if (sync_node.InitByIdLookup(*it) != syncer::BaseNode::INIT_OK) {
 387           return error_handler_->CreateAndUploadError(
 388               FROM_HERE,
 389               "Failed to fetch obsolete node.",
 390               model_type());
 391         }
 392         sync_node.Tombstone();
 393       }
 394     }
 395   }
 396
 397   // Since we're on the history thread, we don't have to worry about updating
 398   // the history database after closing the write transaction, since
 399   // this is the only thread that writes to the database.  We also don't have
 400   // to worry about the sync model getting out of sync, because changes are
 401   // propagated to the ChangeProcessor on this thread.
 402   WriteToHistoryBackend(&new_urls, &updated_urls, &new_visits, NULL);
 403   return syncer::SyncError();
 404 }
 405
 406 void TypedUrlModelAssociator::UpdateFromSyncDB(
 407     const sync_pb::TypedUrlSpecifics& typed_url,
 408     TypedUrlVisitVector* visits_to_add,
 409     history::VisitVector* visits_to_remove,
 410     history::URLRows* updated_urls,
 411     history::URLRows* new_urls) {
 412   history::URLRow new_url(GURL(typed_url.url()));
 413   history::VisitVector existing_visits;
 414   bool existing_url = history_backend_->GetURL(new_url.url(), &new_url);
 415   if (existing_url) {
 416     // This URL already exists locally - fetch the visits so we can
 417     // merge them below.
 418     if (!FixupURLAndGetVisits(&new_url, &existing_visits)) {
 419       // Couldn't load the visits for this URL due to some kind of DB error.
 420       // Don't bother writing this URL to the history DB (if we ignore the
 421       // error and continue, we might end up duplicating existing visits).
 422       DLOG(ERROR) << "Could not load visits for url: " << new_url.url();
 423       return;
 424     }
 425   }
 426   visits_to_add->push_back(std::pair<GURL, std::vector<history::VisitInfo> >(
 427       new_url.url(), std::vector<history::VisitInfo>()));
 428
 429   // Update the URL with information from the typed URL.
 430   UpdateURLRowFromTypedUrlSpecifics(typed_url, &new_url);
 431
 432   // Figure out which visits we need to add.
 433   DiffVisits(existing_visits, typed_url, &visits_to_add->back().second,
 434              visits_to_remove);
 435
 436   if (existing_url) {
 437     updated_urls->push_back(new_url);
 438   } else {
 439     new_urls->push_back(new_url);
 440   }
 441 }
 442
 443 sync_pb::TypedUrlSpecifics TypedUrlModelAssociator::FilterExpiredVisits(
 444     const sync_pb::TypedUrlSpecifics& source) {
 445   // Make a copy of the source, then regenerate the visits.
 446   sync_pb::TypedUrlSpecifics specifics(source);
 447   specifics.clear_visits();
 448   specifics.clear_visit_transitions();
 449   for (int i = 0; i < source.visits_size(); ++i) {
 450     base::Time time = base::Time::FromInternalValue(source.visits(i));
 451     if (!history_backend_->IsExpiredVisitTime(time)) {
 452       specifics.add_visits(source.visits(i));
 453       specifics.add_visit_transitions(source.visit_transitions(i));
 454     }
 455   }
 456   DCHECK(specifics.visits_size() == specifics.visit_transitions_size());
 457   return specifics;
 458 }
 459
 460 bool TypedUrlModelAssociator::DeleteAllNodes(
 461     syncer::WriteTransaction* trans) {
 462   DCHECK(expected_loop_ == base::MessageLoop::current());
 463
 464   // Just walk through all our child nodes and delete them.
 465   syncer::ReadNode typed_url_root(trans);
 466   if (typed_url_root.InitTypeRoot(syncer::TYPED_URLS) !=
 467           syncer::BaseNode::INIT_OK) {
 468     LOG(ERROR) << "Could not lookup root node";
 469     return false;
 470   }
 471   int64 sync_child_id = typed_url_root.GetFirstChildId();
 472   while (sync_child_id != syncer::kInvalidId) {
 473     syncer::WriteNode sync_child_node(trans);
 474     if (sync_child_node.InitByIdLookup(sync_child_id) !=
 475             syncer::BaseNode::INIT_OK) {
 476       LOG(ERROR) << "Typed url node lookup failed.";
 477       return false;
 478     }
 479     sync_child_id = sync_child_node.GetSuccessorId();
 480     sync_child_node.Tombstone();
 481   }
 482   return true;
 483 }
 484
// Performs no work; always returns a default-constructed SyncError.
syncer::SyncError TypedUrlModelAssociator::DisassociateModels() {
  return syncer::SyncError();
}
 488
 489 void TypedUrlModelAssociator::AbortAssociation() {
 490   base::AutoLock lock(abort_lock_);
 491   abort_requested_ = true;
 492 }
 493
 494 bool TypedUrlModelAssociator::SyncModelHasUserCreatedNodes(bool* has_nodes) {
 495   DCHECK(has_nodes);
 496   *has_nodes = false;
 497   syncer::ReadTransaction trans(FROM_HERE, sync_service_->GetUserShare());
 498   syncer::ReadNode sync_node(&trans);
 499   if (sync_node.InitTypeRoot(syncer::TYPED_URLS) != syncer::BaseNode::INIT_OK) {
 500     LOG(ERROR) << "Server did not create the top-level typed_url node. We "
 501                << "might be running against an out-of-date server.";
 502     return false;
 503   }
 504
 505   // The sync model has user created nodes if the typed_url folder has any
 506   // children.
 507   *has_nodes = sync_node.HasChildren();
 508   return true;
 509 }
 510
 511 void TypedUrlModelAssociator::WriteToHistoryBackend(
 512     const history::URLRows* new_urls,
 513     const history::URLRows* updated_urls,
 514     const TypedUrlVisitVector* new_visits,
 515     const history::VisitVector* deleted_visits) {
 516   if (new_urls) {
 517     history_backend_->AddPagesWithDetails(*new_urls, history::SOURCE_SYNCED);
 518   }
 519   if (updated_urls) {
 520     ++num_db_accesses_;
 521     // These are existing entries in the URL database. We don't verify the
 522     // visit_count or typed_count values here, because either one (or both)
 523     // could be zero in the case of bookmarks, or in the case of a URL
 524     // transitioning from non-typed to typed as a result of this sync.
 525     // In the field we sometimes run into errors on specific URLs. It's OK to
 526     // just continue, as we can try writing again on the next model association.
 527     size_t num_successful_updates = history_backend_->UpdateURLs(*updated_urls);
 528     num_db_errors_ += updated_urls->size() - num_successful_updates;
 529   }
 530   if (new_visits) {
 531     for (TypedUrlVisitVector::const_iterator visits = new_visits->begin();
 532          visits != new_visits->end(); ++visits) {
 533       // If there are no visits to add, just skip this.
 534       if (visits->second.empty())
 535         continue;
 536       ++num_db_accesses_;
 537       if (!history_backend_->AddVisits(visits->first, visits->second,
 538                                        history::SOURCE_SYNCED)) {
 539         ++num_db_errors_;
 540         DLOG(ERROR) << "Could not add visits.";
 541       }
 542     }
 543   }
 544   if (deleted_visits) {
 545     ++num_db_accesses_;
 546     if (!history_backend_->RemoveVisits(*deleted_visits)) {
 547       ++num_db_errors_;
 548       DLOG(ERROR) << "Could not remove visits.";
 549       // This is bad news, since it means we may end up resurrecting history
 550       // entries on the next reload. It's unavoidable so we'll just keep on
 551       // syncing.
 552     }
 553   }
 554 }
 555
 556 // static
 557 TypedUrlModelAssociator::MergeResult TypedUrlModelAssociator::MergeUrls(
 558     const sync_pb::TypedUrlSpecifics& node,
 559     const history::URLRow& url,
 560     history::VisitVector* visits,
 561     history::URLRow* new_url,
 562     std::vector<history::VisitInfo>* new_visits) {
 563   DCHECK(new_url);
 564   DCHECK(!node.url().compare(url.url().spec()));
 565   DCHECK(!node.url().compare(new_url->url().spec()));
 566   DCHECK(visits->size());
 567   CHECK_EQ(node.visits_size(), node.visit_transitions_size());
 568
 569   // If we have an old-format node (before we added the visits and
 570   // visit_transitions arrays to the protobuf) or else the node only contained
 571   // expired visits, so just overwrite it with our local history data.
 572   if (node.visits_size() == 0)
 573     return DIFF_UPDATE_NODE;
 574
 575   // Convert these values only once.
 576   base::string16 node_title(base::UTF8ToUTF16(node.title()));
 577   base::Time node_last_visit = base::Time::FromInternalValue(
 578       node.visits(node.visits_size() - 1));
 579
 580   // This is a bitfield representing what we'll need to update with the output
 581   // value.
 582   MergeResult different = DIFF_NONE;
 583
 584   // Check if the non-incremented values changed.
 585   if ((node_title.compare(url.title()) != 0) ||
 586       (node.hidden() != url.hidden())) {
 587     // Use the values from the most recent visit.
 588     if (node_last_visit >= url.last_visit()) {
 589       new_url->set_title(node_title);
 590       new_url->set_hidden(node.hidden());
 591       different |= DIFF_LOCAL_ROW_CHANGED;
 592     } else {
 593       new_url->set_title(url.title());
 594       new_url->set_hidden(url.hidden());
 595       different |= DIFF_UPDATE_NODE;
 596     }
 597   } else {
 598     // No difference.
 599     new_url->set_title(url.title());
 600     new_url->set_hidden(url.hidden());
 601   }
 602
 603   size_t node_num_visits = node.visits_size();
 604   size_t history_num_visits = visits->size();
 605   size_t node_visit_index = 0;
 606   size_t history_visit_index = 0;
 607   base::Time earliest_history_time = (*visits)[0].visit_time;
 608   // Walk through the two sets of visits and figure out if any new visits were
 609   // added on either side.
 610   while (node_visit_index < node_num_visits ||
 611          history_visit_index < history_num_visits) {
 612     // Time objects are initialized to "earliest possible time".
 613     base::Time node_time, history_time;
 614     if (node_visit_index < node_num_visits)
 615       node_time = base::Time::FromInternalValue(node.visits(node_visit_index));
 616     if (history_visit_index < history_num_visits)
 617       history_time = (*visits)[history_visit_index].visit_time;
 618     if (node_visit_index >= node_num_visits ||
 619         (history_visit_index < history_num_visits &&
 620          node_time > history_time)) {
 621       // We found a visit in the history DB that doesn't exist in the sync DB,
 622       // so mark the node as modified so the caller will update the sync node.
 623       different |= DIFF_UPDATE_NODE;
 624       ++history_visit_index;
 625     } else if (history_visit_index >= history_num_visits ||
 626                node_time < history_time) {
 627       // Found a visit in the sync node that doesn't exist in the history DB, so
 628       // add it to our list of new visits and set the appropriate flag so the
 629       // caller will update the history DB.
 630       // If the node visit is older than any existing visit in the history DB,
 631       // don't re-add it - this keeps us from resurrecting visits that were
 632       // aged out locally.
 633       if (node_time > earliest_history_time) {
 634         different |= DIFF_LOCAL_VISITS_ADDED;
 635         new_visits->push_back(history::VisitInfo(
 636             node_time,
 637             ui::PageTransitionFromInt(
 638                 node.visit_transitions(node_visit_index))));
 639       }
 640       // This visit is added to visits below.
 641       ++node_visit_index;
 642     } else {
 643       // Same (already synced) entry found in both DBs - no need to do anything.
 644       ++node_visit_index;
 645       ++history_visit_index;
 646     }
 647   }
 648
 649   DCHECK(CheckVisitOrdering(*visits));
 650   if (different & DIFF_LOCAL_VISITS_ADDED) {
 651     // Insert new visits into the apropriate place in the visits vector.
 652     history::VisitVector::iterator visit_ix = visits->begin();
 653     for (std::vector<history::VisitInfo>::iterator new_visit =
 654              new_visits->begin();
 655          new_visit != new_visits->end(); ++new_visit) {
 656       while (visit_ix != visits->end() &&
 657              new_visit->first > visit_ix->visit_time) {
 658         ++visit_ix;
 659       }
 660       visit_ix = visits->insert(visit_ix,
 661                                 history::VisitRow(url.id(), new_visit->first,
 662                                                   0, new_visit->second, 0));
 663       ++visit_ix;
 664     }
 665   }
 666   DCHECK(CheckVisitOrdering(*visits));
 667
 668   new_url->set_last_visit(visits->back().visit_time);
 669   return different;
 670 }
 671
 672 // static
 673 void TypedUrlModelAssociator::WriteToSyncNode(
 674     const history::URLRow& url,
 675     const history::VisitVector& visits,
 676     syncer::WriteNode* node) {
 677   sync_pb::TypedUrlSpecifics typed_url;
 678   WriteToTypedUrlSpecifics(url, visits, &typed_url);
 679   node->SetTypedUrlSpecifics(typed_url);
 680 }
 681
 682 void TypedUrlModelAssociator::WriteToTypedUrlSpecifics(
 683     const history::URLRow& url,
 684     const history::VisitVector& visits,
 685     sync_pb::TypedUrlSpecifics* typed_url) {
 686
 687   DCHECK(!url.last_visit().is_null());
 688   DCHECK(!visits.empty());
 689   DCHECK_EQ(url.last_visit().ToInternalValue(),
 690             visits.back().visit_time.ToInternalValue());
 691
 692   typed_url->set_url(url.url().spec());
 693   typed_url->set_title(base::UTF16ToUTF8(url.title()));
 694   typed_url->set_hidden(url.hidden());
 695
 696   DCHECK(CheckVisitOrdering(visits));
 697
 698   bool only_typed = false;
 699   int skip_count = 0;
 700
 701   if (visits.size() > static_cast<size_t>(kMaxTypedUrlVisits)) {
 702     int typed_count = 0;
 703     int total = 0;
 704     // Walk the passed-in visit vector and count the # of typed visits.
 705     for (history::VisitVector::const_iterator visit = visits.begin();
 706          visit != visits.end(); ++visit) {
 707       ui::PageTransition transition =
 708           ui::PageTransitionStripQualifier(visit->transition);
 709       // We ignore reload visits.
 710       if (transition == ui::PAGE_TRANSITION_RELOAD)
 711         continue;
 712       ++total;
 713       if (transition == ui::PAGE_TRANSITION_TYPED)
 714         ++typed_count;
 715     }
 716     // We should have at least one typed visit. This can sometimes happen if
 717     // the history DB has an inaccurate count for some reason (there's been
 718     // bugs in the history code in the past which has left users in the wild
 719     // with incorrect counts - http://crbug.com/84258).
 720     DCHECK(typed_count > 0);
 721
 722     if (typed_count > kMaxTypedUrlVisits) {
 723       only_typed = true;
 724       skip_count = typed_count - kMaxTypedUrlVisits;
 725     } else if (total > kMaxTypedUrlVisits) {
 726       skip_count = total - kMaxTypedUrlVisits;
 727     }
 728   }
 729
 730
 731   for (history::VisitVector::const_iterator visit = visits.begin();
 732        visit != visits.end(); ++visit) {
 733     ui::PageTransition transition =
 734         ui::PageTransitionStripQualifier(visit->transition);
 735     // Skip reload visits.
 736     if (transition == ui::PAGE_TRANSITION_RELOAD)
 737       continue;
 738
 739     // If we only have room for typed visits, then only add typed visits.
 740     if (only_typed && transition != ui::PAGE_TRANSITION_TYPED)
 741       continue;
 742
 743     if (skip_count > 0) {
 744       // We have too many entries to fit, so we need to skip the oldest ones.
 745       // Only skip typed URLs if there are too many typed URLs to fit.
 746       if (only_typed || transition != ui::PAGE_TRANSITION_TYPED) {
 747         --skip_count;
 748         continue;
 749       }
 750     }
 751     typed_url->add_visits(visit->visit_time.ToInternalValue());
 752     typed_url->add_visit_transitions(visit->transition);
 753   }
 754   DCHECK_EQ(skip_count, 0);
 755
 756   if (typed_url->visits_size() == 0) {
 757     // If we get here, it's because we don't actually have any TYPED visits
 758     // even though the visit's typed_count > 0 (corrupted typed_count). So
 759     // let's go ahead and add a RELOAD visit at the most recent visit since
 760     // it's not legal to have an empty visit array (yet another workaround
 761     // for http://crbug.com/84258).
 762     typed_url->add_visits(url.last_visit().ToInternalValue());
 763     typed_url->add_visit_transitions(ui::PAGE_TRANSITION_RELOAD);
 764   }
 765   CHECK_GT(typed_url->visits_size(), 0);
 766   CHECK_LE(typed_url->visits_size(), kMaxTypedUrlVisits);
 767   CHECK_EQ(typed_url->visits_size(), typed_url->visit_transitions_size());
 768 }
 769
 770 // static
 771 void TypedUrlModelAssociator::DiffVisits(
 772     const history::VisitVector& old_visits,
 773     const sync_pb::TypedUrlSpecifics& new_url,
 774     std::vector<history::VisitInfo>* new_visits,
 775     history::VisitVector* removed_visits) {
 776   DCHECK(new_visits);
 777   size_t old_visit_count = old_visits.size();
 778   size_t new_visit_count = new_url.visits_size();
 779   size_t old_index = 0;
 780   size_t new_index = 0;
 781   while (old_index < old_visit_count && new_index < new_visit_count) {
 782     base::Time new_visit_time =
 783         base::Time::FromInternalValue(new_url.visits(new_index));
 784     if (old_visits[old_index].visit_time < new_visit_time) {
 785       if (new_index > 0 && removed_visits) {
 786         // If there are visits missing from the start of the node, that
 787         // means that they were probably clipped off due to our code that
 788         // limits the size of the sync nodes - don't delete them from our
 789         // local history.
 790         removed_visits->push_back(old_visits[old_index]);
 791       }
 792       ++old_index;
 793     } else if (old_visits[old_index].visit_time > new_visit_time) {
 794       new_visits->push_back(history::VisitInfo(
 795           new_visit_time,
 796           ui::PageTransitionFromInt(
 797               new_url.visit_transitions(new_index))));
 798       ++new_index;
 799     } else {
 800       ++old_index;
 801       ++new_index;
 802     }
 803   }
 804
 805   if (removed_visits) {
 806     for ( ; old_index < old_visit_count; ++old_index) {
 807       removed_visits->push_back(old_visits[old_index]);
 808     }
 809   }
 810
 811   for ( ; new_index < new_visit_count; ++new_index) {
 812     new_visits->push_back(history::VisitInfo(
 813         base::Time::FromInternalValue(new_url.visits(new_index)),
 814         ui::PageTransitionFromInt(new_url.visit_transitions(new_index))));
 815   }
 816 }
 817
 818
 819 // static
 820 void TypedUrlModelAssociator::UpdateURLRowFromTypedUrlSpecifics(
 821     const sync_pb::TypedUrlSpecifics& typed_url, history::URLRow* new_url) {
 822   DCHECK_GT(typed_url.visits_size(), 0);
 823   CHECK_EQ(typed_url.visit_transitions_size(), typed_url.visits_size());
 824   new_url->set_title(base::UTF8ToUTF16(typed_url.title()));
 825   new_url->set_hidden(typed_url.hidden());
 826   // Only provide the initial value for the last_visit field - after that, let
 827   // the history code update the last_visit field on its own.
 828   if (new_url->last_visit().is_null()) {
 829     new_url->set_last_visit(base::Time::FromInternalValue(
 830         typed_url.visits(typed_url.visits_size() - 1)));
 831   }
 832 }
 833
 834 bool TypedUrlModelAssociator::CryptoReadyIfNecessary() {
 835   // We only access the cryptographer while holding a transaction.
 836   syncer::ReadTransaction trans(FROM_HERE, sync_service_->GetUserShare());
 837   const syncer::ModelTypeSet encrypted_types = trans.GetEncryptedTypes();
 838   return !encrypted_types.Has(syncer::TYPED_URLS) ||
 839          sync_service_->IsCryptographerReady(&trans);
 840 }
 841
 842 }  // namespace browser_sync