src/chrome/browser/safe_browsing/safe_browsing_store.cc

   1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "chrome/browser/safe_browsing/safe_browsing_store.h"
   6
   7 #include <algorithm>
   8
   9 #include "base/logging.h"
  10
  11 namespace {
  12
  13 // Return |true| if the range is sorted by the given comparator.
  14 template <typename CTI, typename LESS>
  15 bool sorted(CTI beg, CTI end, LESS less) {
  16   while ((end - beg) > 2) {
  17     CTI n = beg++;
  18     if (less(*beg, *n))
  19       return false;
  20   }
  21   return true;
  22 }
  23
  24 // Find items matching between |subs| and |adds|, and remove them,
  25 // recording the item from |adds| in |adds_removed|.  To minimize
  26 // copies, the inputs are processing in parallel, so |subs| and |adds|
  27 // should be compatibly ordered (either by SBAddPrefixLess or
  28 // SBAddPrefixHashLess).
  29 //
  30 // |predAddSub| provides add < sub, |predSubAdd| provides sub < add,
  31 // for the tightest compare appropriate (see calls in SBProcessSubs).
  32 template <typename SubsT, typename AddsT,
  33           typename PredAddSubT, typename PredSubAddT>
  34 void KnockoutSubs(SubsT* subs, AddsT* adds,
  35                   PredAddSubT predAddSub, PredSubAddT predSubAdd,
  36                   AddsT* adds_removed) {
  37   // Keep a pair of output iterators for writing kept items.  Due to
  38   // deletions, these may lag the main iterators.  Using erase() on
  39   // individual items would result in O(N^2) copies.  Using std::list
  40   // would work around that, at double or triple the memory cost.
  41   typename AddsT::iterator add_out = adds->begin();
  42   typename SubsT::iterator sub_out = subs->begin();
  43
  44   // Current location in containers.
  45   // TODO(shess): I want these to be const_iterator, but then
  46   // std::copy() gets confused.  Could snag a const_iterator add_end,
  47   // or write an inline std::copy(), but it seems like I'm doing
  48   // something wrong.
  49   typename AddsT::iterator add_iter = adds->begin();
  50   typename SubsT::iterator sub_iter = subs->begin();
  51
  52   while (add_iter != adds->end() && sub_iter != subs->end()) {
  53     // If |*sub_iter| < |*add_iter|, retain the sub.
  54     if (predSubAdd(*sub_iter, *add_iter)) {
  55       *sub_out = *sub_iter;
  56       ++sub_out;
  57       ++sub_iter;
  58
  59       // If |*add_iter| < |*sub_iter|, retain the add.
  60     } else if (predAddSub(*add_iter, *sub_iter)) {
  61       *add_out = *add_iter;
  62       ++add_out;
  63       ++add_iter;
  64
  65       // Record equal items and drop them.
  66     } else {
  67       adds_removed->push_back(*add_iter);
  68       ++add_iter;
  69       ++sub_iter;
  70     }
  71   }
  72
  73   // Erase any leftover gap.
  74   adds->erase(add_out, add_iter);
  75   subs->erase(sub_out, sub_iter);
  76 }
  77
  78 // Remove items in |removes| from |full_hashes|.  |full_hashes| and
  79 // |removes| should be ordered by SBAddPrefix component.
  80 template <typename HashesT, typename AddsT>
  81 void RemoveMatchingPrefixes(const AddsT& removes, HashesT* full_hashes) {
  82   // This is basically an inline of std::set_difference().
  83   // Unfortunately, that algorithm requires that the two iterator
  84   // pairs use the same value types.
  85
  86   // Where to store kept items.
  87   typename HashesT::iterator out = full_hashes->begin();
  88
  89   typename HashesT::iterator hash_iter = full_hashes->begin();
  90   typename AddsT::const_iterator remove_iter = removes.begin();
  91
  92   while (hash_iter != full_hashes->end() && remove_iter != removes.end()) {
  93     // Keep items less than |*remove_iter|.
  94     if (SBAddPrefixLess(*hash_iter, *remove_iter)) {
  95       *out = *hash_iter;
  96       ++out;
  97       ++hash_iter;
  98
  99       // No hit for |*remove_iter|, bump it forward.
 100     } else if (SBAddPrefixLess(*remove_iter, *hash_iter)) {
 101       ++remove_iter;
 102
 103       // Drop equal items, there may be multiple hits.
 104     } else {
 105       do {
 106         ++hash_iter;
 107       } while (hash_iter != full_hashes->end() &&
 108                !SBAddPrefixLess(*remove_iter, *hash_iter));
 109       ++remove_iter;
 110     }
 111   }
 112
 113   // Erase any leftover gap.
 114   full_hashes->erase(out, hash_iter);
 115 }
 116
 117 // Remove deleted items (|chunk_id| in |del_set|) from the container.
 118 template <typename ItemsT>
 119 void RemoveDeleted(ItemsT* items, const base::hash_set<int32>& del_set) {
 120   DCHECK(items);
 121
 122   // Move items from |iter| to |end_iter|, skipping items in |del_set|.
 123   typename ItemsT::iterator end_iter = items->begin();
 124   for (typename ItemsT::iterator iter = end_iter;
 125        iter != items->end(); ++iter) {
 126     if (del_set.count(iter->chunk_id) == 0) {
 127       *end_iter = *iter;
 128       ++end_iter;
 129     }
 130   }
 131   items->erase(end_iter, items->end());
 132 }
 133
 134 }  // namespace
 135
 136 void SBProcessSubs(SBAddPrefixes* add_prefixes,
 137                    SBSubPrefixes* sub_prefixes,
 138                    std::vector<SBAddFullHash>* add_full_hashes,
 139                    std::vector<SBSubFullHash>* sub_full_hashes,
 140                    const base::hash_set<int32>& add_chunks_deleted,
 141                    const base::hash_set<int32>& sub_chunks_deleted) {
 142   // It is possible to structure templates and template
 143   // specializations such that the following calls work without having
 144   // to qualify things.  It becomes very arbitrary, though, and less
 145   // clear how things are working.
 146
 147   // Make sure things are sorted appropriately.
 148   DCHECK(sorted(add_prefixes->begin(), add_prefixes->end(),
 149                 SBAddPrefixLess<SBAddPrefix,SBAddPrefix>));
 150   DCHECK(sorted(sub_prefixes->begin(), sub_prefixes->end(),
 151                 SBAddPrefixLess<SBSubPrefix,SBSubPrefix>));
 152   DCHECK(sorted(add_full_hashes->begin(), add_full_hashes->end(),
 153                 SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>));
 154   DCHECK(sorted(sub_full_hashes->begin(), sub_full_hashes->end(),
 155                 SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>));
 156
 157   // Factor out the prefix subs.
 158   SBAddPrefixes removed_adds;
 159   KnockoutSubs(sub_prefixes, add_prefixes,
 160                SBAddPrefixLess<SBAddPrefix,SBSubPrefix>,
 161                SBAddPrefixLess<SBSubPrefix,SBAddPrefix>,
 162                &removed_adds);
 163
 164   // Remove the full-hashes corrosponding to the adds which
 165   // KnockoutSubs() removed.  Processing these w/in KnockoutSubs()
 166   // would make the code more complicated, and they are very small
 167   // relative to the prefix lists so the gain would be modest.
 168   RemoveMatchingPrefixes(removed_adds, add_full_hashes);
 169   RemoveMatchingPrefixes(removed_adds, sub_full_hashes);
 170
 171   // Factor out the full-hash subs.
 172   std::vector<SBAddFullHash> removed_full_adds;
 173   KnockoutSubs(sub_full_hashes, add_full_hashes,
 174                SBAddPrefixHashLess<SBAddFullHash,SBSubFullHash>,
 175                SBAddPrefixHashLess<SBSubFullHash,SBAddFullHash>,
 176                &removed_full_adds);
 177
 178   // Remove items from the deleted chunks.  This is done after other
 179   // processing to allow subs to knock out adds (and be removed) even
 180   // if the add's chunk is deleted.
 181   RemoveDeleted(add_prefixes, add_chunks_deleted);
 182   RemoveDeleted(sub_prefixes, sub_chunks_deleted);
 183   RemoveDeleted(add_full_hashes, add_chunks_deleted);
 184   RemoveDeleted(sub_full_hashes, sub_chunks_deleted);
 185 }