src/chrome/browser/safe_browsing/safe_browsing_store.cc

   1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "chrome/browser/safe_browsing/safe_browsing_store.h"
   6
   7 #include <algorithm>
   8
   9 #include "base/metrics/histogram.h"
  10
  11 namespace {
  12
  13 // Find items matching between |subs| and |adds|, and remove them,
  14 // recording the item from |adds| in |adds_removed|.  To minimize
  15 // copies, the inputs are processing in parallel, so |subs| and |adds|
  16 // should be compatibly ordered (either by SBAddPrefixLess or
  17 // SBAddPrefixHashLess).
  18 //
  19 // |predAddSub| provides add < sub, |predSubAdd| provides sub < add,
  20 // for the tightest compare appropriate (see calls in SBProcessSubs).
  21 template <typename SubsT, typename AddsT,
  22           typename PredAddSubT, typename PredSubAddT>
  23 void KnockoutSubs(SubsT* subs, AddsT* adds,
  24                   PredAddSubT predAddSub, PredSubAddT predSubAdd,
  25                   AddsT* adds_removed) {
  26   // Keep a pair of output iterators for writing kept items.  Due to
  27   // deletions, these may lag the main iterators.  Using erase() on
  28   // individual items would result in O(N^2) copies.  Using std::list
  29   // would work around that, at double or triple the memory cost.
  30   typename AddsT::iterator add_out = adds->begin();
  31   typename SubsT::iterator sub_out = subs->begin();
  32
  33   // Current location in containers.
  34   // TODO(shess): I want these to be const_iterator, but then
  35   // std::copy() gets confused.  Could snag a const_iterator add_end,
  36   // or write an inline std::copy(), but it seems like I'm doing
  37   // something wrong.
  38   typename AddsT::iterator add_iter = adds->begin();
  39   typename SubsT::iterator sub_iter = subs->begin();
  40
  41   while (add_iter != adds->end() && sub_iter != subs->end()) {
  42     // If |*sub_iter| < |*add_iter|, retain the sub.
  43     if (predSubAdd(*sub_iter, *add_iter)) {
  44       *sub_out = *sub_iter;
  45       ++sub_out;
  46       ++sub_iter;
  47
  48       // If |*add_iter| < |*sub_iter|, retain the add.
  49     } else if (predAddSub(*add_iter, *sub_iter)) {
  50       *add_out = *add_iter;
  51       ++add_out;
  52       ++add_iter;
  53
  54       // Record equal items and drop them.
  55     } else {
  56       adds_removed->push_back(*add_iter);
  57       ++add_iter;
  58       ++sub_iter;
  59     }
  60   }
  61
  62   // Erase any leftover gap.
  63   adds->erase(add_out, add_iter);
  64   subs->erase(sub_out, sub_iter);
  65 }
  66
  67 // Remove items in |removes| from |full_hashes|.  |full_hashes| and
  68 // |removes| should be ordered by SBAddPrefix component.
  69 template <typename HashesT, typename AddsT>
  70 void RemoveMatchingPrefixes(const AddsT& removes, HashesT* full_hashes) {
  71   // This is basically an inline of std::set_difference().
  72   // Unfortunately, that algorithm requires that the two iterator
  73   // pairs use the same value types.
  74
  75   // Where to store kept items.
  76   typename HashesT::iterator out = full_hashes->begin();
  77
  78   typename HashesT::iterator hash_iter = full_hashes->begin();
  79   typename AddsT::const_iterator remove_iter = removes.begin();
  80
  81   while (hash_iter != full_hashes->end() && remove_iter != removes.end()) {
  82     // Keep items less than |*remove_iter|.
  83     if (SBAddPrefixLess(*hash_iter, *remove_iter)) {
  84       *out = *hash_iter;
  85       ++out;
  86       ++hash_iter;
  87
  88       // No hit for |*remove_iter|, bump it forward.
  89     } else if (SBAddPrefixLess(*remove_iter, *hash_iter)) {
  90       ++remove_iter;
  91
  92       // Drop equal items, there may be multiple hits.
  93     } else {
  94       do {
  95         ++hash_iter;
  96       } while (hash_iter != full_hashes->end() &&
  97                !SBAddPrefixLess(*remove_iter, *hash_iter));
  98       ++remove_iter;
  99     }
 100   }
 101
 102   // Erase any leftover gap.
 103   full_hashes->erase(out, hash_iter);
 104 }
 105
 106 // Remove deleted items (|chunk_id| in |del_set|) from the container.
 107 template <typename ItemsT>
 108 void RemoveDeleted(ItemsT* items, const base::hash_set<int32>& del_set) {
 109   DCHECK(items);
 110
 111   // Move items from |iter| to |end_iter|, skipping items in |del_set|.
 112   typename ItemsT::iterator end_iter = items->begin();
 113   for (typename ItemsT::iterator iter = end_iter;
 114        iter != items->end(); ++iter) {
 115     if (del_set.count(iter->chunk_id) == 0) {
 116       *end_iter = *iter;
 117       ++end_iter;
 118     }
 119   }
 120   items->erase(end_iter, items->end());
 121 }
 122
 123 enum MissTypes {
 124   MISS_TYPE_ALL,
 125   MISS_TYPE_FALSE,
 126
 127   // Always at the end.
 128   MISS_TYPE_MAX
 129 };
 130
 131 }  // namespace
 132
 133 void SBCheckPrefixMisses(const SBAddPrefixes& add_prefixes,
 134                          const std::set<SBPrefix>& prefix_misses) {
 135   if (prefix_misses.empty())
 136     return;
 137
 138   // Record a hit for all prefixes which missed when sent to the
 139   // server.
 140   for (size_t i = 0; i < prefix_misses.size(); ++i) {
 141     UMA_HISTOGRAM_ENUMERATION("SB2.BloomFilterFalsePositives",
 142                               MISS_TYPE_ALL, MISS_TYPE_MAX);
 143   }
 144
 145   // Collect the misses which are not present in |add_prefixes|.
 146   // Since |add_prefixes| can contain multiple copies of the same
 147   // prefix, it is not sufficient to count the number of elements
 148   // present in both collections.
 149   std::set<SBPrefix> false_misses(prefix_misses.begin(), prefix_misses.end());
 150   for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
 151        iter != add_prefixes.end(); ++iter) {
 152     // |erase()| on an absent element should cost like |find()|.
 153     false_misses.erase(iter->prefix);
 154   }
 155
 156   // Record a hit for prefixes which we shouldn't have sent in the
 157   // first place.
 158   for (size_t i = 0; i < false_misses.size(); ++i) {
 159     UMA_HISTOGRAM_ENUMERATION("SB2.BloomFilterFalsePositives",
 160                               MISS_TYPE_FALSE, MISS_TYPE_MAX);
 161   }
 162
 163   // Divide |MISS_TYPE_FALSE| by |MISS_TYPE_ALL| to get the
 164   // bloom-filter false-positive rate.
 165 }
 166
 167 void SBProcessSubs(SBAddPrefixes* add_prefixes,
 168                    SBSubPrefixes* sub_prefixes,
 169                    std::vector<SBAddFullHash>* add_full_hashes,
 170                    std::vector<SBSubFullHash>* sub_full_hashes,
 171                    const base::hash_set<int32>& add_chunks_deleted,
 172                    const base::hash_set<int32>& sub_chunks_deleted) {
 173   // It is possible to structure templates and template
 174   // specializations such that the following calls work without having
 175   // to qualify things.  It becomes very arbitrary, though, and less
 176   // clear how things are working.
 177
 178   // Sort the inputs by the SBAddPrefix bits.
 179   std::sort(add_prefixes->begin(), add_prefixes->end(),
 180             SBAddPrefixLess<SBAddPrefix,SBAddPrefix>);
 181   std::sort(sub_prefixes->begin(), sub_prefixes->end(),
 182             SBAddPrefixLess<SBSubPrefix,SBSubPrefix>);
 183   std::sort(add_full_hashes->begin(), add_full_hashes->end(),
 184             SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>);
 185   std::sort(sub_full_hashes->begin(), sub_full_hashes->end(),
 186             SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>);
 187
 188   // Factor out the prefix subs.
 189   SBAddPrefixes removed_adds;
 190   KnockoutSubs(sub_prefixes, add_prefixes,
 191                SBAddPrefixLess<SBAddPrefix,SBSubPrefix>,
 192                SBAddPrefixLess<SBSubPrefix,SBAddPrefix>,
 193                &removed_adds);
 194
 195   // Remove the full-hashes corrosponding to the adds which
 196   // KnockoutSubs() removed.  Processing these w/in KnockoutSubs()
 197   // would make the code more complicated, and they are very small
 198   // relative to the prefix lists so the gain would be modest.
 199   RemoveMatchingPrefixes(removed_adds, add_full_hashes);
 200   RemoveMatchingPrefixes(removed_adds, sub_full_hashes);
 201
 202   // Factor out the full-hash subs.
 203   std::vector<SBAddFullHash> removed_full_adds;
 204   KnockoutSubs(sub_full_hashes, add_full_hashes,
 205                SBAddPrefixHashLess<SBAddFullHash,SBSubFullHash>,
 206                SBAddPrefixHashLess<SBSubFullHash,SBAddFullHash>,
 207                &removed_full_adds);
 208
 209   // Remove items from the deleted chunks.  This is done after other
 210   // processing to allow subs to knock out adds (and be removed) even
 211   // if the add's chunk is deleted.
 212   RemoveDeleted(add_prefixes, add_chunks_deleted);
 213   RemoveDeleted(sub_prefixes, sub_chunks_deleted);
 214   RemoveDeleted(add_full_hashes, add_chunks_deleted);
 215   RemoveDeleted(sub_full_hashes, sub_chunks_deleted);
 216 }