1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/safe_browsing/safe_browsing_store.h"
9 #include "base/logging.h"
13 // Return |true| if the range from |beg| to |end| is sorted by the given
// comparator |less|. |CTI| must support iterator subtraction, since the
// loop condition below evaluates |end - beg|.
14 template <typename CTI, typename LESS>
15 bool sorted(CTI beg, CTI end, LESS less) {
// NOTE(review): this loop only runs while more than two elements remain.
// The loop body is not visible here; confirm that the final adjacent pair
// is still compared, otherwise an out-of-order tail would go undetected.
16 while ((end - beg) > 2) {
24 // Find items matching between |subs| and |adds|, and remove them,
25 // recording the item from |adds| in |adds_removed|. To minimize
26 // copies, the inputs are processed in parallel, so |subs| and |adds|
27 // should be compatibly ordered (either by SBAddPrefixLess or
28 // SBAddPrefixHashLess).
//
// |subs| and |adds| are modified in place. |adds_removed| is only
// appended to (via push_back()); existing contents are left alone.
30 // |predAddSub| provides add < sub, |predSubAdd| provides sub < add,
31 // for the tightest compare appropriate (see calls in SBProcessSubs).
32 template <typename SubsT, typename AddsT,
33 typename PredAddSubT, typename PredSubAddT>
34 void KnockoutSubs(SubsT* subs, AddsT* adds,
35 PredAddSubT predAddSub, PredSubAddT predSubAdd,
36 AddsT* adds_removed) {
37 // Keep a pair of output iterators for writing kept items. Due to
38 // deletions, these may lag the main iterators. Using erase() on
39 // individual items would result in O(N^2) copies. Using std::list
40 // would work around that, at double or triple the memory cost.
41 typename AddsT::iterator add_out = adds->begin();
42 typename SubsT::iterator sub_out = subs->begin();
44 // Current location in containers.
45 // TODO(shess): I want these to be const_iterator, but then
46 // std::copy() gets confused. Could snag a const_iterator add_end,
47 // or write an inline std::copy(), but it seems like I'm doing
49 typename AddsT::iterator add_iter = adds->begin();
50 typename SubsT::iterator sub_iter = subs->begin();
// Walk both containers in step until either one is exhausted.
52 while (add_iter != adds->end() && sub_iter != subs->end()) {
53 // If |*sub_iter| < |*add_iter|, retain the sub.
54 if (predSubAdd(*sub_iter, *add_iter)) {
59 // If |*add_iter| < |*sub_iter|, retain the add.
60 } else if (predAddSub(*add_iter, *sub_iter)) {
65 // Record equal items and drop them.
// Neither predicate ordered one item before the other, so the add and
// the sub are treated as matching; the add is remembered for the caller.
67 adds_removed->push_back(*add_iter);
73 // Erase any leftover gap.
// A single range erase between each output iterator and its read iterator
// removes the slots vacated by dropped items in one call per container.
74 adds->erase(add_out, add_iter);
75 subs->erase(sub_out, sub_iter);
78 // Remove items in |removes| from |full_hashes|. |full_hashes| and
79 // |removes| should be ordered by SBAddPrefix component.
//
// |removes| is read-only; |full_hashes| is modified in place. Items are
// matched using SBAddPrefixLess in both directions, so the two containers
// may hold different value types.
80 template <typename HashesT, typename AddsT>
81 void RemoveMatchingPrefixes(const AddsT& removes, HashesT* full_hashes) {
82 // This is basically an inline of std::set_difference().
83 // Unfortunately, that algorithm requires that the two iterator
84 // pairs use the same value types.
86 // Where to store kept items.
87 typename HashesT::iterator out = full_hashes->begin();
// Read positions in each container.
89 typename HashesT::iterator hash_iter = full_hashes->begin();
90 typename AddsT::const_iterator remove_iter = removes.begin();
// Walk both containers in step until either one is exhausted.
92 while (hash_iter != full_hashes->end() && remove_iter != removes.end()) {
93 // Keep items less than |*remove_iter|.
94 if (SBAddPrefixLess(*hash_iter, *remove_iter)) {
99 // No hit for |*remove_iter|, bump it forward.
100 } else if (SBAddPrefixLess(*remove_iter, *hash_iter)) {
103 // Drop equal items, there may be multiple hits.
// The loop below repeats until |full_hashes| is exhausted or
// |*remove_iter| orders before |*hash_iter|.
107 } while (hash_iter != full_hashes->end() &&
108 !SBAddPrefixLess(*remove_iter, *hash_iter));
113 // Erase any leftover gap.
// One range erase from the write position up to the read position.
114 full_hashes->erase(out, hash_iter);
117 // Remove deleted items (|chunk_id| in |del_set|) from the container.
//
// |items| is modified in place; its elements must expose a |chunk_id|
// member, which is looked up in |del_set|. |del_set| is not modified.
118 template <typename ItemsT>
119 void RemoveDeleted(ItemsT* items, const base::hash_set<int32>& del_set) {
122 // Move items from |iter| to |end_iter|, skipping items in |del_set|.
// |end_iter| is the write position, starting at the front of |items|.
123 typename ItemsT::iterator end_iter = items->begin();
124 for (typename ItemsT::iterator iter = end_iter;
125 iter != items->end(); ++iter) {
// A count of zero means this item's chunk is not in |del_set|.
126 if (del_set.count(iter->chunk_id) == 0) {
// Truncate the container at the write position in one range erase.
131 items->erase(end_iter, items->end());
// Applies subs and chunk deletions to the add/sub prefix and full-hash
// lists, all of which are modified in place. In order: checks that each
// input is sorted, knocks out prefix subs against prefix adds, removes
// full hashes whose prefix matches a removed add, knocks out full-hash
// subs against full-hash adds, and finally removes items whose chunk is in
// |add_chunks_deleted| (for adds) or |sub_chunks_deleted| (for subs).
136 void SBProcessSubs(SBAddPrefixes* add_prefixes,
137 SBSubPrefixes* sub_prefixes,
138 std::vector<SBAddFullHash>* add_full_hashes,
139 std::vector<SBSubFullHash>* sub_full_hashes,
140 const base::hash_set<int32>& add_chunks_deleted,
141 const base::hash_set<int32>& sub_chunks_deleted) {
142 // It is possible to structure templates and template
143 // specializations such that the following calls work without having
144 // to qualify things. It becomes very arbitrary, though, and less
145 // clear how things are working.
147 // Make sure things are sorted appropriately.
// Prefix lists are checked with SBAddPrefixLess, full-hash lists with
// SBAddPrefixHashLess.
148 DCHECK(sorted(add_prefixes->begin(), add_prefixes->end(),
149 SBAddPrefixLess<SBAddPrefix,SBAddPrefix>));
150 DCHECK(sorted(sub_prefixes->begin(), sub_prefixes->end(),
151 SBAddPrefixLess<SBSubPrefix,SBSubPrefix>));
152 DCHECK(sorted(add_full_hashes->begin(), add_full_hashes->end(),
153 SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>));
154 DCHECK(sorted(sub_full_hashes->begin(), sub_full_hashes->end(),
155 SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>));
157 // Factor out the prefix subs.
158 SBAddPrefixes removed_adds;
159 KnockoutSubs(sub_prefixes, add_prefixes,
160 SBAddPrefixLess<SBAddPrefix,SBSubPrefix>,
161 SBAddPrefixLess<SBSubPrefix,SBAddPrefix>,
164 // Remove the full-hashes corresponding to the adds which
165 // KnockoutSubs() removed. Processing these w/in KnockoutSubs()
166 // would make the code more complicated, and they are very small
167 // relative to the prefix lists so the gain would be modest.
168 RemoveMatchingPrefixes(removed_adds, add_full_hashes);
169 RemoveMatchingPrefixes(removed_adds, sub_full_hashes);
171 // Factor out the full-hash subs.
172 std::vector<SBAddFullHash> removed_full_adds;
173 KnockoutSubs(sub_full_hashes, add_full_hashes,
174 SBAddPrefixHashLess<SBAddFullHash,SBSubFullHash>,
175 SBAddPrefixHashLess<SBSubFullHash,SBAddFullHash>,
178 // Remove items from the deleted chunks. This is done after other
179 // processing to allow subs to knock out adds (and be removed) even
180 // if the add's chunk is deleted.
181 RemoveDeleted(add_prefixes, add_chunks_deleted);
182 RemoveDeleted(sub_prefixes, sub_chunks_deleted);
183 RemoveDeleted(add_full_hashes, add_chunks_deleted);
184 RemoveDeleted(sub_full_hashes, sub_chunks_deleted);