#include "chrome/browser/safe_browsing/prefix_set.h"
#include <algorithm>
-#include <math.h>
#include "base/file_util.h"
#include "base/files/scoped_file.h"
#include "base/logging.h"
#include "base/md5.h"
#include "base/metrics/histogram.h"
+#include "base/metrics/sparse_histogram.h"
namespace {
// md5 -qs chrome/browser/safe_browsing/prefix_set.cc | colrm 9
static uint32 kMagic = 0x864088dd;
-// TODO(shess): Update v2 history info once landed.
-
// Version history:
// Version 1: b6cb7cfe/r74487 by shess@chromium.org on 2011-02-10
-// version 2: ????????/r????? by shess@chromium.org on 2014-02-24
+// Version 2: 2b59b0a6/r253924 by shess@chromium.org on 2014-02-27
+// Version 3: dd07faf5/r268145 by shess@chromium.org on 2014-05-05
// Version 2 layout is identical to version 1. The sort order of |index_|
// changed from |int32| to |uint32| to match the change of |SBPrefix|.
-static uint32 kVersion = 0x2;
+// Version 3 adds storage for full hashes.
+
+// Version stamped into the header by WriteFile(); any other version is
+// rejected when a file is read. Declared const so the value cannot be
+// modified at runtime.
+static const uint32 kVersion = 3;
+// Files carrying this version or a lower one are rejected when read.
+static const uint32 kDeprecatedVersion = 2;
+// Fixed-size header of the serialized prefix set. The *_size fields are
+// element counts of the corresponding vectors, not byte counts.
typedef struct {
uint32 magic;
uint32 version;
uint32 index_size;
uint32 deltas_size;
+ uint32 full_hashes_size;  // Number of full hashes stored (new in version 3).
} FileHeader;
// Common std::vector<> implementations add capacity by multiplying from the
PrefixSet::PrefixSet() {
}
-PrefixSet::PrefixSet(IndexVector* index, std::vector<uint16>* deltas) {
- DCHECK(index && deltas);
+// Takes over the contents of |index|, |deltas| and |full_hashes| via swap()
+// rather than copying them. All three pointers must be non-NULL.
+PrefixSet::PrefixSet(IndexVector* index,
+ std::vector<uint16>* deltas,
+ std::vector<SBFullHash>* full_hashes) {
+ DCHECK(index && deltas && full_hashes);
index_.swap(*index);
deltas_.swap(*deltas);
+ full_hashes_.swap(*full_hashes);
}
PrefixSet::~PrefixSet() {}
-bool PrefixSet::Exists(SBPrefix prefix) const {
+bool PrefixSet::PrefixExists(SBPrefix prefix) const {
if (index_.empty())
return false;
return current == prefix;
}
+// Returns true when |hash| is present in the sorted full-hash list, or, failing
+// that, when its prefix is found by PrefixExists().
+bool PrefixSet::Exists(const SBFullHash& hash) const {
+  const bool is_known_full_hash = std::binary_search(
+      full_hashes_.begin(), full_hashes_.end(), hash, SBFullHashLess);
+  return is_known_full_hash || PrefixExists(hash.prefix);
+}
+
void PrefixSet::GetPrefixes(std::vector<SBPrefix>* prefixes) const {
prefixes->reserve(index_.size() + deltas_.size());
if (read != 1)
return scoped_ptr<PrefixSet>();
- // TODO(shess): Version 1 and 2 use the same file structure, with version 1
- // data using a signed sort. For M-35, the data is re-sorted before return.
- // After M-35, just drop v1 support. <http://crbug.com/346405>
- if (header.magic != kMagic ||
- (header.version != kVersion && header.version != 1)) {
+ // The file looks valid, start building the digest.
+ base::MD5Context context;
+ base::MD5Init(&context);
+ base::MD5Update(&context, base::StringPiece(reinterpret_cast<char*>(&header),
+ sizeof(header)));
+
+ if (header.magic != kMagic)
+ return scoped_ptr<PrefixSet>();
+
+ // Track version read to inform removal of support for older versions.
+ UMA_HISTOGRAM_SPARSE_SLOWLY("SB2.PrefixSetVersionRead", header.version);
+
+ if (header.version <= kDeprecatedVersion) {
+ return scoped_ptr<PrefixSet>();
+ } else if (header.version != kVersion) {
return scoped_ptr<PrefixSet>();
}
std::vector<uint16> deltas;
const size_t deltas_bytes = sizeof(deltas[0]) * header.deltas_size;
+ std::vector<SBFullHash> full_hashes;
+ const size_t full_hashes_bytes =
+ sizeof(full_hashes[0]) * header.full_hashes_size;
+
// Check for bogus sizes before allocating any space.
- const size_t expected_bytes =
- sizeof(header) + index_bytes + deltas_bytes + sizeof(MD5Digest);
+ const size_t expected_bytes = sizeof(header) +
+ index_bytes + deltas_bytes + full_hashes_bytes + sizeof(MD5Digest);
if (static_cast<int64>(expected_bytes) != size_64)
return scoped_ptr<PrefixSet>();
- // The file looks valid, start building the digest.
- base::MD5Context context;
- base::MD5Init(&context);
- base::MD5Update(&context, base::StringPiece(reinterpret_cast<char*>(&header),
- sizeof(header)));
-
// Read the index vector. Herb Sutter indicates that vectors are
// guaranteed to be contiguous, so reading to where element 0 lives
// is valid.
deltas_bytes));
}
+ // Read vector of full hashes.
+ if (header.full_hashes_size) {
+ full_hashes.resize(header.full_hashes_size);
+ read = fread(&(full_hashes[0]), sizeof(full_hashes[0]), full_hashes.size(),
+ file.get());
+ if (read != full_hashes.size())
+ return scoped_ptr<PrefixSet>();
+ base::MD5Update(&context,
+ base::StringPiece(
+ reinterpret_cast<char*>(&(full_hashes[0])),
+ full_hashes_bytes));
+ }
+
base::MD5Digest calculated_digest;
base::MD5Final(&calculated_digest, &context);
if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest)))
return scoped_ptr<PrefixSet>();
- // For version 1, fetch the prefixes and re-sort.
- if (header.version == 1) {
- std::vector<SBPrefix> prefixes;
- PrefixSet(&index, &deltas).GetPrefixes(&prefixes);
- std::sort(prefixes.begin(), prefixes.end());
- return PrefixSetBuilder(prefixes).GetPrefixSet().Pass();
- }
-
- // Steals contents of |index| and |deltas| via swap().
- return scoped_ptr<PrefixSet>(new PrefixSet(&index, &deltas));
+ // Steals vector contents using swap().
+ return scoped_ptr<PrefixSet>(new PrefixSet(&index, &deltas, &full_hashes));
}
bool PrefixSet::WriteFile(const base::FilePath& filter_name) const {
header.version = kVersion;
header.index_size = static_cast<uint32>(index_.size());
header.deltas_size = static_cast<uint32>(deltas_.size());
+ header.full_hashes_size = static_cast<uint32>(full_hashes_.size());
// Sanity check that the 32-bit values never mess things up.
if (static_cast<size_t>(header.index_size) != index_.size() ||
- static_cast<size_t>(header.deltas_size) != deltas_.size()) {
+ static_cast<size_t>(header.deltas_size) != deltas_.size() ||
+ static_cast<size_t>(header.full_hashes_size) != full_hashes_.size()) {
NOTREACHED();
return false;
}
deltas_bytes));
}
+ if (full_hashes_.size()) {
+ const size_t elt_size = sizeof(full_hashes_[0]);
+ const size_t elts = full_hashes_.size();
+ const size_t full_hashes_bytes = elt_size * elts;
+ written = fwrite(&(full_hashes_[0]), elt_size, elts, file.get());
+ if (written != elts)
+ return false;
+ base::MD5Update(&context,
+ base::StringPiece(
+ reinterpret_cast<const char*>(&(full_hashes_[0])),
+ full_hashes_bytes));
+ }
+
base::MD5Digest digest;
base::MD5Final(&digest, &context);
written = fwrite(&digest, sizeof(digest), 1, file.get());
PrefixSetBuilder::~PrefixSetBuilder() {
}
-scoped_ptr<PrefixSet> PrefixSetBuilder::GetPrefixSet() {
+scoped_ptr<PrefixSet> PrefixSetBuilder::GetPrefixSet(
+ const std::vector<SBFullHash>& hashes) {
DCHECK(prefix_set_.get());
// Flush runs until buffered data is gone.
// they're almost free.
PrefixSet::IndexVector(prefix_set_->index_).swap(prefix_set_->index_);
+ prefix_set_->full_hashes_ = hashes;
+ std::sort(prefix_set_->full_hashes_.begin(), prefix_set_->full_hashes_.end(),
+ SBFullHashLess);
+
return prefix_set_.Pass();
}
+// Convenience wrapper around GetPrefixSet() for callers that have no full
+// hashes to store: passes an empty hash vector.
+scoped_ptr<PrefixSet> PrefixSetBuilder::GetPrefixSetNoHashes() {
+  const std::vector<SBFullHash> no_hashes;
+  return GetPrefixSet(no_hashes).Pass();
+}
void PrefixSetBuilder::EmitRun() {
DCHECK(prefix_set_.get());