// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/safe_browsing/safe_browsing_store_file.h"

#include <algorithm>
#include <cstring>

#include "base/metrics/histogram.h"
12 // NOTE(shess): kFileMagic should not be a byte-wise palindrome, so
13 // that byte-order changes force corruption.
14 const int32 kFileMagic = 0x600D71FE;
15 const int32 kFileVersion = 7; // SQLite storage was 6...
17 // Header at the front of the main database file.
20 uint32 add_chunk_count, sub_chunk_count;
21 uint32 add_prefix_count, sub_prefix_count;
22 uint32 add_hash_count, sub_hash_count;
25 // Header for each chunk in the chunk-accumulation file.
27 uint32 add_prefix_count, sub_prefix_count;
28 uint32 add_hash_count, sub_hash_count;
// Rewind the file.  Using fseek(2) because rewind(3) gives no way to
// detect errors.  Returns true on success.
bool FileRewind(FILE* fp) {
  return fseek(fp, 0, SEEK_SET) == 0;
}
// Move file read pointer forward by |bytes| relative to current position.
// Returns true on success.
bool FileSkip(size_t bytes, FILE* fp) {
  // Although fseek takes negative values, for this case only a
  // forward skip makes sense; reject sizes that don't fit in a long.
  if (static_cast<long>(bytes) < 0)
    return false;
  return fseek(fp, static_cast<long>(bytes), SEEK_CUR) == 0;
}
51 // Read from |fp| into |item|, and fold the input data into the
52 // checksum in |context|, if non-NULL. Return true on success.
54 bool ReadItem(T* item, FILE* fp, base::MD5Context* context) {
55 const size_t ret = fread(item, sizeof(T), 1, fp);
60 base::MD5Update(context,
61 base::StringPiece(reinterpret_cast<char*>(item),
67 // Write |item| to |fp|, and fold the output data into the checksum in
68 // |context|, if non-NULL. Return true on success.
70 bool WriteItem(const T& item, FILE* fp, base::MD5Context* context) {
71 const size_t ret = fwrite(&item, sizeof(T), 1, fp);
76 base::MD5Update(context,
77 base::StringPiece(reinterpret_cast<const char*>(&item),
84 // Read |count| items into |values| from |fp|, and fold them into the
85 // checksum in |context|. Returns true on success.
86 template <typename CT>
87 bool ReadToContainer(CT* values, size_t count, FILE* fp,
88 base::MD5Context* context) {
92 for (size_t i = 0; i < count; ++i) {
93 typename CT::value_type value;
94 if (!ReadItem(&value, fp, context))
97 // push_back() is more obvious, but coded this way std::set can
99 values->insert(values->end(), value);
105 // Write all of |values| to |fp|, and fold the data into the checksum
106 // in |context|, if non-NULL. Returns true on succsess.
107 template <typename CT>
108 bool WriteContainer(const CT& values, FILE* fp,
109 base::MD5Context* context) {
113 for (typename CT::const_iterator iter = values.begin();
114 iter != values.end(); ++iter) {
115 if (!WriteItem(*iter, fp, context))
121 // Delete the chunks in |deleted| from |chunks|.
122 void DeleteChunksFromSet(const base::hash_set<int32>& deleted,
123 std::set<int32>* chunks) {
124 for (std::set<int32>::iterator iter = chunks->begin();
125 iter != chunks->end();) {
126 std::set<int32>::iterator prev = iter++;
127 if (deleted.count(*prev) > 0)
132 // Sanity-check the header against the file's size to make sure our
133 // vectors aren't gigantic. This doubles as a cheap way to detect
134 // corruption without having to checksum the entire file.
135 bool FileHeaderSanityCheck(const base::FilePath& filename,
136 const FileHeader& header) {
138 if (!file_util::GetFileSize(filename, &size))
141 int64 expected_size = sizeof(FileHeader);
142 expected_size += header.add_chunk_count * sizeof(int32);
143 expected_size += header.sub_chunk_count * sizeof(int32);
144 expected_size += header.add_prefix_count * sizeof(SBAddPrefix);
145 expected_size += header.sub_prefix_count * sizeof(SBSubPrefix);
146 expected_size += header.add_hash_count * sizeof(SBAddFullHash);
147 expected_size += header.sub_hash_count * sizeof(SBSubFullHash);
148 expected_size += sizeof(base::MD5Digest);
149 if (size != expected_size)
155 // This a helper function that reads header to |header|. Returns true if the
156 // magic number is correct and santiy check passes.
157 bool ReadAndVerifyHeader(const base::FilePath& filename,
160 base::MD5Context* context) {
161 if (!ReadItem(header, fp, context))
163 if (header->magic != kFileMagic || header->version != kFileVersion)
165 if (!FileHeaderSanityCheck(filename, *header))
173 void SafeBrowsingStoreFile::RecordFormatEvent(FormatEventType event_type) {
174 UMA_HISTOGRAM_ENUMERATION("SB2.FormatEvent", event_type, FORMAT_EVENT_MAX);
178 void SafeBrowsingStoreFile::CheckForOriginalAndDelete(
179 const base::FilePath& current_filename) {
180 const base::FilePath original_filename(
181 current_filename.DirName().AppendASCII("Safe Browsing"));
182 if (base::PathExists(original_filename)) {
184 if (file_util::GetFileSize(original_filename, &size)) {
185 UMA_HISTOGRAM_COUNTS("SB2.OldDatabaseKilobytes",
186 static_cast<int>(size / 1024));
189 if (base::DeleteFile(original_filename, false)) {
190 RecordFormatEvent(FORMAT_EVENT_DELETED_ORIGINAL);
192 RecordFormatEvent(FORMAT_EVENT_DELETED_ORIGINAL_FAILED);
195 // Just best-effort on the journal file, don't want to get lost in
197 const base::FilePath journal_filename(
198 current_filename.DirName().AppendASCII("Safe Browsing-journal"));
199 base::DeleteFile(journal_filename, false);
203 SafeBrowsingStoreFile::SafeBrowsingStoreFile()
204 : chunks_written_(0), empty_(false), corruption_seen_(false) {}
206 SafeBrowsingStoreFile::~SafeBrowsingStoreFile() {
210 bool SafeBrowsingStoreFile::Delete() {
211 // The database should not be open at this point. But, just in
212 // case, close everything before deleting.
218 return DeleteStore(filename_);
221 bool SafeBrowsingStoreFile::CheckValidity() {
222 // The file was either empty or never opened. The empty case is
223 // presumed not to be invalid. The never-opened case can happen if
224 // BeginUpdate() fails for any databases, and should already have
225 // caused the corruption callback to fire.
229 if (!FileRewind(file_.get()))
230 return OnCorruptDatabase();
233 if (!file_util::GetFileSize(filename_, &size))
234 return OnCorruptDatabase();
236 base::MD5Context context;
237 base::MD5Init(&context);
239 // Read everything except the final digest.
240 size_t bytes_left = static_cast<size_t>(size);
241 CHECK(size == static_cast<int64>(bytes_left));
242 if (bytes_left < sizeof(base::MD5Digest))
243 return OnCorruptDatabase();
244 bytes_left -= sizeof(base::MD5Digest);
246 // Fold the contents of the file into the checksum.
247 while (bytes_left > 0) {
249 const size_t c = std::min(sizeof(buf), bytes_left);
250 const size_t ret = fread(buf, 1, c, file_.get());
252 // The file's size changed while reading, give up.
254 return OnCorruptDatabase();
255 base::MD5Update(&context, base::StringPiece(buf, c));
259 // Calculate the digest to this point.
260 base::MD5Digest calculated_digest;
261 base::MD5Final(&calculated_digest, &context);
263 // Read the stored digest and verify it.
264 base::MD5Digest file_digest;
265 if (!ReadItem(&file_digest, file_.get(), NULL))
266 return OnCorruptDatabase();
267 if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest))) {
268 RecordFormatEvent(FORMAT_EVENT_VALIDITY_CHECKSUM_FAILURE);
269 return OnCorruptDatabase();
275 void SafeBrowsingStoreFile::Init(
276 const base::FilePath& filename,
277 const base::Closure& corruption_callback
279 filename_ = filename;
280 corruption_callback_ = corruption_callback;
283 bool SafeBrowsingStoreFile::BeginChunk() {
284 return ClearChunkBuffers();
287 bool SafeBrowsingStoreFile::WriteAddPrefix(int32 chunk_id, SBPrefix prefix) {
288 add_prefixes_.push_back(SBAddPrefix(chunk_id, prefix));
292 bool SafeBrowsingStoreFile::GetAddPrefixes(SBAddPrefixes* add_prefixes) {
293 add_prefixes->clear();
295 file_util::ScopedFILE file(file_util::OpenFile(filename_, "rb"));
296 if (file.get() == NULL) return false;
299 if (!ReadAndVerifyHeader(filename_, file.get(), &header, NULL))
300 return OnCorruptDatabase();
302 size_t add_prefix_offset = header.add_chunk_count * sizeof(int32) +
303 header.sub_chunk_count * sizeof(int32);
304 if (!FileSkip(add_prefix_offset, file.get()))
307 if (!ReadToContainer(add_prefixes, header.add_prefix_count, file.get(), NULL))
313 bool SafeBrowsingStoreFile::GetAddFullHashes(
314 std::vector<SBAddFullHash>* add_full_hashes) {
315 add_full_hashes->clear();
317 file_util::ScopedFILE file(file_util::OpenFile(filename_, "rb"));
318 if (file.get() == NULL) return false;
321 if (!ReadAndVerifyHeader(filename_, file.get(), &header, NULL))
322 return OnCorruptDatabase();
325 header.add_chunk_count * sizeof(int32) +
326 header.sub_chunk_count * sizeof(int32) +
327 header.add_prefix_count * sizeof(SBAddPrefix) +
328 header.sub_prefix_count * sizeof(SBSubPrefix);
329 if (!FileSkip(offset, file.get()))
332 return ReadToContainer(add_full_hashes,
333 header.add_hash_count,
338 bool SafeBrowsingStoreFile::WriteAddHash(int32 chunk_id,
339 base::Time receive_time,
340 const SBFullHash& full_hash) {
341 add_hashes_.push_back(SBAddFullHash(chunk_id, receive_time, full_hash));
345 bool SafeBrowsingStoreFile::WriteSubPrefix(int32 chunk_id,
348 sub_prefixes_.push_back(SBSubPrefix(chunk_id, add_chunk_id, prefix));
352 bool SafeBrowsingStoreFile::WriteSubHash(int32 chunk_id, int32 add_chunk_id,
353 const SBFullHash& full_hash) {
354 sub_hashes_.push_back(SBSubFullHash(chunk_id, add_chunk_id, full_hash));
358 bool SafeBrowsingStoreFile::OnCorruptDatabase() {
359 if (!corruption_seen_)
360 RecordFormatEvent(FORMAT_EVENT_FILE_CORRUPT);
361 corruption_seen_ = true;
363 corruption_callback_.Run();
365 // Return false as a convenience to callers.
369 bool SafeBrowsingStoreFile::Close() {
370 ClearUpdateBuffers();
372 // Make sure the files are closed.
378 bool SafeBrowsingStoreFile::BeginUpdate() {
379 DCHECK(!file_.get() && !new_file_.get());
381 // Structures should all be clear unless something bad happened.
382 DCHECK(add_chunks_cache_.empty());
383 DCHECK(sub_chunks_cache_.empty());
384 DCHECK(add_del_cache_.empty());
385 DCHECK(sub_del_cache_.empty());
386 DCHECK(add_prefixes_.empty());
387 DCHECK(sub_prefixes_.empty());
388 DCHECK(add_hashes_.empty());
389 DCHECK(sub_hashes_.empty());
390 DCHECK_EQ(chunks_written_, 0);
392 // Since the following code will already hit the profile looking for
393 // database files, this is a reasonable to time delete any old
395 CheckForOriginalAndDelete(filename_);
397 corruption_seen_ = false;
399 const base::FilePath new_filename = TemporaryFileForFilename(filename_);
400 file_util::ScopedFILE new_file(file_util::OpenFile(new_filename, "wb+"));
401 if (new_file.get() == NULL)
404 file_util::ScopedFILE file(file_util::OpenFile(filename_, "rb"));
405 empty_ = (file.get() == NULL);
407 // If the file exists but cannot be opened, try to delete it (not
408 // deleting directly, the bloom filter needs to be deleted, too).
409 if (base::PathExists(filename_))
410 return OnCorruptDatabase();
412 new_file_.swap(new_file);
417 if (!ReadItem(&header, file.get(), NULL))
418 return OnCorruptDatabase();
420 if (header.magic != kFileMagic || header.version != kFileVersion) {
421 if (!strcmp(reinterpret_cast<char*>(&header.magic), "SQLite format 3")) {
422 RecordFormatEvent(FORMAT_EVENT_FOUND_SQLITE);
424 RecordFormatEvent(FORMAT_EVENT_FOUND_UNKNOWN);
427 // Close the file so that it can be deleted.
430 return OnCorruptDatabase();
433 // TODO(shess): Under POSIX it is possible that this could size a
434 // file different from the file which was opened.
435 if (!FileHeaderSanityCheck(filename_, header))
436 return OnCorruptDatabase();
438 // Pull in the chunks-seen data for purposes of implementing
439 // |GetAddChunks()| and |GetSubChunks()|. This data is sent up to
440 // the server at the beginning of an update.
441 if (!ReadToContainer(&add_chunks_cache_, header.add_chunk_count,
443 !ReadToContainer(&sub_chunks_cache_, header.sub_chunk_count,
445 return OnCorruptDatabase();
448 new_file_.swap(new_file);
452 bool SafeBrowsingStoreFile::FinishChunk() {
453 if (!add_prefixes_.size() && !sub_prefixes_.size() &&
454 !add_hashes_.size() && !sub_hashes_.size())
458 header.add_prefix_count = add_prefixes_.size();
459 header.sub_prefix_count = sub_prefixes_.size();
460 header.add_hash_count = add_hashes_.size();
461 header.sub_hash_count = sub_hashes_.size();
462 if (!WriteItem(header, new_file_.get(), NULL))
465 if (!WriteContainer(add_prefixes_, new_file_.get(), NULL) ||
466 !WriteContainer(sub_prefixes_, new_file_.get(), NULL) ||
467 !WriteContainer(add_hashes_, new_file_.get(), NULL) ||
468 !WriteContainer(sub_hashes_, new_file_.get(), NULL))
473 // Clear everything to save memory.
474 return ClearChunkBuffers();
477 bool SafeBrowsingStoreFile::DoUpdate(
478 const std::vector<SBAddFullHash>& pending_adds,
479 const std::set<SBPrefix>& prefix_misses,
480 SBAddPrefixes* add_prefixes_result,
481 std::vector<SBAddFullHash>* add_full_hashes_result) {
482 DCHECK(file_.get() || empty_);
483 DCHECK(new_file_.get());
484 CHECK(add_prefixes_result);
485 CHECK(add_full_hashes_result);
487 SBAddPrefixes add_prefixes;
488 SBSubPrefixes sub_prefixes;
489 std::vector<SBAddFullHash> add_full_hashes;
490 std::vector<SBSubFullHash> sub_full_hashes;
492 // Read original data into the vectors.
496 if (!FileRewind(file_.get()))
497 return OnCorruptDatabase();
499 base::MD5Context context;
500 base::MD5Init(&context);
502 // Read the file header and make sure it looks right.
504 if (!ReadAndVerifyHeader(filename_, file_.get(), &header, &context))
505 return OnCorruptDatabase();
507 // Re-read the chunks-seen data to get to the later data in the
508 // file and calculate the checksum. No new elements should be
509 // added to the sets.
510 if (!ReadToContainer(&add_chunks_cache_, header.add_chunk_count,
511 file_.get(), &context) ||
512 !ReadToContainer(&sub_chunks_cache_, header.sub_chunk_count,
513 file_.get(), &context))
514 return OnCorruptDatabase();
516 if (!ReadToContainer(&add_prefixes, header.add_prefix_count,
517 file_.get(), &context) ||
518 !ReadToContainer(&sub_prefixes, header.sub_prefix_count,
519 file_.get(), &context) ||
520 !ReadToContainer(&add_full_hashes, header.add_hash_count,
521 file_.get(), &context) ||
522 !ReadToContainer(&sub_full_hashes, header.sub_hash_count,
523 file_.get(), &context))
524 return OnCorruptDatabase();
526 // Calculate the digest to this point.
527 base::MD5Digest calculated_digest;
528 base::MD5Final(&calculated_digest, &context);
530 // Read the stored checksum and verify it.
531 base::MD5Digest file_digest;
532 if (!ReadItem(&file_digest, file_.get(), NULL))
533 return OnCorruptDatabase();
535 if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest))) {
536 RecordFormatEvent(FORMAT_EVENT_UPDATE_CHECKSUM_FAILURE);
537 return OnCorruptDatabase();
540 // Close the file so we can later rename over it.
543 DCHECK(!file_.get());
545 // Rewind the temporary storage.
546 if (!FileRewind(new_file_.get()))
549 // Get chunk file's size for validating counts.
551 if (!file_util::GetFileSize(TemporaryFileForFilename(filename_), &size))
552 return OnCorruptDatabase();
554 // Track update size to answer questions at http://crbug.com/72216 .
555 // Log small updates as 1k so that the 0 (underflow) bucket can be
556 // used for "empty" in SafeBrowsingDatabase.
557 UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes",
558 std::max(static_cast<int>(size / 1024), 1));
560 // Append the accumulated chunks onto the vectors read from |file_|.
561 for (int i = 0; i < chunks_written_; ++i) {
564 int64 ofs = ftell(new_file_.get());
568 if (!ReadItem(&header, new_file_.get(), NULL))
571 // As a safety measure, make sure that the header describes a sane
572 // chunk, given the remaining file size.
573 int64 expected_size = ofs + sizeof(ChunkHeader);
574 expected_size += header.add_prefix_count * sizeof(SBAddPrefix);
575 expected_size += header.sub_prefix_count * sizeof(SBSubPrefix);
576 expected_size += header.add_hash_count * sizeof(SBAddFullHash);
577 expected_size += header.sub_hash_count * sizeof(SBSubFullHash);
578 if (expected_size > size)
581 // TODO(shess): If the vectors were kept sorted, then this code
582 // could use std::inplace_merge() to merge everything together in
583 // sorted order. That might still be slower than just sorting at
584 // the end if there were a large number of chunks. In that case
585 // some sort of recursive binary merge might be in order (merge
586 // chunks pairwise, merge those chunks pairwise, and so on, then
587 // merge the result with the main list).
588 if (!ReadToContainer(&add_prefixes, header.add_prefix_count,
589 new_file_.get(), NULL) ||
590 !ReadToContainer(&sub_prefixes, header.sub_prefix_count,
591 new_file_.get(), NULL) ||
592 !ReadToContainer(&add_full_hashes, header.add_hash_count,
593 new_file_.get(), NULL) ||
594 !ReadToContainer(&sub_full_hashes, header.sub_hash_count,
595 new_file_.get(), NULL))
599 // Append items from |pending_adds|.
600 add_full_hashes.insert(add_full_hashes.end(),
601 pending_adds.begin(), pending_adds.end());
603 // Check how often a prefix was checked which wasn't in the
605 SBCheckPrefixMisses(add_prefixes, prefix_misses);
607 // Knock the subs from the adds and process deleted chunks.
608 SBProcessSubs(&add_prefixes, &sub_prefixes,
609 &add_full_hashes, &sub_full_hashes,
610 add_del_cache_, sub_del_cache_);
612 // We no longer need to track deleted chunks.
613 DeleteChunksFromSet(add_del_cache_, &add_chunks_cache_);
614 DeleteChunksFromSet(sub_del_cache_, &sub_chunks_cache_);
616 // Write the new data to new_file_.
617 if (!FileRewind(new_file_.get()))
620 base::MD5Context context;
621 base::MD5Init(&context);
623 // Write a file header.
625 header.magic = kFileMagic;
626 header.version = kFileVersion;
627 header.add_chunk_count = add_chunks_cache_.size();
628 header.sub_chunk_count = sub_chunks_cache_.size();
629 header.add_prefix_count = add_prefixes.size();
630 header.sub_prefix_count = sub_prefixes.size();
631 header.add_hash_count = add_full_hashes.size();
632 header.sub_hash_count = sub_full_hashes.size();
633 if (!WriteItem(header, new_file_.get(), &context))
636 // Write all the chunk data.
637 if (!WriteContainer(add_chunks_cache_, new_file_.get(), &context) ||
638 !WriteContainer(sub_chunks_cache_, new_file_.get(), &context) ||
639 !WriteContainer(add_prefixes, new_file_.get(), &context) ||
640 !WriteContainer(sub_prefixes, new_file_.get(), &context) ||
641 !WriteContainer(add_full_hashes, new_file_.get(), &context) ||
642 !WriteContainer(sub_full_hashes, new_file_.get(), &context))
645 // Write the checksum at the end.
646 base::MD5Digest digest;
647 base::MD5Final(&digest, &context);
648 if (!WriteItem(digest, new_file_.get(), NULL))
651 // Trim any excess left over from the temporary chunk data.
652 if (!file_util::TruncateFile(new_file_.get()))
655 // Close the file handle and swizzle the file into place.
657 if (!base::DeleteFile(filename_, false) &&
658 base::PathExists(filename_))
661 const base::FilePath new_filename = TemporaryFileForFilename(filename_);
662 if (!base::Move(new_filename, filename_))
665 // Record counts before swapping to caller.
666 UMA_HISTOGRAM_COUNTS("SB2.AddPrefixes", add_prefixes.size());
667 UMA_HISTOGRAM_COUNTS("SB2.SubPrefixes", sub_prefixes.size());
669 // Pass the resulting data off to the caller.
670 add_prefixes_result->swap(add_prefixes);
671 add_full_hashes_result->swap(add_full_hashes);
676 bool SafeBrowsingStoreFile::FinishUpdate(
677 const std::vector<SBAddFullHash>& pending_adds,
678 const std::set<SBPrefix>& prefix_misses,
679 SBAddPrefixes* add_prefixes_result,
680 std::vector<SBAddFullHash>* add_full_hashes_result) {
681 DCHECK(add_prefixes_result);
682 DCHECK(add_full_hashes_result);
684 bool ret = DoUpdate(pending_adds, prefix_misses,
685 add_prefixes_result, add_full_hashes_result);
692 DCHECK(!new_file_.get());
693 DCHECK(!file_.get());
698 bool SafeBrowsingStoreFile::CancelUpdate() {
702 void SafeBrowsingStoreFile::SetAddChunk(int32 chunk_id) {
703 add_chunks_cache_.insert(chunk_id);
706 bool SafeBrowsingStoreFile::CheckAddChunk(int32 chunk_id) {
707 return add_chunks_cache_.count(chunk_id) > 0;
710 void SafeBrowsingStoreFile::GetAddChunks(std::vector<int32>* out) {
712 out->insert(out->end(), add_chunks_cache_.begin(), add_chunks_cache_.end());
715 void SafeBrowsingStoreFile::SetSubChunk(int32 chunk_id) {
716 sub_chunks_cache_.insert(chunk_id);
719 bool SafeBrowsingStoreFile::CheckSubChunk(int32 chunk_id) {
720 return sub_chunks_cache_.count(chunk_id) > 0;
723 void SafeBrowsingStoreFile::GetSubChunks(std::vector<int32>* out) {
725 out->insert(out->end(), sub_chunks_cache_.begin(), sub_chunks_cache_.end());
728 void SafeBrowsingStoreFile::DeleteAddChunk(int32 chunk_id) {
729 add_del_cache_.insert(chunk_id);
732 void SafeBrowsingStoreFile::DeleteSubChunk(int32 chunk_id) {
733 sub_del_cache_.insert(chunk_id);
737 bool SafeBrowsingStoreFile::DeleteStore(const base::FilePath& basename) {
738 if (!base::DeleteFile(basename, false) &&
739 base::PathExists(basename)) {
744 const base::FilePath new_filename = TemporaryFileForFilename(basename);
745 if (!base::DeleteFile(new_filename, false) &&
746 base::PathExists(new_filename)) {
751 // With SQLite support gone, one way to get to this code is if the
752 // existing file is a SQLite file. Make sure the journal file is
754 const base::FilePath journal_filename(
755 basename.value() + FILE_PATH_LITERAL("-journal"));
756 if (base::PathExists(journal_filename))
757 base::DeleteFile(journal_filename, false);