1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/safe_browsing/safe_browsing_database.h"
10 #include "base/bind.h"
11 #include "base/file_util.h"
12 #include "base/message_loop/message_loop.h"
13 #include "base/metrics/histogram.h"
14 #include "base/metrics/stats_counters.h"
15 #include "base/process/process.h"
16 #include "base/process/process_metrics.h"
17 #include "base/sha1.h"
18 #include "base/strings/string_number_conversions.h"
19 #include "base/strings/stringprintf.h"
20 #include "base/time/time.h"
21 #include "chrome/browser/safe_browsing/prefix_set.h"
22 #include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
23 #include "content/public/browser/browser_thread.h"
24 #include "crypto/sha2.h"
25 #include "net/base/net_util.h"
28 #if defined(OS_MACOSX)
29 #include "base/mac/mac_util.h"
32 using content::BrowserThread;
36 // Filename suffix for the bloom filter.
37 const base::FilePath::CharType kBloomFilterFile[] =
38 FILE_PATH_LITERAL(" Filter 2");
39 // Filename suffix for the prefix set.
40 const base::FilePath::CharType kPrefixSetFile[] =
41 FILE_PATH_LITERAL(" Prefix Set");
42 // Filename suffix for download store.
43 const base::FilePath::CharType kDownloadDBFile[] =
44 FILE_PATH_LITERAL(" Download");
45 // Filename suffix for client-side phishing detection whitelist store.
46 const base::FilePath::CharType kCsdWhitelistDBFile[] =
47 FILE_PATH_LITERAL(" Csd Whitelist");
48 // Filename suffix for the download whitelist store.
49 const base::FilePath::CharType kDownloadWhitelistDBFile[] =
50 FILE_PATH_LITERAL(" Download Whitelist");
51 // Filename suffix for the extension blacklist store.
52 const base::FilePath::CharType kExtensionBlacklistDBFile[] =
53 FILE_PATH_LITERAL(" Extension Blacklist");
54 // Filename suffix for the side-effect free whitelist store.
55 const base::FilePath::CharType kSideEffectFreeWhitelistDBFile[] =
56 FILE_PATH_LITERAL(" Side-Effect Free Whitelist");
57 // Filename suffix for the csd malware IP blacklist store.
58 const base::FilePath::CharType kIPBlacklistDBFile[] =
59 FILE_PATH_LITERAL(" IP Blacklist");
61 // Filename suffix for browse store.
62 // TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
63 // Unfortunately, to change the name implies lots of transition code
64 // for little benefit. If/when file formats change (say to put all
65 // the data in one file), that would be a convenient point to rectify
67 const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
69 // The maximum staleness for a cached entry.
70 const int kMaxStalenessMinutes = 45;
72 // Maximum number of entries we allow in any of the whitelists.
73 // If a whitelist on disk contains more entries then all lookups to
74 // the whitelist will be considered a match.
75 const size_t kMaxWhitelistSize = 5000;
77 // If the hash of this exact expression is on a whitelist then all
78 // lookups to this whitelist will be considered a match.
79 const char kWhitelistKillSwitchUrl[] =
80 "sb-ssl.google.com/safebrowsing/csd/killswitch"; // Don't change this!
82 // If the hash of this exact expression is on a whitelist then the
83 // malware IP blacklisting feature will be disabled in csd.
85 const char kMalwareIPKillSwitchUrl[] =
86 "sb-ssl.google.com/safebrowsing/csd/killswitch_malware";
88 const size_t kMaxIpPrefixSize = 128;
89 const size_t kMinIpPrefixSize = 1;
91 // To save space, the incoming |chunk_id| and |list_id| are combined
92 // into an |encoded_chunk_id| for storage by shifting the |list_id|
93 // into the low-order bits. These functions decode that information.
94 // TODO(lzheng): It was reasonable when database is saved in sqlite, but
95 // there should be better ways to save chunk_id and list_id after we use
96 // SafeBrowsingStoreFile.
// Returns the list-id bit, which is stored in the lowest bit of
// |encoded_chunk_id|.
int GetListIdBit(const int encoded_chunk_id) {
  const int kListIdMask = 1;
  return encoded_chunk_id & kListIdMask;
}
// Recovers the chunk id by discarding the list-id bit held in the lowest bit
// of |encoded_chunk_id|.
int DecodeChunkId(int encoded_chunk_id) {
  const int chunk_id = encoded_chunk_id >> 1;
  return chunk_id;
}
103 int EncodeChunkId(const int chunk, const int list_id) {
104 DCHECK_NE(list_id, safe_browsing_util::INVALID);
105 return chunk << 1 | list_id % 2;
108 // Generate the set of full hashes to check for |url|. If
109 // |include_whitelist_hashes| is true we will generate additional path-prefixes
110 // to match against the csd whitelist. E.g., if the path-prefix /foo is on the
111 // whitelist it should also match /foo/bar which is not the case for all the
112 // other lists. We'll also always add a pattern for the empty path.
113 // TODO(shess): This function is almost the same as
114 // |CompareFullHashes()| in safe_browsing_util.cc, except that code
115 // does an early exit on match. Since match should be the infrequent
116 // case (phishing or malware found), consider combining this function
// Appends to |full_hashes| one full hash per (host, path) combination
// generated for |url|.
// NOTE(review): this view of the function looks truncated -- the else-branch
// delimiter, one clause of the whitelist condition and the closing braces are
// not visible. Confirm against the complete file before changing the logic.
118 void BrowseFullHashesToCheck(const GURL& url,
119 bool include_whitelist_hashes,
120 std::vector<SBFullHash>* full_hashes) {
// Candidate hosts: a host that is an IP address is used as-is.
121 std::vector<std::string> hosts;
122 if (url.HostIsIPAddress()) {
123 hosts.push_back(url.host());
// Presumably the else branch: let the utility expand the host variants.
125 safe_browsing_util::GenerateHostsToCheck(url, &hosts);
// Candidate paths for |url|.
128 std::vector<std::string> paths;
129 safe_browsing_util::GeneratePathsToCheck(url, &paths);
// Hash every host + path combination.
131 for (size_t i = 0; i < hosts.size(); ++i) {
132 for (size_t j = 0; j < paths.size(); ++j) {
133 const std::string& path = paths[j];
134 full_hashes->push_back(SBFullHashForString(hosts[i] + path));
136 // We may have /foo as path-prefix in the whitelist which should
137 // also match with /foo/bar and /foo?bar. Hence, for every path
138 // that ends in '/' we also add the path without the slash.
139 if (include_whitelist_hashes &&
// NOTE(review): a further condition appears to sit between the line above
// and the line below (likely a guard on the length of |path|) -- verify.
141 path[path.size() - 1] == '/') {
142 full_hashes->push_back(
143 SBFullHashForString(hosts[i] + path.substr(0, path.size() - 1)));
149 // Get the prefixes matching the download |urls|.
150 void GetDownloadUrlPrefixes(const std::vector<GURL>& urls,
151 std::vector<SBPrefix>* prefixes) {
152 std::vector<SBFullHash> full_hashes;
153 for (size_t i = 0; i < urls.size(); ++i)
154 BrowseFullHashesToCheck(urls[i], false, &full_hashes);
156 for (size_t i = 0; i < full_hashes.size(); ++i)
157 prefixes->push_back(full_hashes[i].prefix);
160 // Helper function to compare addprefixes in |store| with |prefixes|.
161 // The |list_bit| indicates which list (url or hash) to compare.
163 // Returns true if there is a match, |*prefix_hits| (if non-NULL) will contain
164 // the actual matching prefixes.
165 bool MatchAddPrefixes(SafeBrowsingStore* store,
167 const std::vector<SBPrefix>& prefixes,
168 std::vector<SBPrefix>* prefix_hits) {
169 prefix_hits->clear();
170 bool found_match = false;
172 SBAddPrefixes add_prefixes;
173 store->GetAddPrefixes(&add_prefixes);
174 for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
175 iter != add_prefixes.end(); ++iter) {
176 for (size_t j = 0; j < prefixes.size(); ++j) {
177 const SBPrefix& prefix = prefixes[j];
178 if (prefix == iter->prefix &&
179 GetListIdBit(iter->chunk_id) == list_bit) {
180 prefix_hits->push_back(prefix);
188 // Find the entries in |full_hashes| with prefix in |prefix_hits|, and
189 // add them to |full_hits| if not expired. "Not expired" is when
190 // either |last_update| was recent enough, or the item has been
191 // received recently enough. Expired items are not deleted because a
192 // future update may make them acceptable again.
194 // For efficiency reasons the code walks |prefix_hits| and
195 // |full_hashes| in parallel, so they must be sorted by prefix.
196 void GetCachedFullHashesForBrowse(
197 const std::vector<SBPrefix>& prefix_hits,
198 const std::vector<SBFullHashCached>& full_hashes,
199 std::vector<SBFullHashResult>* full_hits,
200 base::Time last_update) {
201 const base::Time expire_time =
202 base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes);
204 std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin();
205 std::vector<SBFullHashCached>::const_iterator hiter = full_hashes.begin();
207 while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
208 if (*piter < hiter->hash.prefix) {
210 } else if (hiter->hash.prefix < *piter) {
213 if (expire_time < last_update ||
214 expire_time.ToTimeT() < hiter->received) {
215 SBFullHashResult result;
216 result.list_id = hiter->list_id;
217 result.hash = hiter->hash;
218 full_hits->push_back(result);
221 // Only increment |hiter|, |piter| might have multiple hits.
227 // This function generates a chunk range string for |chunks|. It
228 // outputs one chunk range string per list and writes it to the
229 // |list_ranges| vector. We expect |list_ranges| to already be of the
230 // right size. E.g., if |chunks| contains chunks with two different
231 // list ids then |list_ranges| must contain two elements.
232 void GetChunkRanges(const std::vector<int>& chunks,
233 std::vector<std::string>* list_ranges) {
234 // Since there are 2 possible list ids, there must be exactly two
235 // list ranges. Even if the chunk data should only contain one
236 // line, this code has to somehow handle corruption.
237 DCHECK_EQ(2U, list_ranges->size());
239 std::vector<std::vector<int> > decoded_chunks(list_ranges->size());
240 for (std::vector<int>::const_iterator iter = chunks.begin();
241 iter != chunks.end(); ++iter) {
242 int mod_list_id = GetListIdBit(*iter);
243 DCHECK_GE(mod_list_id, 0);
244 DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size());
245 decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter));
247 for (size_t i = 0; i < decoded_chunks.size(); ++i) {
248 ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i]));
252 // Helper function to create chunk range lists for Browse related
254 void UpdateChunkRanges(SafeBrowsingStore* store,
255 const std::vector<std::string>& listnames,
256 std::vector<SBListChunkRanges>* lists) {
260 DCHECK_GT(listnames.size(), 0U);
261 DCHECK_LE(listnames.size(), 2U);
262 std::vector<int> add_chunks;
263 std::vector<int> sub_chunks;
264 store->GetAddChunks(&add_chunks);
265 store->GetSubChunks(&sub_chunks);
267 // Always decode 2 ranges, even if only the first one is expected.
268 // The loop below will only load as many into |lists| as |listnames|
270 std::vector<std::string> adds(2);
271 std::vector<std::string> subs(2);
272 GetChunkRanges(add_chunks, &adds);
273 GetChunkRanges(sub_chunks, &subs);
275 for (size_t i = 0; i < listnames.size(); ++i) {
276 const std::string& listname = listnames[i];
277 DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2,
278 static_cast<int>(i % 2));
279 DCHECK_NE(safe_browsing_util::GetListId(listname),
280 safe_browsing_util::INVALID);
281 lists->push_back(SBListChunkRanges(listname));
282 lists->back().adds.swap(adds[i]);
283 lists->back().subs.swap(subs[i]);
287 void UpdateChunkRangesForLists(SafeBrowsingStore* store,
288 const std::string& listname0,
289 const std::string& listname1,
290 std::vector<SBListChunkRanges>* lists) {
291 std::vector<std::string> listnames;
292 listnames.push_back(listname0);
293 listnames.push_back(listname1);
294 UpdateChunkRanges(store, listnames, lists);
297 void UpdateChunkRangesForList(SafeBrowsingStore* store,
298 const std::string& listname,
299 std::vector<SBListChunkRanges>* lists) {
300 UpdateChunkRanges(store, std::vector<std::string>(1, listname), lists);
303 // Order |SBFullHashCached| items on the prefix part.
304 bool SBFullHashCachedPrefixLess(const SBFullHashCached& a,
305 const SBFullHashCached& b) {
306 return a.hash.prefix < b.hash.prefix;
309 // This code always checks for non-zero file size. This helper makes
310 // that less verbose.
311 int64 GetFileSizeOrZero(const base::FilePath& file_path) {
313 if (!base::GetFileSize(file_path, &size_64))
320 // The default SafeBrowsingDatabaseFactory.
321 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
323 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
324 bool enable_download_protection,
325 bool enable_client_side_whitelist,
326 bool enable_download_whitelist,
327 bool enable_extension_blacklist,
328 bool enable_side_effect_free_whitelist,
329 bool enable_ip_blacklist) OVERRIDE {
330 return new SafeBrowsingDatabaseNew(
331 new SafeBrowsingStoreFile,
332 enable_download_protection ? new SafeBrowsingStoreFile : NULL,
333 enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL,
334 enable_download_whitelist ? new SafeBrowsingStoreFile : NULL,
335 enable_extension_blacklist ? new SafeBrowsingStoreFile : NULL,
336 enable_side_effect_free_whitelist ? new SafeBrowsingStoreFile : NULL,
337 enable_ip_blacklist ? new SafeBrowsingStoreFile : NULL);
340 SafeBrowsingDatabaseFactoryImpl() { }
343 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl);
347 SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL;
349 // Factory method, non-thread safe. Caller has to make sure this s called
350 // on SafeBrowsing Thread.
351 // TODO(shess): There's no need for a factory any longer. Convert
352 // SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create()
353 // callers just construct things directly.
354 SafeBrowsingDatabase* SafeBrowsingDatabase::Create(
355 bool enable_download_protection,
356 bool enable_client_side_whitelist,
357 bool enable_download_whitelist,
358 bool enable_extension_blacklist,
359 bool enable_side_effect_free_whitelist,
360 bool enable_ip_blacklist) {
362 factory_ = new SafeBrowsingDatabaseFactoryImpl();
363 return factory_->CreateSafeBrowsingDatabase(
364 enable_download_protection,
365 enable_client_side_whitelist,
366 enable_download_whitelist,
367 enable_extension_blacklist,
368 enable_side_effect_free_whitelist,
369 enable_ip_blacklist);
372 SafeBrowsingDatabase::~SafeBrowsingDatabase() {
376 base::FilePath SafeBrowsingDatabase::BrowseDBFilename(
377 const base::FilePath& db_base_filename) {
378 return base::FilePath(db_base_filename.value() + kBrowseDBFile);
382 base::FilePath SafeBrowsingDatabase::DownloadDBFilename(
383 const base::FilePath& db_base_filename) {
384 return base::FilePath(db_base_filename.value() + kDownloadDBFile);
388 base::FilePath SafeBrowsingDatabase::BloomFilterForFilename(
389 const base::FilePath& db_filename) {
390 return base::FilePath(db_filename.value() + kBloomFilterFile);
394 base::FilePath SafeBrowsingDatabase::PrefixSetForFilename(
395 const base::FilePath& db_filename) {
396 return base::FilePath(db_filename.value() + kPrefixSetFile);
400 base::FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename(
401 const base::FilePath& db_filename) {
402 return base::FilePath(db_filename.value() + kCsdWhitelistDBFile);
406 base::FilePath SafeBrowsingDatabase::DownloadWhitelistDBFilename(
407 const base::FilePath& db_filename) {
408 return base::FilePath(db_filename.value() + kDownloadWhitelistDBFile);
412 base::FilePath SafeBrowsingDatabase::ExtensionBlacklistDBFilename(
413 const base::FilePath& db_filename) {
414 return base::FilePath(db_filename.value() + kExtensionBlacklistDBFile);
418 base::FilePath SafeBrowsingDatabase::SideEffectFreeWhitelistDBFilename(
419 const base::FilePath& db_filename) {
420 return base::FilePath(db_filename.value() + kSideEffectFreeWhitelistDBFile);
424 base::FilePath SafeBrowsingDatabase::IpBlacklistDBFilename(
425 const base::FilePath& db_filename) {
426 return base::FilePath(db_filename.value() + kIPBlacklistDBFile);
429 SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) {
430 if (list_id == safe_browsing_util::PHISH ||
431 list_id == safe_browsing_util::MALWARE) {
432 return browse_store_.get();
433 } else if (list_id == safe_browsing_util::BINURL) {
434 return download_store_.get();
435 } else if (list_id == safe_browsing_util::CSDWHITELIST) {
436 return csd_whitelist_store_.get();
437 } else if (list_id == safe_browsing_util::DOWNLOADWHITELIST) {
438 return download_whitelist_store_.get();
439 } else if (list_id == safe_browsing_util::EXTENSIONBLACKLIST) {
440 return extension_blacklist_store_.get();
441 } else if (list_id == safe_browsing_util::SIDEEFFECTFREEWHITELIST) {
442 return side_effect_free_whitelist_store_.get();
443 } else if (list_id == safe_browsing_util::IPBLACKLIST) {
444 return ip_blacklist_store_.get();
450 void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) {
451 UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type,
452 FAILURE_DATABASE_MAX);
455 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew()
456 : creation_loop_(base::MessageLoop::current()),
457 browse_store_(new SafeBrowsingStoreFile),
458 reset_factory_(this),
459 corruption_detected_(false),
460 change_detected_(false) {
461 DCHECK(browse_store_.get());
462 DCHECK(!download_store_.get());
463 DCHECK(!csd_whitelist_store_.get());
464 DCHECK(!download_whitelist_store_.get());
465 DCHECK(!extension_blacklist_store_.get());
466 DCHECK(!side_effect_free_whitelist_store_.get());
467 DCHECK(!ip_blacklist_store_.get());
470 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew(
471 SafeBrowsingStore* browse_store,
472 SafeBrowsingStore* download_store,
473 SafeBrowsingStore* csd_whitelist_store,
474 SafeBrowsingStore* download_whitelist_store,
475 SafeBrowsingStore* extension_blacklist_store,
476 SafeBrowsingStore* side_effect_free_whitelist_store,
477 SafeBrowsingStore* ip_blacklist_store)
478 : creation_loop_(base::MessageLoop::current()),
479 browse_store_(browse_store),
480 download_store_(download_store),
481 csd_whitelist_store_(csd_whitelist_store),
482 download_whitelist_store_(download_whitelist_store),
483 extension_blacklist_store_(extension_blacklist_store),
484 side_effect_free_whitelist_store_(side_effect_free_whitelist_store),
485 ip_blacklist_store_(ip_blacklist_store),
486 reset_factory_(this),
487 corruption_detected_(false) {
488 DCHECK(browse_store_.get());
491 SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() {
492 // The DCHECK is disabled due to crbug.com/338486 .
493 // DCHECK_EQ(creation_loop_, base::MessageLoop::current());
// Derives every store's filename from |filename_base| and initializes each
// configured store, handing HandleCorruptDatabase to the store's Init().
// Must run on the creation loop and, per the DCHECKs below, only once.
// NOTE(review): this view of the function looks truncated (several call
// openers, else-branches, closing braces and part of a log message are not
// visible). Confirm against the complete file before changing any logic.
496 void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) {
497 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
498 // Ensure we haven't been run before.
499 DCHECK(browse_filename_.empty());
500 DCHECK(download_filename_.empty());
501 DCHECK(csd_whitelist_filename_.empty());
502 DCHECK(download_whitelist_filename_.empty());
503 DCHECK(extension_blacklist_filename_.empty());
504 DCHECK(side_effect_free_whitelist_filename_.empty());
505 DCHECK(ip_blacklist_filename_.empty());
// Browse store: always present. The prefix set file sits next to the store.
507 browse_filename_ = BrowseDBFilename(filename_base);
508 browse_prefix_set_filename_ = PrefixSetForFilename(browse_filename_);
// NOTE(review): the opening of the browse store Init() call is not visible.
512 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
513 base::Unretained(this)));
514 DVLOG(1) << "Init browse store: " << browse_filename_.value();
517 // NOTE: There is no need to grab the lock in this function, since
518 // until it returns, there are no pointers to this class on other
519 // threads. Then again, that means there is no possibility of
520 // contention on the lock...
521 base::AutoLock locked(lookup_lock_);
522 cached_browse_hashes_.clear();
// NOTE(review): the rest of this locked scope is not visible here.
// Download store (optional).
526 if (download_store_.get()) {
527 download_filename_ = DownloadDBFilename(filename_base);
528 download_store_->Init(
530 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
531 base::Unretained(this)));
532 DVLOG(1) << "Init download store: " << download_filename_.value();
// Csd whitelist store (optional). Both a failed hash read and, presumably,
// the store being absent end in WhitelistEverything().
535 if (csd_whitelist_store_.get()) {
536 csd_whitelist_filename_ = CsdWhitelistDBFilename(filename_base);
537 csd_whitelist_store_->Init(
538 csd_whitelist_filename_,
539 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
540 base::Unretained(this)));
541 DVLOG(1) << "Init csd whitelist store: " << csd_whitelist_filename_.value();
542 std::vector<SBAddFullHash> full_hashes;
543 if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) {
544 LoadWhitelist(full_hashes, &csd_whitelist_);
546 WhitelistEverything(&csd_whitelist_);
549 WhitelistEverything(&csd_whitelist_); // Just to be safe.
// Download whitelist store (optional); mirrors the csd whitelist handling.
552 if (download_whitelist_store_.get()) {
553 download_whitelist_filename_ = DownloadWhitelistDBFilename(filename_base);
554 download_whitelist_store_->Init(
555 download_whitelist_filename_,
556 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
557 base::Unretained(this)));
558 DVLOG(1) << "Init download whitelist store: "
559 << download_whitelist_filename_.value();
560 std::vector<SBAddFullHash> full_hashes;
561 if (download_whitelist_store_->GetAddFullHashes(&full_hashes)) {
562 LoadWhitelist(full_hashes, &download_whitelist_);
564 WhitelistEverything(&download_whitelist_);
567 WhitelistEverything(&download_whitelist_); // Just to be safe.
// Extension blacklist store (optional).
570 if (extension_blacklist_store_.get()) {
571 extension_blacklist_filename_ = ExtensionBlacklistDBFilename(filename_base);
572 extension_blacklist_store_->Init(
573 extension_blacklist_filename_,
574 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
575 base::Unretained(this)));
576 DVLOG(1) << "Init extension blacklist store: "
577 << extension_blacklist_filename_.value();
// Side-effect free whitelist store (optional). Its prefix set is loaded
// from disk only when the store file exists and is non-empty.
580 if (side_effect_free_whitelist_store_.get()) {
581 side_effect_free_whitelist_filename_ =
582 SideEffectFreeWhitelistDBFilename(filename_base);
583 side_effect_free_whitelist_prefix_set_filename_ =
584 PrefixSetForFilename(side_effect_free_whitelist_filename_);
585 side_effect_free_whitelist_store_->Init(
586 side_effect_free_whitelist_filename_,
587 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
588 base::Unretained(this)));
589 DVLOG(1) << "Init side-effect free whitelist store: "
590 << side_effect_free_whitelist_filename_.value();
592 // If there is no database, the filter cannot be used.
593 base::File::Info db_info;
594 if (base::GetFileInfo(side_effect_free_whitelist_filename_, &db_info)
595 && db_info.size != 0) {
596 const base::TimeTicks before = base::TimeTicks::Now();
597 side_effect_free_whitelist_prefix_set_ =
598 safe_browsing::PrefixSet::LoadFile(
599 side_effect_free_whitelist_prefix_set_filename_);
600 DVLOG(1) << "SafeBrowsingDatabaseNew read side-effect free whitelist "
// NOTE(review): part of the log message above is not visible in this view.
602 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
603 UMA_HISTOGRAM_TIMES("SB2.SideEffectFreeWhitelistPrefixSetLoad",
604 base::TimeTicks::Now() - before);
// A missing prefix set after the load is recorded as a read failure.
605 if (!side_effect_free_whitelist_prefix_set_.get())
606 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ);
609 // Delete any files of the side-effect free whitelist that may be around
610 // from when it was previously enabled.
611 SafeBrowsingStoreFile::DeleteStore(
612 SideEffectFreeWhitelistDBFilename(filename_base));
// IP blacklist store (optional). A failed hash read loads an empty list.
615 if (ip_blacklist_store_.get()) {
616 ip_blacklist_filename_ = IpBlacklistDBFilename(filename_base);
617 ip_blacklist_store_->Init(
618 ip_blacklist_filename_,
619 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
620 base::Unretained(this)));
621 DVLOG(1) << "SafeBrowsingDatabaseNew read ip blacklist: "
622 << ip_blacklist_filename_.value();
623 std::vector<SBAddFullHash> full_hashes;
624 if (ip_blacklist_store_->GetAddFullHashes(&full_hashes)) {
625 LoadIpBlacklist(full_hashes);
627 DVLOG(1) << "Unable to load full hashes from the IP blacklist.";
628 LoadIpBlacklist(std::vector<SBAddFullHash>()); // Clear the list.
633 bool SafeBrowsingDatabaseNew::ResetDatabase() {
634 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
636 // Delete files on disk.
637 // TODO(shess): Hard to see where one might want to delete without a
638 // reset. Perhaps inline |Delete()|?
642 // Reset objects in memory.
644 base::AutoLock locked(lookup_lock_);
645 cached_browse_hashes_.clear();
646 prefix_miss_cache_.clear();
647 browse_prefix_set_.reset();
648 side_effect_free_whitelist_prefix_set_.reset();
649 ip_blacklist_.clear();
651 // Wants to acquire the lock itself.
652 WhitelistEverything(&csd_whitelist_);
653 WhitelistEverything(&download_whitelist_);
657 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
659 std::vector<SBPrefix>* prefix_hits,
660 std::vector<SBFullHashResult>* cached_hits,
661 base::Time last_update) {
662 // Clear the results first.
663 prefix_hits->clear();
664 cached_hits->clear();
666 std::vector<SBFullHash> full_hashes;
667 BrowseFullHashesToCheck(url, false, &full_hashes);
668 if (full_hashes.empty())
671 // This function is called on the I/O thread, prevent changes to
672 // filter and caches.
673 base::AutoLock locked(lookup_lock_);
675 // |browse_prefix_set_| is empty until it is either read from disk, or the
676 // first update populates it. Bail out without a hit if not yet
678 if (!browse_prefix_set_.get())
681 size_t miss_count = 0;
682 for (size_t i = 0; i < full_hashes.size(); ++i) {
683 if (browse_prefix_set_->Exists(full_hashes[i])) {
684 const SBPrefix prefix = full_hashes[i].prefix;
685 prefix_hits->push_back(prefix);
686 if (prefix_miss_cache_.count(prefix) > 0)
691 // If all the prefixes are cached as 'misses', don't issue a GetHash.
692 if (miss_count == prefix_hits->size())
695 // Find matching cached gethash responses.
696 std::sort(prefix_hits->begin(), prefix_hits->end());
697 GetCachedFullHashesForBrowse(*prefix_hits, cached_browse_hashes_,
698 cached_hits, last_update);
703 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
704 const std::vector<GURL>& urls,
705 std::vector<SBPrefix>* prefix_hits) {
706 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
708 // Ignore this check when download checking is not enabled.
709 if (!download_store_.get())
712 std::vector<SBPrefix> prefixes;
713 GetDownloadUrlPrefixes(urls, &prefixes);
714 return MatchAddPrefixes(download_store_.get(),
715 safe_browsing_util::BINURL % 2,
720 bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) {
721 // This method is theoretically thread-safe but we expect all calls to
722 // originate from the IO thread.
723 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
724 std::vector<SBFullHash> full_hashes;
725 BrowseFullHashesToCheck(url, true, &full_hashes);
726 return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
729 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedUrl(const GURL& url) {
730 std::vector<SBFullHash> full_hashes;
731 BrowseFullHashesToCheck(url, true, &full_hashes);
732 return ContainsWhitelistedHashes(download_whitelist_, full_hashes);
735 bool SafeBrowsingDatabaseNew::ContainsExtensionPrefixes(
736 const std::vector<SBPrefix>& prefixes,
737 std::vector<SBPrefix>* prefix_hits) {
738 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
739 if (!extension_blacklist_store_)
742 return MatchAddPrefixes(extension_blacklist_store_.get(),
743 safe_browsing_util::EXTENSIONBLACKLIST % 2,
748 bool SafeBrowsingDatabaseNew::ContainsSideEffectFreeWhitelistUrl(
753 safe_browsing_util::CanonicalizeUrl(url, &host, &path, &query);
754 std::string url_to_check = host + path;
756 url_to_check += "?" + query;
757 SBFullHash full_hash = SBFullHashForString(url_to_check);
759 // This function can be called on any thread, so lock against any changes
760 base::AutoLock locked(lookup_lock_);
762 // |side_effect_free_whitelist_prefix_set_| is empty until it is either read
763 // from disk, or the first update populates it. Bail out without a hit if
764 // not yet available.
765 if (!side_effect_free_whitelist_prefix_set_.get())
768 return side_effect_free_whitelist_prefix_set_->Exists(full_hash);
771 bool SafeBrowsingDatabaseNew::ContainsMalwareIP(const std::string& ip_address) {
772 net::IPAddressNumber ip_number;
773 if (!net::ParseIPLiteralToNumber(ip_address, &ip_number)) {
774 DVLOG(2) << "Unable to parse IP address: '" << ip_address << "'";
777 if (ip_number.size() == net::kIPv4AddressSize) {
778 ip_number = net::ConvertIPv4NumberToIPv6Number(ip_number);
780 if (ip_number.size() != net::kIPv6AddressSize) {
781 DVLOG(2) << "Unable to convert IPv4 address to IPv6: '"
782 << ip_address << "'";
783 return false; // better safe than sorry.
785 // This function can be called from any thread.
786 base::AutoLock locked(lookup_lock_);
787 for (IPBlacklist::const_iterator it = ip_blacklist_.begin();
788 it != ip_blacklist_.end();
790 const std::string& mask = it->first;
791 DCHECK_EQ(mask.size(), ip_number.size());
792 std::string subnet(net::kIPv6AddressSize, '\0');
793 for (size_t i = 0; i < net::kIPv6AddressSize; ++i) {
794 subnet[i] = ip_number[i] & mask[i];
796 const std::string hash = base::SHA1HashString(subnet);
797 DVLOG(2) << "Lookup Malware IP: "
798 << " ip:" << ip_address
799 << " mask:" << base::HexEncode(mask.data(), mask.size())
800 << " subnet:" << base::HexEncode(subnet.data(), subnet.size())
801 << " hash:" << base::HexEncode(hash.data(), hash.size());
802 if (it->second.count(hash) > 0) {
809 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedString(
810 const std::string& str) {
811 std::vector<SBFullHash> hashes;
812 hashes.push_back(SBFullHashForString(str));
813 return ContainsWhitelistedHashes(download_whitelist_, hashes);
816 bool SafeBrowsingDatabaseNew::ContainsWhitelistedHashes(
817 const SBWhitelist& whitelist,
818 const std::vector<SBFullHash>& hashes) {
819 base::AutoLock l(lookup_lock_);
820 if (whitelist.second)
822 for (std::vector<SBFullHash>::const_iterator it = hashes.begin();
823 it != hashes.end(); ++it) {
824 if (std::binary_search(whitelist.first.begin(), whitelist.first.end(),
825 *it, SBFullHashLess)) {
832 // Helper to insert entries for all of the prefixes or full hashes in
833 // |entry| into the store.
834 void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host,
835 const SBEntry* entry, int list_id) {
836 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
838 SafeBrowsingStore* store = GetStore(list_id);
841 STATS_COUNTER("SB.HostInsert", 1);
842 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
843 const int count = entry->prefix_count();
845 DCHECK(!entry->IsSub());
847 // No prefixes, use host instead.
848 STATS_COUNTER("SB.PrefixAdd", 1);
849 store->WriteAddPrefix(encoded_chunk_id, host);
850 } else if (entry->IsPrefix()) {
852 for (int i = 0; i < count; i++) {
853 const SBPrefix prefix = entry->PrefixAt(i);
854 STATS_COUNTER("SB.PrefixAdd", 1);
855 store->WriteAddPrefix(encoded_chunk_id, prefix);
859 for (int i = 0; i < count; ++i) {
860 const SBFullHash full_hash = entry->FullHashAt(i);
862 STATS_COUNTER("SB.PrefixAddFull", 1);
863 store->WriteAddHash(encoded_chunk_id, full_hash);
868 // Helper to iterate over all the entries in the hosts in |chunks| and
869 // add them to the store.
870 void SafeBrowsingDatabaseNew::InsertAddChunks(
871 const safe_browsing_util::ListType list_id,
872 const SBChunkList& chunks) {
873 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
875 SafeBrowsingStore* store = GetStore(list_id);
878 for (SBChunkList::const_iterator citer = chunks.begin();
879 citer != chunks.end(); ++citer) {
880 const int chunk_id = citer->chunk_number;
882 // The server can give us a chunk that we already have because
883 // it's part of a range. Don't add it again.
884 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
885 if (store->CheckAddChunk(encoded_chunk_id))
888 store->SetAddChunk(encoded_chunk_id);
889 for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
890 hiter != citer->hosts.end(); ++hiter) {
891 // NOTE: Could pass |encoded_chunk_id|, but then inserting add
892 // chunks would look different from inserting sub chunks.
893 InsertAdd(chunk_id, hiter->host, hiter->entry, list_id);
898 // Helper to insert entries for all of the prefixes or full hashes in
899 // |entry| into the store.
//
// |chunk_id| is the (unencoded) number of the sub chunk being inserted and
// |list_id| selects the store. Every item written also carries the encoded id
// of the add chunk it refers to, taken from entry->chunk_id() for the
// host-only case and from entry->ChunkIdAtPrefix(i) for the per-item cases.
// |entry| must be a sub entry (DCHECKed).
900 void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host,
901 const SBEntry* entry, int list_id) {
902 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
904 SafeBrowsingStore* store = GetStore(list_id);
907 STATS_COUNTER("SB.HostDelete", 1);
908 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
909 const int count = entry->prefix_count();
911 DCHECK(entry->IsSub());
913 // No prefixes, use host instead.
914 STATS_COUNTER("SB.PrefixSub", 1);
915 const int add_chunk_id = EncodeChunkId(entry->chunk_id(), list_id);
916 store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, host);
917 } else if (entry->IsPrefix()) {
// Prefix entries: one WriteSubPrefix() per prefix, each with its own add
// chunk id.
919 for (int i = 0; i < count; i++) {
920 const SBPrefix prefix = entry->PrefixAt(i);
921 const int add_chunk_id =
922 EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
924 STATS_COUNTER("SB.PrefixSub", 1);
925 store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix);
// Full-hash entries: one WriteSubHash() per hash, each with its own add
// chunk id.
929 for (int i = 0; i < count; ++i) {
930 const SBFullHash full_hash = entry->FullHashAt(i);
931 const int add_chunk_id =
932 EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
934 STATS_COUNTER("SB.PrefixSubFull", 1);
935 store->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash);
940 // Helper to iterate over all the entries in the hosts in |chunks| and
941 // add them to the store.
//
// Sub-chunk counterpart of InsertAddChunks(): the store is queried with
// CheckSubChunk(), updated with SetSubChunk(), and each host entry is passed
// to InsertSub(). DCHECKs that it is running on |creation_loop_|.
942 void SafeBrowsingDatabaseNew::InsertSubChunks(
943 safe_browsing_util::ListType list_id,
944 const SBChunkList& chunks) {
945 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
947 SafeBrowsingStore* store = GetStore(list_id);
950 for (SBChunkList::const_iterator citer = chunks.begin();
951 citer != chunks.end(); ++citer) {
952 const int chunk_id = citer->chunk_number;
954 // The server can give us a chunk that we already have because
955 // it's part of a range. Don't add it again.
956 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
957 if (store->CheckSubChunk(encoded_chunk_id))
// Register the encoded chunk id with the store, then insert its host entries.
// InsertSub() receives the unencoded |chunk_id| and encodes it itself.
960 store->SetSubChunk(encoded_chunk_id);
961 for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
962 hiter != citer->hosts.end(); ++hiter) {
963 InsertSub(chunk_id, hiter->host, hiter->entry, list_id);
// Inserts |chunks| received for the list called |list_name|.
//
// The list name is mapped to a ListType with safe_browsing_util::GetListId(),
// which in turn selects the store. Only the first chunk's |is_add| flag is
// inspected when choosing between InsertAddChunks() and InsertSubChunks();
// the whole |chunks| list is passed to whichever helper is called.
// Sets |change_detected_|, calls store->FinishChunk() and reports the elapsed
// time to the "SB2.ChunkInsert" histogram.
968 void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name,
969 const SBChunkList& chunks) {
970 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
// Guard: a detected corruption or an empty chunk list is checked first.
972 if (corruption_detected_ || chunks.empty())
975 const base::TimeTicks before = base::TimeTicks::Now();
977 const safe_browsing_util::ListType list_id =
978 safe_browsing_util::GetListId(list_name);
979 DVLOG(2) << list_name << ": " << list_id;
981 SafeBrowsingStore* store = GetStore(list_id);
984 change_detected_ = true;
987 if (chunks.front().is_add) {
988 InsertAddChunks(list_id, chunks);
990 InsertSubChunks(list_id, chunks);
992 store->FinishChunk();
994 UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before);
// Deletes the add/sub chunks named by |chunk_deletes| from a store.
//
// Each SBChunkDelete carries ranges (|chunk_del|) that are expanded to
// individual chunk numbers with RangesToChunks(); |is_sub_del| selects
// DeleteSubChunk() versus DeleteAddChunk(). Sets |change_detected_|.
//
// NOTE(review): the list id, and therefore the store, is derived from the
// first element's |list_name| only and then applied to every element.
// Presumably callers batch deletes per list; confirm against the call sites.
997 void SafeBrowsingDatabaseNew::DeleteChunks(
998 const std::vector<SBChunkDelete>& chunk_deletes) {
999 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
// Guard: a detected corruption or an empty delete list is checked first.
1001 if (corruption_detected_ || chunk_deletes.empty())
1004 const std::string& list_name = chunk_deletes.front().list_name;
1005 const safe_browsing_util::ListType list_id =
1006 safe_browsing_util::GetListId(list_name);
1008 SafeBrowsingStore* store = GetStore(list_id);
1011 change_detected_ = true;
1013 for (size_t i = 0; i < chunk_deletes.size(); ++i) {
1014 std::vector<int> chunk_numbers;
1015 RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers);
1016 for (size_t j = 0; j < chunk_numbers.size(); ++j) {
1017 const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id);
1018 if (chunk_deletes[i].is_sub_del)
1019 store->DeleteSubChunk(encoded_chunk_id);
1021 store->DeleteAddChunk(encoded_chunk_id);
// Caches the outcome of a full-hash request under |lookup_lock_|.
//
// When |full_hits| is empty, |prefixes| are inserted into
// |prefix_miss_cache_|. Hits whose |list_id| is MALWARE or PHISH are appended
// to |cached_browse_hashes_|, stamped with the current time as an int time_t.
// Hits from any other list are not cached here.
1026 void SafeBrowsingDatabaseNew::CacheHashResults(
1027 const std::vector<SBPrefix>& prefixes,
1028 const std::vector<SBFullHashResult>& full_hits) {
1029 // This is called on the I/O thread, lock against updates.
1030 base::AutoLock locked(lookup_lock_);
1032 if (full_hits.empty()) {
1033 prefix_miss_cache_.insert(prefixes.begin(), prefixes.end());
1037 const base::Time now = base::Time::Now();
// Remember where the pre-existing entries end so that only the newly appended
// tail needs sorting below.
1038 const size_t orig_size = cached_browse_hashes_.size();
1039 for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin();
1040 iter != full_hits.end(); ++iter) {
1041 if (iter->list_id == safe_browsing_util::MALWARE ||
1042 iter->list_id == safe_browsing_util::PHISH) {
1043 SBFullHashCached cached_hash;
1044 cached_hash.hash = iter->hash;
1045 cached_hash.list_id = iter->list_id;
1046 cached_hash.received = static_cast<int>(now.ToTimeT());
1047 cached_browse_hashes_.push_back(cached_hash);
1051 // Sort new entries then merge with the previously-sorted entries.
// |orig_end| is computed after all push_backs, so it is not invalidated by
// vector growth during the loop above.
1052 std::vector<SBFullHashCached>::iterator
1053 orig_end = cached_browse_hashes_.begin() + orig_size;
1054 std::sort(orig_end, cached_browse_hashes_.end(), SBFullHashCachedPrefixLess);
1055 std::inplace_merge(cached_browse_hashes_.begin(),
1056 orig_end, cached_browse_hashes_.end(),
1057 SBFullHashCachedPrefixLess);
// Begins an update on every configured store.
//
// The browse store is used unconditionally; each of the other stores is
// tested for presence first. A store whose BeginUpdate() fails gets a
// store-specific RecordFailure() code followed by HandleCorruptDatabase().
// After the BeginUpdate() checks, |lists| is passed to
// UpdateChunkRangesForList(s)() once per store together with the list name(s)
// that store serves (presumably to be filled with the chunk ranges already
// held; confirm in that helper), and |corruption_detected_| and
// |change_detected_| are cleared.
1060 bool SafeBrowsingDatabaseNew::UpdateStarted(
1061 std::vector<SBListChunkRanges>* lists) {
1062 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1065 // If |BeginUpdate()| fails, reset the database.
1066 if (!browse_store_->BeginUpdate()) {
1067 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN);
1068 HandleCorruptDatabase();
1072 if (download_store_.get() && !download_store_->BeginUpdate()) {
1073 RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN);
1074 HandleCorruptDatabase();
1078 if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) {
1079 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
1080 HandleCorruptDatabase();
// NOTE(review): the download whitelist reports the same failure code as the
// csd whitelist above, so the two cannot be told apart in the failure metric.
1084 if (download_whitelist_store_.get() &&
1085 !download_whitelist_store_->BeginUpdate()) {
1086 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
1087 HandleCorruptDatabase();
1091 if (extension_blacklist_store_ &&
1092 !extension_blacklist_store_->BeginUpdate()) {
1093 RecordFailure(FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN);
1094 HandleCorruptDatabase();
1098 if (side_effect_free_whitelist_store_ &&
1099 !side_effect_free_whitelist_store_->BeginUpdate()) {
1100 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN);
1101 HandleCorruptDatabase();
1105 if (ip_blacklist_store_ && !ip_blacklist_store_->BeginUpdate()) {
1106 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN);
1107 HandleCorruptDatabase();
// The browse store serves two lists (malware and phishing); every other store
// below serves exactly one.
1111 UpdateChunkRangesForLists(browse_store_.get(),
1112 safe_browsing_util::kMalwareList,
1113 safe_browsing_util::kPhishingList,
1116 // NOTE(shess): |download_store_| used to contain kBinHashList, which has been
1117 // deprecated. Code to delete the list from the store shows ~15k hits/day as
1118 // of Feb 2014, so it has been removed. Everything _should_ be resilient to
1119 // extra data of that sort.
1120 UpdateChunkRangesForList(download_store_.get(),
1121 safe_browsing_util::kBinUrlList, lists);
1123 UpdateChunkRangesForList(csd_whitelist_store_.get(),
1124 safe_browsing_util::kCsdWhiteList, lists);
1126 UpdateChunkRangesForList(download_whitelist_store_.get(),
1127 safe_browsing_util::kDownloadWhiteList, lists);
1129 UpdateChunkRangesForList(extension_blacklist_store_.get(),
1130 safe_browsing_util::kExtensionBlacklist, lists);
1132 UpdateChunkRangesForList(side_effect_free_whitelist_store_.get(),
1133 safe_browsing_util::kSideEffectFreeWhitelist, lists);
1135 UpdateChunkRangesForList(ip_blacklist_store_.get(),
1136 safe_browsing_util::kIPBlacklist, lists);
1138 corruption_detected_ = false;
1139 change_detected_ = false;
// Completes the update begun by UpdateStarted().
//
// |update_succeeded| is the caller's verdict on the update as a whole. The
// function has three visible parts:
//  1. If the update did not succeed, CheckValidity() is called on each present
//     store; this function itself only logs (DLOG) a store that reports
//     invalid.
//  2. If the update did not succeed or nothing changed (|change_detected_| is
//     false), CancelUpdate() is called on every present store. A successful
//     but empty update is additionally recorded as 0 in
//     "SB2.DatabaseUpdateKilobytes".
//  3. Each store is finished through its dedicated Update*Store() helper, and
//     size histograms are emitted for the download and extension blacklist
//     stores.
1143 void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) {
1144 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1146 // The update may have failed due to corrupt storage (for instance,
1147 // an excessive number of invalid add_chunks and sub_chunks).
1148 // Double-check that the databases are valid.
1149 // TODO(shess): Providing a checksum for the add_chunk and sub_chunk
1150 // sections would allow throwing a corruption error in
1152 if (!update_succeeded) {
1153 if (!browse_store_->CheckValidity())
1154 DLOG(ERROR) << "Safe-browsing browse database corrupt.";
1156 if (download_store_.get() && !download_store_->CheckValidity())
1157 DLOG(ERROR) << "Safe-browsing download database corrupt.";
1159 if (csd_whitelist_store_.get() && !csd_whitelist_store_->CheckValidity())
1160 DLOG(ERROR) << "Safe-browsing csd whitelist database corrupt.";
1162 if (download_whitelist_store_.get() &&
1163 !download_whitelist_store_->CheckValidity()) {
1164 DLOG(ERROR) << "Safe-browsing download whitelist database corrupt.";
1167 if (extension_blacklist_store_ &&
1168 !extension_blacklist_store_->CheckValidity()) {
1169 DLOG(ERROR) << "Safe-browsing extension blacklist database corrupt.";
1172 if (side_effect_free_whitelist_store_ &&
1173 !side_effect_free_whitelist_store_->CheckValidity()) {
1174 DLOG(ERROR) << "Safe-browsing side-effect free whitelist database "
1178 if (ip_blacklist_store_ && !ip_blacklist_store_->CheckValidity()) {
1179 DLOG(ERROR) << "Safe-browsing IP blacklist database corrupt.";
1183 if (corruption_detected_)
1186 // Unroll the transaction if there was a protocol error or if the
1187 // transaction was empty. This will leave the prefix set, the
1188 // pending hashes, and the prefix miss cache in place.
1189 if (!update_succeeded || !change_detected_) {
1190 // Track empty updates to answer questions at http://crbug.com/72216 .
1191 if (update_succeeded && !change_detected_)
1192 UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0);
1193 browse_store_->CancelUpdate();
1194 if (download_store_.get())
1195 download_store_->CancelUpdate();
1196 if (csd_whitelist_store_.get())
1197 csd_whitelist_store_->CancelUpdate();
1198 if (download_whitelist_store_.get())
1199 download_whitelist_store_->CancelUpdate();
1200 if (extension_blacklist_store_)
1201 extension_blacklist_store_->CancelUpdate();
1202 if (side_effect_free_whitelist_store_)
1203 side_effect_free_whitelist_store_->CancelUpdate();
1204 if (ip_blacklist_store_)
1205 ip_blacklist_store_->CancelUpdate();
// Finish the download store first; its on-disk size in bytes is reported in
// kilobytes.
1209 if (download_store_) {
1210 int64 size_bytes = UpdateHashPrefixStore(
1212 download_store_.get(),
1213 FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH);
1214 UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
1215 static_cast<int>(size_bytes / 1024));
// The browse store and both whitelist stores are handed to their helpers
// without a presence check at this level.
1218 UpdateBrowseStore();
1219 UpdateWhitelistStore(csd_whitelist_filename_,
1220 csd_whitelist_store_.get(),
1222 UpdateWhitelistStore(download_whitelist_filename_,
1223 download_whitelist_store_.get(),
1224 &download_whitelist_);
1226 if (extension_blacklist_store_) {
1227 int64 size_bytes = UpdateHashPrefixStore(
1228 extension_blacklist_filename_,
1229 extension_blacklist_store_.get(),
1230 FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH);
1231 UMA_HISTOGRAM_COUNTS("SB2.ExtensionBlacklistKilobytes",
1232 static_cast<int>(size_bytes / 1024));
1235 if (side_effect_free_whitelist_store_)
1236 UpdateSideEffectFreeWhitelistStore();
1238 if (ip_blacklist_store_)
1239 UpdateIpBlacklistStore();
// Finishes the pending update on a whitelist |store| and reloads |whitelist|
// from the full hashes the store returns.
//
// If FinishUpdate() fails, the failure is recorded as
// FAILURE_WHITELIST_DATABASE_UPDATE_FINISH and WhitelistEverything() is
// applied to |whitelist|. On Mac, |store_filename| is additionally passed to
// base::mac::SetFileBackupExclusion().
1242 void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
1243 const base::FilePath& store_filename,
1244 SafeBrowsingStore* store,
1245 SBWhitelist* whitelist) {
1249 // Note: |builder| will not be empty. The current data store implementation
1250 // stores all full-length hashes as both full and prefix hashes.
// |builder| is only needed to satisfy FinishUpdate(); its contents are not
// read afterwards in this function.
1251 safe_browsing::PrefixSetBuilder builder;
1252 std::vector<SBAddFullHash> full_hashes;
1253 if (!store->FinishUpdate(&builder, &full_hashes)) {
1254 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH);
1255 WhitelistEverything(whitelist);
1259 #if defined(OS_MACOSX)
1260 base::mac::SetFileBackupExclusion(store_filename);
1263 LoadWhitelist(full_hashes, whitelist);
// Finishes the pending update on |store| for stores whose prefix/full-hash
// output is not kept in memory by this class.
//
// Records |failure_type| if FinishUpdate() fails. Returns
// GetFileSizeOrZero(store_filename); callers treat that value as a byte count
// and divide it by 1024 for their kilobyte histograms. On Mac,
// |store_filename| is also passed to base::mac::SetFileBackupExclusion().
1266 int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
1267 const base::FilePath& store_filename,
1268 SafeBrowsingStore* store,
1269 FailureType failure_type) {
1270 // These results are not used after this call. Simply ignore the
1271 // returned value after FinishUpdate(...).
1272 safe_browsing::PrefixSetBuilder builder;
1273 std::vector<SBAddFullHash> add_full_hashes_result;
1275 if (!store->FinishUpdate(&builder, &add_full_hashes_result))
1276 RecordFailure(failure_type);
1278 #if defined(OS_MACOSX)
1279 base::mac::SetFileBackupExclusion(store_filename);
1282 return GetFileSizeOrZero(store_filename);
// Finishes the pending update on |browse_store_|, builds a new prefix set
// from the result and swaps it into |browse_prefix_set_|.
//
// Visible steps:
//  - Snapshot process IO counters (when available) and the start time.
//  - browse_store_->FinishUpdate() fills a PrefixSetBuilder and the add full
//    hashes; failure is recorded as FAILURE_BROWSE_DATABASE_UPDATE_FINISH.
//  - The full hashes are copied out of the SBAddFullHash records and given to
//    builder.GetPrefixSet().
//  - Under |lookup_lock_|, |cached_browse_hashes_| and |prefix_miss_cache_|
//    are cleared and the new prefix set is swapped in.
//  - Build time, IO deltas and file sizes are reported to SB2.* histograms.
1285 void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
1286 // Measure the amount of IO during the filter build.
1287 base::IoCounters io_before, io_after;
1288 base::ProcessHandle handle = base::Process::Current().handle();
1289 scoped_ptr<base::ProcessMetrics> metric(
1290 #if !defined(OS_MACOSX)
1291 base::ProcessMetrics::CreateProcessMetrics(handle)
1293 // Getting stats only for the current process is enough, so NULL is fine.
1294 base::ProcessMetrics::CreateProcessMetrics(handle, NULL)
1298 // IoCounters are currently not supported on Mac, and may not be
1299 // available for Linux, so we check the result and only show IO
1300 // stats if they are available.
1301 const bool got_counters = metric->GetIOCounters(&io_before);
1303 const base::TimeTicks before = base::TimeTicks::Now();
1305 // TODO(shess): Perhaps refactor to let builder accumulate full hashes on the
1306 // fly? Other clients use the SBAddFullHash vector, but AFAICT they only use
1307 // the SBFullHash portion. It would need an accessor on PrefixSet.
1308 safe_browsing::PrefixSetBuilder builder;
1309 std::vector<SBAddFullHash> add_full_hashes;
1310 if (!browse_store_->FinishUpdate(&builder, &add_full_hashes)) {
1311 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
// Strip the add-chunk bookkeeping; only the SBFullHash part is handed to the
// prefix set builder.
1315 std::vector<SBFullHash> full_hash_results;
1316 for (size_t i = 0; i < add_full_hashes.size(); ++i) {
1317 full_hash_results.push_back(add_full_hashes[i].full_hash);
1320 scoped_ptr<safe_browsing::PrefixSet>
1321 prefix_set(builder.GetPrefixSet(full_hash_results));
1323 // Swap in the newly built filter and cache.
1325 base::AutoLock locked(lookup_lock_);
1327 // TODO(shess): If |CacheHashResults()| is posted between the
1328 // earlier lock and this clear, those pending hashes will be lost.
1329 // It could be fixed by only removing hashes which were collected
1330 // at the earlier point. I believe that is fail-safe as-is (the
1331 // hash will be fetched again).
1332 cached_browse_hashes_.clear();
1333 prefix_miss_cache_.clear();
// After the swap |prefix_set| holds the previous prefix set (if any).
1334 browse_prefix_set_.swap(prefix_set);
1337 DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in "
1338 << (base::TimeTicks::Now() - before).InMilliseconds()
1340 UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before);
1342 // Persist the prefix set to disk. Since only this thread changes
1343 // |browse_prefix_set_|, there is no need to lock.
1346 // Gather statistics.
// NOTE(review): in the two *Kilobytes histograms below the static_cast<int>
// binds to the byte difference, and the division by 1024 happens afterwards.
// The file-size histograms further down instead cast (size / 1024). A
// transfer delta that does not fit in int is therefore narrowed before it is
// scaled. The likely intent is static_cast<int>((after - before) / 1024);
// confirm before changing.
1347 if (got_counters && metric->GetIOCounters(&io_after)) {
1348 UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes",
1349 static_cast<int>(io_after.ReadTransferCount -
1350 io_before.ReadTransferCount) / 1024);
1351 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes",
1352 static_cast<int>(io_after.WriteTransferCount -
1353 io_before.WriteTransferCount) / 1024);
1354 UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
1355 static_cast<int>(io_after.ReadOperationCount -
1356 io_before.ReadOperationCount));
1357 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
1358 static_cast<int>(io_after.WriteOperationCount -
1359 io_before.WriteOperationCount));
// On-disk sizes of the prefix set file and of the browse store file, in
// kilobytes.
1362 int64 file_size = GetFileSizeOrZero(browse_prefix_set_filename_);
1363 UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes",
1364 static_cast<int>(file_size / 1024));
1365 file_size = GetFileSizeOrZero(browse_filename_);
1366 UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
1367 static_cast<int>(file_size / 1024));
1369 #if defined(OS_MACOSX)
1370 base::mac::SetFileBackupExclusion(browse_filename_);
// Finishes the pending update on |side_effect_free_whitelist_store_|, builds
// a prefix set without full hashes (GetPrefixSetNoHashes()), swaps it into
// |side_effect_free_whitelist_prefix_set_| under |lookup_lock_| and writes it
// to |side_effect_free_whitelist_prefix_set_filename_|.
//
// Failures of FinishUpdate() and of the file write each have their own
// RecordFailure() code. Write time and the sizes of both files are reported
// to SB2.* histograms; on Mac both files are passed to
// base::mac::SetFileBackupExclusion().
1374 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
1375 safe_browsing::PrefixSetBuilder builder;
// The full hashes are collected because FinishUpdate() requires the
// out-parameter; they are not read again in this function.
1376 std::vector<SBAddFullHash> add_full_hashes_result;
1378 if (!side_effect_free_whitelist_store_->FinishUpdate(
1379 &builder, &add_full_hashes_result)) {
1380 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH);
1383 scoped_ptr<safe_browsing::PrefixSet>
1384 prefix_set(builder.GetPrefixSetNoHashes());
1386 // Swap in the newly built prefix set.
1388 base::AutoLock locked(lookup_lock_);
1389 side_effect_free_whitelist_prefix_set_.swap(prefix_set);
// The write below reads the member after the swap, i.e. the new set.
1392 const base::TimeTicks before = base::TimeTicks::Now();
1393 const bool write_ok = side_effect_free_whitelist_prefix_set_->WriteFile(
1394 side_effect_free_whitelist_prefix_set_filename_);
1395 DVLOG(1) << "SafeBrowsingDatabaseNew wrote side-effect free whitelist prefix "
1396 << "set in " << (base::TimeTicks::Now() - before).InMilliseconds()
1398 UMA_HISTOGRAM_TIMES("SB2.SideEffectFreePrefixSetWrite",
1399 base::TimeTicks::Now() - before);
1402 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE);
1404 // Gather statistics.
1405 int64 file_size = GetFileSizeOrZero(
1406 side_effect_free_whitelist_prefix_set_filename_);
1407 UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistPrefixSetKilobytes",
1408 static_cast<int>(file_size / 1024));
1409 file_size = GetFileSizeOrZero(side_effect_free_whitelist_filename_);
1410 UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistDatabaseKilobytes",
1411 static_cast<int>(file_size / 1024));
1413 #if defined(OS_MACOSX)
1414 base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename_);
1415 base::mac::SetFileBackupExclusion(
1416 side_effect_free_whitelist_prefix_set_filename_);
// Finishes the pending update on |ip_blacklist_store_| and reloads the
// in-memory IP blacklist from the full hashes it returns.
//
// If FinishUpdate() fails, the failure is recorded as
// FAILURE_IP_BLACKLIST_UPDATE_FINISH and LoadIpBlacklist() is called with an
// empty vector. On Mac, |ip_blacklist_filename_| is passed to
// base::mac::SetFileBackupExclusion().
1420 void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() {
1421 // Note: prefixes will not be empty. The current data store implementation
1422 // stores all full-length hashes as both full and prefix hashes.
1423 safe_browsing::PrefixSetBuilder builder;
1424 std::vector<SBAddFullHash> full_hashes;
1425 if (!ip_blacklist_store_->FinishUpdate(&builder, &full_hashes)) {
1426 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH);
1427 LoadIpBlacklist(std::vector<SBAddFullHash>()); // Clear the list.
1431 #if defined(OS_MACOSX)
1432 base::mac::SetFileBackupExclusion(ip_blacklist_filename_);
1435 LoadIpBlacklist(full_hashes);
// Requests a deferred database reset: posts OnHandleCorruptDatabase() to the
// current message loop, bound to a weak pointer from |reset_factory_|.
//
// The HasWeakPtrs() check means nothing is recorded or posted while a weak
// pointer from |reset_factory_| is still outstanding, so repeated calls do not
// queue additional tasks.
1438 void SafeBrowsingDatabaseNew::HandleCorruptDatabase() {
1439 // Reset the database after the current task has unwound (but only
1440 // reset once within the scope of a given task).
1441 if (!reset_factory_.HasWeakPtrs()) {
1442 RecordFailure(FAILURE_DATABASE_CORRUPT);
1443 base::MessageLoop::current()->PostTask(FROM_HERE,
1444 base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase,
1445 reset_factory_.GetWeakPtr()));
// Task body posted by HandleCorruptDatabase().
//
// Records FAILURE_DATABASE_CORRUPT_HANDLER and sets |corruption_detected_|,
// which InsertChunks(), DeleteChunks() and UpdateFinished() test. Ends with a
// DLOG(FATAL).
1449 void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
1450 RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER);
1451 corruption_detected_ = true; // Stop updating the database.
1453 DLOG(FATAL) << "SafeBrowsing database was corrupt and reset";
1456 // TODO(shess): I'm not clear why this code doesn't have any
1457 // real error-handling.
//
// Loads |browse_prefix_set_| from |browse_prefix_set_filename_|.
//
// The browse database file is checked first (present and non-empty). A
// leftover bloom filter file derived from |browse_filename_| is deleted; the
// result of that deletion is ignored. Load time goes to the
// "SB2.PrefixSetLoad" histogram, and a null result from LoadFile() is
// recorded as FAILURE_BROWSE_PREFIX_SET_READ.
1458 void SafeBrowsingDatabaseNew::LoadPrefixSet() {
1459 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1460 DCHECK(!browse_prefix_set_filename_.empty());
1462 // If there is no database, the filter cannot be used.
1463 base::File::Info db_info;
1464 if (!base::GetFileInfo(browse_filename_, &db_info) || db_info.size == 0)
1467 // Cleanup any stale bloom filter (no longer used).
1468 // TODO(shess): Track failure to delete?
1469 base::FilePath bloom_filter_filename =
1470 BloomFilterForFilename(browse_filename_);
1471 base::DeleteFile(bloom_filter_filename, false);
1473 const base::TimeTicks before = base::TimeTicks::Now();
1474 browse_prefix_set_ = safe_browsing::PrefixSet::LoadFile(
1475 browse_prefix_set_filename_);
1476 DVLOG(1) << "SafeBrowsingDatabaseNew read prefix set in "
1477 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
1478 UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before);
1480 if (!browse_prefix_set_.get())
1481 RecordFailure(FAILURE_BROWSE_PREFIX_SET_READ);
// Deletes the on-disk state of the database.
//
// Returns true only if every one of the ten steps (r1..r10) reported success;
// an absent optional store counts as success. All ten steps are attempted
// before the results are combined. Each step is paired with a RecordFailure()
// code.
//
// The browse, download, csd whitelist and download whitelist data are removed
// through their store's Delete(). The bloom filter, prefix sets, extension
// blacklist, side-effect free whitelist and IP blacklist are removed as plain
// files with base::DeleteFile().
1484 bool SafeBrowsingDatabaseNew::Delete() {
1485 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1487 const bool r1 = browse_store_->Delete();
1489 RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1491 const bool r2 = download_store_.get() ? download_store_->Delete() : true;
1493 RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1495 const bool r3 = csd_whitelist_store_.get() ?
1496 csd_whitelist_store_->Delete() : true;
1498 RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1500 const bool r4 = download_whitelist_store_.get() ?
1501 download_whitelist_store_->Delete() : true;
1503 RecordFailure(FAILURE_DATABASE_STORE_DELETE);
// From here on the artifacts are deleted by filename rather than through a
// store object.
1505 base::FilePath bloom_filter_filename =
1506 BloomFilterForFilename(browse_filename_);
1507 const bool r5 = base::DeleteFile(bloom_filter_filename, false);
1509 RecordFailure(FAILURE_DATABASE_FILTER_DELETE);
1511 const bool r6 = base::DeleteFile(browse_prefix_set_filename_, false);
1513 RecordFailure(FAILURE_BROWSE_PREFIX_SET_DELETE);
1515 const bool r7 = base::DeleteFile(extension_blacklist_filename_, false);
1517 RecordFailure(FAILURE_EXTENSION_BLACKLIST_DELETE);
1519 const bool r8 = base::DeleteFile(side_effect_free_whitelist_filename_,
1522 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE);
1524 const bool r9 = base::DeleteFile(
1525 side_effect_free_whitelist_prefix_set_filename_,
1528 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE);
1530 const bool r10 = base::DeleteFile(ip_blacklist_filename_, false);
1532 RecordFailure(FAILURE_IP_BLACKLIST_DELETE);
1534 return r1 && r2 && r3 && r4 && r5 && r6 && r7 && r8 && r9 && r10;
// Writes |browse_prefix_set_| to |browse_prefix_set_filename_|.
//
// The prefix set pointer is checked for null first. Write time goes to the
// "SB2.PrefixSetWrite" histogram, and the write has an associated failure
// code, FAILURE_BROWSE_PREFIX_SET_WRITE. On Mac the file is passed to
// base::mac::SetFileBackupExclusion().
1537 void SafeBrowsingDatabaseNew::WritePrefixSet() {
1538 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1540 if (!browse_prefix_set_.get())
1543 const base::TimeTicks before = base::TimeTicks::Now();
1544 const bool write_ok = browse_prefix_set_->WriteFile(
1545 browse_prefix_set_filename_);
1546 DVLOG(1) << "SafeBrowsingDatabaseNew wrote prefix set in "
1547 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
1548 UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before);
1551 RecordFailure(FAILURE_BROWSE_PREFIX_SET_WRITE);
1553 #if defined(OS_MACOSX)
1554 base::mac::SetFileBackupExclusion(browse_prefix_set_filename_);
// Switches |whitelist| to its "everything is whitelisted" state: sets the
// flag half of the pair (|second|) to true and clears the hash vector
// (|first|). Both writes happen under |lookup_lock_|.
1558 void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist* whitelist) {
1559 base::AutoLock locked(lookup_lock_);
1560 whitelist->second = true;
1561 whitelist->first.clear();
// Rebuilds |whitelist| from |full_hashes|.
//
// WhitelistEverything() is applied instead of loading individual hashes in two
// cases: when |full_hashes| has more than kMaxWhitelistSize entries, and when
// the hash of kWhitelistKillSwitchUrl is among them. Otherwise the sorted hash
// vector is swapped into |whitelist->first| and |whitelist->second| is set to
// false, under |lookup_lock_|.
1564 void SafeBrowsingDatabaseNew::LoadWhitelist(
1565 const std::vector<SBAddFullHash>& full_hashes,
1566 SBWhitelist* whitelist) {
1567 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1568 if (full_hashes.size() > kMaxWhitelistSize) {
1569 WhitelistEverything(whitelist);
// Build the new list outside the lock; only the final swap is done under
// |lookup_lock_|.
1573 std::vector<SBFullHash> new_whitelist;
1574 new_whitelist.reserve(full_hashes.size());
1575 for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
1576 it != full_hashes.end(); ++it) {
1577 new_whitelist.push_back(it->full_hash);
// Sorted with SBFullHashLess so that std::binary_search with the same
// comparator is valid on it.
1579 std::sort(new_whitelist.begin(), new_whitelist.end(), SBFullHashLess);
1581 SBFullHash kill_switch = SBFullHashForString(kWhitelistKillSwitchUrl);
1582 if (std::binary_search(new_whitelist.begin(), new_whitelist.end(),
1583 kill_switch, SBFullHashLess)) {
1584 // The kill switch is whitelisted hence we whitelist all URLs.
1585 WhitelistEverything(whitelist);
1587 base::AutoLock locked(lookup_lock_);
1588 whitelist->second = false;
1589 whitelist->first.swap(new_whitelist);
// Rebuilds |ip_blacklist_| from |full_hashes|.
//
// Each 32-byte full hash is reinterpreted as described in the format comment
// below: the first base::kSHA1Length bytes are a SHA-1 of an IPv6 prefix and
// the following byte is the prefix size in bits. Entries are grouped by a
// precomputed net::kIPv6AddressSize-byte netmask: |new_blacklist| maps each
// mask to the set of hashed prefixes of that size. An out-of-range prefix
// size is recorded as FAILURE_IP_BLACKLIST_UPDATE_INVALID and the list built
// so far is cleared. The result is swapped into |ip_blacklist_| under
// |lookup_lock_|.
1593 void SafeBrowsingDatabaseNew::LoadIpBlacklist(
1594 const std::vector<SBAddFullHash>& full_hashes) {
1595 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1596 IPBlacklist new_blacklist;
1597 DVLOG(2) << "Writing IP blacklist of size: " << full_hashes.size();
1598 for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
1599 it != full_hashes.end();
1601 const char* full_hash = it->full_hash.full_hash;
1602 DCHECK_EQ(crypto::kSHA256Length, arraysize(it->full_hash.full_hash));
1603 // The format of the IP blacklist is:
1604 // SHA-1(IPv6 prefix) + uint8(prefix size) + 11 unused bytes.
1605 std::string hashed_ip_prefix(full_hash, base::kSHA1Length);
// Go through uint8 so that a byte >= 0x80 is not sign-extended when |char| is
// signed.
1606 size_t prefix_size = static_cast<uint8>(full_hash[base::kSHA1Length]);
1607 if (prefix_size > kMaxIpPrefixSize || prefix_size < kMinIpPrefixSize) {
1608 DVLOG(2) << "Invalid IP prefix size in IP blacklist: " << prefix_size;
1609 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_INVALID);
1610 new_blacklist.clear(); // Load empty blacklist.
1614 // We precompute the mask for the given subnet size to speed up lookups.
1615 // Basically we need to create a 16B long string which has the highest
1616 // |size| bits sets to one.
// Example: prefix_size == 20 gives two whole 0xFF bytes followed by 0xF0
// (0xFF << 4, truncated to one byte); the remaining bytes stay zero.
1617 std::string mask(net::kIPv6AddressSize, '\0');
1618 mask.replace(0, prefix_size / 8, prefix_size / 8, '\xFF');
1619 if ((prefix_size % 8) != 0) {
1620 mask[prefix_size / 8] = 0xFF << (8 - (prefix_size % 8));
1622 DVLOG(2) << "Inserting malicious IP: "
1623 << " raw:" << base::HexEncode(full_hash, crypto::kSHA256Length)
1624 << " mask:" << base::HexEncode(mask.data(), mask.size())
1625 << " prefix_size:" << prefix_size
1626 << " hashed_ip:" << base::HexEncode(hashed_ip_prefix.data(),
1627 hashed_ip_prefix.size());
1628 new_blacklist[mask].insert(hashed_ip_prefix);
1631 base::AutoLock locked(lookup_lock_);
1632 ip_blacklist_.swap(new_blacklist);
// Returns the result of ContainsWhitelistedHashes() for |csd_whitelist_| and
// a single hash: the full hash of kMalwareIPKillSwitchUrl. In other words the
// kill switch is reported as on when that URL's hash is matched by the csd
// whitelist.
1635 bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() {
1636 SBFullHash malware_kill_switch = SBFullHashForString(kMalwareIPKillSwitchUrl);
1637 std::vector<SBFullHash> full_hashes;
1638 full_hashes.push_back(malware_kill_switch);
1639 return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);