1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/safe_browsing/safe_browsing_database.h"
10 #include "base/bind.h"
11 #include "base/files/file_util.h"
12 #include "base/message_loop/message_loop.h"
13 #include "base/metrics/histogram.h"
14 #include "base/metrics/stats_counters.h"
15 #include "base/process/process_handle.h"
16 #include "base/process/process_metrics.h"
17 #include "base/sha1.h"
18 #include "base/strings/string_number_conversions.h"
19 #include "base/strings/stringprintf.h"
20 #include "base/time/time.h"
21 #include "chrome/browser/safe_browsing/prefix_set.h"
22 #include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
23 #include "content/public/browser/browser_thread.h"
24 #include "crypto/sha2.h"
25 #include "net/base/net_util.h"
28 #if defined(OS_MACOSX)
29 #include "base/mac/mac_util.h"
32 using content::BrowserThread;
36 // Filename suffix for the bloom filter.
37 const base::FilePath::CharType kBloomFilterFile[] =
38 FILE_PATH_LITERAL(" Filter 2");
39 // Filename suffix for the prefix set.
40 const base::FilePath::CharType kPrefixSetFile[] =
41 FILE_PATH_LITERAL(" Prefix Set");
42 // Filename suffix for download store.
43 const base::FilePath::CharType kDownloadDBFile[] =
44 FILE_PATH_LITERAL(" Download");
45 // Filename suffix for client-side phishing detection whitelist store.
46 const base::FilePath::CharType kCsdWhitelistDBFile[] =
47 FILE_PATH_LITERAL(" Csd Whitelist");
48 // Filename suffix for the download whitelist store.
49 const base::FilePath::CharType kDownloadWhitelistDBFile[] =
50 FILE_PATH_LITERAL(" Download Whitelist");
51 // Filename suffix for the extension blacklist store.
52 const base::FilePath::CharType kExtensionBlacklistDBFile[] =
53 FILE_PATH_LITERAL(" Extension Blacklist");
54 // Filename suffix for the side-effect free whitelist store.
55 const base::FilePath::CharType kSideEffectFreeWhitelistDBFile[] =
56 FILE_PATH_LITERAL(" Side-Effect Free Whitelist");
57 // Filename suffix for the csd malware IP blacklist store.
58 const base::FilePath::CharType kIPBlacklistDBFile[] =
59 FILE_PATH_LITERAL(" IP Blacklist");
60 // Filename suffix for the unwanted software blacklist store.
61 const base::FilePath::CharType kUnwantedSoftwareDBFile[] =
62 FILE_PATH_LITERAL(" UwS List");
64 // Filename suffix for browse store.
65 // TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
66 // Unfortunately, to change the name implies lots of transition code
67 // for little benefit. If/when file formats change (say to put all
68 // the data in one file), that would be a convenient point to rectify
70 // TODO(shess): This shouldn't be OS-driven <http://crbug.com/394379>
71 #if defined(OS_ANDROID)
72 // NOTE(shess): This difference is also reflected in the list name in
73 // safe_browsing_util.cc.
74 // TODO(shess): Spin up an alternate list id which can be persisted in the
75 // store. Then if a mistake is made, it won't cause confusion between
76 // incompatible lists.
77 const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Mobile");
79 const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
// Maximum number of entries we allow in any of the whitelists.
// If a whitelist on disk contains more entries then all lookups to
// the whitelist will be considered a match.
const size_t kMaxWhitelistSize = 5000;

// If the hash of this exact expression is on a whitelist then all
// lookups to this whitelist will be considered a match.
const char kWhitelistKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch";  // Don't change this!

// If the hash of this exact expression is on a whitelist then the
// malware IP blacklisting feature will be disabled in csd.
const char kMalwareIPKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch_malware";

// Upper and lower bounds for an IP prefix size.
// NOTE(review): presumably measured in bits of an IPv6 address, given the
// 1..128 range -- confirm against the code that consumes these.
const size_t kMaxIpPrefixSize = 128;
const size_t kMinIpPrefixSize = 1;
// To save space, the incoming |chunk_id| and |list_id| are combined
// into an |encoded_chunk_id| for storage by shifting the |list_id|
// into the low-order bits.  These functions decode that information.
// TODO(lzheng): It was reasonable when database is saved in sqlite, but
// there should be better ways to save chunk_id and list_id after we use
// SafeBrowsingStoreFile.

// Returns the low-order bit of |encoded_chunk_id|, which carries the list
// id's parity (0 or 1).
int GetListIdBit(const int encoded_chunk_id) {
  return encoded_chunk_id & 1;
}
// Returns the chunk number stored in |encoded_chunk_id| by dropping the
// low-order list-id bit.
int DecodeChunkId(int encoded_chunk_id) {
  return encoded_chunk_id >> 1;
}
113 int EncodeChunkId(const int chunk, const int list_id) {
114 DCHECK_NE(list_id, safe_browsing_util::INVALID);
115 return chunk << 1 | list_id % 2;
118 // Generate the set of full hashes to check for |url|. If
119 // |include_whitelist_hashes| is true we will generate additional path-prefixes
120 // to match against the csd whitelist. E.g., if the path-prefix /foo is on the
121 // whitelist it should also match /foo/bar which is not the case for all the
122 // other lists. We'll also always add a pattern for the empty path.
123 // TODO(shess): This function is almost the same as
124 // |CompareFullHashes()| in safe_browsing_util.cc, except that code
125 // does an early exit on match. Since match should be the infrequent
126 // case (phishing or malware found), consider combining this function
128 void UrlToFullHashes(const GURL& url,
129 bool include_whitelist_hashes,
130 std::vector<SBFullHash>* full_hashes) {
131 std::vector<std::string> hosts;
132 if (url.HostIsIPAddress()) {
133 hosts.push_back(url.host());
135 safe_browsing_util::GenerateHostsToCheck(url, &hosts);
138 std::vector<std::string> paths;
139 safe_browsing_util::GeneratePathsToCheck(url, &paths);
141 for (size_t i = 0; i < hosts.size(); ++i) {
142 for (size_t j = 0; j < paths.size(); ++j) {
143 const std::string& path = paths[j];
144 full_hashes->push_back(SBFullHashForString(hosts[i] + path));
146 // We may have /foo as path-prefix in the whitelist which should
147 // also match with /foo/bar and /foo?bar. Hence, for every path
148 // that ends in '/' we also add the path without the slash.
149 if (include_whitelist_hashes &&
151 path[path.size() - 1] == '/') {
152 full_hashes->push_back(
153 SBFullHashForString(hosts[i] + path.substr(0, path.size() - 1)));
159 // Get the prefixes matching the download |urls|.
160 void GetDownloadUrlPrefixes(const std::vector<GURL>& urls,
161 std::vector<SBPrefix>* prefixes) {
162 std::vector<SBFullHash> full_hashes;
163 for (size_t i = 0; i < urls.size(); ++i)
164 UrlToFullHashes(urls[i], false, &full_hashes);
166 for (size_t i = 0; i < full_hashes.size(); ++i)
167 prefixes->push_back(full_hashes[i].prefix);
170 // Helper function to compare addprefixes in |store| with |prefixes|.
171 // The |list_bit| indicates which list (url or hash) to compare.
173 // Returns true if there is a match, |*prefix_hits| (if non-NULL) will contain
174 // the actual matching prefixes.
175 bool MatchAddPrefixes(SafeBrowsingStore* store,
177 const std::vector<SBPrefix>& prefixes,
178 std::vector<SBPrefix>* prefix_hits) {
179 prefix_hits->clear();
180 bool found_match = false;
182 SBAddPrefixes add_prefixes;
183 store->GetAddPrefixes(&add_prefixes);
184 for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
185 iter != add_prefixes.end(); ++iter) {
186 for (size_t j = 0; j < prefixes.size(); ++j) {
187 const SBPrefix& prefix = prefixes[j];
188 if (prefix == iter->prefix &&
189 GetListIdBit(iter->chunk_id) == list_bit) {
190 prefix_hits->push_back(prefix);
198 // This function generates a chunk range string for |chunks|. It
199 // outputs one chunk range string per list and writes it to the
200 // |list_ranges| vector. We expect |list_ranges| to already be of the
201 // right size. E.g., if |chunks| contains chunks with two different
202 // list ids then |list_ranges| must contain two elements.
203 void GetChunkRanges(const std::vector<int>& chunks,
204 std::vector<std::string>* list_ranges) {
205 // Since there are 2 possible list ids, there must be exactly two
206 // list ranges. Even if the chunk data should only contain one
207 // line, this code has to somehow handle corruption.
208 DCHECK_EQ(2U, list_ranges->size());
210 std::vector<std::vector<int> > decoded_chunks(list_ranges->size());
211 for (std::vector<int>::const_iterator iter = chunks.begin();
212 iter != chunks.end(); ++iter) {
213 int mod_list_id = GetListIdBit(*iter);
214 DCHECK_GE(mod_list_id, 0);
215 DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size());
216 decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter));
218 for (size_t i = 0; i < decoded_chunks.size(); ++i) {
219 ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i]));
223 // Helper function to create chunk range lists for Browse related
225 void UpdateChunkRanges(SafeBrowsingStore* store,
226 const std::vector<std::string>& listnames,
227 std::vector<SBListChunkRanges>* lists) {
231 DCHECK_GT(listnames.size(), 0U);
232 DCHECK_LE(listnames.size(), 2U);
233 std::vector<int> add_chunks;
234 std::vector<int> sub_chunks;
235 store->GetAddChunks(&add_chunks);
236 store->GetSubChunks(&sub_chunks);
238 // Always decode 2 ranges, even if only the first one is expected.
239 // The loop below will only load as many into |lists| as |listnames|
241 std::vector<std::string> adds(2);
242 std::vector<std::string> subs(2);
243 GetChunkRanges(add_chunks, &adds);
244 GetChunkRanges(sub_chunks, &subs);
246 for (size_t i = 0; i < listnames.size(); ++i) {
247 const std::string& listname = listnames[i];
248 DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2,
249 static_cast<int>(i % 2));
250 DCHECK_NE(safe_browsing_util::GetListId(listname),
251 safe_browsing_util::INVALID);
252 lists->push_back(SBListChunkRanges(listname));
253 lists->back().adds.swap(adds[i]);
254 lists->back().subs.swap(subs[i]);
258 void UpdateChunkRangesForLists(SafeBrowsingStore* store,
259 const std::string& listname0,
260 const std::string& listname1,
261 std::vector<SBListChunkRanges>* lists) {
262 std::vector<std::string> listnames;
263 listnames.push_back(listname0);
264 listnames.push_back(listname1);
265 UpdateChunkRanges(store, listnames, lists);
268 void UpdateChunkRangesForList(SafeBrowsingStore* store,
269 const std::string& listname,
270 std::vector<SBListChunkRanges>* lists) {
271 UpdateChunkRanges(store, std::vector<std::string>(1, listname), lists);
274 // This code always checks for non-zero file size. This helper makes
275 // that less verbose.
276 int64 GetFileSizeOrZero(const base::FilePath& file_path) {
278 if (!base::GetFileSize(file_path, &size_64))
283 // Helper for PrefixSetContainsUrlHashes(). Returns true if an un-expired match
284 // for |full_hash| is found in |cache|, with any matches appended to |results|
285 // (true can be returned with zero matches). |expire_base| is used to check the
286 // cache lifetime of matches, expired matches will be discarded from |cache|.
287 bool GetCachedFullHash(std::map<SBPrefix, SBCachedFullHashResult>* cache,
288 const SBFullHash& full_hash,
289 const base::Time& expire_base,
290 std::vector<SBFullHashResult>* results) {
291 // First check if there is a valid cached result for this prefix.
292 std::map<SBPrefix, SBCachedFullHashResult>::iterator
293 citer = cache->find(full_hash.prefix);
294 if (citer == cache->end())
297 // Remove expired entries.
298 SBCachedFullHashResult& cached_result = citer->second;
299 if (cached_result.expire_after <= expire_base) {
304 // Find full-hash matches.
305 std::vector<SBFullHashResult>& cached_hashes = cached_result.full_hashes;
306 for (size_t i = 0; i < cached_hashes.size(); ++i) {
307 if (SBFullHashEqual(full_hash, cached_hashes[i].hash))
308 results->push_back(cached_hashes[i]);
316 // The default SafeBrowsingDatabaseFactory.
317 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
319 SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
320 bool enable_download_protection,
321 bool enable_client_side_whitelist,
322 bool enable_download_whitelist,
323 bool enable_extension_blacklist,
324 bool enable_side_effect_free_whitelist,
325 bool enable_ip_blacklist,
326 bool enable_unwanted_software_list) override {
327 return new SafeBrowsingDatabaseNew(
328 new SafeBrowsingStoreFile,
329 enable_download_protection ? new SafeBrowsingStoreFile : NULL,
330 enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL,
331 enable_download_whitelist ? new SafeBrowsingStoreFile : NULL,
332 enable_extension_blacklist ? new SafeBrowsingStoreFile : NULL,
333 enable_side_effect_free_whitelist ? new SafeBrowsingStoreFile : NULL,
334 enable_ip_blacklist ? new SafeBrowsingStoreFile : NULL,
335 enable_unwanted_software_list ? new SafeBrowsingStoreFile : NULL);
338 SafeBrowsingDatabaseFactoryImpl() { }
341 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl);
345 SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL;
347 // Factory method, non-thread safe. Caller has to make sure this s called
348 // on SafeBrowsing Thread.
349 // TODO(shess): There's no need for a factory any longer. Convert
350 // SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create()
351 // callers just construct things directly.
352 SafeBrowsingDatabase* SafeBrowsingDatabase::Create(
353 bool enable_download_protection,
354 bool enable_client_side_whitelist,
355 bool enable_download_whitelist,
356 bool enable_extension_blacklist,
357 bool enable_side_effect_free_whitelist,
358 bool enable_ip_blacklist,
359 bool enable_unwanted_software_list) {
361 factory_ = new SafeBrowsingDatabaseFactoryImpl();
362 return factory_->CreateSafeBrowsingDatabase(enable_download_protection,
363 enable_client_side_whitelist,
364 enable_download_whitelist,
365 enable_extension_blacklist,
366 enable_side_effect_free_whitelist,
368 enable_unwanted_software_list);
371 SafeBrowsingDatabase::~SafeBrowsingDatabase() {
375 base::FilePath SafeBrowsingDatabase::BrowseDBFilename(
376 const base::FilePath& db_base_filename) {
377 return base::FilePath(db_base_filename.value() + kBrowseDBFile);
381 base::FilePath SafeBrowsingDatabase::DownloadDBFilename(
382 const base::FilePath& db_base_filename) {
383 return base::FilePath(db_base_filename.value() + kDownloadDBFile);
387 base::FilePath SafeBrowsingDatabase::BloomFilterForFilename(
388 const base::FilePath& db_filename) {
389 return base::FilePath(db_filename.value() + kBloomFilterFile);
393 base::FilePath SafeBrowsingDatabase::PrefixSetForFilename(
394 const base::FilePath& db_filename) {
395 return base::FilePath(db_filename.value() + kPrefixSetFile);
399 base::FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename(
400 const base::FilePath& db_filename) {
401 return base::FilePath(db_filename.value() + kCsdWhitelistDBFile);
405 base::FilePath SafeBrowsingDatabase::DownloadWhitelistDBFilename(
406 const base::FilePath& db_filename) {
407 return base::FilePath(db_filename.value() + kDownloadWhitelistDBFile);
411 base::FilePath SafeBrowsingDatabase::ExtensionBlacklistDBFilename(
412 const base::FilePath& db_filename) {
413 return base::FilePath(db_filename.value() + kExtensionBlacklistDBFile);
417 base::FilePath SafeBrowsingDatabase::SideEffectFreeWhitelistDBFilename(
418 const base::FilePath& db_filename) {
419 return base::FilePath(db_filename.value() + kSideEffectFreeWhitelistDBFile);
423 base::FilePath SafeBrowsingDatabase::IpBlacklistDBFilename(
424 const base::FilePath& db_filename) {
425 return base::FilePath(db_filename.value() + kIPBlacklistDBFile);
429 base::FilePath SafeBrowsingDatabase::UnwantedSoftwareDBFilename(
430 const base::FilePath& db_filename) {
431 return base::FilePath(db_filename.value() + kUnwantedSoftwareDBFile);
434 SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) {
435 if (list_id == safe_browsing_util::PHISH ||
436 list_id == safe_browsing_util::MALWARE) {
437 return browse_store_.get();
438 } else if (list_id == safe_browsing_util::BINURL) {
439 return download_store_.get();
440 } else if (list_id == safe_browsing_util::CSDWHITELIST) {
441 return csd_whitelist_store_.get();
442 } else if (list_id == safe_browsing_util::DOWNLOADWHITELIST) {
443 return download_whitelist_store_.get();
444 } else if (list_id == safe_browsing_util::EXTENSIONBLACKLIST) {
445 return extension_blacklist_store_.get();
446 } else if (list_id == safe_browsing_util::SIDEEFFECTFREEWHITELIST) {
447 return side_effect_free_whitelist_store_.get();
448 } else if (list_id == safe_browsing_util::IPBLACKLIST) {
449 return ip_blacklist_store_.get();
450 } else if (list_id == safe_browsing_util::UNWANTEDURL) {
451 return unwanted_software_store_.get();
457 void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) {
458 UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type,
459 FAILURE_DATABASE_MAX);
462 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew()
463 : creation_loop_(base::MessageLoop::current()),
464 browse_store_(new SafeBrowsingStoreFile),
465 corruption_detected_(false),
466 change_detected_(false),
467 reset_factory_(this) {
468 DCHECK(browse_store_.get());
469 DCHECK(!download_store_.get());
470 DCHECK(!csd_whitelist_store_.get());
471 DCHECK(!download_whitelist_store_.get());
472 DCHECK(!extension_blacklist_store_.get());
473 DCHECK(!side_effect_free_whitelist_store_.get());
474 DCHECK(!ip_blacklist_store_.get());
475 DCHECK(!unwanted_software_store_.get());
478 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew(
479 SafeBrowsingStore* browse_store,
480 SafeBrowsingStore* download_store,
481 SafeBrowsingStore* csd_whitelist_store,
482 SafeBrowsingStore* download_whitelist_store,
483 SafeBrowsingStore* extension_blacklist_store,
484 SafeBrowsingStore* side_effect_free_whitelist_store,
485 SafeBrowsingStore* ip_blacklist_store,
486 SafeBrowsingStore* unwanted_software_store)
487 : creation_loop_(base::MessageLoop::current()),
488 browse_store_(browse_store),
489 download_store_(download_store),
490 csd_whitelist_store_(csd_whitelist_store),
491 download_whitelist_store_(download_whitelist_store),
492 extension_blacklist_store_(extension_blacklist_store),
493 side_effect_free_whitelist_store_(side_effect_free_whitelist_store),
494 ip_blacklist_store_(ip_blacklist_store),
495 unwanted_software_store_(unwanted_software_store),
496 corruption_detected_(false),
497 reset_factory_(this) {
498 DCHECK(browse_store_.get());
501 SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() {
502 // The DCHECK is disabled due to crbug.com/338486 .
503 // DCHECK_EQ(creation_loop_, base::MessageLoop::current());
506 void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) {
507 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
509 // This should not be run multiple times.
510 DCHECK(filename_base_.empty());
512 filename_base_ = filename_base;
514 // TODO(shess): The various stores are really only necessary while doing
515 // updates (see |UpdateFinished()|) or when querying a store directly (see
516 // |ContainsDownloadUrl()|).
517 // The store variables are also tested to see if a list is enabled. Perhaps
518 // the stores could be refactored into an update object so that they are only
519 // live in memory while being actively used. The sense of enabled probably
520 // belongs in protocol_manager or database_manager.
523 BrowseDBFilename(filename_base_),
524 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
525 base::Unretained(this)));
527 if (unwanted_software_store_.get()) {
528 unwanted_software_store_->Init(
529 UnwantedSoftwareDBFilename(filename_base_),
530 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
531 base::Unretained(this)));
535 // NOTE: There is no need to grab the lock in this function, since
536 // until it returns, there are no pointers to this class on other
537 // threads. Then again, that means there is no possibility of
538 // contention on the lock...
539 base::AutoLock locked(lookup_lock_);
540 prefix_gethash_cache_.clear();
541 LoadPrefixSet(BrowseDBFilename(filename_base_),
543 FAILURE_BROWSE_PREFIX_SET_READ);
544 if (unwanted_software_store_.get()) {
545 LoadPrefixSet(UnwantedSoftwareDBFilename(filename_base_),
546 &unwanted_software_prefix_set_,
547 FAILURE_UNWANTED_SOFTWARE_PREFIX_SET_READ);
551 if (download_store_.get()) {
552 download_store_->Init(
553 DownloadDBFilename(filename_base_),
554 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
555 base::Unretained(this)));
558 if (csd_whitelist_store_.get()) {
559 csd_whitelist_store_->Init(
560 CsdWhitelistDBFilename(filename_base_),
561 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
562 base::Unretained(this)));
564 std::vector<SBAddFullHash> full_hashes;
565 if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) {
566 LoadWhitelist(full_hashes, &csd_whitelist_);
568 WhitelistEverything(&csd_whitelist_);
571 WhitelistEverything(&csd_whitelist_); // Just to be safe.
574 if (download_whitelist_store_.get()) {
575 download_whitelist_store_->Init(
576 DownloadWhitelistDBFilename(filename_base_),
577 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
578 base::Unretained(this)));
580 std::vector<SBAddFullHash> full_hashes;
581 if (download_whitelist_store_->GetAddFullHashes(&full_hashes)) {
582 LoadWhitelist(full_hashes, &download_whitelist_);
584 WhitelistEverything(&download_whitelist_);
587 WhitelistEverything(&download_whitelist_); // Just to be safe.
590 if (extension_blacklist_store_.get()) {
591 extension_blacklist_store_->Init(
592 ExtensionBlacklistDBFilename(filename_base_),
593 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
594 base::Unretained(this)));
597 if (side_effect_free_whitelist_store_.get()) {
598 const base::FilePath side_effect_free_whitelist_filename =
599 SideEffectFreeWhitelistDBFilename(filename_base_);
600 const base::FilePath side_effect_free_whitelist_prefix_set_filename =
601 PrefixSetForFilename(side_effect_free_whitelist_filename);
602 side_effect_free_whitelist_store_->Init(
603 side_effect_free_whitelist_filename,
604 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
605 base::Unretained(this)));
607 // Only use the prefix set if database is present and non-empty.
608 if (GetFileSizeOrZero(side_effect_free_whitelist_filename)) {
609 const base::TimeTicks before = base::TimeTicks::Now();
610 side_effect_free_whitelist_prefix_set_ =
611 safe_browsing::PrefixSet::LoadFile(
612 side_effect_free_whitelist_prefix_set_filename);
613 UMA_HISTOGRAM_TIMES("SB2.SideEffectFreeWhitelistPrefixSetLoad",
614 base::TimeTicks::Now() - before);
615 if (!side_effect_free_whitelist_prefix_set_.get())
616 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ);
619 // Delete any files of the side-effect free sidelist that may be around
620 // from when it was previously enabled.
621 SafeBrowsingStoreFile::DeleteStore(
622 SideEffectFreeWhitelistDBFilename(filename_base_));
624 PrefixSetForFilename(SideEffectFreeWhitelistDBFilename(filename_base_)),
628 if (ip_blacklist_store_.get()) {
629 ip_blacklist_store_->Init(
630 IpBlacklistDBFilename(filename_base_),
631 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
632 base::Unretained(this)));
634 std::vector<SBAddFullHash> full_hashes;
635 if (ip_blacklist_store_->GetAddFullHashes(&full_hashes)) {
636 LoadIpBlacklist(full_hashes);
638 LoadIpBlacklist(std::vector<SBAddFullHash>()); // Clear the list.
643 bool SafeBrowsingDatabaseNew::ResetDatabase() {
644 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
646 // Delete files on disk.
647 // TODO(shess): Hard to see where one might want to delete without a
648 // reset. Perhaps inline |Delete()|?
652 // Reset objects in memory.
654 base::AutoLock locked(lookup_lock_);
655 prefix_gethash_cache_.clear();
656 browse_prefix_set_.reset();
657 side_effect_free_whitelist_prefix_set_.reset();
658 ip_blacklist_.clear();
659 unwanted_software_prefix_set_.reset();
661 // Wants to acquire the lock itself.
662 WhitelistEverything(&csd_whitelist_);
663 WhitelistEverything(&download_whitelist_);
667 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
669 std::vector<SBPrefix>* prefix_hits,
670 std::vector<SBFullHashResult>* cache_hits) {
671 return PrefixSetContainsUrl(
672 url, &browse_prefix_set_, prefix_hits, cache_hits);
675 bool SafeBrowsingDatabaseNew::ContainsUnwantedSoftwareUrl(
677 std::vector<SBPrefix>* prefix_hits,
678 std::vector<SBFullHashResult>* cache_hits) {
679 return PrefixSetContainsUrl(
680 url, &unwanted_software_prefix_set_, prefix_hits, cache_hits);
683 bool SafeBrowsingDatabaseNew::PrefixSetContainsUrl(
685 scoped_ptr<safe_browsing::PrefixSet>* prefix_set_getter,
686 std::vector<SBPrefix>* prefix_hits,
687 std::vector<SBFullHashResult>* cache_hits) {
688 // Clear the results first.
689 prefix_hits->clear();
692 std::vector<SBFullHash> full_hashes;
693 UrlToFullHashes(url, false, &full_hashes);
694 if (full_hashes.empty())
697 return PrefixSetContainsUrlHashes(
698 full_hashes, prefix_set_getter, prefix_hits, cache_hits);
701 bool SafeBrowsingDatabaseNew::ContainsBrowseUrlHashesForTesting(
702 const std::vector<SBFullHash>& full_hashes,
703 std::vector<SBPrefix>* prefix_hits,
704 std::vector<SBFullHashResult>* cache_hits) {
705 return PrefixSetContainsUrlHashes(
706 full_hashes, &browse_prefix_set_, prefix_hits, cache_hits);
709 bool SafeBrowsingDatabaseNew::PrefixSetContainsUrlHashes(
710 const std::vector<SBFullHash>& full_hashes,
711 scoped_ptr<safe_browsing::PrefixSet>* prefix_set_getter,
712 std::vector<SBPrefix>* prefix_hits,
713 std::vector<SBFullHashResult>* cache_hits) {
714 // Used to determine cache expiration.
715 const base::Time now = base::Time::Now();
717 // This function is called on the I/O thread, prevent changes to
718 // filter and caches.
719 base::AutoLock locked(lookup_lock_);
721 // |prefix_set| is empty until it is either read from disk, or the first
722 // update populates it. Bail out without a hit if not yet available.
723 // |prefix_set_getter| can only be accessed while holding |lookup_lock_| hence
724 // why it is passed as a parameter rather than passing the |prefix_set|
726 safe_browsing::PrefixSet* prefix_set = prefix_set_getter->get();
730 for (size_t i = 0; i < full_hashes.size(); ++i) {
731 if (!GetCachedFullHash(
732 &prefix_gethash_cache_, full_hashes[i], now, cache_hits)) {
733 // No valid cached result, check the database.
734 if (prefix_set->Exists(full_hashes[i]))
735 prefix_hits->push_back(full_hashes[i].prefix);
739 // Multiple full hashes could share prefix, remove duplicates.
740 std::sort(prefix_hits->begin(), prefix_hits->end());
741 prefix_hits->erase(std::unique(prefix_hits->begin(), prefix_hits->end()),
744 return !prefix_hits->empty() || !cache_hits->empty();
747 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
748 const std::vector<GURL>& urls,
749 std::vector<SBPrefix>* prefix_hits) {
750 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
752 // Ignore this check when download checking is not enabled.
753 if (!download_store_.get())
756 std::vector<SBPrefix> prefixes;
757 GetDownloadUrlPrefixes(urls, &prefixes);
758 return MatchAddPrefixes(download_store_.get(),
759 safe_browsing_util::BINURL % 2,
764 bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) {
765 // This method is theoretically thread-safe but we expect all calls to
766 // originate from the IO thread.
767 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
768 std::vector<SBFullHash> full_hashes;
769 UrlToFullHashes(url, true, &full_hashes);
770 return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
773 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedUrl(const GURL& url) {
774 std::vector<SBFullHash> full_hashes;
775 UrlToFullHashes(url, true, &full_hashes);
776 return ContainsWhitelistedHashes(download_whitelist_, full_hashes);
779 bool SafeBrowsingDatabaseNew::ContainsExtensionPrefixes(
780 const std::vector<SBPrefix>& prefixes,
781 std::vector<SBPrefix>* prefix_hits) {
782 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
783 if (!extension_blacklist_store_)
786 return MatchAddPrefixes(extension_blacklist_store_.get(),
787 safe_browsing_util::EXTENSIONBLACKLIST % 2,
792 bool SafeBrowsingDatabaseNew::ContainsSideEffectFreeWhitelistUrl(
797 safe_browsing_util::CanonicalizeUrl(url, &host, &path, &query);
798 std::string url_to_check = host + path;
800 url_to_check += "?" + query;
801 SBFullHash full_hash = SBFullHashForString(url_to_check);
803 // This function can be called on any thread, so lock against any changes
804 base::AutoLock locked(lookup_lock_);
806 // |side_effect_free_whitelist_prefix_set_| is empty until it is either read
807 // from disk, or the first update populates it. Bail out without a hit if
808 // not yet available.
809 if (!side_effect_free_whitelist_prefix_set_.get())
812 return side_effect_free_whitelist_prefix_set_->Exists(full_hash);
815 bool SafeBrowsingDatabaseNew::ContainsMalwareIP(const std::string& ip_address) {
816 net::IPAddressNumber ip_number;
817 if (!net::ParseIPLiteralToNumber(ip_address, &ip_number))
819 if (ip_number.size() == net::kIPv4AddressSize)
820 ip_number = net::ConvertIPv4NumberToIPv6Number(ip_number);
821 if (ip_number.size() != net::kIPv6AddressSize)
822 return false; // better safe than sorry.
824 // This function can be called from any thread.
825 base::AutoLock locked(lookup_lock_);
826 for (IPBlacklist::const_iterator it = ip_blacklist_.begin();
827 it != ip_blacklist_.end();
829 const std::string& mask = it->first;
830 DCHECK_EQ(mask.size(), ip_number.size());
831 std::string subnet(net::kIPv6AddressSize, '\0');
832 for (size_t i = 0; i < net::kIPv6AddressSize; ++i) {
833 subnet[i] = ip_number[i] & mask[i];
835 const std::string hash = base::SHA1HashString(subnet);
836 DVLOG(2) << "Lookup Malware IP: "
837 << " ip:" << ip_address
838 << " mask:" << base::HexEncode(mask.data(), mask.size())
839 << " subnet:" << base::HexEncode(subnet.data(), subnet.size())
840 << " hash:" << base::HexEncode(hash.data(), hash.size());
841 if (it->second.count(hash) > 0) {
848 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedString(
849 const std::string& str) {
850 std::vector<SBFullHash> hashes;
851 hashes.push_back(SBFullHashForString(str));
852 return ContainsWhitelistedHashes(download_whitelist_, hashes);
855 bool SafeBrowsingDatabaseNew::ContainsWhitelistedHashes(
856 const SBWhitelist& whitelist,
857 const std::vector<SBFullHash>& hashes) {
858 base::AutoLock l(lookup_lock_);
859 if (whitelist.second)
861 for (std::vector<SBFullHash>::const_iterator it = hashes.begin();
862 it != hashes.end(); ++it) {
863 if (std::binary_search(whitelist.first.begin(), whitelist.first.end(),
864 *it, SBFullHashLess)) {
871 // Helper to insert add-chunk entries.
872 void SafeBrowsingDatabaseNew::InsertAddChunk(
873 SafeBrowsingStore* store,
874 const safe_browsing_util::ListType list_id,
875 const SBChunkData& chunk_data) {
876 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
879 // The server can give us a chunk that we already have because
880 // it's part of a range. Don't add it again.
881 const int chunk_id = chunk_data.ChunkNumber();
882 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
883 if (store->CheckAddChunk(encoded_chunk_id))
886 store->SetAddChunk(encoded_chunk_id);
887 if (chunk_data.IsPrefix()) {
888 const size_t c = chunk_data.PrefixCount();
889 for (size_t i = 0; i < c; ++i) {
890 STATS_COUNTER("SB.PrefixAdd", 1);
891 store->WriteAddPrefix(encoded_chunk_id, chunk_data.PrefixAt(i));
894 const size_t c = chunk_data.FullHashCount();
895 for (size_t i = 0; i < c; ++i) {
896 STATS_COUNTER("SB.PrefixAddFull", 1);
897 store->WriteAddHash(encoded_chunk_id, chunk_data.FullHashAt(i));
902 // Helper to insert sub-chunk entries.
903 void SafeBrowsingDatabaseNew::InsertSubChunk(
904 SafeBrowsingStore* store,
905 const safe_browsing_util::ListType list_id,
906 const SBChunkData& chunk_data) {
907 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
910 // The server can give us a chunk that we already have because
911 // it's part of a range. Don't add it again.
912 const int chunk_id = chunk_data.ChunkNumber();
913 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
914 if (store->CheckSubChunk(encoded_chunk_id))
917 store->SetSubChunk(encoded_chunk_id);
918 if (chunk_data.IsPrefix()) {
919 const size_t c = chunk_data.PrefixCount();
920 for (size_t i = 0; i < c; ++i) {
921 STATS_COUNTER("SB.PrefixSub", 1);
922 const int add_chunk_id = chunk_data.AddChunkNumberAt(i);
923 const int encoded_add_chunk_id = EncodeChunkId(add_chunk_id, list_id);
924 store->WriteSubPrefix(encoded_chunk_id, encoded_add_chunk_id,
925 chunk_data.PrefixAt(i));
928 const size_t c = chunk_data.FullHashCount();
929 for (size_t i = 0; i < c; ++i) {
930 STATS_COUNTER("SB.PrefixSubFull", 1);
931 const int add_chunk_id = chunk_data.AddChunkNumberAt(i);
932 const int encoded_add_chunk_id = EncodeChunkId(add_chunk_id, list_id);
933 store->WriteSubHash(encoded_chunk_id, encoded_add_chunk_id,
934 chunk_data.FullHashAt(i));
939 void SafeBrowsingDatabaseNew::InsertChunks(
940 const std::string& list_name,
941 const std::vector<SBChunkData*>& chunks) {
942 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
944 if (corruption_detected_ || chunks.empty())
947 const base::TimeTicks before = base::TimeTicks::Now();
949 // TODO(shess): The caller should just pass list_id.
950 const safe_browsing_util::ListType list_id =
951 safe_browsing_util::GetListId(list_name);
953 SafeBrowsingStore* store = GetStore(list_id);
956 change_detected_ = true;
958 // TODO(shess): I believe that the list is always add or sub. Can this use
959 // that productively?
961 for (size_t i = 0; i < chunks.size(); ++i) {
962 if (chunks[i]->IsAdd()) {
963 InsertAddChunk(store, list_id, *chunks[i]);
964 } else if (chunks[i]->IsSub()) {
965 InsertSubChunk(store, list_id, *chunks[i]);
970 store->FinishChunk();
972 UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before);
975 void SafeBrowsingDatabaseNew::DeleteChunks(
976 const std::vector<SBChunkDelete>& chunk_deletes) {
977 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
979 if (corruption_detected_ || chunk_deletes.empty())
982 const std::string& list_name = chunk_deletes.front().list_name;
983 const safe_browsing_util::ListType list_id =
984 safe_browsing_util::GetListId(list_name);
986 SafeBrowsingStore* store = GetStore(list_id);
989 change_detected_ = true;
991 for (size_t i = 0; i < chunk_deletes.size(); ++i) {
992 std::vector<int> chunk_numbers;
993 RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers);
994 for (size_t j = 0; j < chunk_numbers.size(); ++j) {
995 const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id);
996 if (chunk_deletes[i].is_sub_del)
997 store->DeleteSubChunk(encoded_chunk_id);
999 store->DeleteAddChunk(encoded_chunk_id);
1004 void SafeBrowsingDatabaseNew::CacheHashResults(
1005 const std::vector<SBPrefix>& prefixes,
1006 const std::vector<SBFullHashResult>& full_hits,
1007 const base::TimeDelta& cache_lifetime) {
1008 const base::Time expire_after = base::Time::Now() + cache_lifetime;
1010 // This is called on the I/O thread, lock against updates.
1011 base::AutoLock locked(lookup_lock_);
1013 // Create or reset all cached results for these prefixes.
1014 for (size_t i = 0; i < prefixes.size(); ++i) {
1015 prefix_gethash_cache_[prefixes[i]] = SBCachedFullHashResult(expire_after);
1018 // Insert any fullhash hits. Note that there may be one, multiple, or no
1019 // fullhashes for any given entry in |prefixes|.
1020 for (size_t i = 0; i < full_hits.size(); ++i) {
1021 const SBPrefix prefix = full_hits[i].hash.prefix;
1022 prefix_gethash_cache_[prefix].full_hashes.push_back(full_hits[i]);
1026 bool SafeBrowsingDatabaseNew::UpdateStarted(
1027 std::vector<SBListChunkRanges>* lists) {
1028 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1031 // If |BeginUpdate()| fails, reset the database.
1032 if (!browse_store_->BeginUpdate()) {
1033 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN);
1034 HandleCorruptDatabase();
1038 if (download_store_.get() && !download_store_->BeginUpdate()) {
1039 RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN);
1040 HandleCorruptDatabase();
1044 if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) {
1045 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
1046 HandleCorruptDatabase();
1050 if (download_whitelist_store_.get() &&
1051 !download_whitelist_store_->BeginUpdate()) {
1052 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
1053 HandleCorruptDatabase();
1057 if (extension_blacklist_store_ &&
1058 !extension_blacklist_store_->BeginUpdate()) {
1059 RecordFailure(FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN);
1060 HandleCorruptDatabase();
1064 if (side_effect_free_whitelist_store_ &&
1065 !side_effect_free_whitelist_store_->BeginUpdate()) {
1066 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN);
1067 HandleCorruptDatabase();
1071 if (ip_blacklist_store_ && !ip_blacklist_store_->BeginUpdate()) {
1072 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN);
1073 HandleCorruptDatabase();
1077 if (unwanted_software_store_ && !unwanted_software_store_->BeginUpdate()) {
1078 RecordFailure(FAILURE_UNWANTED_SOFTWARE_DATABASE_UPDATE_BEGIN);
1079 HandleCorruptDatabase();
1084 base::AutoLock locked(lookup_lock_);
1085 // Cached fullhash results must be cleared on every database update (whether
1086 // successful or not.)
1087 prefix_gethash_cache_.clear();
1090 UpdateChunkRangesForLists(browse_store_.get(),
1091 safe_browsing_util::kMalwareList,
1092 safe_browsing_util::kPhishingList,
1095 // NOTE(shess): |download_store_| used to contain kBinHashList, which has been
1096 // deprecated. Code to delete the list from the store shows ~15k hits/day as
1097 // of Feb 2014, so it has been removed. Everything _should_ be resilient to
1098 // extra data of that sort.
1099 UpdateChunkRangesForList(download_store_.get(),
1100 safe_browsing_util::kBinUrlList, lists);
1102 UpdateChunkRangesForList(csd_whitelist_store_.get(),
1103 safe_browsing_util::kCsdWhiteList, lists);
1105 UpdateChunkRangesForList(download_whitelist_store_.get(),
1106 safe_browsing_util::kDownloadWhiteList, lists);
1108 UpdateChunkRangesForList(extension_blacklist_store_.get(),
1109 safe_browsing_util::kExtensionBlacklist, lists);
1111 UpdateChunkRangesForList(side_effect_free_whitelist_store_.get(),
1112 safe_browsing_util::kSideEffectFreeWhitelist, lists);
1114 UpdateChunkRangesForList(ip_blacklist_store_.get(),
1115 safe_browsing_util::kIPBlacklist, lists);
1117 UpdateChunkRangesForList(unwanted_software_store_.get(),
1118 safe_browsing_util::kUnwantedUrlList,
1121 corruption_detected_ = false;
1122 change_detected_ = false;
1126 void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) {
1127 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1129 // The update may have failed due to corrupt storage (for instance,
1130 // an excessive number of invalid add_chunks and sub_chunks).
1131 // Double-check that the databases are valid.
1132 // TODO(shess): Providing a checksum for the add_chunk and sub_chunk
1133 // sections would allow throwing a corruption error in
1135 if (!update_succeeded) {
1136 if (!browse_store_->CheckValidity())
1137 DLOG(ERROR) << "Safe-browsing browse database corrupt.";
1139 if (download_store_.get() && !download_store_->CheckValidity())
1140 DLOG(ERROR) << "Safe-browsing download database corrupt.";
1142 if (csd_whitelist_store_.get() && !csd_whitelist_store_->CheckValidity())
1143 DLOG(ERROR) << "Safe-browsing csd whitelist database corrupt.";
1145 if (download_whitelist_store_.get() &&
1146 !download_whitelist_store_->CheckValidity()) {
1147 DLOG(ERROR) << "Safe-browsing download whitelist database corrupt.";
1150 if (extension_blacklist_store_ &&
1151 !extension_blacklist_store_->CheckValidity()) {
1152 DLOG(ERROR) << "Safe-browsing extension blacklist database corrupt.";
1155 if (side_effect_free_whitelist_store_ &&
1156 !side_effect_free_whitelist_store_->CheckValidity()) {
1157 DLOG(ERROR) << "Safe-browsing side-effect free whitelist database "
1161 if (ip_blacklist_store_ && !ip_blacklist_store_->CheckValidity()) {
1162 DLOG(ERROR) << "Safe-browsing IP blacklist database corrupt.";
1165 if (unwanted_software_store_ &&
1166 !unwanted_software_store_->CheckValidity()) {
1167 DLOG(ERROR) << "Unwanted software url list database corrupt.";
1171 if (corruption_detected_)
1174 // Unroll the transaction if there was a protocol error or if the
1175 // transaction was empty. This will leave the prefix set, the
1176 // pending hashes, and the prefix miss cache in place.
1177 if (!update_succeeded || !change_detected_) {
1178 // Track empty updates to answer questions at http://crbug.com/72216 .
1179 if (update_succeeded && !change_detected_)
1180 UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0);
1181 browse_store_->CancelUpdate();
1182 if (download_store_.get())
1183 download_store_->CancelUpdate();
1184 if (csd_whitelist_store_.get())
1185 csd_whitelist_store_->CancelUpdate();
1186 if (download_whitelist_store_.get())
1187 download_whitelist_store_->CancelUpdate();
1188 if (extension_blacklist_store_)
1189 extension_blacklist_store_->CancelUpdate();
1190 if (side_effect_free_whitelist_store_)
1191 side_effect_free_whitelist_store_->CancelUpdate();
1192 if (ip_blacklist_store_)
1193 ip_blacklist_store_->CancelUpdate();
1194 if (unwanted_software_store_)
1195 unwanted_software_store_->CancelUpdate();
1199 if (download_store_) {
1200 int64 size_bytes = UpdateHashPrefixStore(
1201 DownloadDBFilename(filename_base_),
1202 download_store_.get(),
1203 FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH);
1204 UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
1205 static_cast<int>(size_bytes / 1024));
1208 UpdatePrefixSetUrlStore(BrowseDBFilename(filename_base_),
1209 browse_store_.get(),
1210 &browse_prefix_set_,
1211 FAILURE_BROWSE_DATABASE_UPDATE_FINISH,
1212 FAILURE_BROWSE_PREFIX_SET_WRITE);
1214 UpdateWhitelistStore(CsdWhitelistDBFilename(filename_base_),
1215 csd_whitelist_store_.get(),
1217 UpdateWhitelistStore(DownloadWhitelistDBFilename(filename_base_),
1218 download_whitelist_store_.get(),
1219 &download_whitelist_);
1221 if (extension_blacklist_store_) {
1222 int64 size_bytes = UpdateHashPrefixStore(
1223 ExtensionBlacklistDBFilename(filename_base_),
1224 extension_blacklist_store_.get(),
1225 FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH);
1226 UMA_HISTOGRAM_COUNTS("SB2.ExtensionBlacklistKilobytes",
1227 static_cast<int>(size_bytes / 1024));
1230 if (side_effect_free_whitelist_store_)
1231 UpdateSideEffectFreeWhitelistStore();
1233 if (ip_blacklist_store_)
1234 UpdateIpBlacklistStore();
1236 if (unwanted_software_store_) {
1237 UpdatePrefixSetUrlStore(UnwantedSoftwareDBFilename(filename_base_),
1238 unwanted_software_store_.get(),
1239 &unwanted_software_prefix_set_,
1240 FAILURE_UNWANTED_SOFTWARE_DATABASE_UPDATE_FINISH,
1241 FAILURE_UNWANTED_SOFTWARE_PREFIX_SET_WRITE);
1245 void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
1246 const base::FilePath& store_filename,
1247 SafeBrowsingStore* store,
1248 SBWhitelist* whitelist) {
1252 // Note: |builder| will not be empty. The current data store implementation
1253 // stores all full-length hashes as both full and prefix hashes.
1254 safe_browsing::PrefixSetBuilder builder;
1255 std::vector<SBAddFullHash> full_hashes;
1256 if (!store->FinishUpdate(&builder, &full_hashes)) {
1257 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH);
1258 WhitelistEverything(whitelist);
1262 #if defined(OS_MACOSX)
1263 base::mac::SetFileBackupExclusion(store_filename);
1266 LoadWhitelist(full_hashes, whitelist);
1269 int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
1270 const base::FilePath& store_filename,
1271 SafeBrowsingStore* store,
1272 FailureType failure_type) {
1273 // These results are not used after this call. Simply ignore the
1274 // returned value after FinishUpdate(...).
1275 safe_browsing::PrefixSetBuilder builder;
1276 std::vector<SBAddFullHash> add_full_hashes_result;
1278 if (!store->FinishUpdate(&builder, &add_full_hashes_result))
1279 RecordFailure(failure_type);
1281 #if defined(OS_MACOSX)
1282 base::mac::SetFileBackupExclusion(store_filename);
1285 return GetFileSizeOrZero(store_filename);
1288 void SafeBrowsingDatabaseNew::UpdatePrefixSetUrlStore(
1289 const base::FilePath& db_filename,
1290 SafeBrowsingStore* url_store,
1291 scoped_ptr<safe_browsing::PrefixSet>* prefix_set,
1292 FailureType finish_failure_type,
1293 FailureType write_failure_type) {
1294 // Measure the amount of IO during the filter build.
1295 base::IoCounters io_before, io_after;
1296 base::ProcessHandle handle = base::GetCurrentProcessHandle();
1297 scoped_ptr<base::ProcessMetrics> metric(
1298 #if !defined(OS_MACOSX)
1299 base::ProcessMetrics::CreateProcessMetrics(handle)
1301 // Getting stats only for the current process is enough, so NULL is fine.
1302 base::ProcessMetrics::CreateProcessMetrics(handle, NULL)
1306 // IoCounters are currently not supported on Mac, and may not be
1307 // available for Linux, so we check the result and only show IO
1308 // stats if they are available.
1309 const bool got_counters = metric->GetIOCounters(&io_before);
1311 const base::TimeTicks before = base::TimeTicks::Now();
1313 // TODO(shess): Perhaps refactor to let builder accumulate full hashes on the
1314 // fly? Other clients use the SBAddFullHash vector, but AFAICT they only use
1315 // the SBFullHash portion. It would need an accessor on PrefixSet.
1316 safe_browsing::PrefixSetBuilder builder;
1317 std::vector<SBAddFullHash> add_full_hashes;
1318 if (!url_store->FinishUpdate(&builder, &add_full_hashes)) {
1319 RecordFailure(finish_failure_type);
1323 std::vector<SBFullHash> full_hash_results;
1324 for (size_t i = 0; i < add_full_hashes.size(); ++i) {
1325 full_hash_results.push_back(add_full_hashes[i].full_hash);
1328 scoped_ptr<safe_browsing::PrefixSet> new_prefix_set(
1329 builder.GetPrefixSet(full_hash_results));
1331 // Swap in the newly built filter.
1333 base::AutoLock locked(lookup_lock_);
1334 prefix_set->swap(new_prefix_set);
1337 UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before);
1339 // Persist the prefix set to disk. Note: there is no need to lock since the
1340 // only write to |*prefix_set| is on this thread (in the swap() above).
1341 // TODO(gab): Strengthen this requirement by design (const pointers) rather
1342 // than assumptions.
1343 WritePrefixSet(db_filename, prefix_set->get(), write_failure_type);
1345 // Gather statistics.
1346 if (got_counters && metric->GetIOCounters(&io_after)) {
1347 UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes",
1348 static_cast<int>(io_after.ReadTransferCount -
1349 io_before.ReadTransferCount) / 1024);
1350 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes",
1351 static_cast<int>(io_after.WriteTransferCount -
1352 io_before.WriteTransferCount) / 1024);
1353 UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
1354 static_cast<int>(io_after.ReadOperationCount -
1355 io_before.ReadOperationCount));
1356 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
1357 static_cast<int>(io_after.WriteOperationCount -
1358 io_before.WriteOperationCount));
1361 const int64 file_size = GetFileSizeOrZero(db_filename);
1362 UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
1363 static_cast<int>(file_size / 1024));
1365 #if defined(OS_MACOSX)
1366 base::mac::SetFileBackupExclusion(db_filename);
1370 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
1371 safe_browsing::PrefixSetBuilder builder;
1372 std::vector<SBAddFullHash> add_full_hashes_result;
1374 if (!side_effect_free_whitelist_store_->FinishUpdate(
1375 &builder, &add_full_hashes_result)) {
1376 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH);
1379 scoped_ptr<safe_browsing::PrefixSet>
1380 prefix_set(builder.GetPrefixSetNoHashes());
1382 // Swap in the newly built prefix set.
1384 base::AutoLock locked(lookup_lock_);
1385 side_effect_free_whitelist_prefix_set_.swap(prefix_set);
1388 const base::FilePath side_effect_free_whitelist_filename =
1389 SideEffectFreeWhitelistDBFilename(filename_base_);
1390 const base::FilePath side_effect_free_whitelist_prefix_set_filename =
1391 PrefixSetForFilename(side_effect_free_whitelist_filename);
1392 const base::TimeTicks before = base::TimeTicks::Now();
1393 const bool write_ok = side_effect_free_whitelist_prefix_set_->WriteFile(
1394 side_effect_free_whitelist_prefix_set_filename);
1395 UMA_HISTOGRAM_TIMES("SB2.SideEffectFreePrefixSetWrite",
1396 base::TimeTicks::Now() - before);
1399 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE);
1401 // Gather statistics.
1402 int64 file_size = GetFileSizeOrZero(
1403 side_effect_free_whitelist_prefix_set_filename);
1404 UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistPrefixSetKilobytes",
1405 static_cast<int>(file_size / 1024));
1406 file_size = GetFileSizeOrZero(side_effect_free_whitelist_filename);
1407 UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistDatabaseKilobytes",
1408 static_cast<int>(file_size / 1024));
1410 #if defined(OS_MACOSX)
1411 base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename);
1412 base::mac::SetFileBackupExclusion(
1413 side_effect_free_whitelist_prefix_set_filename);
1417 void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() {
1418 // Note: prefixes will not be empty. The current data store implementation
1419 // stores all full-length hashes as both full and prefix hashes.
1420 safe_browsing::PrefixSetBuilder builder;
1421 std::vector<SBAddFullHash> full_hashes;
1422 if (!ip_blacklist_store_->FinishUpdate(&builder, &full_hashes)) {
1423 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH);
1424 LoadIpBlacklist(std::vector<SBAddFullHash>()); // Clear the list.
1428 #if defined(OS_MACOSX)
1429 base::mac::SetFileBackupExclusion(IpBlacklistDBFilename(filename_base_));
1432 LoadIpBlacklist(full_hashes);
1435 void SafeBrowsingDatabaseNew::HandleCorruptDatabase() {
1436 // Reset the database after the current task has unwound (but only
1437 // reset once within the scope of a given task).
1438 if (!reset_factory_.HasWeakPtrs()) {
1439 RecordFailure(FAILURE_DATABASE_CORRUPT);
1440 base::MessageLoop::current()->PostTask(FROM_HERE,
1441 base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase,
1442 reset_factory_.GetWeakPtr()));
1446 void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
1447 RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER);
1448 corruption_detected_ = true; // Stop updating the database.
1451 // NOTE(shess): ResetDatabase() should remove the corruption, so this should
1452 // only happen once. If you are here because you are hitting this after a
1453 // restart, then I would be very interested in working with you to figure out
1454 // what is happening, since it may affect real users.
1455 DLOG(FATAL) << "SafeBrowsing database was corrupt and reset";
1458 // TODO(shess): I'm not clear why this code doesn't have any
1459 // real error-handling.
1460 void SafeBrowsingDatabaseNew::LoadPrefixSet(
1461 const base::FilePath& db_filename,
1462 scoped_ptr<safe_browsing::PrefixSet>* prefix_set,
1463 FailureType read_failure_type) {
1467 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1468 DCHECK(!filename_base_.empty());
1470 const base::FilePath prefix_set_filename = PrefixSetForFilename(db_filename);
1472 // Only use the prefix set if database is present and non-empty.
1473 if (!GetFileSizeOrZero(db_filename))
1476 // Cleanup any stale bloom filter (no longer used).
1477 // TODO(shess): Track existence to drive removal of this code?
1478 const base::FilePath bloom_filter_filename =
1479 BloomFilterForFilename(db_filename);
1480 base::DeleteFile(bloom_filter_filename, false);
1482 const base::TimeTicks before = base::TimeTicks::Now();
1483 *prefix_set = safe_browsing::PrefixSet::LoadFile(prefix_set_filename);
1484 UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before);
1486 if (!prefix_set->get())
1487 RecordFailure(read_failure_type);
1490 bool SafeBrowsingDatabaseNew::Delete() {
1491 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1492 DCHECK(!filename_base_.empty());
1494 // TODO(shess): This is a mess. SafeBrowsingFileStore::Delete() closes the
1495 // store before calling DeleteStore(). DeleteStore() deletes transient files
1496 // in addition to the main file. Probably all of these should be converted to
1497 // a helper which calls Delete() if the store exists, else DeleteStore() on
1498 // the generated filename.
1500 // TODO(shess): Determine if the histograms are useful in any way. I cannot
1501 // recall any action taken as a result of their values, in which case it might
1502 // make more sense to histogram an overall thumbs-up/-down and just dig deeper
1503 // if something looks wrong.
1505 const bool r1 = browse_store_->Delete();
1507 RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1509 const bool r2 = download_store_.get() ? download_store_->Delete() : true;
1511 RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1513 const bool r3 = csd_whitelist_store_.get() ?
1514 csd_whitelist_store_->Delete() : true;
1516 RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1518 const bool r4 = download_whitelist_store_.get() ?
1519 download_whitelist_store_->Delete() : true;
1521 RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1523 const base::FilePath browse_filename = BrowseDBFilename(filename_base_);
1524 const base::FilePath bloom_filter_filename =
1525 BloomFilterForFilename(browse_filename);
1526 const bool r5 = base::DeleteFile(bloom_filter_filename, false);
1528 RecordFailure(FAILURE_DATABASE_FILTER_DELETE);
1530 const base::FilePath browse_prefix_set_filename =
1531 PrefixSetForFilename(browse_filename);
1532 const bool r6 = base::DeleteFile(browse_prefix_set_filename, false);
1534 RecordFailure(FAILURE_BROWSE_PREFIX_SET_DELETE);
1536 const base::FilePath extension_blacklist_filename =
1537 ExtensionBlacklistDBFilename(filename_base_);
1538 const bool r7 = base::DeleteFile(extension_blacklist_filename, false);
1540 RecordFailure(FAILURE_EXTENSION_BLACKLIST_DELETE);
1542 const base::FilePath side_effect_free_whitelist_filename =
1543 SideEffectFreeWhitelistDBFilename(filename_base_);
1544 const bool r8 = base::DeleteFile(side_effect_free_whitelist_filename,
1547 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE);
1549 const base::FilePath side_effect_free_whitelist_prefix_set_filename =
1550 PrefixSetForFilename(side_effect_free_whitelist_filename);
1551 const bool r9 = base::DeleteFile(
1552 side_effect_free_whitelist_prefix_set_filename,
1555 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE);
1557 const bool r10 = base::DeleteFile(IpBlacklistDBFilename(filename_base_),
1560 RecordFailure(FAILURE_IP_BLACKLIST_DELETE);
1563 base::DeleteFile(UnwantedSoftwareDBFilename(filename_base_), false);
1565 RecordFailure(FAILURE_UNWANTED_SOFTWARE_PREFIX_SET_DELETE);
1567 return r1 && r2 && r3 && r4 && r5 && r6 && r7 && r8 && r9 && r10 && r11;
1570 void SafeBrowsingDatabaseNew::WritePrefixSet(
1571 const base::FilePath& db_filename,
1572 safe_browsing::PrefixSet* prefix_set,
1573 FailureType write_failure_type) {
1574 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1579 const base::FilePath prefix_set_filename = PrefixSetForFilename(db_filename);
1581 const base::TimeTicks before = base::TimeTicks::Now();
1582 const bool write_ok = prefix_set->WriteFile(prefix_set_filename);
1583 UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before);
1585 const int64 file_size = GetFileSizeOrZero(prefix_set_filename);
1586 UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes",
1587 static_cast<int>(file_size / 1024));
1590 RecordFailure(write_failure_type);
1592 #if defined(OS_MACOSX)
1593 base::mac::SetFileBackupExclusion(prefix_set_filename);
1597 void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist* whitelist) {
1598 base::AutoLock locked(lookup_lock_);
1599 whitelist->second = true;
1600 whitelist->first.clear();
1603 void SafeBrowsingDatabaseNew::LoadWhitelist(
1604 const std::vector<SBAddFullHash>& full_hashes,
1605 SBWhitelist* whitelist) {
1606 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1607 if (full_hashes.size() > kMaxWhitelistSize) {
1608 WhitelistEverything(whitelist);
1612 std::vector<SBFullHash> new_whitelist;
1613 new_whitelist.reserve(full_hashes.size());
1614 for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
1615 it != full_hashes.end(); ++it) {
1616 new_whitelist.push_back(it->full_hash);
1618 std::sort(new_whitelist.begin(), new_whitelist.end(), SBFullHashLess);
1620 SBFullHash kill_switch = SBFullHashForString(kWhitelistKillSwitchUrl);
1621 if (std::binary_search(new_whitelist.begin(), new_whitelist.end(),
1622 kill_switch, SBFullHashLess)) {
1623 // The kill switch is whitelisted hence we whitelist all URLs.
1624 WhitelistEverything(whitelist);
1626 base::AutoLock locked(lookup_lock_);
1627 whitelist->second = false;
1628 whitelist->first.swap(new_whitelist);
1632 void SafeBrowsingDatabaseNew::LoadIpBlacklist(
1633 const std::vector<SBAddFullHash>& full_hashes) {
1634 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1635 IPBlacklist new_blacklist;
1636 for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
1637 it != full_hashes.end();
1639 const char* full_hash = it->full_hash.full_hash;
1640 DCHECK_EQ(crypto::kSHA256Length, arraysize(it->full_hash.full_hash));
1641 // The format of the IP blacklist is:
1642 // SHA-1(IPv6 prefix) + uint8(prefix size) + 11 unused bytes.
1643 std::string hashed_ip_prefix(full_hash, base::kSHA1Length);
1644 size_t prefix_size = static_cast<uint8>(full_hash[base::kSHA1Length]);
1645 if (prefix_size > kMaxIpPrefixSize || prefix_size < kMinIpPrefixSize) {
1646 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_INVALID);
1647 new_blacklist.clear(); // Load empty blacklist.
1651 // We precompute the mask for the given subnet size to speed up lookups.
1652 // Basically we need to create a 16B long string which has the highest
1653 // |size| bits sets to one.
1654 std::string mask(net::kIPv6AddressSize, '\0');
1655 mask.replace(0, prefix_size / 8, prefix_size / 8, '\xFF');
1656 if ((prefix_size % 8) != 0) {
1657 mask[prefix_size / 8] = 0xFF << (8 - (prefix_size % 8));
1659 DVLOG(2) << "Inserting malicious IP: "
1660 << " raw:" << base::HexEncode(full_hash, crypto::kSHA256Length)
1661 << " mask:" << base::HexEncode(mask.data(), mask.size())
1662 << " prefix_size:" << prefix_size
1663 << " hashed_ip:" << base::HexEncode(hashed_ip_prefix.data(),
1664 hashed_ip_prefix.size());
1665 new_blacklist[mask].insert(hashed_ip_prefix);
1668 base::AutoLock locked(lookup_lock_);
1669 ip_blacklist_.swap(new_blacklist);
1672 bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() {
1673 SBFullHash malware_kill_switch = SBFullHashForString(kMalwareIPKillSwitchUrl);
1674 std::vector<SBFullHash> full_hashes;
1675 full_hashes.push_back(malware_kill_switch);
1676 return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
1679 bool SafeBrowsingDatabaseNew::IsCsdWhitelistKillSwitchOn() {
1680 return csd_whitelist_.second;