Upstream version 6.35.121.0
[platform/framework/web/crosswalk.git] / src / chrome / browser / safe_browsing / safe_browsing_database.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/safe_browsing/safe_browsing_database.h"
6
7 #include <algorithm>
8 #include <iterator>
9
10 #include "base/bind.h"
11 #include "base/file_util.h"
12 #include "base/message_loop/message_loop.h"
13 #include "base/metrics/histogram.h"
14 #include "base/metrics/stats_counters.h"
15 #include "base/process/process.h"
16 #include "base/process/process_metrics.h"
17 #include "base/sha1.h"
18 #include "base/strings/string_number_conversions.h"
19 #include "base/strings/stringprintf.h"
20 #include "base/time/time.h"
21 #include "chrome/browser/safe_browsing/prefix_set.h"
22 #include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
23 #include "content/public/browser/browser_thread.h"
24 #include "crypto/sha2.h"
25 #include "net/base/net_util.h"
26 #include "url/gurl.h"
27
28 #if defined(OS_MACOSX)
29 #include "base/mac/mac_util.h"
30 #endif
31
32 using content::BrowserThread;
33
34 namespace {
35
36 // Filename suffix for the bloom filter.
37 const base::FilePath::CharType kBloomFilterFile[] =
38     FILE_PATH_LITERAL(" Filter 2");
39 // Filename suffix for the prefix set.
40 const base::FilePath::CharType kPrefixSetFile[] =
41     FILE_PATH_LITERAL(" Prefix Set");
42 // Filename suffix for download store.
43 const base::FilePath::CharType kDownloadDBFile[] =
44     FILE_PATH_LITERAL(" Download");
45 // Filename suffix for client-side phishing detection whitelist store.
46 const base::FilePath::CharType kCsdWhitelistDBFile[] =
47     FILE_PATH_LITERAL(" Csd Whitelist");
48 // Filename suffix for the download whitelist store.
49 const base::FilePath::CharType kDownloadWhitelistDBFile[] =
50     FILE_PATH_LITERAL(" Download Whitelist");
51 // Filename suffix for the extension blacklist store.
52 const base::FilePath::CharType kExtensionBlacklistDBFile[] =
53     FILE_PATH_LITERAL(" Extension Blacklist");
54 // Filename suffix for the side-effect free whitelist store.
55 const base::FilePath::CharType kSideEffectFreeWhitelistDBFile[] =
56     FILE_PATH_LITERAL(" Side-Effect Free Whitelist");
57 // Filename suffix for the csd malware IP blacklist store.
58 const base::FilePath::CharType kIPBlacklistDBFile[] =
59     FILE_PATH_LITERAL(" IP Blacklist");
60
61 // Filename suffix for browse store.
62 // TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
63 // Unfortunately, to change the name implies lots of transition code
64 // for little benefit.  If/when file formats change (say to put all
65 // the data in one file), that would be a convenient point to rectify
66 // this.
67 const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
68
// Cached entries older than this many minutes are considered stale.
const int kMaxStalenessMinutes = 45;

// Upper bound on the number of entries accepted in any one whitelist.
// When a whitelist on disk holds more entries than this, every lookup
// against that whitelist is treated as a match.
const size_t kMaxWhitelistSize = 5000;

// Whitelist kill switch: when the hash of this exact expression is on a
// whitelist, every lookup against that whitelist is treated as a match.
// Don't change this!
const char kWhitelistKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch";

// Malware-IP kill switch: when the hash of this exact expression is on a
// whitelist, the malware IP blacklisting feature is disabled in csd.
// Don't change this!
const char kMalwareIPKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch_malware";

// Largest and smallest accepted IP prefix sizes.
// NOTE(review): presumably expressed in bits (128 matches the width of an
// IPv6 address); the consuming code is not visible here -- confirm.
const size_t kMaxIpPrefixSize = 128;
const size_t kMinIpPrefixSize = 1;
90
91 // To save space, the incoming |chunk_id| and |list_id| are combined
92 // into an |encoded_chunk_id| for storage by shifting the |list_id|
93 // into the low-order bits.  These functions decode that information.
94 // TODO(lzheng): It was reasonable when database is saved in sqlite, but
95 // there should be better ways to save chunk_id and list_id after we use
96 // SafeBrowsingStoreFile.
// Returns the list-id bit of |encoded_chunk_id|, i.e. its lowest-order bit.
int GetListIdBit(const int encoded_chunk_id) {
  const int kListIdMask = 1;
  return encoded_chunk_id & kListIdMask;
}
// Returns the chunk id stored in |encoded_chunk_id| by shifting the list-id
// bit out of the low-order position.
int DecodeChunkId(int encoded_chunk_id) {
  const int chunk_id = encoded_chunk_id >> 1;
  return chunk_id;
}
103 int EncodeChunkId(const int chunk, const int list_id) {
104   DCHECK_NE(list_id, safe_browsing_util::INVALID);
105   return chunk << 1 | list_id % 2;
106 }
107
108 // Generate the set of full hashes to check for |url|.  If
109 // |include_whitelist_hashes| is true we will generate additional path-prefixes
110 // to match against the csd whitelist.  E.g., if the path-prefix /foo is on the
111 // whitelist it should also match /foo/bar which is not the case for all the
112 // other lists.  We'll also always add a pattern for the empty path.
113 // TODO(shess): This function is almost the same as
114 // |CompareFullHashes()| in safe_browsing_util.cc, except that code
115 // does an early exit on match.  Since match should be the infrequent
116 // case (phishing or malware found), consider combining this function
117 // with that one.
118 void BrowseFullHashesToCheck(const GURL& url,
119                              bool include_whitelist_hashes,
120                              std::vector<SBFullHash>* full_hashes) {
121   std::vector<std::string> hosts;
122   if (url.HostIsIPAddress()) {
123     hosts.push_back(url.host());
124   } else {
125     safe_browsing_util::GenerateHostsToCheck(url, &hosts);
126   }
127
128   std::vector<std::string> paths;
129   safe_browsing_util::GeneratePathsToCheck(url, &paths);
130
131   for (size_t i = 0; i < hosts.size(); ++i) {
132     for (size_t j = 0; j < paths.size(); ++j) {
133       const std::string& path = paths[j];
134       full_hashes->push_back(SBFullHashForString(hosts[i] + path));
135
136       // We may have /foo as path-prefix in the whitelist which should
137       // also match with /foo/bar and /foo?bar.  Hence, for every path
138       // that ends in '/' we also add the path without the slash.
139       if (include_whitelist_hashes &&
140           path.size() > 1 &&
141           path[path.size() - 1] == '/') {
142         full_hashes->push_back(
143             SBFullHashForString(hosts[i] + path.substr(0, path.size() - 1)));
144       }
145     }
146   }
147 }
148
149 // Get the prefixes matching the download |urls|.
150 void GetDownloadUrlPrefixes(const std::vector<GURL>& urls,
151                             std::vector<SBPrefix>* prefixes) {
152   std::vector<SBFullHash> full_hashes;
153   for (size_t i = 0; i < urls.size(); ++i)
154     BrowseFullHashesToCheck(urls[i], false, &full_hashes);
155
156   for (size_t i = 0; i < full_hashes.size(); ++i)
157     prefixes->push_back(full_hashes[i].prefix);
158 }
159
160 // Helper function to compare addprefixes in |store| with |prefixes|.
161 // The |list_bit| indicates which list (url or hash) to compare.
162 //
163 // Returns true if there is a match, |*prefix_hits| (if non-NULL) will contain
164 // the actual matching prefixes.
165 bool MatchAddPrefixes(SafeBrowsingStore* store,
166                       int list_bit,
167                       const std::vector<SBPrefix>& prefixes,
168                       std::vector<SBPrefix>* prefix_hits) {
169   prefix_hits->clear();
170   bool found_match = false;
171
172   SBAddPrefixes add_prefixes;
173   store->GetAddPrefixes(&add_prefixes);
174   for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
175        iter != add_prefixes.end(); ++iter) {
176     for (size_t j = 0; j < prefixes.size(); ++j) {
177       const SBPrefix& prefix = prefixes[j];
178       if (prefix == iter->prefix &&
179           GetListIdBit(iter->chunk_id) == list_bit) {
180         prefix_hits->push_back(prefix);
181         found_match = true;
182       }
183     }
184   }
185   return found_match;
186 }
187
188 // Find the entries in |full_hashes| with prefix in |prefix_hits|, and
189 // add them to |full_hits| if not expired.  "Not expired" is when
190 // either |last_update| was recent enough, or the item has been
191 // received recently enough.  Expired items are not deleted because a
192 // future update may make them acceptable again.
193 //
194 // For efficiency reasons the code walks |prefix_hits| and
195 // |full_hashes| in parallel, so they must be sorted by prefix.
196 void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits,
197                                   const std::vector<SBAddFullHash>& full_hashes,
198                                   std::vector<SBFullHashResult>* full_hits,
199                                   base::Time last_update) {
200   const base::Time expire_time =
201       base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes);
202
203   std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin();
204   std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin();
205
206   while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
207     if (*piter < hiter->full_hash.prefix) {
208       ++piter;
209     } else if (hiter->full_hash.prefix < *piter) {
210       ++hiter;
211     } else {
212       if (expire_time < last_update ||
213           expire_time.ToTimeT() < hiter->received) {
214         SBFullHashResult result;
215         const int list_bit = GetListIdBit(hiter->chunk_id);
216         DCHECK(list_bit == safe_browsing_util::MALWARE ||
217                list_bit == safe_browsing_util::PHISH);
218         const safe_browsing_util::ListType list_id =
219             static_cast<safe_browsing_util::ListType>(list_bit);
220         if (!safe_browsing_util::GetListName(list_id, &result.list_name))
221           continue;
222         result.add_chunk_id = DecodeChunkId(hiter->chunk_id);
223         result.hash = hiter->full_hash;
224         full_hits->push_back(result);
225       }
226
227       // Only increment |hiter|, |piter| might have multiple hits.
228       ++hiter;
229     }
230   }
231 }
232
233 // This function generates a chunk range string for |chunks|. It
234 // outputs one chunk range string per list and writes it to the
235 // |list_ranges| vector.  We expect |list_ranges| to already be of the
236 // right size.  E.g., if |chunks| contains chunks with two different
237 // list ids then |list_ranges| must contain two elements.
238 void GetChunkRanges(const std::vector<int>& chunks,
239                     std::vector<std::string>* list_ranges) {
240   // Since there are 2 possible list ids, there must be exactly two
241   // list ranges.  Even if the chunk data should only contain one
242   // line, this code has to somehow handle corruption.
243   DCHECK_EQ(2U, list_ranges->size());
244
245   std::vector<std::vector<int> > decoded_chunks(list_ranges->size());
246   for (std::vector<int>::const_iterator iter = chunks.begin();
247        iter != chunks.end(); ++iter) {
248     int mod_list_id = GetListIdBit(*iter);
249     DCHECK_GE(mod_list_id, 0);
250     DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size());
251     decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter));
252   }
253   for (size_t i = 0; i < decoded_chunks.size(); ++i) {
254     ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i]));
255   }
256 }
257
258 // Helper function to create chunk range lists for Browse related
259 // lists.
260 void UpdateChunkRanges(SafeBrowsingStore* store,
261                        const std::vector<std::string>& listnames,
262                        std::vector<SBListChunkRanges>* lists) {
263   if (!store)
264     return;
265
266   DCHECK_GT(listnames.size(), 0U);
267   DCHECK_LE(listnames.size(), 2U);
268   std::vector<int> add_chunks;
269   std::vector<int> sub_chunks;
270   store->GetAddChunks(&add_chunks);
271   store->GetSubChunks(&sub_chunks);
272
273   // Always decode 2 ranges, even if only the first one is expected.
274   // The loop below will only load as many into |lists| as |listnames|
275   // indicates.
276   std::vector<std::string> adds(2);
277   std::vector<std::string> subs(2);
278   GetChunkRanges(add_chunks, &adds);
279   GetChunkRanges(sub_chunks, &subs);
280
281   for (size_t i = 0; i < listnames.size(); ++i) {
282     const std::string& listname = listnames[i];
283     DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2,
284               static_cast<int>(i % 2));
285     DCHECK_NE(safe_browsing_util::GetListId(listname),
286               safe_browsing_util::INVALID);
287     lists->push_back(SBListChunkRanges(listname));
288     lists->back().adds.swap(adds[i]);
289     lists->back().subs.swap(subs[i]);
290   }
291 }
292
293 void UpdateChunkRangesForLists(SafeBrowsingStore* store,
294                                const std::string& listname0,
295                                const std::string& listname1,
296                                std::vector<SBListChunkRanges>* lists) {
297   std::vector<std::string> listnames;
298   listnames.push_back(listname0);
299   listnames.push_back(listname1);
300   UpdateChunkRanges(store, listnames, lists);
301 }
302
303 void UpdateChunkRangesForList(SafeBrowsingStore* store,
304                               const std::string& listname,
305                               std::vector<SBListChunkRanges>* lists) {
306   UpdateChunkRanges(store, std::vector<std::string>(1, listname), lists);
307 }
308
309 // Order |SBAddFullHash| on the prefix part.  |SBAddPrefixLess()| from
310 // safe_browsing_store.h orders on both chunk-id and prefix.
311 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
312   return a.full_hash.prefix < b.full_hash.prefix;
313 }
314
315 // This code always checks for non-zero file size.  This helper makes
316 // that less verbose.
317 int64 GetFileSizeOrZero(const base::FilePath& file_path) {
318   int64 size_64;
319   if (!base::GetFileSize(file_path, &size_64))
320     return 0;
321   return size_64;
322 }
323
324 // Used to order whitelist storage in memory.
325 bool SBFullHashLess(const SBFullHash& a, const SBFullHash& b) {
326   return memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)) < 0;
327 }
328
329 }  // namespace
330
331 // The default SafeBrowsingDatabaseFactory.
332 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
333  public:
334   virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
335       bool enable_download_protection,
336       bool enable_client_side_whitelist,
337       bool enable_download_whitelist,
338       bool enable_extension_blacklist,
339       bool enable_side_effect_free_whitelist,
340       bool enable_ip_blacklist) OVERRIDE {
341     return new SafeBrowsingDatabaseNew(
342         new SafeBrowsingStoreFile,
343         enable_download_protection ? new SafeBrowsingStoreFile : NULL,
344         enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL,
345         enable_download_whitelist ? new SafeBrowsingStoreFile : NULL,
346         enable_extension_blacklist ? new SafeBrowsingStoreFile : NULL,
347         enable_side_effect_free_whitelist ? new SafeBrowsingStoreFile : NULL,
348         enable_ip_blacklist ? new SafeBrowsingStoreFile : NULL);
349   }
350
351   SafeBrowsingDatabaseFactoryImpl() { }
352
353  private:
354   DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl);
355 };
356
357 // static
358 SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL;
359
360 // Factory method, non-thread safe. Caller has to make sure this s called
361 // on SafeBrowsing Thread.
362 // TODO(shess): There's no need for a factory any longer.  Convert
363 // SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create()
364 // callers just construct things directly.
365 SafeBrowsingDatabase* SafeBrowsingDatabase::Create(
366     bool enable_download_protection,
367     bool enable_client_side_whitelist,
368     bool enable_download_whitelist,
369     bool enable_extension_blacklist,
370     bool enable_side_effect_free_whitelist,
371     bool enable_ip_blacklist) {
372   if (!factory_)
373     factory_ = new SafeBrowsingDatabaseFactoryImpl();
374   return factory_->CreateSafeBrowsingDatabase(
375       enable_download_protection,
376       enable_client_side_whitelist,
377       enable_download_whitelist,
378       enable_extension_blacklist,
379       enable_side_effect_free_whitelist,
380       enable_ip_blacklist);
381 }
382
383 SafeBrowsingDatabase::~SafeBrowsingDatabase() {
384 }
385
386 // static
387 base::FilePath SafeBrowsingDatabase::BrowseDBFilename(
388     const base::FilePath& db_base_filename) {
389   return base::FilePath(db_base_filename.value() + kBrowseDBFile);
390 }
391
392 // static
393 base::FilePath SafeBrowsingDatabase::DownloadDBFilename(
394     const base::FilePath& db_base_filename) {
395   return base::FilePath(db_base_filename.value() + kDownloadDBFile);
396 }
397
398 // static
399 base::FilePath SafeBrowsingDatabase::BloomFilterForFilename(
400     const base::FilePath& db_filename) {
401   return base::FilePath(db_filename.value() + kBloomFilterFile);
402 }
403
404 // static
405 base::FilePath SafeBrowsingDatabase::PrefixSetForFilename(
406     const base::FilePath& db_filename) {
407   return base::FilePath(db_filename.value() + kPrefixSetFile);
408 }
409
410 // static
411 base::FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename(
412     const base::FilePath& db_filename) {
413   return base::FilePath(db_filename.value() + kCsdWhitelistDBFile);
414 }
415
416 // static
417 base::FilePath SafeBrowsingDatabase::DownloadWhitelistDBFilename(
418     const base::FilePath& db_filename) {
419   return base::FilePath(db_filename.value() + kDownloadWhitelistDBFile);
420 }
421
422 // static
423 base::FilePath SafeBrowsingDatabase::ExtensionBlacklistDBFilename(
424     const base::FilePath& db_filename) {
425   return base::FilePath(db_filename.value() + kExtensionBlacklistDBFile);
426 }
427
428 // static
429 base::FilePath SafeBrowsingDatabase::SideEffectFreeWhitelistDBFilename(
430     const base::FilePath& db_filename) {
431   return base::FilePath(db_filename.value() + kSideEffectFreeWhitelistDBFile);
432 }
433
434 // static
435 base::FilePath SafeBrowsingDatabase::IpBlacklistDBFilename(
436     const base::FilePath& db_filename) {
437   return base::FilePath(db_filename.value() + kIPBlacklistDBFile);
438 }
439
440 SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) {
441   if (list_id == safe_browsing_util::PHISH ||
442       list_id == safe_browsing_util::MALWARE) {
443     return browse_store_.get();
444   } else if (list_id == safe_browsing_util::BINURL) {
445     return download_store_.get();
446   } else if (list_id == safe_browsing_util::CSDWHITELIST) {
447     return csd_whitelist_store_.get();
448   } else if (list_id == safe_browsing_util::DOWNLOADWHITELIST) {
449     return download_whitelist_store_.get();
450   } else if (list_id == safe_browsing_util::EXTENSIONBLACKLIST) {
451     return extension_blacklist_store_.get();
452   } else if (list_id == safe_browsing_util::SIDEEFFECTFREEWHITELIST) {
453     return side_effect_free_whitelist_store_.get();
454   } else if (list_id == safe_browsing_util::IPBLACKLIST) {
455     return ip_blacklist_store_.get();
456   }
457   return NULL;
458 }
459
460 // static
461 void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) {
462   UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type,
463                             FAILURE_DATABASE_MAX);
464 }
465
466 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew()
467     : creation_loop_(base::MessageLoop::current()),
468       browse_store_(new SafeBrowsingStoreFile),
469       reset_factory_(this),
470       corruption_detected_(false),
471       change_detected_(false) {
472   DCHECK(browse_store_.get());
473   DCHECK(!download_store_.get());
474   DCHECK(!csd_whitelist_store_.get());
475   DCHECK(!download_whitelist_store_.get());
476   DCHECK(!extension_blacklist_store_.get());
477   DCHECK(!side_effect_free_whitelist_store_.get());
478   DCHECK(!ip_blacklist_store_.get());
479 }
480
481 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew(
482     SafeBrowsingStore* browse_store,
483     SafeBrowsingStore* download_store,
484     SafeBrowsingStore* csd_whitelist_store,
485     SafeBrowsingStore* download_whitelist_store,
486     SafeBrowsingStore* extension_blacklist_store,
487     SafeBrowsingStore* side_effect_free_whitelist_store,
488     SafeBrowsingStore* ip_blacklist_store)
489     : creation_loop_(base::MessageLoop::current()),
490       browse_store_(browse_store),
491       download_store_(download_store),
492       csd_whitelist_store_(csd_whitelist_store),
493       download_whitelist_store_(download_whitelist_store),
494       extension_blacklist_store_(extension_blacklist_store),
495       side_effect_free_whitelist_store_(side_effect_free_whitelist_store),
496       ip_blacklist_store_(ip_blacklist_store),
497       reset_factory_(this),
498       corruption_detected_(false) {
499   DCHECK(browse_store_.get());
500 }
501
502 SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() {
503   // The DCHECK is disabled due to crbug.com/338486 .
504   // DCHECK_EQ(creation_loop_, base::MessageLoop::current());
505 }
506
507 void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) {
508   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
509   // Ensure we haven't been run before.
510   DCHECK(browse_filename_.empty());
511   DCHECK(download_filename_.empty());
512   DCHECK(csd_whitelist_filename_.empty());
513   DCHECK(download_whitelist_filename_.empty());
514   DCHECK(extension_blacklist_filename_.empty());
515   DCHECK(side_effect_free_whitelist_filename_.empty());
516   DCHECK(ip_blacklist_filename_.empty());
517
518   browse_filename_ = BrowseDBFilename(filename_base);
519   browse_prefix_set_filename_ = PrefixSetForFilename(browse_filename_);
520
521   browse_store_->Init(
522       browse_filename_,
523       base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
524                  base::Unretained(this)));
525   DVLOG(1) << "Init browse store: " << browse_filename_.value();
526
527   {
528     // NOTE: There is no need to grab the lock in this function, since
529     // until it returns, there are no pointers to this class on other
530     // threads.  Then again, that means there is no possibility of
531     // contention on the lock...
532     base::AutoLock locked(lookup_lock_);
533     full_browse_hashes_.clear();
534     pending_browse_hashes_.clear();
535     LoadPrefixSet();
536   }
537
538   if (download_store_.get()) {
539     download_filename_ = DownloadDBFilename(filename_base);
540     download_store_->Init(
541         download_filename_,
542         base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
543                    base::Unretained(this)));
544     DVLOG(1) << "Init download store: " << download_filename_.value();
545   }
546
547   if (csd_whitelist_store_.get()) {
548     csd_whitelist_filename_ = CsdWhitelistDBFilename(filename_base);
549     csd_whitelist_store_->Init(
550         csd_whitelist_filename_,
551         base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
552                    base::Unretained(this)));
553     DVLOG(1) << "Init csd whitelist store: " << csd_whitelist_filename_.value();
554     std::vector<SBAddFullHash> full_hashes;
555     if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) {
556       LoadWhitelist(full_hashes, &csd_whitelist_);
557     } else {
558       WhitelistEverything(&csd_whitelist_);
559     }
560   } else {
561     WhitelistEverything(&csd_whitelist_);  // Just to be safe.
562   }
563
564   if (download_whitelist_store_.get()) {
565     download_whitelist_filename_ = DownloadWhitelistDBFilename(filename_base);
566     download_whitelist_store_->Init(
567         download_whitelist_filename_,
568         base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
569                    base::Unretained(this)));
570     DVLOG(1) << "Init download whitelist store: "
571              << download_whitelist_filename_.value();
572     std::vector<SBAddFullHash> full_hashes;
573     if (download_whitelist_store_->GetAddFullHashes(&full_hashes)) {
574       LoadWhitelist(full_hashes, &download_whitelist_);
575     } else {
576       WhitelistEverything(&download_whitelist_);
577     }
578   } else {
579     WhitelistEverything(&download_whitelist_);  // Just to be safe.
580   }
581
582   if (extension_blacklist_store_.get()) {
583     extension_blacklist_filename_ = ExtensionBlacklistDBFilename(filename_base);
584     extension_blacklist_store_->Init(
585         extension_blacklist_filename_,
586         base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
587                    base::Unretained(this)));
588     DVLOG(1) << "Init extension blacklist store: "
589              << extension_blacklist_filename_.value();
590   }
591
592   if (side_effect_free_whitelist_store_.get()) {
593     side_effect_free_whitelist_filename_ =
594         SideEffectFreeWhitelistDBFilename(filename_base);
595     side_effect_free_whitelist_prefix_set_filename_ =
596         PrefixSetForFilename(side_effect_free_whitelist_filename_);
597     side_effect_free_whitelist_store_->Init(
598         side_effect_free_whitelist_filename_,
599         base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
600                    base::Unretained(this)));
601     DVLOG(1) << "Init side-effect free whitelist store: "
602              << side_effect_free_whitelist_filename_.value();
603
604     // If there is no database, the filter cannot be used.
605     base::File::Info db_info;
606     if (base::GetFileInfo(side_effect_free_whitelist_filename_, &db_info)
607         && db_info.size != 0) {
608       const base::TimeTicks before = base::TimeTicks::Now();
609       side_effect_free_whitelist_prefix_set_ =
610           safe_browsing::PrefixSet::LoadFile(
611               side_effect_free_whitelist_prefix_set_filename_);
612       DVLOG(1) << "SafeBrowsingDatabaseNew read side-effect free whitelist "
613                << "prefix set in "
614                << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
615       UMA_HISTOGRAM_TIMES("SB2.SideEffectFreeWhitelistPrefixSetLoad",
616                           base::TimeTicks::Now() - before);
617       if (!side_effect_free_whitelist_prefix_set_.get())
618         RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ);
619     }
620   } else {
621     // Delete any files of the side-effect free sidelist that may be around
622     // from when it was previously enabled.
623     SafeBrowsingStoreFile::DeleteStore(
624         SideEffectFreeWhitelistDBFilename(filename_base));
625   }
626
627   if (ip_blacklist_store_.get()) {
628     ip_blacklist_filename_ = IpBlacklistDBFilename(filename_base);
629     ip_blacklist_store_->Init(
630         ip_blacklist_filename_,
631         base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
632                    base::Unretained(this)));
633     DVLOG(1) << "SafeBrowsingDatabaseNew read ip blacklist: "
634              << ip_blacklist_filename_.value();
635     std::vector<SBAddFullHash> full_hashes;
636     if (ip_blacklist_store_->GetAddFullHashes(&full_hashes)) {
637       LoadIpBlacklist(full_hashes);
638     } else {
639       DVLOG(1) << "Unable to load full hashes from the IP blacklist.";
640       LoadIpBlacklist(std::vector<SBAddFullHash>());  // Clear the list.
641     }
642   }
643 }
644
645 bool SafeBrowsingDatabaseNew::ResetDatabase() {
646   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
647
648   // Delete files on disk.
649   // TODO(shess): Hard to see where one might want to delete without a
650   // reset.  Perhaps inline |Delete()|?
651   if (!Delete())
652     return false;
653
654   // Reset objects in memory.
655   {
656     base::AutoLock locked(lookup_lock_);
657     full_browse_hashes_.clear();
658     pending_browse_hashes_.clear();
659     prefix_miss_cache_.clear();
660     browse_prefix_set_.reset();
661     side_effect_free_whitelist_prefix_set_.reset();
662     ip_blacklist_.clear();
663   }
664   // Wants to acquire the lock itself.
665   WhitelistEverything(&csd_whitelist_);
666   WhitelistEverything(&download_whitelist_);
667   return true;
668 }
669
670 // TODO(lzheng): Remove matching_list, it is not used anywhere.
671 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
672     const GURL& url,
673     std::string* matching_list,
674     std::vector<SBPrefix>* prefix_hits,
675     std::vector<SBFullHashResult>* full_hits,
676     base::Time last_update) {
677   // Clear the results first.
678   matching_list->clear();
679   prefix_hits->clear();
680   full_hits->clear();
681
682   std::vector<SBFullHash> full_hashes;
683   BrowseFullHashesToCheck(url, false, &full_hashes);
684   if (full_hashes.empty())
685     return false;
686
687   // This function is called on the I/O thread, prevent changes to
688   // filter and caches.
689   base::AutoLock locked(lookup_lock_);
690
691   // |browse_prefix_set_| is empty until it is either read from disk, or the
692   // first update populates it.  Bail out without a hit if not yet
693   // available.
694   if (!browse_prefix_set_.get())
695     return false;
696
697   size_t miss_count = 0;
698   for (size_t i = 0; i < full_hashes.size(); ++i) {
699     const SBPrefix prefix = full_hashes[i].prefix;
700     if (browse_prefix_set_->Exists(prefix)) {
701       prefix_hits->push_back(prefix);
702       if (prefix_miss_cache_.count(prefix) > 0)
703         ++miss_count;
704     }
705   }
706
707   // If all the prefixes are cached as 'misses', don't issue a GetHash.
708   if (miss_count == prefix_hits->size())
709     return false;
710
711   // Find the matching full-hash results.  |full_browse_hashes_| are from the
712   // database, |pending_browse_hashes_| are from GetHash requests between
713   // updates.
714   std::sort(prefix_hits->begin(), prefix_hits->end());
715
716   GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_,
717                                full_hits, last_update);
718   GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_,
719                                full_hits, last_update);
720   return true;
721 }
722
723 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
724     const std::vector<GURL>& urls,
725     std::vector<SBPrefix>* prefix_hits) {
726   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
727
728   // Ignore this check when download checking is not enabled.
729   if (!download_store_.get())
730     return false;
731
732   std::vector<SBPrefix> prefixes;
733   GetDownloadUrlPrefixes(urls, &prefixes);
734   return MatchAddPrefixes(download_store_.get(),
735                           safe_browsing_util::BINURL % 2,
736                           prefixes,
737                           prefix_hits);
738 }
739
740 bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) {
741   // This method is theoretically thread-safe but we expect all calls to
742   // originate from the IO thread.
743   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
744   std::vector<SBFullHash> full_hashes;
745   BrowseFullHashesToCheck(url, true, &full_hashes);
746   return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
747 }
748
749 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedUrl(const GURL& url) {
750   std::vector<SBFullHash> full_hashes;
751   BrowseFullHashesToCheck(url, true, &full_hashes);
752   return ContainsWhitelistedHashes(download_whitelist_, full_hashes);
753 }
754
755 bool SafeBrowsingDatabaseNew::ContainsExtensionPrefixes(
756     const std::vector<SBPrefix>& prefixes,
757     std::vector<SBPrefix>* prefix_hits) {
758   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
759   if (!extension_blacklist_store_)
760     return false;
761
762   return MatchAddPrefixes(extension_blacklist_store_.get(),
763                           safe_browsing_util::EXTENSIONBLACKLIST % 2,
764                           prefixes,
765                           prefix_hits);
766 }
767
768 bool SafeBrowsingDatabaseNew::ContainsSideEffectFreeWhitelistUrl(
769     const GURL& url) {
770   std::string host;
771   std::string path;
772   std::string query;
773   safe_browsing_util::CanonicalizeUrl(url, &host, &path, &query);
774   std::string url_to_check = host + path;
775   if (!query.empty())
776     url_to_check +=  "?" + query;
777   SBFullHash full_hash = SBFullHashForString(url_to_check);
778
779   // This function can be called on any thread, so lock against any changes
780   base::AutoLock locked(lookup_lock_);
781
782   // |side_effect_free_whitelist_prefix_set_| is empty until it is either read
783   // from disk, or the first update populates it.  Bail out without a hit if
784   // not yet available.
785   if (!side_effect_free_whitelist_prefix_set_.get())
786     return false;
787
788   return side_effect_free_whitelist_prefix_set_->Exists(full_hash.prefix);
789 }
790
791 bool SafeBrowsingDatabaseNew::ContainsMalwareIP(const std::string& ip_address) {
792   net::IPAddressNumber ip_number;
793   if (!net::ParseIPLiteralToNumber(ip_address, &ip_number)) {
794     DVLOG(2) << "Unable to parse IP address: '" << ip_address << "'";
795     return false;
796   }
797   if (ip_number.size() == net::kIPv4AddressSize) {
798     ip_number = net::ConvertIPv4NumberToIPv6Number(ip_number);
799   }
800   if (ip_number.size() != net::kIPv6AddressSize) {
801     DVLOG(2) << "Unable to convert IPv4 address to IPv6: '"
802              << ip_address << "'";
803     return false;  // better safe than sorry.
804   }
805   // This function can be called from any thread.
806   base::AutoLock locked(lookup_lock_);
807   for (IPBlacklist::const_iterator it = ip_blacklist_.begin();
808        it != ip_blacklist_.end();
809        ++it) {
810     const std::string& mask = it->first;
811     DCHECK_EQ(mask.size(), ip_number.size());
812     std::string subnet(net::kIPv6AddressSize, '\0');
813     for (size_t i = 0; i < net::kIPv6AddressSize; ++i) {
814       subnet[i] = ip_number[i] & mask[i];
815     }
816     const std::string hash = base::SHA1HashString(subnet);
817     DVLOG(2) << "Lookup Malware IP: "
818              << " ip:" << ip_address
819              << " mask:" << base::HexEncode(mask.data(), mask.size())
820              << " subnet:" << base::HexEncode(subnet.data(), subnet.size())
821              << " hash:" << base::HexEncode(hash.data(), hash.size());
822     if (it->second.count(hash) > 0) {
823       return true;
824     }
825   }
826   return false;
827 }
828
829 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedString(
830     const std::string& str) {
831   std::vector<SBFullHash> hashes;
832   hashes.push_back(SBFullHashForString(str));
833   return ContainsWhitelistedHashes(download_whitelist_, hashes);
834 }
835
836 bool SafeBrowsingDatabaseNew::ContainsWhitelistedHashes(
837     const SBWhitelist& whitelist,
838     const std::vector<SBFullHash>& hashes) {
839   base::AutoLock l(lookup_lock_);
840   if (whitelist.second)
841     return true;
842   for (std::vector<SBFullHash>::const_iterator it = hashes.begin();
843        it != hashes.end(); ++it) {
844     if (std::binary_search(whitelist.first.begin(), whitelist.first.end(),
845                            *it, SBFullHashLess)) {
846       return true;
847     }
848   }
849   return false;
850 }
851
852 // Helper to insert entries for all of the prefixes or full hashes in
853 // |entry| into the store.
854 void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host,
855                                         const SBEntry* entry, int list_id) {
856   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
857
858   SafeBrowsingStore* store = GetStore(list_id);
859   if (!store) return;
860
861   STATS_COUNTER("SB.HostInsert", 1);
862   const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
863   const int count = entry->prefix_count();
864
865   DCHECK(!entry->IsSub());
866   if (!count) {
867     // No prefixes, use host instead.
868     STATS_COUNTER("SB.PrefixAdd", 1);
869     store->WriteAddPrefix(encoded_chunk_id, host);
870   } else if (entry->IsPrefix()) {
871     // Prefixes only.
872     for (int i = 0; i < count; i++) {
873       const SBPrefix prefix = entry->PrefixAt(i);
874       STATS_COUNTER("SB.PrefixAdd", 1);
875       store->WriteAddPrefix(encoded_chunk_id, prefix);
876     }
877   } else {
878     // Prefixes and hashes.
879     const base::Time receive_time = base::Time::Now();
880     for (int i = 0; i < count; ++i) {
881       const SBFullHash full_hash = entry->FullHashAt(i);
882       const SBPrefix prefix = full_hash.prefix;
883
884       STATS_COUNTER("SB.PrefixAdd", 1);
885       store->WriteAddPrefix(encoded_chunk_id, prefix);
886
887       STATS_COUNTER("SB.PrefixAddFull", 1);
888       store->WriteAddHash(encoded_chunk_id, receive_time, full_hash);
889     }
890   }
891 }
892
893 // Helper to iterate over all the entries in the hosts in |chunks| and
894 // add them to the store.
895 void SafeBrowsingDatabaseNew::InsertAddChunks(
896     const safe_browsing_util::ListType list_id,
897     const SBChunkList& chunks) {
898   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
899
900   SafeBrowsingStore* store = GetStore(list_id);
901   if (!store) return;
902
903   for (SBChunkList::const_iterator citer = chunks.begin();
904        citer != chunks.end(); ++citer) {
905     const int chunk_id = citer->chunk_number;
906
907     // The server can give us a chunk that we already have because
908     // it's part of a range.  Don't add it again.
909     const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
910     if (store->CheckAddChunk(encoded_chunk_id))
911       continue;
912
913     store->SetAddChunk(encoded_chunk_id);
914     for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
915          hiter != citer->hosts.end(); ++hiter) {
916       // NOTE: Could pass |encoded_chunk_id|, but then inserting add
917       // chunks would look different from inserting sub chunks.
918       InsertAdd(chunk_id, hiter->host, hiter->entry, list_id);
919     }
920   }
921 }
922
923 // Helper to insert entries for all of the prefixes or full hashes in
924 // |entry| into the store.
925 void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host,
926                                         const SBEntry* entry, int list_id) {
927   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
928
929   SafeBrowsingStore* store = GetStore(list_id);
930   if (!store) return;
931
932   STATS_COUNTER("SB.HostDelete", 1);
933   const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
934   const int count = entry->prefix_count();
935
936   DCHECK(entry->IsSub());
937   if (!count) {
938     // No prefixes, use host instead.
939     STATS_COUNTER("SB.PrefixSub", 1);
940     const int add_chunk_id = EncodeChunkId(entry->chunk_id(), list_id);
941     store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, host);
942   } else if (entry->IsPrefix()) {
943     // Prefixes only.
944     for (int i = 0; i < count; i++) {
945       const SBPrefix prefix = entry->PrefixAt(i);
946       const int add_chunk_id =
947           EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
948
949       STATS_COUNTER("SB.PrefixSub", 1);
950       store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix);
951     }
952   } else {
953     // Prefixes and hashes.
954     for (int i = 0; i < count; ++i) {
955       const SBFullHash full_hash = entry->FullHashAt(i);
956       const int add_chunk_id =
957           EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
958
959       STATS_COUNTER("SB.PrefixSub", 1);
960       store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, full_hash.prefix);
961
962       STATS_COUNTER("SB.PrefixSubFull", 1);
963       store->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash);
964     }
965   }
966 }
967
968 // Helper to iterate over all the entries in the hosts in |chunks| and
969 // add them to the store.
970 void SafeBrowsingDatabaseNew::InsertSubChunks(
971     safe_browsing_util::ListType list_id,
972     const SBChunkList& chunks) {
973   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
974
975   SafeBrowsingStore* store = GetStore(list_id);
976   if (!store) return;
977
978   for (SBChunkList::const_iterator citer = chunks.begin();
979        citer != chunks.end(); ++citer) {
980     const int chunk_id = citer->chunk_number;
981
982     // The server can give us a chunk that we already have because
983     // it's part of a range.  Don't add it again.
984     const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
985     if (store->CheckSubChunk(encoded_chunk_id))
986       continue;
987
988     store->SetSubChunk(encoded_chunk_id);
989     for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
990          hiter != citer->hosts.end(); ++hiter) {
991       InsertSub(chunk_id, hiter->host, hiter->entry, list_id);
992     }
993   }
994 }
995
996 void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name,
997                                            const SBChunkList& chunks) {
998   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
999
1000   if (corruption_detected_ || chunks.empty())
1001     return;
1002
1003   const base::TimeTicks before = base::TimeTicks::Now();
1004
1005   const safe_browsing_util::ListType list_id =
1006       safe_browsing_util::GetListId(list_name);
1007   DVLOG(2) << list_name << ": " << list_id;
1008
1009   SafeBrowsingStore* store = GetStore(list_id);
1010   if (!store) return;
1011
1012   change_detected_ = true;
1013
1014   store->BeginChunk();
1015   if (chunks.front().is_add) {
1016     InsertAddChunks(list_id, chunks);
1017   } else {
1018     InsertSubChunks(list_id, chunks);
1019   }
1020   store->FinishChunk();
1021
1022   UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before);
1023 }
1024
1025 void SafeBrowsingDatabaseNew::DeleteChunks(
1026     const std::vector<SBChunkDelete>& chunk_deletes) {
1027   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1028
1029   if (corruption_detected_ || chunk_deletes.empty())
1030     return;
1031
1032   const std::string& list_name = chunk_deletes.front().list_name;
1033   const safe_browsing_util::ListType list_id =
1034       safe_browsing_util::GetListId(list_name);
1035
1036   SafeBrowsingStore* store = GetStore(list_id);
1037   if (!store) return;
1038
1039   change_detected_ = true;
1040
1041   for (size_t i = 0; i < chunk_deletes.size(); ++i) {
1042     std::vector<int> chunk_numbers;
1043     RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers);
1044     for (size_t j = 0; j < chunk_numbers.size(); ++j) {
1045       const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id);
1046       if (chunk_deletes[i].is_sub_del)
1047         store->DeleteSubChunk(encoded_chunk_id);
1048       else
1049         store->DeleteAddChunk(encoded_chunk_id);
1050     }
1051   }
1052 }
1053
1054 void SafeBrowsingDatabaseNew::CacheHashResults(
1055     const std::vector<SBPrefix>& prefixes,
1056     const std::vector<SBFullHashResult>& full_hits) {
1057   // This is called on the I/O thread, lock against updates.
1058   base::AutoLock locked(lookup_lock_);
1059
1060   if (full_hits.empty()) {
1061     prefix_miss_cache_.insert(prefixes.begin(), prefixes.end());
1062     return;
1063   }
1064
1065   // TODO(shess): SBFullHashResult and SBAddFullHash are very similar.
1066   // Refactor to make them identical.
1067   const base::Time now = base::Time::Now();
1068   const size_t orig_size = pending_browse_hashes_.size();
1069   for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin();
1070        iter != full_hits.end(); ++iter) {
1071     const int list_id = safe_browsing_util::GetListId(iter->list_name);
1072     if (list_id == safe_browsing_util::MALWARE ||
1073         list_id == safe_browsing_util::PHISH) {
1074       int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id);
1075       SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash);
1076       pending_browse_hashes_.push_back(add_full_hash);
1077     }
1078   }
1079
1080   // Sort new entries then merge with the previously-sorted entries.
1081   std::vector<SBAddFullHash>::iterator
1082       orig_end = pending_browse_hashes_.begin() + orig_size;
1083   std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess);
1084   std::inplace_merge(pending_browse_hashes_.begin(),
1085                      orig_end, pending_browse_hashes_.end(),
1086                      SBAddFullHashPrefixLess);
1087 }
1088
1089 bool SafeBrowsingDatabaseNew::UpdateStarted(
1090     std::vector<SBListChunkRanges>* lists) {
1091   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1092   DCHECK(lists);
1093
1094   // If |BeginUpdate()| fails, reset the database.
1095   if (!browse_store_->BeginUpdate()) {
1096     RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN);
1097     HandleCorruptDatabase();
1098     return false;
1099   }
1100
1101   if (download_store_.get() && !download_store_->BeginUpdate()) {
1102     RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN);
1103     HandleCorruptDatabase();
1104     return false;
1105   }
1106
1107   if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) {
1108     RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
1109     HandleCorruptDatabase();
1110     return false;
1111   }
1112
1113   if (download_whitelist_store_.get() &&
1114       !download_whitelist_store_->BeginUpdate()) {
1115     RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
1116     HandleCorruptDatabase();
1117     return false;
1118   }
1119
1120   if (extension_blacklist_store_ &&
1121       !extension_blacklist_store_->BeginUpdate()) {
1122     RecordFailure(FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN);
1123     HandleCorruptDatabase();
1124     return false;
1125   }
1126
1127   if (side_effect_free_whitelist_store_ &&
1128       !side_effect_free_whitelist_store_->BeginUpdate()) {
1129     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN);
1130     HandleCorruptDatabase();
1131     return false;
1132   }
1133
1134   if (ip_blacklist_store_ && !ip_blacklist_store_->BeginUpdate()) {
1135     RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN);
1136     HandleCorruptDatabase();
1137     return false;
1138   }
1139
1140   UpdateChunkRangesForLists(browse_store_.get(),
1141                             safe_browsing_util::kMalwareList,
1142                             safe_browsing_util::kPhishingList,
1143                             lists);
1144
1145   // NOTE(shess): |download_store_| used to contain kBinHashList, which has been
1146   // deprecated.  Code to delete the list from the store shows ~15k hits/day as
1147   // of Feb 2014, so it has been removed.  Everything _should_ be resilient to
1148   // extra data of that sort.
1149   UpdateChunkRangesForList(download_store_.get(),
1150                            safe_browsing_util::kBinUrlList, lists);
1151
1152   UpdateChunkRangesForList(csd_whitelist_store_.get(),
1153                            safe_browsing_util::kCsdWhiteList, lists);
1154
1155   UpdateChunkRangesForList(download_whitelist_store_.get(),
1156                            safe_browsing_util::kDownloadWhiteList, lists);
1157
1158   UpdateChunkRangesForList(extension_blacklist_store_.get(),
1159                            safe_browsing_util::kExtensionBlacklist, lists);
1160
1161   UpdateChunkRangesForList(side_effect_free_whitelist_store_.get(),
1162                            safe_browsing_util::kSideEffectFreeWhitelist, lists);
1163
1164   UpdateChunkRangesForList(ip_blacklist_store_.get(),
1165                            safe_browsing_util::kIPBlacklist, lists);
1166
1167   corruption_detected_ = false;
1168   change_detected_ = false;
1169   return true;
1170 }
1171
1172 void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) {
1173   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1174
1175   // The update may have failed due to corrupt storage (for instance,
1176   // an excessive number of invalid add_chunks and sub_chunks).
1177   // Double-check that the databases are valid.
1178   // TODO(shess): Providing a checksum for the add_chunk and sub_chunk
1179   // sections would allow throwing a corruption error in
1180   // UpdateStarted().
1181   if (!update_succeeded) {
1182     if (!browse_store_->CheckValidity())
1183       DLOG(ERROR) << "Safe-browsing browse database corrupt.";
1184
1185     if (download_store_.get() && !download_store_->CheckValidity())
1186       DLOG(ERROR) << "Safe-browsing download database corrupt.";
1187
1188     if (csd_whitelist_store_.get() && !csd_whitelist_store_->CheckValidity())
1189       DLOG(ERROR) << "Safe-browsing csd whitelist database corrupt.";
1190
1191     if (download_whitelist_store_.get() &&
1192         !download_whitelist_store_->CheckValidity()) {
1193       DLOG(ERROR) << "Safe-browsing download whitelist database corrupt.";
1194     }
1195
1196     if (extension_blacklist_store_ &&
1197         !extension_blacklist_store_->CheckValidity()) {
1198       DLOG(ERROR) << "Safe-browsing extension blacklist database corrupt.";
1199     }
1200
1201     if (side_effect_free_whitelist_store_ &&
1202         !side_effect_free_whitelist_store_->CheckValidity()) {
1203       DLOG(ERROR) << "Safe-browsing side-effect free whitelist database "
1204                   << "corrupt.";
1205     }
1206
1207     if (ip_blacklist_store_ && !ip_blacklist_store_->CheckValidity()) {
1208       DLOG(ERROR) << "Safe-browsing IP blacklist database corrupt.";
1209     }
1210   }
1211
1212   if (corruption_detected_)
1213     return;
1214
1215   // Unroll the transaction if there was a protocol error or if the
1216   // transaction was empty.  This will leave the prefix set, the
1217   // pending hashes, and the prefix miss cache in place.
1218   if (!update_succeeded || !change_detected_) {
1219     // Track empty updates to answer questions at http://crbug.com/72216 .
1220     if (update_succeeded && !change_detected_)
1221       UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0);
1222     browse_store_->CancelUpdate();
1223     if (download_store_.get())
1224       download_store_->CancelUpdate();
1225     if (csd_whitelist_store_.get())
1226       csd_whitelist_store_->CancelUpdate();
1227     if (download_whitelist_store_.get())
1228       download_whitelist_store_->CancelUpdate();
1229     if (extension_blacklist_store_)
1230       extension_blacklist_store_->CancelUpdate();
1231     if (side_effect_free_whitelist_store_)
1232       side_effect_free_whitelist_store_->CancelUpdate();
1233     if (ip_blacklist_store_)
1234       ip_blacklist_store_->CancelUpdate();
1235     return;
1236   }
1237
1238   if (download_store_) {
1239     int64 size_bytes = UpdateHashPrefixStore(
1240         download_filename_,
1241         download_store_.get(),
1242         FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH);
1243     UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
1244                          static_cast<int>(size_bytes / 1024));
1245   }
1246
1247   UpdateBrowseStore();
1248   UpdateWhitelistStore(csd_whitelist_filename_,
1249                        csd_whitelist_store_.get(),
1250                        &csd_whitelist_);
1251   UpdateWhitelistStore(download_whitelist_filename_,
1252                        download_whitelist_store_.get(),
1253                        &download_whitelist_);
1254
1255   if (extension_blacklist_store_) {
1256     int64 size_bytes = UpdateHashPrefixStore(
1257         extension_blacklist_filename_,
1258         extension_blacklist_store_.get(),
1259         FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH);
1260     UMA_HISTOGRAM_COUNTS("SB2.ExtensionBlacklistKilobytes",
1261                          static_cast<int>(size_bytes / 1024));
1262   }
1263
1264   if (side_effect_free_whitelist_store_)
1265     UpdateSideEffectFreeWhitelistStore();
1266
1267   if (ip_blacklist_store_)
1268     UpdateIpBlacklistStore();
1269 }
1270
1271 void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
1272     const base::FilePath& store_filename,
1273     SafeBrowsingStore* store,
1274     SBWhitelist* whitelist) {
1275   if (!store)
1276     return;
1277
1278   // For the whitelists, we don't cache and save full hashes since all
1279   // hashes are already full.
1280   std::vector<SBAddFullHash> empty_add_hashes;
1281
1282   // Note: |builder| will not be empty.  The current data store implementation
1283   // stores all full-length hashes as both full and prefix hashes.
1284   safe_browsing::PrefixSetBuilder builder;
1285   std::vector<SBAddFullHash> full_hashes;
1286   if (!store->FinishUpdate(empty_add_hashes, &builder, &full_hashes)) {
1287     RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH);
1288     WhitelistEverything(whitelist);
1289     return;
1290   }
1291
1292 #if defined(OS_MACOSX)
1293   base::mac::SetFileBackupExclusion(store_filename);
1294 #endif
1295
1296   LoadWhitelist(full_hashes, whitelist);
1297 }
1298
1299 int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
1300     const base::FilePath& store_filename,
1301     SafeBrowsingStore* store,
1302     FailureType failure_type) {
1303   // We don't cache and save full hashes.
1304   std::vector<SBAddFullHash> empty_add_hashes;
1305
1306   // These results are not used after this call. Simply ignore the
1307   // returned value after FinishUpdate(...).
1308   safe_browsing::PrefixSetBuilder builder;
1309   std::vector<SBAddFullHash> add_full_hashes_result;
1310
1311   if (!store->FinishUpdate(empty_add_hashes,
1312                            &builder,
1313                            &add_full_hashes_result)) {
1314     RecordFailure(failure_type);
1315   }
1316
1317 #if defined(OS_MACOSX)
1318   base::mac::SetFileBackupExclusion(store_filename);
1319 #endif
1320
1321   return GetFileSizeOrZero(store_filename);
1322 }
1323
1324 void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
1325   // Copy out the pending add hashes.  Copy rather than swapping in
1326   // case |ContainsBrowseURL()| is called before the new filter is complete.
1327   std::vector<SBAddFullHash> pending_add_hashes;
1328   {
1329     base::AutoLock locked(lookup_lock_);
1330     pending_add_hashes.insert(pending_add_hashes.end(),
1331                               pending_browse_hashes_.begin(),
1332                               pending_browse_hashes_.end());
1333   }
1334
1335   // Measure the amount of IO during the filter build.
1336   base::IoCounters io_before, io_after;
1337   base::ProcessHandle handle = base::Process::Current().handle();
1338   scoped_ptr<base::ProcessMetrics> metric(
1339 #if !defined(OS_MACOSX)
1340       base::ProcessMetrics::CreateProcessMetrics(handle)
1341 #else
1342       // Getting stats only for the current process is enough, so NULL is fine.
1343       base::ProcessMetrics::CreateProcessMetrics(handle, NULL)
1344 #endif
1345   );
1346
1347   // IoCounters are currently not supported on Mac, and may not be
1348   // available for Linux, so we check the result and only show IO
1349   // stats if they are available.
1350   const bool got_counters = metric->GetIOCounters(&io_before);
1351
1352   const base::TimeTicks before = base::TimeTicks::Now();
1353
1354   safe_browsing::PrefixSetBuilder builder;
1355   std::vector<SBAddFullHash> add_full_hashes;
1356   if (!browse_store_->FinishUpdate(pending_add_hashes,
1357                                    &builder, &add_full_hashes)) {
1358     RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
1359     return;
1360   }
1361   scoped_ptr<safe_browsing::PrefixSet> prefix_set(builder.GetPrefixSet());
1362
1363   // This needs to be in sorted order by prefix for efficient access.
1364   std::sort(add_full_hashes.begin(), add_full_hashes.end(),
1365             SBAddFullHashPrefixLess);
1366
1367   // Swap in the newly built filter and cache.
1368   {
1369     base::AutoLock locked(lookup_lock_);
1370     full_browse_hashes_.swap(add_full_hashes);
1371
1372     // TODO(shess): If |CacheHashResults()| is posted between the
1373     // earlier lock and this clear, those pending hashes will be lost.
1374     // It could be fixed by only removing hashes which were collected
1375     // at the earlier point.  I believe that is fail-safe as-is (the
1376     // hash will be fetched again).
1377     pending_browse_hashes_.clear();
1378     prefix_miss_cache_.clear();
1379     browse_prefix_set_.swap(prefix_set);
1380   }
1381
1382   DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in "
1383            << (base::TimeTicks::Now() - before).InMilliseconds()
1384            << " ms total.";
1385   UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before);
1386
1387   // Persist the prefix set to disk.  Since only this thread changes
1388   // |browse_prefix_set_|, there is no need to lock.
1389   WritePrefixSet();
1390
1391   // Gather statistics.
1392   if (got_counters && metric->GetIOCounters(&io_after)) {
1393     UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes",
1394                          static_cast<int>(io_after.ReadTransferCount -
1395                                           io_before.ReadTransferCount) / 1024);
1396     UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes",
1397                          static_cast<int>(io_after.WriteTransferCount -
1398                                           io_before.WriteTransferCount) / 1024);
1399     UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
1400                          static_cast<int>(io_after.ReadOperationCount -
1401                                           io_before.ReadOperationCount));
1402     UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
1403                          static_cast<int>(io_after.WriteOperationCount -
1404                                           io_before.WriteOperationCount));
1405   }
1406
1407   int64 file_size = GetFileSizeOrZero(browse_prefix_set_filename_);
1408   UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes",
1409                        static_cast<int>(file_size / 1024));
1410   file_size = GetFileSizeOrZero(browse_filename_);
1411   UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
1412                        static_cast<int>(file_size / 1024));
1413
1414 #if defined(OS_MACOSX)
1415   base::mac::SetFileBackupExclusion(browse_filename_);
1416 #endif
1417 }
1418
1419 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
1420   std::vector<SBAddFullHash> empty_add_hashes;
1421   safe_browsing::PrefixSetBuilder builder;
1422   std::vector<SBAddFullHash> add_full_hashes_result;
1423
1424   if (!side_effect_free_whitelist_store_->FinishUpdate(
1425           empty_add_hashes,
1426           &builder,
1427           &add_full_hashes_result)) {
1428     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH);
1429     return;
1430   }
1431   scoped_ptr<safe_browsing::PrefixSet> prefix_set(builder.GetPrefixSet());
1432
1433   // Swap in the newly built prefix set.
1434   {
1435     base::AutoLock locked(lookup_lock_);
1436     side_effect_free_whitelist_prefix_set_.swap(prefix_set);
1437   }
1438
1439   const base::TimeTicks before = base::TimeTicks::Now();
1440   const bool write_ok = side_effect_free_whitelist_prefix_set_->WriteFile(
1441       side_effect_free_whitelist_prefix_set_filename_);
1442   DVLOG(1) << "SafeBrowsingDatabaseNew wrote side-effect free whitelist prefix "
1443            << "set in " << (base::TimeTicks::Now() - before).InMilliseconds()
1444            << " ms";
1445   UMA_HISTOGRAM_TIMES("SB2.SideEffectFreePrefixSetWrite",
1446                       base::TimeTicks::Now() - before);
1447
1448   if (!write_ok)
1449     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE);
1450
1451   // Gather statistics.
1452   int64 file_size = GetFileSizeOrZero(
1453       side_effect_free_whitelist_prefix_set_filename_);
1454   UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistPrefixSetKilobytes",
1455                        static_cast<int>(file_size / 1024));
1456   file_size = GetFileSizeOrZero(side_effect_free_whitelist_filename_);
1457   UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistDatabaseKilobytes",
1458                        static_cast<int>(file_size / 1024));
1459
1460 #if defined(OS_MACOSX)
1461   base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename_);
1462   base::mac::SetFileBackupExclusion(
1463       side_effect_free_whitelist_prefix_set_filename_);
1464 #endif
1465 }
1466
1467 void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() {
1468   // For the IP blacklist, we don't cache and save full hashes since all
1469   // hashes are already full.
1470   std::vector<SBAddFullHash> empty_add_hashes;
1471
1472   // Note: prefixes will not be empty.  The current data store implementation
1473   // stores all full-length hashes as both full and prefix hashes.
1474   safe_browsing::PrefixSetBuilder builder;
1475   std::vector<SBAddFullHash> full_hashes;
1476   if (!ip_blacklist_store_->FinishUpdate(empty_add_hashes,
1477                                          &builder, &full_hashes)) {
1478     RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH);
1479     LoadIpBlacklist(std::vector<SBAddFullHash>());  // Clear the list.
1480     return;
1481   }
1482
1483 #if defined(OS_MACOSX)
1484   base::mac::SetFileBackupExclusion(ip_blacklist_filename_);
1485 #endif
1486
1487   LoadIpBlacklist(full_hashes);
1488 }
1489
1490 void SafeBrowsingDatabaseNew::HandleCorruptDatabase() {
1491   // Reset the database after the current task has unwound (but only
1492   // reset once within the scope of a given task).
1493   if (!reset_factory_.HasWeakPtrs()) {
1494     RecordFailure(FAILURE_DATABASE_CORRUPT);
1495     base::MessageLoop::current()->PostTask(FROM_HERE,
1496         base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase,
1497                    reset_factory_.GetWeakPtr()));
1498   }
1499 }
1500
1501 void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
1502   RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER);
1503   corruption_detected_ = true;  // Stop updating the database.
1504   ResetDatabase();
1505   DLOG(FATAL) << "SafeBrowsing database was corrupt and reset";
1506 }
1507
1508 // TODO(shess): I'm not clear why this code doesn't have any
1509 // real error-handling.
1510 void SafeBrowsingDatabaseNew::LoadPrefixSet() {
1511   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1512   DCHECK(!browse_prefix_set_filename_.empty());
1513
1514   // If there is no database, the filter cannot be used.
1515   base::File::Info db_info;
1516   if (!base::GetFileInfo(browse_filename_, &db_info) || db_info.size == 0)
1517     return;
1518
1519   // Cleanup any stale bloom filter (no longer used).
1520   // TODO(shess): Track failure to delete?
1521   base::FilePath bloom_filter_filename =
1522       BloomFilterForFilename(browse_filename_);
1523   base::DeleteFile(bloom_filter_filename, false);
1524
1525   const base::TimeTicks before = base::TimeTicks::Now();
1526   browse_prefix_set_ = safe_browsing::PrefixSet::LoadFile(
1527       browse_prefix_set_filename_);
1528   DVLOG(1) << "SafeBrowsingDatabaseNew read prefix set in "
1529            << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
1530   UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before);
1531
1532   if (!browse_prefix_set_.get())
1533     RecordFailure(FAILURE_BROWSE_PREFIX_SET_READ);
1534 }
1535
1536 bool SafeBrowsingDatabaseNew::Delete() {
1537   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1538
1539   const bool r1 = browse_store_->Delete();
1540   if (!r1)
1541     RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1542
1543   const bool r2 = download_store_.get() ? download_store_->Delete() : true;
1544   if (!r2)
1545     RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1546
1547   const bool r3 = csd_whitelist_store_.get() ?
1548       csd_whitelist_store_->Delete() : true;
1549   if (!r3)
1550     RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1551
1552   const bool r4 = download_whitelist_store_.get() ?
1553       download_whitelist_store_->Delete() : true;
1554   if (!r4)
1555     RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1556
1557   base::FilePath bloom_filter_filename =
1558       BloomFilterForFilename(browse_filename_);
1559   const bool r5 = base::DeleteFile(bloom_filter_filename, false);
1560   if (!r5)
1561     RecordFailure(FAILURE_DATABASE_FILTER_DELETE);
1562
1563   const bool r6 = base::DeleteFile(browse_prefix_set_filename_, false);
1564   if (!r6)
1565     RecordFailure(FAILURE_BROWSE_PREFIX_SET_DELETE);
1566
1567   const bool r7 = base::DeleteFile(extension_blacklist_filename_, false);
1568   if (!r7)
1569     RecordFailure(FAILURE_EXTENSION_BLACKLIST_DELETE);
1570
1571   const bool r8 = base::DeleteFile(side_effect_free_whitelist_filename_,
1572                                     false);
1573   if (!r8)
1574     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE);
1575
1576   const bool r9 = base::DeleteFile(
1577       side_effect_free_whitelist_prefix_set_filename_,
1578       false);
1579   if (!r9)
1580     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE);
1581
1582   const bool r10 = base::DeleteFile(ip_blacklist_filename_, false);
1583   if (!r10)
1584     RecordFailure(FAILURE_IP_BLACKLIST_DELETE);
1585
1586   return r1 && r2 && r3 && r4 && r5 && r6 && r7 && r8 && r9 && r10;
1587 }
1588
1589 void SafeBrowsingDatabaseNew::WritePrefixSet() {
1590   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1591
1592   if (!browse_prefix_set_.get())
1593     return;
1594
1595   const base::TimeTicks before = base::TimeTicks::Now();
1596   const bool write_ok = browse_prefix_set_->WriteFile(
1597       browse_prefix_set_filename_);
1598   DVLOG(1) << "SafeBrowsingDatabaseNew wrote prefix set in "
1599            << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
1600   UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before);
1601
1602   if (!write_ok)
1603     RecordFailure(FAILURE_BROWSE_PREFIX_SET_WRITE);
1604
1605 #if defined(OS_MACOSX)
1606   base::mac::SetFileBackupExclusion(browse_prefix_set_filename_);
1607 #endif
1608 }
1609
1610 void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist* whitelist) {
1611   base::AutoLock locked(lookup_lock_);
1612   whitelist->second = true;
1613   whitelist->first.clear();
1614 }
1615
1616 void SafeBrowsingDatabaseNew::LoadWhitelist(
1617     const std::vector<SBAddFullHash>& full_hashes,
1618     SBWhitelist* whitelist) {
1619   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1620   if (full_hashes.size() > kMaxWhitelistSize) {
1621     WhitelistEverything(whitelist);
1622     return;
1623   }
1624
1625   std::vector<SBFullHash> new_whitelist;
1626   new_whitelist.reserve(full_hashes.size());
1627   for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
1628        it != full_hashes.end(); ++it) {
1629     new_whitelist.push_back(it->full_hash);
1630   }
1631   std::sort(new_whitelist.begin(), new_whitelist.end(), SBFullHashLess);
1632
1633   SBFullHash kill_switch = SBFullHashForString(kWhitelistKillSwitchUrl);
1634   if (std::binary_search(new_whitelist.begin(), new_whitelist.end(),
1635                          kill_switch, SBFullHashLess)) {
1636     // The kill switch is whitelisted hence we whitelist all URLs.
1637     WhitelistEverything(whitelist);
1638   } else {
1639     base::AutoLock locked(lookup_lock_);
1640     whitelist->second = false;
1641     whitelist->first.swap(new_whitelist);
1642   }
1643 }
1644
1645 void SafeBrowsingDatabaseNew::LoadIpBlacklist(
1646     const std::vector<SBAddFullHash>& full_hashes) {
1647   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1648   IPBlacklist new_blacklist;
1649   DVLOG(2) << "Writing IP blacklist of size: " << full_hashes.size();
1650   for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
1651        it != full_hashes.end();
1652        ++it) {
1653     const char* full_hash = it->full_hash.full_hash;
1654     DCHECK_EQ(crypto::kSHA256Length, arraysize(it->full_hash.full_hash));
1655     // The format of the IP blacklist is:
1656     // SHA-1(IPv6 prefix) + uint8(prefix size) + 11 unused bytes.
1657     std::string hashed_ip_prefix(full_hash, base::kSHA1Length);
1658     size_t prefix_size = static_cast<uint8>(full_hash[base::kSHA1Length]);
1659     if (prefix_size > kMaxIpPrefixSize || prefix_size < kMinIpPrefixSize) {
1660       DVLOG(2) << "Invalid IP prefix size in IP blacklist: " << prefix_size;
1661       RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_INVALID);
1662       new_blacklist.clear();  // Load empty blacklist.
1663       break;
1664     }
1665
1666     // We precompute the mask for the given subnet size to speed up lookups.
1667     // Basically we need to create a 16B long string which has the highest
1668     // |size| bits sets to one.
1669     std::string mask(net::kIPv6AddressSize, '\0');
1670     mask.replace(0, prefix_size / 8, prefix_size / 8, '\xFF');
1671     if ((prefix_size % 8) != 0) {
1672       mask[prefix_size / 8] = 0xFF << (8 - (prefix_size % 8));
1673     }
1674     DVLOG(2) << "Inserting malicious IP: "
1675              << " raw:" << base::HexEncode(full_hash, crypto::kSHA256Length)
1676              << " mask:" << base::HexEncode(mask.data(), mask.size())
1677              << " prefix_size:" << prefix_size
1678              << " hashed_ip:" << base::HexEncode(hashed_ip_prefix.data(),
1679                                                  hashed_ip_prefix.size());
1680     new_blacklist[mask].insert(hashed_ip_prefix);
1681   }
1682
1683   base::AutoLock locked(lookup_lock_);
1684   ip_blacklist_.swap(new_blacklist);
1685 }
1686
1687 bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() {
1688   SBFullHash malware_kill_switch = SBFullHashForString(kMalwareIPKillSwitchUrl);
1689   std::vector<SBFullHash> full_hashes;
1690   full_hashes.push_back(malware_kill_switch);
1691   return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
1692 }