Upstream version 10.39.225.0
[platform/framework/web/crosswalk.git] / src / chrome / browser / safe_browsing / safe_browsing_database.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/safe_browsing/safe_browsing_database.h"
6
7 #include <algorithm>
8 #include <iterator>
9
10 #include "base/bind.h"
11 #include "base/files/file_util.h"
12 #include "base/message_loop/message_loop.h"
13 #include "base/metrics/histogram.h"
14 #include "base/metrics/stats_counters.h"
15 #include "base/process/process.h"
16 #include "base/process/process_metrics.h"
17 #include "base/sha1.h"
18 #include "base/strings/string_number_conversions.h"
19 #include "base/strings/stringprintf.h"
20 #include "base/time/time.h"
21 #include "chrome/browser/safe_browsing/prefix_set.h"
22 #include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
23 #include "content/public/browser/browser_thread.h"
24 #include "crypto/sha2.h"
25 #include "net/base/net_util.h"
26 #include "url/gurl.h"
27
28 #if defined(OS_MACOSX)
29 #include "base/mac/mac_util.h"
30 #endif
31
32 using content::BrowserThread;
33
34 namespace {
35
36 // Filename suffix for the bloom filter.
37 const base::FilePath::CharType kBloomFilterFile[] =
38     FILE_PATH_LITERAL(" Filter 2");
39 // Filename suffix for the prefix set.
40 const base::FilePath::CharType kPrefixSetFile[] =
41     FILE_PATH_LITERAL(" Prefix Set");
42 // Filename suffix for download store.
43 const base::FilePath::CharType kDownloadDBFile[] =
44     FILE_PATH_LITERAL(" Download");
45 // Filename suffix for client-side phishing detection whitelist store.
46 const base::FilePath::CharType kCsdWhitelistDBFile[] =
47     FILE_PATH_LITERAL(" Csd Whitelist");
48 // Filename suffix for the download whitelist store.
49 const base::FilePath::CharType kDownloadWhitelistDBFile[] =
50     FILE_PATH_LITERAL(" Download Whitelist");
51 // Filename suffix for the extension blacklist store.
52 const base::FilePath::CharType kExtensionBlacklistDBFile[] =
53     FILE_PATH_LITERAL(" Extension Blacklist");
54 // Filename suffix for the side-effect free whitelist store.
55 const base::FilePath::CharType kSideEffectFreeWhitelistDBFile[] =
56     FILE_PATH_LITERAL(" Side-Effect Free Whitelist");
57 // Filename suffix for the csd malware IP blacklist store.
58 const base::FilePath::CharType kIPBlacklistDBFile[] =
59     FILE_PATH_LITERAL(" IP Blacklist");
60
61 // Filename suffix for browse store.
62 // TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
63 // Unfortunately, to change the name implies lots of transition code
64 // for little benefit.  If/when file formats change (say to put all
65 // the data in one file), that would be a convenient point to rectify
66 // this.
67 // TODO(shess): This shouldn't be OS-driven <http://crbug.com/394379>
68 #if defined(OS_ANDROID)
69 // NOTE(shess): This difference is also reflected in the list name in
70 // safe_browsing_util.cc.
71 // TODO(shess): Spin up an alternate list id which can be persisted in the
72 // store.  Then if a mistake is made, it won't cause confusion between
73 // incompatible lists.
74 const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Mobile");
75 #else
76 const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
77 #endif
78
79 // Maximum number of entries we allow in any of the whitelists.
80 // If a whitelist on disk contains more entries then all lookups to
81 // the whitelist will be considered a match.
82 const size_t kMaxWhitelistSize = 5000;
83
84 // If the hash of this exact expression is on a whitelist then all
85 // lookups to this whitelist will be considered a match.
86 const char kWhitelistKillSwitchUrl[] =
87     "sb-ssl.google.com/safebrowsing/csd/killswitch";  // Don't change this!
88
89 // If the hash of this exact expression is on a whitelist then the
90 // malware IP blacklisting feature will be disabled in csd.
91 // Don't change this!
92 const char kMalwareIPKillSwitchUrl[] =
93     "sb-ssl.google.com/safebrowsing/csd/killswitch_malware";
94
95 const size_t kMaxIpPrefixSize = 128;
96 const size_t kMinIpPrefixSize = 1;
97
98 // To save space, the incoming |chunk_id| and |list_id| are combined
99 // into an |encoded_chunk_id| for storage by shifting the |list_id|
100 // into the low-order bits.  These functions decode that information.
101 // TODO(lzheng): It was reasonable when database is saved in sqlite, but
102 // there should be better ways to save chunk_id and list_id after we use
103 // SafeBrowsingStoreFile.
// Extracts the list-id bit that EncodeChunkId() stored in the lowest bit
// of |encoded_chunk_id|.
int GetListIdBit(const int encoded_chunk_id) {
  const int kListIdMask = 1;
  return encoded_chunk_id & kListIdMask;
}
// Recovers the chunk number from |encoded_chunk_id| by shifting out the
// list-id bit held in the lowest position.
int DecodeChunkId(int encoded_chunk_id) {
  const int chunk_id = encoded_chunk_id >> 1;
  return chunk_id;
}
110 int EncodeChunkId(const int chunk, const int list_id) {
111   DCHECK_NE(list_id, safe_browsing_util::INVALID);
112   return chunk << 1 | list_id % 2;
113 }
114
115 // Generate the set of full hashes to check for |url|.  If
116 // |include_whitelist_hashes| is true we will generate additional path-prefixes
117 // to match against the csd whitelist.  E.g., if the path-prefix /foo is on the
118 // whitelist it should also match /foo/bar which is not the case for all the
119 // other lists.  We'll also always add a pattern for the empty path.
120 // TODO(shess): This function is almost the same as
121 // |CompareFullHashes()| in safe_browsing_util.cc, except that code
122 // does an early exit on match.  Since match should be the infrequent
123 // case (phishing or malware found), consider combining this function
124 // with that one.
125 void BrowseFullHashesToCheck(const GURL& url,
126                              bool include_whitelist_hashes,
127                              std::vector<SBFullHash>* full_hashes) {
128   std::vector<std::string> hosts;
129   if (url.HostIsIPAddress()) {
130     hosts.push_back(url.host());
131   } else {
132     safe_browsing_util::GenerateHostsToCheck(url, &hosts);
133   }
134
135   std::vector<std::string> paths;
136   safe_browsing_util::GeneratePathsToCheck(url, &paths);
137
138   for (size_t i = 0; i < hosts.size(); ++i) {
139     for (size_t j = 0; j < paths.size(); ++j) {
140       const std::string& path = paths[j];
141       full_hashes->push_back(SBFullHashForString(hosts[i] + path));
142
143       // We may have /foo as path-prefix in the whitelist which should
144       // also match with /foo/bar and /foo?bar.  Hence, for every path
145       // that ends in '/' we also add the path without the slash.
146       if (include_whitelist_hashes &&
147           path.size() > 1 &&
148           path[path.size() - 1] == '/') {
149         full_hashes->push_back(
150             SBFullHashForString(hosts[i] + path.substr(0, path.size() - 1)));
151       }
152     }
153   }
154 }
155
156 // Get the prefixes matching the download |urls|.
157 void GetDownloadUrlPrefixes(const std::vector<GURL>& urls,
158                             std::vector<SBPrefix>* prefixes) {
159   std::vector<SBFullHash> full_hashes;
160   for (size_t i = 0; i < urls.size(); ++i)
161     BrowseFullHashesToCheck(urls[i], false, &full_hashes);
162
163   for (size_t i = 0; i < full_hashes.size(); ++i)
164     prefixes->push_back(full_hashes[i].prefix);
165 }
166
167 // Helper function to compare addprefixes in |store| with |prefixes|.
168 // The |list_bit| indicates which list (url or hash) to compare.
169 //
170 // Returns true if there is a match, |*prefix_hits| (if non-NULL) will contain
171 // the actual matching prefixes.
172 bool MatchAddPrefixes(SafeBrowsingStore* store,
173                       int list_bit,
174                       const std::vector<SBPrefix>& prefixes,
175                       std::vector<SBPrefix>* prefix_hits) {
176   prefix_hits->clear();
177   bool found_match = false;
178
179   SBAddPrefixes add_prefixes;
180   store->GetAddPrefixes(&add_prefixes);
181   for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
182        iter != add_prefixes.end(); ++iter) {
183     for (size_t j = 0; j < prefixes.size(); ++j) {
184       const SBPrefix& prefix = prefixes[j];
185       if (prefix == iter->prefix &&
186           GetListIdBit(iter->chunk_id) == list_bit) {
187         prefix_hits->push_back(prefix);
188         found_match = true;
189       }
190     }
191   }
192   return found_match;
193 }
194
195 // This function generates a chunk range string for |chunks|. It
196 // outputs one chunk range string per list and writes it to the
197 // |list_ranges| vector.  We expect |list_ranges| to already be of the
198 // right size.  E.g., if |chunks| contains chunks with two different
199 // list ids then |list_ranges| must contain two elements.
200 void GetChunkRanges(const std::vector<int>& chunks,
201                     std::vector<std::string>* list_ranges) {
202   // Since there are 2 possible list ids, there must be exactly two
203   // list ranges.  Even if the chunk data should only contain one
204   // line, this code has to somehow handle corruption.
205   DCHECK_EQ(2U, list_ranges->size());
206
207   std::vector<std::vector<int> > decoded_chunks(list_ranges->size());
208   for (std::vector<int>::const_iterator iter = chunks.begin();
209        iter != chunks.end(); ++iter) {
210     int mod_list_id = GetListIdBit(*iter);
211     DCHECK_GE(mod_list_id, 0);
212     DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size());
213     decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter));
214   }
215   for (size_t i = 0; i < decoded_chunks.size(); ++i) {
216     ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i]));
217   }
218 }
219
220 // Helper function to create chunk range lists for Browse related
221 // lists.
222 void UpdateChunkRanges(SafeBrowsingStore* store,
223                        const std::vector<std::string>& listnames,
224                        std::vector<SBListChunkRanges>* lists) {
225   if (!store)
226     return;
227
228   DCHECK_GT(listnames.size(), 0U);
229   DCHECK_LE(listnames.size(), 2U);
230   std::vector<int> add_chunks;
231   std::vector<int> sub_chunks;
232   store->GetAddChunks(&add_chunks);
233   store->GetSubChunks(&sub_chunks);
234
235   // Always decode 2 ranges, even if only the first one is expected.
236   // The loop below will only load as many into |lists| as |listnames|
237   // indicates.
238   std::vector<std::string> adds(2);
239   std::vector<std::string> subs(2);
240   GetChunkRanges(add_chunks, &adds);
241   GetChunkRanges(sub_chunks, &subs);
242
243   for (size_t i = 0; i < listnames.size(); ++i) {
244     const std::string& listname = listnames[i];
245     DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2,
246               static_cast<int>(i % 2));
247     DCHECK_NE(safe_browsing_util::GetListId(listname),
248               safe_browsing_util::INVALID);
249     lists->push_back(SBListChunkRanges(listname));
250     lists->back().adds.swap(adds[i]);
251     lists->back().subs.swap(subs[i]);
252   }
253 }
254
255 void UpdateChunkRangesForLists(SafeBrowsingStore* store,
256                                const std::string& listname0,
257                                const std::string& listname1,
258                                std::vector<SBListChunkRanges>* lists) {
259   std::vector<std::string> listnames;
260   listnames.push_back(listname0);
261   listnames.push_back(listname1);
262   UpdateChunkRanges(store, listnames, lists);
263 }
264
265 void UpdateChunkRangesForList(SafeBrowsingStore* store,
266                               const std::string& listname,
267                               std::vector<SBListChunkRanges>* lists) {
268   UpdateChunkRanges(store, std::vector<std::string>(1, listname), lists);
269 }
270
271 // This code always checks for non-zero file size.  This helper makes
272 // that less verbose.
273 int64 GetFileSizeOrZero(const base::FilePath& file_path) {
274   int64 size_64;
275   if (!base::GetFileSize(file_path, &size_64))
276     return 0;
277   return size_64;
278 }
279
280 // Helper for ContainsBrowseUrlHashes().  Returns true if an un-expired match
281 // for |full_hash| is found in |cache|, with any matches appended to |results|
282 // (true can be returned with zero matches).  |expire_base| is used to check the
283 // cache lifetime of matches, expired matches will be discarded from |cache|.
284 bool GetCachedFullHash(std::map<SBPrefix, SBCachedFullHashResult>* cache,
285                        const SBFullHash& full_hash,
286                        const base::Time& expire_base,
287                        std::vector<SBFullHashResult>* results) {
288   // First check if there is a valid cached result for this prefix.
289   std::map<SBPrefix, SBCachedFullHashResult>::iterator
290       citer = cache->find(full_hash.prefix);
291   if (citer == cache->end())
292     return false;
293
294   // Remove expired entries.
295   SBCachedFullHashResult& cached_result = citer->second;
296   if (cached_result.expire_after <= expire_base) {
297     cache->erase(citer);
298     return false;
299   }
300
301   // Find full-hash matches.
302   std::vector<SBFullHashResult>& cached_hashes = cached_result.full_hashes;
303   for (size_t i = 0; i < cached_hashes.size(); ++i) {
304     if (SBFullHashEqual(full_hash, cached_hashes[i].hash))
305       results->push_back(cached_hashes[i]);
306   }
307
308   return true;
309 }
310
311 }  // namespace
312
313 // The default SafeBrowsingDatabaseFactory.
314 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
315  public:
316   virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
317       bool enable_download_protection,
318       bool enable_client_side_whitelist,
319       bool enable_download_whitelist,
320       bool enable_extension_blacklist,
321       bool enable_side_effect_free_whitelist,
322       bool enable_ip_blacklist) OVERRIDE {
323     return new SafeBrowsingDatabaseNew(
324         new SafeBrowsingStoreFile,
325         enable_download_protection ? new SafeBrowsingStoreFile : NULL,
326         enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL,
327         enable_download_whitelist ? new SafeBrowsingStoreFile : NULL,
328         enable_extension_blacklist ? new SafeBrowsingStoreFile : NULL,
329         enable_side_effect_free_whitelist ? new SafeBrowsingStoreFile : NULL,
330         enable_ip_blacklist ? new SafeBrowsingStoreFile : NULL);
331   }
332
333   SafeBrowsingDatabaseFactoryImpl() { }
334
335  private:
336   DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl);
337 };
338
339 // static
340 SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL;
341
// Factory method, non-thread safe. Caller has to make sure this is called
343 // on SafeBrowsing Thread.
344 // TODO(shess): There's no need for a factory any longer.  Convert
345 // SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create()
346 // callers just construct things directly.
347 SafeBrowsingDatabase* SafeBrowsingDatabase::Create(
348     bool enable_download_protection,
349     bool enable_client_side_whitelist,
350     bool enable_download_whitelist,
351     bool enable_extension_blacklist,
352     bool enable_side_effect_free_whitelist,
353     bool enable_ip_blacklist) {
354   if (!factory_)
355     factory_ = new SafeBrowsingDatabaseFactoryImpl();
356   return factory_->CreateSafeBrowsingDatabase(
357       enable_download_protection,
358       enable_client_side_whitelist,
359       enable_download_whitelist,
360       enable_extension_blacklist,
361       enable_side_effect_free_whitelist,
362       enable_ip_blacklist);
363 }
364
365 SafeBrowsingDatabase::~SafeBrowsingDatabase() {
366 }
367
368 // static
369 base::FilePath SafeBrowsingDatabase::BrowseDBFilename(
370     const base::FilePath& db_base_filename) {
371   return base::FilePath(db_base_filename.value() + kBrowseDBFile);
372 }
373
374 // static
375 base::FilePath SafeBrowsingDatabase::DownloadDBFilename(
376     const base::FilePath& db_base_filename) {
377   return base::FilePath(db_base_filename.value() + kDownloadDBFile);
378 }
379
380 // static
381 base::FilePath SafeBrowsingDatabase::BloomFilterForFilename(
382     const base::FilePath& db_filename) {
383   return base::FilePath(db_filename.value() + kBloomFilterFile);
384 }
385
386 // static
387 base::FilePath SafeBrowsingDatabase::PrefixSetForFilename(
388     const base::FilePath& db_filename) {
389   return base::FilePath(db_filename.value() + kPrefixSetFile);
390 }
391
392 // static
393 base::FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename(
394     const base::FilePath& db_filename) {
395   return base::FilePath(db_filename.value() + kCsdWhitelistDBFile);
396 }
397
398 // static
399 base::FilePath SafeBrowsingDatabase::DownloadWhitelistDBFilename(
400     const base::FilePath& db_filename) {
401   return base::FilePath(db_filename.value() + kDownloadWhitelistDBFile);
402 }
403
404 // static
405 base::FilePath SafeBrowsingDatabase::ExtensionBlacklistDBFilename(
406     const base::FilePath& db_filename) {
407   return base::FilePath(db_filename.value() + kExtensionBlacklistDBFile);
408 }
409
410 // static
411 base::FilePath SafeBrowsingDatabase::SideEffectFreeWhitelistDBFilename(
412     const base::FilePath& db_filename) {
413   return base::FilePath(db_filename.value() + kSideEffectFreeWhitelistDBFile);
414 }
415
416 // static
417 base::FilePath SafeBrowsingDatabase::IpBlacklistDBFilename(
418     const base::FilePath& db_filename) {
419   return base::FilePath(db_filename.value() + kIPBlacklistDBFile);
420 }
421
422 SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) {
423   if (list_id == safe_browsing_util::PHISH ||
424       list_id == safe_browsing_util::MALWARE) {
425     return browse_store_.get();
426   } else if (list_id == safe_browsing_util::BINURL) {
427     return download_store_.get();
428   } else if (list_id == safe_browsing_util::CSDWHITELIST) {
429     return csd_whitelist_store_.get();
430   } else if (list_id == safe_browsing_util::DOWNLOADWHITELIST) {
431     return download_whitelist_store_.get();
432   } else if (list_id == safe_browsing_util::EXTENSIONBLACKLIST) {
433     return extension_blacklist_store_.get();
434   } else if (list_id == safe_browsing_util::SIDEEFFECTFREEWHITELIST) {
435     return side_effect_free_whitelist_store_.get();
436   } else if (list_id == safe_browsing_util::IPBLACKLIST) {
437     return ip_blacklist_store_.get();
438   }
439   return NULL;
440 }
441
442 // static
443 void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) {
444   UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type,
445                             FAILURE_DATABASE_MAX);
446 }
447
448 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew()
449     : creation_loop_(base::MessageLoop::current()),
450       browse_store_(new SafeBrowsingStoreFile),
451       corruption_detected_(false),
452       change_detected_(false),
453       reset_factory_(this) {
454   DCHECK(browse_store_.get());
455   DCHECK(!download_store_.get());
456   DCHECK(!csd_whitelist_store_.get());
457   DCHECK(!download_whitelist_store_.get());
458   DCHECK(!extension_blacklist_store_.get());
459   DCHECK(!side_effect_free_whitelist_store_.get());
460   DCHECK(!ip_blacklist_store_.get());
461 }
462
463 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew(
464     SafeBrowsingStore* browse_store,
465     SafeBrowsingStore* download_store,
466     SafeBrowsingStore* csd_whitelist_store,
467     SafeBrowsingStore* download_whitelist_store,
468     SafeBrowsingStore* extension_blacklist_store,
469     SafeBrowsingStore* side_effect_free_whitelist_store,
470     SafeBrowsingStore* ip_blacklist_store)
471     : creation_loop_(base::MessageLoop::current()),
472       browse_store_(browse_store),
473       download_store_(download_store),
474       csd_whitelist_store_(csd_whitelist_store),
475       download_whitelist_store_(download_whitelist_store),
476       extension_blacklist_store_(extension_blacklist_store),
477       side_effect_free_whitelist_store_(side_effect_free_whitelist_store),
478       ip_blacklist_store_(ip_blacklist_store),
479       corruption_detected_(false),
480       reset_factory_(this) {
481   DCHECK(browse_store_.get());
482 }
483
484 SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() {
485   // The DCHECK is disabled due to crbug.com/338486 .
486   // DCHECK_EQ(creation_loop_, base::MessageLoop::current());
487 }
488
489 void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) {
490   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
491
492   // This should not be run multiple times.
493   DCHECK(filename_base_.empty());
494
495   filename_base_ = filename_base;
496
497   // TODO(shess): The various stores are really only necessary while doing
498   // updates, or when querying a store directly (see |ContainsDownloadUrl()|).
499   // The store variables are also tested to see if a list is enabled.  Perhaps
500   // the stores could be refactored into an update object so that they are only
501   // live in memory while being actively used.  The sense of enabled probably
502   // belongs in protocol_manager or database_manager.
503
504   browse_store_->Init(
505       BrowseDBFilename(filename_base_),
506       base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
507                  base::Unretained(this)));
508
509   {
510     // NOTE: There is no need to grab the lock in this function, since
511     // until it returns, there are no pointers to this class on other
512     // threads.  Then again, that means there is no possibility of
513     // contention on the lock...
514     base::AutoLock locked(lookup_lock_);
515     browse_gethash_cache_.clear();
516     LoadPrefixSet();
517   }
518
519   if (download_store_.get()) {
520     download_store_->Init(
521         DownloadDBFilename(filename_base_),
522         base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
523                    base::Unretained(this)));
524   }
525
526   if (csd_whitelist_store_.get()) {
527     csd_whitelist_store_->Init(
528         CsdWhitelistDBFilename(filename_base_),
529         base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
530                    base::Unretained(this)));
531
532     std::vector<SBAddFullHash> full_hashes;
533     if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) {
534       LoadWhitelist(full_hashes, &csd_whitelist_);
535     } else {
536       WhitelistEverything(&csd_whitelist_);
537     }
538   } else {
539     WhitelistEverything(&csd_whitelist_);  // Just to be safe.
540   }
541
542   if (download_whitelist_store_.get()) {
543     download_whitelist_store_->Init(
544         DownloadWhitelistDBFilename(filename_base_),
545         base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
546                    base::Unretained(this)));
547
548     std::vector<SBAddFullHash> full_hashes;
549     if (download_whitelist_store_->GetAddFullHashes(&full_hashes)) {
550       LoadWhitelist(full_hashes, &download_whitelist_);
551     } else {
552       WhitelistEverything(&download_whitelist_);
553     }
554   } else {
555     WhitelistEverything(&download_whitelist_);  // Just to be safe.
556   }
557
558   if (extension_blacklist_store_.get()) {
559     extension_blacklist_store_->Init(
560         ExtensionBlacklistDBFilename(filename_base_),
561         base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
562                    base::Unretained(this)));
563   }
564
565   if (side_effect_free_whitelist_store_.get()) {
566     const base::FilePath side_effect_free_whitelist_filename =
567         SideEffectFreeWhitelistDBFilename(filename_base_);
568     const base::FilePath side_effect_free_whitelist_prefix_set_filename =
569         PrefixSetForFilename(side_effect_free_whitelist_filename);
570     side_effect_free_whitelist_store_->Init(
571         side_effect_free_whitelist_filename,
572         base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
573                    base::Unretained(this)));
574
575     // Only use the prefix set if database is present and non-empty.
576     if (GetFileSizeOrZero(side_effect_free_whitelist_filename)) {
577       const base::TimeTicks before = base::TimeTicks::Now();
578       side_effect_free_whitelist_prefix_set_ =
579           safe_browsing::PrefixSet::LoadFile(
580               side_effect_free_whitelist_prefix_set_filename);
581       UMA_HISTOGRAM_TIMES("SB2.SideEffectFreeWhitelistPrefixSetLoad",
582                           base::TimeTicks::Now() - before);
583       if (!side_effect_free_whitelist_prefix_set_.get())
584         RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ);
585     }
586   } else {
587     // Delete any files of the side-effect free sidelist that may be around
588     // from when it was previously enabled.
589     SafeBrowsingStoreFile::DeleteStore(
590         SideEffectFreeWhitelistDBFilename(filename_base_));
591     base::DeleteFile(
592         PrefixSetForFilename(SideEffectFreeWhitelistDBFilename(filename_base_)),
593         false);
594   }
595
596   if (ip_blacklist_store_.get()) {
597     ip_blacklist_store_->Init(
598         IpBlacklistDBFilename(filename_base_),
599         base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
600                    base::Unretained(this)));
601
602     std::vector<SBAddFullHash> full_hashes;
603     if (ip_blacklist_store_->GetAddFullHashes(&full_hashes)) {
604       LoadIpBlacklist(full_hashes);
605     } else {
606       LoadIpBlacklist(std::vector<SBAddFullHash>());  // Clear the list.
607     }
608   }
609 }
610
611 bool SafeBrowsingDatabaseNew::ResetDatabase() {
612   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
613
614   // Delete files on disk.
615   // TODO(shess): Hard to see where one might want to delete without a
616   // reset.  Perhaps inline |Delete()|?
617   if (!Delete())
618     return false;
619
620   // Reset objects in memory.
621   {
622     base::AutoLock locked(lookup_lock_);
623     browse_gethash_cache_.clear();
624     browse_prefix_set_.reset();
625     side_effect_free_whitelist_prefix_set_.reset();
626     ip_blacklist_.clear();
627   }
628   // Wants to acquire the lock itself.
629   WhitelistEverything(&csd_whitelist_);
630   WhitelistEverything(&download_whitelist_);
631   return true;
632 }
633
634 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
635     const GURL& url,
636     std::vector<SBPrefix>* prefix_hits,
637     std::vector<SBFullHashResult>* cache_hits) {
638   // Clear the results first.
639   prefix_hits->clear();
640   cache_hits->clear();
641
642   std::vector<SBFullHash> full_hashes;
643   BrowseFullHashesToCheck(url, false, &full_hashes);
644   if (full_hashes.empty())
645     return false;
646
647   return ContainsBrowseUrlHashes(full_hashes, prefix_hits, cache_hits);
648 }
649
650 bool SafeBrowsingDatabaseNew::ContainsBrowseUrlHashes(
651     const std::vector<SBFullHash>& full_hashes,
652     std::vector<SBPrefix>* prefix_hits,
653     std::vector<SBFullHashResult>* cache_hits) {
654   // Used to determine cache expiration.
655   const base::Time now = base::Time::Now();
656
657   // This function is called on the I/O thread, prevent changes to
658   // filter and caches.
659   base::AutoLock locked(lookup_lock_);
660
661   // |browse_prefix_set_| is empty until it is either read from disk, or the
662   // first update populates it.  Bail out without a hit if not yet
663   // available.
664   if (!browse_prefix_set_.get())
665     return false;
666
667   for (size_t i = 0; i < full_hashes.size(); ++i) {
668     if (!GetCachedFullHash(&browse_gethash_cache_,
669                            full_hashes[i],
670                            now,
671                            cache_hits)) {
672       // No valid cached result, check the database.
673       if (browse_prefix_set_->Exists(full_hashes[i]))
674         prefix_hits->push_back(full_hashes[i].prefix);
675     }
676   }
677
678   // Multiple full hashes could share prefix, remove duplicates.
679   std::sort(prefix_hits->begin(), prefix_hits->end());
680   prefix_hits->erase(std::unique(prefix_hits->begin(), prefix_hits->end()),
681                      prefix_hits->end());
682
683   return !prefix_hits->empty() || !cache_hits->empty();
684 }
685
686 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
687     const std::vector<GURL>& urls,
688     std::vector<SBPrefix>* prefix_hits) {
689   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
690
691   // Ignore this check when download checking is not enabled.
692   if (!download_store_.get())
693     return false;
694
695   std::vector<SBPrefix> prefixes;
696   GetDownloadUrlPrefixes(urls, &prefixes);
697   return MatchAddPrefixes(download_store_.get(),
698                           safe_browsing_util::BINURL % 2,
699                           prefixes,
700                           prefix_hits);
701 }
702
703 bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) {
704   // This method is theoretically thread-safe but we expect all calls to
705   // originate from the IO thread.
706   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
707   std::vector<SBFullHash> full_hashes;
708   BrowseFullHashesToCheck(url, true, &full_hashes);
709   return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
710 }
711
712 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedUrl(const GURL& url) {
713   std::vector<SBFullHash> full_hashes;
714   BrowseFullHashesToCheck(url, true, &full_hashes);
715   return ContainsWhitelistedHashes(download_whitelist_, full_hashes);
716 }
717
718 bool SafeBrowsingDatabaseNew::ContainsExtensionPrefixes(
719     const std::vector<SBPrefix>& prefixes,
720     std::vector<SBPrefix>* prefix_hits) {
721   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
722   if (!extension_blacklist_store_)
723     return false;
724
725   return MatchAddPrefixes(extension_blacklist_store_.get(),
726                           safe_browsing_util::EXTENSIONBLACKLIST % 2,
727                           prefixes,
728                           prefix_hits);
729 }
730
731 bool SafeBrowsingDatabaseNew::ContainsSideEffectFreeWhitelistUrl(
732     const GURL& url) {
733   std::string host;
734   std::string path;
735   std::string query;
736   safe_browsing_util::CanonicalizeUrl(url, &host, &path, &query);
737   std::string url_to_check = host + path;
738   if (!query.empty())
739     url_to_check +=  "?" + query;
740   SBFullHash full_hash = SBFullHashForString(url_to_check);
741
742   // This function can be called on any thread, so lock against any changes
743   base::AutoLock locked(lookup_lock_);
744
745   // |side_effect_free_whitelist_prefix_set_| is empty until it is either read
746   // from disk, or the first update populates it.  Bail out without a hit if
747   // not yet available.
748   if (!side_effect_free_whitelist_prefix_set_.get())
749     return false;
750
751   return side_effect_free_whitelist_prefix_set_->Exists(full_hash);
752 }
753
754 bool SafeBrowsingDatabaseNew::ContainsMalwareIP(const std::string& ip_address) {
755   net::IPAddressNumber ip_number;
756   if (!net::ParseIPLiteralToNumber(ip_address, &ip_number))
757     return false;
758   if (ip_number.size() == net::kIPv4AddressSize)
759     ip_number = net::ConvertIPv4NumberToIPv6Number(ip_number);
760   if (ip_number.size() != net::kIPv6AddressSize)
761     return false;  // better safe than sorry.
762
763   // This function can be called from any thread.
764   base::AutoLock locked(lookup_lock_);
765   for (IPBlacklist::const_iterator it = ip_blacklist_.begin();
766        it != ip_blacklist_.end();
767        ++it) {
768     const std::string& mask = it->first;
769     DCHECK_EQ(mask.size(), ip_number.size());
770     std::string subnet(net::kIPv6AddressSize, '\0');
771     for (size_t i = 0; i < net::kIPv6AddressSize; ++i) {
772       subnet[i] = ip_number[i] & mask[i];
773     }
774     const std::string hash = base::SHA1HashString(subnet);
775     DVLOG(2) << "Lookup Malware IP: "
776              << " ip:" << ip_address
777              << " mask:" << base::HexEncode(mask.data(), mask.size())
778              << " subnet:" << base::HexEncode(subnet.data(), subnet.size())
779              << " hash:" << base::HexEncode(hash.data(), hash.size());
780     if (it->second.count(hash) > 0) {
781       return true;
782     }
783   }
784   return false;
785 }
786
787 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedString(
788     const std::string& str) {
789   std::vector<SBFullHash> hashes;
790   hashes.push_back(SBFullHashForString(str));
791   return ContainsWhitelistedHashes(download_whitelist_, hashes);
792 }
793
794 bool SafeBrowsingDatabaseNew::ContainsWhitelistedHashes(
795     const SBWhitelist& whitelist,
796     const std::vector<SBFullHash>& hashes) {
797   base::AutoLock l(lookup_lock_);
798   if (whitelist.second)
799     return true;
800   for (std::vector<SBFullHash>::const_iterator it = hashes.begin();
801        it != hashes.end(); ++it) {
802     if (std::binary_search(whitelist.first.begin(), whitelist.first.end(),
803                            *it, SBFullHashLess)) {
804       return true;
805     }
806   }
807   return false;
808 }
809
810 // Helper to insert add-chunk entries.
811 void SafeBrowsingDatabaseNew::InsertAddChunk(
812     SafeBrowsingStore* store,
813     const safe_browsing_util::ListType list_id,
814     const SBChunkData& chunk_data) {
815   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
816   DCHECK(store);
817
818   // The server can give us a chunk that we already have because
819   // it's part of a range.  Don't add it again.
820   const int chunk_id = chunk_data.ChunkNumber();
821   const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
822   if (store->CheckAddChunk(encoded_chunk_id))
823     return;
824
825   store->SetAddChunk(encoded_chunk_id);
826   if (chunk_data.IsPrefix()) {
827     const size_t c = chunk_data.PrefixCount();
828     for (size_t i = 0; i < c; ++i) {
829       STATS_COUNTER("SB.PrefixAdd", 1);
830       store->WriteAddPrefix(encoded_chunk_id, chunk_data.PrefixAt(i));
831     }
832   } else {
833     const size_t c = chunk_data.FullHashCount();
834     for (size_t i = 0; i < c; ++i) {
835       STATS_COUNTER("SB.PrefixAddFull", 1);
836       store->WriteAddHash(encoded_chunk_id, chunk_data.FullHashAt(i));
837     }
838   }
839 }
840
841 // Helper to insert sub-chunk entries.
842 void SafeBrowsingDatabaseNew::InsertSubChunk(
843     SafeBrowsingStore* store,
844     const safe_browsing_util::ListType list_id,
845     const SBChunkData& chunk_data) {
846   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
847   DCHECK(store);
848
849   // The server can give us a chunk that we already have because
850   // it's part of a range.  Don't add it again.
851   const int chunk_id = chunk_data.ChunkNumber();
852   const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
853   if (store->CheckSubChunk(encoded_chunk_id))
854     return;
855
856   store->SetSubChunk(encoded_chunk_id);
857   if (chunk_data.IsPrefix()) {
858     const size_t c = chunk_data.PrefixCount();
859     for (size_t i = 0; i < c; ++i) {
860       STATS_COUNTER("SB.PrefixSub", 1);
861       const int add_chunk_id = chunk_data.AddChunkNumberAt(i);
862       const int encoded_add_chunk_id = EncodeChunkId(add_chunk_id, list_id);
863       store->WriteSubPrefix(encoded_chunk_id, encoded_add_chunk_id,
864                             chunk_data.PrefixAt(i));
865     }
866   } else {
867     const size_t c = chunk_data.FullHashCount();
868     for (size_t i = 0; i < c; ++i) {
869       STATS_COUNTER("SB.PrefixSubFull", 1);
870       const int add_chunk_id = chunk_data.AddChunkNumberAt(i);
871       const int encoded_add_chunk_id = EncodeChunkId(add_chunk_id, list_id);
872       store->WriteSubHash(encoded_chunk_id, encoded_add_chunk_id,
873                           chunk_data.FullHashAt(i));
874     }
875   }
876 }
877
878 void SafeBrowsingDatabaseNew::InsertChunks(
879     const std::string& list_name,
880     const std::vector<SBChunkData*>& chunks) {
881   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
882
883   if (corruption_detected_ || chunks.empty())
884     return;
885
886   const base::TimeTicks before = base::TimeTicks::Now();
887
888   // TODO(shess): The caller should just pass list_id.
889   const safe_browsing_util::ListType list_id =
890       safe_browsing_util::GetListId(list_name);
891
892   SafeBrowsingStore* store = GetStore(list_id);
893   if (!store) return;
894
895   change_detected_ = true;
896
897   // TODO(shess): I believe that the list is always add or sub.  Can this use
898   // that productively?
899   store->BeginChunk();
900   for (size_t i = 0; i < chunks.size(); ++i) {
901     if (chunks[i]->IsAdd()) {
902       InsertAddChunk(store, list_id, *chunks[i]);
903     } else if (chunks[i]->IsSub()) {
904       InsertSubChunk(store, list_id, *chunks[i]);
905     } else {
906       NOTREACHED();
907     }
908   }
909   store->FinishChunk();
910
911   UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before);
912 }
913
914 void SafeBrowsingDatabaseNew::DeleteChunks(
915     const std::vector<SBChunkDelete>& chunk_deletes) {
916   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
917
918   if (corruption_detected_ || chunk_deletes.empty())
919     return;
920
921   const std::string& list_name = chunk_deletes.front().list_name;
922   const safe_browsing_util::ListType list_id =
923       safe_browsing_util::GetListId(list_name);
924
925   SafeBrowsingStore* store = GetStore(list_id);
926   if (!store) return;
927
928   change_detected_ = true;
929
930   for (size_t i = 0; i < chunk_deletes.size(); ++i) {
931     std::vector<int> chunk_numbers;
932     RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers);
933     for (size_t j = 0; j < chunk_numbers.size(); ++j) {
934       const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id);
935       if (chunk_deletes[i].is_sub_del)
936         store->DeleteSubChunk(encoded_chunk_id);
937       else
938         store->DeleteAddChunk(encoded_chunk_id);
939     }
940   }
941 }
942
943 void SafeBrowsingDatabaseNew::CacheHashResults(
944     const std::vector<SBPrefix>& prefixes,
945     const std::vector<SBFullHashResult>& full_hits,
946     const base::TimeDelta& cache_lifetime) {
947   const base::Time expire_after = base::Time::Now() + cache_lifetime;
948
949   // This is called on the I/O thread, lock against updates.
950   base::AutoLock locked(lookup_lock_);
951
952   // Create or reset all cached results for these prefixes.
953   for (size_t i = 0; i < prefixes.size(); ++i) {
954     browse_gethash_cache_[prefixes[i]] = SBCachedFullHashResult(expire_after);
955   }
956
957   // Insert any fullhash hits. Note that there may be one, multiple, or no
958   // fullhashes for any given entry in |prefixes|.
959   for (size_t i = 0; i < full_hits.size(); ++i) {
960     const SBPrefix prefix = full_hits[i].hash.prefix;
961     browse_gethash_cache_[prefix].full_hashes.push_back(full_hits[i]);
962   }
963 }
964
965 bool SafeBrowsingDatabaseNew::UpdateStarted(
966     std::vector<SBListChunkRanges>* lists) {
967   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
968   DCHECK(lists);
969
970   // If |BeginUpdate()| fails, reset the database.
971   if (!browse_store_->BeginUpdate()) {
972     RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN);
973     HandleCorruptDatabase();
974     return false;
975   }
976
977   if (download_store_.get() && !download_store_->BeginUpdate()) {
978     RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN);
979     HandleCorruptDatabase();
980     return false;
981   }
982
983   if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) {
984     RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
985     HandleCorruptDatabase();
986     return false;
987   }
988
989   if (download_whitelist_store_.get() &&
990       !download_whitelist_store_->BeginUpdate()) {
991     RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
992     HandleCorruptDatabase();
993     return false;
994   }
995
996   if (extension_blacklist_store_ &&
997       !extension_blacklist_store_->BeginUpdate()) {
998     RecordFailure(FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN);
999     HandleCorruptDatabase();
1000     return false;
1001   }
1002
1003   if (side_effect_free_whitelist_store_ &&
1004       !side_effect_free_whitelist_store_->BeginUpdate()) {
1005     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN);
1006     HandleCorruptDatabase();
1007     return false;
1008   }
1009
1010   if (ip_blacklist_store_ && !ip_blacklist_store_->BeginUpdate()) {
1011     RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN);
1012     HandleCorruptDatabase();
1013     return false;
1014   }
1015
1016   {
1017     base::AutoLock locked(lookup_lock_);
1018     // Cached fullhash results must be cleared on every database update (whether
1019     // successful or not.)
1020     browse_gethash_cache_.clear();
1021   }
1022
1023   UpdateChunkRangesForLists(browse_store_.get(),
1024                             safe_browsing_util::kMalwareList,
1025                             safe_browsing_util::kPhishingList,
1026                             lists);
1027
1028   // NOTE(shess): |download_store_| used to contain kBinHashList, which has been
1029   // deprecated.  Code to delete the list from the store shows ~15k hits/day as
1030   // of Feb 2014, so it has been removed.  Everything _should_ be resilient to
1031   // extra data of that sort.
1032   UpdateChunkRangesForList(download_store_.get(),
1033                            safe_browsing_util::kBinUrlList, lists);
1034
1035   UpdateChunkRangesForList(csd_whitelist_store_.get(),
1036                            safe_browsing_util::kCsdWhiteList, lists);
1037
1038   UpdateChunkRangesForList(download_whitelist_store_.get(),
1039                            safe_browsing_util::kDownloadWhiteList, lists);
1040
1041   UpdateChunkRangesForList(extension_blacklist_store_.get(),
1042                            safe_browsing_util::kExtensionBlacklist, lists);
1043
1044   UpdateChunkRangesForList(side_effect_free_whitelist_store_.get(),
1045                            safe_browsing_util::kSideEffectFreeWhitelist, lists);
1046
1047   UpdateChunkRangesForList(ip_blacklist_store_.get(),
1048                            safe_browsing_util::kIPBlacklist, lists);
1049
1050   corruption_detected_ = false;
1051   change_detected_ = false;
1052   return true;
1053 }
1054
1055 void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) {
1056   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1057
1058   // The update may have failed due to corrupt storage (for instance,
1059   // an excessive number of invalid add_chunks and sub_chunks).
1060   // Double-check that the databases are valid.
1061   // TODO(shess): Providing a checksum for the add_chunk and sub_chunk
1062   // sections would allow throwing a corruption error in
1063   // UpdateStarted().
1064   if (!update_succeeded) {
1065     if (!browse_store_->CheckValidity())
1066       DLOG(ERROR) << "Safe-browsing browse database corrupt.";
1067
1068     if (download_store_.get() && !download_store_->CheckValidity())
1069       DLOG(ERROR) << "Safe-browsing download database corrupt.";
1070
1071     if (csd_whitelist_store_.get() && !csd_whitelist_store_->CheckValidity())
1072       DLOG(ERROR) << "Safe-browsing csd whitelist database corrupt.";
1073
1074     if (download_whitelist_store_.get() &&
1075         !download_whitelist_store_->CheckValidity()) {
1076       DLOG(ERROR) << "Safe-browsing download whitelist database corrupt.";
1077     }
1078
1079     if (extension_blacklist_store_ &&
1080         !extension_blacklist_store_->CheckValidity()) {
1081       DLOG(ERROR) << "Safe-browsing extension blacklist database corrupt.";
1082     }
1083
1084     if (side_effect_free_whitelist_store_ &&
1085         !side_effect_free_whitelist_store_->CheckValidity()) {
1086       DLOG(ERROR) << "Safe-browsing side-effect free whitelist database "
1087                   << "corrupt.";
1088     }
1089
1090     if (ip_blacklist_store_ && !ip_blacklist_store_->CheckValidity()) {
1091       DLOG(ERROR) << "Safe-browsing IP blacklist database corrupt.";
1092     }
1093   }
1094
1095   if (corruption_detected_)
1096     return;
1097
1098   // Unroll the transaction if there was a protocol error or if the
1099   // transaction was empty.  This will leave the prefix set, the
1100   // pending hashes, and the prefix miss cache in place.
1101   if (!update_succeeded || !change_detected_) {
1102     // Track empty updates to answer questions at http://crbug.com/72216 .
1103     if (update_succeeded && !change_detected_)
1104       UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0);
1105     browse_store_->CancelUpdate();
1106     if (download_store_.get())
1107       download_store_->CancelUpdate();
1108     if (csd_whitelist_store_.get())
1109       csd_whitelist_store_->CancelUpdate();
1110     if (download_whitelist_store_.get())
1111       download_whitelist_store_->CancelUpdate();
1112     if (extension_blacklist_store_)
1113       extension_blacklist_store_->CancelUpdate();
1114     if (side_effect_free_whitelist_store_)
1115       side_effect_free_whitelist_store_->CancelUpdate();
1116     if (ip_blacklist_store_)
1117       ip_blacklist_store_->CancelUpdate();
1118     return;
1119   }
1120
1121   if (download_store_) {
1122     int64 size_bytes = UpdateHashPrefixStore(
1123         DownloadDBFilename(filename_base_),
1124         download_store_.get(),
1125         FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH);
1126     UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
1127                          static_cast<int>(size_bytes / 1024));
1128   }
1129
1130   UpdateBrowseStore();
1131   UpdateWhitelistStore(CsdWhitelistDBFilename(filename_base_),
1132                        csd_whitelist_store_.get(),
1133                        &csd_whitelist_);
1134   UpdateWhitelistStore(DownloadWhitelistDBFilename(filename_base_),
1135                        download_whitelist_store_.get(),
1136                        &download_whitelist_);
1137
1138   if (extension_blacklist_store_) {
1139     int64 size_bytes = UpdateHashPrefixStore(
1140         ExtensionBlacklistDBFilename(filename_base_),
1141         extension_blacklist_store_.get(),
1142         FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH);
1143     UMA_HISTOGRAM_COUNTS("SB2.ExtensionBlacklistKilobytes",
1144                          static_cast<int>(size_bytes / 1024));
1145   }
1146
1147   if (side_effect_free_whitelist_store_)
1148     UpdateSideEffectFreeWhitelistStore();
1149
1150   if (ip_blacklist_store_)
1151     UpdateIpBlacklistStore();
1152 }
1153
1154 void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
1155     const base::FilePath& store_filename,
1156     SafeBrowsingStore* store,
1157     SBWhitelist* whitelist) {
1158   if (!store)
1159     return;
1160
1161   // Note: |builder| will not be empty.  The current data store implementation
1162   // stores all full-length hashes as both full and prefix hashes.
1163   safe_browsing::PrefixSetBuilder builder;
1164   std::vector<SBAddFullHash> full_hashes;
1165   if (!store->FinishUpdate(&builder, &full_hashes)) {
1166     RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH);
1167     WhitelistEverything(whitelist);
1168     return;
1169   }
1170
1171 #if defined(OS_MACOSX)
1172   base::mac::SetFileBackupExclusion(store_filename);
1173 #endif
1174
1175   LoadWhitelist(full_hashes, whitelist);
1176 }
1177
1178 int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
1179     const base::FilePath& store_filename,
1180     SafeBrowsingStore* store,
1181     FailureType failure_type) {
1182   // These results are not used after this call. Simply ignore the
1183   // returned value after FinishUpdate(...).
1184   safe_browsing::PrefixSetBuilder builder;
1185   std::vector<SBAddFullHash> add_full_hashes_result;
1186
1187   if (!store->FinishUpdate(&builder, &add_full_hashes_result))
1188     RecordFailure(failure_type);
1189
1190 #if defined(OS_MACOSX)
1191   base::mac::SetFileBackupExclusion(store_filename);
1192 #endif
1193
1194   return GetFileSizeOrZero(store_filename);
1195 }
1196
1197 void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
1198   // Measure the amount of IO during the filter build.
1199   base::IoCounters io_before, io_after;
1200   base::ProcessHandle handle = base::Process::Current().handle();
1201   scoped_ptr<base::ProcessMetrics> metric(
1202 #if !defined(OS_MACOSX)
1203       base::ProcessMetrics::CreateProcessMetrics(handle)
1204 #else
1205       // Getting stats only for the current process is enough, so NULL is fine.
1206       base::ProcessMetrics::CreateProcessMetrics(handle, NULL)
1207 #endif
1208   );
1209
1210   // IoCounters are currently not supported on Mac, and may not be
1211   // available for Linux, so we check the result and only show IO
1212   // stats if they are available.
1213   const bool got_counters = metric->GetIOCounters(&io_before);
1214
1215   const base::TimeTicks before = base::TimeTicks::Now();
1216
1217   // TODO(shess): Perhaps refactor to let builder accumulate full hashes on the
1218   // fly?  Other clients use the SBAddFullHash vector, but AFAICT they only use
1219   // the SBFullHash portion.  It would need an accessor on PrefixSet.
1220   safe_browsing::PrefixSetBuilder builder;
1221   std::vector<SBAddFullHash> add_full_hashes;
1222   if (!browse_store_->FinishUpdate(&builder, &add_full_hashes)) {
1223     RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
1224     return;
1225   }
1226
1227   std::vector<SBFullHash> full_hash_results;
1228   for (size_t i = 0; i < add_full_hashes.size(); ++i) {
1229     full_hash_results.push_back(add_full_hashes[i].full_hash);
1230   }
1231
1232   scoped_ptr<safe_browsing::PrefixSet>
1233       prefix_set(builder.GetPrefixSet(full_hash_results));
1234
1235   // Swap in the newly built filter.
1236   {
1237     base::AutoLock locked(lookup_lock_);
1238     browse_prefix_set_.swap(prefix_set);
1239   }
1240
1241   UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before);
1242
1243   // Persist the prefix set to disk.  Since only this thread changes
1244   // |browse_prefix_set_|, there is no need to lock.
1245   WritePrefixSet();
1246
1247   // Gather statistics.
1248   if (got_counters && metric->GetIOCounters(&io_after)) {
1249     UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes",
1250                          static_cast<int>(io_after.ReadTransferCount -
1251                                           io_before.ReadTransferCount) / 1024);
1252     UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes",
1253                          static_cast<int>(io_after.WriteTransferCount -
1254                                           io_before.WriteTransferCount) / 1024);
1255     UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
1256                          static_cast<int>(io_after.ReadOperationCount -
1257                                           io_before.ReadOperationCount));
1258     UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
1259                          static_cast<int>(io_after.WriteOperationCount -
1260                                           io_before.WriteOperationCount));
1261   }
1262
1263   const base::FilePath browse_filename = BrowseDBFilename(filename_base_);
1264   const int64 file_size = GetFileSizeOrZero(browse_filename);
1265   UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
1266                        static_cast<int>(file_size / 1024));
1267
1268 #if defined(OS_MACOSX)
1269   base::mac::SetFileBackupExclusion(browse_filename);
1270 #endif
1271 }
1272
1273 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
1274   safe_browsing::PrefixSetBuilder builder;
1275   std::vector<SBAddFullHash> add_full_hashes_result;
1276
1277   if (!side_effect_free_whitelist_store_->FinishUpdate(
1278           &builder, &add_full_hashes_result)) {
1279     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH);
1280     return;
1281   }
1282   scoped_ptr<safe_browsing::PrefixSet>
1283       prefix_set(builder.GetPrefixSetNoHashes());
1284
1285   // Swap in the newly built prefix set.
1286   {
1287     base::AutoLock locked(lookup_lock_);
1288     side_effect_free_whitelist_prefix_set_.swap(prefix_set);
1289   }
1290
1291   const base::FilePath side_effect_free_whitelist_filename =
1292       SideEffectFreeWhitelistDBFilename(filename_base_);
1293   const base::FilePath side_effect_free_whitelist_prefix_set_filename =
1294       PrefixSetForFilename(side_effect_free_whitelist_filename);
1295   const base::TimeTicks before = base::TimeTicks::Now();
1296   const bool write_ok = side_effect_free_whitelist_prefix_set_->WriteFile(
1297       side_effect_free_whitelist_prefix_set_filename);
1298   UMA_HISTOGRAM_TIMES("SB2.SideEffectFreePrefixSetWrite",
1299                       base::TimeTicks::Now() - before);
1300
1301   if (!write_ok)
1302     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE);
1303
1304   // Gather statistics.
1305   int64 file_size = GetFileSizeOrZero(
1306       side_effect_free_whitelist_prefix_set_filename);
1307   UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistPrefixSetKilobytes",
1308                        static_cast<int>(file_size / 1024));
1309   file_size = GetFileSizeOrZero(side_effect_free_whitelist_filename);
1310   UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistDatabaseKilobytes",
1311                        static_cast<int>(file_size / 1024));
1312
1313 #if defined(OS_MACOSX)
1314   base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename);
1315   base::mac::SetFileBackupExclusion(
1316       side_effect_free_whitelist_prefix_set_filename);
1317 #endif
1318 }
1319
1320 void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() {
1321   // Note: prefixes will not be empty.  The current data store implementation
1322   // stores all full-length hashes as both full and prefix hashes.
1323   safe_browsing::PrefixSetBuilder builder;
1324   std::vector<SBAddFullHash> full_hashes;
1325   if (!ip_blacklist_store_->FinishUpdate(&builder, &full_hashes)) {
1326     RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH);
1327     LoadIpBlacklist(std::vector<SBAddFullHash>());  // Clear the list.
1328     return;
1329   }
1330
1331 #if defined(OS_MACOSX)
1332   base::mac::SetFileBackupExclusion(IpBlacklistDBFilename(filename_base_));
1333 #endif
1334
1335   LoadIpBlacklist(full_hashes);
1336 }
1337
1338 void SafeBrowsingDatabaseNew::HandleCorruptDatabase() {
1339   // Reset the database after the current task has unwound (but only
1340   // reset once within the scope of a given task).
1341   if (!reset_factory_.HasWeakPtrs()) {
1342     RecordFailure(FAILURE_DATABASE_CORRUPT);
1343     base::MessageLoop::current()->PostTask(FROM_HERE,
1344         base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase,
1345                    reset_factory_.GetWeakPtr()));
1346   }
1347 }
1348
1349 void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
1350   RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER);
1351   corruption_detected_ = true;  // Stop updating the database.
1352   ResetDatabase();
1353
1354   // NOTE(shess): ResetDatabase() should remove the corruption, so this should
1355   // only happen once.  If you are here because you are hitting this after a
1356   // restart, then I would be very interested in working with you to figure out
1357   // what is happening, since it may affect real users.
1358   DLOG(FATAL) << "SafeBrowsing database was corrupt and reset";
1359 }
1360
1361 // TODO(shess): I'm not clear why this code doesn't have any
1362 // real error-handling.
1363 void SafeBrowsingDatabaseNew::LoadPrefixSet() {
1364   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1365   DCHECK(!filename_base_.empty());
1366
1367   const base::FilePath browse_filename = BrowseDBFilename(filename_base_);
1368   const base::FilePath browse_prefix_set_filename =
1369       PrefixSetForFilename(browse_filename);
1370
1371   // Only use the prefix set if database is present and non-empty.
1372   if (!GetFileSizeOrZero(browse_filename))
1373     return;
1374
1375   // Cleanup any stale bloom filter (no longer used).
1376   // TODO(shess): Track existence to drive removal of this code?
1377   const base::FilePath bloom_filter_filename =
1378       BloomFilterForFilename(browse_filename);
1379   base::DeleteFile(bloom_filter_filename, false);
1380
1381   const base::TimeTicks before = base::TimeTicks::Now();
1382   browse_prefix_set_ = safe_browsing::PrefixSet::LoadFile(
1383       browse_prefix_set_filename);
1384   UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before);
1385
1386   if (!browse_prefix_set_.get())
1387     RecordFailure(FAILURE_BROWSE_PREFIX_SET_READ);
1388 }
1389
1390 bool SafeBrowsingDatabaseNew::Delete() {
1391   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1392   DCHECK(!filename_base_.empty());
1393
1394   // TODO(shess): This is a mess.  SafeBrowsingFileStore::Delete() closes the
1395   // store before calling DeleteStore().  DeleteStore() deletes transient files
1396   // in addition to the main file.  Probably all of these should be converted to
1397   // a helper which calls Delete() if the store exists, else DeleteStore() on
1398   // the generated filename.
1399
1400   // TODO(shess): Determine if the histograms are useful in any way.  I cannot
1401   // recall any action taken as a result of their values, in which case it might
1402   // make more sense to histogram an overall thumbs-up/-down and just dig deeper
1403   // if something looks wrong.
1404
1405   const bool r1 = browse_store_->Delete();
1406   if (!r1)
1407     RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1408
1409   const bool r2 = download_store_.get() ? download_store_->Delete() : true;
1410   if (!r2)
1411     RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1412
1413   const bool r3 = csd_whitelist_store_.get() ?
1414       csd_whitelist_store_->Delete() : true;
1415   if (!r3)
1416     RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1417
1418   const bool r4 = download_whitelist_store_.get() ?
1419       download_whitelist_store_->Delete() : true;
1420   if (!r4)
1421     RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1422
1423   const base::FilePath browse_filename = BrowseDBFilename(filename_base_);
1424   const base::FilePath bloom_filter_filename =
1425       BloomFilterForFilename(browse_filename);
1426   const bool r5 = base::DeleteFile(bloom_filter_filename, false);
1427   if (!r5)
1428     RecordFailure(FAILURE_DATABASE_FILTER_DELETE);
1429
1430   const base::FilePath browse_prefix_set_filename =
1431       PrefixSetForFilename(browse_filename);
1432   const bool r6 = base::DeleteFile(browse_prefix_set_filename, false);
1433   if (!r6)
1434     RecordFailure(FAILURE_BROWSE_PREFIX_SET_DELETE);
1435
1436   const base::FilePath extension_blacklist_filename =
1437       ExtensionBlacklistDBFilename(filename_base_);
1438   const bool r7 = base::DeleteFile(extension_blacklist_filename, false);
1439   if (!r7)
1440     RecordFailure(FAILURE_EXTENSION_BLACKLIST_DELETE);
1441
1442   const base::FilePath side_effect_free_whitelist_filename =
1443       SideEffectFreeWhitelistDBFilename(filename_base_);
1444   const bool r8 = base::DeleteFile(side_effect_free_whitelist_filename,
1445                                    false);
1446   if (!r8)
1447     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE);
1448
1449   const base::FilePath side_effect_free_whitelist_prefix_set_filename =
1450       PrefixSetForFilename(side_effect_free_whitelist_filename);
1451   const bool r9 = base::DeleteFile(
1452       side_effect_free_whitelist_prefix_set_filename,
1453       false);
1454   if (!r9)
1455     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE);
1456
1457   const bool r10 = base::DeleteFile(IpBlacklistDBFilename(filename_base_),
1458                                     false);
1459   if (!r10)
1460     RecordFailure(FAILURE_IP_BLACKLIST_DELETE);
1461
1462   return r1 && r2 && r3 && r4 && r5 && r6 && r7 && r8 && r9 && r10;
1463 }
1464
1465 void SafeBrowsingDatabaseNew::WritePrefixSet() {
1466   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1467
1468   if (!browse_prefix_set_.get())
1469     return;
1470
1471   const base::FilePath browse_filename = BrowseDBFilename(filename_base_);
1472   const base::FilePath browse_prefix_set_filename =
1473       PrefixSetForFilename(browse_filename);
1474
1475   const base::TimeTicks before = base::TimeTicks::Now();
1476   const bool write_ok = browse_prefix_set_->WriteFile(
1477       browse_prefix_set_filename);
1478   UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before);
1479
1480   const int64 file_size = GetFileSizeOrZero(browse_prefix_set_filename);
1481   UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes",
1482                        static_cast<int>(file_size / 1024));
1483
1484   if (!write_ok)
1485     RecordFailure(FAILURE_BROWSE_PREFIX_SET_WRITE);
1486
1487 #if defined(OS_MACOSX)
1488   base::mac::SetFileBackupExclusion(browse_prefix_set_filename);
1489 #endif
1490 }
1491
1492 void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist* whitelist) {
1493   base::AutoLock locked(lookup_lock_);
1494   whitelist->second = true;
1495   whitelist->first.clear();
1496 }
1497
1498 void SafeBrowsingDatabaseNew::LoadWhitelist(
1499     const std::vector<SBAddFullHash>& full_hashes,
1500     SBWhitelist* whitelist) {
1501   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1502   if (full_hashes.size() > kMaxWhitelistSize) {
1503     WhitelistEverything(whitelist);
1504     return;
1505   }
1506
1507   std::vector<SBFullHash> new_whitelist;
1508   new_whitelist.reserve(full_hashes.size());
1509   for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
1510        it != full_hashes.end(); ++it) {
1511     new_whitelist.push_back(it->full_hash);
1512   }
1513   std::sort(new_whitelist.begin(), new_whitelist.end(), SBFullHashLess);
1514
1515   SBFullHash kill_switch = SBFullHashForString(kWhitelistKillSwitchUrl);
1516   if (std::binary_search(new_whitelist.begin(), new_whitelist.end(),
1517                          kill_switch, SBFullHashLess)) {
1518     // The kill switch is whitelisted hence we whitelist all URLs.
1519     WhitelistEverything(whitelist);
1520   } else {
1521     base::AutoLock locked(lookup_lock_);
1522     whitelist->second = false;
1523     whitelist->first.swap(new_whitelist);
1524   }
1525 }
1526
1527 void SafeBrowsingDatabaseNew::LoadIpBlacklist(
1528     const std::vector<SBAddFullHash>& full_hashes) {
1529   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1530   IPBlacklist new_blacklist;
1531   for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
1532        it != full_hashes.end();
1533        ++it) {
1534     const char* full_hash = it->full_hash.full_hash;
1535     DCHECK_EQ(crypto::kSHA256Length, arraysize(it->full_hash.full_hash));
1536     // The format of the IP blacklist is:
1537     // SHA-1(IPv6 prefix) + uint8(prefix size) + 11 unused bytes.
1538     std::string hashed_ip_prefix(full_hash, base::kSHA1Length);
1539     size_t prefix_size = static_cast<uint8>(full_hash[base::kSHA1Length]);
1540     if (prefix_size > kMaxIpPrefixSize || prefix_size < kMinIpPrefixSize) {
1541       RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_INVALID);
1542       new_blacklist.clear();  // Load empty blacklist.
1543       break;
1544     }
1545
1546     // We precompute the mask for the given subnet size to speed up lookups.
1547     // Basically we need to create a 16B long string which has the highest
1548     // |size| bits sets to one.
1549     std::string mask(net::kIPv6AddressSize, '\0');
1550     mask.replace(0, prefix_size / 8, prefix_size / 8, '\xFF');
1551     if ((prefix_size % 8) != 0) {
1552       mask[prefix_size / 8] = 0xFF << (8 - (prefix_size % 8));
1553     }
1554     DVLOG(2) << "Inserting malicious IP: "
1555              << " raw:" << base::HexEncode(full_hash, crypto::kSHA256Length)
1556              << " mask:" << base::HexEncode(mask.data(), mask.size())
1557              << " prefix_size:" << prefix_size
1558              << " hashed_ip:" << base::HexEncode(hashed_ip_prefix.data(),
1559                                                  hashed_ip_prefix.size());
1560     new_blacklist[mask].insert(hashed_ip_prefix);
1561   }
1562
1563   base::AutoLock locked(lookup_lock_);
1564   ip_blacklist_.swap(new_blacklist);
1565 }
1566
1567 bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() {
1568   SBFullHash malware_kill_switch = SBFullHashForString(kMalwareIPKillSwitchUrl);
1569   std::vector<SBFullHash> full_hashes;
1570   full_hashes.push_back(malware_kill_switch);
1571   return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
1572 }
1573
1574 bool SafeBrowsingDatabaseNew::IsCsdWhitelistKillSwitchOn() {
1575   return csd_whitelist_.second;
1576 }