Upstream version 7.36.149.0
[platform/framework/web/crosswalk.git] / src / chrome / browser / safe_browsing / safe_browsing_database.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/safe_browsing/safe_browsing_database.h"
6
7 #include <algorithm>
8 #include <iterator>
9
10 #include "base/bind.h"
11 #include "base/file_util.h"
12 #include "base/message_loop/message_loop.h"
13 #include "base/metrics/histogram.h"
14 #include "base/metrics/stats_counters.h"
15 #include "base/process/process.h"
16 #include "base/process/process_metrics.h"
17 #include "base/sha1.h"
18 #include "base/strings/string_number_conversions.h"
19 #include "base/strings/stringprintf.h"
20 #include "base/time/time.h"
21 #include "chrome/browser/safe_browsing/prefix_set.h"
22 #include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
23 #include "content/public/browser/browser_thread.h"
24 #include "crypto/sha2.h"
25 #include "net/base/net_util.h"
26 #include "url/gurl.h"
27
28 #if defined(OS_MACOSX)
29 #include "base/mac/mac_util.h"
30 #endif
31
32 using content::BrowserThread;
33
34 namespace {
35
// File-local constants.  Every filename suffix below is appended verbatim
// (leading space included) to a base path by the static *Filename() helpers
// of SafeBrowsingDatabase.

// Filename suffix for the bloom filter.
const base::FilePath::CharType kBloomFilterFile[] =
    FILE_PATH_LITERAL(" Filter 2");
// Filename suffix for the prefix set.
const base::FilePath::CharType kPrefixSetFile[] =
    FILE_PATH_LITERAL(" Prefix Set");
// Filename suffix for download store.
const base::FilePath::CharType kDownloadDBFile[] =
    FILE_PATH_LITERAL(" Download");
// Filename suffix for client-side phishing detection whitelist store.
const base::FilePath::CharType kCsdWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Csd Whitelist");
// Filename suffix for the download whitelist store.
const base::FilePath::CharType kDownloadWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Download Whitelist");
// Filename suffix for the extension blacklist store.
const base::FilePath::CharType kExtensionBlacklistDBFile[] =
    FILE_PATH_LITERAL(" Extension Blacklist");
// Filename suffix for the side-effect free whitelist store.
const base::FilePath::CharType kSideEffectFreeWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Side-Effect Free Whitelist");
// Filename suffix for the csd malware IP blacklist store.
const base::FilePath::CharType kIPBlacklistDBFile[] =
    FILE_PATH_LITERAL(" IP Blacklist");

// Filename suffix for browse store.
// TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
// Unfortunately, to change the name implies lots of transition code
// for little benefit.  If/when file formats change (say to put all
// the data in one file), that would be a convenient point to rectify
// this.
const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");

// The maximum staleness for a cached entry.  A cached full hash received
// longer ago than this is only reported when the database's last update is
// itself more recent than this window (see GetCachedFullHashesForBrowse()).
const int kMaxStalenessMinutes = 45;

// Maximum number of entries we allow in any of the whitelists.
// If a whitelist on disk contains more entries then all lookups to
// the whitelist will be considered a match.
const size_t kMaxWhitelistSize = 5000;

// If the hash of this exact expression is on a whitelist then all
// lookups to this whitelist will be considered a match.
const char kWhitelistKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch";  // Don't change this!

// If the hash of this exact expression is on a whitelist then the
// malware IP blacklisting feature will be disabled in csd.
// Don't change this!
const char kMalwareIPKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch_malware";

// Inclusive bounds on an IP prefix size.
// NOTE(review): presumably a prefix length in bits (128 would cover a whole
// IPv6 address) enforced while loading the IP blacklist; the code using these
// is not visible here -- confirm.
const size_t kMaxIpPrefixSize = 128;
const size_t kMinIpPrefixSize = 1;
90
91 // To save space, the incoming |chunk_id| and |list_id| are combined
92 // into an |encoded_chunk_id| for storage by shifting the |list_id|
93 // into the low-order bits.  These functions decode that information.
94 // TODO(lzheng): It was reasonable when database is saved in sqlite, but
95 // there should be better ways to save chunk_id and list_id after we use
96 // SafeBrowsingStoreFile.
// Returns the list-id bit (0 or 1) carried in the lowest-order bit of
// |encoded_chunk_id|.
int GetListIdBit(const int encoded_chunk_id) {
  const int kListIdMask = 1;
  return encoded_chunk_id & kListIdMask;
}
// Returns the chunk number held in |encoded_chunk_id|, i.e. the value with
// the lowest-order (list-id) bit shifted away.
int DecodeChunkId(int encoded_chunk_id) {
  const int chunk_id = encoded_chunk_id >> 1;
  return chunk_id;
}
103 int EncodeChunkId(const int chunk, const int list_id) {
104   DCHECK_NE(list_id, safe_browsing_util::INVALID);
105   return chunk << 1 | list_id % 2;
106 }
107
108 // Generate the set of full hashes to check for |url|.  If
109 // |include_whitelist_hashes| is true we will generate additional path-prefixes
110 // to match against the csd whitelist.  E.g., if the path-prefix /foo is on the
111 // whitelist it should also match /foo/bar which is not the case for all the
112 // other lists.  We'll also always add a pattern for the empty path.
113 // TODO(shess): This function is almost the same as
114 // |CompareFullHashes()| in safe_browsing_util.cc, except that code
115 // does an early exit on match.  Since match should be the infrequent
116 // case (phishing or malware found), consider combining this function
117 // with that one.
118 void BrowseFullHashesToCheck(const GURL& url,
119                              bool include_whitelist_hashes,
120                              std::vector<SBFullHash>* full_hashes) {
121   std::vector<std::string> hosts;
122   if (url.HostIsIPAddress()) {
123     hosts.push_back(url.host());
124   } else {
125     safe_browsing_util::GenerateHostsToCheck(url, &hosts);
126   }
127
128   std::vector<std::string> paths;
129   safe_browsing_util::GeneratePathsToCheck(url, &paths);
130
131   for (size_t i = 0; i < hosts.size(); ++i) {
132     for (size_t j = 0; j < paths.size(); ++j) {
133       const std::string& path = paths[j];
134       full_hashes->push_back(SBFullHashForString(hosts[i] + path));
135
136       // We may have /foo as path-prefix in the whitelist which should
137       // also match with /foo/bar and /foo?bar.  Hence, for every path
138       // that ends in '/' we also add the path without the slash.
139       if (include_whitelist_hashes &&
140           path.size() > 1 &&
141           path[path.size() - 1] == '/') {
142         full_hashes->push_back(
143             SBFullHashForString(hosts[i] + path.substr(0, path.size() - 1)));
144       }
145     }
146   }
147 }
148
149 // Get the prefixes matching the download |urls|.
150 void GetDownloadUrlPrefixes(const std::vector<GURL>& urls,
151                             std::vector<SBPrefix>* prefixes) {
152   std::vector<SBFullHash> full_hashes;
153   for (size_t i = 0; i < urls.size(); ++i)
154     BrowseFullHashesToCheck(urls[i], false, &full_hashes);
155
156   for (size_t i = 0; i < full_hashes.size(); ++i)
157     prefixes->push_back(full_hashes[i].prefix);
158 }
159
160 // Helper function to compare addprefixes in |store| with |prefixes|.
161 // The |list_bit| indicates which list (url or hash) to compare.
162 //
163 // Returns true if there is a match, |*prefix_hits| (if non-NULL) will contain
164 // the actual matching prefixes.
165 bool MatchAddPrefixes(SafeBrowsingStore* store,
166                       int list_bit,
167                       const std::vector<SBPrefix>& prefixes,
168                       std::vector<SBPrefix>* prefix_hits) {
169   prefix_hits->clear();
170   bool found_match = false;
171
172   SBAddPrefixes add_prefixes;
173   store->GetAddPrefixes(&add_prefixes);
174   for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
175        iter != add_prefixes.end(); ++iter) {
176     for (size_t j = 0; j < prefixes.size(); ++j) {
177       const SBPrefix& prefix = prefixes[j];
178       if (prefix == iter->prefix &&
179           GetListIdBit(iter->chunk_id) == list_bit) {
180         prefix_hits->push_back(prefix);
181         found_match = true;
182       }
183     }
184   }
185   return found_match;
186 }
187
188 // Find the entries in |full_hashes| with prefix in |prefix_hits|, and
189 // add them to |full_hits| if not expired.  "Not expired" is when
190 // either |last_update| was recent enough, or the item has been
191 // received recently enough.  Expired items are not deleted because a
192 // future update may make them acceptable again.
193 //
194 // For efficiency reasons the code walks |prefix_hits| and
195 // |full_hashes| in parallel, so they must be sorted by prefix.
196 void GetCachedFullHashesForBrowse(
197     const std::vector<SBPrefix>& prefix_hits,
198     const std::vector<SBFullHashCached>& full_hashes,
199     std::vector<SBFullHashResult>* full_hits,
200     base::Time last_update) {
201   const base::Time expire_time =
202       base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes);
203
204   std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin();
205   std::vector<SBFullHashCached>::const_iterator hiter = full_hashes.begin();
206
207   while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
208     if (*piter < hiter->hash.prefix) {
209       ++piter;
210     } else if (hiter->hash.prefix < *piter) {
211       ++hiter;
212     } else {
213       if (expire_time < last_update ||
214           expire_time.ToTimeT() < hiter->received) {
215         SBFullHashResult result;
216         result.list_id = hiter->list_id;
217         result.hash = hiter->hash;
218         full_hits->push_back(result);
219       }
220
221       // Only increment |hiter|, |piter| might have multiple hits.
222       ++hiter;
223     }
224   }
225 }
226
227 // This function generates a chunk range string for |chunks|. It
228 // outputs one chunk range string per list and writes it to the
229 // |list_ranges| vector.  We expect |list_ranges| to already be of the
230 // right size.  E.g., if |chunks| contains chunks with two different
231 // list ids then |list_ranges| must contain two elements.
232 void GetChunkRanges(const std::vector<int>& chunks,
233                     std::vector<std::string>* list_ranges) {
234   // Since there are 2 possible list ids, there must be exactly two
235   // list ranges.  Even if the chunk data should only contain one
236   // line, this code has to somehow handle corruption.
237   DCHECK_EQ(2U, list_ranges->size());
238
239   std::vector<std::vector<int> > decoded_chunks(list_ranges->size());
240   for (std::vector<int>::const_iterator iter = chunks.begin();
241        iter != chunks.end(); ++iter) {
242     int mod_list_id = GetListIdBit(*iter);
243     DCHECK_GE(mod_list_id, 0);
244     DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size());
245     decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter));
246   }
247   for (size_t i = 0; i < decoded_chunks.size(); ++i) {
248     ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i]));
249   }
250 }
251
252 // Helper function to create chunk range lists for Browse related
253 // lists.
254 void UpdateChunkRanges(SafeBrowsingStore* store,
255                        const std::vector<std::string>& listnames,
256                        std::vector<SBListChunkRanges>* lists) {
257   if (!store)
258     return;
259
260   DCHECK_GT(listnames.size(), 0U);
261   DCHECK_LE(listnames.size(), 2U);
262   std::vector<int> add_chunks;
263   std::vector<int> sub_chunks;
264   store->GetAddChunks(&add_chunks);
265   store->GetSubChunks(&sub_chunks);
266
267   // Always decode 2 ranges, even if only the first one is expected.
268   // The loop below will only load as many into |lists| as |listnames|
269   // indicates.
270   std::vector<std::string> adds(2);
271   std::vector<std::string> subs(2);
272   GetChunkRanges(add_chunks, &adds);
273   GetChunkRanges(sub_chunks, &subs);
274
275   for (size_t i = 0; i < listnames.size(); ++i) {
276     const std::string& listname = listnames[i];
277     DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2,
278               static_cast<int>(i % 2));
279     DCHECK_NE(safe_browsing_util::GetListId(listname),
280               safe_browsing_util::INVALID);
281     lists->push_back(SBListChunkRanges(listname));
282     lists->back().adds.swap(adds[i]);
283     lists->back().subs.swap(subs[i]);
284   }
285 }
286
287 void UpdateChunkRangesForLists(SafeBrowsingStore* store,
288                                const std::string& listname0,
289                                const std::string& listname1,
290                                std::vector<SBListChunkRanges>* lists) {
291   std::vector<std::string> listnames;
292   listnames.push_back(listname0);
293   listnames.push_back(listname1);
294   UpdateChunkRanges(store, listnames, lists);
295 }
296
297 void UpdateChunkRangesForList(SafeBrowsingStore* store,
298                               const std::string& listname,
299                               std::vector<SBListChunkRanges>* lists) {
300   UpdateChunkRanges(store, std::vector<std::string>(1, listname), lists);
301 }
302
303 // Order |SBFullHashCached| items on the prefix part.
304 bool SBFullHashCachedPrefixLess(const SBFullHashCached& a,
305                                 const SBFullHashCached& b) {
306   return a.hash.prefix < b.hash.prefix;
307 }
308
309 // This code always checks for non-zero file size.  This helper makes
310 // that less verbose.
311 int64 GetFileSizeOrZero(const base::FilePath& file_path) {
312   int64 size_64;
313   if (!base::GetFileSize(file_path, &size_64))
314     return 0;
315   return size_64;
316 }
317
318 }  // namespace
319
320 // The default SafeBrowsingDatabaseFactory.
321 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
322  public:
323   virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
324       bool enable_download_protection,
325       bool enable_client_side_whitelist,
326       bool enable_download_whitelist,
327       bool enable_extension_blacklist,
328       bool enable_side_effect_free_whitelist,
329       bool enable_ip_blacklist) OVERRIDE {
330     return new SafeBrowsingDatabaseNew(
331         new SafeBrowsingStoreFile,
332         enable_download_protection ? new SafeBrowsingStoreFile : NULL,
333         enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL,
334         enable_download_whitelist ? new SafeBrowsingStoreFile : NULL,
335         enable_extension_blacklist ? new SafeBrowsingStoreFile : NULL,
336         enable_side_effect_free_whitelist ? new SafeBrowsingStoreFile : NULL,
337         enable_ip_blacklist ? new SafeBrowsingStoreFile : NULL);
338   }
339
340   SafeBrowsingDatabaseFactoryImpl() { }
341
342  private:
343   DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl);
344 };
345
346 // static
347 SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL;
348
349 // Factory method, non-thread safe. Caller has to make sure this s called
350 // on SafeBrowsing Thread.
351 // TODO(shess): There's no need for a factory any longer.  Convert
352 // SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create()
353 // callers just construct things directly.
354 SafeBrowsingDatabase* SafeBrowsingDatabase::Create(
355     bool enable_download_protection,
356     bool enable_client_side_whitelist,
357     bool enable_download_whitelist,
358     bool enable_extension_blacklist,
359     bool enable_side_effect_free_whitelist,
360     bool enable_ip_blacklist) {
361   if (!factory_)
362     factory_ = new SafeBrowsingDatabaseFactoryImpl();
363   return factory_->CreateSafeBrowsingDatabase(
364       enable_download_protection,
365       enable_client_side_whitelist,
366       enable_download_whitelist,
367       enable_extension_blacklist,
368       enable_side_effect_free_whitelist,
369       enable_ip_blacklist);
370 }
371
// Out-of-line destructor for the interface class; the body is intentionally
// empty.
SafeBrowsingDatabase::~SafeBrowsingDatabase() {
}
374
375 // static
376 base::FilePath SafeBrowsingDatabase::BrowseDBFilename(
377     const base::FilePath& db_base_filename) {
378   return base::FilePath(db_base_filename.value() + kBrowseDBFile);
379 }
380
381 // static
382 base::FilePath SafeBrowsingDatabase::DownloadDBFilename(
383     const base::FilePath& db_base_filename) {
384   return base::FilePath(db_base_filename.value() + kDownloadDBFile);
385 }
386
387 // static
388 base::FilePath SafeBrowsingDatabase::BloomFilterForFilename(
389     const base::FilePath& db_filename) {
390   return base::FilePath(db_filename.value() + kBloomFilterFile);
391 }
392
393 // static
394 base::FilePath SafeBrowsingDatabase::PrefixSetForFilename(
395     const base::FilePath& db_filename) {
396   return base::FilePath(db_filename.value() + kPrefixSetFile);
397 }
398
399 // static
400 base::FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename(
401     const base::FilePath& db_filename) {
402   return base::FilePath(db_filename.value() + kCsdWhitelistDBFile);
403 }
404
405 // static
406 base::FilePath SafeBrowsingDatabase::DownloadWhitelistDBFilename(
407     const base::FilePath& db_filename) {
408   return base::FilePath(db_filename.value() + kDownloadWhitelistDBFile);
409 }
410
411 // static
412 base::FilePath SafeBrowsingDatabase::ExtensionBlacklistDBFilename(
413     const base::FilePath& db_filename) {
414   return base::FilePath(db_filename.value() + kExtensionBlacklistDBFile);
415 }
416
417 // static
418 base::FilePath SafeBrowsingDatabase::SideEffectFreeWhitelistDBFilename(
419     const base::FilePath& db_filename) {
420   return base::FilePath(db_filename.value() + kSideEffectFreeWhitelistDBFile);
421 }
422
423 // static
424 base::FilePath SafeBrowsingDatabase::IpBlacklistDBFilename(
425     const base::FilePath& db_filename) {
426   return base::FilePath(db_filename.value() + kIPBlacklistDBFile);
427 }
428
429 SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) {
430   if (list_id == safe_browsing_util::PHISH ||
431       list_id == safe_browsing_util::MALWARE) {
432     return browse_store_.get();
433   } else if (list_id == safe_browsing_util::BINURL) {
434     return download_store_.get();
435   } else if (list_id == safe_browsing_util::CSDWHITELIST) {
436     return csd_whitelist_store_.get();
437   } else if (list_id == safe_browsing_util::DOWNLOADWHITELIST) {
438     return download_whitelist_store_.get();
439   } else if (list_id == safe_browsing_util::EXTENSIONBLACKLIST) {
440     return extension_blacklist_store_.get();
441   } else if (list_id == safe_browsing_util::SIDEEFFECTFREEWHITELIST) {
442     return side_effect_free_whitelist_store_.get();
443   } else if (list_id == safe_browsing_util::IPBLACKLIST) {
444     return ip_blacklist_store_.get();
445   }
446   return NULL;
447 }
448
// static
// Records |failure_type| as one sample of the "SB2.DatabaseFailure"
// enumeration histogram, with FAILURE_DATABASE_MAX as the boundary value.
void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) {
  UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type,
                            FAILURE_DATABASE_MAX);
}
454
// Default constructor: remembers the current message loop as the creation
// loop and creates only the file-backed browse store.  None of the optional
// stores is created here.
SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew()
    : creation_loop_(base::MessageLoop::current()),
      browse_store_(new SafeBrowsingStoreFile),
      reset_factory_(this),
      corruption_detected_(false),
      change_detected_(false) {
  // The browse store is mandatory; every optional store must still be unset.
  DCHECK(browse_store_.get());
  DCHECK(!download_store_.get());
  DCHECK(!csd_whitelist_store_.get());
  DCHECK(!download_whitelist_store_.get());
  DCHECK(!extension_blacklist_store_.get());
  DCHECK(!side_effect_free_whitelist_store_.get());
  DCHECK(!ip_blacklist_store_.get());
}
469
470 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew(
471     SafeBrowsingStore* browse_store,
472     SafeBrowsingStore* download_store,
473     SafeBrowsingStore* csd_whitelist_store,
474     SafeBrowsingStore* download_whitelist_store,
475     SafeBrowsingStore* extension_blacklist_store,
476     SafeBrowsingStore* side_effect_free_whitelist_store,
477     SafeBrowsingStore* ip_blacklist_store)
478     : creation_loop_(base::MessageLoop::current()),
479       browse_store_(browse_store),
480       download_store_(download_store),
481       csd_whitelist_store_(csd_whitelist_store),
482       download_whitelist_store_(download_whitelist_store),
483       extension_blacklist_store_(extension_blacklist_store),
484       side_effect_free_whitelist_store_(side_effect_free_whitelist_store),
485       ip_blacklist_store_(ip_blacklist_store),
486       reset_factory_(this),
487       corruption_detected_(false) {
488   DCHECK(browse_store_.get());
489 }
490
// Destructor.  Unlike the other methods it does not currently assert that it
// runs on the creation message loop.
SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() {
  // The DCHECK is disabled due to crbug.com/338486 .
  // DCHECK_EQ(creation_loop_, base::MessageLoop::current());
}
495
// Binds every store that was supplied at construction to its on-disk file,
// whose name is derived from |filename_base|, and loads the in-memory lookup
// structures (browse prefix set, whitelists, side-effect free whitelist
// prefix set, IP blacklist).  Must run on the creation message loop and only
// once per object.
//
// Every store is initialized with HandleCorruptDatabase() as its callback,
// bound with base::Unretained(this).
// NOTE(review): that binding assumes no store invokes the callback after
// |this| has been destroyed -- confirm against the stores' lifetime.
void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) {
  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
  // Ensure we haven't been run before.
  DCHECK(browse_filename_.empty());
  DCHECK(download_filename_.empty());
  DCHECK(csd_whitelist_filename_.empty());
  DCHECK(download_whitelist_filename_.empty());
  DCHECK(extension_blacklist_filename_.empty());
  DCHECK(side_effect_free_whitelist_filename_.empty());
  DCHECK(ip_blacklist_filename_.empty());

  // The browse store is mandatory, so it is set up unconditionally.
  browse_filename_ = BrowseDBFilename(filename_base);
  browse_prefix_set_filename_ = PrefixSetForFilename(browse_filename_);

  browse_store_->Init(
      browse_filename_,
      base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
                 base::Unretained(this)));
  DVLOG(1) << "Init browse store: " << browse_filename_.value();

  {
    // NOTE: There is no need to grab the lock in this function, since
    // until it returns, there are no pointers to this class on other
    // threads.  Then again, that means there is no possibility of
    // contention on the lock...
    base::AutoLock locked(lookup_lock_);
    cached_browse_hashes_.clear();
    LoadPrefixSet();
  }

  // Download store: only initialized here; nothing is loaded into memory.
  if (download_store_.get()) {
    download_filename_ = DownloadDBFilename(filename_base);
    download_store_->Init(
        download_filename_,
        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
                   base::Unretained(this)));
    DVLOG(1) << "Init download store: " << download_filename_.value();
  }

  // Client-side-detection whitelist: load the full hashes into memory.  When
  // they cannot be read, or the store is absent, WhitelistEverything() is
  // applied to the in-memory whitelist instead.
  if (csd_whitelist_store_.get()) {
    csd_whitelist_filename_ = CsdWhitelistDBFilename(filename_base);
    csd_whitelist_store_->Init(
        csd_whitelist_filename_,
        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
                   base::Unretained(this)));
    DVLOG(1) << "Init csd whitelist store: " << csd_whitelist_filename_.value();
    std::vector<SBAddFullHash> full_hashes;
    if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) {
      LoadWhitelist(full_hashes, &csd_whitelist_);
    } else {
      WhitelistEverything(&csd_whitelist_);
    }
  } else {
    WhitelistEverything(&csd_whitelist_);  // Just to be safe.
  }

  // Download whitelist: same loading and fallback scheme as the csd
  // whitelist above.
  if (download_whitelist_store_.get()) {
    download_whitelist_filename_ = DownloadWhitelistDBFilename(filename_base);
    download_whitelist_store_->Init(
        download_whitelist_filename_,
        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
                   base::Unretained(this)));
    DVLOG(1) << "Init download whitelist store: "
             << download_whitelist_filename_.value();
    std::vector<SBAddFullHash> full_hashes;
    if (download_whitelist_store_->GetAddFullHashes(&full_hashes)) {
      LoadWhitelist(full_hashes, &download_whitelist_);
    } else {
      WhitelistEverything(&download_whitelist_);
    }
  } else {
    WhitelistEverything(&download_whitelist_);  // Just to be safe.
  }

  // Extension blacklist store: only initialized here; nothing is loaded into
  // memory.
  if (extension_blacklist_store_.get()) {
    extension_blacklist_filename_ = ExtensionBlacklistDBFilename(filename_base);
    extension_blacklist_store_->Init(
        extension_blacklist_filename_,
        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
                   base::Unretained(this)));
    DVLOG(1) << "Init extension blacklist store: "
             << extension_blacklist_filename_.value();
  }

  // Side-effect free whitelist: initialize the store and, when its database
  // file exists and is non-empty, load the accompanying prefix set.
  if (side_effect_free_whitelist_store_.get()) {
    side_effect_free_whitelist_filename_ =
        SideEffectFreeWhitelistDBFilename(filename_base);
    side_effect_free_whitelist_prefix_set_filename_ =
        PrefixSetForFilename(side_effect_free_whitelist_filename_);
    side_effect_free_whitelist_store_->Init(
        side_effect_free_whitelist_filename_,
        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
                   base::Unretained(this)));
    DVLOG(1) << "Init side-effect free whitelist store: "
             << side_effect_free_whitelist_filename_.value();

    // If there is no database, the filter cannot be used.
    base::File::Info db_info;
    if (base::GetFileInfo(side_effect_free_whitelist_filename_, &db_info)
        && db_info.size != 0) {
      const base::TimeTicks before = base::TimeTicks::Now();
      side_effect_free_whitelist_prefix_set_ =
          safe_browsing::PrefixSet::LoadFile(
              side_effect_free_whitelist_prefix_set_filename_);
      DVLOG(1) << "SafeBrowsingDatabaseNew read side-effect free whitelist "
               << "prefix set in "
               << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
      UMA_HISTOGRAM_TIMES("SB2.SideEffectFreeWhitelistPrefixSetLoad",
                          base::TimeTicks::Now() - before);
      // A failed load leaves the prefix set unset; record it as a failure.
      if (!side_effect_free_whitelist_prefix_set_.get())
        RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ);
    }
  } else {
    // Delete any files of the side-effect free whitelist that may be around
    // from when it was previously enabled.
    SafeBrowsingStoreFile::DeleteStore(
        SideEffectFreeWhitelistDBFilename(filename_base));
  }

  // IP blacklist: load the full hashes into memory; when they cannot be read
  // the in-memory list is loaded from an empty vector instead.
  if (ip_blacklist_store_.get()) {
    ip_blacklist_filename_ = IpBlacklistDBFilename(filename_base);
    ip_blacklist_store_->Init(
        ip_blacklist_filename_,
        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
                   base::Unretained(this)));
    DVLOG(1) << "SafeBrowsingDatabaseNew read ip blacklist: "
             << ip_blacklist_filename_.value();
    std::vector<SBAddFullHash> full_hashes;
    if (ip_blacklist_store_->GetAddFullHashes(&full_hashes)) {
      LoadIpBlacklist(full_hashes);
    } else {
      DVLOG(1) << "Unable to load full hashes from the IP blacklist.";
      LoadIpBlacklist(std::vector<SBAddFullHash>());  // Clear the list.
    }
  }
}
632
633 bool SafeBrowsingDatabaseNew::ResetDatabase() {
634   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
635
636   // Delete files on disk.
637   // TODO(shess): Hard to see where one might want to delete without a
638   // reset.  Perhaps inline |Delete()|?
639   if (!Delete())
640     return false;
641
642   // Reset objects in memory.
643   {
644     base::AutoLock locked(lookup_lock_);
645     cached_browse_hashes_.clear();
646     prefix_miss_cache_.clear();
647     browse_prefix_set_.reset();
648     side_effect_free_whitelist_prefix_set_.reset();
649     ip_blacklist_.clear();
650   }
651   // Wants to acquire the lock itself.
652   WhitelistEverything(&csd_whitelist_);
653   WhitelistEverything(&download_whitelist_);
654   return true;
655 }
656
657 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
658     const GURL& url,
659     std::vector<SBPrefix>* prefix_hits,
660     std::vector<SBFullHashResult>* cached_hits,
661     base::Time last_update) {
662   // Clear the results first.
663   prefix_hits->clear();
664   cached_hits->clear();
665
666   std::vector<SBFullHash> full_hashes;
667   BrowseFullHashesToCheck(url, false, &full_hashes);
668   if (full_hashes.empty())
669     return false;
670
671   // This function is called on the I/O thread, prevent changes to
672   // filter and caches.
673   base::AutoLock locked(lookup_lock_);
674
675   // |browse_prefix_set_| is empty until it is either read from disk, or the
676   // first update populates it.  Bail out without a hit if not yet
677   // available.
678   if (!browse_prefix_set_.get())
679     return false;
680
681   size_t miss_count = 0;
682   for (size_t i = 0; i < full_hashes.size(); ++i) {
683     if (browse_prefix_set_->Exists(full_hashes[i])) {
684       const SBPrefix prefix = full_hashes[i].prefix;
685       prefix_hits->push_back(prefix);
686       if (prefix_miss_cache_.count(prefix) > 0)
687         ++miss_count;
688     }
689   }
690
691   // If all the prefixes are cached as 'misses', don't issue a GetHash.
692   if (miss_count == prefix_hits->size())
693     return false;
694
695   // Find matching cached gethash responses.
696   std::sort(prefix_hits->begin(), prefix_hits->end());
697   GetCachedFullHashesForBrowse(*prefix_hits, cached_browse_hashes_,
698                                cached_hits, last_update);
699
700   return true;
701 }
702
703 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
704     const std::vector<GURL>& urls,
705     std::vector<SBPrefix>* prefix_hits) {
706   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
707
708   // Ignore this check when download checking is not enabled.
709   if (!download_store_.get())
710     return false;
711
712   std::vector<SBPrefix> prefixes;
713   GetDownloadUrlPrefixes(urls, &prefixes);
714   return MatchAddPrefixes(download_store_.get(),
715                           safe_browsing_util::BINURL % 2,
716                           prefixes,
717                           prefix_hits);
718 }
719
720 bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) {
721   // This method is theoretically thread-safe but we expect all calls to
722   // originate from the IO thread.
723   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
724   std::vector<SBFullHash> full_hashes;
725   BrowseFullHashesToCheck(url, true, &full_hashes);
726   return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
727 }
728
729 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedUrl(const GURL& url) {
730   std::vector<SBFullHash> full_hashes;
731   BrowseFullHashesToCheck(url, true, &full_hashes);
732   return ContainsWhitelistedHashes(download_whitelist_, full_hashes);
733 }
734
735 bool SafeBrowsingDatabaseNew::ContainsExtensionPrefixes(
736     const std::vector<SBPrefix>& prefixes,
737     std::vector<SBPrefix>* prefix_hits) {
738   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
739   if (!extension_blacklist_store_)
740     return false;
741
742   return MatchAddPrefixes(extension_blacklist_store_.get(),
743                           safe_browsing_util::EXTENSIONBLACKLIST % 2,
744                           prefixes,
745                           prefix_hits);
746 }
747
748 bool SafeBrowsingDatabaseNew::ContainsSideEffectFreeWhitelistUrl(
749     const GURL& url) {
750   std::string host;
751   std::string path;
752   std::string query;
753   safe_browsing_util::CanonicalizeUrl(url, &host, &path, &query);
754   std::string url_to_check = host + path;
755   if (!query.empty())
756     url_to_check +=  "?" + query;
757   SBFullHash full_hash = SBFullHashForString(url_to_check);
758
759   // This function can be called on any thread, so lock against any changes
760   base::AutoLock locked(lookup_lock_);
761
762   // |side_effect_free_whitelist_prefix_set_| is empty until it is either read
763   // from disk, or the first update populates it.  Bail out without a hit if
764   // not yet available.
765   if (!side_effect_free_whitelist_prefix_set_.get())
766     return false;
767
768   return side_effect_free_whitelist_prefix_set_->Exists(full_hash);
769 }
770
771 bool SafeBrowsingDatabaseNew::ContainsMalwareIP(const std::string& ip_address) {
772   net::IPAddressNumber ip_number;
773   if (!net::ParseIPLiteralToNumber(ip_address, &ip_number)) {
774     DVLOG(2) << "Unable to parse IP address: '" << ip_address << "'";
775     return false;
776   }
777   if (ip_number.size() == net::kIPv4AddressSize) {
778     ip_number = net::ConvertIPv4NumberToIPv6Number(ip_number);
779   }
780   if (ip_number.size() != net::kIPv6AddressSize) {
781     DVLOG(2) << "Unable to convert IPv4 address to IPv6: '"
782              << ip_address << "'";
783     return false;  // better safe than sorry.
784   }
785   // This function can be called from any thread.
786   base::AutoLock locked(lookup_lock_);
787   for (IPBlacklist::const_iterator it = ip_blacklist_.begin();
788        it != ip_blacklist_.end();
789        ++it) {
790     const std::string& mask = it->first;
791     DCHECK_EQ(mask.size(), ip_number.size());
792     std::string subnet(net::kIPv6AddressSize, '\0');
793     for (size_t i = 0; i < net::kIPv6AddressSize; ++i) {
794       subnet[i] = ip_number[i] & mask[i];
795     }
796     const std::string hash = base::SHA1HashString(subnet);
797     DVLOG(2) << "Lookup Malware IP: "
798              << " ip:" << ip_address
799              << " mask:" << base::HexEncode(mask.data(), mask.size())
800              << " subnet:" << base::HexEncode(subnet.data(), subnet.size())
801              << " hash:" << base::HexEncode(hash.data(), hash.size());
802     if (it->second.count(hash) > 0) {
803       return true;
804     }
805   }
806   return false;
807 }
808
809 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedString(
810     const std::string& str) {
811   std::vector<SBFullHash> hashes;
812   hashes.push_back(SBFullHashForString(str));
813   return ContainsWhitelistedHashes(download_whitelist_, hashes);
814 }
815
816 bool SafeBrowsingDatabaseNew::ContainsWhitelistedHashes(
817     const SBWhitelist& whitelist,
818     const std::vector<SBFullHash>& hashes) {
819   base::AutoLock l(lookup_lock_);
820   if (whitelist.second)
821     return true;
822   for (std::vector<SBFullHash>::const_iterator it = hashes.begin();
823        it != hashes.end(); ++it) {
824     if (std::binary_search(whitelist.first.begin(), whitelist.first.end(),
825                            *it, SBFullHashLess)) {
826       return true;
827     }
828   }
829   return false;
830 }
831
832 // Helper to insert entries for all of the prefixes or full hashes in
833 // |entry| into the store.
834 void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host,
835                                         const SBEntry* entry, int list_id) {
836   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
837
838   SafeBrowsingStore* store = GetStore(list_id);
839   if (!store) return;
840
841   STATS_COUNTER("SB.HostInsert", 1);
842   const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
843   const int count = entry->prefix_count();
844
845   DCHECK(!entry->IsSub());
846   if (!count) {
847     // No prefixes, use host instead.
848     STATS_COUNTER("SB.PrefixAdd", 1);
849     store->WriteAddPrefix(encoded_chunk_id, host);
850   } else if (entry->IsPrefix()) {
851     // Prefixes only.
852     for (int i = 0; i < count; i++) {
853       const SBPrefix prefix = entry->PrefixAt(i);
854       STATS_COUNTER("SB.PrefixAdd", 1);
855       store->WriteAddPrefix(encoded_chunk_id, prefix);
856     }
857   } else {
858     // Full hashes only.
859     for (int i = 0; i < count; ++i) {
860       const SBFullHash full_hash = entry->FullHashAt(i);
861
862       STATS_COUNTER("SB.PrefixAddFull", 1);
863       store->WriteAddHash(encoded_chunk_id, full_hash);
864     }
865   }
866 }
867
868 // Helper to iterate over all the entries in the hosts in |chunks| and
869 // add them to the store.
870 void SafeBrowsingDatabaseNew::InsertAddChunks(
871     const safe_browsing_util::ListType list_id,
872     const SBChunkList& chunks) {
873   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
874
875   SafeBrowsingStore* store = GetStore(list_id);
876   if (!store) return;
877
878   for (SBChunkList::const_iterator citer = chunks.begin();
879        citer != chunks.end(); ++citer) {
880     const int chunk_id = citer->chunk_number;
881
882     // The server can give us a chunk that we already have because
883     // it's part of a range.  Don't add it again.
884     const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
885     if (store->CheckAddChunk(encoded_chunk_id))
886       continue;
887
888     store->SetAddChunk(encoded_chunk_id);
889     for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
890          hiter != citer->hosts.end(); ++hiter) {
891       // NOTE: Could pass |encoded_chunk_id|, but then inserting add
892       // chunks would look different from inserting sub chunks.
893       InsertAdd(chunk_id, hiter->host, hiter->entry, list_id);
894     }
895   }
896 }
897
898 // Helper to insert entries for all of the prefixes or full hashes in
899 // |entry| into the store.
900 void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host,
901                                         const SBEntry* entry, int list_id) {
902   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
903
904   SafeBrowsingStore* store = GetStore(list_id);
905   if (!store) return;
906
907   STATS_COUNTER("SB.HostDelete", 1);
908   const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
909   const int count = entry->prefix_count();
910
911   DCHECK(entry->IsSub());
912   if (!count) {
913     // No prefixes, use host instead.
914     STATS_COUNTER("SB.PrefixSub", 1);
915     const int add_chunk_id = EncodeChunkId(entry->chunk_id(), list_id);
916     store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, host);
917   } else if (entry->IsPrefix()) {
918     // Prefixes only.
919     for (int i = 0; i < count; i++) {
920       const SBPrefix prefix = entry->PrefixAt(i);
921       const int add_chunk_id =
922           EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
923
924       STATS_COUNTER("SB.PrefixSub", 1);
925       store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix);
926     }
927   } else {
928     // Full hashes only.
929     for (int i = 0; i < count; ++i) {
930       const SBFullHash full_hash = entry->FullHashAt(i);
931       const int add_chunk_id =
932           EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
933
934       STATS_COUNTER("SB.PrefixSubFull", 1);
935       store->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash);
936     }
937   }
938 }
939
940 // Helper to iterate over all the entries in the hosts in |chunks| and
941 // add them to the store.
942 void SafeBrowsingDatabaseNew::InsertSubChunks(
943     safe_browsing_util::ListType list_id,
944     const SBChunkList& chunks) {
945   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
946
947   SafeBrowsingStore* store = GetStore(list_id);
948   if (!store) return;
949
950   for (SBChunkList::const_iterator citer = chunks.begin();
951        citer != chunks.end(); ++citer) {
952     const int chunk_id = citer->chunk_number;
953
954     // The server can give us a chunk that we already have because
955     // it's part of a range.  Don't add it again.
956     const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
957     if (store->CheckSubChunk(encoded_chunk_id))
958       continue;
959
960     store->SetSubChunk(encoded_chunk_id);
961     for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
962          hiter != citer->hosts.end(); ++hiter) {
963       InsertSub(chunk_id, hiter->host, hiter->entry, list_id);
964     }
965   }
966 }
967
968 void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name,
969                                            const SBChunkList& chunks) {
970   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
971
972   if (corruption_detected_ || chunks.empty())
973     return;
974
975   const base::TimeTicks before = base::TimeTicks::Now();
976
977   const safe_browsing_util::ListType list_id =
978       safe_browsing_util::GetListId(list_name);
979   DVLOG(2) << list_name << ": " << list_id;
980
981   SafeBrowsingStore* store = GetStore(list_id);
982   if (!store) return;
983
984   change_detected_ = true;
985
986   store->BeginChunk();
987   if (chunks.front().is_add) {
988     InsertAddChunks(list_id, chunks);
989   } else {
990     InsertSubChunks(list_id, chunks);
991   }
992   store->FinishChunk();
993
994   UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before);
995 }
996
997 void SafeBrowsingDatabaseNew::DeleteChunks(
998     const std::vector<SBChunkDelete>& chunk_deletes) {
999   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1000
1001   if (corruption_detected_ || chunk_deletes.empty())
1002     return;
1003
1004   const std::string& list_name = chunk_deletes.front().list_name;
1005   const safe_browsing_util::ListType list_id =
1006       safe_browsing_util::GetListId(list_name);
1007
1008   SafeBrowsingStore* store = GetStore(list_id);
1009   if (!store) return;
1010
1011   change_detected_ = true;
1012
1013   for (size_t i = 0; i < chunk_deletes.size(); ++i) {
1014     std::vector<int> chunk_numbers;
1015     RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers);
1016     for (size_t j = 0; j < chunk_numbers.size(); ++j) {
1017       const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id);
1018       if (chunk_deletes[i].is_sub_del)
1019         store->DeleteSubChunk(encoded_chunk_id);
1020       else
1021         store->DeleteAddChunk(encoded_chunk_id);
1022     }
1023   }
1024 }
1025
1026 void SafeBrowsingDatabaseNew::CacheHashResults(
1027     const std::vector<SBPrefix>& prefixes,
1028     const std::vector<SBFullHashResult>& full_hits) {
1029   // This is called on the I/O thread, lock against updates.
1030   base::AutoLock locked(lookup_lock_);
1031
1032   if (full_hits.empty()) {
1033     prefix_miss_cache_.insert(prefixes.begin(), prefixes.end());
1034     return;
1035   }
1036
1037   const base::Time now = base::Time::Now();
1038   const size_t orig_size = cached_browse_hashes_.size();
1039   for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin();
1040        iter != full_hits.end(); ++iter) {
1041     if (iter->list_id == safe_browsing_util::MALWARE ||
1042         iter->list_id == safe_browsing_util::PHISH) {
1043       SBFullHashCached cached_hash;
1044       cached_hash.hash = iter->hash;
1045       cached_hash.list_id = iter->list_id;
1046       cached_hash.received = static_cast<int>(now.ToTimeT());
1047       cached_browse_hashes_.push_back(cached_hash);
1048     }
1049   }
1050
1051   // Sort new entries then merge with the previously-sorted entries.
1052   std::vector<SBFullHashCached>::iterator
1053       orig_end = cached_browse_hashes_.begin() + orig_size;
1054   std::sort(orig_end, cached_browse_hashes_.end(), SBFullHashCachedPrefixLess);
1055   std::inplace_merge(cached_browse_hashes_.begin(),
1056                      orig_end, cached_browse_hashes_.end(),
1057                      SBFullHashCachedPrefixLess);
1058 }
1059
1060 bool SafeBrowsingDatabaseNew::UpdateStarted(
1061     std::vector<SBListChunkRanges>* lists) {
1062   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1063   DCHECK(lists);
1064
1065   // If |BeginUpdate()| fails, reset the database.
1066   if (!browse_store_->BeginUpdate()) {
1067     RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN);
1068     HandleCorruptDatabase();
1069     return false;
1070   }
1071
1072   if (download_store_.get() && !download_store_->BeginUpdate()) {
1073     RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN);
1074     HandleCorruptDatabase();
1075     return false;
1076   }
1077
1078   if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) {
1079     RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
1080     HandleCorruptDatabase();
1081     return false;
1082   }
1083
1084   if (download_whitelist_store_.get() &&
1085       !download_whitelist_store_->BeginUpdate()) {
1086     RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
1087     HandleCorruptDatabase();
1088     return false;
1089   }
1090
1091   if (extension_blacklist_store_ &&
1092       !extension_blacklist_store_->BeginUpdate()) {
1093     RecordFailure(FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN);
1094     HandleCorruptDatabase();
1095     return false;
1096   }
1097
1098   if (side_effect_free_whitelist_store_ &&
1099       !side_effect_free_whitelist_store_->BeginUpdate()) {
1100     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN);
1101     HandleCorruptDatabase();
1102     return false;
1103   }
1104
1105   if (ip_blacklist_store_ && !ip_blacklist_store_->BeginUpdate()) {
1106     RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN);
1107     HandleCorruptDatabase();
1108     return false;
1109   }
1110
1111   UpdateChunkRangesForLists(browse_store_.get(),
1112                             safe_browsing_util::kMalwareList,
1113                             safe_browsing_util::kPhishingList,
1114                             lists);
1115
1116   // NOTE(shess): |download_store_| used to contain kBinHashList, which has been
1117   // deprecated.  Code to delete the list from the store shows ~15k hits/day as
1118   // of Feb 2014, so it has been removed.  Everything _should_ be resilient to
1119   // extra data of that sort.
1120   UpdateChunkRangesForList(download_store_.get(),
1121                            safe_browsing_util::kBinUrlList, lists);
1122
1123   UpdateChunkRangesForList(csd_whitelist_store_.get(),
1124                            safe_browsing_util::kCsdWhiteList, lists);
1125
1126   UpdateChunkRangesForList(download_whitelist_store_.get(),
1127                            safe_browsing_util::kDownloadWhiteList, lists);
1128
1129   UpdateChunkRangesForList(extension_blacklist_store_.get(),
1130                            safe_browsing_util::kExtensionBlacklist, lists);
1131
1132   UpdateChunkRangesForList(side_effect_free_whitelist_store_.get(),
1133                            safe_browsing_util::kSideEffectFreeWhitelist, lists);
1134
1135   UpdateChunkRangesForList(ip_blacklist_store_.get(),
1136                            safe_browsing_util::kIPBlacklist, lists);
1137
1138   corruption_detected_ = false;
1139   change_detected_ = false;
1140   return true;
1141 }
1142
1143 void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) {
1144   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1145
1146   // The update may have failed due to corrupt storage (for instance,
1147   // an excessive number of invalid add_chunks and sub_chunks).
1148   // Double-check that the databases are valid.
1149   // TODO(shess): Providing a checksum for the add_chunk and sub_chunk
1150   // sections would allow throwing a corruption error in
1151   // UpdateStarted().
1152   if (!update_succeeded) {
1153     if (!browse_store_->CheckValidity())
1154       DLOG(ERROR) << "Safe-browsing browse database corrupt.";
1155
1156     if (download_store_.get() && !download_store_->CheckValidity())
1157       DLOG(ERROR) << "Safe-browsing download database corrupt.";
1158
1159     if (csd_whitelist_store_.get() && !csd_whitelist_store_->CheckValidity())
1160       DLOG(ERROR) << "Safe-browsing csd whitelist database corrupt.";
1161
1162     if (download_whitelist_store_.get() &&
1163         !download_whitelist_store_->CheckValidity()) {
1164       DLOG(ERROR) << "Safe-browsing download whitelist database corrupt.";
1165     }
1166
1167     if (extension_blacklist_store_ &&
1168         !extension_blacklist_store_->CheckValidity()) {
1169       DLOG(ERROR) << "Safe-browsing extension blacklist database corrupt.";
1170     }
1171
1172     if (side_effect_free_whitelist_store_ &&
1173         !side_effect_free_whitelist_store_->CheckValidity()) {
1174       DLOG(ERROR) << "Safe-browsing side-effect free whitelist database "
1175                   << "corrupt.";
1176     }
1177
1178     if (ip_blacklist_store_ && !ip_blacklist_store_->CheckValidity()) {
1179       DLOG(ERROR) << "Safe-browsing IP blacklist database corrupt.";
1180     }
1181   }
1182
1183   if (corruption_detected_)
1184     return;
1185
1186   // Unroll the transaction if there was a protocol error or if the
1187   // transaction was empty.  This will leave the prefix set, the
1188   // pending hashes, and the prefix miss cache in place.
1189   if (!update_succeeded || !change_detected_) {
1190     // Track empty updates to answer questions at http://crbug.com/72216 .
1191     if (update_succeeded && !change_detected_)
1192       UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0);
1193     browse_store_->CancelUpdate();
1194     if (download_store_.get())
1195       download_store_->CancelUpdate();
1196     if (csd_whitelist_store_.get())
1197       csd_whitelist_store_->CancelUpdate();
1198     if (download_whitelist_store_.get())
1199       download_whitelist_store_->CancelUpdate();
1200     if (extension_blacklist_store_)
1201       extension_blacklist_store_->CancelUpdate();
1202     if (side_effect_free_whitelist_store_)
1203       side_effect_free_whitelist_store_->CancelUpdate();
1204     if (ip_blacklist_store_)
1205       ip_blacklist_store_->CancelUpdate();
1206     return;
1207   }
1208
1209   if (download_store_) {
1210     int64 size_bytes = UpdateHashPrefixStore(
1211         download_filename_,
1212         download_store_.get(),
1213         FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH);
1214     UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
1215                          static_cast<int>(size_bytes / 1024));
1216   }
1217
1218   UpdateBrowseStore();
1219   UpdateWhitelistStore(csd_whitelist_filename_,
1220                        csd_whitelist_store_.get(),
1221                        &csd_whitelist_);
1222   UpdateWhitelistStore(download_whitelist_filename_,
1223                        download_whitelist_store_.get(),
1224                        &download_whitelist_);
1225
1226   if (extension_blacklist_store_) {
1227     int64 size_bytes = UpdateHashPrefixStore(
1228         extension_blacklist_filename_,
1229         extension_blacklist_store_.get(),
1230         FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH);
1231     UMA_HISTOGRAM_COUNTS("SB2.ExtensionBlacklistKilobytes",
1232                          static_cast<int>(size_bytes / 1024));
1233   }
1234
1235   if (side_effect_free_whitelist_store_)
1236     UpdateSideEffectFreeWhitelistStore();
1237
1238   if (ip_blacklist_store_)
1239     UpdateIpBlacklistStore();
1240 }
1241
1242 void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
1243     const base::FilePath& store_filename,
1244     SafeBrowsingStore* store,
1245     SBWhitelist* whitelist) {
1246   if (!store)
1247     return;
1248
1249   // Note: |builder| will not be empty.  The current data store implementation
1250   // stores all full-length hashes as both full and prefix hashes.
1251   safe_browsing::PrefixSetBuilder builder;
1252   std::vector<SBAddFullHash> full_hashes;
1253   if (!store->FinishUpdate(&builder, &full_hashes)) {
1254     RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH);
1255     WhitelistEverything(whitelist);
1256     return;
1257   }
1258
1259 #if defined(OS_MACOSX)
1260   base::mac::SetFileBackupExclusion(store_filename);
1261 #endif
1262
1263   LoadWhitelist(full_hashes, whitelist);
1264 }
1265
1266 int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
1267     const base::FilePath& store_filename,
1268     SafeBrowsingStore* store,
1269     FailureType failure_type) {
1270   // These results are not used after this call. Simply ignore the
1271   // returned value after FinishUpdate(...).
1272   safe_browsing::PrefixSetBuilder builder;
1273   std::vector<SBAddFullHash> add_full_hashes_result;
1274
1275   if (!store->FinishUpdate(&builder, &add_full_hashes_result))
1276     RecordFailure(failure_type);
1277
1278 #if defined(OS_MACOSX)
1279   base::mac::SetFileBackupExclusion(store_filename);
1280 #endif
1281
1282   return GetFileSizeOrZero(store_filename);
1283 }
1284
1285 void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
1286   // Measure the amount of IO during the filter build.
1287   base::IoCounters io_before, io_after;
1288   base::ProcessHandle handle = base::Process::Current().handle();
1289   scoped_ptr<base::ProcessMetrics> metric(
1290 #if !defined(OS_MACOSX)
1291       base::ProcessMetrics::CreateProcessMetrics(handle)
1292 #else
1293       // Getting stats only for the current process is enough, so NULL is fine.
1294       base::ProcessMetrics::CreateProcessMetrics(handle, NULL)
1295 #endif
1296   );
1297
1298   // IoCounters are currently not supported on Mac, and may not be
1299   // available for Linux, so we check the result and only show IO
1300   // stats if they are available.
1301   const bool got_counters = metric->GetIOCounters(&io_before);
1302
1303   const base::TimeTicks before = base::TimeTicks::Now();
1304
1305   // TODO(shess): Perhaps refactor to let builder accumulate full hashes on the
1306   // fly?  Other clients use the SBAddFullHash vector, but AFAICT they only use
1307   // the SBFullHash portion.  It would need an accessor on PrefixSet.
1308   safe_browsing::PrefixSetBuilder builder;
1309   std::vector<SBAddFullHash> add_full_hashes;
1310   if (!browse_store_->FinishUpdate(&builder, &add_full_hashes)) {
1311     RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
1312     return;
1313   }
1314
1315   std::vector<SBFullHash> full_hash_results;
1316   for (size_t i = 0; i < add_full_hashes.size(); ++i) {
1317     full_hash_results.push_back(add_full_hashes[i].full_hash);
1318   }
1319
1320   scoped_ptr<safe_browsing::PrefixSet>
1321       prefix_set(builder.GetPrefixSet(full_hash_results));
1322
1323   // Swap in the newly built filter and cache.
1324   {
1325     base::AutoLock locked(lookup_lock_);
1326
1327     // TODO(shess): If |CacheHashResults()| is posted between the
1328     // earlier lock and this clear, those pending hashes will be lost.
1329     // It could be fixed by only removing hashes which were collected
1330     // at the earlier point.  I believe that is fail-safe as-is (the
1331     // hash will be fetched again).
1332     cached_browse_hashes_.clear();
1333     prefix_miss_cache_.clear();
1334     browse_prefix_set_.swap(prefix_set);
1335   }
1336
1337   DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in "
1338            << (base::TimeTicks::Now() - before).InMilliseconds()
1339            << " ms total.";
1340   UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before);
1341
1342   // Persist the prefix set to disk.  Since only this thread changes
1343   // |browse_prefix_set_|, there is no need to lock.
1344   WritePrefixSet();
1345
1346   // Gather statistics.
1347   if (got_counters && metric->GetIOCounters(&io_after)) {
1348     UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes",
1349                          static_cast<int>(io_after.ReadTransferCount -
1350                                           io_before.ReadTransferCount) / 1024);
1351     UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes",
1352                          static_cast<int>(io_after.WriteTransferCount -
1353                                           io_before.WriteTransferCount) / 1024);
1354     UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
1355                          static_cast<int>(io_after.ReadOperationCount -
1356                                           io_before.ReadOperationCount));
1357     UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
1358                          static_cast<int>(io_after.WriteOperationCount -
1359                                           io_before.WriteOperationCount));
1360   }
1361
1362   int64 file_size = GetFileSizeOrZero(browse_prefix_set_filename_);
1363   UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes",
1364                        static_cast<int>(file_size / 1024));
1365   file_size = GetFileSizeOrZero(browse_filename_);
1366   UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
1367                        static_cast<int>(file_size / 1024));
1368
1369 #if defined(OS_MACOSX)
1370   base::mac::SetFileBackupExclusion(browse_filename_);
1371 #endif
1372 }
1373
1374 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
1375   safe_browsing::PrefixSetBuilder builder;
1376   std::vector<SBAddFullHash> add_full_hashes_result;
1377
1378   if (!side_effect_free_whitelist_store_->FinishUpdate(
1379           &builder, &add_full_hashes_result)) {
1380     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH);
1381     return;
1382   }
1383   scoped_ptr<safe_browsing::PrefixSet>
1384       prefix_set(builder.GetPrefixSetNoHashes());
1385
1386   // Swap in the newly built prefix set.
1387   {
1388     base::AutoLock locked(lookup_lock_);
1389     side_effect_free_whitelist_prefix_set_.swap(prefix_set);
1390   }
1391
1392   const base::TimeTicks before = base::TimeTicks::Now();
1393   const bool write_ok = side_effect_free_whitelist_prefix_set_->WriteFile(
1394       side_effect_free_whitelist_prefix_set_filename_);
1395   DVLOG(1) << "SafeBrowsingDatabaseNew wrote side-effect free whitelist prefix "
1396            << "set in " << (base::TimeTicks::Now() - before).InMilliseconds()
1397            << " ms";
1398   UMA_HISTOGRAM_TIMES("SB2.SideEffectFreePrefixSetWrite",
1399                       base::TimeTicks::Now() - before);
1400
1401   if (!write_ok)
1402     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE);
1403
1404   // Gather statistics.
1405   int64 file_size = GetFileSizeOrZero(
1406       side_effect_free_whitelist_prefix_set_filename_);
1407   UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistPrefixSetKilobytes",
1408                        static_cast<int>(file_size / 1024));
1409   file_size = GetFileSizeOrZero(side_effect_free_whitelist_filename_);
1410   UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistDatabaseKilobytes",
1411                        static_cast<int>(file_size / 1024));
1412
1413 #if defined(OS_MACOSX)
1414   base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename_);
1415   base::mac::SetFileBackupExclusion(
1416       side_effect_free_whitelist_prefix_set_filename_);
1417 #endif
1418 }
1419
1420 void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() {
1421   // Note: prefixes will not be empty.  The current data store implementation
1422   // stores all full-length hashes as both full and prefix hashes.
1423   safe_browsing::PrefixSetBuilder builder;
1424   std::vector<SBAddFullHash> full_hashes;
1425   if (!ip_blacklist_store_->FinishUpdate(&builder, &full_hashes)) {
1426     RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH);
1427     LoadIpBlacklist(std::vector<SBAddFullHash>());  // Clear the list.
1428     return;
1429   }
1430
1431 #if defined(OS_MACOSX)
1432   base::mac::SetFileBackupExclusion(ip_blacklist_filename_);
1433 #endif
1434
1435   LoadIpBlacklist(full_hashes);
1436 }
1437
1438 void SafeBrowsingDatabaseNew::HandleCorruptDatabase() {
1439   // Reset the database after the current task has unwound (but only
1440   // reset once within the scope of a given task).
1441   if (!reset_factory_.HasWeakPtrs()) {
1442     RecordFailure(FAILURE_DATABASE_CORRUPT);
1443     base::MessageLoop::current()->PostTask(FROM_HERE,
1444         base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase,
1445                    reset_factory_.GetWeakPtr()));
1446   }
1447 }
1448
1449 void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
1450   RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER);
1451   corruption_detected_ = true;  // Stop updating the database.
1452   ResetDatabase();
1453   DLOG(FATAL) << "SafeBrowsing database was corrupt and reset";
1454 }
1455
1456 // TODO(shess): I'm not clear why this code doesn't have any
1457 // real error-handling.
1458 void SafeBrowsingDatabaseNew::LoadPrefixSet() {
1459   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1460   DCHECK(!browse_prefix_set_filename_.empty());
1461
1462   // If there is no database, the filter cannot be used.
1463   base::File::Info db_info;
1464   if (!base::GetFileInfo(browse_filename_, &db_info) || db_info.size == 0)
1465     return;
1466
1467   // Cleanup any stale bloom filter (no longer used).
1468   // TODO(shess): Track failure to delete?
1469   base::FilePath bloom_filter_filename =
1470       BloomFilterForFilename(browse_filename_);
1471   base::DeleteFile(bloom_filter_filename, false);
1472
1473   const base::TimeTicks before = base::TimeTicks::Now();
1474   browse_prefix_set_ = safe_browsing::PrefixSet::LoadFile(
1475       browse_prefix_set_filename_);
1476   DVLOG(1) << "SafeBrowsingDatabaseNew read prefix set in "
1477            << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
1478   UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before);
1479
1480   if (!browse_prefix_set_.get())
1481     RecordFailure(FAILURE_BROWSE_PREFIX_SET_READ);
1482 }
1483
1484 bool SafeBrowsingDatabaseNew::Delete() {
1485   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1486
1487   const bool r1 = browse_store_->Delete();
1488   if (!r1)
1489     RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1490
1491   const bool r2 = download_store_.get() ? download_store_->Delete() : true;
1492   if (!r2)
1493     RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1494
1495   const bool r3 = csd_whitelist_store_.get() ?
1496       csd_whitelist_store_->Delete() : true;
1497   if (!r3)
1498     RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1499
1500   const bool r4 = download_whitelist_store_.get() ?
1501       download_whitelist_store_->Delete() : true;
1502   if (!r4)
1503     RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1504
1505   base::FilePath bloom_filter_filename =
1506       BloomFilterForFilename(browse_filename_);
1507   const bool r5 = base::DeleteFile(bloom_filter_filename, false);
1508   if (!r5)
1509     RecordFailure(FAILURE_DATABASE_FILTER_DELETE);
1510
1511   const bool r6 = base::DeleteFile(browse_prefix_set_filename_, false);
1512   if (!r6)
1513     RecordFailure(FAILURE_BROWSE_PREFIX_SET_DELETE);
1514
1515   const bool r7 = base::DeleteFile(extension_blacklist_filename_, false);
1516   if (!r7)
1517     RecordFailure(FAILURE_EXTENSION_BLACKLIST_DELETE);
1518
1519   const bool r8 = base::DeleteFile(side_effect_free_whitelist_filename_,
1520                                     false);
1521   if (!r8)
1522     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE);
1523
1524   const bool r9 = base::DeleteFile(
1525       side_effect_free_whitelist_prefix_set_filename_,
1526       false);
1527   if (!r9)
1528     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE);
1529
1530   const bool r10 = base::DeleteFile(ip_blacklist_filename_, false);
1531   if (!r10)
1532     RecordFailure(FAILURE_IP_BLACKLIST_DELETE);
1533
1534   return r1 && r2 && r3 && r4 && r5 && r6 && r7 && r8 && r9 && r10;
1535 }
1536
1537 void SafeBrowsingDatabaseNew::WritePrefixSet() {
1538   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1539
1540   if (!browse_prefix_set_.get())
1541     return;
1542
1543   const base::TimeTicks before = base::TimeTicks::Now();
1544   const bool write_ok = browse_prefix_set_->WriteFile(
1545       browse_prefix_set_filename_);
1546   DVLOG(1) << "SafeBrowsingDatabaseNew wrote prefix set in "
1547            << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
1548   UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before);
1549
1550   if (!write_ok)
1551     RecordFailure(FAILURE_BROWSE_PREFIX_SET_WRITE);
1552
1553 #if defined(OS_MACOSX)
1554   base::mac::SetFileBackupExclusion(browse_prefix_set_filename_);
1555 #endif
1556 }
1557
1558 void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist* whitelist) {
1559   base::AutoLock locked(lookup_lock_);
1560   whitelist->second = true;
1561   whitelist->first.clear();
1562 }
1563
1564 void SafeBrowsingDatabaseNew::LoadWhitelist(
1565     const std::vector<SBAddFullHash>& full_hashes,
1566     SBWhitelist* whitelist) {
1567   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1568   if (full_hashes.size() > kMaxWhitelistSize) {
1569     WhitelistEverything(whitelist);
1570     return;
1571   }
1572
1573   std::vector<SBFullHash> new_whitelist;
1574   new_whitelist.reserve(full_hashes.size());
1575   for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
1576        it != full_hashes.end(); ++it) {
1577     new_whitelist.push_back(it->full_hash);
1578   }
1579   std::sort(new_whitelist.begin(), new_whitelist.end(), SBFullHashLess);
1580
1581   SBFullHash kill_switch = SBFullHashForString(kWhitelistKillSwitchUrl);
1582   if (std::binary_search(new_whitelist.begin(), new_whitelist.end(),
1583                          kill_switch, SBFullHashLess)) {
1584     // The kill switch is whitelisted hence we whitelist all URLs.
1585     WhitelistEverything(whitelist);
1586   } else {
1587     base::AutoLock locked(lookup_lock_);
1588     whitelist->second = false;
1589     whitelist->first.swap(new_whitelist);
1590   }
1591 }
1592
1593 void SafeBrowsingDatabaseNew::LoadIpBlacklist(
1594     const std::vector<SBAddFullHash>& full_hashes) {
1595   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1596   IPBlacklist new_blacklist;
1597   DVLOG(2) << "Writing IP blacklist of size: " << full_hashes.size();
1598   for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
1599        it != full_hashes.end();
1600        ++it) {
1601     const char* full_hash = it->full_hash.full_hash;
1602     DCHECK_EQ(crypto::kSHA256Length, arraysize(it->full_hash.full_hash));
1603     // The format of the IP blacklist is:
1604     // SHA-1(IPv6 prefix) + uint8(prefix size) + 11 unused bytes.
1605     std::string hashed_ip_prefix(full_hash, base::kSHA1Length);
1606     size_t prefix_size = static_cast<uint8>(full_hash[base::kSHA1Length]);
1607     if (prefix_size > kMaxIpPrefixSize || prefix_size < kMinIpPrefixSize) {
1608       DVLOG(2) << "Invalid IP prefix size in IP blacklist: " << prefix_size;
1609       RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_INVALID);
1610       new_blacklist.clear();  // Load empty blacklist.
1611       break;
1612     }
1613
1614     // We precompute the mask for the given subnet size to speed up lookups.
1615     // Basically we need to create a 16B long string which has the highest
1616     // |size| bits sets to one.
1617     std::string mask(net::kIPv6AddressSize, '\0');
1618     mask.replace(0, prefix_size / 8, prefix_size / 8, '\xFF');
1619     if ((prefix_size % 8) != 0) {
1620       mask[prefix_size / 8] = 0xFF << (8 - (prefix_size % 8));
1621     }
1622     DVLOG(2) << "Inserting malicious IP: "
1623              << " raw:" << base::HexEncode(full_hash, crypto::kSHA256Length)
1624              << " mask:" << base::HexEncode(mask.data(), mask.size())
1625              << " prefix_size:" << prefix_size
1626              << " hashed_ip:" << base::HexEncode(hashed_ip_prefix.data(),
1627                                                  hashed_ip_prefix.size());
1628     new_blacklist[mask].insert(hashed_ip_prefix);
1629   }
1630
1631   base::AutoLock locked(lookup_lock_);
1632   ip_blacklist_.swap(new_blacklist);
1633 }
1634
1635 bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() {
1636   SBFullHash malware_kill_switch = SBFullHashForString(kMalwareIPKillSwitchUrl);
1637   std::vector<SBFullHash> full_hashes;
1638   full_hashes.push_back(malware_kill_switch);
1639   return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
1640 }