785ceca7b162730cb474ffe82a067677cca5270f
[platform/framework/web/crosswalk.git] / src / chrome / browser / safe_browsing / download_protection_service.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/safe_browsing/download_protection_service.h"
6
7 #include "base/bind.h"
8 #include "base/compiler_specific.h"
9 #include "base/format_macros.h"
10 #include "base/memory/scoped_ptr.h"
11 #include "base/memory/weak_ptr.h"
12 #include "base/metrics/histogram.h"
13 #include "base/metrics/sparse_histogram.h"
14 #include "base/sequenced_task_runner_helpers.h"
15 #include "base/stl_util.h"
16 #include "base/strings/string_number_conversions.h"
17 #include "base/strings/string_util.h"
18 #include "base/strings/stringprintf.h"
19 #include "base/threading/sequenced_worker_pool.h"
20 #include "base/time/time.h"
21 #include "chrome/browser/history/history_service.h"
22 #include "chrome/browser/history/history_service_factory.h"
23 #include "chrome/browser/safe_browsing/binary_feature_extractor.h"
24 #include "chrome/browser/safe_browsing/download_feedback_service.h"
25 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
26 #include "chrome/browser/safe_browsing/sandboxed_zip_analyzer.h"
27 #include "chrome/browser/ui/browser.h"
28 #include "chrome/browser/ui/browser_list.h"
29 #include "chrome/common/safe_browsing/csd.pb.h"
30 #include "chrome/common/safe_browsing/download_protection_util.h"
31 #include "chrome/common/safe_browsing/zip_analyzer.h"
32 #include "chrome/common/url_constants.h"
33 #include "content/public/browser/browser_thread.h"
34 #include "content/public/browser/download_item.h"
35 #include "content/public/browser/page_navigator.h"
36 #include "google_apis/google_api_keys.h"
37 #include "net/base/escape.h"
38 #include "net/base/load_flags.h"
39 #include "net/cert/x509_cert_types.h"
40 #include "net/cert/x509_certificate.h"
41 #include "net/http/http_status_code.h"
42 #include "net/url_request/url_fetcher.h"
43 #include "net/url_request/url_fetcher_delegate.h"
44 #include "net/url_request/url_request_context_getter.h"
45 #include "net/url_request/url_request_status.h"
46
47 using content::BrowserThread;
48
namespace {
// Timeout value in milliseconds (7 seconds).
// NOTE(review): judging by the name this bounds a download-protection
// request; the code that reads it is not visible in this part of the file.
static const int64 kDownloadRequestTimeoutMs = 7000;
}  // namespace
52
53 namespace safe_browsing {
54
// HTTPS endpoint for the client download report.
// NOTE(review): presumably the base of what GetDownloadRequestUrl() returns;
// that function is not visible in this part of the file -- confirm.
const char DownloadProtectionService::kDownloadRequestUrl[] =
    "https://sb-ssl.google.com/safebrowsing/clientreport/download";
57
58 namespace {
// List of extensions for which we track some UMA stats.
// The values are the samples logged to the
// "SBClientDownload.DownloadExtensions" histogram by
// RecordFileExtensionType(), with EXTENSION_MAX as the histogram boundary.
// NOTE(review): EXTENSION_OTHER sits in the middle of the list, which
// suggests later entries were appended so existing bucket values stay
// stable -- confirm before reordering or inserting values.
enum MaliciousExtensionType {
  EXTENSION_EXE,
  EXTENSION_MSI,
  EXTENSION_CAB,
  EXTENSION_SYS,
  EXTENSION_SCR,
  EXTENSION_DRV,
  EXTENSION_BAT,
  EXTENSION_ZIP,
  EXTENSION_RAR,
  EXTENSION_DLL,
  EXTENSION_PIF,
  EXTENSION_COM,
  EXTENSION_JAR,
  EXTENSION_CLASS,
  EXTENSION_PDF,
  EXTENSION_VB,
  EXTENSION_REG,
  EXTENSION_GRP,
  EXTENSION_OTHER,  // Groups all other extensions into one bucket.
  EXTENSION_CRX,
  EXTENSION_APK,
  EXTENSION_DMG,
  EXTENSION_PKG,
  EXTENSION_TORRENT,
  EXTENSION_MAX,  // Not a real bucket; used as the histogram boundary.
};
87
88 MaliciousExtensionType GetExtensionType(const base::FilePath& f) {
89   if (f.MatchesExtension(FILE_PATH_LITERAL(".exe"))) return EXTENSION_EXE;
90   if (f.MatchesExtension(FILE_PATH_LITERAL(".msi"))) return EXTENSION_MSI;
91   if (f.MatchesExtension(FILE_PATH_LITERAL(".cab"))) return EXTENSION_CAB;
92   if (f.MatchesExtension(FILE_PATH_LITERAL(".sys"))) return EXTENSION_SYS;
93   if (f.MatchesExtension(FILE_PATH_LITERAL(".scr"))) return EXTENSION_SCR;
94   if (f.MatchesExtension(FILE_PATH_LITERAL(".drv"))) return EXTENSION_DRV;
95   if (f.MatchesExtension(FILE_PATH_LITERAL(".bat"))) return EXTENSION_BAT;
96   if (f.MatchesExtension(FILE_PATH_LITERAL(".zip"))) return EXTENSION_ZIP;
97   if (f.MatchesExtension(FILE_PATH_LITERAL(".rar"))) return EXTENSION_RAR;
98   if (f.MatchesExtension(FILE_PATH_LITERAL(".dll"))) return EXTENSION_DLL;
99   if (f.MatchesExtension(FILE_PATH_LITERAL(".pif"))) return EXTENSION_PIF;
100   if (f.MatchesExtension(FILE_PATH_LITERAL(".com"))) return EXTENSION_COM;
101   if (f.MatchesExtension(FILE_PATH_LITERAL(".jar"))) return EXTENSION_JAR;
102   if (f.MatchesExtension(FILE_PATH_LITERAL(".class"))) return EXTENSION_CLASS;
103   if (f.MatchesExtension(FILE_PATH_LITERAL(".pdf"))) return EXTENSION_PDF;
104   if (f.MatchesExtension(FILE_PATH_LITERAL(".vb"))) return EXTENSION_VB;
105   if (f.MatchesExtension(FILE_PATH_LITERAL(".reg"))) return EXTENSION_REG;
106   if (f.MatchesExtension(FILE_PATH_LITERAL(".grp"))) return EXTENSION_GRP;
107   if (f.MatchesExtension(FILE_PATH_LITERAL(".crx"))) return EXTENSION_CRX;
108   if (f.MatchesExtension(FILE_PATH_LITERAL(".apk"))) return EXTENSION_APK;
109   if (f.MatchesExtension(FILE_PATH_LITERAL(".dmg"))) return EXTENSION_DMG;
110   if (f.MatchesExtension(FILE_PATH_LITERAL(".pkg"))) return EXTENSION_PKG;
111   if (f.MatchesExtension(FILE_PATH_LITERAL(".torrent")))
112     return EXTENSION_TORRENT;
113   return EXTENSION_OTHER;
114 }
115
116 void RecordFileExtensionType(const base::FilePath& file) {
117   UMA_HISTOGRAM_ENUMERATION("SBClientDownload.DownloadExtensions",
118                             GetExtensionType(file),
119                             EXTENSION_MAX);
120 }
121
// Enumerate for histogramming purposes.
// DO NOT CHANGE THE ORDERING OF THESE VALUES (different histogram data will
// be mixed together based on their values).
// Samples are logged to the "SB2.DownloadChecks" histogram by
// DownloadSBClient::UpdateDownloadCheckStats().
enum SBStatsType {
  // Passed as the "total" stat by DownloadUrlSBClient.
  DOWNLOAD_URL_CHECKS_TOTAL,
  // Not referenced in the visible part of this file.
  DOWNLOAD_URL_CHECKS_CANCELED,
  // Passed as the "dangerous" stat by DownloadUrlSBClient.
  DOWNLOAD_URL_CHECKS_MALWARE,

  // Not referenced in the visible part of this file.
  DOWNLOAD_HASH_CHECKS_TOTAL,
  DOWNLOAD_HASH_CHECKS_MALWARE,

  // Memory space for histograms is determined by the max.
  // ALWAYS ADD NEW VALUES BEFORE THIS ONE.
  DOWNLOAD_CHECKS_MAX
};
137 }  // namespace
138
139 // Parent SafeBrowsing::Client class used to lookup the bad binary
140 // URL and digest list.  There are two sub-classes (one for each list).
141 class DownloadSBClient
142     : public SafeBrowsingDatabaseManager::Client,
143       public base::RefCountedThreadSafe<DownloadSBClient> {
144  public:
145   DownloadSBClient(
146       const content::DownloadItem& item,
147       const DownloadProtectionService::CheckDownloadCallback& callback,
148       const scoped_refptr<SafeBrowsingUIManager>& ui_manager,
149       SBStatsType total_type,
150       SBStatsType dangerous_type)
151       : sha256_hash_(item.GetHash()),
152         url_chain_(item.GetUrlChain()),
153         referrer_url_(item.GetReferrerUrl()),
154         callback_(callback),
155         ui_manager_(ui_manager),
156         start_time_(base::TimeTicks::Now()),
157         total_type_(total_type),
158         dangerous_type_(dangerous_type) {}
159
160   virtual void StartCheck() = 0;
161   virtual bool IsDangerous(SBThreatType threat_type) const = 0;
162
163  protected:
164   friend class base::RefCountedThreadSafe<DownloadSBClient>;
165   virtual ~DownloadSBClient() {}
166
167   void CheckDone(SBThreatType threat_type) {
168     DownloadProtectionService::DownloadCheckResult result =
169         IsDangerous(threat_type) ?
170         DownloadProtectionService::DANGEROUS :
171         DownloadProtectionService::SAFE;
172     BrowserThread::PostTask(BrowserThread::UI,
173                             FROM_HERE,
174                             base::Bind(callback_, result));
175     UpdateDownloadCheckStats(total_type_);
176     if (threat_type != SB_THREAT_TYPE_SAFE) {
177       UpdateDownloadCheckStats(dangerous_type_);
178       BrowserThread::PostTask(
179           BrowserThread::UI,
180           FROM_HERE,
181           base::Bind(&DownloadSBClient::ReportMalware,
182                      this, threat_type));
183     }
184   }
185
186   void ReportMalware(SBThreatType threat_type) {
187     std::string post_data;
188     if (!sha256_hash_.empty())
189       post_data += base::HexEncode(sha256_hash_.data(),
190                                    sha256_hash_.size()) + "\n";
191     for (size_t i = 0; i < url_chain_.size(); ++i) {
192       post_data += url_chain_[i].spec() + "\n";
193     }
194     ui_manager_->ReportSafeBrowsingHit(
195         url_chain_.back(),  // malicious_url
196         url_chain_.front(), // page_url
197         referrer_url_,
198         true,  // is_subresource
199         threat_type,
200         post_data);
201   }
202
203   void UpdateDownloadCheckStats(SBStatsType stat_type) {
204     UMA_HISTOGRAM_ENUMERATION("SB2.DownloadChecks",
205                               stat_type,
206                               DOWNLOAD_CHECKS_MAX);
207   }
208
209   std::string sha256_hash_;
210   std::vector<GURL> url_chain_;
211   GURL referrer_url_;
212   DownloadProtectionService::CheckDownloadCallback callback_;
213   scoped_refptr<SafeBrowsingUIManager> ui_manager_;
214   base::TimeTicks start_time_;
215
216  private:
217   const SBStatsType total_type_;
218   const SBStatsType dangerous_type_;
219
220   DISALLOW_COPY_AND_ASSIGN(DownloadSBClient);
221 };
222
223 class DownloadUrlSBClient : public DownloadSBClient {
224  public:
225   DownloadUrlSBClient(
226       const content::DownloadItem& item,
227       const DownloadProtectionService::CheckDownloadCallback& callback,
228       const scoped_refptr<SafeBrowsingUIManager>& ui_manager,
229       const scoped_refptr<SafeBrowsingDatabaseManager>& database_manager)
230       : DownloadSBClient(item, callback, ui_manager,
231                          DOWNLOAD_URL_CHECKS_TOTAL,
232                          DOWNLOAD_URL_CHECKS_MALWARE),
233         database_manager_(database_manager) { }
234
235   virtual void StartCheck() OVERRIDE {
236     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
237     if (!database_manager_.get() ||
238         database_manager_->CheckDownloadUrl(url_chain_, this)) {
239       CheckDone(SB_THREAT_TYPE_SAFE);
240     } else {
241       AddRef();  // SafeBrowsingService takes a pointer not a scoped_refptr.
242     }
243   }
244
245   virtual bool IsDangerous(SBThreatType threat_type) const OVERRIDE {
246     return threat_type == SB_THREAT_TYPE_BINARY_MALWARE_URL;
247   }
248
249   virtual void OnCheckDownloadUrlResult(const std::vector<GURL>& url_chain,
250                                         SBThreatType threat_type) OVERRIDE {
251     CheckDone(threat_type);
252     UMA_HISTOGRAM_TIMES("SB2.DownloadUrlCheckDuration",
253                         base::TimeTicks::Now() - start_time_);
254     Release();
255   }
256
257  protected:
258   virtual ~DownloadUrlSBClient() {}
259
260  private:
261   scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;
262
263   DISALLOW_COPY_AND_ASSIGN(DownloadUrlSBClient);
264 };
265
266 class DownloadProtectionService::CheckClientDownloadRequest
267     : public base::RefCountedThreadSafe<
268           DownloadProtectionService::CheckClientDownloadRequest,
269           BrowserThread::DeleteOnUIThread>,
270       public net::URLFetcherDelegate,
271       public content::DownloadItem::Observer {
272  public:
273   CheckClientDownloadRequest(
274       content::DownloadItem* item,
275       const CheckDownloadCallback& callback,
276       DownloadProtectionService* service,
277       const scoped_refptr<SafeBrowsingDatabaseManager>& database_manager,
278       BinaryFeatureExtractor* binary_feature_extractor)
279       : item_(item),
280         url_chain_(item->GetUrlChain()),
281         referrer_url_(item->GetReferrerUrl()),
282         tab_url_(item->GetTabUrl()),
283         tab_referrer_url_(item->GetTabReferrerUrl()),
284         zipped_executable_(false),
285         callback_(callback),
286         service_(service),
287         binary_feature_extractor_(binary_feature_extractor),
288         database_manager_(database_manager),
289         pingback_enabled_(service_->enabled()),
290         finished_(false),
291         type_(ClientDownloadRequest::WIN_EXECUTABLE),
292         weakptr_factory_(this),
293         start_time_(base::TimeTicks::Now()) {
294     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
295     item_->AddObserver(this);
296   }
297
  // Entry point of the check; runs on the UI thread.  Downloads rejected by
  // IsSupportedDownload() are finished as SAFE via PostFinishTask().
  // Supported downloads continue with feature extraction: zip analysis for
  // ".zip" targets, signature/image-header extraction for everything else.
  void Start() {
    VLOG(2) << "Starting SafeBrowsing download check for: "
            << item_->DebugString(true);
    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    // TODO(noelutz): implement some cache to make sure we don't issue the same
    // request over and over again if a user downloads the same binary multiple
    // times.
    DownloadCheckResultReason reason = REASON_MAX;
    if (!IsSupportedDownload(
        *item_, item_->GetTargetFilePath(), &reason, &type_)) {
      switch (reason) {
        case REASON_EMPTY_URL_CHAIN:
        case REASON_INVALID_URL:
          PostFinishTask(SAFE, reason);
          return;

        case REASON_NOT_BINARY_FILE:
          // Unlike the URL rejections above, this one still records the
          // extension histogram before finishing.
          RecordFileExtensionType(item_->GetTargetFilePath());
          PostFinishTask(SAFE, reason);
          return;

        default:
          // We only expect the reasons explicitly handled above.
          // NOTE(review): there is no return here, so if NOTREACHED() does
          // not abort, control continues with feature extraction below --
          // confirm this is intended.
          NOTREACHED();
      }
    }
    RecordFileExtensionType(item_->GetTargetFilePath());

    // Compute features from the file contents. Note that we record histograms
    // based on the result, so this runs regardless of whether the pingbacks
    // are enabled.
    if (item_->GetTargetFilePath().MatchesExtension(
        FILE_PATH_LITERAL(".zip"))) {
      StartExtractZipFeatures();
    } else {
      DCHECK(!download_protection_util::IsArchiveFile(
          item_->GetTargetFilePath()));
      StartExtractFileFeatures();
    }
  }
338
339   // Start a timeout to cancel the request if it takes too long.
340   // This should only be called after we have finished accessing the file.
341   void StartTimeout() {
342     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
343     if (!service_) {
344       // Request has already been cancelled.
345       return;
346     }
347     timeout_start_time_ = base::TimeTicks::Now();
348     BrowserThread::PostDelayedTask(
349         BrowserThread::UI,
350         FROM_HERE,
351         base::Bind(&CheckClientDownloadRequest::Cancel,
352                    weakptr_factory_.GetWeakPtr()),
353         base::TimeDelta::FromMilliseconds(
354             service_->download_request_timeout_ms()));
355   }
356
357   // Canceling a request will cause us to always report the result as SAFE
358   // unless a pending request is about to call FinishRequest.
359   void Cancel() {
360     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
361     if (fetcher_.get()) {
362       // The DownloadProtectionService is going to release its reference, so we
363       // might be destroyed before the URLFetcher completes.  Cancel the
364       // fetcher so it does not try to invoke OnURLFetchComplete.
365       fetcher_.reset();
366     }
367     // Note: If there is no fetcher, then some callback is still holding a
368     // reference to this object.  We'll eventually wind up in some method on
369     // the UI thread that will call FinishRequest() again.  If FinishRequest()
370     // is called a second time, it will be a no-op.
371     FinishRequest(SAFE, REASON_REQUEST_CANCELED);
372     // Calling FinishRequest might delete this object, we may be deleted by
373     // this point.
374   }
375
376   // content::DownloadItem::Observer implementation.
377   virtual void OnDownloadDestroyed(content::DownloadItem* download) OVERRIDE {
378     Cancel();
379     DCHECK(item_ == NULL);
380   }
381
  // From the net::URLFetcherDelegate interface.
  // Runs on the UI thread once the ping has completed.  Records the network
  // histograms, maps the server verdict to a (result, reason) pair and
  // finishes the request.  Every failure path keeps the default result SAFE.
  virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE {
    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    DCHECK_EQ(source, fetcher_.get());
    VLOG(2) << "Received a response for URL: "
            << item_->GetUrlChain().back() << ": success="
            << source->GetStatus().is_success() << " response_code="
            << source->GetResponseCode();
    // The response code is only recorded when the request itself succeeded;
    // the (negated) net error is recorded unconditionally.
    if (source->GetStatus().is_success()) {
      UMA_HISTOGRAM_SPARSE_SLOWLY(
          "SBClientDownload.DownloadRequestResponseCode",
          source->GetResponseCode());
    }
    UMA_HISTOGRAM_SPARSE_SLOWLY(
        "SBClientDownload.DownloadRequestNetError",
        -source->GetStatus().error());
    // Defaults used when the fetch failed or did not return HTTP 200.
    DownloadCheckResultReason reason = REASON_SERVER_PING_FAILED;
    DownloadCheckResult result = SAFE;
    if (source->GetStatus().is_success() &&
        net::HTTP_OK == source->GetResponseCode()) {
      ClientDownloadResponse response;
      std::string data;
      bool got_data = source->GetResponseAsString(&data);
      DCHECK(got_data);
      if (!response.ParseFromString(data)) {
        reason = REASON_INVALID_RESPONSE_PROTO;
      } else if (response.verdict() == ClientDownloadResponse::SAFE) {
        reason = REASON_DOWNLOAD_SAFE;
      } else if (service_ && !service_->IsSupportedDownload(
          *item_, item_->GetTargetFilePath())) {
        // The client of the download protection service assumes that we don't
        // support this download so we cannot return any other verdict than
        // SAFE even if the server says it's dangerous to download this file.
        // Note: if service_ is NULL we already cancelled the request and
        // returned SAFE.
        reason = REASON_DOWNLOAD_NOT_SUPPORTED;
      } else if (response.verdict() == ClientDownloadResponse::DANGEROUS) {
        reason = REASON_DOWNLOAD_DANGEROUS;
        result = DANGEROUS;
      } else if (response.verdict() == ClientDownloadResponse::UNCOMMON) {
        reason = REASON_DOWNLOAD_UNCOMMON;
        result = UNCOMMON;
      } else if (response.verdict() == ClientDownloadResponse::DANGEROUS_HOST) {
        reason = REASON_DOWNLOAD_DANGEROUS_HOST;
        result = DANGEROUS_HOST;
      } else if (
          response.verdict() == ClientDownloadResponse::POTENTIALLY_UNWANTED) {
        reason = REASON_DOWNLOAD_POTENTIALLY_UNWANTED;
        result = POTENTIALLY_UNWANTED;
      } else {
        LOG(DFATAL) << "Unknown download response verdict: "
                    << response.verdict();
        reason = REASON_INVALID_RESPONSE_VERDICT;
      }
      // Hands the verdict together with the serialized request and the raw
      // response to the feedback service.
      DownloadFeedbackService::MaybeStorePingsForDownload(
          result, item_, client_download_request_data_, data);
    }
    // We don't need the fetcher anymore.
    fetcher_.reset();
    // Two durations: measured from construction of this object and from the
    // moment the network request was started.
    UMA_HISTOGRAM_TIMES("SBClientDownload.DownloadRequestDuration",
                        base::TimeTicks::Now() - start_time_);
    UMA_HISTOGRAM_TIMES("SBClientDownload.DownloadRequestNetworkDuration",
                        base::TimeTicks::Now() - request_start_time_);
    FinishRequest(result, reason);
  }
447
448   static bool IsSupportedDownload(const content::DownloadItem& item,
449                                   const base::FilePath& target_path,
450                                   DownloadCheckResultReason* reason,
451                                   ClientDownloadRequest::DownloadType* type) {
452     if (item.GetUrlChain().empty()) {
453       *reason = REASON_EMPTY_URL_CHAIN;
454       return false;
455     }
456     const GURL& final_url = item.GetUrlChain().back();
457     if (!final_url.is_valid() || final_url.is_empty() ||
458         !final_url.IsStandard() || final_url.SchemeIsFile()) {
459       *reason = REASON_INVALID_URL;
460       return false;
461     }
462     if (!download_protection_util::IsBinaryFile(target_path)) {
463       *reason = REASON_NOT_BINARY_FILE;
464       return false;
465     }
466     *type = download_protection_util::GetDownloadType(target_path);
467     return true;
468   }
469
470  private:
471   friend struct BrowserThread::DeleteOnThread<BrowserThread::UI>;
472   friend class base::DeleteHelper<CheckClientDownloadRequest>;
473
  // Private: destruction goes through the friends declared above.  Expects
  // to run on the UI thread after |item_| has been cleared (FinishRequest()
  // clears it).
  virtual ~CheckClientDownloadRequest() {
    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    DCHECK(item_ == NULL);
  }
478
479   void OnFileFeatureExtractionDone() {
480     // This can run in any thread, since it just posts more messages.
481
482     // TODO(noelutz): DownloadInfo should also contain the IP address of
483     // every URL in the redirect chain.  We also should check whether the
484     // download URL is hosted on the internal network.
485     BrowserThread::PostTask(
486         BrowserThread::IO,
487         FROM_HERE,
488         base::Bind(&CheckClientDownloadRequest::CheckWhitelists, this));
489
490     // We wait until after the file checks finish to start the timeout, as
491     // windows can cause permissions errors if the timeout fired while we were
492     // checking the file signature and we tried to complete the download.
493     BrowserThread::PostTask(
494         BrowserThread::UI,
495         FROM_HERE,
496         base::Bind(&CheckClientDownloadRequest::StartTimeout, this));
497   }
498
499   void StartExtractFileFeatures() {
500     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
501     DCHECK(item_);  // Called directly from Start(), item should still exist.
502     // Since we do blocking I/O, offload this to a worker thread.
503     // The task does not need to block shutdown.
504     BrowserThread::GetBlockingPool()->PostWorkerTaskWithShutdownBehavior(
505         FROM_HERE,
506         base::Bind(&CheckClientDownloadRequest::ExtractFileFeatures,
507                    this, item_->GetFullPath()),
508         base::SequencedWorkerPool::CONTINUE_ON_SHUTDOWN);
509   }
510
511   void ExtractFileFeatures(const base::FilePath& file_path) {
512     base::TimeTicks start_time = base::TimeTicks::Now();
513     binary_feature_extractor_->CheckSignature(file_path, &signature_info_);
514     bool is_signed = (signature_info_.certificate_chain_size() > 0);
515     if (is_signed) {
516       VLOG(2) << "Downloaded a signed binary: " << file_path.value();
517     } else {
518       VLOG(2) << "Downloaded an unsigned binary: "
519               << file_path.value();
520     }
521     UMA_HISTOGRAM_BOOLEAN("SBClientDownload.SignedBinaryDownload", is_signed);
522     UMA_HISTOGRAM_TIMES("SBClientDownload.ExtractSignatureFeaturesTime",
523                         base::TimeTicks::Now() - start_time);
524
525     start_time = base::TimeTicks::Now();
526     binary_feature_extractor_->ExtractImageHeaders(file_path, &image_headers_);
527     UMA_HISTOGRAM_TIMES("SBClientDownload.ExtractImageHeadersTime",
528                         base::TimeTicks::Now() - start_time);
529
530     OnFileFeatureExtractionDone();
531   }
532
533   void StartExtractZipFeatures() {
534     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
535     DCHECK(item_);  // Called directly from Start(), item should still exist.
536     zip_analysis_start_time_ = base::TimeTicks::Now();
537     // We give the zip analyzer a weak pointer to this object.  Since the
538     // analyzer is refcounted, it might outlive the request.
539     analyzer_ = new SandboxedZipAnalyzer(
540         item_->GetFullPath(),
541         base::Bind(&CheckClientDownloadRequest::OnZipAnalysisFinished,
542                    weakptr_factory_.GetWeakPtr()));
543     analyzer_->Start();
544   }
545
546   void OnZipAnalysisFinished(const zip_analyzer::Results& results) {
547     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
548     if (!service_)
549       return;
550     if (results.success) {
551       zipped_executable_ = results.has_executable;
552       VLOG(1) << "Zip analysis finished for " << item_->GetFullPath().value()
553               << ", has_executable=" << results.has_executable
554               << " has_archive=" << results.has_archive;
555     } else {
556       VLOG(1) << "Zip analysis failed for " << item_->GetFullPath().value();
557     }
558     UMA_HISTOGRAM_BOOLEAN("SBClientDownload.ZipFileHasExecutable",
559                           zipped_executable_);
560     UMA_HISTOGRAM_BOOLEAN("SBClientDownload.ZipFileHasArchiveButNoExecutable",
561                           results.has_archive && !zipped_executable_);
562     UMA_HISTOGRAM_TIMES("SBClientDownload.ExtractZipFeaturesTime",
563                         base::TimeTicks::Now() - zip_analysis_start_time_);
564
565     if (!zipped_executable_) {
566       PostFinishTask(SAFE, REASON_ARCHIVE_WITHOUT_BINARIES);
567       return;
568     }
569     OnFileFeatureExtractionDone();
570   }
571
  // Runs on the IO thread.  Finishes the request as SAFE when SafeBrowsing
  // has no database manager, when the download URL is whitelisted, when a
  // trusted signature chains to a whitelisted certificate, or when pingbacks
  // are disabled.  Otherwise it continues towards the server ping (only when
  // OS_WIN is defined).
  void CheckWhitelists() {
    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    // REASON_MAX doubles as "no reason to skip the ping found yet".
    DownloadCheckResultReason reason = REASON_MAX;
    if (!database_manager_.get()) {
      reason = REASON_SB_DISABLED;
    } else {
      const GURL& url = url_chain_.back();
      if (url.is_valid() && database_manager_->MatchDownloadWhitelistUrl(url)) {
        VLOG(2) << url << " is on the download whitelist.";
        reason = REASON_WHITELISTED_URL;
      }
      if (reason != REASON_MAX || signature_info_.trusted()) {
        UMA_HISTOGRAM_COUNTS("SBClientDownload.SignedOrWhitelistedDownload", 1);
      }
    }
    // The certificate whitelist is only consulted when nothing above has
    // already decided the outcome.
    if (reason == REASON_MAX && signature_info_.trusted()) {
      for (int i = 0; i < signature_info_.certificate_chain_size(); ++i) {
        if (CertificateChainIsWhitelisted(
                signature_info_.certificate_chain(i))) {
          reason = REASON_TRUSTED_EXECUTABLE;
          break;
        }
      }
    }
    if (reason != REASON_MAX) {
      PostFinishTask(SAFE, reason);
    } else if (!pingback_enabled_) {
      PostFinishTask(SAFE, REASON_PING_DISABLED);
    } else {
      // Currently, the UI only works on Windows so we don't even bother
      // with pinging the server if we're not on Windows.  TODO(noelutz):
      // change this code once the UI is done for Linux and Mac.
#if defined(OS_WIN)
      // The URLFetcher is owned by the UI thread, so post a message to
      // start the pingback.
      BrowserThread::PostTask(
          BrowserThread::UI,
          FROM_HERE,
          base::Bind(&CheckClientDownloadRequest::GetTabRedirects, this));
#else
      PostFinishTask(SAFE, REASON_OS_NOT_SUPPORTED);
#endif
    }
  }
616
617   void GetTabRedirects() {
618     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
619     if (!tab_url_.is_valid()) {
620       SendRequest();
621       return;
622     }
623
624     Profile* profile = Profile::FromBrowserContext(item_->GetBrowserContext());
625     HistoryService* history =
626         HistoryServiceFactory::GetForProfile(profile, Profile::EXPLICIT_ACCESS);
627     if (!history) {
628       SendRequest();
629       return;
630     }
631
632     history->QueryRedirectsTo(
633         tab_url_,
634         &request_consumer_,
635         base::Bind(&CheckClientDownloadRequest::OnGotTabRedirects,
636                    base::Unretained(this)));
637   }
638
639   void OnGotTabRedirects(HistoryService::Handle handle,
640                          GURL url,
641                          bool success,
642                          history::RedirectList* redirect_list) {
643     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
644     DCHECK_EQ(url, tab_url_);
645
646     if (success && redirect_list->size() > 0) {
647       for (history::RedirectList::reverse_iterator i = redirect_list->rbegin();
648            i != redirect_list->rend();
649            ++i) {
650         tab_redirects_.push_back(*i);
651       }
652     }
653
654     SendRequest();
655   }
656
  // Builds the ClientDownloadRequest proto from the download item and the
  // extracted features, serializes it into |client_download_request_data_|
  // and POSTs it with a URLFetcher.  Runs on the UI thread; the response is
  // handled in OnURLFetchComplete().
  void SendRequest() {
    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

    // This is our last chance to check whether the request has been canceled
    // before sending it.
    if (!service_)
      return;

    ClientDownloadRequest request;
    request.set_url(item_->GetUrlChain().back().spec());
    request.mutable_digests()->set_sha256(item_->GetHash());
    request.set_length(item_->GetReceivedBytes());
    // One resource per URL of the download chain: redirects first, the
    // download URL itself last.
    for (size_t i = 0; i < item_->GetUrlChain().size(); ++i) {
      ClientDownloadRequest::Resource* resource = request.add_resources();
      resource->set_url(item_->GetUrlChain()[i].spec());
      if (i == item_->GetUrlChain().size() - 1) {
        // The last URL in the chain is the download URL.
        resource->set_type(ClientDownloadRequest::DOWNLOAD_URL);
        resource->set_referrer(item_->GetReferrerUrl().spec());
        DVLOG(2) << "dl url " << resource->url();
        if (!item_->GetRemoteAddress().empty()) {
          resource->set_remote_ip(item_->GetRemoteAddress());
          DVLOG(2) << "  dl url remote addr: " << resource->remote_ip();
        }
        DVLOG(2) << "dl referrer " << resource->referrer();
      } else {
        DVLOG(2) << "dl redirect " << i << " " << resource->url();
        resource->set_type(ClientDownloadRequest::DOWNLOAD_REDIRECT);
      }
      // TODO(noelutz): fill out the remote IP addresses.
    }
    // TODO(mattm): fill out the remote IP addresses for tab resources.
    // Then the tab redirects collected by OnGotTabRedirects(), followed by
    // the tab URL itself when it is valid.
    for (size_t i = 0; i < tab_redirects_.size(); ++i) {
      ClientDownloadRequest::Resource* resource = request.add_resources();
      DVLOG(2) << "tab redirect " << i << " " << tab_redirects_[i].spec();
      resource->set_url(tab_redirects_[i].spec());
      resource->set_type(ClientDownloadRequest::TAB_REDIRECT);
    }
    if (tab_url_.is_valid()) {
      ClientDownloadRequest::Resource* resource = request.add_resources();
      resource->set_url(tab_url_.spec());
      DVLOG(2) << "tab url " << resource->url();
      resource->set_type(ClientDownloadRequest::TAB_URL);
      if (tab_referrer_url_.is_valid()) {
        resource->set_referrer(tab_referrer_url_.spec());
        DVLOG(2) << "tab referrer " << resource->referrer();
      }
    }

    request.set_user_initiated(item_->HasUserGesture());
    request.set_file_basename(
        item_->GetTargetFilePath().BaseName().AsUTF8Unsafe());
    request.set_download_type(type_);
    request.mutable_signature()->CopyFrom(signature_info_);
    request.mutable_image_headers()->CopyFrom(image_headers_);
    // A request that cannot be serialized is finished as SAFE without
    // contacting the server.
    if (!request.SerializeToString(&client_download_request_data_)) {
      FinishRequest(SAFE, REASON_INVALID_REQUEST_PROTO);
      return;
    }

    VLOG(2) << "Sending a request for URL: "
            << item_->GetUrlChain().back();
    fetcher_.reset(net::URLFetcher::Create(0 /* ID used for testing */,
                                           GetDownloadRequestUrl(),
                                           net::URLFetcher::POST,
                                           this));
    fetcher_->SetLoadFlags(net::LOAD_DISABLE_CACHE);
    fetcher_->SetAutomaticallyRetryOn5xx(false);  // Don't retry on error.
    fetcher_->SetRequestContext(service_->request_context_getter_.get());
    fetcher_->SetUploadData("application/octet-stream",
                            client_download_request_data_);
    // A non-null |request_start_time_| is also what FinishRequest() uses to
    // tell that a network request was started.
    request_start_time_ = base::TimeTicks::Now();
    UMA_HISTOGRAM_COUNTS("SBClientDownload.DownloadRequestPayloadSize",
                         client_download_request_data_.size());
    fetcher_->Start();
  }
733
734   void PostFinishTask(DownloadCheckResult result,
735                       DownloadCheckResultReason reason) {
736     BrowserThread::PostTask(
737         BrowserThread::UI,
738         FROM_HERE,
739         base::Bind(&CheckClientDownloadRequest::FinishRequest, this, result,
740                    reason));
741   }
742
743   void FinishRequest(DownloadCheckResult result,
744                      DownloadCheckResultReason reason) {
745     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
746     if (finished_) {
747       return;
748     }
749     finished_ = true;
750     // Ensure the timeout task is cancelled while we still have a non-zero
751     // refcount. (crbug.com/240449)
752     weakptr_factory_.InvalidateWeakPtrs();
753     if (!request_start_time_.is_null()) {
754       UMA_HISTOGRAM_ENUMERATION("SBClientDownload.DownloadRequestNetworkStats",
755                                 reason,
756                                 REASON_MAX);
757     }
758     if (!timeout_start_time_.is_null()) {
759       UMA_HISTOGRAM_ENUMERATION("SBClientDownload.DownloadRequestTimeoutStats",
760                                 reason,
761                                 REASON_MAX);
762       if (reason != REASON_REQUEST_CANCELED) {
763         UMA_HISTOGRAM_TIMES("SBClientDownload.DownloadRequestTimeoutDuration",
764                             base::TimeTicks::Now() - timeout_start_time_);
765       }
766     }
767     if (service_) {
768       VLOG(2) << "SafeBrowsing download verdict for: "
769               << item_->DebugString(true) << " verdict:" << reason;
770       UMA_HISTOGRAM_ENUMERATION("SBClientDownload.CheckDownloadStats",
771                                 reason,
772                                 REASON_MAX);
773       callback_.Run(result);
774       item_->RemoveObserver(this);
775       item_ = NULL;
776       DownloadProtectionService* service = service_;
777       service_ = NULL;
778       service->RequestFinished(this);
779       // DownloadProtectionService::RequestFinished will decrement our refcount,
780       // so we may be deleted now.
781     } else {
782       callback_.Run(SAFE);
783     }
784   }
785
786   bool CertificateChainIsWhitelisted(
787       const ClientDownloadRequest_CertificateChain& chain) {
788     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
789     if (chain.element_size() < 2) {
790       // We need to have both a signing certificate and its issuer certificate
791       // present to construct a whitelist entry.
792       return false;
793     }
794     scoped_refptr<net::X509Certificate> cert =
795         net::X509Certificate::CreateFromBytes(
796             chain.element(0).certificate().data(),
797             chain.element(0).certificate().size());
798     if (!cert.get()) {
799       return false;
800     }
801
802     for (int i = 1; i < chain.element_size(); ++i) {
803       scoped_refptr<net::X509Certificate> issuer =
804           net::X509Certificate::CreateFromBytes(
805               chain.element(i).certificate().data(),
806               chain.element(i).certificate().size());
807       if (!issuer.get()) {
808         return false;
809       }
810       std::vector<std::string> whitelist_strings;
811       DownloadProtectionService::GetCertificateWhitelistStrings(
812           *cert.get(), *issuer.get(), &whitelist_strings);
813       for (size_t j = 0; j < whitelist_strings.size(); ++j) {
814         if (database_manager_->MatchDownloadWhitelistString(
815                 whitelist_strings[j])) {
816           VLOG(2) << "Certificate matched whitelist, cert="
817                   << cert->subject().GetDisplayName()
818                   << " issuer=" << issuer->subject().GetDisplayName();
819           return true;
820         }
821       }
822       cert = issuer;
823     }
824     return false;
825   }
826
827   // The DownloadItem we are checking. Will be NULL if the request has been
828   // canceled. Must be accessed only on UI thread.
829   content::DownloadItem* item_;
830   // Copies of data from |item_| for access on other threads.
831   std::vector<GURL> url_chain_;
832   GURL referrer_url_;
833   // URL chain of redirects leading to (but not including) |tab_url_|.
834   std::vector<GURL> tab_redirects_;
835   // URL and referrer of the window the download was started from.
836   GURL tab_url_;
837   GURL tab_referrer_url_;
838
839   bool zipped_executable_;
840   ClientDownloadRequest_SignatureInfo signature_info_;
841   ClientDownloadRequest_ImageHeaders image_headers_;
842   CheckDownloadCallback callback_;
843   // Will be NULL if the request has been canceled.
844   DownloadProtectionService* service_;
845   scoped_refptr<BinaryFeatureExtractor> binary_feature_extractor_;
846   scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;
847   const bool pingback_enabled_;
848   scoped_ptr<net::URLFetcher> fetcher_;
849   scoped_refptr<SandboxedZipAnalyzer> analyzer_;
850   base::TimeTicks zip_analysis_start_time_;
851   bool finished_;
852   ClientDownloadRequest::DownloadType type_;
853   std::string client_download_request_data_;
854   CancelableRequestConsumer request_consumer_;  // For HistoryService lookup.
855   base::WeakPtrFactory<CheckClientDownloadRequest> weakptr_factory_;
856   base::TimeTicks start_time_;  // Used for stats.
857   base::TimeTicks timeout_start_time_;
858   base::TimeTicks request_start_time_;
859
860   DISALLOW_COPY_AND_ASSIGN(CheckClientDownloadRequest);
861 };
862
863 DownloadProtectionService::DownloadProtectionService(
864     SafeBrowsingService* sb_service,
865     net::URLRequestContextGetter* request_context_getter)
866     : request_context_getter_(request_context_getter),
867       enabled_(false),
868       binary_feature_extractor_(new BinaryFeatureExtractor()),
869       download_request_timeout_ms_(kDownloadRequestTimeoutMs),
870       feedback_service_(new DownloadFeedbackService(
871           request_context_getter, BrowserThread::GetBlockingPool())) {
872
873   if (sb_service) {
874     ui_manager_ = sb_service->ui_manager();
875     database_manager_ = sb_service->database_manager();
876   }
877 }
878
879 DownloadProtectionService::~DownloadProtectionService() {
880   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
881   CancelPendingRequests();
882 }
883
884 void DownloadProtectionService::SetEnabled(bool enabled) {
885   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
886   if (enabled == enabled_) {
887     return;
888   }
889   enabled_ = enabled;
890   if (!enabled_) {
891     CancelPendingRequests();
892   }
893 }
894
895 void DownloadProtectionService::CheckClientDownload(
896     content::DownloadItem* item,
897     const CheckDownloadCallback& callback) {
898   scoped_refptr<CheckClientDownloadRequest> request(
899       new CheckClientDownloadRequest(item, callback, this,
900                                      database_manager_,
901                                      binary_feature_extractor_.get()));
902   download_requests_.insert(request);
903   request->Start();
904 }
905
906 void DownloadProtectionService::CheckDownloadUrl(
907     const content::DownloadItem& item,
908     const CheckDownloadCallback& callback) {
909   DCHECK(!item.GetUrlChain().empty());
910   scoped_refptr<DownloadUrlSBClient> client(
911       new DownloadUrlSBClient(item, callback, ui_manager_, database_manager_));
912   // The client will release itself once it is done.
913   BrowserThread::PostTask(
914         BrowserThread::IO,
915         FROM_HERE,
916         base::Bind(&DownloadUrlSBClient::StartCheck, client));
917 }
918
919 bool DownloadProtectionService::IsSupportedDownload(
920     const content::DownloadItem& item,
921     const base::FilePath& target_path) const {
922   // Currently, the UI only works on Windows.  On Linux and Mac we still
923   // want to show the dangerous file type warning if the file is possibly
924   // dangerous which means we have to always return false here.
925 #if defined(OS_WIN)
926   DownloadCheckResultReason reason = REASON_MAX;
927   ClientDownloadRequest::DownloadType type =
928       ClientDownloadRequest::WIN_EXECUTABLE;
929   return (CheckClientDownloadRequest::IsSupportedDownload(item, target_path,
930                                                           &reason, &type) &&
931           (ClientDownloadRequest::ANDROID_APK == type ||
932            ClientDownloadRequest::WIN_EXECUTABLE == type ||
933            ClientDownloadRequest::ZIPPED_EXECUTABLE == type));
934 #else
935   return false;
936 #endif
937 }
938
939 void DownloadProtectionService::CancelPendingRequests() {
940   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
941   for (std::set<scoped_refptr<CheckClientDownloadRequest> >::iterator it =
942            download_requests_.begin();
943        it != download_requests_.end();) {
944     // We need to advance the iterator before we cancel because canceling
945     // the request will invalidate it when RequestFinished is called below.
946     scoped_refptr<CheckClientDownloadRequest> tmp = *it++;
947     tmp->Cancel();
948   }
949   DCHECK(download_requests_.empty());
950 }
951
952 void DownloadProtectionService::RequestFinished(
953     CheckClientDownloadRequest* request) {
954   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
955   std::set<scoped_refptr<CheckClientDownloadRequest> >::iterator it =
956       download_requests_.find(request);
957   DCHECK(it != download_requests_.end());
958   download_requests_.erase(*it);
959 }
960
961 void DownloadProtectionService::ShowDetailsForDownload(
962     const content::DownloadItem& item,
963     content::PageNavigator* navigator) {
964   GURL learn_more_url(chrome::kDownloadScanningLearnMoreURL);
965   navigator->OpenURL(
966       content::OpenURLParams(learn_more_url,
967                              content::Referrer(),
968                              NEW_FOREGROUND_TAB,
969                              content::PAGE_TRANSITION_LINK,
970                              false));
971 }
972
namespace {
// Escapes a certificate attribute so that it can be used in a whitelist
// entry.  Slashes are escaped as "%2F" because they separate attributes in
// an entry, and percent signs are escaped as "%25" so that a literal '%' in
// the input cannot be confused with an escape sequence.  Every other
// character is copied through unchanged.
std::string EscapeCertAttribute(const std::string& attribute) {
  std::string escaped;
  // The result is at least as long as the input, so reserving up front
  // avoids repeated growth in the common case where nothing needs escaping.
  escaped.reserve(attribute.size());
  for (std::string::const_iterator ch = attribute.begin();
       ch != attribute.end(); ++ch) {
    switch (*ch) {
      case '%':
        escaped.append("%25");
        break;
      case '/':
        escaped.append("%2F");
        break;
      default:
        escaped.push_back(*ch);
        break;
    }
  }
  return escaped;
}
}  // namespace
991
992 // static
993 void DownloadProtectionService::GetCertificateWhitelistStrings(
994     const net::X509Certificate& certificate,
995     const net::X509Certificate& issuer,
996     std::vector<std::string>* whitelist_strings) {
997   // The whitelist paths are in the format:
998   // cert/<ascii issuer fingerprint>[/CN=common_name][/O=org][/OU=unit]
999   //
1000   // Any of CN, O, or OU may be omitted from the whitelist entry, in which
1001   // case they match anything.  However, the attributes that do appear will
1002   // always be in the order shown above.  At least one attribute will always
1003   // be present.
1004
1005   const net::CertPrincipal& subject = certificate.subject();
1006   std::vector<std::string> ou_tokens;
1007   for (size_t i = 0; i < subject.organization_unit_names.size(); ++i) {
1008     ou_tokens.push_back(
1009         "/OU=" + EscapeCertAttribute(subject.organization_unit_names[i]));
1010   }
1011
1012   std::vector<std::string> o_tokens;
1013   for (size_t i = 0; i < subject.organization_names.size(); ++i) {
1014     o_tokens.push_back(
1015         "/O=" + EscapeCertAttribute(subject.organization_names[i]));
1016   }
1017
1018   std::string cn_token;
1019   if (!subject.common_name.empty()) {
1020     cn_token = "/CN=" + EscapeCertAttribute(subject.common_name);
1021   }
1022
1023   std::set<std::string> paths_to_check;
1024   if (!cn_token.empty()) {
1025     paths_to_check.insert(cn_token);
1026   }
1027   for (size_t i = 0; i < o_tokens.size(); ++i) {
1028     paths_to_check.insert(cn_token + o_tokens[i]);
1029     paths_to_check.insert(o_tokens[i]);
1030     for (size_t j = 0; j < ou_tokens.size(); ++j) {
1031       paths_to_check.insert(cn_token + o_tokens[i] + ou_tokens[j]);
1032       paths_to_check.insert(o_tokens[i] + ou_tokens[j]);
1033     }
1034   }
1035   for (size_t i = 0; i < ou_tokens.size(); ++i) {
1036     paths_to_check.insert(cn_token + ou_tokens[i]);
1037     paths_to_check.insert(ou_tokens[i]);
1038   }
1039
1040   std::string issuer_fp = base::HexEncode(issuer.fingerprint().data,
1041                                           sizeof(issuer.fingerprint().data));
1042   for (std::set<std::string>::iterator it = paths_to_check.begin();
1043        it != paths_to_check.end(); ++it) {
1044     whitelist_strings->push_back("cert/" + issuer_fp + *it);
1045   }
1046 }
1047
1048 // static
1049 GURL DownloadProtectionService::GetDownloadRequestUrl() {
1050   GURL url(kDownloadRequestUrl);
1051   std::string api_key = google_apis::GetAPIKey();
1052   if (!api_key.empty())
1053     url = url.Resolve("?key=" + net::EscapeQueryParamValue(api_key, true));
1054
1055   return url;
1056 }
1057
1058 }  // namespace safe_browsing