Upstream version 9.38.198.0
[platform/framework/web/crosswalk.git] / src / chrome / browser / safe_browsing / client_side_detection_host.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/safe_browsing/client_side_detection_host.h"
6
7 #include <vector>
8
9 #include "base/logging.h"
10 #include "base/memory/ref_counted.h"
11 #include "base/memory/scoped_ptr.h"
12 #include "base/metrics/histogram.h"
13 #include "base/prefs/pref_service.h"
14 #include "base/sequenced_task_runner_helpers.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "chrome/browser/browser_process.h"
17 #include "chrome/browser/profiles/profile.h"
18 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"
19 #include "chrome/browser/safe_browsing/client_side_detection_service.h"
20 #include "chrome/browser/safe_browsing/database_manager.h"
21 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
22 #include "chrome/common/pref_names.h"
23 #include "chrome/common/safe_browsing/csd.pb.h"
24 #include "chrome/common/safe_browsing/safebrowsing_messages.h"
25 #include "content/public/browser/browser_thread.h"
26 #include "content/public/browser/navigation_controller.h"
27 #include "content/public/browser/navigation_details.h"
28 #include "content/public/browser/navigation_entry.h"
29 #include "content/public/browser/notification_details.h"
30 #include "content/public/browser/notification_source.h"
31 #include "content/public/browser/notification_types.h"
32 #include "content/public/browser/render_process_host.h"
33 #include "content/public/browser/render_view_host.h"
34 #include "content/public/browser/resource_request_details.h"
35 #include "content/public/browser/web_contents.h"
36 #include "content/public/common/frame_navigate_params.h"
37 #include "content/public/common/url_constants.h"
38 #include "url/gurl.h"
39
40 using content::BrowserThread;
41 using content::NavigationEntry;
42 using content::ResourceRequestDetails;
43 using content::ResourceType;
44 using content::WebContents;
45
46 namespace safe_browsing {
47
48 const size_t ClientSideDetectionHost::kMaxUrlsPerIP = 20;
49 const size_t ClientSideDetectionHost::kMaxIPsPerBrowse = 200;
50
51 const char kSafeBrowsingMatchKey[] = "safe_browsing_match";
52
53 typedef base::Callback<void(bool)> ShouldClassifyUrlCallback;
54
55 // This class is instantiated each time a new toplevel URL loads, and
56 // asynchronously checks whether the malware and phishing classifiers should run
57 // for this URL.  If so, it notifies the host class by calling the provided
58 // callback form the UI thread.  Objects of this class are ref-counted and will
59 // be destroyed once nobody uses it anymore.  If |web_contents|, |csd_service|
60 // or |host| go away you need to call Cancel().  We keep the |database_manager|
61 // alive in a ref pointer for as long as it takes.
62 class ClientSideDetectionHost::ShouldClassifyUrlRequest
63     : public base::RefCountedThreadSafe<
64           ClientSideDetectionHost::ShouldClassifyUrlRequest> {
65  public:
66   ShouldClassifyUrlRequest(
67       const content::FrameNavigateParams& params,
68       const ShouldClassifyUrlCallback& start_phishing_classification,
69       const ShouldClassifyUrlCallback& start_malware_classification,
70       WebContents* web_contents,
71       ClientSideDetectionService* csd_service,
72       SafeBrowsingDatabaseManager* database_manager,
73       ClientSideDetectionHost* host)
74       : params_(params),
75         web_contents_(web_contents),
76         csd_service_(csd_service),
77         database_manager_(database_manager),
78         host_(host),
79         start_phishing_classification_cb_(start_phishing_classification),
80         start_malware_classification_cb_(start_malware_classification) {
81     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
82     DCHECK(web_contents_);
83     DCHECK(csd_service_);
84     DCHECK(database_manager_.get());
85     DCHECK(host_);
86   }
87
88   void Start() {
89     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
90
91     // We start by doing some simple checks that can run on the UI thread.
92     UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.ClassificationStart", 1);
93     UMA_HISTOGRAM_BOOLEAN("SBClientMalware.ClassificationStart", 1);
94
95     // Only classify [X]HTML documents.
96     if (params_.contents_mime_type != "text/html" &&
97         params_.contents_mime_type != "application/xhtml+xml") {
98       VLOG(1) << "Skipping phishing classification for URL: " << params_.url
99               << " because it has an unsupported MIME type: "
100               << params_.contents_mime_type;
101       DontClassifyForPhishing(NO_CLASSIFY_UNSUPPORTED_MIME_TYPE);
102     }
103
104     if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {
105       VLOG(1) << "Skipping phishing classification for URL: " << params_.url
106               << " because of hosting on private IP: "
107               << params_.socket_address.host();
108       DontClassifyForPhishing(NO_CLASSIFY_PRIVATE_IP);
109       DontClassifyForMalware(NO_CLASSIFY_PRIVATE_IP);
110     }
111
112     // For phishing we only classify HTTP pages.
113     if (!params_.url.SchemeIs(url::kHttpScheme)) {
114       VLOG(1) << "Skipping phishing classification for URL: " << params_.url
115               << " because it is not HTTP: "
116               << params_.socket_address.host();
117       DontClassifyForPhishing(NO_CLASSIFY_NOT_HTTP_URL);
118     }
119
120     // Don't run any classifier if the tab is incognito.
121     if (web_contents_->GetBrowserContext()->IsOffTheRecord()) {
122       VLOG(1) << "Skipping phishing and malware classification for URL: "
123               << params_.url << " because we're browsing incognito.";
124       DontClassifyForPhishing(NO_CLASSIFY_OFF_THE_RECORD);
125       DontClassifyForMalware(NO_CLASSIFY_OFF_THE_RECORD);
126     }
127
128     // We lookup the csd-whitelist before we lookup the cache because
129     // a URL may have recently been whitelisted.  If the URL matches
130     // the csd-whitelist we won't start phishing classification.  The
131     // csd-whitelist check has to be done on the IO thread because it
132     // uses the SafeBrowsing service class.
133     if (ShouldClassifyForPhishing() || ShouldClassifyForMalware()) {
134       BrowserThread::PostTask(
135           BrowserThread::IO,
136           FROM_HERE,
137           base::Bind(&ShouldClassifyUrlRequest::CheckSafeBrowsingDatabase,
138                      this, params_.url));
139     }
140   }
141
142   void Cancel() {
143     DontClassifyForPhishing(NO_CLASSIFY_CANCEL);
144     DontClassifyForMalware(NO_CLASSIFY_CANCEL);
145     // Just to make sure we don't do anything stupid we reset all these
146     // pointers except for the safebrowsing service class which may be
147     // accessed by CheckSafeBrowsingDatabase().
148     web_contents_ = NULL;
149     csd_service_ = NULL;
150     host_ = NULL;
151   }
152
153  private:
154   friend class base::RefCountedThreadSafe<
155       ClientSideDetectionHost::ShouldClassifyUrlRequest>;
156
157   // Enum used to keep stats about why the pre-classification check failed.
158   enum PreClassificationCheckFailures {
159     OBSOLETE_NO_CLASSIFY_PROXY_FETCH,
160     NO_CLASSIFY_PRIVATE_IP,
161     NO_CLASSIFY_OFF_THE_RECORD,
162     NO_CLASSIFY_MATCH_CSD_WHITELIST,
163     NO_CLASSIFY_TOO_MANY_REPORTS,
164     NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
165     NO_CLASSIFY_NO_DATABASE_MANAGER,
166     NO_CLASSIFY_KILLSWITCH,
167     NO_CLASSIFY_CANCEL,
168     NO_CLASSIFY_RESULT_FROM_CACHE,
169     NO_CLASSIFY_NOT_HTTP_URL,
170
171     NO_CLASSIFY_MAX  // Always add new values before this one.
172   };
173
174   // The destructor can be called either from the UI or the IO thread.
175   virtual ~ShouldClassifyUrlRequest() { }
176
177   bool ShouldClassifyForPhishing() const {
178     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
179     return !start_phishing_classification_cb_.is_null();
180   }
181
182   bool ShouldClassifyForMalware() const {
183     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
184     return !start_malware_classification_cb_.is_null();
185   }
186
187   void DontClassifyForPhishing(PreClassificationCheckFailures reason) {
188     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
189     if (ShouldClassifyForPhishing()) {
190       // Track the first reason why we stopped classifying for phishing.
191       UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
192                                 reason, NO_CLASSIFY_MAX);
193       DVLOG(2) << "Failed phishing pre-classification checks.  Reason: "
194                << reason;
195       start_phishing_classification_cb_.Run(false);
196     }
197     start_phishing_classification_cb_.Reset();
198   }
199
200   void DontClassifyForMalware(PreClassificationCheckFailures reason) {
201     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
202     if (ShouldClassifyForMalware()) {
203       // Track the first reason why we stopped classifying for malware.
204       UMA_HISTOGRAM_ENUMERATION("SBClientMalware.PreClassificationCheckFail",
205                                 reason, NO_CLASSIFY_MAX);
206       DVLOG(2) << "Failed malware pre-classification checks.  Reason: "
207                << reason;
208       start_malware_classification_cb_.Run(false);
209     }
210     start_malware_classification_cb_.Reset();
211   }
212
213   void CheckSafeBrowsingDatabase(const GURL& url) {
214     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
215     // We don't want to call the classification callbacks from the IO
216     // thread so we simply pass the results of this method to CheckCache()
217     // which is called on the UI thread;
218     PreClassificationCheckFailures phishing_reason = NO_CLASSIFY_MAX;
219     PreClassificationCheckFailures malware_reason = NO_CLASSIFY_MAX;
220     if (!database_manager_.get()) {
221       // We cannot check the Safe Browsing whitelists so we stop here
222       // for safety.
223       malware_reason = phishing_reason = NO_CLASSIFY_NO_DATABASE_MANAGER;
224     } else {
225       if (database_manager_->MatchCsdWhitelistUrl(url)) {
226         VLOG(1) << "Skipping phishing classification for URL: " << url
227                 << " because it matches the csd whitelist";
228         phishing_reason = NO_CLASSIFY_MATCH_CSD_WHITELIST;
229       }
230       if (database_manager_->IsMalwareKillSwitchOn()) {
231         malware_reason = NO_CLASSIFY_KILLSWITCH;
232       }
233     }
234     BrowserThread::PostTask(
235         BrowserThread::UI,
236         FROM_HERE,
237         base::Bind(&ShouldClassifyUrlRequest::CheckCache,
238                    this,
239                    phishing_reason,
240                    malware_reason));
241   }
242
243   void CheckCache(PreClassificationCheckFailures phishing_reason,
244                   PreClassificationCheckFailures malware_reason) {
245     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
246     if (phishing_reason != NO_CLASSIFY_MAX)
247       DontClassifyForPhishing(phishing_reason);
248     if (malware_reason != NO_CLASSIFY_MAX)
249       DontClassifyForMalware(malware_reason);
250     if (!ShouldClassifyForMalware() && !ShouldClassifyForPhishing()) {
251       return;  // No point in doing anything else.
252     }
253     // If result is cached, we don't want to run classification again.
254     // In that case we're just trying to show the warning.
255     bool is_phishing;
256     if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {
257       VLOG(1) << "Satisfying request for " << params_.url << " from cache";
258       UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.RequestSatisfiedFromCache", 1);
259       // Since we are already on the UI thread, this is safe.
260       host_->MaybeShowPhishingWarning(params_.url, is_phishing);
261       DontClassifyForPhishing(NO_CLASSIFY_RESULT_FROM_CACHE);
262     }
263
264     // We want to limit the number of requests, though we will ignore the
265     // limit for urls in the cache.  We don't want to start classifying
266     // too many pages as phishing, but for those that we already think are
267     // phishing we want to send a request to the server to give ourselves
268     // a chance to fix misclassifications.
269     if (csd_service_->IsInCache(params_.url)) {
270       VLOG(1) << "Reporting limit skipped for " << params_.url
271               << " as it was in the cache.";
272       UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.ReportLimitSkipped", 1);
273     } else if (csd_service_->OverPhishingReportLimit()) {
274       VLOG(1) << "Too many report phishing requests sent recently, "
275               << "not running classification for " << params_.url;
276       DontClassifyForPhishing(NO_CLASSIFY_TOO_MANY_REPORTS);
277     }
278     if (csd_service_->OverMalwareReportLimit()) {
279       DontClassifyForMalware(NO_CLASSIFY_TOO_MANY_REPORTS);
280     }
281
282     // Everything checks out, so start classification.
283     // |web_contents_| is safe to call as we will be destructed
284     // before it is.
285     if (ShouldClassifyForPhishing()) {
286       start_phishing_classification_cb_.Run(true);
287       // Reset the callback to make sure ShouldClassifyForPhishing()
288       // returns false.
289       start_phishing_classification_cb_.Reset();
290     }
291     if (ShouldClassifyForMalware()) {
292       start_malware_classification_cb_.Run(true);
293       // Reset the callback to make sure ShouldClassifyForMalware()
294       // returns false.
295       start_malware_classification_cb_.Reset();
296     }
297   }
298
299   content::FrameNavigateParams params_;
300   WebContents* web_contents_;
301   ClientSideDetectionService* csd_service_;
302   // We keep a ref pointer here just to make sure the safe browsing
303   // database manager stays alive long enough.
304   scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;
305   ClientSideDetectionHost* host_;
306
307   ShouldClassifyUrlCallback start_phishing_classification_cb_;
308   ShouldClassifyUrlCallback start_malware_classification_cb_;
309
310   DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);
311 };
312
313 // static
314 ClientSideDetectionHost* ClientSideDetectionHost::Create(
315     WebContents* tab) {
316   return new ClientSideDetectionHost(tab);
317 }
318
319 ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)
320     : content::WebContentsObserver(tab),
321       csd_service_(NULL),
322       classification_request_(NULL),
323       should_extract_malware_features_(true),
324       should_classify_for_malware_(false),
325       pageload_complete_(false),
326       weak_factory_(this),
327       unsafe_unique_page_id_(-1) {
328   DCHECK(tab);
329   // Note: csd_service_ and sb_service will be NULL here in testing.
330   csd_service_ = g_browser_process->safe_browsing_detection_service();
331   feature_extractor_.reset(new BrowserFeatureExtractor(tab, this));
332   registrar_.Add(this, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED,
333                  content::Source<WebContents>(tab));
334
335   scoped_refptr<SafeBrowsingService> sb_service =
336       g_browser_process->safe_browsing_service();
337   if (sb_service.get()) {
338     ui_manager_ = sb_service->ui_manager();
339     database_manager_ = sb_service->database_manager();
340     ui_manager_->AddObserver(this);
341   }
342 }
343
344 ClientSideDetectionHost::~ClientSideDetectionHost() {
345   if (ui_manager_.get())
346     ui_manager_->RemoveObserver(this);
347 }
348
349 bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {
350   bool handled = true;
351   IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)
352     IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_PhishingDetectionDone,
353                         OnPhishingDetectionDone)
354     IPC_MESSAGE_UNHANDLED(handled = false)
355   IPC_END_MESSAGE_MAP()
356   return handled;
357 }
358
359 void ClientSideDetectionHost::DidNavigateMainFrame(
360     const content::LoadCommittedDetails& details,
361     const content::FrameNavigateParams& params) {
362   // TODO(noelutz): move this DCHECK to WebContents and fix all the unit tests
363   // that don't call this method on the UI thread.
364   // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
365   if (details.is_in_page) {
366     // If the navigation is within the same page, the user isn't really
367     // navigating away.  We don't need to cancel a pending callback or
368     // begin a new classification.
369     return;
370   }
371   // Cancel any pending classification request.
372   if (classification_request_.get()) {
373     classification_request_->Cancel();
374   }
375   // If we navigate away and there currently is a pending phishing
376   // report request we have to cancel it to make sure we don't display
377   // an interstitial for the wrong page.  Note that this won't cancel
378   // the server ping back but only cancel the showing of the
379   // interstial.
380   weak_factory_.InvalidateWeakPtrs();
381
382   if (!csd_service_) {
383     return;
384   }
385   browse_info_.reset(new BrowseInfo);
386
387   // Store redirect chain information.
388   if (params.url.host() != cur_host_) {
389     cur_host_ = params.url.host();
390     cur_host_redirects_ = params.redirects;
391   }
392   browse_info_->url = params.url;
393   browse_info_->host_redirects = cur_host_redirects_;
394   browse_info_->url_redirects = params.redirects;
395   browse_info_->referrer = params.referrer.url;
396   browse_info_->http_status_code = details.http_status_code;
397   browse_info_->page_id = params.page_id;
398
399   should_extract_malware_features_ = true;
400   should_classify_for_malware_ = false;
401   pageload_complete_ = false;
402
403   // Check whether we can cassify the current URL for phishing or malware.
404   classification_request_ = new ShouldClassifyUrlRequest(
405       params,
406       base::Bind(&ClientSideDetectionHost::OnPhishingPreClassificationDone,
407                  weak_factory_.GetWeakPtr()),
408       base::Bind(&ClientSideDetectionHost::OnMalwarePreClassificationDone,
409                  weak_factory_.GetWeakPtr()),
410       web_contents(), csd_service_, database_manager_.get(), this);
411   classification_request_->Start();
412 }
413
414 void ClientSideDetectionHost::OnSafeBrowsingHit(
415     const SafeBrowsingUIManager::UnsafeResource& resource) {
416   if (!web_contents() || !web_contents()->GetController().GetActiveEntry())
417     return;
418
419   // Check that the hit is either malware or phishing.
420   if (resource.threat_type != SB_THREAT_TYPE_URL_PHISHING &&
421       resource.threat_type != SB_THREAT_TYPE_URL_MALWARE)
422     return;
423
424   // Check that this notification is really for us.
425   content::RenderViewHost* hit_rvh = content::RenderViewHost::FromID(
426       resource.render_process_host_id, resource.render_view_id);
427   if (!hit_rvh ||
428       web_contents() != content::WebContents::FromRenderViewHost(hit_rvh))
429     return;
430
431   // Store the unique page ID for later.
432   unsafe_unique_page_id_ =
433       web_contents()->GetController().GetActiveEntry()->GetUniqueID();
434
435   // We also keep the resource around in order to be able to send the
436   // malicious URL to the server.
437   unsafe_resource_.reset(new SafeBrowsingUIManager::UnsafeResource(resource));
438   unsafe_resource_->callback.Reset();  // Don't do anything stupid.
439 }
440
441 void ClientSideDetectionHost::OnSafeBrowsingMatch(
442     const SafeBrowsingUIManager::UnsafeResource& resource) {
443   if (!web_contents() || !web_contents()->GetController().GetActiveEntry())
444     return;
445
446   // Check that this notification is really for us.
447   content::RenderViewHost* hit_rvh = content::RenderViewHost::FromID(
448       resource.render_process_host_id, resource.render_view_id);
449   if (!hit_rvh ||
450       web_contents() != content::WebContents::FromRenderViewHost(hit_rvh))
451     return;
452
453   web_contents()->GetController().GetActiveEntry()->SetExtraData(
454       kSafeBrowsingMatchKey, base::ASCIIToUTF16("1"));
455 }
456
457 scoped_refptr<SafeBrowsingDatabaseManager>
458 ClientSideDetectionHost::database_manager() {
459   return database_manager_;
460 }
461
462 bool ClientSideDetectionHost::DidPageReceiveSafeBrowsingMatch() const {
463   if (!web_contents() || !web_contents()->GetController().GetVisibleEntry())
464     return false;
465
466   // If an interstitial page is showing, GetVisibleEntry will return the
467   // transient NavigationEntry for the interstitial. The transient entry
468   // will not have the flag set, so use the pending entry instead if there
469   // is one.
470   NavigationEntry* entry = web_contents()->GetController().GetPendingEntry();
471   if (!entry) {
472     entry = web_contents()->GetController().GetVisibleEntry();
473     if (entry->GetPageType() == content::PAGE_TYPE_INTERSTITIAL)
474       entry = web_contents()->GetController().GetLastCommittedEntry();
475     if (!entry)
476       return false;
477   }
478
479   base::string16 value;
480   return entry->GetExtraData(kSafeBrowsingMatchKey, &value);
481 }
482
483 void ClientSideDetectionHost::WebContentsDestroyed() {
484   // Tell any pending classification request that it is being canceled.
485   if (classification_request_.get()) {
486     classification_request_->Cancel();
487   }
488   // Cancel all pending feature extractions.
489   feature_extractor_.reset();
490 }
491
492 void ClientSideDetectionHost::OnPhishingPreClassificationDone(
493     bool should_classify) {
494   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
495   if (browse_info_.get() && should_classify) {
496     VLOG(1) << "Instruct renderer to start phishing detection for URL: "
497             << browse_info_->url;
498     content::RenderViewHost* rvh = web_contents()->GetRenderViewHost();
499     rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(
500         rvh->GetRoutingID(), browse_info_->url));
501   }
502 }
503
504 void ClientSideDetectionHost::OnMalwarePreClassificationDone(
505     bool should_classify) {
506   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
507   // If classification checks failed we should stop extracting malware features.
508   DVLOG(2) << "Malware pre-classification checks done. Should classify: "
509            << should_classify;
510   should_extract_malware_features_ = should_classify;
511   should_classify_for_malware_ = should_classify;
512   MaybeStartMalwareFeatureExtraction();
513 }
514
515 void ClientSideDetectionHost::DidStopLoading(content::RenderViewHost* rvh) {
516   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
517   if (!csd_service_ || !browse_info_.get())
518     return;
519   DVLOG(2) << "Page finished loading.";
520   pageload_complete_ = true;
521   MaybeStartMalwareFeatureExtraction();
522 }
523
524 void ClientSideDetectionHost::MaybeStartMalwareFeatureExtraction() {
525   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
526   if (csd_service_ && browse_info_.get() &&
527       should_classify_for_malware_ &&
528       pageload_complete_) {
529     scoped_ptr<ClientMalwareRequest> malware_request(
530         new ClientMalwareRequest);
531     // Start browser-side malware feature extraction.  Once we're done it will
532     // send the malware client verdict request.
533     malware_request->set_url(browse_info_->url.spec());
534     const GURL& referrer = browse_info_->referrer;
535     if (referrer.SchemeIs("http")) {  // Only send http urls.
536       malware_request->set_referrer_url(referrer.spec());
537     }
538     // This function doesn't expect browse_info_ to stay around after this
539     // function returns.
540     feature_extractor_->ExtractMalwareFeatures(
541         browse_info_.get(),
542         malware_request.release(),
543         base::Bind(&ClientSideDetectionHost::MalwareFeatureExtractionDone,
544                    weak_factory_.GetWeakPtr()));
545     should_classify_for_malware_ = false;
546   }
547 }
548
549 void ClientSideDetectionHost::OnPhishingDetectionDone(
550     const std::string& verdict_str) {
551   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
552   // There is something seriously wrong if there is no service class but
553   // this method is called.  The renderer should not start phishing detection
554   // if there isn't any service class in the browser.
555   DCHECK(csd_service_);
556   DCHECK(browse_info_.get());
557
558   // We parse the protocol buffer here.  If we're unable to parse it we won't
559   // send the verdict further.
560   scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);
561   if (csd_service_ &&
562       browse_info_.get() &&
563       verdict->ParseFromString(verdict_str) &&
564       verdict->IsInitialized()) {
565     // We only send phishing verdict to the server if the verdict is phishing or
566     // if a SafeBrowsing interstitial was already shown for this site.  E.g., a
567     // malware or phishing interstitial was shown but the user clicked
568     // through.
569     if (verdict->is_phishing() || DidShowSBInterstitial()) {
570       if (DidShowSBInterstitial()) {
571         browse_info_->unsafe_resource.reset(unsafe_resource_.release());
572       }
573       // Start browser-side feature extraction.  Once we're done it will send
574       // the client verdict request.
575       feature_extractor_->ExtractFeatures(
576           browse_info_.get(),
577           verdict.release(),
578           base::Bind(&ClientSideDetectionHost::FeatureExtractionDone,
579                      weak_factory_.GetWeakPtr()));
580     }
581   }
582 }
583
584 void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,
585                                                        bool is_phishing) {
586   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
587   DVLOG(2) << "Received server phishing verdict for URL:" << phishing_url
588            << " is_phishing:" << is_phishing;
589   if (is_phishing) {
590     DCHECK(web_contents());
591     if (ui_manager_.get()) {
592       SafeBrowsingUIManager::UnsafeResource resource;
593       resource.url = phishing_url;
594       resource.original_url = phishing_url;
595       resource.is_subresource = false;
596       resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL;
597       resource.render_process_host_id =
598           web_contents()->GetRenderProcessHost()->GetID();
599       resource.render_view_id =
600           web_contents()->GetRenderViewHost()->GetRoutingID();
601       if (!ui_manager_->IsWhitelisted(resource)) {
602         // We need to stop any pending navigations, otherwise the interstital
603         // might not get created properly.
604         web_contents()->GetController().DiscardNonCommittedEntries();
605       }
606       ui_manager_->DisplayBlockingPage(resource);
607     }
608     // If there is true phishing verdict, invalidate weakptr so that no longer
609     // consider the malware vedict.
610     weak_factory_.InvalidateWeakPtrs();
611   }
612 }
613
614 void ClientSideDetectionHost::MaybeShowMalwareWarning(GURL original_url,
615                                                       GURL malware_url,
616                                                       bool is_malware) {
617   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
618   DVLOG(2) << "Received server malawre IP verdict for URL:" << malware_url
619            << " is_malware:" << is_malware;
620   if (is_malware && malware_url.is_valid() && original_url.is_valid()) {
621     DCHECK(web_contents());
622     if (ui_manager_.get()) {
623       SafeBrowsingUIManager::UnsafeResource resource;
624       resource.url = malware_url;
625       resource.original_url = original_url;
626       resource.is_subresource = (malware_url.host() != original_url.host());
627       resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL;
628       resource.render_process_host_id =
629           web_contents()->GetRenderProcessHost()->GetID();
630       resource.render_view_id =
631           web_contents()->GetRenderViewHost()->GetRoutingID();
632       if (!ui_manager_->IsWhitelisted(resource)) {
633         // We need to stop any pending navigations, otherwise the interstital
634         // might not get created properly.
635         web_contents()->GetController().DiscardNonCommittedEntries();
636       }
637       ui_manager_->DisplayBlockingPage(resource);
638     }
639     // If there is true malware verdict, invalidate weakptr so that no longer
640     // consider the phishing vedict.
641     weak_factory_.InvalidateWeakPtrs();
642   }
643 }
644
645 void ClientSideDetectionHost::FeatureExtractionDone(
646     bool success,
647     scoped_ptr<ClientPhishingRequest> request) {
648   DCHECK(request);
649   DVLOG(2) << "Feature extraction done (success:" << success << ") for URL: "
650            << request->url() << ". Start sending client phishing request.";
651   ClientSideDetectionService::ClientReportPhishingRequestCallback callback;
652   // If the client-side verdict isn't phishing we don't care about the server
653   // response because we aren't going to display a warning.
654   if (request->is_phishing()) {
655     callback = base::Bind(&ClientSideDetectionHost::MaybeShowPhishingWarning,
656                           weak_factory_.GetWeakPtr());
657   }
658   // Send ping even if the browser feature extraction failed.
659   csd_service_->SendClientReportPhishingRequest(
660       request.release(),  // The service takes ownership of the request object.
661       callback);
662 }
663
664 void ClientSideDetectionHost::MalwareFeatureExtractionDone(
665     bool feature_extraction_success,
666     scoped_ptr<ClientMalwareRequest> request) {
667   DCHECK(request.get());
668   DVLOG(2) << "Malware Feature extraction done for URL: " << request->url()
669            << ", with badip url count:" << request->bad_ip_url_info_size();
670
671   // Send ping if there is matching features.
672   if (feature_extraction_success && request->bad_ip_url_info_size() > 0) {
673     VLOG(1) << "Start sending client malware request.";
674     ClientSideDetectionService::ClientReportMalwareRequestCallback callback;
675     callback = base::Bind(&ClientSideDetectionHost::MaybeShowMalwareWarning,
676                           weak_factory_.GetWeakPtr());
677     csd_service_->SendClientReportMalwareRequest(request.release(), callback);
678   }
679 }
680
681 void ClientSideDetectionHost::UpdateIPUrlMap(const std::string& ip,
682                                              const std::string& url,
683                                              const std::string& method,
684                                              const std::string& referrer,
685                                              const ResourceType resource_type) {
686   if (ip.empty() || url.empty())
687     return;
688
689   IPUrlMap::iterator it = browse_info_->ips.find(ip);
690   if (it == browse_info_->ips.end()) {
691     if (browse_info_->ips.size() < kMaxIPsPerBrowse) {
692       std::vector<IPUrlInfo> url_infos;
693       url_infos.push_back(IPUrlInfo(url, method, referrer, resource_type));
694       browse_info_->ips.insert(make_pair(ip, url_infos));
695     }
696   } else if (it->second.size() < kMaxUrlsPerIP) {
697     it->second.push_back(IPUrlInfo(url, method, referrer, resource_type));
698   }
699 }
700
701 void ClientSideDetectionHost::Observe(
702     int type,
703     const content::NotificationSource& source,
704     const content::NotificationDetails& details) {
705   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
706   DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED);
707   const ResourceRequestDetails* req = content::Details<ResourceRequestDetails>(
708       details).ptr();
709   if (req && browse_info_.get() &&
710       should_extract_malware_features_ && req->url.is_valid()) {
711     UpdateIPUrlMap(req->socket_address.host() /* ip */,
712                    req->url.spec()  /* url */,
713                    req->method,
714                    req->referrer,
715                    req->resource_type);
716   }
717 }
718
719 bool ClientSideDetectionHost::DidShowSBInterstitial() const {
720   if (unsafe_unique_page_id_ <= 0 || !web_contents()) {
721     return false;
722   }
723   const NavigationEntry* nav_entry =
724       web_contents()->GetController().GetActiveEntry();
725   return (nav_entry && nav_entry->GetUniqueID() == unsafe_unique_page_id_);
726 }
727
728 void ClientSideDetectionHost::set_client_side_detection_service(
729     ClientSideDetectionService* service) {
730   csd_service_ = service;
731 }
732
733 void ClientSideDetectionHost::set_safe_browsing_managers(
734     SafeBrowsingUIManager* ui_manager,
735     SafeBrowsingDatabaseManager* database_manager) {
736   if (ui_manager_.get())
737     ui_manager_->RemoveObserver(this);
738
739   ui_manager_ = ui_manager;
740   if (ui_manager)
741     ui_manager_->AddObserver(this);
742
743   database_manager_ = database_manager;
744 }
745
746 }  // namespace safe_browsing