1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/safe_browsing/client_side_detection_host.h"
9 #include "base/logging.h"
10 #include "base/memory/ref_counted.h"
11 #include "base/memory/scoped_ptr.h"
12 #include "base/metrics/histogram.h"
13 #include "base/prefs/pref_service.h"
14 #include "base/sequenced_task_runner_helpers.h"
15 #include "chrome/browser/browser_process.h"
16 #include "chrome/browser/profiles/profile.h"
17 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"
18 #include "chrome/browser/safe_browsing/client_side_detection_service.h"
19 #include "chrome/browser/safe_browsing/database_manager.h"
20 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
21 #include "chrome/common/chrome_switches.h"
22 #include "chrome/common/chrome_version_info.h"
23 #include "chrome/common/pref_names.h"
24 #include "chrome/common/safe_browsing/csd.pb.h"
25 #include "chrome/common/safe_browsing/safebrowsing_messages.h"
26 #include "content/public/browser/browser_thread.h"
27 #include "content/public/browser/navigation_controller.h"
28 #include "content/public/browser/navigation_details.h"
29 #include "content/public/browser/navigation_entry.h"
30 #include "content/public/browser/notification_details.h"
31 #include "content/public/browser/notification_source.h"
32 #include "content/public/browser/notification_types.h"
33 #include "content/public/browser/render_process_host.h"
34 #include "content/public/browser/render_view_host.h"
35 #include "content/public/browser/resource_request_details.h"
36 #include "content/public/browser/web_contents.h"
37 #include "content/public/common/frame_navigate_params.h"
40 using content::BrowserThread;
41 using content::NavigationEntry;
42 using content::ResourceRequestDetails;
43 using content::WebContents;
45 namespace safe_browsing {
// Caps enforced by UpdateIPUrlMap(): at most kMaxIPsPerBrowse distinct IPs are
// tracked in browse_info_->ips, and at most kMaxUrlsPerIP URLs are recorded
// for any single IP.
47 const int ClientSideDetectionHost::kMaxUrlsPerIP = 20;
48 const int ClientSideDetectionHost::kMaxIPsPerBrowse = 200;
// Callback bound into UnsafeResource::callback by MaybeShowPhishingWarning()
// and MaybeShowMalwareWarning(). The visible body only asserts that it is
// invoked on the IO thread; |processed| is not read in the lines shown here.
52 void EmptyUrlCheckCallback(bool processed) {
53 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
58 // This class is instantiated each time a new toplevel URL loads, and
59 // asynchronously checks whether the phishing classifier should run for this
60 // URL. If so, it notifies the renderer with a StartPhishingDetection IPC.
61 // Objects of this class are ref-counted and will be destroyed once nobody
62 // uses it anymore. If |web_contents|, |csd_service| or |host| go away you need
63 // to call Cancel(). We keep the |database_manager| alive in a ref pointer for
64 // as long as it takes.
//
// Thread usage visible below: the MIME-type, private-IP and incognito checks
// DCHECK the UI thread, CheckCsdWhitelist() DCHECKs the IO thread, and
// CheckCache() DCHECKs the UI thread again.
65 class ClientSideDetectionHost::ShouldClassifyUrlRequest
66 : public base::RefCountedThreadSafe<
67 ClientSideDetectionHost::ShouldClassifyUrlRequest> {
// Stores the navigation |params| and the collaborators used by the checks.
// DCHECKs that construction happens on the UI thread and that the web
// contents and database manager are non-null.
69 ShouldClassifyUrlRequest(const content::FrameNavigateParams& params,
70 WebContents* web_contents,
71 ClientSideDetectionService* csd_service,
72 SafeBrowsingDatabaseManager* database_manager,
73 ClientSideDetectionHost* host)
76 web_contents_(web_contents),
77 csd_service_(csd_service),
78 database_manager_(database_manager),
80 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
81 DCHECK(web_contents_);
83 DCHECK(database_manager_.get());
// NOTE(review): the statements from here down to the PostTask are presumably
// the body of Start(), which DidNavigateMainFrame() calls right after
// constructing the request; its signature is not visible in this excerpt.
88 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
90 // We start by doing some simple checks that can run on the UI thread.
91 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1);
93 // Only classify [X]HTML documents.
94 if (params_.contents_mime_type != "text/html" &&
95 params_.contents_mime_type != "application/xhtml+xml") {
96 VLOG(1) << "Skipping phishing classification for URL: " << params_.url
97 << " because it has an unsupported MIME type: "
98 << params_.contents_mime_type;
99 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
100 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
// Don't run the phishing classifier for pages served from a private IP
// address, as judged by the csd service.
105 if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {
106 VLOG(1) << "Skipping phishing classification for URL: " << params_.url
107 << " because of hosting on private IP: "
108 << params_.socket_address.host();
109 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
110 NO_CLASSIFY_PRIVATE_IP,
115 // Don't run the phishing classifier if the tab is incognito.
116 if (web_contents_->GetBrowserContext()->IsOffTheRecord()) {
117 VLOG(1) << "Skipping phishing classification for URL: " << params_.url
118 << " because we're browsing incognito.";
119 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
120 NO_CLASSIFY_OFF_THE_RECORD,
126 // We lookup the csd-whitelist before we lookup the cache because
127 // a URL may have recently been whitelisted. If the URL matches
128 // the csd-whitelist we won't start classification. The
129 // csd-whitelist check has to be done on the IO thread because it
130 // uses the SafeBrowsing service class.
131 BrowserThread::PostTask(
134 base::Bind(&ShouldClassifyUrlRequest::CheckCsdWhitelist,
// NOTE(review): the statements below are presumably the body of Cancel(),
// which the class comment refers to and ClientSideDetectionHost calls; its
// signature is not visible in this excerpt.
140 // Just to make sure we don't do anything stupid we reset all these
141 // pointers except for the safebrowsing service class which may be
142 // accessed by CheckCsdWhitelist().
143 web_contents_ = NULL;
// RefCountedThreadSafe is befriended; NOTE(review): presumably so that it can
// reach the destructor declared further down — confirm its access level.
149 friend class base::RefCountedThreadSafe<
150 ClientSideDetectionHost::ShouldClassifyUrlRequest>;
152 // Enum used to keep stats about why the pre-classification check failed.
// Values are reported to the "SBClientPhishing.PreClassificationCheckFail"
// histogram through UMA_HISTOGRAM_ENUMERATION.
153 enum PreClassificationCheckFailures {
154 OBSOLETE_NO_CLASSIFY_PROXY_FETCH,
155 NO_CLASSIFY_PRIVATE_IP,
156 NO_CLASSIFY_OFF_THE_RECORD,
157 NO_CLASSIFY_MATCH_CSD_WHITELIST,
158 NO_CLASSIFY_TOO_MANY_REPORTS,
159 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
161 NO_CLASSIFY_MAX // Always add new values before this one.
164 // The destructor can be called either from the UI or the IO thread.
165 virtual ~ShouldClassifyUrlRequest() { }
// Runs on the IO thread (DCHECKed). When there is no database manager or
// |url| matches the csd whitelist, logs the skip and records
// NO_CLASSIFY_MATCH_CSD_WHITELIST. The visible remainder reads the malware
// kill-switch state from the database manager and posts CheckCache() with it.
167 void CheckCsdWhitelist(const GURL& url) {
168 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
169 if (!database_manager_.get() ||
170 database_manager_->MatchCsdWhitelistUrl(url)) {
171 // We're done. There is no point in going back to the UI thread.
172 VLOG(1) << "Skipping phishing classification for URL: " << url
173 << " because it matches the csd whitelist";
174 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
175 NO_CLASSIFY_MATCH_CSD_WHITELIST,
180 bool malware_killswitch_on = database_manager_->IsMalwareKillSwitchOn();
182 BrowserThread::PostTask(
185 base::Bind(&ShouldClassifyUrlRequest::CheckCache, this,
186 malware_killswitch_on));
// Runs on the UI thread (DCHECKed). Pushes |malware_killswitch_on| to the
// host, then uses the csd service's cache and report limit to decide whether
// to send SafeBrowsingMsg_StartPhishingDetection to the renderer.
189 void CheckCache(bool malware_killswitch_on) {
190 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
195 host_->SetMalwareKillSwitch(malware_killswitch_on);
196 // If result is cached, we don't want to run classification again
198 if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {
199 VLOG(1) << "Satisfying request for " << params_.url << " from cache";
200 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);
201 // Since we are already on the UI thread, this is safe.
202 host_->MaybeShowPhishingWarning(params_.url, is_phishing);
206 // We want to limit the number of requests, though we will ignore the
207 // limit for urls in the cache. We don't want to start classifying
208 // too many pages as phishing, but for those that we already think are
209 // phishing we want to give ourselves a chance to fix false positives.
210 if (csd_service_->IsInCache(params_.url)) {
211 VLOG(1) << "Reporting limit skipped for " << params_.url
212 << " as it was in the cache.";
213 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ReportLimitSkipped", 1);
214 } else if (csd_service_->OverPhishingReportLimit()) {
215 VLOG(1) << "Too many report phishing requests sent recently, "
216 << "not running classification for " << params_.url;
217 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
218 NO_CLASSIFY_TOO_MANY_REPORTS,
223 // Everything checks out, so start classification.
224 // |web_contents_| is safe to call as we will be destructed
226 VLOG(1) << "Instruct renderer to start phishing detection for URL: "
// The IPC is routed to the tab's current RenderViewHost.
228 content::RenderViewHost* rvh = web_contents_->GetRenderViewHost();
229 rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(
230 rvh->GetRoutingID(), params_.url));
233 // No need to protect |canceled_| with a lock because it is only read and
234 // written by the UI thread.
// Navigation parameters of the load being checked; the checks above read its
// |url|, |contents_mime_type| and |socket_address| fields.
236 content::FrameNavigateParams params_;
// Raw pointers: per the class comment, Cancel() must be called if the
// pointees go away.
237 WebContents* web_contents_;
238 ClientSideDetectionService* csd_service_;
239 // We keep a ref pointer here just to make sure the safe browsing
240 // database manager stays alive long enough.
241 scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;
// Raw pointer to the host; used by CheckCache() for SetMalwareKillSwitch()
// and MaybeShowPhishingWarning().
242 ClientSideDetectionHost* host_;
244 DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);
// Factory: heap-allocates a ClientSideDetectionHost for |tab| and returns the
// raw pointer. NOTE(review): ownership of the returned object is not visible
// here — confirm against callers.
248 ClientSideDetectionHost* ClientSideDetectionHost::Create(
250 return new ClientSideDetectionHost(tab);
// Starts observing |tab|, looks up the process-wide client-side detection and
// SafeBrowsing services, creates the browser feature extractor, and
// subscribes to NOTIFICATION_RESOURCE_RESPONSE_STARTED coming from |tab|.
// The malware kill switch starts off and malware reporting starts disabled;
// reporting is then enabled only on the dev and canary channels.
253 ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)
254 : content::WebContentsObserver(tab),
257 unsafe_unique_page_id_(-1),
258 malware_killswitch_on_(false),
259 malware_report_enabled_(false) {
261 // Note: csd_service_ and sb_service will be NULL here in testing.
262 csd_service_ = g_browser_process->safe_browsing_detection_service();
263 feature_extractor_.reset(new BrowserFeatureExtractor(tab, csd_service_));
264 registrar_.Add(this, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED,
265 content::Source<WebContents>(tab));
267 scoped_refptr<SafeBrowsingService> sb_service =
268 g_browser_process->safe_browsing_service();
// Only wire up the managers (and register as a UI-manager observer) when a
// SafeBrowsing service exists.
269 if (sb_service.get()) {
270 ui_manager_ = sb_service->ui_manager();
271 database_manager_ = sb_service->database_manager();
272 ui_manager_->AddObserver(this);
275 // Only enable the malware bad IP matching and report feature for canary
277 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();
278 malware_report_enabled_ = (
279 channel == chrome::VersionInfo::CHANNEL_DEV ||
280 channel == chrome::VersionInfo::CHANNEL_CANARY);
// Unregisters this object from the UI manager, if one is set, undoing the
// AddObserver() call made when the manager was installed.
283 ClientSideDetectionHost::~ClientSideDetectionHost() {
284 if (ui_manager_.get())
285 ui_manager_->RemoveObserver(this);
// IPC dispatch: routes SafeBrowsingHostMsg_PhishingDetectionDone to
// OnPhishingDetectionDone(). For any other message the map sets |handled| to
// false.
288 bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {
290 IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)
291 IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_PhishingDetectionDone,
292 OnPhishingDetectionDone)
293 IPC_MESSAGE_UNHANDLED(handled = false)
294 IPC_END_MESSAGE_MAP()
// Handles a committed main-frame navigation. In-page navigations are treated
// as not navigating away (see the comment below). Otherwise the visible code
// invalidates outstanding weak pointers, cancels any pending classification
// request, starts a fresh BrowseInfo with the redirect chains and HTTP status
// code, and launches a new ShouldClassifyUrlRequest.
298 void ClientSideDetectionHost::DidNavigateMainFrame(
299 const content::LoadCommittedDetails& details,
300 const content::FrameNavigateParams& params) {
301 // TODO(noelutz): move this DCHECK to WebContents and fix all the unit tests
302 // that don't call this method on the UI thread.
303 // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
304 if (details.is_in_page) {
305 // If the navigation is within the same page, the user isn't really
306 // navigating away. We don't need to cancel a pending callback or
307 // begin a new classification.
310 // If we navigate away and there currently is a pending phishing
311 // report request we have to cancel it to make sure we don't display
312 // an interstitial for the wrong page. Note that this won't cancel
313 // the server ping back but only cancel the showing of the
315 weak_factory_.InvalidateWeakPtrs();
321 // Cancel any pending classification request.
322 if (classification_request_.get()) {
323 classification_request_->Cancel();
325 browse_info_.reset(new BrowseInfo);
327 // Store redirect chain information.
// The per-host redirect chain is only refreshed when the host changes.
328 if (params.url.host() != cur_host_) {
329 cur_host_ = params.url.host();
330 cur_host_redirects_ = params.redirects;
332 browse_info_->host_redirects = cur_host_redirects_;
333 browse_info_->url_redirects = params.redirects;
334 browse_info_->http_status_code = details.http_status_code;
336 // Notify the renderer if it should classify this URL.
337 classification_request_ = new ShouldClassifyUrlRequest(
338 params, web_contents(), csd_service_, database_manager_.get(), this);
339 classification_request_->Start();
// SafeBrowsingUIManager observer callback (this object registers itself via
// AddObserver()). When |resource| belongs to this tab's render process and
// render view, is a phishing or malware URL threat, and there is an active
// navigation entry, remembers that entry's unique ID and keeps a copy of
// |resource| with its callback cleared.
342 void ClientSideDetectionHost::OnSafeBrowsingHit(
343 const SafeBrowsingUIManager::UnsafeResource& resource) {
344 // Check that this notification is really for us and that it corresponds to
345 // either a malware or phishing hit. In this case we store the unique page
347 if (web_contents() &&
348 web_contents()->GetRenderProcessHost()->GetID() ==
349 resource.render_process_host_id &&
350 web_contents()->GetRenderViewHost()->GetRoutingID() ==
351 resource.render_view_id &&
352 (resource.threat_type == SB_THREAT_TYPE_URL_PHISHING ||
353 resource.threat_type == SB_THREAT_TYPE_URL_MALWARE) &&
354 web_contents()->GetController().GetActiveEntry()) {
355 unsafe_unique_page_id_ =
356 web_contents()->GetController().GetActiveEntry()->GetUniqueID();
357 // We also keep the resource around in order to be able to send the
358 // malicious URL to the server.
359 unsafe_resource_.reset(new SafeBrowsingUIManager::UnsafeResource(resource));
360 unsafe_resource_->callback.Reset(); // Don't do anything stupid.
// Called when the observed WebContents is destroyed: cancels any pending
// classification request and releases the feature extractor.
364 void ClientSideDetectionHost::WebContentsDestroyed(WebContents* tab) {
366 // Tell any pending classification request that it is being canceled.
367 if (classification_request_.get()) {
368 classification_request_->Cancel();
370 // Cancel all pending feature extractions.
371 feature_extractor_.reset();
// IPC handler for SafeBrowsingHostMsg_PhishingDetectionDone (see
// OnMessageReceived()). |verdict_str| is parsed as a ClientPhishingRequest.
// For a usable verdict the visible code:
//   - runs malware feature extraction and hands the result to
//     MalwareFeatureExtractionDone() when malware reporting is enabled and
//     the kill switch is off;
//   - starts phishing feature extraction when the verdict is phishing or a
//     SafeBrowsing interstitial was shown for the current page.
374 void ClientSideDetectionHost::OnPhishingDetectionDone(
375 const std::string& verdict_str) {
376 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
377 // There is something seriously wrong if there is no service class but
378 // this method is called. The renderer should not start phishing detection
379 // if there isn't any service class in the browser.
380 DCHECK(csd_service_);
381 // There shouldn't be any pending requests because we revoke them everytime
383 DCHECK(!weak_factory_.HasWeakPtrs());
384 DCHECK(browse_info_.get());
386 // We parse the protocol buffer here. If we're unable to parse it we won't
387 // send the verdict further.
388 scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);
// The weak-pointer and browse-info conditions DCHECKed above are tested again
// at runtime here, alongside the parse result.
390 !weak_factory_.HasWeakPtrs() &&
391 browse_info_.get() &&
392 verdict->ParseFromString(verdict_str) &&
393 verdict->IsInitialized()) {
394 // We do the malware IP matching and request sending if the feature
396 if (malware_report_enabled_ && !MalwareKillSwitchIsOn()) {
397 scoped_ptr<ClientMalwareRequest> malware_verdict(
398 new ClientMalwareRequest);
399 // Start browser-side malware feature extraction. Once we're done it will
400 // send the malware client verdict request.
401 malware_verdict->set_url(verdict->url());
402 feature_extractor_->ExtractMalwareFeatures(
403 browse_info_.get(), malware_verdict.get());
404 MalwareFeatureExtractionDone(malware_verdict.Pass());
407 // We only send phishing verdict to the server if the verdict is phishing or
408 // if a SafeBrowsing interstitial was already shown for this site. E.g., a
409 // malware or phishing interstitial was shown but the user clicked
411 if (verdict->is_phishing() || DidShowSBInterstitial()) {
// Hand the stored unsafe resource over to the browse info so that it travels
// with the feature extraction request.
412 if (DidShowSBInterstitial()) {
413 browse_info_->unsafe_resource.reset(unsafe_resource_.release());
415 // Start browser-side feature extraction. Once we're done it will send
416 // the client verdict request.
417 feature_extractor_->ExtractFeatures(
420 base::Bind(&ClientSideDetectionHost::FeatureExtractionDone,
421 weak_factory_.GetWeakPtr()));
424 browse_info_.reset();
// Receives a phishing verdict for |phishing_url| on the UI thread. The
// visible code builds an UnsafeResource of type
// SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL for this tab and, unless the UI
// manager reports it as whitelisted, discards non-committed navigation
// entries and displays the blocking page. It is bound as the response
// callback in FeatureExtractionDone() and called directly by
// ShouldClassifyUrlRequest::CheckCache() for cached results.
// NOTE(review): the condition on |is_phishing| that presumably guards the
// block below is not visible in this excerpt.
427 void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,
429 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
430 VLOG(2) << "Received server phishing verdict for URL:" << phishing_url
431 << " is_phishing:" << is_phishing;
433 DCHECK(web_contents());
434 if (ui_manager_.get()) {
435 SafeBrowsingUIManager::UnsafeResource resource;
436 resource.url = phishing_url;
437 resource.original_url = phishing_url;
438 resource.is_subresource = false;
439 resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL;
440 resource.render_process_host_id =
441 web_contents()->GetRenderProcessHost()->GetID();
442 resource.render_view_id =
443 web_contents()->GetRenderViewHost()->GetRoutingID();
444 if (!ui_manager_->IsWhitelisted(resource)) {
445 // We need to stop any pending navigations, otherwise the interstitial
446 // might not get created properly.
447 web_contents()->GetController().DiscardNonCommittedEntries();
448 resource.callback = base::Bind(&EmptyUrlCheckCallback);
449 ui_manager_->DoDisplayBlockingPage(resource);
452 // If there is a true phishing verdict, invalidate the weak pointers so
453 // that the malware verdict is no longer considered.
454 weak_factory_.InvalidateWeakPtrs();
// Receives a malware verdict on the UI thread. When |is_malware| is true and
// both |malware_url| and |original_url| are valid, builds an UnsafeResource of
// type SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL for this tab and, unless the UI
// manager reports it as whitelisted, discards non-committed navigation
// entries and displays the blocking page. Bound as the response callback in
// MalwareFeatureExtractionDone().
// NOTE(review): the log message below misspells "malware" as "malawre"; it is
// a runtime string, so it is left untouched here.
458 void ClientSideDetectionHost::MaybeShowMalwareWarning(GURL original_url,
461 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
462 VLOG(2) << "Received server malawre IP verdict for URL:" << malware_url
463 << " is_malware:" << is_malware;
464 if (is_malware && malware_url.is_valid() && original_url.is_valid()) {
465 DCHECK(web_contents());
466 if (ui_manager_.get()) {
467 SafeBrowsingUIManager::UnsafeResource resource;
468 resource.url = malware_url;
469 resource.original_url = original_url;
// The hit counts as a subresource when the malware URL's host differs from
// the original URL's host.
470 resource.is_subresource = (malware_url.host() != original_url.host());
471 resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL;
472 resource.render_process_host_id =
473 web_contents()->GetRenderProcessHost()->GetID();
474 resource.render_view_id =
475 web_contents()->GetRenderViewHost()->GetRoutingID();
476 if (!ui_manager_->IsWhitelisted(resource)) {
477 // We need to stop any pending navigations, otherwise the interstitial
478 // might not get created properly.
479 web_contents()->GetController().DiscardNonCommittedEntries();
480 resource.callback = base::Bind(&EmptyUrlCheckCallback);
481 ui_manager_->DoDisplayBlockingPage(resource);
484 // If there is a true malware verdict, invalidate the weak pointers so
485 // that the phishing verdict is no longer considered.
486 weak_factory_.InvalidateWeakPtrs();
// Completion callback for the phishing feature extraction started in
// OnPhishingDetectionDone(). Forwards |request| to the csd service through
// SendClientReportPhishingRequest(), which takes ownership of it. A response
// callback to MaybeShowPhishingWarning() is bound only when the client-side
// verdict is phishing; otherwise the callback is left default-constructed.
490 void ClientSideDetectionHost::FeatureExtractionDone(
492 ClientPhishingRequest* request) {
494 DLOG(FATAL) << "Invalid request object in FeatureExtractionDone";
497 VLOG(2) << "Feature extraction done (success:" << success << ") for URL: "
498 << request->url() << ". Start sending client phishing request.";
499 ClientSideDetectionService::ClientReportPhishingRequestCallback callback;
500 // If the client-side verdict isn't phishing we don't care about the server
501 // response because we aren't going to display a warning.
502 if (request->is_phishing()) {
503 callback = base::Bind(&ClientSideDetectionHost::MaybeShowPhishingWarning,
504 weak_factory_.GetWeakPtr());
506 // Send ping even if the browser feature extraction failed.
507 csd_service_->SendClientReportPhishingRequest(
508 request, // The service takes ownership of the request object.
// Called from OnPhishingDetectionDone() once malware features have been
// extracted into |request|. If the request carries at least one feature, it
// is released to the csd service through SendClientReportMalwareRequest()
// with MaybeShowMalwareWarning() bound (via a weak pointer) as the response
// callback; with no features nothing is sent.
512 void ClientSideDetectionHost::MalwareFeatureExtractionDone(
513 scoped_ptr<ClientMalwareRequest> request) {
515 DLOG(FATAL) << "Invalid request object in MalwareFeatureExtractionDone";
518 VLOG(2) << "Malware Feature extraction done for URL: " << request->url()
519 << ", with features count:" << request->feature_map_size();
521 // Send ping if there is matching features.
522 if (request->feature_map_size() > 0) {
523 VLOG(1) << "Start sending client malware request.";
524 ClientSideDetectionService::ClientReportMalwareRequestCallback callback;
525 callback = base::Bind(&ClientSideDetectionHost::MaybeShowMalwareWarning,
526 weak_factory_.GetWeakPtr());
527 csd_service_->SendClientReportMalwareRequest(
528 request.release(), // The service takes ownership of the request object
// Maintains browse_info_->ips, the map from IP to the set of URLs seen on
// that IP. An IP not yet in the map gets an entry only while the map holds
// fewer than kMaxIPsPerBrowse IPs; an IP already in the map gets |url| added
// only while its set holds fewer than kMaxUrlsPerIP URLs.
// |browse_info_| is dereferenced without a check in the lines shown; the
// caller in Observe() tests it before calling.
533 void ClientSideDetectionHost::UpdateIPUrlMap(const std::string& ip,
534 const std::string& url) {
// Guard against an empty |ip| or |url|.
535 if (ip.empty() || url.empty())
538 IPUrlMap::iterator it = browse_info_->ips.find(ip);
539 if (it == browse_info_->ips.end()) {
540 if (int(browse_info_->ips.size()) < kMaxIPsPerBrowse) {
541 std::set<std::string> urls;
543 browse_info_->ips.insert(make_pair(ip, urls));
545 } else if (int(it->second.size()) < kMaxUrlsPerIP) {
546 it->second.insert(url);
// Notification callback; only NOTIFICATION_RESOURCE_RESPONSE_STARTED is
// expected (DCHECKed), matching the registration made in the constructor.
// When a browse info exists, malware reporting is enabled and the kill
// switch is off, records the response's socket-address host and URL spec via
// UpdateIPUrlMap(), provided the URL is valid.
550 void ClientSideDetectionHost::Observe(
552 const content::NotificationSource& source,
553 const content::NotificationDetails& details) {
554 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
555 DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED);
556 const ResourceRequestDetails* req = content::Details<ResourceRequestDetails>(
558 if (req && browse_info_.get() && malware_report_enabled_ &&
559 !MalwareKillSwitchIsOn()) {
560 if (req->url.is_valid()) {
561 UpdateIPUrlMap(req->socket_address.host() /* ip */,
562 req->url.spec() /* url */);
// Reports whether the active navigation entry is the one for which
// OnSafeBrowsingHit() stored |unsafe_unique_page_id_|: returns true only if
// an active entry exists and its unique ID equals the stored ID.
567 bool ClientSideDetectionHost::DidShowSBInterstitial() {
// Guard for "no hit recorded" (the ID starts at -1) or no web contents.
568 if (unsafe_unique_page_id_ <= 0 || !web_contents()) {
571 const NavigationEntry* nav_entry =
572 web_contents()->GetController().GetActiveEntry();
573 return (nav_entry && nav_entry->GetUniqueID() == unsafe_unique_page_id_);
// Replaces the client-side detection service pointer with |service|.
// NOTE(review): presumably a test hook, given the constructor's note that the
// service is NULL in testing — confirm against callers.
576 void ClientSideDetectionHost::set_client_side_detection_service(
577 ClientSideDetectionService* service) {
578 csd_service_ = service;
// Swaps in new SafeBrowsing UI and database managers. The observer
// registration is moved along: this object is removed from the previous UI
// manager (if any) before the pointer is replaced, then added to the new one.
581 void ClientSideDetectionHost::set_safe_browsing_managers(
582 SafeBrowsingUIManager* ui_manager,
583 SafeBrowsingDatabaseManager* database_manager) {
584 if (ui_manager_.get())
585 ui_manager_->RemoveObserver(this);
587 ui_manager_ = ui_manager;
589 ui_manager_->AddObserver(this);
591 database_manager_ = database_manager;
// Returns the kill-switch state last stored by SetMalwareKillSwitch().
// Must be called on the UI thread (DCHECKed).
594 bool ClientSideDetectionHost::MalwareKillSwitchIsOn() {
595 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
596 return malware_killswitch_on_;
// Stores the malware kill-switch state. Must be called on the UI thread
// (DCHECKed); ShouldClassifyUrlRequest::CheckCache() calls it with the value
// read from the database manager.
599 void ClientSideDetectionHost::SetMalwareKillSwitch(bool killswitch_on) {
600 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
601 malware_killswitch_on_ = killswitch_on;
604 } // namespace safe_browsing