Upstream version 9.38.198.0
[platform/framework/web/crosswalk.git] / src / chrome / browser / safe_browsing / browser_feature_extractor_unittest.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"
6
7 #include <map>
8 #include <string>
9 #include <vector>
10
11 #include "base/memory/scoped_ptr.h"
12 #include "base/message_loop/message_loop.h"
13 #include "base/strings/stringprintf.h"
14 #include "base/time/time.h"
15 #include "chrome/browser/history/history_backend.h"
16 #include "chrome/browser/history/history_service.h"
17 #include "chrome/browser/history/history_service_factory.h"
18 #include "chrome/browser/profiles/profile.h"
19 #include "chrome/browser/safe_browsing/browser_features.h"
20 #include "chrome/browser/safe_browsing/client_side_detection_host.h"
21 #include "chrome/browser/safe_browsing/database_manager.h"
22 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
23 #include "chrome/browser/safe_browsing/ui_manager.h"
24 #include "chrome/common/safe_browsing/csd.pb.h"
25 #include "chrome/test/base/chrome_render_view_host_test_harness.h"
26 #include "chrome/test/base/testing_profile.h"
27 #include "content/public/browser/navigation_controller.h"
28 #include "content/public/browser/web_contents.h"
29 #include "content/public/common/page_transition_types.h"
30 #include "content/public/common/referrer.h"
31 #include "content/public/test/test_browser_thread.h"
32 #include "content/public/test/web_contents_tester.h"
33 #include "testing/gmock/include/gmock/gmock.h"
34 #include "testing/gtest/include/gtest/gtest.h"
35 #include "url/gurl.h"
36
37 using content::BrowserThread;
38 using content::ResourceType;
39 using content::WebContentsTester;
40
41 using testing::DoAll;
42 using testing::Return;
43 using testing::StrictMock;
44
45 namespace safe_browsing {
46
47 namespace {
48
49 class MockSafeBrowsingDatabaseManager : public SafeBrowsingDatabaseManager {
50  public:
51   explicit MockSafeBrowsingDatabaseManager(
52       const scoped_refptr<SafeBrowsingService>& service)
53       : SafeBrowsingDatabaseManager(service) { }
54
55   MOCK_METHOD1(MatchMalwareIP, bool(const std::string& ip_address));
56
57  protected:
58   virtual ~MockSafeBrowsingDatabaseManager() {}
59
60  private:
61   DISALLOW_COPY_AND_ASSIGN(MockSafeBrowsingDatabaseManager);
62 };
63
64 class MockClientSideDetectionHost : public ClientSideDetectionHost {
65  public:
66   MockClientSideDetectionHost(
67       content::WebContents* tab,
68       SafeBrowsingDatabaseManager* database_manager)
69       : ClientSideDetectionHost(tab) {
70     set_safe_browsing_managers(NULL, database_manager);
71   }
72
73   virtual ~MockClientSideDetectionHost() {}
74
75   MOCK_METHOD1(IsBadIpAddress, bool(const std::string&));
76 };
77 }  // namespace
78
79 class BrowserFeatureExtractorTest : public ChromeRenderViewHostTestHarness {
80  protected:
81   virtual void SetUp() {
82     ChromeRenderViewHostTestHarness::SetUp();
83     ASSERT_TRUE(profile()->CreateHistoryService(
84         true /* delete_file */, false /* no_db */));
85
86     db_manager_ = new StrictMock<MockSafeBrowsingDatabaseManager>(
87         SafeBrowsingService::CreateSafeBrowsingService());
88     host_.reset(new StrictMock<MockClientSideDetectionHost>(
89         web_contents(), db_manager_.get()));
90     extractor_.reset(
91         new BrowserFeatureExtractor(web_contents(), host_.get()));
92     num_pending_ = 0;
93     browse_info_.reset(new BrowseInfo);
94   }
95
96   virtual void TearDown() {
97     extractor_.reset();
98     host_.reset();
99     db_manager_ = NULL;
100     profile()->DestroyHistoryService();
101     ChromeRenderViewHostTestHarness::TearDown();
102     ASSERT_EQ(0, num_pending_);
103   }
104
105   HistoryService* history_service() {
106     return HistoryServiceFactory::GetForProfile(profile(),
107                                                 Profile::EXPLICIT_ACCESS);
108   }
109
110   void SetRedirectChain(const std::vector<GURL>& redirect_chain,
111                         bool new_host) {
112     browse_info_->url_redirects = redirect_chain;
113     if (new_host) {
114       browse_info_->host_redirects = redirect_chain;
115     }
116   }
117
118   // Wrapper around NavigateAndCommit that also sets the redirect chain to
119   // a sane value.
120   void SimpleNavigateAndCommit(const GURL& url) {
121     std::vector<GURL> redirect_chain;
122     redirect_chain.push_back(url);
123     SetRedirectChain(redirect_chain, true);
124     NavigateAndCommit(url, GURL(), content::PAGE_TRANSITION_LINK);
125   }
126
127   // This is similar to NavigateAndCommit that is in WebContentsTester, but
128   // allows us to specify the referrer and page_transition_type.
129   void NavigateAndCommit(const GURL& url,
130                          const GURL& referrer,
131                          content::PageTransition type) {
132     web_contents()->GetController().LoadURL(
133         url, content::Referrer(referrer, blink::WebReferrerPolicyDefault),
134         type, std::string());
135
136     static int page_id = 0;
137     content::RenderFrameHost* rfh =
138         WebContentsTester::For(web_contents())->GetPendingMainFrame();
139     if (!rfh) {
140       rfh = web_contents()->GetMainFrame();
141     }
142     WebContentsTester::For(web_contents())->ProceedWithCrossSiteNavigation();
143     WebContentsTester::For(web_contents())->TestDidNavigateWithReferrer(
144         rfh, ++page_id, url,
145         content::Referrer(referrer, blink::WebReferrerPolicyDefault), type);
146   }
147
148   bool ExtractFeatures(ClientPhishingRequest* request) {
149     StartExtractFeatures(request);
150     base::MessageLoop::current()->Run();
151     EXPECT_EQ(1U, success_.count(request));
152     return success_.count(request) ? success_[request] : false;
153   }
154
155   void StartExtractFeatures(ClientPhishingRequest* request) {
156     success_.erase(request);
157     ++num_pending_;
158     extractor_->ExtractFeatures(
159         browse_info_.get(),
160         request,
161         base::Bind(&BrowserFeatureExtractorTest::ExtractFeaturesDone,
162                    base::Unretained(this)));
163   }
164
165   void GetFeatureMap(const ClientPhishingRequest& request,
166                      std::map<std::string, double>* features) {
167     for (int i = 0; i < request.non_model_feature_map_size(); ++i) {
168       const ClientPhishingRequest::Feature& feature =
169           request.non_model_feature_map(i);
170       EXPECT_EQ(0U, features->count(feature.name()));
171       (*features)[feature.name()] = feature.value();
172     }
173   }
174
175   void ExtractMalwareFeatures(ClientMalwareRequest* request) {
176     // Feature extraction takes ownership of the request object
177     // and passes it along to the done callback in the end.
178     StartExtractMalwareFeatures(request);
179     base::MessageLoopForUI::current()->Run();
180     EXPECT_EQ(1U, success_.count(request));
181     EXPECT_TRUE(success_[request]);
182   }
183
184   void StartExtractMalwareFeatures(ClientMalwareRequest* request) {
185     success_.erase(request);
186     ++num_pending_;
187     // We temporarily give up ownership of request to ExtractMalwareFeatures
188     // but we'll regain ownership of it in ExtractMalwareFeaturesDone.
189     extractor_->ExtractMalwareFeatures(
190         browse_info_.get(),
191         request,
192         base::Bind(&BrowserFeatureExtractorTest::ExtractMalwareFeaturesDone,
193                    base::Unretained(this)));
194   }
195
196   void GetMalwareUrls(
197       const ClientMalwareRequest& request,
198       std::map<std::string, std::set<std::string> >* urls) {
199     for (int i = 0; i < request.bad_ip_url_info_size(); ++i) {
200       const ClientMalwareRequest::UrlInfo& urlinfo =
201           request.bad_ip_url_info(i);
202       (*urls)[urlinfo.ip()].insert(urlinfo.url());
203     }
204   }
205
206   int num_pending_;  // Number of pending feature extractions.
207   scoped_ptr<BrowserFeatureExtractor> extractor_;
208   std::map<void*, bool> success_;
209   scoped_ptr<BrowseInfo> browse_info_;
210   scoped_ptr<StrictMock<MockClientSideDetectionHost> > host_;
211   scoped_refptr<StrictMock<MockSafeBrowsingDatabaseManager> > db_manager_;
212
213  private:
214   void ExtractFeaturesDone(bool success,
215                            scoped_ptr<ClientPhishingRequest> request) {
216     EXPECT_TRUE(BrowserThread::CurrentlyOn(BrowserThread::UI));
217     ASSERT_EQ(0U, success_.count(request.get()));
218     // The pointer doesn't really belong to us.  It belongs to
219     // the test case which passed it to ExtractFeatures above.
220     success_[request.release()] = success;
221     if (--num_pending_ == 0) {
222       base::MessageLoop::current()->Quit();
223     }
224   }
225
226   void ExtractMalwareFeaturesDone(
227       bool success,
228       scoped_ptr<ClientMalwareRequest> request) {
229     EXPECT_TRUE(BrowserThread::CurrentlyOn(BrowserThread::UI));
230     ASSERT_EQ(0U, success_.count(request.get()));
231     // The pointer doesn't really belong to us.  It belongs to
232     // the test case which passed it to ExtractMalwareFeatures above.
233     success_[request.release()] = success;
234     if (--num_pending_ == 0) {
235       base::MessageLoopForUI::current()->Quit();
236     }
237   }
238 };
239
240 TEST_F(BrowserFeatureExtractorTest, UrlNotInHistory) {
241   ClientPhishingRequest request;
242   SimpleNavigateAndCommit(GURL("http://www.google.com"));
243   request.set_url("http://www.google.com/");
244   request.set_client_score(0.5);
245   EXPECT_FALSE(ExtractFeatures(&request));
246 }
247
248 TEST_F(BrowserFeatureExtractorTest, RequestNotInitialized) {
249   ClientPhishingRequest request;
250   request.set_url("http://www.google.com/");
251   // Request is missing the score value.
252   SimpleNavigateAndCommit(GURL("http://www.google.com"));
253   EXPECT_FALSE(ExtractFeatures(&request));
254 }
255
256 TEST_F(BrowserFeatureExtractorTest, UrlInHistory) {
257   history_service()->AddPage(GURL("http://www.foo.com/bar.html"),
258                              base::Time::Now(),
259                              history::SOURCE_BROWSED);
260   history_service()->AddPage(GURL("https://www.foo.com/gaa.html"),
261                              base::Time::Now(),
262                              history::SOURCE_BROWSED);  // same host HTTPS.
263   history_service()->AddPage(GURL("http://www.foo.com/gaa.html"),
264                              base::Time::Now(),
265                              history::SOURCE_BROWSED);  // same host HTTP.
266   history_service()->AddPage(GURL("http://bar.foo.com/gaa.html"),
267                              base::Time::Now(),
268                              history::SOURCE_BROWSED);  // different host.
269   history_service()->AddPage(GURL("http://www.foo.com/bar.html?a=b"),
270                              base::Time::Now() - base::TimeDelta::FromHours(23),
271                              NULL, 0, GURL(), history::RedirectList(),
272                              content::PAGE_TRANSITION_LINK,
273                              history::SOURCE_BROWSED, false);
274   history_service()->AddPage(GURL("http://www.foo.com/bar.html"),
275                              base::Time::Now() - base::TimeDelta::FromHours(25),
276                              NULL, 0, GURL(), history::RedirectList(),
277                              content::PAGE_TRANSITION_TYPED,
278                              history::SOURCE_BROWSED, false);
279   history_service()->AddPage(GURL("https://www.foo.com/goo.html"),
280                              base::Time::Now() - base::TimeDelta::FromDays(5),
281                              NULL, 0, GURL(), history::RedirectList(),
282                              content::PAGE_TRANSITION_TYPED,
283                              history::SOURCE_BROWSED, false);
284
285   SimpleNavigateAndCommit(GURL("http://www.foo.com/bar.html"));
286
287   ClientPhishingRequest request;
288   request.set_url("http://www.foo.com/bar.html");
289   request.set_client_score(0.5);
290   EXPECT_TRUE(ExtractFeatures(&request));
291   std::map<std::string, double> features;
292   GetFeatureMap(request, &features);
293
294   EXPECT_EQ(12U, features.size());
295   EXPECT_DOUBLE_EQ(2.0, features[features::kUrlHistoryVisitCount]);
296   EXPECT_DOUBLE_EQ(1.0,
297                    features[features::kUrlHistoryVisitCountMoreThan24hAgo]);
298   EXPECT_DOUBLE_EQ(1.0, features[features::kUrlHistoryTypedCount]);
299   EXPECT_DOUBLE_EQ(1.0, features[features::kUrlHistoryLinkCount]);
300   EXPECT_DOUBLE_EQ(4.0, features[features::kHttpHostVisitCount]);
301   EXPECT_DOUBLE_EQ(2.0, features[features::kHttpsHostVisitCount]);
302   EXPECT_DOUBLE_EQ(1.0, features[features::kFirstHttpHostVisitMoreThan24hAgo]);
303   EXPECT_DOUBLE_EQ(1.0, features[features::kFirstHttpsHostVisitMoreThan24hAgo]);
304
305   request.Clear();
306   request.set_url("http://bar.foo.com/gaa.html");
307   request.set_client_score(0.5);
308   EXPECT_TRUE(ExtractFeatures(&request));
309   features.clear();
310   GetFeatureMap(request, &features);
311   // We have less features because we didn't Navigate to this page, so we don't
312   // have Referrer, IsFirstNavigation, HasSSLReferrer, etc.
313   EXPECT_EQ(7U, features.size());
314   EXPECT_DOUBLE_EQ(1.0, features[features::kUrlHistoryVisitCount]);
315   EXPECT_DOUBLE_EQ(0.0,
316                    features[features::kUrlHistoryVisitCountMoreThan24hAgo]);
317   EXPECT_DOUBLE_EQ(0.0, features[features::kUrlHistoryTypedCount]);
318   EXPECT_DOUBLE_EQ(1.0, features[features::kUrlHistoryLinkCount]);
319   EXPECT_DOUBLE_EQ(1.0, features[features::kHttpHostVisitCount]);
320   EXPECT_DOUBLE_EQ(0.0, features[features::kHttpsHostVisitCount]);
321   EXPECT_DOUBLE_EQ(0.0, features[features::kFirstHttpHostVisitMoreThan24hAgo]);
322   EXPECT_FALSE(features.count(features::kFirstHttpsHostVisitMoreThan24hAgo));
323 }
324
325 TEST_F(BrowserFeatureExtractorTest, MultipleRequestsAtOnce) {
326   history_service()->AddPage(GURL("http://www.foo.com/bar.html"),
327                              base::Time::Now(),
328                              history::SOURCE_BROWSED);
329   SimpleNavigateAndCommit(GURL("http:/www.foo.com/bar.html"));
330   ClientPhishingRequest request;
331   request.set_url("http://www.foo.com/bar.html");
332   request.set_client_score(0.5);
333   StartExtractFeatures(&request);
334
335   SimpleNavigateAndCommit(GURL("http://www.foo.com/goo.html"));
336   ClientPhishingRequest request2;
337   request2.set_url("http://www.foo.com/goo.html");
338   request2.set_client_score(1.0);
339   StartExtractFeatures(&request2);
340
341   base::MessageLoop::current()->Run();
342   EXPECT_TRUE(success_[&request]);
343   // Success is false because the second URL is not in the history and we are
344   // not able to distinguish between a missing URL in the history and an error.
345   EXPECT_FALSE(success_[&request2]);
346 }
347
348 TEST_F(BrowserFeatureExtractorTest, BrowseFeatures) {
349   history_service()->AddPage(GURL("http://www.foo.com/"),
350                              base::Time::Now(),
351                              history::SOURCE_BROWSED);
352   history_service()->AddPage(GURL("http://www.foo.com/page.html"),
353                              base::Time::Now(),
354                              history::SOURCE_BROWSED);
355   history_service()->AddPage(GURL("http://www.bar.com/"),
356                              base::Time::Now(),
357                              history::SOURCE_BROWSED);
358   history_service()->AddPage(GURL("http://www.bar.com/other_page.html"),
359                              base::Time::Now(),
360                              history::SOURCE_BROWSED);
361   history_service()->AddPage(GURL("http://www.baz.com/"),
362                              base::Time::Now(),
363                              history::SOURCE_BROWSED);
364
365   ClientPhishingRequest request;
366   request.set_url("http://www.foo.com/");
367   request.set_client_score(0.5);
368   std::vector<GURL> redirect_chain;
369   redirect_chain.push_back(GURL("http://somerandomwebsite.com/"));
370   redirect_chain.push_back(GURL("http://www.foo.com/"));
371   SetRedirectChain(redirect_chain, true);
372   browse_info_->http_status_code = 200;
373   NavigateAndCommit(GURL("http://www.foo.com/"),
374                     GURL("http://google.com/"),
375                     content::PageTransitionFromInt(
376                         content::PAGE_TRANSITION_AUTO_BOOKMARK |
377                         content::PAGE_TRANSITION_FORWARD_BACK));
378
379   EXPECT_TRUE(ExtractFeatures(&request));
380   std::map<std::string, double> features;
381   GetFeatureMap(request, &features);
382
383   EXPECT_EQ(1.0,
384             features[base::StringPrintf("%s=%s",
385                                         features::kReferrer,
386                                         "http://google.com/")]);
387   EXPECT_EQ(1.0,
388             features[base::StringPrintf("%s[0]=%s",
389                                         features::kRedirect,
390                                         "http://somerandomwebsite.com/")]);
391   // We shouldn't have a feature for the last redirect in the chain, since it
392   // should always be the URL that we navigated to.
393   EXPECT_EQ(0.0,
394             features[base::StringPrintf("%s[1]=%s",
395                                         features::kRedirect,
396                                         "http://foo.com/")]);
397   EXPECT_EQ(0.0, features[features::kHasSSLReferrer]);
398   EXPECT_EQ(2.0, features[features::kPageTransitionType]);
399   EXPECT_EQ(1.0, features[features::kIsFirstNavigation]);
400   EXPECT_EQ(200.0, features[features::kHttpStatusCode]);
401
402   request.Clear();
403   request.set_url("http://www.foo.com/page.html");
404   request.set_client_score(0.5);
405   redirect_chain.clear();
406   redirect_chain.push_back(GURL("http://www.foo.com/redirect"));
407   redirect_chain.push_back(GURL("http://www.foo.com/second_redirect"));
408   redirect_chain.push_back(GURL("http://www.foo.com/page.html"));
409   SetRedirectChain(redirect_chain, false);
410   browse_info_->http_status_code = 404;
411   NavigateAndCommit(GURL("http://www.foo.com/page.html"),
412                     GURL("http://www.foo.com"),
413                     content::PageTransitionFromInt(
414                         content::PAGE_TRANSITION_TYPED |
415                         content::PAGE_TRANSITION_CHAIN_START |
416                         content::PAGE_TRANSITION_CLIENT_REDIRECT));
417
418   EXPECT_TRUE(ExtractFeatures(&request));
419   features.clear();
420   GetFeatureMap(request, &features);
421
422   EXPECT_EQ(1,
423             features[base::StringPrintf("%s=%s",
424                                         features::kReferrer,
425                                         "http://www.foo.com/")]);
426   EXPECT_EQ(1.0,
427             features[base::StringPrintf("%s[0]=%s",
428                                         features::kRedirect,
429                                         "http://www.foo.com/redirect")]);
430   EXPECT_EQ(1.0,
431             features[base::StringPrintf("%s[1]=%s",
432                                         features::kRedirect,
433                                         "http://www.foo.com/second_redirect")]);
434   EXPECT_EQ(0.0, features[features::kHasSSLReferrer]);
435   EXPECT_EQ(1.0, features[features::kPageTransitionType]);
436   EXPECT_EQ(0.0, features[features::kIsFirstNavigation]);
437   EXPECT_EQ(1.0,
438             features[base::StringPrintf("%s%s=%s",
439                                         features::kHostPrefix,
440                                         features::kReferrer,
441                                         "http://google.com/")]);
442   EXPECT_EQ(1.0,
443             features[base::StringPrintf("%s%s[0]=%s",
444                                         features::kHostPrefix,
445                                         features::kRedirect,
446                                         "http://somerandomwebsite.com/")]);
447   EXPECT_EQ(2.0,
448             features[base::StringPrintf("%s%s",
449                                         features::kHostPrefix,
450                                         features::kPageTransitionType)]);
451   EXPECT_EQ(1.0,
452             features[base::StringPrintf("%s%s",
453                                         features::kHostPrefix,
454                                         features::kIsFirstNavigation)]);
455   EXPECT_EQ(404.0, features[features::kHttpStatusCode]);
456
457   request.Clear();
458   request.set_url("http://www.bar.com/");
459   request.set_client_score(0.5);
460   redirect_chain.clear();
461   redirect_chain.push_back(GURL("http://www.foo.com/page.html"));
462   redirect_chain.push_back(GURL("http://www.bar.com/"));
463   SetRedirectChain(redirect_chain, true);
464   NavigateAndCommit(GURL("http://www.bar.com/"),
465                     GURL("http://www.foo.com/page.html"),
466                     content::PageTransitionFromInt(
467                         content::PAGE_TRANSITION_LINK |
468                         content::PAGE_TRANSITION_CHAIN_END |
469                         content::PAGE_TRANSITION_CLIENT_REDIRECT));
470
471   EXPECT_TRUE(ExtractFeatures(&request));
472   features.clear();
473   GetFeatureMap(request, &features);
474
475   EXPECT_EQ(1.0,
476             features[base::StringPrintf("%s=%s",
477                                         features::kReferrer,
478                                         "http://www.foo.com/page.html")]);
479   EXPECT_EQ(1.0,
480             features[base::StringPrintf("%s[0]=%s",
481                                         features::kRedirect,
482                                         "http://www.foo.com/page.html")]);
483   EXPECT_EQ(0.0, features[features::kHasSSLReferrer]);
484   EXPECT_EQ(0.0, features[features::kPageTransitionType]);
485   EXPECT_EQ(0.0, features[features::kIsFirstNavigation]);
486
487   // Should not have host features.
488   EXPECT_EQ(0U,
489             features.count(base::StringPrintf("%s%s",
490                                               features::kHostPrefix,
491                                               features::kPageTransitionType)));
492   EXPECT_EQ(0U,
493             features.count(base::StringPrintf("%s%s",
494                                               features::kHostPrefix,
495                                               features::kIsFirstNavigation)));
496
497   request.Clear();
498   request.set_url("http://www.bar.com/other_page.html");
499   request.set_client_score(0.5);
500   redirect_chain.clear();
501   redirect_chain.push_back(GURL("http://www.bar.com/other_page.html"));
502   SetRedirectChain(redirect_chain, false);
503   NavigateAndCommit(GURL("http://www.bar.com/other_page.html"),
504                     GURL("http://www.bar.com/"),
505                     content::PAGE_TRANSITION_LINK);
506
507   EXPECT_TRUE(ExtractFeatures(&request));
508   features.clear();
509   GetFeatureMap(request, &features);
510
511   EXPECT_EQ(1.0,
512             features[base::StringPrintf("%s=%s",
513                                         features::kReferrer,
514                                         "http://www.bar.com/")]);
515   EXPECT_EQ(0.0, features[features::kHasSSLReferrer]);
516   EXPECT_EQ(0.0, features[features::kPageTransitionType]);
517   EXPECT_EQ(0.0, features[features::kIsFirstNavigation]);
518   EXPECT_EQ(1.0,
519             features[base::StringPrintf("%s%s=%s",
520                                         features::kHostPrefix,
521                                         features::kReferrer,
522                                         "http://www.foo.com/page.html")]);
523   EXPECT_EQ(1.0,
524             features[base::StringPrintf("%s%s[0]=%s",
525                                         features::kHostPrefix,
526                                         features::kRedirect,
527                                         "http://www.foo.com/page.html")]);
528   EXPECT_EQ(0.0,
529             features[base::StringPrintf("%s%s",
530                                         features::kHostPrefix,
531                                         features::kPageTransitionType)]);
532   EXPECT_EQ(0.0,
533             features[base::StringPrintf("%s%s",
534                                         features::kHostPrefix,
535                                         features::kIsFirstNavigation)]);
536   request.Clear();
537   request.set_url("http://www.baz.com/");
538   request.set_client_score(0.5);
539   redirect_chain.clear();
540   redirect_chain.push_back(GURL("https://bankofamerica.com"));
541   redirect_chain.push_back(GURL("http://www.baz.com/"));
542   SetRedirectChain(redirect_chain, true);
543   NavigateAndCommit(GURL("http://www.baz.com"),
544                     GURL("https://bankofamerica.com"),
545                     content::PAGE_TRANSITION_GENERATED);
546
547   EXPECT_TRUE(ExtractFeatures(&request));
548   features.clear();
549   GetFeatureMap(request, &features);
550
551   EXPECT_EQ(1.0,
552             features[base::StringPrintf("%s[0]=%s",
553                                         features::kRedirect,
554                                         features::kSecureRedirectValue)]);
555   EXPECT_EQ(1.0, features[features::kHasSSLReferrer]);
556   EXPECT_EQ(5.0, features[features::kPageTransitionType]);
557   // Should not have redirect or host features.
558   EXPECT_EQ(0U,
559             features.count(base::StringPrintf("%s%s",
560                                               features::kHostPrefix,
561                                               features::kPageTransitionType)));
562   EXPECT_EQ(0U,
563             features.count(base::StringPrintf("%s%s",
564                                               features::kHostPrefix,
565                                               features::kIsFirstNavigation)));
566   EXPECT_EQ(5.0, features[features::kPageTransitionType]);
567 }
568
569 TEST_F(BrowserFeatureExtractorTest, SafeBrowsingFeatures) {
570   SimpleNavigateAndCommit(GURL("http://www.foo.com/malware.html"));
571   ClientPhishingRequest request;
572   request.set_url("http://www.foo.com/malware.html");
573   request.set_client_score(0.5);
574
575   browse_info_->unsafe_resource.reset(
576       new SafeBrowsingUIManager::UnsafeResource);
577   browse_info_->unsafe_resource->url = GURL("http://www.malware.com/");
578   browse_info_->unsafe_resource->original_url = GURL("http://www.good.com/");
579   browse_info_->unsafe_resource->is_subresource = true;
580   browse_info_->unsafe_resource->threat_type = SB_THREAT_TYPE_URL_MALWARE;
581
582   ExtractFeatures(&request);
583   std::map<std::string, double> features;
584   GetFeatureMap(request, &features);
585   EXPECT_TRUE(features.count(base::StringPrintf(
586       "%s%s",
587       features::kSafeBrowsingMaliciousUrl,
588       "http://www.malware.com/")));
589   EXPECT_TRUE(features.count(base::StringPrintf(
590       "%s%s",
591        features::kSafeBrowsingOriginalUrl,
592         "http://www.good.com/")));
593   EXPECT_DOUBLE_EQ(1.0, features[features::kSafeBrowsingIsSubresource]);
594   EXPECT_DOUBLE_EQ(2.0, features[features::kSafeBrowsingThreatType]);
595 }
596
597 TEST_F(BrowserFeatureExtractorTest, MalwareFeatures) {
598   ClientMalwareRequest request;
599   request.set_url("http://www.foo.com/");
600
601   std::vector<IPUrlInfo> bad_urls;
602   bad_urls.push_back(
603       IPUrlInfo("http://bad.com", "GET", "", content::RESOURCE_TYPE_SCRIPT));
604   bad_urls.push_back(
605       IPUrlInfo("http://evil.com", "GET", "", content::RESOURCE_TYPE_SCRIPT));
606   browse_info_->ips.insert(std::make_pair("193.5.163.8", bad_urls));
607   browse_info_->ips.insert(std::make_pair("92.92.92.92", bad_urls));
608   std::vector<IPUrlInfo> good_urls;
609   good_urls.push_back(
610       IPUrlInfo("http://ok.com", "GET", "", content::RESOURCE_TYPE_SCRIPT));
611   browse_info_->ips.insert(std::make_pair("23.94.78.1", good_urls));
612   EXPECT_CALL(*db_manager_, MatchMalwareIP("193.5.163.8"))
613       .WillOnce(Return(true));
614   EXPECT_CALL(*db_manager_, MatchMalwareIP("92.92.92.92"))
615       .WillOnce(Return(true));
616   EXPECT_CALL(*db_manager_, MatchMalwareIP("23.94.78.1"))
617       .WillOnce(Return(false));
618
619   ExtractMalwareFeatures(&request);
620   EXPECT_EQ(4, request.bad_ip_url_info_size());
621   std::map<std::string, std::set<std::string> > result_urls;
622   GetMalwareUrls(request, &result_urls);
623
624   EXPECT_EQ(2U, result_urls.size());
625   EXPECT_TRUE(result_urls.count("193.5.163.8"));
626   std::set<std::string> urls = result_urls["193.5.163.8"];
627   EXPECT_EQ(2U, urls.size());
628   EXPECT_TRUE(urls.find("http://bad.com") != urls.end());
629   EXPECT_TRUE(urls.find("http://evil.com") != urls.end());
630   EXPECT_TRUE(result_urls.count("92.92.92.92"));
631   urls = result_urls["92.92.92.92"];
632   EXPECT_EQ(2U, urls.size());
633   EXPECT_TRUE(urls.find("http://bad.com") != urls.end());
634   EXPECT_TRUE(urls.find("http://evil.com") != urls.end());
635 }
636
637 TEST_F(BrowserFeatureExtractorTest, MalwareFeatures_ExceedLimit) {
638   ClientMalwareRequest request;
639   request.set_url("http://www.foo.com/");
640
641   std::vector<IPUrlInfo> bad_urls;
642   bad_urls.push_back(
643       IPUrlInfo("http://bad.com", "GET", "", content::RESOURCE_TYPE_SCRIPT));
644   std::vector<std::string> ips;
645   for (int i = 0; i < 7; ++i) {  // Add 7 ips
646     std::string ip = base::StringPrintf("%d.%d.%d.%d", i, i, i, i);
647     ips.push_back(ip);
648     browse_info_->ips.insert(std::make_pair(ip, bad_urls));
649
650     // First ip is good but all the others are bad.
651     EXPECT_CALL(*db_manager_, MatchMalwareIP(ip)).WillOnce(Return(i > 0));
652   }
653
654   ExtractMalwareFeatures(&request);
655   // The number of IP matched url we store is capped at 5 IPs per request.
656   EXPECT_EQ(5, request.bad_ip_url_info_size());
657 }
658
659 }  // namespace safe_browsing