- add sources.
[platform/framework/web/crosswalk.git] / src / chrome / renderer / safe_browsing / phishing_dom_feature_extractor_browsertest.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Note that although this is not a "browser" test, it runs as part of
6 // browser_tests.  This is because WebKit does not work properly if it is
7 // shutdown and re-initialized.  Since browser_tests runs each test in a
8 // new process, this avoids the problem.
9
10 #include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h"
11
12 #include "base/bind.h"
13 #include "base/callback.h"
14 #include "base/compiler_specific.h"
15 #include "base/memory/weak_ptr.h"
16 #include "base/message_loop/message_loop.h"
17 #include "base/time/time.h"
18 #include "chrome/renderer/safe_browsing/features.h"
19 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
20 #include "chrome/renderer/safe_browsing/test_utils.h"
21 #include "content/public/test/render_view_fake_resources_test.h"
22 #include "testing/gmock/include/gmock/gmock.h"
23 #include "third_party/WebKit/public/platform/WebString.h"
24 #include "third_party/WebKit/public/web/WebFrame.h"
25 #include "third_party/WebKit/public/web/WebScriptSource.h"
26
27 using ::testing::DoAll;
28 using ::testing::Invoke;
29 using ::testing::Return;
30
31 namespace safe_browsing {
32
33 class PhishingDOMFeatureExtractorTest
34     : public content::RenderViewFakeResourcesTest {
35  public:
36   // Helper for the SubframeRemoval test that posts a message to remove
37   // the iframe "frame1" from the document.
38   void ScheduleRemoveIframe() {
39     message_loop_.PostTask(
40         FROM_HERE,
41         base::Bind(&PhishingDOMFeatureExtractorTest::RemoveIframe,
42                    weak_factory_.GetWeakPtr()));
43   }
44
45  protected:
46   PhishingDOMFeatureExtractorTest()
47       : content::RenderViewFakeResourcesTest(),
48         weak_factory_(this) {}
49
50   virtual ~PhishingDOMFeatureExtractorTest() {}
51
52   virtual void SetUp() {
53     // Set up WebKit and the RenderView.
54     content::RenderViewFakeResourcesTest::SetUp();
55     extractor_.reset(new PhishingDOMFeatureExtractor(view(), &clock_));
56   }
57
58   virtual void TearDown() {
59     content::RenderViewFakeResourcesTest::TearDown();
60   }
61
62   // Runs the DOMFeatureExtractor on the RenderView, waiting for the
63   // completion callback.  Returns the success boolean from the callback.
64   bool ExtractFeatures(FeatureMap* features) {
65     success_ = false;
66     extractor_->ExtractFeatures(
67         features,
68         base::Bind(&PhishingDOMFeatureExtractorTest::ExtractionDone,
69                    base::Unretained(this)));
70     message_loop_.Run();
71     return success_;
72   }
73
74   // Completion callback for feature extraction.
75   void ExtractionDone(bool success) {
76     success_ = success;
77     message_loop_.Quit();
78   }
79
80   // Does the actual work of removing the iframe "frame1" from the document.
81   void RemoveIframe() {
82     WebKit::WebFrame* main_frame = GetMainFrame();
83     ASSERT_TRUE(main_frame);
84     main_frame->executeScript(
85         WebKit::WebString(
86             "document.body.removeChild(document.getElementById('frame1'));"));
87   }
88
89   MockFeatureExtractorClock clock_;
90   scoped_ptr<PhishingDOMFeatureExtractor> extractor_;
91   bool success_;  // holds the success value from ExtractFeatures
92   base::WeakPtrFactory<PhishingDOMFeatureExtractorTest> weak_factory_;
93 };
94
95 TEST_F(PhishingDOMFeatureExtractorTest, FormFeatures) {
96   // This test doesn't exercise the extraction timing.
97   EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
98   responses_["http://host.com/"] =
99       "<html><head><body>"
100       "<form action=\"query\"><input type=text><input type=checkbox></form>"
101       "<form action=\"http://cgi.host.com/submit\"></form>"
102       "<form action=\"http://other.com/\"></form>"
103       "<form action=\"query\"></form>"
104       "<form></form></body></html>";
105
106   FeatureMap expected_features;
107   expected_features.AddBooleanFeature(features::kPageHasForms);
108   expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.25);
109   expected_features.AddBooleanFeature(features::kPageHasTextInputs);
110   expected_features.AddBooleanFeature(features::kPageHasCheckInputs);
111
112   FeatureMap features;
113   LoadURL("http://host.com/");
114   ASSERT_TRUE(ExtractFeatures(&features));
115   ExpectFeatureMapsAreEqual(features, expected_features);
116
117   responses_["http://host.com/"] =
118       "<html><head><body>"
119       "<input type=\"radio\"><input type=password></body></html>";
120
121   expected_features.Clear();
122   expected_features.AddBooleanFeature(features::kPageHasRadioInputs);
123   expected_features.AddBooleanFeature(features::kPageHasPswdInputs);
124
125   features.Clear();
126   LoadURL("http://host.com/");
127   ASSERT_TRUE(ExtractFeatures(&features));
128   ExpectFeatureMapsAreEqual(features, expected_features);
129
130   responses_["http://host.com/"] =
131       "<html><head><body><input></body></html>";
132
133   expected_features.Clear();
134   expected_features.AddBooleanFeature(features::kPageHasTextInputs);
135
136   features.Clear();
137   LoadURL("http://host.com/");
138   ASSERT_TRUE(ExtractFeatures(&features));
139   ExpectFeatureMapsAreEqual(features, expected_features);
140
141   responses_["http://host.com/"] =
142       "<html><head><body><input type=\"invalid\"></body></html>";
143
144   expected_features.Clear();
145   expected_features.AddBooleanFeature(features::kPageHasTextInputs);
146
147   features.Clear();
148   LoadURL("http://host.com/");
149   ASSERT_TRUE(ExtractFeatures(&features));
150   ExpectFeatureMapsAreEqual(features, expected_features);
151 }
152
153 TEST_F(PhishingDOMFeatureExtractorTest, LinkFeatures) {
154   // This test doesn't exercise the extraction timing.
155   EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
156   responses_["http://www.host.com/"] =
157       "<html><head><body>"
158       "<a href=\"http://www2.host.com/abc\">link</a>"
159       "<a name=page_anchor></a>"
160       "<a href=\"http://www.chromium.org/\">chromium</a>"
161       "</body></html";
162
163   FeatureMap expected_features;
164   expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.5);
165   expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.0);
166   expected_features.AddBooleanFeature(features::kPageLinkDomain +
167                                       std::string("chromium.org"));
168
169   FeatureMap features;
170   LoadURL("http://www.host.com/");
171   ASSERT_TRUE(ExtractFeatures(&features));
172   ExpectFeatureMapsAreEqual(features, expected_features);
173
174   responses_.clear();
175   responses_["https://www.host.com/"] =
176       "<html><head><body>"
177       "<a href=\"login\">this is secure</a>"
178       "<a href=\"http://host.com\">not secure</a>"
179       "<a href=\"https://www2.host.com/login\">also secure</a>"
180       "<a href=\"http://chromium.org/\">also not secure</a>"
181       "</body></html>";
182
183   expected_features.Clear();
184   expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.25);
185   expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.5);
186   expected_features.AddBooleanFeature(features::kPageLinkDomain +
187                                       std::string("chromium.org"));
188
189   features.Clear();
190   LoadURL("https://www.host.com/");
191   ASSERT_TRUE(ExtractFeatures(&features));
192   ExpectFeatureMapsAreEqual(features, expected_features);
193 }
194
195 TEST_F(PhishingDOMFeatureExtractorTest, ScriptAndImageFeatures) {
196   // This test doesn't exercise the extraction timing.
197   EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
198   responses_["http://host.com/"] =
199       "<html><head><script></script><script></script></head></html>";
200
201   FeatureMap expected_features;
202   expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
203
204   FeatureMap features;
205   LoadURL("http://host.com/");
206   ASSERT_TRUE(ExtractFeatures(&features));
207   ExpectFeatureMapsAreEqual(features, expected_features);
208
209   responses_["http://host.com/"] =
210       "<html><head><script></script><script></script><script></script>"
211       "<script></script><script></script><script></script><script></script>"
212       "</head><body><img src=\"blah.gif\">"
213       "<img src=\"http://host2.com/blah.gif\"></body></html>";
214
215   expected_features.Clear();
216   expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
217   expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTSix);
218   expected_features.AddRealFeature(features::kPageImgOtherDomainFreq, 0.5);
219
220   features.Clear();
221   LoadURL("http://host.com/");
222   ASSERT_TRUE(ExtractFeatures(&features));
223   ExpectFeatureMapsAreEqual(features, expected_features);
224 }
225
226 TEST_F(PhishingDOMFeatureExtractorTest, SubFrames) {
227   // This test doesn't exercise the extraction timing.
228   EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
229
230   // Test that features are aggregated across all frames.
231   responses_["http://host.com/"] =
232       "<html><body><input type=text><a href=\"info.html\">link</a>"
233       "<iframe src=\"http://host2.com/\"></iframe>"
234       "<iframe src=\"http://host3.com/\"></iframe>"
235       "</body></html>";
236
237   responses_["http://host2.com/"] =
238       "<html><head><script></script><body>"
239       "<form action=\"http://host4.com/\"><input type=checkbox></form>"
240       "<form action=\"http://host2.com/submit\"></form>"
241       "<a href=\"http://www.host2.com/home\">link</a>"
242       "<iframe src=\"nested.html\"></iframe>"
243       "<body></html>";
244
245   responses_["http://host2.com/nested.html"] =
246       "<html><body><input type=password>"
247       "<a href=\"https://host4.com/\">link</a>"
248       "<a href=\"relative\">another</a>"
249       "</body></html>";
250
251   responses_["http://host3.com/"] =
252       "<html><head><script></script><body>"
253       "<img src=\"http://host.com/123.png\">"
254       "</body></html>";
255
256   FeatureMap expected_features;
257   expected_features.AddBooleanFeature(features::kPageHasForms);
258   // Form action domains are compared to the URL of the document they're in,
259   // not the URL of the toplevel page.  So http://host2.com/ has two form
260   // actions, one of which is external.
261   expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.5);
262   expected_features.AddBooleanFeature(features::kPageHasTextInputs);
263   expected_features.AddBooleanFeature(features::kPageHasPswdInputs);
264   expected_features.AddBooleanFeature(features::kPageHasCheckInputs);
265   expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.25);
266   expected_features.AddBooleanFeature(features::kPageLinkDomain +
267                                       std::string("host4.com"));
268   expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.25);
269   expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
270   expected_features.AddRealFeature(features::kPageImgOtherDomainFreq, 1.0);
271
272   FeatureMap features;
273   LoadURL("http://host.com/");
274   ASSERT_TRUE(ExtractFeatures(&features));
275   ExpectFeatureMapsAreEqual(features, expected_features);
276 }
277
278 TEST_F(PhishingDOMFeatureExtractorTest, Continuation) {
279   // For this test, we'll cause the feature extraction to run multiple
280   // iterations by incrementing the clock.
281
282   // This page has a total of 50 elements.  For the external forms feature to
283   // be computed correctly, the extractor has to examine the whole document.
284   // Note: the empty HEAD is important -- WebKit will synthesize a HEAD if
285   // there isn't one present, which can be confusing for the element counts.
286   std::string response = "<html><head></head><body>"
287       "<form action=\"ondomain\"></form>";
288   for (int i = 0; i < 45; ++i) {
289     response.append("<p>");
290   }
291   response.append("<form action=\"http://host2.com/\"></form></body></html>");
292   responses_["http://host.com/"] = response;
293
294   // Advance the clock 6 ms every 10 elements processed, 10 ms between chunks.
295   // Note that this assumes kClockCheckGranularity = 10 and
296   // kMaxTimePerChunkMs = 10.
297   base::TimeTicks now = base::TimeTicks::Now();
298   EXPECT_CALL(clock_, Now())
299       // Time check at the start of extraction.
300       .WillOnce(Return(now))
301       // Time check at the start of the first chunk of work.
302       .WillOnce(Return(now))
303       // Time check after the first 10 elements.
304       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(6)))
305       // Time check after the next 10 elements.  This is over the chunk
306       // time limit, so a continuation task will be posted.
307       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(12)))
308       // Time check at the start of the second chunk of work.
309       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(22)))
310       // Time check after resuming iteration for the second chunk.
311       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(24)))
312       // Time check after the next 10 elements.
313       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(30)))
314       // Time check after the next 10 elements.  This will trigger another
315       // continuation task.
316       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(36)))
317       // Time check at the start of the third chunk of work.
318       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(46)))
319       // Time check after resuming iteration for the third chunk.
320       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(48)))
321       // Time check after the last 10 elements.
322       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(54)))
323       // A final time check for the histograms.
324       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(56)));
325
326   FeatureMap expected_features;
327   expected_features.AddBooleanFeature(features::kPageHasForms);
328   expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.5);
329
330   FeatureMap features;
331   LoadURL("http://host.com/");
332   ASSERT_TRUE(ExtractFeatures(&features));
333   ExpectFeatureMapsAreEqual(features, expected_features);
334   // Make sure none of the mock expectations carry over to the next test.
335   ::testing::Mock::VerifyAndClearExpectations(&clock_);
336
337   // Now repeat the test with the same page, but advance the clock faster so
338   // that the extraction time exceeds the maximum total time for the feature
339   // extractor.  Extraction should fail.  Note that this assumes
340   // kMaxTotalTimeMs = 500.
341   EXPECT_CALL(clock_, Now())
342       // Time check at the start of extraction.
343       .WillOnce(Return(now))
344       // Time check at the start of the first chunk of work.
345       .WillOnce(Return(now))
346       // Time check after the first 10 elements.
347       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(300)))
348       // Time check at the start of the second chunk of work.
349       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(350)))
350       // Time check after resuming iteration for the second chunk.
351       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(360)))
352       // Time check after the next 10 elements.  This is over the limit.
353       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(600)))
354       // A final time check for the histograms.
355       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(620)));
356
357   features.Clear();
358   EXPECT_FALSE(ExtractFeatures(&features));
359 }
360
361 TEST_F(PhishingDOMFeatureExtractorTest, SubframeRemoval) {
362   // In this test, we'll advance the feature extractor so that it is positioned
363   // inside an iframe, and have it pause due to exceeding the chunk time limit.
364   // Then, prior to continuation, the iframe is removed from the document.
365   // As currently implemented, this should finish extraction from the removed
366   // iframe document.
367   responses_["http://host.com/"] =
368       "<html><head></head><body>"
369       "<iframe src=\"frame.html\" id=\"frame1\"></iframe>"
370       "<form></form></body></html>";
371   responses_["http://host.com/frame.html"] =
372       "<html><body><p><p><p><input type=password></body></html>";
373
374   base::TimeTicks now = base::TimeTicks::Now();
375   EXPECT_CALL(clock_, Now())
376       // Time check at the start of extraction.
377       .WillOnce(Return(now))
378       // Time check at the start of the first chunk of work.
379       .WillOnce(Return(now))
380       // Time check after the first 10 elements.  Enough time has passed
381       // to stop extraction.  Schedule the iframe removal to happen as soon as
382       // the feature extractor returns control to the message loop.
383       .WillOnce(DoAll(
384           Invoke(this, &PhishingDOMFeatureExtractorTest::ScheduleRemoveIframe),
385           Return(now + base::TimeDelta::FromMilliseconds(21))))
386       // Time check at the start of the second chunk of work.
387       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(25)))
388       // Time check after resuming iteration for the second chunk.
389       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(27)))
390       // A final time check for the histograms.
391       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(33)));
392
393   FeatureMap expected_features;
394   expected_features.AddBooleanFeature(features::kPageHasForms);
395   expected_features.AddBooleanFeature(features::kPageHasPswdInputs);
396
397   FeatureMap features;
398   LoadURL("http://host.com/");
399   ASSERT_TRUE(ExtractFeatures(&features));
400   ExpectFeatureMapsAreEqual(features, expected_features);
401 }
402
403 }  // namespace safe_browsing