1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // Note that although this is not a "browser" test, it runs as part of
6 // browser_tests. This is because WebKit does not work properly if it is
7 // shut down and re-initialized. Since browser_tests runs each test in a
8 // new process, this avoids the problem.
10 #include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h"
12 #include "base/bind.h"
13 #include "base/callback.h"
14 #include "base/compiler_specific.h"
15 #include "base/memory/weak_ptr.h"
16 #include "base/message_loop/message_loop.h"
17 #include "base/time/time.h"
18 #include "chrome/renderer/safe_browsing/features.h"
19 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
20 #include "chrome/renderer/safe_browsing/test_utils.h"
21 #include "content/public/test/render_view_fake_resources_test.h"
22 #include "testing/gmock/include/gmock/gmock.h"
23 #include "third_party/WebKit/public/platform/WebString.h"
24 #include "third_party/WebKit/public/web/WebFrame.h"
25 #include "third_party/WebKit/public/web/WebScriptSource.h"
27 using ::testing::DoAll;
28 using ::testing::Invoke;
29 using ::testing::Return;
31 namespace safe_browsing {
// Test fixture for PhishingDOMFeatureExtractor. It derives from
// content::RenderViewFakeResourcesTest, creates the extractor in SetUp() from
// the fixture's view() and a MockFeatureExtractorClock, and provides helpers
// that start an extraction and report the success value delivered to the
// completion callback.
33 class PhishingDOMFeatureExtractorTest
34 : public content::RenderViewFakeResourcesTest {
36 // Helper for the SubframeRemoval test that posts a message to remove
37 // the iframe "frame1" from the document.
// The posted task is bound through weak_factory_.GetWeakPtr() rather than a
// raw pointer to the fixture.
38 void ScheduleRemoveIframe() {
39 message_loop_.PostTask(
41 base::Bind(&PhishingDOMFeatureExtractorTest::RemoveIframe,
42 weak_factory_.GetWeakPtr()));
// weak_factory_ is constructed with |this| so it can hand out weak pointers
// to the fixture.
46 PhishingDOMFeatureExtractorTest()
47 : content::RenderViewFakeResourcesTest(),
48 weak_factory_(this) {}
50 virtual ~PhishingDOMFeatureExtractorTest() {}
// Runs the base-class setup first, then creates the extractor under test.
52 virtual void SetUp() {
53 // Set up WebKit and the RenderView.
54 content::RenderViewFakeResourcesTest::SetUp();
// The extractor is handed a pointer to clock_, which is a mock; the tests in
// this file set expectations on clock_.Now() with EXPECT_CALL.
55 extractor_.reset(new PhishingDOMFeatureExtractor(view(), &clock_));
// Delegates to the base-class TearDown().
58 virtual void TearDown() {
59 content::RenderViewFakeResourcesTest::TearDown();
62 // Runs the DOMFeatureExtractor on the RenderView, waiting for the
63 // completion callback. Returns the success boolean from the callback.
64 bool ExtractFeatures(FeatureMap* features) {
// ExtractionDone is registered as the completion callback and is bound with
// base::Unretained(this).
// NOTE(review): presumably safe because this method waits for the callback
// before returning -- confirm.
66 extractor_->ExtractFeatures(
68 base::Bind(&PhishingDOMFeatureExtractorTest::ExtractionDone,
69 base::Unretained(this)));
74 // Completion callback for feature extraction.
75 void ExtractionDone(bool success) {
80 // Does the actual work of removing the iframe "frame1" from the document.
// Fails the test with ASSERT_TRUE if GetMainFrame() returns null; otherwise
// executes a script in the main frame that removes the element with id
// 'frame1' from document.body.
82 WebKit::WebFrame* main_frame = GetMainFrame();
83 ASSERT_TRUE(main_frame);
84 main_frame->executeScript(
86 "document.body.removeChild(document.getElementById('frame1'));"));
// Mock clock whose address is passed to the extractor in SetUp().
89 MockFeatureExtractorClock clock_;
// The extractor under test; (re)created in SetUp().
90 scoped_ptr<PhishingDOMFeatureExtractor> extractor_;
91 bool success_; // holds the success value from ExtractFeatures
// Supplies the weak pointer used by ScheduleRemoveIframe().
92 base::WeakPtrFactory<PhishingDOMFeatureExtractorTest> weak_factory_;
// Verifies the form- and input-related features. The response served for
// http://host.com/ is replaced four times; after each replacement the page is
// loaded, features are extracted, and the result is compared against
// |expected_features|.
95 TEST_F(PhishingDOMFeatureExtractorTest, FormFeatures) {
96 // This test doesn't exercise the extraction timing.
97 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
// Page 1: several forms plus a text input and a checkbox.
98 responses_["http://host.com/"] =
100 "<form action=\"query\"><input type=text><input type=checkbox></form>"
101 "<form action=\"http://cgi.host.com/submit\"></form>"
102 "<form action=\"http://other.com/\"></form>"
103 "<form action=\"query\"></form>"
104 "<form></form></body></html>";
106 FeatureMap expected_features;
107 expected_features.AddBooleanFeature(features::kPageHasForms);
// 0.25 is consistent with one (other.com) of the four forms that specify an
// action being off-domain.
// NOTE(review): presumably the action-less <form> is not counted and
// cgi.host.com is treated as the same domain as host.com -- confirm against
// the extractor.
108 expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.25);
109 expected_features.AddBooleanFeature(features::kPageHasTextInputs);
110 expected_features.AddBooleanFeature(features::kPageHasCheckInputs);
113 LoadURL("http://host.com/");
114 ASSERT_TRUE(ExtractFeatures(&features));
115 ExpectFeatureMapsAreEqual(features, expected_features);
// Page 2: a radio button and a password field, with no form features
// expected.
117 responses_["http://host.com/"] =
119 "<input type=\"radio\"><input type=password></body></html>";
121 expected_features.Clear();
122 expected_features.AddBooleanFeature(features::kPageHasRadioInputs);
123 expected_features.AddBooleanFeature(features::kPageHasPswdInputs);
126 LoadURL("http://host.com/");
127 ASSERT_TRUE(ExtractFeatures(&features));
128 ExpectFeatureMapsAreEqual(features, expected_features);
// Page 3: an <input> with no type attribute is expected to be reported as a
// text input.
130 responses_["http://host.com/"] =
131 "<html><head><body><input></body></html>";
133 expected_features.Clear();
134 expected_features.AddBooleanFeature(features::kPageHasTextInputs);
137 LoadURL("http://host.com/");
138 ASSERT_TRUE(ExtractFeatures(&features));
139 ExpectFeatureMapsAreEqual(features, expected_features);
// Page 4: an <input> with an unrecognized type ("invalid") is also expected
// to be reported as a text input.
141 responses_["http://host.com/"] =
142 "<html><head><body><input type=\"invalid\"></body></html>";
144 expected_features.Clear();
145 expected_features.AddBooleanFeature(features::kPageHasTextInputs);
148 LoadURL("http://host.com/");
149 ASSERT_TRUE(ExtractFeatures(&features));
150 ExpectFeatureMapsAreEqual(features, expected_features);
// Verifies the link-related features (external link frequency, secure link
// frequency, and per-domain link features) for an http page and then for an
// https page.
153 TEST_F(PhishingDOMFeatureExtractorTest, LinkFeatures) {
154 // This test doesn't exercise the extraction timing.
155 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
// Page 1 (http): one link to a sibling host, one named anchor without an
// href, and one link to chromium.org.
156 responses_["http://www.host.com/"] =
158 "<a href=\"http://www2.host.com/abc\">link</a>"
159 "<a name=page_anchor></a>"
160 "<a href=\"http://www.chromium.org/\">chromium</a>"
163 FeatureMap expected_features;
// 0.5 is consistent with chromium.org being the one external link out of the
// two anchors that carry an href.
// NOTE(review): presumably the named anchor is not counted as a link, and
// www2.host.com is treated as the same domain as www.host.com -- confirm.
164 expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.5);
165 expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.0);
166 expected_features.AddBooleanFeature(features::kPageLinkDomain +
167 std::string("chromium.org"));
170 LoadURL("http://www.host.com/");
171 ASSERT_TRUE(ExtractFeatures(&features));
172 ExpectFeatureMapsAreEqual(features, expected_features);
// Page 2 (https): four links whose link text states whether each one is
// expected to count as secure. The relative link "login" is labeled secure on
// this https page.
175 responses_["https://www.host.com/"] =
177 "<a href=\"login\">this is secure</a>"
178 "<a href=\"http://host.com\">not secure</a>"
179 "<a href=\"https://www2.host.com/login\">also secure</a>"
180 "<a href=\"http://chromium.org/\">also not secure</a>"
183 expected_features.Clear();
// One of the four links (chromium.org) is expected to be external, and two
// of the four are expected to be secure.
184 expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.25);
185 expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.5);
186 expected_features.AddBooleanFeature(features::kPageLinkDomain +
187 std::string("chromium.org"));
190 LoadURL("https://www.host.com/");
191 ASSERT_TRUE(ExtractFeatures(&features));
192 ExpectFeatureMapsAreEqual(features, expected_features);
// Verifies the script-count threshold features and the off-domain image
// frequency feature using two versions of http://host.com/.
195 TEST_F(PhishingDOMFeatureExtractorTest, ScriptAndImageFeatures) {
196 // This test doesn't exercise the extraction timing.
197 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
// Page 1: two <script> tags, so only the "greater than one" feature is
// expected.
198 responses_["http://host.com/"] =
199 "<html><head><script></script><script></script></head></html>";
201 FeatureMap expected_features;
202 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
205 LoadURL("http://host.com/");
206 ASSERT_TRUE(ExtractFeatures(&features));
207 ExpectFeatureMapsAreEqual(features, expected_features);
// Page 2: seven <script> tags, plus one image with a relative src and one
// image served from host2.com.
209 responses_["http://host.com/"] =
210 "<html><head><script></script><script></script><script></script>"
211 "<script></script><script></script><script></script><script></script>"
212 "</head><body><img src=\"blah.gif\">"
213 "<img src=\"http://host2.com/blah.gif\"></body></html>";
215 expected_features.Clear();
// Seven scripts are expected to set both threshold features; one of the two
// images is off-domain, giving 0.5.
216 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
217 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTSix);
218 expected_features.AddRealFeature(features::kPageImgOtherDomainFreq, 0.5);
221 LoadURL("http://host.com/");
222 ASSERT_TRUE(ExtractFeatures(&features));
223 ExpectFeatureMapsAreEqual(features, expected_features);
// Verifies feature extraction over a page with subframes: http://host.com/
// embeds iframes for host2.com and host3.com, and the host2.com document
// embeds a further iframe for nested.html.
226 TEST_F(PhishingDOMFeatureExtractorTest, SubFrames) {
227 // This test doesn't exercise the extraction timing.
228 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
230 // Test that features are aggregated across all frames.
// Top-level document: a text input, a relative link, and two iframes.
231 responses_["http://host.com/"] =
232 "<html><body><input type=text><a href=\"info.html\">link</a>"
233 "<iframe src=\"http://host2.com/\"></iframe>"
234 "<iframe src=\"http://host3.com/\"></iframe>"
// First iframe: a script, two forms (one posting to host4.com), a link to
// www.host2.com, and a nested iframe.
237 responses_["http://host2.com/"] =
238 "<html><head><script></script><body>"
239 "<form action=\"http://host4.com/\"><input type=checkbox></form>"
240 "<form action=\"http://host2.com/submit\"></form>"
241 "<a href=\"http://www.host2.com/home\">link</a>"
242 "<iframe src=\"nested.html\"></iframe>"
// Nested iframe: a password input, an https link to host4.com, and a
// relative link.
245 responses_["http://host2.com/nested.html"] =
246 "<html><body><input type=password>"
247 "<a href=\"https://host4.com/\">link</a>"
248 "<a href=\"relative\">another</a>"
// Second iframe: a script and an image served from host.com.
251 responses_["http://host3.com/"] =
252 "<html><head><script></script><body>"
253 "<img src=\"http://host.com/123.png\">"
256 FeatureMap expected_features;
257 expected_features.AddBooleanFeature(features::kPageHasForms);
258 // Form action domains are compared to the URL of the document they're in,
259 // not the URL of the toplevel page. So http://host2.com/ has two form
260 // actions, one of which is external.
261 expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.5);
262 expected_features.AddBooleanFeature(features::kPageHasTextInputs);
263 expected_features.AddBooleanFeature(features::kPageHasPswdInputs);
264 expected_features.AddBooleanFeature(features::kPageHasCheckInputs);
// 0.25 for both link features is consistent with the https://host4.com/ link
// being the only external and the only secure link among the four links
// above.
// NOTE(review): presumably links, like form actions, are compared against
// the document that contains them -- confirm.
265 expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.25);
266 expected_features.AddBooleanFeature(features::kPageLinkDomain +
267 std::string("host4.com"));
268 expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.25);
// Each iframe document contributes one <script>; the "greater than one"
// feature is expected only when the counts are combined across frames.
269 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
// The only image is served from host.com but sits in the host3.com document,
// and it is expected to count as off-domain (1.0).
270 expected_features.AddRealFeature(features::kPageImgOtherDomainFreq, 1.0);
273 LoadURL("http://host.com/");
274 ASSERT_TRUE(ExtractFeatures(&features));
275 ExpectFeatureMapsAreEqual(features, expected_features);
// Verifies chunked extraction driven by the mock clock. The first half
// scripts clock values under which extraction is expected to succeed over
// several chunks; the second half scripts values that exceed the total time
// limit, and extraction is expected to fail.
278 TEST_F(PhishingDOMFeatureExtractorTest, Continuation) {
279 // For this test, we'll cause the feature extraction to run multiple
280 // iterations by incrementing the clock.
282 // This page has a total of 50 elements. For the external forms feature to
283 // be computed correctly, the extractor has to examine the whole document.
284 // Note: the empty HEAD is important -- WebKit will synthesize a HEAD if
285 // there isn't one present, which can be confusing for the element counts.
286 std::string response = "<html><head></head><body>"
287 "<form action=\"ondomain\"></form>";
// 45 <p> elements pad the document between the on-domain form above and the
// off-domain form appended below.
288 for (int i = 0; i < 45; ++i) {
289 response.append("<p>");
291 response.append("<form action=\"http://host2.com/\"></form></body></html>");
292 responses_["http://host.com/"] = response;
294 // Advance the clock 6 ms every 10 elements processed, 10 ms between chunks.
295 // Note that this assumes kClockCheckGranularity = 10 and
296 // kMaxTimePerChunkMs = 10.
297 base::TimeTicks now = base::TimeTicks::Now();
// Each WillOnce below supplies the value for exactly one call to Now(), in
// the order listed.
298 EXPECT_CALL(clock_, Now())
299 // Time check at the start of extraction.
300 .WillOnce(Return(now))
301 // Time check at the start of the first chunk of work.
302 .WillOnce(Return(now))
303 // Time check after the first 10 elements.
304 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(6)))
305 // Time check after the next 10 elements. This is over the chunk
306 // time limit, so a continuation task will be posted.
307 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(12)))
308 // Time check at the start of the second chunk of work.
309 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(22)))
310 // Time check after resuming iteration for the second chunk.
311 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(24)))
312 // Time check after the next 10 elements.
313 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(30)))
314 // Time check after the next 10 elements. This will trigger another
315 // continuation task.
316 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(36)))
317 // Time check at the start of the third chunk of work.
318 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(46)))
319 // Time check after resuming iteration for the third chunk.
320 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(48)))
321 // Time check after the last 10 elements.
322 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(54)))
323 // A final time check for the histograms.
324 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(56)));
326 FeatureMap expected_features;
327 expected_features.AddBooleanFeature(features::kPageHasForms);
// One of the two forms posts to host2.com, so half of the form actions are
// expected to be off-domain.
328 expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.5);
331 LoadURL("http://host.com/");
332 ASSERT_TRUE(ExtractFeatures(&features));
333 ExpectFeatureMapsAreEqual(features, expected_features);
334 // Make sure none of the mock expectations carry over to the next test.
335 ::testing::Mock::VerifyAndClearExpectations(&clock_);
337 // Now repeat the test with the same page, but advance the clock faster so
338 // that the extraction time exceeds the maximum total time for the feature
339 // extractor. Extraction should fail. Note that this assumes
340 // kMaxTotalTimeMs = 500.
341 EXPECT_CALL(clock_, Now())
342 // Time check at the start of extraction.
343 .WillOnce(Return(now))
344 // Time check at the start of the first chunk of work.
345 .WillOnce(Return(now))
346 // Time check after the first 10 elements.
347 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(300)))
348 // Time check at the start of the second chunk of work.
349 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(350)))
350 // Time check after resuming iteration for the second chunk.
351 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(360)))
352 // Time check after the next 10 elements. This is over the limit.
353 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(600)))
354 // A final time check for the histograms.
355 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(620)));
// With the clock past the total time limit, the completion callback is
// expected to report failure.
358 EXPECT_FALSE(ExtractFeatures(&features));
// Verifies that extraction still completes successfully when the iframe
// being processed is removed from the document between two chunks of work.
361 TEST_F(PhishingDOMFeatureExtractorTest, SubframeRemoval) {
362 // In this test, we'll advance the feature extractor so that it is positioned
363 // inside an iframe, and have it pause due to exceeding the chunk time limit.
364 // Then, prior to continuation, the iframe is removed from the document.
365 // As currently implemented, this should finish extraction from the removed
// iframe's document: the expected features below still include the password
// input that only frame.html contains.
// The top-level page holds the iframe (id "frame1") followed by a form.
367 responses_["http://host.com/"] =
368 "<html><head></head><body>"
369 "<iframe src=\"frame.html\" id=\"frame1\"></iframe>"
370 "<form></form></body></html>";
// The iframe document holds three <p> elements and a password input.
371 responses_["http://host.com/frame.html"] =
372 "<html><body><p><p><p><input type=password></body></html>";
374 base::TimeTicks now = base::TimeTicks::Now();
375 EXPECT_CALL(clock_, Now())
376 // Time check at the start of extraction.
377 .WillOnce(Return(now))
378 // Time check at the start of the first chunk of work.
379 .WillOnce(Return(now))
380 // Time check after the first 10 elements. Enough time has passed
381 // to stop extraction. Schedule the iframe removal to happen as soon as
382 // the feature extractor returns control to the message loop.
// This call to Now() both invokes ScheduleRemoveIframe() and returns a time
// 21 ms after |now|.
384 Invoke(this, &PhishingDOMFeatureExtractorTest::ScheduleRemoveIframe),
385 Return(now + base::TimeDelta::FromMilliseconds(21))))
386 // Time check at the start of the second chunk of work.
387 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(25)))
388 // Time check after resuming iteration for the second chunk.
389 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(27)))
390 // A final time check for the histograms.
391 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(33)));
393 FeatureMap expected_features;
// The form comes from the top-level page and the password input from the
// removed iframe's document; both are expected in the result.
394 expected_features.AddBooleanFeature(features::kPageHasForms);
395 expected_features.AddBooleanFeature(features::kPageHasPswdInputs);
398 LoadURL("http://host.com/");
399 ASSERT_TRUE(ExtractFeatures(&features));
400 ExpectFeatureMapsAreEqual(features, expected_features);
403 } // namespace safe_browsing