- add sources.
[platform/framework/web/crosswalk.git] / src / chrome / renderer / safe_browsing / phishing_url_feature_extractor_unittest.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h"
6
7 #include <string>
8 #include <vector>
9 #include "chrome/renderer/safe_browsing/features.h"
10 #include "chrome/renderer/safe_browsing/test_utils.h"
11 #include "testing/gmock/include/gmock/gmock.h"
12 #include "testing/gtest/include/gtest/gtest.h"
13 #include "url/gurl.h"
14
15 using ::testing::ElementsAre;
16
17 namespace safe_browsing {
18
19 class PhishingUrlFeatureExtractorTest : public ::testing::Test {
20  protected:
21   PhishingUrlFeatureExtractor extractor_;
22
23   void SplitStringIntoLongAlphanumTokens(const std::string& full,
24                                          std::vector<std::string>* tokens) {
25     PhishingUrlFeatureExtractor::SplitStringIntoLongAlphanumTokens(full,
26                                                                    tokens);
27   }
28 };
29
30 TEST_F(PhishingUrlFeatureExtractorTest, ExtractFeatures) {
31   std::string url = "http://123.0.0.1/mydocuments/a.file.html";
32   FeatureMap expected_features;
33   expected_features.AddBooleanFeature(features::kUrlHostIsIpAddress);
34   expected_features.AddBooleanFeature(features::kUrlPathToken +
35                                       std::string("mydocuments"));
36   expected_features.AddBooleanFeature(features::kUrlPathToken +
37                                       std::string("file"));
38   expected_features.AddBooleanFeature(features::kUrlPathToken +
39                                       std::string("html"));
40
41   FeatureMap features;
42   ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features));
43   ExpectFeatureMapsAreEqual(features, expected_features);
44
45   url = "http://www.www.cnn.co.uk/sports/sports/index.html?shouldnotappear";
46   expected_features.Clear();
47   expected_features.AddBooleanFeature(features::kUrlTldToken +
48                                       std::string("co.uk"));
49   expected_features.AddBooleanFeature(features::kUrlDomainToken +
50                                       std::string("cnn"));
51   expected_features.AddBooleanFeature(features::kUrlOtherHostToken +
52                                       std::string("www"));
53   expected_features.AddBooleanFeature(features::kUrlNumOtherHostTokensGTOne);
54   expected_features.AddBooleanFeature(features::kUrlPathToken +
55                                       std::string("sports"));
56   expected_features.AddBooleanFeature(features::kUrlPathToken +
57                                       std::string("index"));
58   expected_features.AddBooleanFeature(features::kUrlPathToken +
59                                       std::string("html"));
60
61   features.Clear();
62   ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features));
63   ExpectFeatureMapsAreEqual(features, expected_features);
64
65   url = "http://justadomain.com/";
66   expected_features.Clear();
67   expected_features.AddBooleanFeature(features::kUrlTldToken +
68                                       std::string("com"));
69   expected_features.AddBooleanFeature(features::kUrlDomainToken +
70                                       std::string("justadomain"));
71
72   features.Clear();
73   ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features));
74   ExpectFeatureMapsAreEqual(features, expected_features);
75
76   url = "http://witharef.com/#abc";
77   expected_features.Clear();
78   expected_features.AddBooleanFeature(features::kUrlTldToken +
79                                       std::string("com"));
80   expected_features.AddBooleanFeature(features::kUrlDomainToken +
81                                       std::string("witharef"));
82
83   features.Clear();
84   ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features));
85   ExpectFeatureMapsAreEqual(features, expected_features);
86
87   url = "http://...www..lotsodots....com./";
88   expected_features.Clear();
89   expected_features.AddBooleanFeature(features::kUrlTldToken +
90                                       std::string("com"));
91   expected_features.AddBooleanFeature(features::kUrlDomainToken +
92                                       std::string("lotsodots"));
93   expected_features.AddBooleanFeature(features::kUrlOtherHostToken +
94                                       std::string("www"));
95
96   features.Clear();
97   ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features));
98   ExpectFeatureMapsAreEqual(features, expected_features);
99
100   url = "http://unrecognized.tld/";
101   EXPECT_FALSE(extractor_.ExtractFeatures(GURL(url), &features));
102
103   url = "http://com/123";
104   EXPECT_FALSE(extractor_.ExtractFeatures(GURL(url), &features));
105
106   url = "http://.co.uk/";
107   EXPECT_FALSE(extractor_.ExtractFeatures(GURL(url), &features));
108
109   url = "file:///nohost.txt";
110   EXPECT_FALSE(extractor_.ExtractFeatures(GURL(url), &features));
111
112   url = "not:valid:at:all";
113   EXPECT_FALSE(extractor_.ExtractFeatures(GURL(url), &features));
114 }
115
116 TEST_F(PhishingUrlFeatureExtractorTest, SplitStringIntoLongAlphanumTokens) {
117   std::string full = "This.is/a_pretty\\unusual-!path,indeed";
118   std::vector<std::string> long_tokens;
119   SplitStringIntoLongAlphanumTokens(full, &long_tokens);
120   EXPECT_THAT(long_tokens,
121               ElementsAre("This", "pretty", "unusual", "path", "indeed"));
122
123   long_tokens.clear();
124   full = "...i-am_re/al&ly\\b,r,o|k=e:n///up%20";
125   SplitStringIntoLongAlphanumTokens(full, &long_tokens);
126   EXPECT_THAT(long_tokens, ElementsAre());
127 }
128
129 }  // namespace safe_browsing