Upstream version 9.38.198.0
[platform/framework/web/crosswalk.git] / src / chrome / renderer / safe_browsing / scorer_unittest.cc
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/renderer/safe_browsing/scorer.h"
6
7 #include "base/containers/hash_tables.h"
8 #include "base/files/file_path.h"
9 #include "base/files/scoped_temp_dir.h"
10 #include "base/format_macros.h"
11 #include "base/memory/scoped_ptr.h"
12 #include "base/message_loop/message_loop.h"
13 #include "base/threading/thread.h"
14 #include "chrome/common/safe_browsing/client_model.pb.h"
15 #include "chrome/renderer/safe_browsing/features.h"
16 #include "testing/gmock/include/gmock/gmock.h"
17 #include "testing/gtest/include/gtest/gtest.h"
18
19 namespace safe_browsing {
20
21 class PhishingScorerTest : public ::testing::Test {
22  protected:
23   virtual void SetUp() {
24     // Setup a simple model.  Note that the scorer does not care about
25     // how features are encoded so we use readable strings here to make
26     // the test simpler to follow.
27     model_.Clear();
28     model_.add_hashes("feature1");
29     model_.add_hashes("feature2");
30     model_.add_hashes("feature3");
31     model_.add_hashes("token one");
32     model_.add_hashes("token two");
33
34     ClientSideModel::Rule* rule;
35     rule = model_.add_rule();
36     rule->set_weight(0.5);
37
38     rule = model_.add_rule();
39     rule->add_feature(0);  // feature1
40     rule->set_weight(2.0);
41
42     rule = model_.add_rule();
43     rule->add_feature(0);  // feature1
44     rule->add_feature(1);  // feature2
45     rule->set_weight(3.0);
46
47     model_.add_page_term(3);  // token one
48     model_.add_page_term(4);  // token two
49
50     // These will be murmur3 hashes, but for this test it's not necessary
51     // that the hashes correspond to actual words.
52     model_.add_page_word(1000U);
53     model_.add_page_word(2000U);
54     model_.add_page_word(3000U);
55
56     model_.set_max_words_per_term(2);
57     model_.set_murmur_hash_seed(12345U);
58     model_.set_max_shingles_per_page(10);
59     model_.set_shingle_size(3);
60   }
61
62   ClientSideModel model_;
63 };
64
65 TEST_F(PhishingScorerTest, HasValidModel) {
66   scoped_ptr<Scorer> scorer;
67   scorer.reset(Scorer::Create(model_.SerializeAsString()));
68   EXPECT_TRUE(scorer.get() != NULL);
69
70   // Invalid model string.
71   scorer.reset(Scorer::Create("bogus string"));
72   EXPECT_FALSE(scorer.get());
73
74   // Mode is missing a required field.
75   model_.clear_max_words_per_term();
76   scorer.reset(Scorer::Create(model_.SerializePartialAsString()));
77   EXPECT_FALSE(scorer.get());
78 }
79
80 TEST_F(PhishingScorerTest, PageTerms) {
81   scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));
82   ASSERT_TRUE(scorer.get());
83
84   // Use std::vector instead of base::hash_set for comparison.
85   // On Android, EXPECT_THAT(..., ContainerEq(...)) doesn't support
86   // std::hash_set, but std::vector works fine.
87   std::vector<std::string> expected_page_terms;
88   expected_page_terms.push_back("token one");
89   expected_page_terms.push_back("token two");
90   std::sort(expected_page_terms.begin(), expected_page_terms.end());
91
92   base::hash_set<std::string> page_terms = scorer->page_terms();
93   std::vector<std::string> page_terms_v(page_terms.begin(), page_terms.end());
94   std::sort(page_terms_v.begin(), page_terms_v.end());
95
96   EXPECT_THAT(page_terms_v, ::testing::ContainerEq(expected_page_terms));
97 }
98
99 TEST_F(PhishingScorerTest, PageWords) {
100   scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));
101   ASSERT_TRUE(scorer.get());
102   std::vector<uint32> expected_page_words;
103   expected_page_words.push_back(1000U);
104   expected_page_words.push_back(2000U);
105   expected_page_words.push_back(3000U);
106   std::sort(expected_page_words.begin(), expected_page_words.end());
107
108   base::hash_set<uint32> page_words = scorer->page_words();
109   std::vector<uint32> page_words_v(page_words.begin(), page_words.end());
110   std::sort(page_words_v.begin(), page_words_v.end());
111
112   EXPECT_THAT(page_words_v, ::testing::ContainerEq(expected_page_words));
113
114   EXPECT_EQ(2U, scorer->max_words_per_term());
115   EXPECT_EQ(12345U, scorer->murmurhash3_seed());
116   EXPECT_EQ(10U, scorer->max_shingles_per_page());
117   EXPECT_EQ(3U, scorer->shingle_size());
118 }
119
120 TEST_F(PhishingScorerTest, ComputeScore) {
121   scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));
122   ASSERT_TRUE(scorer.get());
123
124   // An empty feature map should match the empty rule.
125   FeatureMap features;
126   // The expected logodds is 0.5 (empty rule) => p = exp(0.5) / (exp(0.5) + 1)
127   // => 0.62245933120185459
128   EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features));
129   // Same if the feature does not match any rule.
130   EXPECT_TRUE(features.AddBooleanFeature("not existing feature"));
131   EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features));
132
133   // Feature 1 matches which means that the logodds will be:
134   //   0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) = 0.8
135   //   => p = 0.6899744811276125
136   EXPECT_TRUE(features.AddRealFeature("feature1", 0.15));
137   EXPECT_DOUBLE_EQ(0.6899744811276125, scorer->ComputeScore(features));
138
139   // Now, both feature 1 and feature 2 match.  Expected logodds:
140   //   0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) +
141   //   3.0 (rule weight) * 0.15 (feature1 weight) * 1.0 (feature2) weight = 9.8
142   //   => p = 0.99999627336071584
143   EXPECT_TRUE(features.AddBooleanFeature("feature2"));
144   EXPECT_DOUBLE_EQ(0.77729986117469119, scorer->ComputeScore(features));
145 }
146 }  // namespace safe_browsing