1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/assist_ranker/example_preprocessing.h"
7 #include "base/strings/string_number_conversions.h"
8 #include "testing/gtest/include/gtest/gtest.h"
9 #include "third_party/protobuf/src/google/protobuf/map.h"
10 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"
12 namespace assist_ranker {
15 using ::google::protobuf::Map;
16 using ::google::protobuf::RepeatedField;
18 void EXPECT_EQUALS_EXAMPLE(const RankerExample& example1,
19 const RankerExample& example2) {
20 EXPECT_EQ(example1.features_size(), example2.features_size());
21 for (const auto& pair : example1.features()) {
22 const Feature& feature1 = pair.second;
23 const Feature& feature2 = example2.features().at(pair.first);
24 EXPECT_EQ(feature1.feature_type_case(), feature2.feature_type_case());
25 EXPECT_EQ(feature1.bool_value(), feature2.bool_value());
26 EXPECT_EQ(feature1.int32_value(), feature2.int32_value());
27 EXPECT_EQ(feature1.float_value(), feature2.float_value());
28 EXPECT_EQ(feature1.string_value(), feature2.string_value());
29 EXPECT_EQ(feature1.string_list().string_value_size(),
30 feature2.string_list().string_value_size());
31 for (int i = 0; i < feature1.string_list().string_value_size(); ++i) {
32 EXPECT_EQ(feature1.string_list().string_value(i),
33 feature2.string_list().string_value(i));
40 class ExamplePreprocessorTest : public ::testing::Test {
42 void SetUp() override {
43 auto& features = *example_.mutable_features();
44 features[bool_name_].set_bool_value(bool_value_);
45 features[int32_name_].set_int32_value(int32_value_);
46 features[float_name_].set_float_value(float_value_);
47 features[one_hot_name_].set_string_value(one_hot_value_);
48 *features[sparse_name_].mutable_string_list()->mutable_string_value() = {
49 sparse_values_.begin(), sparse_values_.end()};
52 RankerExample example_;
53 const std::string bool_name_ = "bool_feature";
54 const bool bool_value_ = true;
55 const std::string int32_name_ = "int32_feature";
56 const int int32_value_ = 2;
57 const std::string float_name_ = "float_feature";
58 const float float_value_ = 3.0;
59 const std::string one_hot_name_ = "one_hot_feature";
60 const std::string elem1_ = "elem1";
61 const std::string elem2_ = "elem2";
62 const std::string one_hot_value_ = elem1_;
63 const std::string sparse_name_ = "sparse_feature";
64 const std::vector<std::string> sparse_values_ = {elem1_, elem2_};
67 TEST_F(ExamplePreprocessorTest, AddMissingFeatures) {
68 RankerExample expected = example_;
69 ExamplePreprocessorConfig config;
71 // Adding missing feature label to an existing feature has no effect.
72 config.add_missing_features(bool_name_);
73 EXPECT_EQ(ExamplePreprocessor(config).Process(&example_),
74 ExamplePreprocessor::kSuccess);
75 EXPECT_EQUALS_EXAMPLE(example_, expected);
78 // Adding missing feature label to non-existing feature returns a
79 // "_MissingFeature" feature with a list of feature names.
80 const std::string foo = "foo";
81 config.add_missing_features(foo);
82 EXPECT_EQ(ExamplePreprocessor(config).Process(&example_),
83 ExamplePreprocessor::kSuccess);
85 .mutable_features())[ExamplePreprocessor::kMissingFeatureDefaultName]
86 .mutable_string_list()
87 ->add_string_value(foo);
88 EXPECT_EQUALS_EXAMPLE(example_, expected);
92 TEST_F(ExamplePreprocessorTest, AddBucketizeFeatures) {
93 RankerExample expected = example_;
94 ExamplePreprocessorConfig config;
95 Map<std::string, ExamplePreprocessorConfig::Boundaries>& bucketizers =
96 *config.mutable_bucketizers();
98 // Adding bucketized feature to non-existing feature returns the same example.
99 const std::string foo = "foo";
100 bucketizers[foo].add_boundaries(0.5);
101 EXPECT_EQ(ExamplePreprocessor(config).Process(&example_),
102 ExamplePreprocessor::kSuccess);
103 EXPECT_EQUALS_EXAMPLE(example_, expected);
106 // Bucketizing a bool feature returns same proto.
107 bucketizers[bool_name_].add_boundaries(0.5);
108 EXPECT_EQ(ExamplePreprocessor(config).Process(&example_),
109 ExamplePreprocessor::kNonbucketizableFeatureType);
110 EXPECT_EQUALS_EXAMPLE(example_, expected);
113 // Bucketizing a string feature returns same proto.
114 bucketizers[one_hot_name_].add_boundaries(0.5);
115 EXPECT_EQ(ExamplePreprocessor(config).Process(&example_),
116 ExamplePreprocessor::kNonbucketizableFeatureType);
117 EXPECT_EQUALS_EXAMPLE(example_, expected);
120 // Bucketizing an int32 feature with 3 boundary.
121 bucketizers[int32_name_].add_boundaries(int32_value_ - 2);
122 bucketizers[int32_name_].add_boundaries(int32_value_ - 1);
123 bucketizers[int32_name_].add_boundaries(int32_value_ + 1);
124 EXPECT_EQ(ExamplePreprocessor(config).Process(&example_),
125 ExamplePreprocessor::kSuccess);
126 (*expected.mutable_features())[int32_name_].set_string_value("2");
127 EXPECT_EQUALS_EXAMPLE(example_, expected);
130 // Bucketizing a float feature with 3 boundary.
131 bucketizers[float_name_].add_boundaries(float_value_ - 0.2);
132 bucketizers[float_name_].add_boundaries(float_value_ - 0.1);
133 bucketizers[float_name_].add_boundaries(float_value_ + 0.1);
134 EXPECT_EQ(ExamplePreprocessor(config).Process(&example_),
135 ExamplePreprocessor::kSuccess);
136 (*expected.mutable_features())[float_name_].set_string_value("2");
137 EXPECT_EQUALS_EXAMPLE(example_, expected);
140 // Bucketizing a float feature with value equal to a boundary.
141 (*example_.mutable_features())[float_name_].set_float_value(float_value_);
142 bucketizers[float_name_].add_boundaries(float_value_ - 0.2);
143 bucketizers[float_name_].add_boundaries(float_value_ - 0.1);
144 bucketizers[float_name_].add_boundaries(float_value_);
145 bucketizers[float_name_].add_boundaries(float_value_ + 0.1);
146 EXPECT_EQ(ExamplePreprocessor(config).Process(&example_),
147 ExamplePreprocessor::kSuccess);
148 (*expected.mutable_features())[float_name_].set_string_value("3");
149 EXPECT_EQUALS_EXAMPLE(example_, expected);
153 // Tests normalization of float and int32 features.
154 TEST_F(ExamplePreprocessorTest, NormalizeFeatures) {
155 RankerExample expected = example_;
156 ExamplePreprocessorConfig config;
157 Map<std::string, float>& normalizers = *config.mutable_normalizers();
158 normalizers[int32_name_] = int32_value_ - 1.0f;
159 normalizers[float_name_] = float_value_ + 1.0f;
161 (*expected.mutable_features())[int32_name_].set_float_value(1.0f);
162 (*expected.mutable_features())[float_name_].set_float_value(
163 float_value_ / (float_value_ + 1.0f));
165 EXPECT_EQ(ExamplePreprocessor(config).Process(&example_),
166 ExamplePreprocessor::kSuccess);
167 EXPECT_EQUALS_EXAMPLE(example_, expected);
169 // Zero normalizer returns an error.
170 normalizers[float_name_] = 0.0f;
171 EXPECT_EQ(ExamplePreprocessor(config).Process(&example_),
172 ExamplePreprocessor::kNormalizerIsZero);
175 // Zero normalizer returns an error.
176 TEST_F(ExamplePreprocessorTest, ZeroNormalizerReturnsError) {
177 RankerExample expected = example_;
178 ExamplePreprocessorConfig config;
179 (*config.mutable_normalizers())[float_name_] = 0.0f;
180 EXPECT_EQ(ExamplePreprocessor(config).Process(&example_),
181 ExamplePreprocessor::kNormalizerIsZero);
184 // Tests converts a bool or int32 feature to a string feature.
185 TEST_F(ExamplePreprocessorTest, ConvertToStringFeatures) {
186 RankerExample expected = example_;
187 ExamplePreprocessorConfig config;
188 auto& features_list = *config.mutable_convert_to_string_features();
189 *features_list.Add() = bool_name_;
190 *features_list.Add() = int32_name_;
191 *features_list.Add() = one_hot_name_;
193 EXPECT_EQ(ExamplePreprocessor(config).Process(&example_),
194 ExamplePreprocessor::kSuccess);
196 (*expected.mutable_features())[bool_name_].set_string_value(
197 base::IntToString(static_cast<int>(bool_value_)));
198 (*expected.mutable_features())[int32_name_].set_string_value(
199 base::IntToString(int32_value_));
200 EXPECT_EQUALS_EXAMPLE(example_, expected);
203 // Float features can't be convert to string features.
204 TEST_F(ExamplePreprocessorTest,
205 ConvertFloatFeatureToStringFeatureReturnsError) {
206 RankerExample expected = example_;
207 ExamplePreprocessorConfig config;
208 config.add_convert_to_string_features(float_name_);
209 EXPECT_EQ(ExamplePreprocessor(config).Process(&example_),
210 ExamplePreprocessor::kNonConvertibleToStringFeatureType);
// Checks vectorization. The config's feature_indices map assigns a position
// to each feature full name, and the expected example carries a float list
// under ExamplePreprocessor::kVectorizedFeatureDefaultName with one value per
// position. The test also expects kNoFeatureIndexFound when the example holds
// a feature, or a string-list element, that has no configured index.
// NOTE(review): several continuation lines appear to be missing from this copy
// (the right-hand sides of three feature_indices assignments that end in '='
// and the argument of one set_string_value call) — restore them before
// building.
213 TEST_F(ExamplePreprocessorTest, Vectorization) {
214 ExamplePreprocessorConfig config;
215 Map<std::string, int32_t>& feature_indices =
216 *config.mutable_feature_indices();
// Start from the fixture example and build the expected float vector in
// step with the indices configured below.
218 RankerExample example_vec_expected = example_;
219 RepeatedField<float>& feature_vector =
220 *(*example_vec_expected.mutable_features())
221 [ExamplePreprocessor::kVectorizedFeatureDefaultName]
222 .mutable_float_list()
223 ->mutable_float_value();
225 // bool feature puts the value to the corresponding place.
226 feature_indices[bool_name_] = 0;
227 feature_vector.Add(1.0);
229 // int32 feature puts the value to the corresponding place.
230 feature_indices[int32_name_] = 1;
231 feature_vector.Add(int32_value_);
233 // float feature puts the value to the corresponding place.
234 feature_indices[float_name_] = 2;
235 feature_vector.Add(float_value_);
237 // string value is vectorized as 1.0.
238 feature_indices[ExamplePreprocessor::FeatureFullname(one_hot_name_,
239 one_hot_value_)] = 3;
240 feature_vector.Add(1.0);
242 // string list value is vectorized as 1.0.
243 feature_indices[ExamplePreprocessor::FeatureFullname(sparse_name_, elem1_)] =
245 feature_indices[ExamplePreprocessor::FeatureFullname(sparse_name_, elem2_)] =
247 feature_vector.Add(1.0);
248 feature_vector.Add(1.0);
250 // string list value with element not in the example sets the corresponding
// (the end of the sentence above is not visible here; the 0.0 added below
// suggests it reads "place to 0.0" — TODO confirm)
252 feature_indices[ExamplePreprocessor::FeatureFullname(sparse_name_, "foo")] =
254 feature_vector.Add(0.0);
256 // Non-existing feature puts 0 to the corresponding place.
257 feature_indices["bar"] = 6;
258 feature_vector.Add(0.0);
260 // Verify the preprocessing result.
// Work on a copy so that |example_| stays pristine for the error cases below.
261 RankerExample example = example_;
262 EXPECT_EQ(ExamplePreprocessor(config).Process(&example),
263 ExamplePreprocessor::kSuccess);
264 EXPECT_EQUALS_EXAMPLE(example, example_vec_expected);
266 // Example with extra numeric feature gets kNoFeatureIndexFound error;
267 RankerExample example_with_extra_numeric = example_;
268 (*example_with_extra_numeric.mutable_features())["foo"].set_float_value(1.0);
269 EXPECT_EQ(ExamplePreprocessor(config).Process(&example_with_extra_numeric),
270 ExamplePreprocessor::ExamplePreprocessor::kNoFeatureIndexFound);
272 // Example with extra one-hot feature gets kNoFeatureIndexFound error;
273 RankerExample example_with_extra_one_hot = example_;
274 (*example_with_extra_one_hot.mutable_features())["foo"].set_string_value(
276 EXPECT_EQ(ExamplePreprocessor(config).Process(&example_with_extra_one_hot),
277 ExamplePreprocessor::ExamplePreprocessor::kNoFeatureIndexFound);
279 // Example with extra sparse feature value gets kNoFeatureIndexFound error;
280 RankerExample example_with_extra_sparse = example_;
281 (*example_with_extra_sparse.mutable_features())[sparse_name_]
282 .mutable_string_list()
283 ->add_string_value("bar");
284 EXPECT_EQ(ExamplePreprocessor(config).Process(&example_with_extra_sparse),
285 ExamplePreprocessor::ExamplePreprocessor::kNoFeatureIndexFound);
288 TEST_F(ExamplePreprocessorTest, MultipleErrorCode) {
289 ExamplePreprocessorConfig config;
291 (*config.mutable_feature_indices())[int32_name_] = 0;
292 (*config.mutable_feature_indices())[float_name_] = 1;
293 (*config.mutable_bucketizers())[one_hot_name_].add_boundaries(0.5);
294 RankerExample example_vec_expected = example_;
295 RepeatedField<float>& feature_vector =
296 *(*example_vec_expected.mutable_features())
297 [ExamplePreprocessor::kVectorizedFeatureDefaultName]
298 .mutable_float_list()
299 ->mutable_float_value();
301 feature_vector.Add(int32_value_);
302 feature_vector.Add(float_value_);
304 const int error_code = ExamplePreprocessor(config).Process(&example_);
305 // Error code contains features in example_ but not in feature_indices.
306 EXPECT_TRUE(error_code & ExamplePreprocessor::kNoFeatureIndexFound);
307 // Error code contains features that are not bucketizable.
308 EXPECT_TRUE(error_code & ExamplePreprocessor::kNonbucketizableFeatureType);
309 // No kInvalidFeatureType error.
310 EXPECT_FALSE(error_code & ExamplePreprocessor::kInvalidFeatureType);
311 // Only two elements is correctly vectorized.
312 EXPECT_EQUALS_EXAMPLE(example_, example_vec_expected);
// Iterates over |example_| with ExampleFloatIterator, stores each yielded
// field in |float_example| as a float feature keyed by the field's full name,
// and compares the result with a hand-built expectation.
// NOTE(review): two continuation lines appear to be missing from this copy
// (the argument of the set_float_value call inside the loop and the second
// argument of the first FeatureFullname call below) — restore them before
// building.
315 TEST_F(ExamplePreprocessorTest, ExampleFloatIterator) {
316 RankerExample float_example;
317 for (const auto& field : ExampleFloatIterator(example_)) {
// Every field of the fixture example is expected to iterate without error.
318 EXPECT_EQ(field.error, ExamplePreprocessor::kSuccess);
319 (*float_example.mutable_features())[field.fullname].set_float_value(
323 RankerExample float_example_expected;
324 auto& feature_map = *float_example_expected.mutable_features();
// Scalar features keep their own name and carry their value as a float.
326 feature_map[bool_name_].set_float_value(bool_value_);
327 feature_map[int32_name_].set_float_value(int32_value_);
328 feature_map[float_name_].set_float_value(float_value_);
// String and string-list features are keyed by FeatureFullname(name, value)
// and are expected to carry 1.0.
329 feature_map[ExamplePreprocessor::FeatureFullname(one_hot_name_,
331 .set_float_value(1.0);
332 feature_map[ExamplePreprocessor::FeatureFullname(sparse_name_, elem1_)]
333 .set_float_value(1.0);
334 feature_map[ExamplePreprocessor::FeatureFullname(sparse_name_, elem2_)]
335 .set_float_value(1.0);
337 EXPECT_EQUALS_EXAMPLE(float_example, float_example_expected);
340 TEST_F(ExamplePreprocessorTest, ExampleFloatIteratorError) {
341 RankerExample example;
342 example.mutable_features()->insert({"foo", Feature::default_instance()});
343 (*example.mutable_features())["bar"]
344 .mutable_string_list()
345 ->mutable_string_value();
346 int num_of_fields = 0;
347 for (const auto& field : ExampleFloatIterator(example)) {
348 if (field.fullname == "foo") {
349 EXPECT_EQ(field.error, ExamplePreprocessor::kInvalidFeatureType);
351 if (field.fullname == "bar") {
352 EXPECT_EQ(field.error, ExamplePreprocessor::kInvalidFeatureListIndex);
356 // Check the iterator indeed found the two fields.
357 EXPECT_EQ(num_of_fields, 2);
360 } // namespace assist_ranker