// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

///////////////////////////////////////////////////////////////////////////////////////////////////
#include <gtest/gtest.h>
#include "api/memory.hpp"
#include <api/input_layout.hpp>
#include "api/convolution.hpp"
#include "api/eltwise.hpp"
#include "api/reorder.hpp"
#include <api/topology.hpp>
#include <api/network.hpp>
#include <api/engine.hpp>
#include "test_utils/test_utils.h"
#include <api/data.hpp>

#include <api_extension/fused_conv_eltwise.hpp>

#include <cassert>
#include <cmath>
#include <gmock/gmock.h>
#include <limits>
#include <type_traits>

using namespace cldnn;
using namespace tests;
using namespace testing;
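
// Builds a convolution + eltwise(sum) graph with optimize_data enabled and
// checks that the network exposes a single output ("out") whose bfyx layout
// matches the input shape.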
TEST(fused_conv_eltwise, basic_0)
{
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 4, 5 } });
    auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });

    set_values(input, {
        1.0f, 2.0f, -15.f, 3.0f, 4.0f, -15.f, 5.0f, 6.0f, -15.f, 7.0f,
        -15.f, 0.0f, 0.0f, -15.f, 0.5f, -0.5f, -15.f, 8.0f, 1.5f, 5.2f
    });

    topology topology(
        input_layout("input", input.get_layout()),
        data("weights", weights),
        convolution("conv", "input", { "weights" }),
        eltwise("eltwise", "input", "conv", eltwise_mode::sum),
        reorder("out", "eltwise", format::bfyx, data_types::f32));

    build_options opt;
    opt.set_option(build_option::optimize_data(true));
    network network(engine, topology, opt);
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "out");

    auto output = outputs.begin()->second.get_memory();
    auto&& out_layout = output.get_layout();

    EXPECT_EQ(out_layout.format, format::bfyx);
    EXPECT_EQ(out_layout.size.batch[0], 1);
    EXPECT_EQ(out_layout.size.feature[0], 1);
    EXPECT_EQ(out_layout.size.spatial[0], 4);
    EXPECT_EQ(out_layout.size.spatial[1], 5);
}
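
// Same graph, but the eltwise is itself the network output (no trailing
// reorder), so the optimizer must not fuse conv and eltwise; the output
// layout must still match the input shape.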
TEST(fused_conv_eltwise, dont_fuse_if_conv_elt_are_outputs)
{
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 4, 5 } });
    auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });

    set_values(input, {
        1.0f, 2.0f, -15.f, 3.0f, 4.0f, -15.f, 5.0f, 6.0f, -15.f, 7.0f,
        -15.f, 0.0f, 0.0f, -15.f, 0.5f, -0.5f, -15.f, 8.0f, 1.5f, 5.2f
    });

    topology topology(
        input_layout("input", input.get_layout()),
        data("weights", weights),
        convolution("conv", "input", { "weights" }),
        eltwise("out", "input", "conv", eltwise_mode::sum));

    build_options opt;
    opt.set_option(build_option::optimize_data(true));
    network network(engine, topology, opt);
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "out");

    auto output = outputs.begin()->second.get_memory();
    auto&& out_layout = output.get_layout();

    EXPECT_EQ(out_layout.format, format::bfyx);
    EXPECT_EQ(out_layout.size.batch[0], 1);
    EXPECT_EQ(out_layout.size.feature[0], 1);
    EXPECT_EQ(out_layout.size.spatial[0], 4);
    EXPECT_EQ(out_layout.size.spatial[1], 5);
}
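
// Typed fixture for fused_conv_eltwise tests. Every add_feature() call
// contributes one independent feature channel: a 1x4 input row convolved with
// a 3-tap filter (two output points), a bias, quantization/calibration
// factors, an eltwise summand, and the expected pre-ReLU result.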
template<typename InputTy,
         typename OutputTy>
class FusedConvTest : public testing::Test
{
protected:
    static constexpr bool is_pure_float = std::is_same<InputTy, float>::value;
    using OutputPreActivationTy = typename std::conditional<is_pure_float, float, int32_t>::type;
    using WeightsTy = typename std::conditional<is_pure_float, float, int8_t>::type;
    using BiasesTy = typename std::conditional<is_pure_float, float, int32_t>::type;

    topology the_topology;

    std::vector<InputTy> input_values;
    std::vector<WeightsTy> weights_values;
    std::vector<BiasesTy> biases_values;
    // Note, not all of the quantization/calibration factors are used in all the
    // tests. However, I didn't come up with a way to correctly reflect that
    // while unifying the boilerplate testing code.
    static constexpr float ignore = std::numeric_limits<float>::quiet_NaN();
    std::vector<float> input_quant_factors_values;
    std::vector<float> calibration_values;

    // Eltwise part:
    std::vector<InputTy> non_conv_input_values;
    std::vector<float> eltw_output_calibration_values;
    std::vector<OutputPreActivationTy> output_pre_relu;

    void add_feature(std::vector<InputTy> input,
                     std::vector<WeightsTy> weights,
                     BiasesTy bias,
                     float input_quant_factor,
                     float conv_calibration,
                     std::vector<InputTy> non_conv_input,
                     float eltw_output_calibration,
                     std::vector<OutputPreActivationTy> output)
    {
        assert(non_conv_input.size() == output.size());
        input_values.insert(input_values.end(), input.begin(), input.end());
        weights_values.insert(
            weights_values.end(), weights.begin(), weights.end());
        biases_values.push_back(bias);
        input_quant_factors_values.push_back(input_quant_factor);
        calibration_values.push_back(conv_calibration);
        non_conv_input_values.insert(non_conv_input_values.end(),
                                     non_conv_input.begin(),
                                     non_conv_input.end());
        eltw_output_calibration_values.push_back(eltw_output_calibration);
        output_pre_relu.insert(
            output_pre_relu.end(), output.begin(), output.end());
    }
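
    // Assemble the network from all features added so far, run it, and compare
    // the "fused_conv" output (per feature, per x) against output_pre_relu
    // passed through ReLU and saturation to OutputTy.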
    void do_test(const fused_conv_eltwise& fused_prim)
    {
        const auto& engine = get_test_engine();

        int n_features = static_cast<int>(biases_values.size());

        auto input_shape = tensor(1, n_features, 4, 1);
        auto weights_shape = tensor(n_features, n_features, 3, 1);
        auto biases_shape = tensor(1, n_features, 1, 1);
        auto sum_input_shape = tensor(1, n_features, 2, 1);

        auto input = memory::allocate(
            engine,
            {type_to_data_type<InputTy>::value, format::bfyx, input_shape});
        auto weights = memory::allocate(
            engine,
            {type_to_data_type<WeightsTy>::value, format::bfyx, weights_shape});

        auto biases = memory::allocate(
            engine,
            {type_to_data_type<BiasesTy>::value, format::bfyx, biases_shape});
        auto input_quant_factors = memory::allocate(
            engine, {data_types::f32, format::bfyx, biases_shape});
        auto conv_output_calibration = memory::allocate(
            engine, {data_types::f32, format::bfyx, biases_shape});
        auto sum_input = memory::allocate(
            engine,
            {type_to_data_type<InputTy>::value, format::bfyx, sum_input_shape});
        auto eltw_output_calibration = memory::allocate(
            engine, {data_types::f32, format::bfyx, biases_shape});

        set_values(input, input_values);
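        // Expand the per-feature 3-tap filters into a block-diagonal
        // [n_features x n_features x 3] weights tensor, so each output feature
        // convolves only its own input feature.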
        std::vector<WeightsTy> post_processed_weights_values(n_features
                                                             * n_features * 3);
        for (int output_feature = 0; output_feature < n_features; ++output_feature)
            for (int input_feature = 0; input_feature < n_features;
                 ++input_feature)
                for (int x = 0; x < 3; ++x)
                {
                    const size_t idx =
                        output_feature * n_features * 3 + input_feature * 3 + x;
                    if (input_feature == output_feature)
                        post_processed_weights_values[idx] =
                            weights_values[input_feature * 3 + x];
                    else
                        post_processed_weights_values[idx] = 0;
                }
        set_values(weights, post_processed_weights_values);
        set_values(biases, biases_values);
        set_values(input_quant_factors, input_quant_factors_values);
        set_values(conv_output_calibration, calibration_values);
        set_values(sum_input, non_conv_input_values);
        set_values(eltw_output_calibration, eltw_output_calibration_values);

        the_topology.add(input_layout("input", input.get_layout()));
        the_topology.add(data("weights", weights));
        the_topology.add(data("biases", biases));
        the_topology.add(data("sum_input", sum_input));
        the_topology.add(data("input_quant_factors", input_quant_factors));
        the_topology.add(data("conv_output_calibration", conv_output_calibration));
        the_topology.add(data("eltw_output_calibration", eltw_output_calibration));
        the_topology.add(fused_prim);

        build_options opts;
        opts.set_option(build_option::optimize_data(false));

        network network(engine, the_topology, opts);
        network.set_input_data("input", input);

        auto outputs = network.execute();

        auto output_memory = outputs.at("fused_conv").get_memory();
        auto output_layout = output_memory.get_layout();
        auto output_ptr = output_memory.pointer<OutputTy>();
        int y_size = output_layout.size.spatial[1];
        int x_size = output_layout.size.spatial[0];
        int f_size = output_layout.size.feature[0];
        int b_size = output_layout.size.batch[0];
        EXPECT_EQ(output_layout.format, format::bfyx);
        EXPECT_EQ(y_size, 1);
        EXPECT_EQ(x_size, 2);
        EXPECT_EQ(f_size, n_features);
        EXPECT_EQ(b_size, 1);

        for (int f = 0; f < f_size; f++)
            for (int x = 0; x < x_size; ++x)
            {
                // printf("f: %d, x: %d\n", f, x);
                OutputPreActivationTy expected =
                    pre_relu_to_output(output_pre_relu[f * x_size + x]);
                auto actual = static_cast<OutputPreActivationTy>(
                    output_ptr[f * x_size + x]);
                expect_eq(expected, actual);
            }
    }
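
    // expect_eq dispatches on the result type: floating-point results are
    // compared with a small tolerance, integral results must match exactly.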
    template<typename T = OutputPreActivationTy>
    static typename std::enable_if<std::is_floating_point<T>::value>::type
    expect_eq(const OutputPreActivationTy& lhs, const OutputPreActivationTy& rhs)
    {
        EXPECT_NEAR(lhs, rhs, 0.001f);
    }

    template<typename T = OutputPreActivationTy>
    static typename std::enable_if<std::is_integral<T>::value>::type
    expect_eq(const OutputPreActivationTy& lhs, const OutputPreActivationTy& rhs)
    {
        EXPECT_EQ(lhs, rhs);
    }
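
    // Model the primitive's final stage: ReLU, then saturation to the
    // representable range of OutputTy.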
    template <typename T>
    static T pre_relu_to_output(T pre_relu) {
        // No std::clamp before C++17 :(
        return std::min(
            static_cast<T>(std::numeric_limits<OutputTy>::max()),
            std::max(static_cast<T>(std::numeric_limits<OutputTy>::lowest()),
                     std::max(static_cast<T>(0), pre_relu)));
    }
};

class FusedConvTest_all_float : public FusedConvTest<float, float>
{};

TEST_F(FusedConvTest_all_float, basic) {
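    // Feature 0: conv = {125*2 + 125*0 + 0*1, 125*2 + 0*0 + 1*1} = {250, 251};
    // + bias 1 -> {251, 252}; + eltwise input -10 -> {241, 242}.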
    add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
                {2.0f, 0.0f, 1.0f},           // weights
                1.0f,                         // bias
                1.0f,                         // conv_input_quant
                1.0f,                         // conv_output_calibration
                {-10.0f, -10.0f},             // non_conv_input
                1.0f,                         // eltw_output_calibration
                {241.0f, 242.0f});            // output_pre_relu

    add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
                {2.0f, 0.0f, 1.0f},           // weights
                0.0f,                         // bias
                1.0f,                         // conv_input_quant
                1.0f,                         // conv_output_calibration
                {-10.0f, -11.0f},             // non_conv_input
                2.0f,                         // eltw_output_calibration
                {480.0f, 480.0f});            // output_pre_relu

    do_test(fused_conv_eltwise("fused_conv",
                               "input",
                               "sum_input",
                               eltwise_mode::sum,
                               {"weights"},
                               {"biases"},
                               {"input_quant_factors"},
                               {"conv_output_calibration"},
                               1.0f,           // conv_i_quantization_factor
                               1.0f,           // non_conv_scale
                               "eltw_output_calibration",
                               {{1, 1, 1, 1}}, // eltw_stride
                               {1, 1, 1, 1},   // stride
                               {0, 0, 0, 0},   // input_offset
                               {1, 1, 1, 1},   // dilation
                               false,          // conv_with_activation
                               0.0f,           // con_activation_slp
                               true,           // eltw_activation
                               0.0f));         // eltw_activation_slp
}

class FusedConvTest_no_conv_calibration : public FusedConvTest<float, float>
{};

TEST_F(FusedConvTest_no_conv_calibration, basic) {
    // Having no conv output calibration might happen if both conv output and
    // non-conv input happen to be normalized to the same dynamic range, or if
    // tensor-wise (instead of per-channel) calibration is used. Also, a
    // similar thing might happen for a convolution with calibration without
    // quantization (which is the real target of this test, needed for the
    // Inference Engine).
    //
    // add_feature contains data for conv quantization/calibration, but the
    // primitive won't use it. It's just much easier to unify different tests
    // this way.
    add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
                {2.0f, 0.0f, 1.0f},           // weights
                1.0f,                         // bias
                1.0f,                         // conv_input_quant
                ignore,                       // conv_output_calibration
                {-10.0f, -10.0f},             // non_conv_input
                1.0f,                         // eltw_output_calibration
                {241.0f, 242.0f});            // output_pre_relu

    add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
                {2.0f, 0.0f, 1.0f},           // weights
                0.0f,                         // bias
                1.0f,                         // conv_input_quant
                ignore,                       // conv_output_calibration
                {-10.0f, -11.0f},             // non_conv_input
                2.0f,                         // eltw_output_calibration
                {480.0f, 480.0f});            // output_pre_relu

    do_test(fused_conv_eltwise("fused_conv",
                               "input",
                               "sum_input",
                               eltwise_mode::sum,
                               {"weights"},
                               {"biases"},
                               {"input_quant_factors"},
                               {},             // conv_output_calibration
                               1.0f,           // conv_i_quantization_factor
                               1.0f,           // non_conv_scale
                               "eltw_output_calibration",
                               {{1, 1, 1, 1}}, // eltw_stride
                               {1, 1, 1, 1},   // stride
                               {0, 0, 0, 0},   // input_offset
                               {1, 1, 1, 1},   // dilation
                               false,          // conv_with_activation
                               0.0f,           // con_activation_slp
                               true,           // eltw_activation
                               0.0f));         // eltw_activation_slp
}

class FusedConvTest_non_conv_scale_per_primitive : public FusedConvTest<int8_t, int8_t>
{};

TEST_F(FusedConvTest_non_conv_scale_per_primitive, basic) {
    // NOTE: The data in add_feature calls implicitly assumes this!
    const float non_conv_scale = 2.0f; // TODO: Need per-channel too?

    // Check that the output precision is `u8` indeed. If it were not, then 251
    // would either be rounded to 250 or 252. Ensure that's not the case and
    // that the outputs actually differ.
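    // Features 0/1: conv = {250, 251} + bias (1 resp. 0); the eltwise input
    // {-10, -10} is scaled by non_conv_scale == 2 and contributes -20.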
    add_feature({125, 125, 0, 1}, {2, 0, 1}, 1, 1.0f, ignore, {-10, -10}, 1.0f, {231, 232});
    add_feature({125, 125, 0, 1}, {2, 0, 1}, 0, 1.0f, ignore, {-10, -10}, 1.0f, {230, 231});

    // Verify that activation is done before the final calibration + type
    // conversion (in other words, in higher precision than the output).
    add_feature({0, 50, 0, -50}, {0, 4, 4}, 1, 1.0f, ignore, {-10, -10}, 1.0f, {181, -219});
    add_feature({0, 50, 0, -50}, {0, 4, 4}, 1, 1.0f, ignore, {-5, -5}, 1.0f, {191, -209});

    // Same, but with non-unit calibration (just in case).
    add_feature({0, 50, 0, -50}, {0, 8, 8}, 2, 1.0f, ignore, {10, 10}, 0.5f, {211, -189});

    do_test(fused_conv_eltwise("fused_conv",
                               "input",
                               "sum_input",
                               eltwise_mode::sum,
                               {"weights"},
                               {"biases"},
                               {"input_quant_factors"},
                               {},             // conv_output_calibration
                               1.0f,           // conv_i_quantization_factor
                               non_conv_scale, // non_conv_scale
                               "eltw_output_calibration",
                               {{1, 1, 1, 1}}, // eltw_stride
                               {1, 1, 1, 1},   // stride
                               {0, 0, 0, 0},   // input_offset
                               {1, 1, 1, 1},   // dilation
                               false,          // conv_with_activation
                               0.0f,           // con_activation_slp
                               true,           // eltw_activation
                               0.0f));         // eltw_activation_slp
}

class FusedConvTest_i8_to_u8_quantized : public FusedConvTest<int8_t, uint8_t>
{};

TEST_F(FusedConvTest_i8_to_u8_quantized, basic) {
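    // Feature 0: conv = {250, 251} + bias 1, eltwise adds -10 -> {241, 242}.
    // Feature 1: ({250, 251} + {-10, -11}) * eltw calibration 2 -> {480, 480},
    // which pre_relu_to_output saturates to the u8 maximum.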
    add_feature({125, 125, 0, 1}, {2, 0, 1}, 1, ignore, ignore, {-10, -10}, 1, {241, 242});
    add_feature({125, 125, 0, 1}, {2, 0, 1}, 0, ignore, ignore, {-10, -11}, 2, {480, 480});

    do_test(fused_conv_eltwise("fused_conv",
                               "input",
                               "sum_input",
                               eltwise_mode::sum,
                               {"weights"},
                               {"biases"},
                               {},                                      // input_quant_factors
                               {},                                      // conv_output_calibration
                               1.0f,                                    // conv_i_quantization_factor
                               1.0f,                                    // non_conv_scale
                               "eltw_output_calibration",
                               std::vector<tensor>{tensor{1, 1, 1, 1}}, // eltw_stride
                               tensor{1, 1, 1, 1},                      // stride
                               tensor{0, 0, 0, 0},                      // input_offset
                               tensor{1, 1, 1, 1},                      // dilation
                               false,                                   // conv_with_activation
                               0.0f,                                    // con_activation_slp
                               true,                                    // eltw_activation
                               0.0f,                                    // eltw_activation_slp
                               padding(),
                               optional_data_type{data_types::u8}));
}

class FusedConvTest_i8_to_u8_no_eltw_calibration
    : public FusedConvTest<int8_t, uint8_t>
{};

TEST_F(FusedConvTest_i8_to_u8_no_eltw_calibration, basic) {
    const float non_conv_scale = 1.0f / 3.0f;
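    // Feature 0 (assuming the int32 bias joins the accumulator before the
    // quant factor is applied): (conv {248, 244} + bias 4) * 0.5 = {126, 124};
    // eltwise adds -60 * (1/3) = -20 -> {106, 104} == {252/2 - 20, 248/2 - 20}.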

    add_feature({124, 124, 0, -4}, // input
                {2, 0, 1},         // weights
                4,                 // bias
                0.5f,              // conv_input_quant
                ignore,            // conv_output_calibration
                {-60, -60},        // non_conv_input
                ignore,            // eltw_output_calibration
                {252 / 2 - 20, 248 / 2 - 20}); // output_pre_relu

    add_feature({3, 3, 1, 1}, // input
                {2, 0, 1},    // weights
                0,            // bias
                1.0f / 3.0f,  // conv_input_quant
                ignore,       // conv_output_calibration
                {1, 1},       // eltw_sum_input
                ignore,       // eltw_output_calibration
                // TODO: Do we really need that round? Should it be "3" instead?
                // { round(2.333) + round(0.333) }
                {2, 2});      // output_pre_relu

    do_test(fused_conv_eltwise("fused_conv",
                               "input",
                               "sum_input",
                               eltwise_mode::sum,
                               {"weights"},
                               {"biases"},
                               {"input_quant_factors"},
                               {},                                      // conv_output_calibration
                               1.0f,                                    // conv_i_quantization_factor
                               non_conv_scale,                          // non_conv_scale
                               {},                                      // eltw_output_calibration
                               std::vector<tensor>{tensor{1, 1, 1, 1}}, // eltw_stride
                               tensor{1, 1, 1, 1},                      // stride
                               tensor{0, 0, 0, 0},                      // input_offset
                               tensor{1, 1, 1, 1},                      // dilation
                               false,                                   // conv_with_activation
                               0.0f,                                    // con_activation_slp
                               true,                                    // eltw_activation
                               0.0f,                                    // eltw_activation_slp
                               padding(),
                               optional_data_type{data_types::u8}));
}