2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
18 #include <gtest/gtest.h>
19 #include "api/CPP/memory.hpp"
20 #include <api/CPP/input_layout.hpp>
21 #include "api/CPP/convolution.hpp"
22 #include "api/CPP/eltwise.hpp"
23 #include "api/CPP/reorder.hpp"
24 #include <api/CPP/topology.hpp>
25 #include <api/CPP/network.hpp>
26 #include <api/CPP/engine.hpp>
27 #include "test_utils/test_utils.h"
28 #include <api/CPP/data.hpp>
30 #include <api_extension/CPP/fused_conv_eltwise.hpp>
34 #include <gmock/gmock.h>
37 using namespace cldnn;
38 using namespace tests;
39 using namespace testing;
// Smoke test: build conv -> eltwise(sum with the network input) -> reorder
// with optimize_data enabled, so the conv+eltwise pair is eligible for
// fusion, and check that the single output "out" keeps the expected
// f32 bfyx layout and 1x1x4x5 dimensions.
41 TEST(fused_conv_eltwise, basic_0)
43 const auto& engine = get_test_engine();
// 1x1 feature map of 4x5 spatial size; weights are a single 1x1 kernel.
45 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 4, 5 } });
46 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
// Input values; the -15.f entries are interleaved sentinels among the data.
49 1.0f, 2.0f, -15.f, 3.0f, 4.0f, -15.f, 5.0f, 6.0f, -15.f, 7.0f,
50 -15.f, 0.0f, 0.0f, -15.f, 0.5f, -0.5f, -15.f, 8.0f, 1.5f, 5.2f
// Topology: the eltwise consumes both the raw input and the conv output,
// and a final reorder ("out") pins the output format/type.
54 input_layout("input", input.get_layout()),
55 data("weights", weights),
56 convolution("conv", "input", { "weights" }),
57 eltwise("eltwise", "input", "conv", eltwise_mode::sum),
58 reorder("out", "eltwise", format::bfyx, data_types::f32));
// optimize_data(true) enables graph optimizations, including conv+eltwise fusion.
61 opt.set_option(build_option::optimize_data(true));
62 network network(engine, topology, opt);
63 network.set_input_data("input", input);
65 auto outputs = network.execute();
// Exactly one network output, and it must be the final reorder node.
66 EXPECT_EQ(outputs.size(), size_t(1));
67 EXPECT_EQ(outputs.begin()->first, "out");
69 auto output = outputs.begin()->second.get_memory();
70 auto&& out_layout = output.get_layout();
// Layout must survive fusion unchanged: bfyx, batch 1, 1 feature, 4x5 spatial.
72 EXPECT_EQ(out_layout.format, format::bfyx);
73 EXPECT_EQ(out_layout.size.batch[0], 1);
74 EXPECT_EQ(out_layout.size.feature[0], 1);
75 EXPECT_EQ(out_layout.size.spatial[0], 4);
76 EXPECT_EQ(out_layout.size.spatial[1], 5);
// Same conv + eltwise(sum) graph as basic_0, but here the eltwise node
// itself is the network output (id "out", no trailing reorder). The test
// verifies the network still produces a single, correctly-shaped output in
// this configuration (i.e. fusion must not break when conv/eltwise are
// outputs of the network).
80 TEST(fused_conv_eltwise, dont_fuse_if_conv_elt_are_outputs)
82 const auto& engine = get_test_engine();
84 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 5 } });
85 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
// Same input data as basic_0 (with -15.f sentinel values interleaved).
88 1.0f, 2.0f, -15.f, 3.0f, 4.0f, -15.f, 5.0f, 6.0f, -15.f, 7.0f,
89 -15.f, 0.0f, 0.0f, -15.f, 0.5f, -0.5f, -15.f, 8.0f, 1.5f, 5.2f
// Note: the eltwise is named "out" and is the last primitive — no reorder.
93 input_layout("input", input.get_layout()),
94 data("weights", weights),
95 convolution("conv", "input", { "weights" }),
96 eltwise("out", "input", "conv", eltwise_mode::sum));
99 opt.set_option(build_option::optimize_data(true));
100 network network(engine, topology, opt);
101 network.set_input_data("input", input);
103 auto outputs = network.execute();
104 EXPECT_EQ(outputs.size(), size_t(1));
105 EXPECT_EQ(outputs.begin()->first, "out");
107 auto output = outputs.begin()->second.get_memory();
108 auto&& out_layout = output.get_layout();
// Output layout must match the input: bfyx, batch 1, 1 feature, 4x5 spatial.
110 EXPECT_EQ(out_layout.format, format::bfyx);
111 EXPECT_EQ(out_layout.size.batch[0], 1);
112 EXPECT_EQ(out_layout.size.feature[0], 1);
113 EXPECT_EQ(out_layout.size.spatial[0], 4);
114 EXPECT_EQ(out_layout.size.spatial[1], 5);
// Typed test fixture for the fused_conv_eltwise primitive.
//
// InputTy selects the precision mode: for float inputs the whole pipeline is
// float; otherwise weights are int8 and biases / pre-activation accumulators
// are int32 (see the std::conditional aliases below).
// NOTE(review): the template parameter list continues on a line not visible
// in this chunk — the second parameter is used below as OutputTy (the type
// the network output is read back as).
117 template<typename InputTy,
119 class FusedConvTest : public testing::Test
// True when the test runs the all-float configuration.
122 static constexpr bool is_pure_float = std::is_same<InputTy, float>::value;
123 using OutputPreActivationTy = typename std::conditional<is_pure_float, float, int32_t>::type;
124 using WeightsTy = typename std::conditional<is_pure_float, float, int8_t>::type;
125 using BiasesTy = typename std::conditional<is_pure_float, float, int32_t>::type;
// Topology under construction; each TEST_F adds data via add_feature() and
// then hands the fused primitive to do_test().
127 topology the_topology;
// Per-feature test data, accumulated one feature (channel) at a time.
129 std::vector<InputTy> input_values;
130 std::vector<WeightsTy> weights_values;
131 std::vector<BiasesTy> biases_values;
132 // Note, not all of the quantization/calibration factors are used in all the
133 // tests. However, I didn't come up with a way to correctly reflect that
134 // while unifying the boilerplate testing code.
// Placeholder for factors a particular test does not consume.
135 static constexpr float ignore = std::numeric_limits<float>::quiet_NaN();
136 std::vector<float> input_quant_factors_values;
137 std::vector<float> calibration_values;
// Second (non-convolution) eltwise operand and its output calibration.
140 std::vector<InputTy> non_conv_input_values;
141 std::vector<float> eltw_output_calibration_values;
// Expected output values BEFORE the final ReLU/clamp (see pre_relu_to_output).
142 std::vector<OutputPreActivationTy> output_pre_relu;
// Appends one feature's worth of test data: 4 input values, a 1x3 weight
// row, quantization/calibration factors, the 2 eltwise-input values and the
// 2 expected pre-activation outputs. non_conv_input and output must be the
// same length (one expected value per eltwise input element).
144 void add_feature(std::vector<InputTy> input,
145 std::vector<WeightsTy> weights,
147 float input_quant_factor,
148 float conv_calibration,
149 std::vector<InputTy> non_conv_input,
150 float eltw_output_calibration,
151 std::vector<OutputPreActivationTy> output)
153 assert(non_conv_input.size() == output.size());
154 input_values.insert(input_values.end(), input.begin(), input.end());
155 weights_values.insert(
156 weights_values.end(), weights.begin(), weights.end());
157 biases_values.push_back(bias);
158 input_quant_factors_values.push_back(input_quant_factor);
159 calibration_values.push_back(conv_calibration);
160 non_conv_input_values.insert(non_conv_input_values.end(),
161 non_conv_input.begin(),
162 non_conv_input.end());
163 eltw_output_calibration_values.push_back(eltw_output_calibration);
164 output_pre_relu.insert(
165 output_pre_relu.end(), output.begin(), output.end());
// Builds the full topology around the supplied fused primitive, runs the
// network, and compares every output element against the expected
// (clamped/activated) values registered via add_feature().
168 void do_test(const fused_conv_eltwise& fused_prim)
170 const auto& engine = get_test_engine();
// One feature per add_feature() call made by the test.
172 int n_features = static_cast<int>(biases_values.size());
// Shapes: input is 1 x features x 4 x 1; a 3x1 kernel yields 2 output
// columns, matching the 2-element eltwise input per feature.
174 auto input_shape = tensor(1, n_features, 4, 1);
175 auto weights_shape = tensor(n_features, n_features, 3, 1);
176 auto biases_shape = tensor(1, 1, n_features, 1);
177 auto sum_input_shape = tensor(1, n_features, 2, 1);
179 auto input = memory::allocate(
181 {type_to_data_type<InputTy>::value, format::bfyx, input_shape});
182 auto weights = memory::allocate(
184 {type_to_data_type<WeightsTy>::value, format::bfyx, weights_shape});
186 auto biases = memory::allocate(
188 {type_to_data_type<BiasesTy>::value, format::bfyx, biases_shape});
189 auto input_quant_factors = memory::allocate(
190 engine, {data_types::f32, format::bfyx, biases_shape});
191 auto conv_output_calibration = memory::allocate(
192 engine, {data_types::f32, format::bfyx, biases_shape});
193 auto sum_input = memory::allocate(
195 {type_to_data_type<InputTy>::value, format::bfyx, sum_input_shape});
196 auto eltw_output_calibration = memory::allocate(
197 engine, {data_types::f32, format::bfyx, biases_shape});
199 set_values(input, input_values);
// Expand the per-feature 1x3 weight rows into a full
// n_features x n_features weights tensor: each output feature convolves
// only its own input feature (cross-feature weights are zeroed), so the
// features stay independent and each matches its add_feature() data.
200 std::vector<WeightsTy> post_processed_weights_values(n_features
202 for (int output_feature = 0; output_feature < n_features; ++output_feature)
203 for (int input_feature = 0; input_feature < n_features;
205 for (int x = 0; x < 3; ++x)
208 output_feature * n_features * 3 + input_feature * 3 + x;
209 if (input_feature == output_feature)
210 post_processed_weights_values[idx] =
211 weights_values[input_feature * 3 + x];
213 post_processed_weights_values[idx] = 0;
215 set_values(weights, post_processed_weights_values);
216 set_values(biases, biases_values);
217 set_values(input_quant_factors, input_quant_factors_values);
218 set_values(conv_output_calibration, calibration_values);
219 set_values(sum_input, non_conv_input_values);
220 set_values(eltw_output_calibration, eltw_output_calibration_values);
222 the_topology.add(input_layout("input", input.get_layout()));
223 the_topology.add(data("weights", weights));
224 the_topology.add(data("biases", biases));
225 the_topology.add(data("sum_input", sum_input));
226 the_topology.add(data("input_quant_factors", input_quant_factors));
227 the_topology.add(data("conv_output_calibration", conv_output_calibration));
228 the_topology.add(data("eltw_output_calibration", eltw_output_calibration));
229 the_topology.add(fused_prim);
// Optimizations are disabled: the primitive under test is already fused.
232 opts.set_option(build_option::optimize_data(false));
234 network network(engine, the_topology, opts);
235 network.set_input_data("input", input);
237 auto outputs = network.execute();
// The fused primitive is expected to be named "fused_conv" by each test.
239 auto output_memory = outputs.at("fused_conv").get_memory();
240 auto output_layout = output_memory.get_layout();
241 auto output_ptr = output_memory.pointer<OutputTy>();
242 int y_size = output_layout.size.spatial[1];
243 int x_size = output_layout.size.spatial[0];
244 int f_size = output_layout.size.feature[0];
245 int b_size = output_layout.size.batch[0];
// Expected output: 1 x n_features x 2 x 1 in bfyx.
246 EXPECT_EQ(output_layout.format, format::bfyx);
247 EXPECT_EQ(y_size, 1);
248 EXPECT_EQ(x_size, 2);
249 EXPECT_EQ(f_size, n_features);
250 EXPECT_EQ(b_size, 1);
// Element-wise comparison: apply the activation/clamp to the stored
// pre-activation expectations and compare with the actual output.
252 for (int f = 0; f < f_size; f++)
253 for (int x = 0; x < x_size; ++x)
255 // printf("f: %d, x: %d\n", f, x);
256 OutputPreActivationTy expected =
257 pre_relu_to_output(output_pre_relu[f * x_size + x]);
258 auto actual = static_cast<OutputPreActivationTy>(
259 output_ptr[f * x_size + x]);
260 expect_eq(expected, actual);
// Float comparison: near-equality with a small absolute tolerance.
265 template<typename T = OutputPreActivationTy>
266 static typename std::enable_if<std::is_floating_point<T>::value>::type
267 expect_eq(const OutputPreActivationTy& lhs, const OutputPreActivationTy& rhs)
269 EXPECT_NEAR(lhs, rhs, 0.001f);
// Integer comparison: exact equality (body on a line not visible here).
272 template<typename T = OutputPreActivationTy>
273 static typename std::enable_if<std::is_integral<T>::value>::type
274 expect_eq(const OutputPreActivationTy& lhs, const OutputPreActivationTy& rhs)
// Models the primitive's final stage: ReLU (max with 0) followed by
// saturation to the numeric range of OutputTy.
279 template <typename T>
280 static T pre_relu_to_output(T pre_relu) {
281 // No std::clamp before C++17 :(
283 static_cast<T>(std::numeric_limits<OutputTy>::max()),
284 std::max(static_cast<T>(std::numeric_limits<OutputTy>::lowest()),
285 std::max(static_cast<T>(0), pre_relu)));
// All-float configuration: InputTy = OutputTy = float, so weights/biases are
// float too and quantization factors act as plain multipliers.
289 class FusedConvTest_all_float : public FusedConvTest<float, float>
// Two independent features; the second uses a different eltwise output
// calibration (2.0) and slightly different eltwise inputs to verify the
// per-feature calibration path.
292 TEST_F(FusedConvTest_all_float, basic) {
293 add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
294 {2.0f, 0.0f, 1.0f}, // weights
296 1.0f, // conv_input_quant
297 1.0f, // conv_output_calibration
298 {-10.0f, -10.0f}, // non_conv_input
299 1.0f, // eltw_output_calibration
300 {241.0f, 242.0f}); // output_pre_relu
302 add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
303 {2.0f, 0.0f, 1.0f}, // weights
305 1.0f, // conv_input_quant
306 1.0f, // conv_output_calibration
307 {-10.0f, -11.0f}, // non_conv_input
308 2.0f, // eltw_output_calibration
309 {480.0f, 480.0f}); // output_pre_relu
// Build the fused primitive with all quantization/calibration inputs wired
// in and unit strides/dilation; eltw activation (ReLU) enabled.
311 do_test(fused_conv_eltwise("fused_conv",
317 {"input_quant_factors"},
318 {"conv_output_calibration"},
319 1.0f, // conv_i_quantization_factor
320 1.0f, // non_conv_scale
321 "eltw_output_calibration",
322 {{1, 1, 1, 1}}, // eltw_stride
323 {1, 1, 1, 1}, // stride
324 {0, 0, 0, 0}, // input_offset
325 {1, 1, 1, 1}, // dilation
326 false, // conv_with_activation
327 0.0f, // conv_activation_slp
328 true, // eltw_activation
329 0.0f)); // eltw_activation_slp
// Float configuration without per-channel convolution output calibration:
// the primitive receives an empty calibration list.
332 class FusedConvTest_no_conv_calibration : public FusedConvTest<float, float>
335 TEST_F(FusedConvTest_no_conv_calibration, basic) {
336 // That might happen if both conv output and non-conv input happen to be
337 // normalized to the same dynamic range or if tensor-wise (instead of
338 // per-channel) calibration is used. Also, a similar thing might happen for
339 // a convolution with calibration without quantization (which is the real
340 // target of this test, needed for the Inference Engine).
342 // add_feature contains data for conv quantization/calibration, but the
343 // primitive won't use it. It's just much easier to unify different tests
// Same feature data as the all-float test, but conv_output_calibration is
// deliberately `ignore` (NaN) since the primitive never reads it here.
345 add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
346 {2.0f, 0.0f, 1.0f}, // weights
348 1.0f, // conv_input_quant
349 ignore, // conv_output_calibration
350 {-10.0f, -10.0f}, // non_conv_input
351 1.0f, // eltw_output_calibration
352 {241.0f, 242.0f}); // output_pre_relu
354 add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
355 {2.0f, 0.0f, 1.0f}, // weights
357 1.0f, // conv_input_quant
358 ignore, // conv_output_calibration
359 {-10.0f, -11.0f}, // non_conv_input
360 2.0f, // eltw_output_calibration
361 {480.0f, 480.0f}); // output_pre_relu
// Identical primitive to the all-float test, except the conv output
// calibration input list is empty.
363 do_test(fused_conv_eltwise("fused_conv",
369 {"input_quant_factors"},
370 {}, // conv_output_calibration
371 1.0f, // conv_i_quantization_factor
372 1.0f, // non_conv_scale
373 "eltw_output_calibration",
374 {{1, 1, 1, 1}}, // eltw_stride
375 {1, 1, 1, 1}, // stride
376 {0, 0, 0, 0}, // input_offset
377 {1, 1, 1, 1}, // dilation
378 false, // conv_with_activation
379 0.0f, // conv_activation_slp
380 true, // eltw_activation
381 0.0f)); // eltw_activation_slp
// int8 -> int8 configuration exercising the primitive-wide (not per-channel)
// non_conv_scale factor applied to the non-convolution eltwise operand.
384 class FusedConvTest_non_conv_scale_per_primitive : public FusedConvTest<int8_t, int8_t>
387 TEST_F(FusedConvTest_non_conv_scale_per_primitive, basic) {
388 // NOTE: The data in add_feature calls implicitly assumes this!
389 const float non_conv_scale = 2.0f; // TODO: Need per-channel too?
391 // Check that the output precision is `u8` indeed. If it was not, then 251
392 // would either be rounded to 250 or 252. Ensure it's not the case and the
393 // outputs actually differ.
// Compact positional form of add_feature:
// (input, weights, bias, conv_input_quant, conv_output_calibration,
//  non_conv_input, eltw_output_calibration, output_pre_relu).
394 add_feature({125, 125, 0, 1}, {2, 0, 1}, 1, 1.0f, ignore, {-10, -10}, 1.0f, {231, 232});
395 add_feature({125, 125, 0, 1}, {2, 0, 1}, 0, 1.0f, ignore, {-10, -10}, 1.0f, {230, 231});
397 // Verify that activation is done before the final calibration+type
398 // conversion (in other words, in higher precision than the output).
399 add_feature({0, 50, 0, -50}, {0, 4, 4}, 1, 1.0f, ignore, {-10, -10}, 1.0f, {181, -219});
400 add_feature({0, 50, 0, -50}, {0, 4, 4}, 1, 1.0f, ignore, {-5, -5}, 1.0f, {191, -209});
402 // Same but with non-unit calibration (just in case).
403 add_feature({0, 50, 0, -50}, {0, 8, 8}, 2, 1.0f, ignore, {10, 10}, 0.5f, {211, -189});
// non_conv_scale = 2.0 is the parameter under test; no per-channel conv
// output calibration (empty list).
405 do_test(fused_conv_eltwise("fused_conv",
411 {"input_quant_factors"},
412 {}, // conv_output_calibration
413 1.0f, // conv_i_quantization_factor
414 non_conv_scale, // non_conv_scale
415 "eltw_output_calibration",
416 {{1, 1, 1, 1}}, // eltw_stride
417 {1, 1, 1, 1}, // stride
418 {0, 0, 0, 0}, // input_offset
419 {1, 1, 1, 1}, // dilation
420 false, // conv_with_activation
421 0.0f, // conv_activation_slp
422 true, // eltw_activation
423 0.0f)); // eltw_activation_slp
// int8 input / uint8 output configuration: exercises the explicit output
// data type override (optional_data_type{data_types::u8}) with quantized
// eltwise output calibration but no conv quantization/calibration inputs.
426 class FusedConvTest_i8_to_u8_quantized : public FusedConvTest<int8_t, uint8_t>
429 TEST_F(FusedConvTest_i8_to_u8_quantized, basic) {
// Positional add_feature data; conv quant/calibration factors are `ignore`
// (NaN) because the primitive below receives empty lists for them.
430 add_feature({125, 125, 0, 1}, {2, 0, 1}, 1, ignore, ignore, {-10, -10}, 1, {241, 242});
431 add_feature({125, 125, 0, 1}, {2, 0, 1}, 0, ignore, ignore, {-10, -11}, 2, {480, 480});
// Note the explicitly spelled tensor types and the u8 output override as
// the final constructor argument.
433 do_test(fused_conv_eltwise("fused_conv",
439 {}, // input_quant_factors
440 {}, // conv_output_calibration
441 1.0f, // conv_i_quantization_factor
442 1.0f, // non_conv_scale
443 "eltw_output_calibration",
444 std::vector<tensor>{tensor{1, 1, 1, 1}}, // eltw_stride
445 tensor{1, 1, 1, 1}, // stride
446 tensor{0, 0, 0, 0}, // input_offset
447 tensor{1, 1, 1, 1}, // dilation
448 false, // conv_with_activation
449 0.0f, // conv_activation_slp
450 true, // eltw_activation
451 0.0f, // eltw_activation_slp
453 optional_data_type{data_types::u8}));
// int8 -> uint8 configuration WITHOUT eltwise output calibration: the
// primitive gets an empty eltw calibration list, while input quantization
// factors are still provided.
456 class FusedConvTest_i8_to_u8_no_eltw_calibration
457 : public FusedConvTest<int8_t, uint8_t>
460 TEST_F(FusedConvTest_i8_to_u8_no_eltw_calibration, basic) {
// NOTE(review): this local is presumably passed as the non_conv_scale
// constructor argument on a line not visible in this chunk — confirm
// against the full file.
461 const float non_conv_scale = 1.0f / 3.0f;
463 add_feature({124, 124, 0, -4}, // input
464 {2, 0, 1}, // weights
466 0.5f, // conv_input_quant
467 ignore, // conv_output_calibration
468 {-60, -60}, // non_conv_input
469 ignore, // eltw_output_calibration
470 {252 / 2 - 20, 248 / 2 - 20}); // output_pre_relu
472 add_feature({3, 3, 1, 1}, // input
473 {2, 0, 1}, // weights
475 1.0f / 3.0f, // conv_input_quant
476 ignore, // conv_output_calibration
477 {1, 1}, // eltw_sum_input
478 ignore, // eltw_output_calibration
479 // TODO: Do we really need that round? Should it be "3" instead?
480 // { round(2.333) + round (0.333) }
481 {2, 2}); // output_pre_relu
// Same primitive shape as the quantized i8->u8 test, but with an empty
// eltw_output_calibration list; output type forced to u8.
483 do_test(fused_conv_eltwise("fused_conv",
489 {"input_quant_factors"},
490 {}, // conv_output_calibration
491 1.0f, // conv_i_quantization_factor
493 {}, // eltw_output_calibration
494 std::vector<tensor>{tensor{1, 1, 1, 1}}, // eltw_stride
495 tensor{1, 1, 1, 1}, // stride
496 tensor{0, 0, 0, 0}, // input_offset
497 tensor{1, 1, 1, 1}, // dilation
498 false, // conv_with_activation
499 0.0f, // conv_activation_slp
500 true, // eltw_activation
501 0.0f, // eltw_activation_slp
503 optional_data_type{data_types::u8}));