a2603aff47f8296cd0e9e9b1358f0ca705cfeca3
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / tests / test_cases / fused_conv_eltwise_gpu_test.cpp
1 /*
2 // Copyright (c) 2016 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
18 #include <gtest/gtest.h>
19 #include "api/memory.hpp"
20 #include <api/input_layout.hpp>
21 #include "api/convolution.hpp"
22 #include "api/eltwise.hpp"
23 #include "api/reorder.hpp"
24 #include <api/topology.hpp>
25 #include <api/network.hpp>
26 #include <api/engine.hpp>
27 #include "test_utils/test_utils.h"
28 #include <api/data.hpp>
29
30 #include <api_extension/fused_conv_eltwise.hpp>
31
32 #include <cassert>
33 #include <cmath>
34 #include <gmock/gmock.h>
35 #include <limits>
36
37 using namespace cldnn;
38 using namespace tests;
39 using namespace testing;
40
41 TEST(fused_conv_eltwise, basic_0)
42 {
43     const auto& engine = get_test_engine();
44
45     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 4, 5 } });
46     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
47
48     set_values(input, {
49         1.0f,  2.0f, -15.f,  3.0f, 4.0f, -15.f, 5.0f,  6.0f, -15.f, 7.0f,
50         -15.f, 0.0f,  0.0f, -15.f, 0.5f, -0.5f, -15.f, 8.0f,  1.5f,  5.2f
51     });
52
53     topology topology(
54         input_layout("input", input.get_layout()),
55         data("weights", weights),
56         convolution("conv", "input", { "weights" }),
57         eltwise("eltwise", "input", "conv", eltwise_mode::sum),
58         reorder("out", "eltwise", format::bfyx, data_types::f32));
59
60     build_options opt;
61     opt.set_option(build_option::optimize_data(true));
62     network network(engine, topology, opt);
63     network.set_input_data("input", input);
64
65     auto outputs = network.execute();
66     EXPECT_EQ(outputs.size(), size_t(1));
67     EXPECT_EQ(outputs.begin()->first, "out");
68
69     auto output = outputs.begin()->second.get_memory();
70     auto&& out_layout = output.get_layout();
71
72     EXPECT_EQ(out_layout.format, format::bfyx);
73     EXPECT_EQ(out_layout.size.batch[0], 1);
74     EXPECT_EQ(out_layout.size.feature[0], 1);
75     EXPECT_EQ(out_layout.size.spatial[0], 4);
76     EXPECT_EQ(out_layout.size.spatial[1], 5);
77 }
78
79 TEST(fused_conv_eltwise, dont_fuse_if_conv_elt_are_outputs)
80 {
81     const auto& engine = get_test_engine();
82
83     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 5 } });
84     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
85
86     set_values(input, {
87         1.0f,  2.0f, -15.f,  3.0f, 4.0f, -15.f, 5.0f,  6.0f, -15.f, 7.0f,
88         -15.f, 0.0f,  0.0f, -15.f, 0.5f, -0.5f, -15.f, 8.0f,  1.5f,  5.2f
89         });
90
91     topology topology(
92         input_layout("input", input.get_layout()),
93         data("weights", weights),
94         convolution("conv", "input", { "weights" }),
95         eltwise("out", "input", "conv", eltwise_mode::sum));
96
97     build_options opt;
98     opt.set_option(build_option::optimize_data(true));
99     network network(engine, topology, opt);
100     network.set_input_data("input", input);
101
102     auto outputs = network.execute();
103     EXPECT_EQ(outputs.size(), size_t(1));
104     EXPECT_EQ(outputs.begin()->first, "out");
105
106     auto output = outputs.begin()->second.get_memory();
107     auto&& out_layout = output.get_layout();
108
109     EXPECT_EQ(out_layout.format, format::bfyx);
110     EXPECT_EQ(out_layout.size.batch[0], 1);
111     EXPECT_EQ(out_layout.size.feature[0], 1);
112     EXPECT_EQ(out_layout.size.spatial[0], 4);
113     EXPECT_EQ(out_layout.size.spatial[1], 5);
114 }
115
// Test harness for the fused_conv_eltwise primitive. Each add_feature() call
// registers one independent feature (channel) worth of data; do_test() then
// builds a 1 x n_features x 4 x 1 convolution (3x1 kernel) whose output is
// element-wise summed with "sum_input", and compares the result against the
// expected pre-ReLU values clamped into OutputTy's range.
template<typename InputTy,
         typename OutputTy>
class FusedConvTest : public testing::Test
{
protected:
    // A float/float instantiation exercises the pure floating-point path;
    // any other InputTy is treated as a quantized path: int8 weights and an
    // int32 pre-activation accumulator.
    static constexpr bool is_pure_float = std::is_same<InputTy, float>::value;
    using OutputPreActivationTy = typename std::conditional<is_pure_float, float, int32_t>::type;
    using WeightsTy = typename std::conditional<is_pure_float, float, int8_t>::type;
    using BiasesTy = typename std::conditional<is_pure_float, float, int32_t>::type;

    topology the_topology;

    // Per-feature data accumulated by add_feature() and consumed by do_test().
    std::vector<InputTy> input_values;
    std::vector<WeightsTy> weights_values;
    std::vector<BiasesTy> biases_values;
    // Note, not all of the quantization/calibration factors are used in all the
    // tests. However, I didn't come up with a way to correctly reflect that
    // while unifying the boilerplate testing code.
    // Sentinel for factors a particular test doesn't use (NaN so accidental
    // use would be noticeable in results).
    static constexpr float ignore = std::numeric_limits<float>::quiet_NaN();
    std::vector<float> input_quant_factors_values;
    std::vector<float> calibration_values;

    // Eltw part.
    std::vector<InputTy> non_conv_input_values;
    std::vector<float> eltw_output_calibration_values;
    std::vector<OutputPreActivationTy> output_pre_relu;

    // Registers one feature's worth of data: 4 input elements (4x1 spatial),
    // a 3-element (3x1) kernel, one bias and per-feature factors, plus the
    // 2 eltwise summands and 2 expected pre-ReLU outputs (conv output is 2x1).
    void add_feature(std::vector<InputTy> input,
                     std::vector<WeightsTy> weights,
                     BiasesTy bias,
                     float input_quant_factor,
                     float conv_calibration,
                     std::vector<InputTy> non_conv_input,
                     float eltw_output_calibration,
                     std::vector<OutputPreActivationTy> output)
    {
        assert(non_conv_input.size() == output.size());
        input_values.insert(input_values.end(), input.begin(), input.end());
        weights_values.insert(
            weights_values.end(), weights.begin(), weights.end());
        biases_values.push_back(bias);
        input_quant_factors_values.push_back(input_quant_factor);
        calibration_values.push_back(conv_calibration);
        non_conv_input_values.insert(non_conv_input_values.end(),
                                     non_conv_input.begin(),
                                     non_conv_input.end());
        eltw_output_calibration_values.push_back(eltw_output_calibration);
        output_pre_relu.insert(
            output_pre_relu.end(), output.begin(), output.end());
    }

    // Builds the topology around `fused_prim` (which must be named
    // "fused_conv" and reference the data nodes added below), executes the
    // network, and checks the output against output_pre_relu.
    void do_test(const fused_conv_eltwise& fused_prim)
    {
        const auto& engine = get_test_engine();

        // One feature per add_feature() call.
        int n_features = static_cast<int>(biases_values.size());

        auto input_shape = tensor(1, n_features, 4, 1);
        auto weights_shape = tensor(n_features, n_features, 3, 1);
        auto biases_shape = tensor(1, n_features, 1, 1);
        auto sum_input_shape = tensor(1, n_features, 2, 1);

        auto input = memory::allocate(
            engine,
            {type_to_data_type<InputTy>::value, format::bfyx, input_shape});
        auto weights = memory::allocate(
            engine,
            {type_to_data_type<WeightsTy>::value, format::bfyx, weights_shape});

        auto biases = memory::allocate(
            engine,
            {type_to_data_type<BiasesTy>::value, format::bfyx, biases_shape});
        auto input_quant_factors = memory::allocate(
            engine, {data_types::f32, format::bfyx, biases_shape});
        auto conv_output_calibration = memory::allocate(
            engine, {data_types::f32, format::bfyx, biases_shape});
        auto sum_input = memory::allocate(
            engine,
            {type_to_data_type<InputTy>::value, format::bfyx, sum_input_shape});
        auto eltw_output_calibration = memory::allocate(
            engine, {data_types::f32, format::bfyx, biases_shape});

        set_values(input, input_values);
        // Expand the per-feature 3x1 kernels into a full
        // n_features x n_features weight tensor with zeros off the diagonal,
        // so each output feature depends only on its matching input feature
        // and the features stay independent of each other.
        std::vector<WeightsTy> post_processed_weights_values(n_features
                                                             * n_features * 3);
        for (int output_feature = 0; output_feature < n_features; ++output_feature)
            for (int input_feature = 0; input_feature < n_features;
                 ++input_feature)
                for (int x = 0; x < 3; ++x)
                {
                    int idx =
                        output_feature * n_features * 3 + input_feature * 3 + x;
                    if (input_feature == output_feature)
                        post_processed_weights_values[idx] =
                            weights_values[input_feature * 3 + x];
                    else
                        post_processed_weights_values[idx] = 0;
                }
        set_values(weights, post_processed_weights_values);
        set_values(biases, biases_values);
        set_values(input_quant_factors, input_quant_factors_values);
        set_values(conv_output_calibration, calibration_values);
        set_values(sum_input, non_conv_input_values);
        set_values(eltw_output_calibration, eltw_output_calibration_values);

        the_topology.add(input_layout("input", input.get_layout()));
        the_topology.add(data("weights", weights));
        the_topology.add(data("biases", biases));
        the_topology.add(data("sum_input", sum_input));
        the_topology.add(data("input_quant_factors", input_quant_factors));
        the_topology.add(data("conv_output_calibration", conv_output_calibration));
        the_topology.add(data("eltw_output_calibration", eltw_output_calibration));
        the_topology.add(fused_prim);

        build_options opts;
        // Keep the graph as-built; the primitive under test is already fused.
        opts.set_option(build_option::optimize_data(false));

        network network(engine, the_topology, opts);
        network.set_input_data("input", input);

        auto outputs = network.execute();

        auto output_memory = outputs.at("fused_conv").get_memory();
        auto output_layout = output_memory.get_layout();
        auto output_ptr = output_memory.pointer<OutputTy>();
        int y_size = output_layout.size.spatial[1];
        int x_size = output_layout.size.spatial[0];
        int f_size = output_layout.size.feature[0];
        int b_size = output_layout.size.batch[0];
        EXPECT_EQ(output_layout.format, format::bfyx);
        // 4x1 input through a 3x1 kernel at stride 1 yields a 2x1 output.
        EXPECT_EQ(y_size, 1);
        EXPECT_EQ(x_size, 2);
        EXPECT_EQ(f_size, n_features);
        EXPECT_EQ(b_size, 1);

        // Flat bfyx indexing; valid because y_size == 1 (asserted above).
        for (int f = 0; f < f_size; f++)
            for (int x = 0; x < x_size; ++x)
            {
                // printf("f: %d, x: %d\n", f, x);
                OutputPreActivationTy expected =
                    pre_relu_to_output(output_pre_relu[f * x_size + x]);
                auto actual = static_cast<OutputPreActivationTy>(
                    output_ptr[f * x_size + x]);
                expect_eq(expected, actual);
            }
    }

private:
    // Floating-point comparison: tolerance-based.
    template<typename T = OutputPreActivationTy>
    static typename std::enable_if<std::is_floating_point<T>::value>::type
    expect_eq(const OutputPreActivationTy& lhs, const OutputPreActivationTy& rhs)
    {
        EXPECT_NEAR(lhs, rhs, 0.001f);
    }

    // Integral comparison: exact.
    template<typename T = OutputPreActivationTy>
    static typename std::enable_if<std::is_integral<T>::value>::type
    expect_eq(const OutputPreActivationTy& lhs, const OutputPreActivationTy& rhs)
    {
        EXPECT_EQ(lhs, rhs);
    }

    // Applies ReLU to the pre-activation value and clamps it into OutputTy's
    // representable range (models the final type conversion of the output).
    template <typename T>
    static T pre_relu_to_output(T pre_relu) {
      // No std::clamp before C++17 :(
      return std::min(
          static_cast<T>(std::numeric_limits<OutputTy>::max()),
          std::max(static_cast<T>(std::numeric_limits<OutputTy>::lowest()),
                   std::max(static_cast<T>(0), pre_relu)));
    }
};
287
// Pure floating-point instantiation: weights/biases/accumulator are all float.
class FusedConvTest_all_float : public FusedConvTest<float, float>
{};
290
291 TEST_F(FusedConvTest_all_float, basic) {
292     add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
293                 {2.0f, 0.0f, 1.0f},           // weights
294                 1.0f,                         // bias
295                 1.0f,                         // conv_input_quant
296                 1.0f,                         // conv_output_calibration
297                 {-10.0f, -10.0f},             // non_conv_input
298                 1.0f,                         // eltw_output_calibration
299                 {241.0f, 242.0f});            // output_pre_relu
300
301     add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
302                 {2.0f, 0.0f, 1.0f},           // weights
303                 0.0f,                         // bias
304                 1.0f,                         // conv_input_quant
305                 1.0f,                         // conv_output_calibration
306                 {-10.0f, -11.0f},             // non_conv_input
307                 2.0f,                         // eltw_output_calibration
308                 {480.0f, 480.0f});            // output_pre_relu
309
310     do_test(fused_conv_eltwise("fused_conv",
311                                "input",
312                                "sum_input",
313                                eltwise_mode::sum,
314                                {"weights"},
315                                {"biases"},
316                                {"input_quant_factors"},
317                                {"conv_output_calibration"},
318                                1.0f, // conv_i_quantization_factor
319                                1.0f, // non_conv_scale
320                                "eltw_output_calibration",
321                                {{1, 1, 1, 1}}, // eltw_stride
322                                {1, 1, 1, 1},   // stride
323                                {0, 0, 0, 0},   // input_offset
324                                {1, 1, 1, 1},   // dilation
325                                false,          // conv_with_activation
326                                0.0f,           // con_activation_slp
327                                true,           // eltw_activation
328                                0.0f));         // eltw_activation_slp
329 }
330
// Float path, but the primitive under test omits conv output calibration.
class FusedConvTest_no_conv_calibration : public FusedConvTest<float, float>
{};
333
334 TEST_F(FusedConvTest_no_conv_calibration, basic) {
335     // That might happen if both conv output and non-conv input happen to be
336     // normalized to the same dynamic range of if tensor-wise (instead of
337     // per-channel) calibration is used. Also, a similar thing might happen for
338     // a convolution with calibration without quantization (which is the real
339     // target of this test, needed for the Inference Engine).
340
341     // add_feature contains data for conv quantization/calibration, but the
342     // primitive won't use it. It's just much easier to unify different tests
343     // this way.
344     add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
345                 {2.0f, 0.0f, 1.0f},           // weights
346                 1.0f,                         // bias
347                 1.0f,                         // conv_input_quant
348                 ignore,                       // conv_output_calibration
349                 {-10.0f, -10.0f},             // non_conv_input
350                 1.0f,                         // eltw_output_calibration
351                 {241.0f, 242.0f});            // output_pre_relu
352
353     add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
354                 {2.0f, 0.0f, 1.0f},           // weights
355                 0.0f,                         // bias
356                 1.0f,                         // conv_input_quant
357                 ignore,                       // conv_output_calibration
358                 {-10.0f, -11.0f},             // non_conv_input
359                 2.0f,                         // eltw_output_calibration
360                 {480.0f, 480.0f});            // output_pre_relu
361
362     do_test(fused_conv_eltwise("fused_conv",
363                                "input",
364                                "sum_input",
365                                eltwise_mode::sum,
366                                {"weights"},
367                                {"biases"},
368                                {"input_quant_factors"},
369                                {},   // conv_output_calibration
370                                1.0f, // conv_i_quantization_factor
371                                1.0f, // non_conv_scale
372                                "eltw_output_calibration",
373                                {{1, 1, 1, 1}}, // eltw_stride
374                                {1, 1, 1, 1},   // stride
375                                {0, 0, 0, 0},   // input_offset
376                                {1, 1, 1, 1},   // dilation
377                                false,          // conv_with_activation
378                                0.0f,           // con_activation_slp
379                                true,           // eltw_activation
380                                0.0f));         // eltw_activation_slp
381 }
382
// Quantized i8->i8 path; the non-conv input is scaled by a per-primitive factor.
class FusedConvTest_non_conv_scale_per_primitive : public FusedConvTest<int8_t, int8_t>
{};
385
TEST_F(FusedConvTest_non_conv_scale_per_primitive, basic) {
    // NOTE: The data in add_feature calls implicitly assumes this!
    const float non_conv_scale = 2.0f; // TODO: Need per-channel too?

    // Check that the output precision is `u8` indeed. If it was not, then 251
    // would either be rounded to 250 or 252. Ensure it's not the case and the
    // outputs actually differ.
    // NOTE(review): the fixture instantiates OutputTy = int8_t, so "u8" above
    // looks stale — verify which precision the kernel actually emits.
    add_feature({125, 125, 0, 1}, {2, 0, 1}, 1, 1.0f, ignore, {-10, -10}, 1.0f, {231, 232});
    add_feature({125, 125, 0, 1}, {2, 0, 1}, 0, 1.0f, ignore, {-10, -10}, 1.0f, {230, 231});

    // Verify that activation is done before the final calibration+type
    // conversion (in other words, in higher precision than the output).
    add_feature({0, 50, 0, -50}, {0, 4, 4}, 1, 1.0f, ignore, {-10, -10}, 1.0f, {181, -219});
    add_feature({0, 50, 0, -50}, {0, 4, 4}, 1, 1.0f, ignore, {-5, -5}, 1.0f, {191, -209});

    // Same but with non-unit calibration (just in case).
    add_feature({0, 50, 0, -50}, {0, 8, 8}, 2, 1.0f, ignore, {10, 10}, 0.5f, {211, -189});

    do_test(fused_conv_eltwise("fused_conv",
                               "input",
                               "sum_input",
                               eltwise_mode::sum,
                               {"weights"},
                               {"biases"},
                               {"input_quant_factors"},
                               {},   // conv_output_calibration
                               1.0f, // conv_i_quantization_factor
                               non_conv_scale, // non_conv_scale
                               "eltw_output_calibration",
                               {{1, 1, 1, 1}}, // eltw_stride
                               {1, 1, 1, 1},   // stride
                               {0, 0, 0, 0},   // input_offset
                               {1, 1, 1, 1},   // dilation
                               false,          // conv_with_activation
                               0.0f,           // con_activation_slp
                               true,           // eltw_activation
                               0.0f));         // eltw_activation_slp
}
424
// Quantized path with i8 inputs and u8 output (forced via optional_data_type).
class FusedConvTest_i8_to_u8_quantized : public FusedConvTest<int8_t, uint8_t>
{};
427
428 TEST_F(FusedConvTest_i8_to_u8_quantized, basic) {
429     add_feature({125, 125, 0, 1}, {2, 0, 1}, 1, ignore, ignore, {-10, -10}, 1, {241, 242});
430     add_feature({125, 125, 0, 1}, {2, 0, 1}, 0, ignore, ignore, {-10, -11}, 2, {480, 480});
431
432     do_test(fused_conv_eltwise("fused_conv",
433                                "input",
434                                "sum_input",
435                                eltwise_mode::sum,
436                                {"weights"},
437                                {"biases"},
438                                {},   // input_quant_factors
439                                {},   // conv_output_calibration
440                                1.0f, // conv_i_quantization_factor
441                                1.0f, // non_conv_scale
442                                "eltw_output_calibration",
443                                std::vector<tensor>{tensor{1, 1, 1, 1}}, // eltw_stride
444                                tensor{1, 1, 1, 1},   // stride
445                                tensor{0, 0, 0, 0},   // input_offset
446                                tensor{1, 1, 1, 1},   // dilation
447                                false,          // conv_with_activation
448                                0.0f,           // con_activation_slp
449                                true,           // eltw_activation
450                                0.0f,           // eltw_activation_slp
451                                padding(),
452                                optional_data_type{data_types::u8}));
453 }
454
// i8->u8 path without eltwise output calibration; the non-conv input is
// scaled by the primitive's non_conv_scale instead.
class FusedConvTest_i8_to_u8_no_eltw_calibration
    : public FusedConvTest<int8_t, uint8_t>
{};
458
TEST_F(FusedConvTest_i8_to_u8_no_eltw_calibration, basic) {
    // No eltwise output calibration is attached ({} below); the non-conv
    // input is pre-scaled by this factor before the sum instead.
    const float non_conv_scale = 1.0f / 3.0f;

    // Expected outputs fold together conv_input_quant (0.5) and
    // non_conv_input * non_conv_scale (-60 / 3 = -20); hence the
    // "conv/2 - 20" spelling below.
    add_feature({124, 124, 0, -4},             // input
                {2, 0, 1},                     // weights
                4,                             // bias
                0.5f,                          // conv_input_quant
                ignore,                        // conv_output_calibration
                {-60, -60},                    // non_conv_input
                ignore,                        // eltw_output_calibration
                {252 / 2 - 20, 248 / 2 - 20}); // output_pre_relu

    // Second feature exercises fractional intermediate values
    // (conv_input_quant and non_conv_scale are both 1/3).
    add_feature({3, 3, 1, 1}, // input
                {2, 0, 1},    // weights
                0,            // bias
                1.0f / 3.0f,  // conv_input_quant
                ignore,       // conv_output_calibration
                {1, 1},       // eltw_sum_input
                ignore,       // eltw_output_calibration
                // TODO: Do we really need that round? Should it be "3" instead?
                // { round(2.333) + round (0.333) }
                {2, 2}); // output_pre_relu

    do_test(fused_conv_eltwise("fused_conv",
                               "input",
                               "sum_input",
                               eltwise_mode::sum,
                               {"weights"},
                               {"biases"},
                               {"input_quant_factors"},
                               {}, // conv_output_calibration
                               1.0f, // conv_i_quantization_factor
                               non_conv_scale,
                               {},             // eltw_output_calibration
                               std::vector<tensor>{tensor{1, 1, 1, 1}}, // eltw_stride
                               tensor{1, 1, 1, 1},   // stride
                               tensor{0, 0, 0, 0},   // input_offset
                               tensor{1, 1, 1, 1},   // dilation
                               false,          // conv_with_activation
                               0.0f,           // con_activation_slp
                               true,           // eltw_activation
                               0.0f,           // eltw_activation_slp
                               padding(),
                               optional_data_type{data_types::u8}));
}