7ba200fe6fbf64420b6435cb1ac64227c757826c
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / tests / test_cases / fused_conv_eltwise_gpu_test.cpp
1 /*
2 // Copyright (c) 2016 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
18 #include <gtest/gtest.h>
19 #include "api/CPP/memory.hpp"
20 #include <api/CPP/input_layout.hpp>
21 #include "api/CPP/convolution.hpp"
22 #include "api/CPP/eltwise.hpp"
23 #include "api/CPP/reorder.hpp"
24 #include <api/CPP/topology.hpp>
25 #include <api/CPP/network.hpp>
26 #include <api/CPP/engine.hpp>
27 #include "test_utils/test_utils.h"
28 #include <api/CPP/data.hpp>
29
30 #include <api_extension/CPP/fused_conv_eltwise.hpp>
31
32 #include <cassert>
33 #include <cmath>
34 #include <gmock/gmock.h>
35 #include <limits>
36
37 using namespace cldnn;
38 using namespace tests;
39 using namespace testing;
40
41 TEST(fused_conv_eltwise, basic_0)
42 {
43     const auto& engine = get_test_engine();
44
45     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 4, 5 } });
46     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
47
48     set_values(input, {
49         1.0f,  2.0f, -15.f,  3.0f, 4.0f, -15.f, 5.0f,  6.0f, -15.f, 7.0f,
50         -15.f, 0.0f,  0.0f, -15.f, 0.5f, -0.5f, -15.f, 8.0f,  1.5f,  5.2f
51     });
52
53     topology topology(
54         input_layout("input", input.get_layout()),
55         data("weights", weights),
56         convolution("conv", "input", { "weights" }),
57         eltwise("eltwise", "input", "conv", eltwise_mode::sum),
58         reorder("out", "eltwise", format::bfyx, data_types::f32));
59
60     build_options opt;
61     opt.set_option(build_option::optimize_data(true));
62     network network(engine, topology, opt);
63     network.set_input_data("input", input);
64
65     auto outputs = network.execute();
66     EXPECT_EQ(outputs.size(), size_t(1));
67     EXPECT_EQ(outputs.begin()->first, "out");
68
69     auto output = outputs.begin()->second.get_memory();
70     auto&& out_layout = output.get_layout();
71
72     EXPECT_EQ(out_layout.format, format::bfyx);
73     EXPECT_EQ(out_layout.size.batch[0], 1);
74     EXPECT_EQ(out_layout.size.feature[0], 1);
75     EXPECT_EQ(out_layout.size.spatial[0], 4);
76     EXPECT_EQ(out_layout.size.spatial[1], 5);
77 }
78
79
80 TEST(fused_conv_eltwise, dont_fuse_if_conv_elt_are_outputs)
81 {
82     const auto& engine = get_test_engine();
83
84     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 5 } });
85     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
86
87     set_values(input, {
88         1.0f,  2.0f, -15.f,  3.0f, 4.0f, -15.f, 5.0f,  6.0f, -15.f, 7.0f,
89         -15.f, 0.0f,  0.0f, -15.f, 0.5f, -0.5f, -15.f, 8.0f,  1.5f,  5.2f
90         });
91
92     topology topology(
93         input_layout("input", input.get_layout()),
94         data("weights", weights),
95         convolution("conv", "input", { "weights" }),
96         eltwise("out", "input", "conv", eltwise_mode::sum));
97
98     build_options opt;
99     opt.set_option(build_option::optimize_data(true));
100     network network(engine, topology, opt);
101     network.set_input_data("input", input);
102
103     auto outputs = network.execute();
104     EXPECT_EQ(outputs.size(), size_t(1));
105     EXPECT_EQ(outputs.begin()->first, "out");
106
107     auto output = outputs.begin()->second.get_memory();
108     auto&& out_layout = output.get_layout();
109
110     EXPECT_EQ(out_layout.format, format::bfyx);
111     EXPECT_EQ(out_layout.size.batch[0], 1);
112     EXPECT_EQ(out_layout.size.feature[0], 1);
113     EXPECT_EQ(out_layout.size.spatial[0], 4);
114     EXPECT_EQ(out_layout.size.spatial[1], 5);
115 }
116
// Shared harness for testing the fused_conv_eltwise primitive directly
// (do_test builds the network with optimize_data disabled, so no graph-level
// fusion pass is involved).
//
// InputTy  - element type of both the convolution input and the eltwise
//            ("sum") input.
// OutputTy - element type of the primitive's output; its numeric limits are
//            used by pre_relu_to_output() to clamp the expected values.
//
// For float inputs the whole pipeline stays in float; for any other input
// type the weights are int8 and the biases / pre-activation values are int32.
template<typename InputTy,
         typename OutputTy>
class FusedConvTest : public testing::Test
{
protected:
    static constexpr bool is_pure_float = std::is_same<InputTy, float>::value;
    using OutputPreActivationTy = typename std::conditional<is_pure_float, float, int32_t>::type;
    using WeightsTy = typename std::conditional<is_pure_float, float, int8_t>::type;
    using BiasesTy = typename std::conditional<is_pure_float, float, int32_t>::type;

    topology the_topology;

    // Flattened per-feature test data; each add_feature() call appends one
    // feature's worth of values to every vector below.
    std::vector<InputTy> input_values;
    std::vector<WeightsTy> weights_values;
    std::vector<BiasesTy> biases_values;
    // Note, not all of the quantization/calibration factors are used in all the
    // tests. However, I didn't come up with a way to correctly reflect that
    // while unifying the boilerplate testing code.
    static constexpr float ignore = std::numeric_limits<float>::quiet_NaN();
    std::vector<float> input_quant_factors_values;
    std::vector<float> calibration_values;

    // Eltw part.
    std::vector<InputTy> non_conv_input_values;
    std::vector<float> eltw_output_calibration_values;
    std::vector<OutputPreActivationTy> output_pre_relu;

    // Registers one feature (channel) of test data:
    //   input          - 4 values (the 4x1 input row for this feature),
    //   weights        - 3 values (the 3x1 filter for this feature),
    //   non_conv_input - 2 values summed element-wise with the conv result,
    //   output         - 2 expected pre-ReLU results for this feature.
    // The conv quantization/calibration scalars may be `ignore` (NaN) when
    // the primitive under test does not consume them.
    void add_feature(std::vector<InputTy> input,
                     std::vector<WeightsTy> weights,
                     BiasesTy bias,
                     float input_quant_factor,
                     float conv_calibration,
                     std::vector<InputTy> non_conv_input,
                     float eltw_output_calibration,
                     std::vector<OutputPreActivationTy> output)
    {
        assert(non_conv_input.size() == output.size());
        input_values.insert(input_values.end(), input.begin(), input.end());
        weights_values.insert(
            weights_values.end(), weights.begin(), weights.end());
        biases_values.push_back(bias);
        input_quant_factors_values.push_back(input_quant_factor);
        calibration_values.push_back(conv_calibration);
        non_conv_input_values.insert(non_conv_input_values.end(),
                                     non_conv_input.begin(),
                                     non_conv_input.end());
        eltw_output_calibration_values.push_back(eltw_output_calibration);
        output_pre_relu.insert(
            output_pre_relu.end(), output.begin(), output.end());
    }

    // Builds a network around `fused_prim` using the data accumulated by
    // add_feature() and checks the primitive output against output_pre_relu
    // (after applying pre_relu_to_output). The primitive is expected to be
    // named "fused_conv" and to reference the inputs added below.
    void do_test(const fused_conv_eltwise& fused_prim)
    {
        const auto& engine = get_test_engine();

        int n_features = static_cast<int>(biases_values.size());

        // A 3x1 filter over a 4x1 input (no padding) yields a 2x1 output per
        // feature, which is also the shape of the eltwise ("sum") input.
        auto input_shape = tensor(1, n_features, 4, 1);
        auto weights_shape = tensor(n_features, n_features, 3, 1);
        auto biases_shape = tensor(1, 1, n_features, 1);
        auto sum_input_shape = tensor(1, n_features, 2, 1);

        auto input = memory::allocate(
            engine,
            {type_to_data_type<InputTy>::value, format::bfyx, input_shape});
        auto weights = memory::allocate(
            engine,
            {type_to_data_type<WeightsTy>::value, format::bfyx, weights_shape});

        auto biases = memory::allocate(
            engine,
            {type_to_data_type<BiasesTy>::value, format::bfyx, biases_shape});
        auto input_quant_factors = memory::allocate(
            engine, {data_types::f32, format::bfyx, biases_shape});
        auto conv_output_calibration = memory::allocate(
            engine, {data_types::f32, format::bfyx, biases_shape});
        auto sum_input = memory::allocate(
            engine,
            {type_to_data_type<InputTy>::value, format::bfyx, sum_input_shape});
        auto eltw_output_calibration = memory::allocate(
            engine, {data_types::f32, format::bfyx, biases_shape});

        set_values(input, input_values);
        // Expand the per-feature 3x1 filters into a block-diagonal
        // n_features x n_features weights tensor, so that each output
        // feature depends only on the matching input feature.
        std::vector<WeightsTy> post_processed_weights_values(n_features
                                                             * n_features * 3);
        for (int output_feature = 0; output_feature < n_features; ++output_feature)
            for (int input_feature = 0; input_feature < n_features;
                 ++input_feature)
                for (int x = 0; x < 3; ++x)
                {
                    int idx =
                        output_feature * n_features * 3 + input_feature * 3 + x;
                    if (input_feature == output_feature)
                        post_processed_weights_values[idx] =
                            weights_values[input_feature * 3 + x];
                    else
                        post_processed_weights_values[idx] = 0;
                }
        set_values(weights, post_processed_weights_values);
        set_values(biases, biases_values);
        set_values(input_quant_factors, input_quant_factors_values);
        set_values(conv_output_calibration, calibration_values);
        set_values(sum_input, non_conv_input_values);
        set_values(eltw_output_calibration, eltw_output_calibration_values);

        the_topology.add(input_layout("input", input.get_layout()));
        the_topology.add(data("weights", weights));
        the_topology.add(data("biases", biases));
        the_topology.add(data("sum_input", sum_input));
        the_topology.add(data("input_quant_factors", input_quant_factors));
        the_topology.add(data("conv_output_calibration", conv_output_calibration));
        the_topology.add(data("eltw_output_calibration", eltw_output_calibration));
        the_topology.add(fused_prim);

        build_options opts;
        // Keep graph optimizations off: the fused primitive itself is under
        // test, not the fusion pass.
        opts.set_option(build_option::optimize_data(false));

        network network(engine, the_topology, opts);
        network.set_input_data("input", input);

        auto outputs = network.execute();

        auto output_memory = outputs.at("fused_conv").get_memory();
        auto output_layout = output_memory.get_layout();
        auto output_ptr = output_memory.pointer<OutputTy>();
        int y_size = output_layout.size.spatial[1];
        int x_size = output_layout.size.spatial[0];
        int f_size = output_layout.size.feature[0];
        int b_size = output_layout.size.batch[0];
        EXPECT_EQ(output_layout.format, format::bfyx);
        EXPECT_EQ(y_size, 1);
        EXPECT_EQ(x_size, 2);
        EXPECT_EQ(f_size, n_features);
        EXPECT_EQ(b_size, 1);

        // With b == 1 and y == 1 (checked above) the bfyx buffer is
        // feature-major, so element (f, x) lives at offset f * x_size + x.
        for (int f = 0; f < f_size; f++)
            for (int x = 0; x < x_size; ++x)
            {
                // printf("f: %d, x: %d\n", f, x);
                OutputPreActivationTy expected =
                    pre_relu_to_output(output_pre_relu[f * x_size + x]);
                auto actual = static_cast<OutputPreActivationTy>(
                    output_ptr[f * x_size + x]);
                expect_eq(expected, actual);
            }
    }

private:
    // Floating-point comparison: allow a small absolute tolerance.
    template<typename T = OutputPreActivationTy>
    static typename std::enable_if<std::is_floating_point<T>::value>::type
    expect_eq(const OutputPreActivationTy& lhs, const OutputPreActivationTy& rhs)
    {
        EXPECT_NEAR(lhs, rhs, 0.001f);
    }

    // Integer comparison: must match exactly.
    template<typename T = OutputPreActivationTy>
    static typename std::enable_if<std::is_integral<T>::value>::type
    expect_eq(const OutputPreActivationTy& lhs, const OutputPreActivationTy& rhs)
    {
        EXPECT_EQ(lhs, rhs);
    }

    // Applies ReLU (max with 0) and clamps to OutputTy's representable
    // range, modelling the primitive's final activation + type conversion.
    template <typename T>
    static T pre_relu_to_output(T pre_relu) {
      // No std::clamp before C++17 :(
      return std::min(
          static_cast<T>(std::numeric_limits<OutputTy>::max()),
          std::max(static_cast<T>(std::numeric_limits<OutputTy>::lowest()),
                   std::max(static_cast<T>(0), pre_relu)));
    }
};
288
// Pure-float instantiation: no integer quantization anywhere in the pipeline.
class FusedConvTest_all_float : public FusedConvTest<float, float>
{};
291
292 TEST_F(FusedConvTest_all_float, basic) {
293     add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
294                 {2.0f, 0.0f, 1.0f},           // weights
295                 1.0f,                         // bias
296                 1.0f,                         // conv_input_quant
297                 1.0f,                         // conv_output_calibration
298                 {-10.0f, -10.0f},             // non_conv_input
299                 1.0f,                         // eltw_output_calibration
300                 {241.0f, 242.0f});            // output_pre_relu
301
302     add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
303                 {2.0f, 0.0f, 1.0f},           // weights
304                 0.0f,                         // bias
305                 1.0f,                         // conv_input_quant
306                 1.0f,                         // conv_output_calibration
307                 {-10.0f, -11.0f},             // non_conv_input
308                 2.0f,                         // eltw_output_calibration
309                 {480.0f, 480.0f});            // output_pre_relu
310
311     do_test(fused_conv_eltwise("fused_conv",
312                                "input",
313                                "sum_input",
314                                eltwise_mode::sum,
315                                {"weights"},
316                                {"biases"},
317                                {"input_quant_factors"},
318                                {"conv_output_calibration"},
319                                1.0f, // conv_i_quantization_factor
320                                1.0f, // non_conv_scale
321                                "eltw_output_calibration",
322                                {{1, 1, 1, 1}}, // eltw_stride
323                                {1, 1, 1, 1},   // stride
324                                {0, 0, 0, 0},   // input_offset
325                                {1, 1, 1, 1},   // dilation
326                                false,          // conv_with_activation
327                                0.0f,           // con_activation_slp
328                                true,           // eltw_activation
329                                0.0f));         // eltw_activation_slp
330 }
331
// Float-to-float as well, but the primitive is constructed without the conv
// output calibration input (see the TEST_F body for the rationale).
class FusedConvTest_no_conv_calibration : public FusedConvTest<float, float>
{};
334
335 TEST_F(FusedConvTest_no_conv_calibration, basic) {
336     // That might happen if both conv output and non-conv input happen to be
337     // normalized to the same dynamic range of if tensor-wise (instead of
338     // per-channel) calibration is used. Also, a similar thing might happen for
339     // a convolution with calibration without quantization (which is the real
340     // target of this test, needed for the Inference Engine).
341
342     // add_feature contains data for conv quantization/calibration, but the
343     // primitive won't use it. It's just much easier to unify different tests
344     // this way.
345     add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
346                 {2.0f, 0.0f, 1.0f},           // weights
347                 1.0f,                         // bias
348                 1.0f,                         // conv_input_quant
349                 ignore,                       // conv_output_calibration
350                 {-10.0f, -10.0f},             // non_conv_input
351                 1.0f,                         // eltw_output_calibration
352                 {241.0f, 242.0f});            // output_pre_relu
353
354     add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
355                 {2.0f, 0.0f, 1.0f},           // weights
356                 0.0f,                         // bias
357                 1.0f,                         // conv_input_quant
358                 ignore,                       // conv_output_calibration
359                 {-10.0f, -11.0f},             // non_conv_input
360                 2.0f,                         // eltw_output_calibration
361                 {480.0f, 480.0f});            // output_pre_relu
362
363     do_test(fused_conv_eltwise("fused_conv",
364                                "input",
365                                "sum_input",
366                                eltwise_mode::sum,
367                                {"weights"},
368                                {"biases"},
369                                {"input_quant_factors"},
370                                {},   // conv_output_calibration
371                                1.0f, // conv_i_quantization_factor
372                                1.0f, // non_conv_scale
373                                "eltw_output_calibration",
374                                {{1, 1, 1, 1}}, // eltw_stride
375                                {1, 1, 1, 1},   // stride
376                                {0, 0, 0, 0},   // input_offset
377                                {1, 1, 1, 1},   // dilation
378                                false,          // conv_with_activation
379                                0.0f,           // con_activation_slp
380                                true,           // eltw_activation
381                                0.0f));         // eltw_activation_slp
382 }
383
// int8-to-int8 path; exercises the per-primitive non_conv_scale factor that
// is applied to the eltwise ("sum") input.
class FusedConvTest_non_conv_scale_per_primitive : public FusedConvTest<int8_t, int8_t>
{};
386
387 TEST_F(FusedConvTest_non_conv_scale_per_primitive, basic) {
388     // NOTE: The data in add_feature calls implicitly assumes this!
389     const float non_conv_scale = 2.0f; // TODO: Need per-channel too?
390
391     // Check that the output precision is `u8` indeed. If it was not, than 251
392     // would eighter be rounded to 250 or 252. Ensure it's not the case and the
393     // outputs actually differ.
394     add_feature({125, 125, 0, 1}, {2, 0, 1}, 1, 1.0f, ignore, {-10, -10}, 1.0f, {231, 232});
395     add_feature({125, 125, 0, 1}, {2, 0, 1}, 0, 1.0f, ignore, {-10, -10}, 1.0f, {230, 231});
396
397     // Verify that activation is done before the final calibration+type
398     // conversion (in other words, in higher precision than the output).
399     add_feature({0, 50, 0, -50}, {0, 4, 4}, 1, 1.0f, ignore, {-10, -10}, 1.0f, {181, -219});
400     add_feature({0, 50, 0, -50}, {0, 4, 4}, 1, 1.0f, ignore, {-5, -5}, 1.0f, {191, -209});
401
402     // Same but with non-unit calibration (just in case).
403     add_feature({0, 50, 0, -50}, {0, 8, 8}, 2, 1.0f, ignore, {10, 10}, 0.5f, {211, -189});
404
405     do_test(fused_conv_eltwise("fused_conv",
406                                "input",
407                                "sum_input",
408                                eltwise_mode::sum,
409                                {"weights"},
410                                {"biases"},
411                                {"input_quant_factors"},
412                                {},   // conv_output_calibration
413                                1.0f, // conv_i_quantization_factor
414                                non_conv_scale, // non_conv_scale
415                                "eltw_output_calibration",
416                                {{1, 1, 1, 1}}, // eltw_stride
417                                {1, 1, 1, 1},   // stride
418                                {0, 0, 0, 0},   // input_offset
419                                {1, 1, 1, 1},   // dilation
420                                false,          // conv_with_activation
421                                0.0f,           // con_activation_slp
422                                true,           // eltw_activation
423                                0.0f));         // eltw_activation_slp
424 }
425
// int8 inputs with the fused primitive's output forced to unsigned 8-bit.
class FusedConvTest_i8_to_u8_quantized : public FusedConvTest<int8_t, uint8_t>
{};
428
429 TEST_F(FusedConvTest_i8_to_u8_quantized, basic) {
430     add_feature({125, 125, 0, 1}, {2, 0, 1}, 1, ignore, ignore, {-10, -10}, 1, {241, 242});
431     add_feature({125, 125, 0, 1}, {2, 0, 1}, 0, ignore, ignore, {-10, -11}, 2, {480, 480});
432
433     do_test(fused_conv_eltwise("fused_conv",
434                                "input",
435                                "sum_input",
436                                eltwise_mode::sum,
437                                {"weights"},
438                                {"biases"},
439                                {},   // input_quant_factors
440                                {},   // conv_output_calibration
441                                1.0f, // conv_i_quantization_factor
442                                1.0f, // non_conv_scale
443                                "eltw_output_calibration",
444                                std::vector<tensor>{tensor{1, 1, 1, 1}}, // eltw_stride
445                                tensor{1, 1, 1, 1},   // stride
446                                tensor{0, 0, 0, 0},   // input_offset
447                                tensor{1, 1, 1, 1},   // dilation
448                                false,          // conv_with_activation
449                                0.0f,           // con_activation_slp
450                                true,           // eltw_activation
451                                0.0f,           // eltw_activation_slp
452                                padding(),
453                                optional_data_type{data_types::u8}));
454 }
455
// int8 to unsigned 8-bit output, built without an eltwise output
// calibration memory.
class FusedConvTest_i8_to_u8_no_eltw_calibration
    : public FusedConvTest<int8_t, uint8_t>
{};
459
460 TEST_F(FusedConvTest_i8_to_u8_no_eltw_calibration, basic) {
461     const float non_conv_scale = 1.0f / 3.0f;
462
463     add_feature({124, 124, 0, -4},             // input
464                 {2, 0, 1},                     // weights
465                 4,                             // bias
466                 0.5f,                          // conv_input_quant
467                 ignore,                        // conv_output_calibration
468                 {-60, -60},                    // non_conv_input
469                 ignore,                        // eltw_output_calibration
470                 {252 / 2 - 20, 248 / 2 - 20}); // output_pre_relu
471
472     add_feature({3, 3, 1, 1}, // input
473                 {2, 0, 1},    // weights
474                 0,            // bias
475                 1.0f / 3.0f,  // conv_input_quant
476                 ignore,       // conv_output_calibration
477                 {1, 1},       // eltw_sum_input
478                 ignore,       // eltw_output_calibration
479                 // TODO: Do we really need that round? Should it be "3" instead?
480                 // { round(2.333) + round (0.333) }
481                 {2, 2}); // output_pre_relu
482
483     do_test(fused_conv_eltwise("fused_conv",
484                                "input",
485                                "sum_input",
486                                eltwise_mode::sum,
487                                {"weights"},
488                                {"biases"},
489                                {"input_quant_factors"},
490                                {}, // conv_output_calibration
491                                1.0f, // conv_i_quantization_factor
492                                non_conv_scale,
493                                {},             // eltw_output_calibration
494                                std::vector<tensor>{tensor{1, 1, 1, 1}}, // eltw_stride
495                                tensor{1, 1, 1, 1},   // stride
496                                tensor{0, 0, 0, 0},   // input_offset
497                                tensor{1, 1, 1, 1},   // dilation
498                                false,          // conv_with_activation
499                                0.0f,           // con_activation_slp
500                                true,           // eltw_activation
501                                0.0f,           // eltw_activation_slp
502                                padding(),
503                                optional_data_type{data_types::u8}));
504 }