2 // Copyright (c) 2016-2019 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
19 #include <gtest/gtest.h>
20 #include <gmock/gmock.h>
21 #include "api/CPP/memory.hpp"
22 #include <api/CPP/input_layout.hpp>
23 #include "api/CPP/convolution.hpp"
24 #include <api/CPP/topology.hpp>
25 #include <api/CPP/network.hpp>
26 #include <api/CPP/engine.hpp>
27 #include "test_utils/test_utils.h"
28 #include "test_utils/float16.h"
29 #include <api/CPP/data.hpp>
36 #include <api/CPP/reorder.hpp>
38 using namespace cldnn;
39 using namespace tests;
// Map the test helper FLOAT16 type onto cldnn::data_types::f16 so that
// templated tests can resolve the cldnn data type from a C++ element type.
44 template<> struct type_to_data_type<FLOAT16> { static const data_types value = data_types::f16; };
// Compensated (Kahan) summation over `input`, used by the reference
// convolution below to reduce floating-point accumulation error.
// NOTE(review): function body not visible in this view -- presumably returns
// the compensated sum of all elements; confirm against the full source.
50 T kahan_summation(std::vector<T> &input) {
// Reference (CPU) 2D convolution used as ground truth in the GPU tests.
// input  : [feature][y][x] volume; filter : [input-feature][y][x] weights for
// one output feature. Returns a single [y][x] output plane, pre-filled with
// `bias` and with optional zero-padding rings of size output_padding_{y,x}.
// f_begin offsets which input features this filter consumes (for split/group
// convolutions). Out-of-bounds taps (from input padding) are skipped, which
// is equivalent to zero-padding the input.
63 VVF<T> reference_convolve(VVVF<T> &input, VVVF<T> &filter, int stride_y, int stride_x, float bias, int dilation_y = 1, int dilation_x = 1,
64 int input_padding_y = 0, int input_padding_x = 0, int output_padding_y = 0,
65 int output_padding_x = 0, size_t f_begin = 0)
    // Effective kernel footprint once dilation is applied.
67 size_t kernel_extent_y = dilation_y * (filter[0].size() - 1) + 1;
68 size_t kernel_extent_x = dilation_x * (filter[0][0].size() - 1) + 1;
    // Standard conv output size formula plus the requested output padding ring.
69 size_t output_y = 1 + (input[0].size() - kernel_extent_y + 2 * input_padding_y) / stride_y + 2 * output_padding_y;
70 size_t output_x = 1 + (input[0][0].size() - kernel_extent_x + 2 * input_padding_x) / stride_x + 2 * output_padding_x;
    // Initialize every output cell (including the padding ring) with the bias.
71 VVF<T> output(output_y, VF<T>(output_x, bias));
72 for (size_t f = 0; f < filter.size(); ++f) {
73 for (size_t y = 0; y < (output_y - 2 * output_padding_y); ++y) {
74 for (size_t x = 0; x < (output_x - 2 * output_padding_x); ++x) {
            // `values` collects all products for one output cell; declared in
            // a line not visible here. reserve() avoids reallocation.
76 values.reserve(filter[0].size() * filter[0][0].size());
77 for (size_t yf = 0; yf < filter[0].size(); ++yf) {
                // Input row hit by this filter tap; negative / overflow rows
                // fall in the zero-padding area and contribute nothing.
78 int yi = -input_padding_y + (int)yf * dilation_y + stride_y * (int)y;
79 if (yi < 0 || (int)input[0].size() <= yi) continue;
80 for (size_t xf = 0; xf < filter[0][0].size(); ++xf) {
81 int xi = -input_padding_x + (int)xf * dilation_x + stride_x * (int)x;
82 if (xi < 0 || (int)input[0][0].size() <= xi) continue;
83 values.push_back(input[f_begin + f][yi][xi] * filter[f][yf][xf]);
            // Accumulate with Kahan summation for numerically stable totals.
86 output[y + output_padding_y][x + output_padding_x] += kahan_summation<T>(values);
// Debug helper: dump a float memory buffer to a text file named `name`,
// printing layout metadata followed by the values, grouped per batch and
// feature and laid out as y-by-x grids. Indexing uses the layout's pitches,
// so padded buffers are dumped correctly.
// NOTE(review): reads the buffer as const float -- only valid for f32 memory.
93 void dump_buffer(memory const& mem, std::string const& name)
95 std::ofstream out(name);
96 auto size = mem.get_layout().get_buffer_size();
97 auto ptr = mem.pointer<const float>();
98 auto pitches = mem.get_layout().get_pitches();
99 out << "Data size: " << mem.get_layout().size << "\n";
100 out << "Lower padding: " << mem.get_layout().data_padding.lower_size() << "\n";
101 out << "Upper padding: " << mem.get_layout().data_padding.upper_size() << "\n";
104 for (int b = 0; b < size.batch[0]; ++b)
106 out << " ================ BATCH " << b << " =================\n\n";
107 for (int f = 0; f < size.feature[0]; ++f)
109 out << "feature " << f << ":\n";
110 for (int y = 0; y < size.spatial[1]; ++y)
112 for (int x = 0; x < size.spatial[0]; ++x)
            // Linear offset computed from per-dimension pitches, so padding
            // between rows/features/batches is skipped correctly.
114 size_t idx = b * pitches.batch[0] + f * pitches.feature[0] + y * pitches.spatial[1] + x * pitches.spatial[0];
115 out << ptr[idx] << " ";
// f32 convolution without bias: 5x4 input, 3x2 kernel, stride {x=1,y=2},
// yxfb layout. Verifies the single "conv" output against hand-computed
// values in output_vec.
128 TEST(convolution_f32_fw_gpu, basic_convolution_no_bias) {
148 const auto& engine = get_test_engine();
150 auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ 1, 1, 5, 4 } });
151 auto weights = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 3, 2 } });
153 set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, 6.0f, 3.0f, 3.0f, 3.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f });
154 set_values(weights, { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f });
    // Expected 2x3 output (rows = y, cols = x) for the data above.
155 VVF<float> output_vec = {
156 { 20.0f, 27.0f, 38.0f },
157 { 17.0f, 19.0f, 19.0f } };
160 input_layout("input", input.get_layout()),
161 data("weights", weights),
    // Stride tensor is {b,f,x,y} = {1,1,1,2}: stride 1 in x, 2 in y.
162 convolution("conv", "input", { "weights" }, { 1,1,1,2 }));
164 network network(engine, topology);
165 network.set_input_data("input", input);
167 auto outputs = network.execute();
168 EXPECT_EQ(outputs.size(), size_t(1));
169 EXPECT_EQ(outputs.begin()->first, "conv");
171 auto output_memory = outputs.at("conv").get_memory();
172 auto output_layout = output_memory.get_layout();
173 auto output_ptr = output_memory.pointer<float>();
175 int y_size = output_layout.size.spatial[1];
176 int x_size = output_layout.size.spatial[0];
177 int f_size = output_layout.size.feature[0];
178 int b_size = output_layout.size.batch[0];
179 EXPECT_EQ(output_layout.format, format::yxfb);
180 EXPECT_EQ(y_size, 2);
181 EXPECT_EQ(x_size, 3);
182 EXPECT_EQ(f_size, 1);
183 EXPECT_EQ(b_size, 1);
    // With f=b=1 the yxfb buffer is a plain row-major y*x grid.
184 for (int y = 0; y < y_size; ++y) {
185 for (int x = 0; x < x_size; ++x) {
186 EXPECT_EQ(output_vec[y][x], output_ptr[y * x_size + x]);
190 //VVF temp_vec(y_size, VF(x_size, 0.0f));
191 //for (int y = 0; y < y_size; ++y) {
192 // for (int x = 0; x < x_size; ++x) {
193 // temp_vec[y][x] = output_ptr[y * x_size + x];
196 //print_2d(temp_vec);
// int8 convolution without bias: f32 input values are reordered to i8 before
// the convolution, and the i8 result is reordered back to f32 for checking.
// NOTE(review): the fractional inputs (1.1, 2.4, ...) are quantized by the
// "to_int" reorder; expected outputs equal the integer-input case -- this
// presumes round-toward-nearest truncation matching output_vec; confirm the
// reorder's rounding mode if this test flakes.
200 TEST(convolution_f32_fw_gpu, basic_convolution_int8_no_bias) {
220 const auto& engine = get_test_engine();
222 auto input = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 5, 4 } });
223 auto weights = memory::allocate(engine, { data_types::i8,format::bfyx,{ 1, 1, 3, 2 } });
225 set_values(input, { 1.1f, 2.4f, 3.5f, 4.5f, 5.8f, 2.9f, 2.3f, 3.5f, 4.4f, 6.6f, 3.8f, 3.9f, 3.4f, 5.1f, 1.4f, 1.8f, 1.1f, 1.2f, 1.2f, 1.9f });
226 set_values<char>(weights, { 1, 2, 1, 2, 1, 2 });
227 VVF<float> output_vec = {
228 { 20.0f, 27.0f, 38.0f },
229 { 17.0f, 19.0f, 19.0f } };
232 input_layout("input", input.get_layout()),
    // Quantize the f32 input to i8 so the convolution runs in int8.
233 reorder("to_int","input", { data_types::i8,format::bfyx,{ 1, 1, 5, 4 } }),
234 data("weights", weights),
235 convolution("conv", "to_int", { "weights" }, { 1,1,1,2 }),
    // Convert the int8 result back to f32 for comparison.
236 reorder("output", "conv", { data_types::f32,format::bfyx,{ 1, 1, 3, 2 } }));
238 network network(engine, topology);
239 network.set_input_data("input", input);
241 auto outputs = network.execute();
242 EXPECT_EQ(outputs.size(), size_t(1));
243 EXPECT_EQ(outputs.begin()->first, "output");
245 auto output_memory = outputs.at("output").get_memory();
246 auto output_layout = output_memory.get_layout();
247 auto output_ptr = output_memory.pointer<float>();
249 int y_size = output_layout.size.spatial[1];
250 int x_size = output_layout.size.spatial[0];
251 int f_size = output_layout.size.feature[0];
252 int b_size = output_layout.size.batch[0];
253 EXPECT_EQ(output_layout.format, format::bfyx);
254 EXPECT_EQ(y_size, 2);
255 EXPECT_EQ(x_size, 3);
256 EXPECT_EQ(f_size, 1);
257 EXPECT_EQ(b_size, 1);
258 for (int y = 0; y < y_size; ++y) {
259 for (int x = 0; x < x_size; ++x) {
260 EXPECT_EQ(output_vec[y][x], output_ptr[y * x_size + x]);
// Smoke test for convolution::create_with_output_size: builds two convs on a
// 320x320 input with an explicitly requested output size (stride-2 -> 160x160
// and stride-1 -> 320x320, both with input offset -3). Only checks that both
// primitives execute and are present in the outputs map -- no value checks.
265 TEST(convolution_f32_fw_gpu, with_output_size_same_input) {
267 const auto& engine = get_test_engine();
269 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 320, 320 } });
270 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 64, 4, 7, 7 } });
271 auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 64, 4, 7, 7 } });
274 input_layout("input", input.get_layout()),
275 data("weights", weights),
276 data("weights2", weights2),
277 convolution::create_with_output_size("conv1", "input", { "weights" }, {1, 64, 160, 160}, {1, 1, 2, 2}, {0, 0, -3, -3}),
278 convolution::create_with_output_size("conv2", "input", { "weights2" }, {1, 64, 320, 320}, {1, 1, 1, 1}, {0, 0, -3, -3})
281 network network(engine, topology);
282 network.set_input_data("input", input);
284 auto outputs = network.execute();
285 EXPECT_EQ(outputs.size(), size_t(2));
    // Outputs map is ordered by primitive id, so conv1 is first, conv2 last.
286 EXPECT_EQ(outputs.begin()->first, "conv1");
287 EXPECT_EQ(outputs.rbegin()->first, "conv2");
// Three chained 1x1 convolutions sharing one weights primitive, with
// optimize_data enabled (exercises weight-sharing / graph optimization).
// All-ones 1x2x2x2 input and all-ones 2x2x1x1 weights: each conv doubles
// every value (sum over 2 input features), so after three convs every
// output element is 2^3 = 8.
290 TEST(convolution_f32_fw_gpu, three_convolutions_same_weights) {
307 const auto& engine = get_test_engine();
309 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {1,2,2,2} });
310 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 2,2,1,1 } });
312 set_values(input, { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f });
313 set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f });
317 input_layout("input", input.get_layout()),
318 data("weights", weights),
319 convolution("conv1", "input", { "weights" }),
320 convolution("conv2", "conv1", { "weights" }),
321 convolution("conv3", "conv2", { "weights" })
    // optimize_data(true) enables graph-level optimizations (reorders,
    // weight reuse) -- the point of this test.
324 cldnn::build_options options;
325 options.set_option(cldnn::build_option::optimize_data(true));
326 network network(engine, topology, options);
327 network.set_input_data("input", input);
329 auto outputs = network.execute();
331 auto output_memory = outputs.at("conv3").get_memory();
332 auto output_layout = output_memory.get_layout();
333 auto output_ptr = output_memory.pointer<float>();
335 int y_size = output_layout.size.spatial[1];
336 int x_size = output_layout.size.spatial[0];
337 int f_size = output_layout.size.feature[0];
338 int b_size = output_layout.size.batch[0];
340 EXPECT_EQ(output_layout.format, format::bfyx);
341 EXPECT_EQ(y_size, 2);
342 EXPECT_EQ(x_size, 2);
343 EXPECT_EQ(f_size, 2);
344 EXPECT_EQ(b_size, 1);
    // NOTE(review): only the first feature plane (f=0) is checked here; the
    // loop does not iterate over f. Both planes should equal 8 -- consider
    // extending the check.
346 for (int y = 0; y < y_size; ++y) {
347 for (int x = 0; x < x_size; ++x) {
348 EXPECT_FLOAT_EQ(8.0f, output_ptr[y * x_size + x]);
// Same data as basic_convolution_no_bias but with a bias of 1.0 added:
// every expected output value is the no-bias result plus one.
354 TEST(convolution_f32_fw_gpu, basic_convolution) {
377 const auto& engine = get_test_engine();
379 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 5, 4 } });
380 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 2 } });
381 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
383 set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, 6.0f, 3.0f, 3.0f, 3.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f });
384 set_values(weights, { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f });
385 set_values(biases, { 1.0f });
386 VVF<float> output_vec = {
387 { 21.0f, 28.0f, 39.0f },
388 { 18.0f, 20.0f, 20.0f } };
391 input_layout("input", input.get_layout()),
392 data("weights", weights),
393 data("biases", biases),
    // Stride {b,f,x,y} = {0,0,1,2}; batch/feature stride entries are unused.
394 convolution( "conv", "input", { "weights" }, { "biases" }, { 0,0,1,2 }));
396 network network(engine, topology);
397 network.set_input_data("input", input);
399 auto outputs = network.execute();
400 EXPECT_EQ(outputs.size(), size_t(1));
401 EXPECT_EQ(outputs.begin()->first, "conv");
403 auto output_memory = outputs.at("conv").get_memory();
404 auto output_layout = output_memory.get_layout();
405 auto output_ptr = output_memory.pointer<float>();
407 int y_size = output_layout.size.spatial[1];
408 int x_size = output_layout.size.spatial[0];
409 int f_size = output_layout.size.feature[0];
410 int b_size = output_layout.size.batch[0];
411 EXPECT_EQ(output_layout.format, format::yxfb);
412 EXPECT_EQ(y_size, 2);
413 EXPECT_EQ(x_size, 3);
414 EXPECT_EQ(f_size, 1);
415 EXPECT_EQ(b_size, 1);
416 for (int y = 0; y < y_size; ++y) {
417 for (int x = 0; x < x_size; ++x) {
418 EXPECT_EQ(output_vec[y][x], output_ptr[y * x_size + x]);
// Same computation as basic_convolution, but bfyx layout and with weights and
// biases fed as input_layout primitives (set at runtime via set_input_data)
// instead of constant data primitives, with optimize_data enabled. This
// exercises the path where weights are not known at build time.
423 TEST(convolution_f32_fw_gpu, basic_convolution_bfyx_weights_as_input_layout) {
424 //Same params as convolution_f32_fw_gpu, basic_convolution but with bfyx optimized data and weights set as input_layout
425 const auto& engine = get_test_engine();
426 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,
429 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,
432 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,
436 { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, 6.0f, 3.0f, 3.0f, 3.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }
439 { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f }
444 VVF<float> output_vec = {
445 { 21.0f, 28.0f, 39.0f }
447 { 18.0f, 20.0f, 20.0f }
450 input_layout("input", input.get_layout()),
    // Weights/biases as runtime inputs rather than build-time constants.
451 input_layout("weights", weights.get_layout()),
452 input_layout("biases", biases.get_layout()),
453 convolution("conv", "input",
460 cldnn::build_options options;
461 options.set_option(cldnn::build_option::optimize_data(true));
462 network network(engine, topology, options);
463 network.set_input_data("input", input);
464 network.set_input_data("weights", weights);
465 network.set_input_data("biases", biases);
466 auto outputs = network.execute();
467 EXPECT_EQ(outputs.size(), size_t(1));
468 EXPECT_EQ(outputs.begin()->first, "conv");
470 auto output_memory = outputs.at("conv").get_memory();
471 auto output_layout = output_memory.get_layout();
472 auto output_ptr = output_memory.pointer<float>();
474 int y_size = output_layout.size.spatial[1];
475 int x_size = output_layout.size.spatial[0];
476 int f_size = output_layout.size.feature[0];
477 int b_size = output_layout.size.batch[0];
478 EXPECT_EQ(output_layout.format, format::bfyx);
479 EXPECT_EQ(y_size, 2);
480 EXPECT_EQ(x_size, 3);
481 EXPECT_EQ(f_size, 1);
482 EXPECT_EQ(b_size, 1);
483 for (int y = 0; y < y_size; ++y) {
484 for (int x = 0; x < x_size; ++x) {
485 EXPECT_EQ(output_vec[y][x], output_ptr[y * x_size + x]);
// Convolution with input padding 2x1 (2 rows above/below, 1 column left/
// right): the 4x3 input is zero-padded before convolving with a 2x2 all-ones
// kernel plus bias 1, producing a 5x6 output where border cells that only see
// padding equal the bias (1.0).
490 TEST(convolution_f32_fw_gpu, basic_convolution_input_padding) {
494 // Input padding : 2x1
522 const auto& engine = get_test_engine();
524 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 3 } });
525 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
526 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
528 set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f });
529 set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f });
530 set_values(biases, { 1.0f });
531 VVF<float> output_vec = {
532 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
533 { 2.0f, 4.0f, 6.0f, 8.0f, 5.0f },
534 { 4.0f, 8.0f, 11.0f, 15.0f, 9.0f },
535 { 6.0f, 11.0f, 12.0f, 16.0f, 10.0f },
536 { 4.0f, 7.0f, 7.0f, 9.0f, 6.0f },
537 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f } };
540 input_layout("input", input.get_layout()),
541 data("weights", weights),
542 data("biases", biases),
    // Convolution primitive args not visible here; no extra output padding.
553 padding{ { 0,0,0,0 }, 0 })
556 network network(engine, topology);
557 network.set_input_data("input", input);
559 auto outputs = network.execute();
560 EXPECT_EQ(outputs.size(), size_t(1));
561 EXPECT_EQ(outputs.begin()->first, "conv");
563 auto output_memory = outputs.at("conv").get_memory();
564 auto output_layout = output_memory.get_layout();
565 auto output_ptr = output_memory.pointer<float>();
567 int y_size = output_layout.size.spatial[1];
568 int x_size = output_layout.size.spatial[0];
569 int f_size = output_layout.size.feature[0];
570 int b_size = output_layout.size.batch[0];
571 EXPECT_EQ(output_layout.format, format::yxfb);
572 EXPECT_EQ(y_size, 6);
573 EXPECT_EQ(x_size, 5);
574 EXPECT_EQ(f_size, 1);
575 EXPECT_EQ(b_size, 1);
577 for (int y = 0; y < y_size; ++y) {
578 for (int x = 0; x < x_size; ++x) {
579 EXPECT_EQ(output_vec[y][x], output_ptr[y * x_size + x]);
583 //VVF temp_vec(y_size, VF(x_size, 0.0f));
584 //for (int y = 0; y < y_size; ++y) {
585 // for (int x = 0; x < x_size; ++x) {
586 // temp_vec[y][x] = output_ptr[y * x_size + x];
589 //print_2d(temp_vec);
// Symmetric input padding variant: padding of 2x1 applied both above and
// below the input. Same data and expected 6x5 result as
// basic_convolution_input_padding, but specified via the symmetric-padding
// convolution constructor.
592 TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding) {
596 // Input padding : above 2x1, below 2x1
624 const auto& engine = get_test_engine();
626 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 4, 3 } });
627 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
628 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
630 set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f });
631 set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f });
632 set_values(biases, { 1.0f });
633 VVF<float> output_vec = {
634 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
635 { 2.0f, 4.0f, 6.0f, 8.0f, 5.0f },
636 { 4.0f, 8.0f, 11.0f, 15.0f, 9.0f },
637 { 6.0f, 11.0f, 12.0f, 16.0f, 10.0f },
638 { 4.0f, 7.0f, 7.0f, 9.0f, 6.0f },
639 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f } };
642 input_layout("input", input.get_layout()),
643 data("weights", weights),
644 data("biases", biases),
    // Convolution primitive args not visible here; no extra output padding.
657 padding{ { 0,0,0,0 }, 0 })
660 network network(engine, topology);
661 network.set_input_data("input", input);
663 auto outputs = network.execute();
664 EXPECT_EQ(outputs.size(), size_t(1));
665 EXPECT_EQ(outputs.begin()->first, "conv");
667 auto output_memory = outputs.at("conv").get_memory();
668 auto output_layout = output_memory.get_layout();
669 auto output_ptr = output_memory.pointer<float>();
671 int y_size = output_layout.size.spatial[1];
672 int x_size = output_layout.size.spatial[0];
673 int f_size = output_layout.size.feature[0];
674 int b_size = output_layout.size.batch[0];
675 EXPECT_EQ(output_layout.format, format::yxfb);
676 EXPECT_EQ(y_size, 6);
677 EXPECT_EQ(x_size, 5);
678 EXPECT_EQ(f_size, 1);
679 EXPECT_EQ(b_size, 1);
681 for (int y = 0; y < y_size; ++y) {
682 for (int x = 0; x < x_size; ++x) {
683 EXPECT_EQ(output_vec[y][x], output_ptr[y * x_size + x]);
// Asymmetric input padding: 2x1 above/left, 3x2 below/right. The extra
// bottom/right padding extends the output to 7x6; the added border cells see
// only zero padding and therefore equal the bias (1.0).
688 TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding) {
692 // Input padding : above 2x1, below 3x2
722 const auto& engine = get_test_engine();
724 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 4, 3 } });
725 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
726 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
728 set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f });
729 set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f });
730 set_values(biases, { 1.0f });
731 VVF<float> output_vec = {
732 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
733 { 2.0f, 4.0f, 6.0f, 8.0f, 5.0f, 1.0f },
734 { 4.0f, 8.0f, 11.0f, 15.0f, 9.0f, 1.0f },
735 { 6.0f, 11.0f, 12.0f, 16.0f, 10.0f, 1.0f },
736 { 4.0f, 7.0f, 7.0f, 9.0f, 6.0f, 1.0f },
737 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
738 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f } };
741 input_layout("input", input.get_layout()),
742 data("weights", weights),
743 data("biases", biases),
    // Convolution primitive args not visible here; no extra output padding.
756 padding{ { 0,0,0,0 }, 0 })
759 network network(engine, topology);
760 network.set_input_data("input", input);
762 auto outputs = network.execute();
763 EXPECT_EQ(outputs.size(), size_t(1));
764 EXPECT_EQ(outputs.begin()->first, "conv");
766 auto output_memory = outputs.at("conv").get_memory();
767 auto output_layout = output_memory.get_layout();
768 auto output_ptr = output_memory.pointer<float>();
770 int y_size = output_layout.size.spatial[1];
771 int x_size = output_layout.size.spatial[0];
772 int f_size = output_layout.size.feature[0];
773 int b_size = output_layout.size.batch[0];
774 EXPECT_EQ(output_layout.format, format::yxfb);
775 EXPECT_EQ(y_size, 7);
776 EXPECT_EQ(x_size, 6);
777 EXPECT_EQ(f_size, 1);
778 EXPECT_EQ(b_size, 1);
780 for (int y = 0; y < y_size; ++y) {
781 for (int x = 0; x < x_size; ++x) {
782 EXPECT_EQ(output_vec[y][x], output_ptr[y * x_size + x]);
// Symmetric input padding (2x1 above and below) combined with an input
// offset: the combined padding yields a 10x7 output, with a bias-only (1.0)
// border around the 4x5 convolved core.
787 TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding_with_input_offset) {
791 // Input padding : above 2x1, below 2x1
828 const auto& engine = get_test_engine();
830 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 4, 3 } });
831 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
832 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
834 set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f });
835 set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f });
836 set_values(biases, { 1.0f });
837 VVF<float> output_vec = {
838 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
839 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
840 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
841 { 1.0f, 2.0f, 4.0f, 6.0f, 8.0f, 5.0f, 1.0f },
842 { 1.0f, 4.0f, 8.0f, 11.0f, 15.0f, 9.0f, 1.0f },
843 { 1.0f, 6.0f, 11.0f, 12.0f, 16.0f, 10.0f, 1.0f },
844 { 1.0f, 4.0f, 7.0f, 7.0f, 9.0f, 6.0f, 1.0f },
845 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
846 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
847 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f } };
850 input_layout("input", input.get_layout()),
851 data("weights", weights),
852 data("biases", biases),
    // Convolution primitive args not visible here; no extra output padding.
865 padding{ { 0,0,0,0 }, 0 })
868 network network(engine, topology);
869 network.set_input_data("input", input);
871 auto outputs = network.execute();
872 EXPECT_EQ(outputs.size(), size_t(1));
873 EXPECT_EQ(outputs.begin()->first, "conv");
875 auto output_memory = outputs.at("conv").get_memory();
876 auto output_layout = output_memory.get_layout();
877 auto output_ptr = output_memory.pointer<float>();
879 int y_size = output_layout.size.spatial[1];
880 int x_size = output_layout.size.spatial[0];
881 int f_size = output_layout.size.feature[0];
882 int b_size = output_layout.size.batch[0];
883 EXPECT_EQ(output_layout.format, format::yxfb);
884 EXPECT_EQ(y_size, 10);
885 EXPECT_EQ(x_size, 7);
886 EXPECT_EQ(f_size, 1);
887 EXPECT_EQ(b_size, 1);
889 for (int y = 0; y < y_size; ++y) {
890 for (int x = 0; x < x_size; ++x) {
891 EXPECT_EQ(output_vec[y][x], output_ptr[y * x_size + x]);
// Asymmetric input padding (2x1 above/left, 3x2 below/right) combined with an
// input offset: produces an 11x8 output whose border is bias-only (1.0)
// around the same 4x5 convolved core as the other padding tests.
896 TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding_with_input_offset) {
900 // Input padding : above 2x1, below 3x2
929 // 1 6 11 12 16 10 1 1
939 const auto& engine = get_test_engine();
941 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 4, 3 } });
942 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
943 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
945 set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f });
946 set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f });
947 set_values(biases, { 1.0f });
948 VVF<float> output_vec = {
949 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
950 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
951 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
952 { 1.0f, 2.0f, 4.0f, 6.0f, 8.0f, 5.0f, 1.0f, 1.0f },
953 { 1.0f, 4.0f, 8.0f, 11.0f, 15.0f, 9.0f, 1.0f, 1.0f },
954 { 1.0f, 6.0f, 11.0f, 12.0f, 16.0f, 10.0f, 1.0f, 1.0f },
955 { 1.0f, 4.0f, 7.0f, 7.0f, 9.0f, 6.0f, 1.0f, 1.0f },
956 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
957 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
958 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
959 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f } };
962 input_layout("input", input.get_layout()),
963 data("weights", weights),
964 data("biases", biases),
    // Convolution primitive args not visible here; no extra output padding.
977 padding{ { 0,0,0,0 }, 0 })
980 network network(engine, topology);
981 network.set_input_data("input", input);
983 auto outputs = network.execute();
984 EXPECT_EQ(outputs.size(), size_t(1));
985 EXPECT_EQ(outputs.begin()->first, "conv");
987 auto output_memory = outputs.at("conv").get_memory();
988 auto output_layout = output_memory.get_layout();
989 auto output_ptr = output_memory.pointer<float>();
991 int y_size = output_layout.size.spatial[1];
992 int x_size = output_layout.size.spatial[0];
993 int f_size = output_layout.size.feature[0];
994 int b_size = output_layout.size.batch[0];
995 EXPECT_EQ(output_layout.format, format::yxfb);
996 EXPECT_EQ(y_size, 11);
997 EXPECT_EQ(x_size, 8);
998 EXPECT_EQ(f_size, 1);
999 EXPECT_EQ(b_size, 1);
1001 for (int y = 0; y < y_size; ++y) {
1002 for (int x = 0; x < x_size; ++x) {
1003 EXPECT_EQ(output_vec[y][x], output_ptr[y * x_size + x]);
// Input padding 2x1 plus explicit OUTPUT padding (x_pad=2, y_pad=1): the
// output buffer is 9x8 including the padding ring, so sizes are read from
// get_buffer_size() and the value check skips the padded border (those cells
// are buffer padding, not computed values).
1008 TEST(convolution_f32_fw_gpu, basic_convolution_input_and_output_padding) {
1012 // Input padding : 2x1
1030 // 1 1 1 1 1 1 1 1 1
1031 // 1 1 1 1 1 1 1 1 1
1032 // 1 1 2 4 6 8 5 1 1
1033 // 1 1 4 8 11 15 9 1 1
1034 // 1 1 6 11 12 16 10 1 1
1035 // 1 1 4 7 7 9 6 1 1
1036 // 1 1 1 1 1 1 1 1 1
1037 // 1 1 1 1 1 1 1 1 1
1042 const auto& engine = get_test_engine();
1044 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 3 } });
1045 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
1046 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
1048 set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f });
1049 set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f });
1050 set_values(biases, { 1.0f });
1051 VVF<float> output_vec = {
1052 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
1053 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
1054 { 1.0f, 1.0f, 2.0f, 4.0f, 6.0f, 8.0f, 5.0f, 1.0f, 1.0f },
1055 { 1.0f, 1.0f, 4.0f, 8.0f, 11.0f, 15.0f, 9.0f, 1.0f, 1.0f },
1056 { 1.0f, 1.0f, 6.0f, 11.0f, 12.0f, 16.0f, 10.0f, 1.0f, 1.0f },
1057 { 1.0f, 1.0f, 4.0f, 7.0f, 7.0f, 9.0f, 6.0f, 1.0f, 1.0f },
1058 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
1059 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f } };
1061 const int x_pad = 2;
1062 const int y_pad = 1;
1064 input_layout("input", input.get_layout()),
1065 data("weights", weights),
1066 data("biases", biases),
    // Negative lower-padding offsets request an output padding ring.
1077 padding{ { 0,0,-x_pad,-y_pad }, 0 })
1080 network network(engine, topology);
1081 network.set_input_data("input", input);
1083 auto outputs = network.execute();
1084 EXPECT_EQ(outputs.size(), size_t(1));
1085 EXPECT_EQ(outputs.begin()->first, "conv");
1087 auto output_memory = outputs.at("conv").get_memory();
1088 auto output_layout = output_memory.get_layout();
    // Buffer size includes the output padding, unlike output_layout.size.
1089 auto output_size = output_layout.get_buffer_size();
1090 auto output_ptr = output_memory.pointer<float>();
1092 int y_size = output_size.spatial[1];
1093 int x_size = output_size.spatial[0];
1094 int f_size = output_size.feature[0];
1095 int b_size = output_size.batch[0];
1096 EXPECT_EQ(output_layout.format, format::yxfb);
1097 EXPECT_EQ(y_size, 8);
1098 EXPECT_EQ(x_size, 9);
1099 EXPECT_EQ(f_size, 1);
1100 EXPECT_EQ(b_size, 1);
    // Only the interior (non-padded) region holds computed values.
1102 for (int y = y_pad; y < y_size - y_pad; ++y)
1104 for (int x = x_pad; x < x_size - x_pad; ++x)
1106 EXPECT_EQ(output_vec[y][x], output_ptr[y * x_size + x]);
1110 //VVF temp_vec(y_size, VF(x_size, 0.0f));
1111 //for (int y = 0; y < y_size; ++y) {
1112 // for (int x = 0; x < x_size; ++x) {
1113 // temp_vec[y][x] = output_ptr[y * x_size + x];
1116 //print_2d(temp_vec);
1119 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x1x1_nopad_random) {
1142 size_t batch = 1, input_f = 1, input_y = 4, input_x = 4;
1144 VVVVF<float> input_rnd = generate_random_4d<float>(batch, input_f, input_y, input_x, -10, 10);
1145 VF<float> input_rnd_vec = flatten_4d<float>(format::yxfb, input_rnd);
1146 VVVVF<float> filter_rnd = generate_random_4d<float>(1, 1, 2, 2, -10, 10);
1147 VF<float> filter_rnd_vec = flatten_4d<float>(format::bfyx, filter_rnd);
1148 VF<float> bias_rnd = generate_random_1d<float>(1, -10, 10);
1149 VVVVF<float> output_rnd(batch, VVVF<float>(filter_rnd.size()));
1150 for (size_t b = 0; b < output_rnd.size(); ++b) {
1151 for (size_t of = 0; of < filter_rnd.size(); ++of) {
1152 output_rnd[b][of] = reference_convolve<float>(input_rnd[b], filter_rnd[of], 2, 2, bias_rnd[of]);
1155 VF<float> output_rnd_vec = flatten_4d<float>(format::yxfb, output_rnd);
1157 const auto& engine = get_test_engine();
1159 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 4 } });
1160 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 2, 2 }, 1 } });
1161 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
1162 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
1164 set_values(input, input_rnd_vec);
1165 set_values(weights, filter_rnd_vec);
1166 set_values(biases, bias_rnd);
1169 input_layout("input", input.get_layout()),
1170 data("weights", weights),
1171 data("biases", biases),
1172 convolution("conv", "input", {"weights"}, {"biases"}, {1,1,2,2})
1175 network network(engine, topology);
1176 network.set_input_data("input", input);
1178 auto outputs = network.execute();
1179 EXPECT_EQ(outputs.size(), size_t(1));
1180 EXPECT_EQ(outputs.begin()->first, "conv");
1182 auto output_prim = outputs.begin()->second.get_memory();
1184 auto output_ptr = output_prim.pointer<float>();
1186 for (size_t i = 0; i < output_rnd.size(); ++i) {
1187 float x = float_round(output_rnd_vec[i]), y = float_round(output_ptr[i]);
1188 EXPECT_FLOAT_EQ(x, y) << "random seed = " << random_seed << std::endl;
// Randomized batch-2 test: two 2x2 inputs, 2x2 kernel, stride 2x2, no
// padding (each batch produces a single output value). Expected values come
// from the CPU reference_convolve; seed is printed on failure.
1192 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in2x2x1x2_nopad_random) {
1212 size_t batch = 2, input_f = 1, input_y = 2, input_x = 2;
1214 VVVVF<float> input_rnd = generate_random_4d<float>(batch, input_f, input_y, input_x, -10, 10);
1215 VF<float> input_rnd_vec = flatten_4d<float>(format::yxfb, input_rnd);
1216 VVVVF<float> filter_rnd = generate_random_4d<float>(1, 1, 2, 2, -10, 10);
1217 VF<float> filter_rnd_vec = flatten_4d<float>(format::bfyx, filter_rnd);
1218 VF<float> bias_rnd = generate_random_1d<float>(1, -10, 10);
1219 VVVVF<float> output_rnd(batch, VVVF<float>(filter_rnd.size()));
1220 for (size_t b = 0; b < output_rnd.size(); ++b) {
1221 for (size_t of = 0; of < filter_rnd.size(); ++of) {
1222 output_rnd[b][of] = reference_convolve<float>(input_rnd[b], filter_rnd[of], 2, 2, bias_rnd[of]);
1225 VF<float> output_rnd_vec = flatten_4d<float>(format::yxfb, output_rnd);
1227 const auto& engine = get_test_engine();
1229 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 1, 2, 2 } });
1230 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 2,{ 1, 1 }, 1 } });
1231 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
1232 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
1234 set_values(input, input_rnd_vec);
1235 set_values(weights, filter_rnd_vec);
1236 set_values(biases, bias_rnd);
1239 input_layout("input", input.get_layout()),
1240 data("weights", weights),
1241 data("biases", biases),
1242 convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 })
1245 network network(engine, topology);
1246 network.set_input_data("input", input);
1248 auto outputs = network.execute();
1249 EXPECT_EQ(outputs.size(), size_t(1));
1250 EXPECT_EQ(outputs.begin()->first, "conv");
1252 auto output_prim = outputs.begin()->second.get_memory();
1254 auto output_ptr = output_prim.pointer<float>();
    // Here output_rnd.size() == 2 == number of output elements (one per
    // batch), so the bound happens to cover the whole flattened output.
1256 for (size_t i = 0; i < output_rnd.size(); ++i) {
1257 float x = float_round(output_rnd_vec[i]), y = float_round(output_ptr[i]);
1258 EXPECT_FLOAT_EQ(x, y) << "random seed = " << random_seed << std::endl;
// Fixed-values test: 4x4 input, 2x2 kernel, stride 2x2, bias 2.0, no
// padding -> four output values checked individually.
1262 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x1x1_nopad) {
1285 const auto& engine = get_test_engine();
1287 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 4 } });
1288 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 2, 2 }, 1 } });
1289 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
1290 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
1292 set_values(input, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f });
1293 set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
1294 set_values(biases, { 2.0f });
1297 input_layout("input", input.get_layout()),
1298 data("weights", weights),
1299 data("biases", biases),
1300 convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 })
1303 network network(engine, topology);
1304 network.set_input_data("input", input);
1306 auto outputs = network.execute();
1307 EXPECT_EQ(outputs.size(), size_t(1));
1308 EXPECT_EQ(outputs.begin()->first, "conv");
1310 auto output_prim = outputs.begin()->second.get_memory();
1312 auto output_ptr = output_prim.pointer<float>();
    // Expected 2x2 result in row-major order.
1314 EXPECT_FLOAT_EQ(8.0f, output_ptr[0]);
1315 EXPECT_FLOAT_EQ(0.5f, output_ptr[1]);
1316 EXPECT_FLOAT_EQ(6.0f, output_ptr[2]);
1317 EXPECT_FLOAT_EQ(9.0f, output_ptr[3]);
1320 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in2x2x1x2_nopad) {
// Batch of 2: each 2x2 input image is consumed by one 2x2 filter application,
// yielding a single output value per batch element (bias -1.0 added).
// NOTE(review): extraction appears to drop some lines (topology opener, closing
// brace) - verify against the upstream file.
1339 const auto& engine = get_test_engine();
1341 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 1, 2, 2 } });
1342 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 2,{ 1, 1 }, 1 } });
1343 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
1344 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
// yxfb layout: batch is the fastest-varying dimension, so values interleave b0/b1.
1346 set_values(input, { 0.5f, 2.3f, 1.5f, -0.4f, 2.0f, 1.0f, -4.0f, 3.0f });
1347 set_values(weights, { -1.2f, 1.5f, 0.5f, -0.5f });
1348 set_values(biases, { -1.0f });
1351 input_layout("input", input.get_layout()),
1352 data("weights", weights),
1353 data("biases", biases),
1354 convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 } )
1357 network network(engine, topology);
1358 network.set_input_data("input", input);
1360 auto outputs = network.execute();
1361 EXPECT_EQ(outputs.size(), size_t(1));
1362 EXPECT_EQ(outputs.begin()->first, "conv");
1364 auto output_prim = outputs.begin()->second.get_memory();
1366 auto output_ptr = output_prim.pointer<float>();
// One result per batch element.
1368 EXPECT_FLOAT_EQ(3.65f, output_ptr[0]);
1369 EXPECT_FLOAT_EQ(-5.36f, output_ptr[1]);
1372 TEST(convolution_f32_fw_gpu, basic_ofm_wsiz2x1x2x1_in1x2x1_nopad) {
// Two output feature maps from a single 1x2 input column: weights shape is
// { ofm=2, ifm=1, y=2, x=1 }, with one bias per output feature map.
1373 // Filter : 1x2x1x2x1
// NOTE(review): extraction appears to drop some lines (topology opener, closing
// brace) - verify against the upstream file.
1391 const auto& engine = get_test_engine();
1393 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 1, 2 } });
1394 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 1, 1 }, 2 } });
1395 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 2 } });
1396 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 1 } });
1398 set_values(input, { 1.0f, 2.0f });
1399 set_values(weights, { 1.0f, 2.0f, -1.0f, -2.0f });
1400 set_values(biases, { 0.1f, -0.2f });
// Stride { 1,1,5,5 } exceeds the input extent, so only one window position exists.
1403 input_layout("input", input.get_layout()),
1404 data("weights", weights),
1405 data("biases", biases),
1406 convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,5,5 })
1409 network network(engine, topology);
1410 network.set_input_data("input", input);
1412 auto outputs = network.execute();
1413 EXPECT_EQ(outputs.size(), size_t(1));
1414 EXPECT_EQ(outputs.begin()->first, "conv");
1416 auto output_prim = outputs.begin()->second.get_memory();
1418 auto output_ptr = output_prim.pointer<float>();
// One value per output feature map: (1*1 + 2*2) + 0.1 and (1*-1 + 2*-2) - 0.2.
1420 EXPECT_FLOAT_EQ(5.1f, output_ptr[0]);
1421 EXPECT_FLOAT_EQ(-5.2f, output_ptr[1]);
1424 TEST(convolution_f32_fw_gpu, basic_ofm_wsiz3x2x2x1_in2x2x1_nopad) {
// Three output feature maps over two input feature maps: weights shape is
// { ofm=3, ifm=2, y=2, x=1 }, one bias per output feature map.
1425 // Filter : 1x3x2x2x1
1434 // 1.0 2.0 ifm=0 ofm=0
1437 // 5.0 6.0 ifm=0 ofm=1
1440 // 9.0 10.0 ifm=0 ofm=2
// NOTE(review): extraction appears to drop some lines (remaining filter-layout
// comments, topology opener, closing brace) - verify against the upstream file.
1450 const auto& engine = get_test_engine();
1452 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 2, 1, 2 } });
1453 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 1, 1 }, 3 } });
1454 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 2, 1, 2 } });
1455 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 1 } });
1457 set_values(input, { 1.0f, 3.0f, 2.0f, 4.0f });
1458 set_values(weights, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f });
1459 set_values(biases, { -5.0f, -6.0f, -7.0f });
// Stride { 1,1,5,5 } exceeds the input extent, so only one window position exists.
1462 input_layout("input", input.get_layout()),
1463 data("weights", weights),
1464 data("biases", biases),
1465 convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,5,5 })
1468 network network(engine, topology);
1469 network.set_input_data("input", input);
1471 auto outputs = network.execute();
1472 EXPECT_EQ(outputs.size(), size_t(1));
1473 EXPECT_EQ(outputs.begin()->first, "conv");
1475 auto output_prim = outputs.begin()->second.get_memory();
1477 auto output_ptr = output_prim.pointer<float>();
// One accumulated value per output feature map (sum over both ifms, plus bias).
1479 EXPECT_FLOAT_EQ(25.0f, output_ptr[0]);
1480 EXPECT_FLOAT_EQ(64.0f, output_ptr[1]);
1481 EXPECT_FLOAT_EQ(103.0f, output_ptr[2]);
1484 TEST(convolution_f32_fw_gpu, basic_wsiz2x2x1x3_wstr2x2_in2x2x1x1_nopad) {
// Single 2x2 input, three 2x2 filters (ofm=3): one output value per filter.
// The comment rows below list the three filters side by side (ofm 0..2).
1495 // -1.1 1.5 0.1 0.2 2.0 -1.0
1496 // 0.5 -0.5 0.4 0.7 2.5 -1.5
// NOTE(review): extraction appears to drop some lines (topology opener, closing
// brace) - verify against the upstream file.
1506 const auto& engine = get_test_engine();
1508 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
1509 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 1, 1 }, 3 } });
1510 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 2, 2 } });
1511 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 1 } });
1513 set_values(input, { -2.3f, -0.1f, 3.1f, 1.9f });
1514 set_values(weights, { -1.1f, 1.5f, 0.5f, -0.5f, 0.1f, 0.2f, 0.4f, 0.7f, 2.0f, -1.0f, 2.5f, -1.5f });
1515 set_values(biases, { 0.1f, -0.2f, 0.3f });
1518 input_layout("input", input.get_layout()),
1519 data("weights", weights),
1520 data("biases", biases),
1521 convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 })
1524 network network(engine, topology);
1525 network.set_input_data("input", input);
1527 auto outputs = network.execute();
1528 EXPECT_EQ(outputs.size(), size_t(1));
1529 EXPECT_EQ(outputs.begin()->first, "conv");
1531 auto output_prim = outputs.begin()->second.get_memory();
1533 auto output_ptr = output_prim.pointer<float>();
// are_equal (test_utils) is used instead of EXPECT_FLOAT_EQ, allowing a tolerance.
1535 EXPECT_TRUE(are_equal(3.08f, output_ptr[0]));
1536 EXPECT_TRUE(are_equal(2.12f, output_ptr[1]));
1537 EXPECT_TRUE(are_equal(0.7f, output_ptr[2]));
1540 TEST(convolution_f32_fw_gpu, wsiz3x3_wstr2x2_in2x2x1x1_zeropad) {
// Filter (3x3) is larger than the input (2x2); the window extends past the
// input and out-of-range taps contribute zero, producing a single output value.
// NOTE(review): extraction appears to drop some lines (topology opener, closing
// brace) - verify against the upstream file.
1562 const auto& engine = get_test_engine();
1564 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
1565 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 1 } });
1566 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 3 } });
1567 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
1569 set_values(input, { -0.5f, 1.0f, 0.5f, 2.0f });
1570 set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f, 4.0f, -5.0f, 0.5f, 1.5f, -1.5f });
1571 set_values(biases, { 2.0f });
1574 input_layout("input", input.get_layout()),
1575 data("weights", weights),
1576 data("biases", biases),
1577 convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 })
1580 network network(engine, topology);
1581 network.set_input_data("input", input);
1583 auto outputs = network.execute();
1584 EXPECT_EQ(outputs.size(), size_t(1));
1585 EXPECT_EQ(outputs.begin()->first, "conv");
1587 auto output_prim = outputs.begin()->second.get_memory();
1589 auto output_ptr = output_prim.pointer<float>();
// Single expected value: top-left 2x2 filter taps hit the input, rest are zero-padded.
1591 EXPECT_FLOAT_EQ(12.25f, output_ptr[0]);
1594 TEST(convolution_f32_fw_gpu, offsets_wsiz3x3_wstr2x2_in2x2x1x1_zeropad) {
// Same data as wsiz3x3_wstr2x2_in2x2x1x1_zeropad, but with a negative input
// offset and an output padding of 1x1 (see padding{ { 0,0,1,1 }, 0 } below).
1598 // Input offset : -1x-1
1600 // Output offset: 1x1
// NOTE(review): extraction appears to drop several lines here, including the
// convolution(...) call whose argument list ends at the padding line below -
// verify against the upstream file.
1619 const auto& engine = get_test_engine();
1621 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
1622 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 2, 2 }, 1 } });
1623 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 3 } });
1624 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
1626 set_values(input, { -0.5f, 1.0f, 0.5f, 2.0f });
1627 set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f, 4.0f, -5.0f, 0.5f, 1.5f, -1.5f });
1628 set_values(biases, { 2.0f });
1631 input_layout("input", input.get_layout()),
1632 data("weights", weights),
1633 data("biases", biases),
1644 padding{ { 0,0,1,1 }, 0 })
1647 network network(engine, topology);
1648 network.set_input_data("input", input);
1650 auto outputs = network.execute();
1651 EXPECT_EQ(outputs.size(), size_t(1));
1652 EXPECT_EQ(outputs.begin()->first, "conv");
1654 auto output_prim = outputs.begin()->second.get_memory();
1656 auto output_ptr = output_prim.pointer<float>();
// Only index 4 is checked; with the 1x1 output padding this is an interior element.
1658 EXPECT_FLOAT_EQ(-7.25f, output_ptr[4]);
1661 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_split2) {
// Split-2 convolution: the 2-feature input is split into two halves, each
// convolved with its own weights/biases pair (weights1/biases1 for f0,
// weights2/biases2 for f1). Results interleave per-feature in yxfb order.
1673 // f1: 0.5 1.5 2.3 -0.4
// NOTE(review): extraction appears to drop several lines (remaining data-layout
// comments, topology/convolution call structure, closing brace) - verify upstream.
1696 const auto& engine = get_test_engine();
1698 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 2, 4, 4 } });
1699 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 2, 2 }, 2 } });
1700 auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
1701 auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
1702 auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
1703 auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
// yxfb: values interleave feature 0 and feature 1 within each row.
1706 -0.5f, 0.5f, 1.0f, 1.5f, 0.5f, 2.3f, 2.0f, -0.4f,
1707 1.5f, 2.0f, -0.5f, -4.0f, 0.0f, 1.0f, -1.0f, 3.0f,
1708 0.5f, 0.5f, 0.5f, 1.5f, -1.0f, 2.3f, 1.0f, -0.4f,
1709 0.5f, 2.0f, 2.0f, -4.0f, 1.5f, 1.0f, -0.5f, 3.0f
1711 set_values(weights1, { -2.0f, 0.5f, 3.5f, 1.5f });
1712 set_values(biases1, { 2.0f });
1713 set_values(weights2, { -1.2f, 1.5f, 0.5f, -0.5f });
1714 set_values(biases2, { -1.0f });
1717 input_layout("input", input.get_layout()),
1718 data("weights1", weights1),
1719 data("biases1", biases1),
1720 data("weights2", weights2),
1721 data("biases2", biases2),
// The convolution takes both weight/bias primitives, one per split.
1725 { "weights1", "weights2" },
1726 { "biases1", "biases2" },
1732 network network(engine, topology);
1733 network.set_input_data("input", input);
1735 auto outputs = network.execute();
1736 EXPECT_EQ(outputs.size(), size_t(1));
1737 EXPECT_EQ(outputs.begin()->first, "conv");
1739 auto output_prim = outputs.begin()->second.get_memory();
1741 auto output_ptr = output_prim.pointer<float>();
// Even indices come from split 0 (weights1), odd indices from split 1 (weights2).
1743 EXPECT_FLOAT_EQ(8.0f, get_value<float>(output_ptr, 0));
1744 EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 1));
1745 EXPECT_FLOAT_EQ(0.5f, get_value<float>(output_ptr, 2));
1746 EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 3));
1747 EXPECT_FLOAT_EQ(6.0f, get_value<float>(output_ptr, 4));
1748 EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 5));
1749 EXPECT_FLOAT_EQ(9.0f, get_value<float>(output_ptr, 6));
1750 EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 7));
1753 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2) {
// Split-2 convolution with batch 2: both batch elements carry identical data,
// so each expected value appears twice in a row (batch interleaves fastest in yxfb).
1760 // f0b0: -0.5 1 0.5 2
1765 // f0b1: -0.5 1 0.5 2
1770 // f1b0: 0.5 1.5 2.3 -0.4
1775 // f1b1: 0.5 1.5 2.3 -0.4
1796 // 8 8 3.65 3.65 0.5 0.5 -5.36 -5.36
1797 // 6 6 3.65 3.65 9 9 -5.36 -5.36
// NOTE(review): extraction appears to drop several lines (filter comments,
// topology/convolution call structure, closing brace) - verify upstream.
1799 const auto& engine = get_test_engine();
1801 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 4, 4 } });
1802 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 2,{ 2, 2 }, 2 } });
1803 auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
1804 auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
1805 auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
1806 auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
// Each scalar is duplicated for the two (identical) batch elements.
1809 -0.5f, -0.5f, 0.5f, 0.5f, 1.0f, 1.0f, 1.5f, 1.5f, 0.5f, 0.5f, 2.3f, 2.3f, 2.0f, 2.0f, -0.4f, -0.4f,
1810 1.5f, 1.5f, 2.0f, 2.0f, -0.5f, -0.5f, -4.0f, -4.0f, 0.0f, 0.0f, 1.0f, 1.0f, -1.0f, -1.0f, 3.0f, 3.0f,
1811 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 1.5f, 1.5f, -1.0f, -1.0f, 2.3f, 2.3f, 1.0f, 1.0f, -0.4f, -0.4f,
1812 0.5f, 0.5f, 2.0f, 2.0f, 2.0f, 2.0f, -4.0f, -4.0f, 1.5f, 1.5f, 1.0f, 1.0f, -0.5f, -0.5f, 3.0f, 3.0f,
1814 set_values(weights1, { -2.0f, 0.5f, 3.5f, 1.5f });
1815 set_values(biases1, { 2.0f });
1816 set_values(weights2, { -1.2f, 1.5f, 0.5f, -0.5f });
1817 set_values(biases2, { -1.0f });
1820 input_layout("input", input.get_layout()),
1821 data("weights1", weights1),
1822 data("biases1", biases1),
1823 data("weights2", weights2),
1824 data("biases2", biases2),
1828 { "weights1", "weights2" },
1829 { "biases1", "biases2" },
1835 network network(engine, topology);
1836 network.set_input_data("input", input);
1838 auto outputs = network.execute();
1839 EXPECT_EQ(outputs.size(), size_t(1));
1840 EXPECT_EQ(outputs.begin()->first, "conv");
1842 auto output_prim = outputs.begin()->second.get_memory();
1844 auto output_ptr = output_prim.pointer<float>();
// Pairs of identical values: the two batch elements produce the same results.
1846 EXPECT_FLOAT_EQ(8.0f, get_value<float>(output_ptr, 0));
1847 EXPECT_FLOAT_EQ(8.0f, get_value<float>(output_ptr, 1));
1848 EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 2));
1849 EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 3));
1850 EXPECT_FLOAT_EQ(0.5f, get_value<float>(output_ptr, 4));
1851 EXPECT_FLOAT_EQ(0.5f, get_value<float>(output_ptr, 5));
1852 EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 6));
1853 EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 7));
1854 EXPECT_FLOAT_EQ(6.0f, get_value<float>(output_ptr, 8));
1855 EXPECT_FLOAT_EQ(6.0f, get_value<float>(output_ptr, 9));
1856 EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 10));
1857 EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 11));
1858 EXPECT_FLOAT_EQ(9.0f, get_value<float>(output_ptr, 12));
1859 EXPECT_FLOAT_EQ(9.0f, get_value<float>(output_ptr, 13));
1860 EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 14));
1861 EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 15));
1864 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2) {
// Grouped (groups=2) variant of the split2 test above: the two per-split
// weight/bias tensors are joined into single memories ({2,1,2,2} weights,
// {1,1,2,1} biases) and the convolution is given a group count instead.
1865 // data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_split2
// NOTE(review): extraction appears to drop several lines (engine declaration,
// topology/convolution call structure, closing brace) - verify upstream.
1868 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 4, 4 } });
1869 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
1870 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
1873 -0.5f, 0.5f, 1.0f, 1.5f, 0.5f, 2.3f, 2.0f, -0.4f,
1874 1.5f, 2.0f, -0.5f, -4.0f, 0.0f, 1.0f, -1.0f, 3.0f,
1875 0.5f, 0.5f, 0.5f, 1.5f, -1.0f, 2.3f, 1.0f, -0.4f,
1876 0.5f, 2.0f, 2.0f, -4.0f, 1.5f, 1.0f, -0.5f, 3.0f
// Group 0 filter first, then group 1 filter, in one joined weights tensor.
1878 set_values(weights, {
1879 -2.0f, 0.5f, 3.5f, 1.5f,
1880 -1.2f, 1.5f, 0.5f, -0.5f
1882 set_values(biases, { 2.0f, -1.0f });
1885 input_layout("input", input.get_layout()),
1886 data("weights", weights),
1887 data("biases", biases),
1893 2, // number of groups
1899 network network(engine, topology);
1900 network.set_input_data("input", input);
1902 auto outputs = network.execute();
1903 EXPECT_EQ(outputs.size(), size_t(1));
1904 EXPECT_EQ(outputs.begin()->first, "conv");
1906 auto output_prim = outputs.begin()->second.get_memory();
1908 auto output_ptr = output_prim.pointer<float>();
// Same expected values as the split2 test: groups must be numerically equivalent.
1910 EXPECT_FLOAT_EQ(8.0f, get_value<float>(output_ptr, 0));
1911 EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 1));
1912 EXPECT_FLOAT_EQ(0.5f, get_value<float>(output_ptr, 2));
1913 EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 3));
1914 EXPECT_FLOAT_EQ(6.0f, get_value<float>(output_ptr, 4));
1915 EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 5));
1916 EXPECT_FLOAT_EQ(9.0f, get_value<float>(output_ptr, 6));
1917 EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 7));
1920 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2_bfyx) {
// Same grouped (groups=2) scenario as the test above, but the input is first
// reordered from yxfb to bfyx; expected values are therefore in bfyx order
// (all of group 0's outputs first, then group 1's).
1921 // data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_split2
// NOTE(review): extraction appears to drop several lines (engine declaration,
// topology/convolution call structure, closing brace) - verify upstream.
1925 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 4, 4 } });
1926 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
1927 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
1930 -0.5f, 0.5f, 1.0f, 1.5f, 0.5f, 2.3f, 2.0f, -0.4f,
1931 1.5f, 2.0f, -0.5f, -4.0f, 0.0f, 1.0f, -1.0f, 3.0f,
1932 0.5f, 0.5f, 0.5f, 1.5f, -1.0f, 2.3f, 1.0f, -0.4f,
1933 0.5f, 2.0f, 2.0f, -4.0f, 1.5f, 1.0f, -0.5f, 3.0f
1935 set_values(weights, {
1936 -2.0f, 0.5f, 3.5f, 1.5f,
1937 -1.2f, 1.5f, 0.5f, -0.5f
1939 set_values(biases, { 2.0f, -1.0f });
1942 input_layout("input", input.get_layout()),
// The reorder feeds the convolution a bfyx copy of the yxfb input.
1943 reorder("input_1", "input", { data_types::f32,format::bfyx,{ 1, 2, 4, 4 } }),
1944 data("weights", weights),
1945 data("biases", biases),
1951 2, // number of groups
1957 network network(engine, topology);
1958 network.set_input_data("input", input);
1960 auto outputs = network.execute();
1961 EXPECT_EQ(outputs.size(), size_t(1));
1962 EXPECT_EQ(outputs.begin()->first, "conv");
1964 auto output_prim = outputs.begin()->second.get_memory();
1966 auto output_ptr = output_prim.pointer<float>();
// bfyx output: indices 0-3 are feature 0 (group 0), indices 4-7 feature 1 (group 1).
1968 EXPECT_FLOAT_EQ(8.0f, get_value<float>(output_ptr, 0));
1969 EXPECT_FLOAT_EQ(0.5f, get_value<float>(output_ptr, 1));
1970 EXPECT_FLOAT_EQ(6.0f, get_value<float>(output_ptr, 2));
1971 EXPECT_FLOAT_EQ(9.0f, get_value<float>(output_ptr, 3));
1972 EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 4));
1973 EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 5));
1974 EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 6));
1975 EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 7));
1978 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group2) {
// Grouped (groups=2) variant of the batch-2 split2 test: joined weights/biases,
// identical data in both batch elements, so expected values appear in pairs.
1979 // data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2
// NOTE(review): extraction appears to drop several lines (engine declaration,
// topology/convolution call structure, closing brace) - verify upstream.
1983 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 4, 4 } });
1984 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
1985 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
// Each scalar duplicated for the two identical batch elements (yxfb interleave).
1988 -0.5f, -0.5f, 0.5f, 0.5f, 1.0f, 1.0f, 1.5f, 1.5f, 0.5f, 0.5f, 2.3f, 2.3f, 2.0f, 2.0f, -0.4f, -0.4f,
1989 1.5f, 1.5f, 2.0f, 2.0f, -0.5f, -0.5f, -4.0f, -4.0f, 0.0f, 0.0f, 1.0f, 1.0f, -1.0f, -1.0f, 3.0f, 3.0f,
1990 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 1.5f, 1.5f, -1.0f, -1.0f, 2.3f, 2.3f, 1.0f, 1.0f, -0.4f, -0.4f,
1991 0.5f, 0.5f, 2.0f, 2.0f, 2.0f, 2.0f, -4.0f, -4.0f, 1.5f, 1.5f, 1.0f, 1.0f, -0.5f, -0.5f, 3.0f, 3.0f,
1993 set_values(weights, {
1994 -2.0f, 0.5f, 3.5f, 1.5f,
1995 -1.2f, 1.5f, 0.5f, -0.5f
1997 set_values(biases, { 2.0f, -1.0f });
2000 input_layout("input", input.get_layout()),
2001 data("weights", weights),
2002 data("biases", biases),
2008 2, // number of groups
2014 network network(engine, topology);
2015 network.set_input_data("input", input);
2017 auto outputs = network.execute();
2018 EXPECT_EQ(outputs.size(), size_t(1));
2019 EXPECT_EQ(outputs.begin()->first, "conv");
2021 auto output_prim = outputs.begin()->second.get_memory();
2023 auto output_ptr = output_prim.pointer<float>();
// Same expected values as the batch-2 split2 test; pairs reflect the two batches.
2025 EXPECT_FLOAT_EQ(8.0f, get_value<float>(output_ptr, 0));
2026 EXPECT_FLOAT_EQ(8.0f, get_value<float>(output_ptr, 1));
2027 EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 2));
2028 EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 3));
2029 EXPECT_FLOAT_EQ(0.5f, get_value<float>(output_ptr, 4));
2030 EXPECT_FLOAT_EQ(0.5f, get_value<float>(output_ptr, 5));
2031 EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 6));
2032 EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 7));
2033 EXPECT_FLOAT_EQ(6.0f, get_value<float>(output_ptr, 8));
2034 EXPECT_FLOAT_EQ(6.0f, get_value<float>(output_ptr, 9));
2035 EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 10));
2036 EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 11));
2037 EXPECT_FLOAT_EQ(9.0f, get_value<float>(output_ptr, 12));
2038 EXPECT_FLOAT_EQ(9.0f, get_value<float>(output_ptr, 13));
2039 EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 14));
2040 EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 15));
2043 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthwise_sep_opt) {
// Split-16 convolution exercising the depthwise-separable optimization path:
// 16 input features, 16 weight/bias pairs built in a loop (8 iterations x 2
// pairs), alternating the two filter patterns used by the split2 tests.
2044 // Test for depthwise separable optimization, there are 16 weights and biases (split 16)
2045 // data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2 but with batch 1
// NOTE(review): extraction appears to drop several lines (set_values opener,
// the topology.add calls, the convolution construction, closing braces) -
// verify against the upstream file.
2047 const auto& engine = get_test_engine();
2049 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 16, 4, 4 } });
// yxfb: each row below is one spatial position, 16 features x 2 batches interleaved.
2052 -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f,
2053 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f,
2054 0.5f, 0.5f, 2.3f, 2.3f, 0.5f, 0.5f, 2.3f, 2.3f, 0.5f, 0.5f, 2.3f, 2.3f, 0.5f, 0.5f, 2.3f, 2.3f, 0.5f, 0.5f, 2.3f, 2.3f, 0.5f, 0.5f, 2.3f, 2.3f, 0.5f, 0.5f, 2.3f, 2.3f, 0.5f, 0.5f, 2.3f, 2.3f,
2055 2.0f, 2.0f, -0.4f, -0.4f, 2.0f, 2.0f, -0.4f, -0.4f, 2.0f, 2.0f, -0.4f, -0.4f, 2.0f, 2.0f, -0.4f, -0.4f, 2.0f, 2.0f, -0.4f, -0.4f, 2.0f, 2.0f, -0.4f, -0.4f, 2.0f, 2.0f, -0.4f, -0.4f, 2.0f, 2.0f, -0.4f, -0.4f,
2056 1.5f, 1.5f, 2.0f, 2.0f, 1.5f, 1.5f, 2.0f, 2.0f, 1.5f, 1.5f, 2.0f, 2.0f, 1.5f, 1.5f, 2.0f, 2.0f, 1.5f, 1.5f, 2.0f, 2.0f, 1.5f, 1.5f, 2.0f, 2.0f, 1.5f, 1.5f, 2.0f, 2.0f, 1.5f, 1.5f, 2.0f, 2.0f,
2057 -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f,
2058 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f,
2059 -1.0f, -1.0f, 3.0f, 3.0f, -1.0f, -1.0f, 3.0f, 3.0f, -1.0f, -1.0f, 3.0f, 3.0f, -1.0f, -1.0f, 3.0f, 3.0f, -1.0f, -1.0f, 3.0f, 3.0f, -1.0f, -1.0f, 3.0f, 3.0f, -1.0f, -1.0f, 3.0f, 3.0f, -1.0f, -1.0f, 3.0f, 3.0f,
2060 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
2061 0.5f, 0.5f, 1.5f, 1.5f, 0.5f, 0.5f, 1.5f, 1.5f, 0.5f, 0.5f, 1.5f, 1.5f, 0.5f, 0.5f, 1.5f, 1.5f, 0.5f, 0.5f, 1.5f, 1.5f, 0.5f, 0.5f, 1.5f, 1.5f, 0.5f, 0.5f, 1.5f, 1.5f, 0.5f, 0.5f, 1.5f, 1.5f,
2062 -1.0f, -1.0f, 2.3f, 2.3f, -1.0f, -1.0f, 2.3f, 2.3f, -1.0f, -1.0f, 2.3f, 2.3f, -1.0f, -1.0f, 2.3f, 2.3f, -1.0f, -1.0f, 2.3f, 2.3f, -1.0f, -1.0f, 2.3f, 2.3f, -1.0f, -1.0f, 2.3f, 2.3f, -1.0f, -1.0f, 2.3f, 2.3f,
2063 1.0f, 1.0f, -0.4f, -0.4f, 1.0f, 1.0f, -0.4f, -0.4f, 1.0f, 1.0f, -0.4f, -0.4f, 1.0f, 1.0f, -0.4f, -0.4f, 1.0f, 1.0f, -0.4f, -0.4f, 1.0f, 1.0f, -0.4f, -0.4f, 1.0f, 1.0f, -0.4f, -0.4f, 1.0f, 1.0f, -0.4f, -0.4f,
2064 0.5f, 0.5f, 2.0f, 2.0f, 0.5f, 0.5f, 2.0f, 2.0f, 0.5f, 0.5f, 2.0f, 2.0f, 0.5f, 0.5f, 2.0f, 2.0f, 0.5f, 0.5f, 2.0f, 2.0f, 0.5f, 0.5f, 2.0f, 2.0f, 0.5f, 0.5f, 2.0f, 2.0f, 0.5f, 0.5f, 2.0f, 2.0f,
2065 2.0f, 2.0f, -4.0f, -4.0f, 2.0f, 2.0f, -4.0f, -4.0f, 2.0f, 2.0f, -4.0f, -4.0f, 2.0f, 2.0f, -4.0f, -4.0f, 2.0f, 2.0f, -4.0f, -4.0f, 2.0f, 2.0f, -4.0f, -4.0f, 2.0f, 2.0f, -4.0f, -4.0f, 2.0f, 2.0f, -4.0f, -4.0f,
2066 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f,
2067 -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f,
2070 topology topology(input_layout("input", input.get_layout()));
2072 std::vector<primitive_id> weights_vec;
2073 std::vector<primitive_id> bias_vec;
// Build 16 weight/bias primitives: each loop iteration adds one pair with
// filter pattern 1 and one pair with filter pattern 2 (same data every time).
2075 for (uint32_t i = 0; i < 8; i++)
2077 auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
2078 auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
2079 auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
2080 auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
2082 set_values(weights1, { -2.0f, 0.5f, 3.5f, 1.5f });
2083 set_values(biases1, { 2.0f });
2084 set_values(weights2, { -1.2f, 1.5f, 0.5f, -0.5f });
2085 set_values(biases2, { -1.0f });
// Unique primitive ids per iteration; collected so the convolution can
// reference all 16 weight/bias primitives.
2087 primitive_id weights_id = "weights_" + std::to_string(i);
2088 primitive_id weights2_id = "weights2_" + std::to_string(i);
2089 primitive_id bias_id = "biases_" + std::to_string(i);
2090 primitive_id bias2_id = "biases2_" + std::to_string(i);
2092 weights_vec.push_back(weights_id);
2093 weights_vec.push_back(weights2_id);
2094 bias_vec.push_back(bias_id);
2095 bias_vec.push_back(bias2_id);
2098 data(weights_id, weights1),
2099 data(bias_id, biases1),
2100 data(weights2_id, weights2),
2101 data(bias2_id, biases2)
2117 network network(engine, topology);
2118 network.set_input_data("input", input);
2120 auto outputs = network.execute();
2121 EXPECT_EQ(outputs.size(), size_t(1));
2122 EXPECT_EQ(outputs.begin()->first, "conv");
2124 auto output_prim = outputs.begin()->second.get_memory();
2126 auto output_ptr = output_prim.pointer<float>();
// Expected outputs repeat the split2 values across all 16 features / 2 batches.
2128 std::vector<float> expected_output_vec = {
2129 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f,
2130 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f,
2131 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f,
2132 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f,
2135 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
2137 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
2141 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthwise_sep_opt_bfyx) {
// bfyx variant of the depthwise-separable split-16 test above: same 16
// weight/bias pairs built in a loop, but the input memory is laid out bfyx
// (one full 4x4 plane per feature), so the data and expected values are
// arranged per-feature instead of interleaved.
2142 // Test for depthwise separable optimization, there are 16 weights and biases (split 16)
2143 // data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2 but with batch 1
// NOTE(review): extraction appears to drop several lines (set_values opener,
// topology.add calls, the convolution construction, closing braces) - verify
// against the upstream file.
2144 const auto& engine = get_test_engine();
2146 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 16, 4, 4 } });
// bfyx: each row below is one feature's full 4x4 plane; the two filter-pattern
// input planes alternate across the 16 features.
2149 -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
2150 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
2151 -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
2152 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
2153 -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
2154 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
2155 -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
2156 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
2157 -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
2158 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
2159 -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
2160 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
2161 -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
2162 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
2163 -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
2164 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
2167 topology topology(input_layout("input", input.get_layout()));
2169 std::vector<primitive_id> weights_vec;
2170 std::vector<primitive_id> bias_vec;
// Build 16 weight/bias primitives: one pair per filter pattern per iteration.
2172 for (uint32_t i = 0; i < 8; i++)
2174 auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
2175 auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
2176 auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
2177 auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
2179 set_values(weights1, { -2.0f, 0.5f, 3.5f, 1.5f });
2180 set_values(biases1, { 2.0f });
2181 set_values(weights2, { -1.2f, 1.5f, 0.5f, -0.5f });
2182 set_values(biases2, { -1.0f });
// Unique ids per iteration, accumulated for the convolution's weight/bias lists.
2184 primitive_id weights_id = "weights_" + std::to_string(i);
2185 primitive_id weights2_id = "weights2_" + std::to_string(i);
2186 primitive_id bias_id = "biases_" + std::to_string(i);
2187 primitive_id bias2_id = "biases2_" + std::to_string(i);
2189 weights_vec.push_back(weights_id);
2190 weights_vec.push_back(weights2_id);
2191 bias_vec.push_back(bias_id);
2192 bias_vec.push_back(bias2_id);
2195 data(weights_id, weights1),
2196 data(bias_id, biases1),
2197 data(weights2_id, weights2),
2198 data(bias2_id, biases2)
2214 network network(engine, topology);
2215 network.set_input_data("input", input);
2217 auto outputs = network.execute();
2218 EXPECT_EQ(outputs.size(), size_t(1));
2219 EXPECT_EQ(outputs.begin()->first, "conv");
2221 auto output_prim = outputs.begin()->second.get_memory();
2223 auto output_ptr = output_prim.pointer<float>();
// bfyx output: each row is one batch/feature pair's 2x2 result plus the next's,
// alternating the two filter patterns' results.
2225 std::vector<float> expected_output_vec = {
2226 8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
2227 8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
2228 8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
2229 8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
2230 8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
2231 8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
2232 8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
2233 8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
2236 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
2238 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
// Grouped convolution (groups=16, depthwise-style): 2x2 kernel, 4x4x16 input, batch 2, yxfb layout.
// NOTE(review): original line numbers are non-contiguous in this chunk — the engine creation,
// the convolution primitive construction, and the closing braces are not visible here.
2242 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16) {
2243 // Test for grouped convolution, there are 16 joined weights and biases (group 16)
2244 // data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthwise_sep_opt
2248 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 16, 4, 4 } });
// Input values in yxfb order: each row below is one (y,x) position; the 32 entries per row are
// the 16 features x 2 batches, with the same per-feature pattern repeated across all 16 groups.
2251 -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f,
2252 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f,
2253 0.5f, 0.5f, 2.3f, 2.3f, 0.5f, 0.5f, 2.3f, 2.3f, 0.5f, 0.5f, 2.3f, 2.3f, 0.5f, 0.5f, 2.3f, 2.3f, 0.5f, 0.5f, 2.3f, 2.3f, 0.5f, 0.5f, 2.3f, 2.3f, 0.5f, 0.5f, 2.3f, 2.3f, 0.5f, 0.5f, 2.3f, 2.3f,
2254 2.0f, 2.0f, -0.4f, -0.4f, 2.0f, 2.0f, -0.4f, -0.4f, 2.0f, 2.0f, -0.4f, -0.4f, 2.0f, 2.0f, -0.4f, -0.4f, 2.0f, 2.0f, -0.4f, -0.4f, 2.0f, 2.0f, -0.4f, -0.4f, 2.0f, 2.0f, -0.4f, -0.4f, 2.0f, 2.0f, -0.4f, -0.4f,
2255 1.5f, 1.5f, 2.0f, 2.0f, 1.5f, 1.5f, 2.0f, 2.0f, 1.5f, 1.5f, 2.0f, 2.0f, 1.5f, 1.5f, 2.0f, 2.0f, 1.5f, 1.5f, 2.0f, 2.0f, 1.5f, 1.5f, 2.0f, 2.0f, 1.5f, 1.5f, 2.0f, 2.0f, 1.5f, 1.5f, 2.0f, 2.0f,
2256 -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f,
2257 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f,
2258 -1.0f, -1.0f, 3.0f, 3.0f, -1.0f, -1.0f, 3.0f, 3.0f, -1.0f, -1.0f, 3.0f, 3.0f, -1.0f, -1.0f, 3.0f, 3.0f, -1.0f, -1.0f, 3.0f, 3.0f, -1.0f, -1.0f, 3.0f, 3.0f, -1.0f, -1.0f, 3.0f, 3.0f, -1.0f, -1.0f, 3.0f, 3.0f,
2259 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
2260 0.5f, 0.5f, 1.5f, 1.5f, 0.5f, 0.5f, 1.5f, 1.5f, 0.5f, 0.5f, 1.5f, 1.5f, 0.5f, 0.5f, 1.5f, 1.5f, 0.5f, 0.5f, 1.5f, 1.5f, 0.5f, 0.5f, 1.5f, 1.5f, 0.5f, 0.5f, 1.5f, 1.5f, 0.5f, 0.5f, 1.5f, 1.5f,
2261 -1.0f, -1.0f, 2.3f, 2.3f, -1.0f, -1.0f, 2.3f, 2.3f, -1.0f, -1.0f, 2.3f, 2.3f, -1.0f, -1.0f, 2.3f, 2.3f, -1.0f, -1.0f, 2.3f, 2.3f, -1.0f, -1.0f, 2.3f, 2.3f, -1.0f, -1.0f, 2.3f, 2.3f, -1.0f, -1.0f, 2.3f, 2.3f,
2262 1.0f, 1.0f, -0.4f, -0.4f, 1.0f, 1.0f, -0.4f, -0.4f, 1.0f, 1.0f, -0.4f, -0.4f, 1.0f, 1.0f, -0.4f, -0.4f, 1.0f, 1.0f, -0.4f, -0.4f, 1.0f, 1.0f, -0.4f, -0.4f, 1.0f, 1.0f, -0.4f, -0.4f, 1.0f, 1.0f, -0.4f, -0.4f,
2263 0.5f, 0.5f, 2.0f, 2.0f, 0.5f, 0.5f, 2.0f, 2.0f, 0.5f, 0.5f, 2.0f, 2.0f, 0.5f, 0.5f, 2.0f, 2.0f, 0.5f, 0.5f, 2.0f, 2.0f, 0.5f, 0.5f, 2.0f, 2.0f, 0.5f, 0.5f, 2.0f, 2.0f, 0.5f, 0.5f, 2.0f, 2.0f,
2264 2.0f, 2.0f, -4.0f, -4.0f, 2.0f, 2.0f, -4.0f, -4.0f, 2.0f, 2.0f, -4.0f, -4.0f, 2.0f, 2.0f, -4.0f, -4.0f, 2.0f, 2.0f, -4.0f, -4.0f, 2.0f, 2.0f, -4.0f, -4.0f, 2.0f, 2.0f, -4.0f, -4.0f, 2.0f, 2.0f, -4.0f, -4.0f,
2265 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f, 1.5f, 1.5f, 1.0f, 1.0f,
2266 -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f,
2269 topology topology(input_layout("input", input.get_layout()));
// One 2x2 filter per group (16 filters of 1 input feature each) plus one bias per group.
2271 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 16, 1, 2, 2 } });
2272 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 16, 1 } });
// The same 2x2 kernel pair { -2, 0.5, 3.5, 1.5 } / { -1.2, 1.5, 0.5, -0.5 } alternates across groups.
2276 -2.0f, 0.5f, 3.5f, 1.5f,
2277 -1.2f, 1.5f, 0.5f, -0.5f,
2278 -2.0f, 0.5f, 3.5f, 1.5f,
2279 -1.2f, 1.5f, 0.5f, -0.5f,
2280 -2.0f, 0.5f, 3.5f, 1.5f,
2281 -1.2f, 1.5f, 0.5f, -0.5f,
2282 -2.0f, 0.5f, 3.5f, 1.5f,
2283 -1.2f, 1.5f, 0.5f, -0.5f,
2284 -2.0f, 0.5f, 3.5f, 1.5f,
2285 -1.2f, 1.5f, 0.5f, -0.5f,
2286 -2.0f, 0.5f, 3.5f, 1.5f,
2287 -1.2f, 1.5f, 0.5f, -0.5f,
2288 -2.0f, 0.5f, 3.5f, 1.5f,
2289 -1.2f, 1.5f, 0.5f, -0.5f,
2290 -2.0f, 0.5f, 3.5f, 1.5f,
2291 -1.2f, 1.5f, 0.5f, -0.5f
2294 set_values(biases, { 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f});
2297 data("weights", weights),
2298 data("bias", biases)
2313 network network(engine, topology);
2314 network.set_input_data("input", input);
2316 auto outputs = network.execute();
// Exactly one output primitive named "conv" is expected.
2317 EXPECT_EQ(outputs.size(), size_t(1));
2318 EXPECT_EQ(outputs.begin()->first, "conv");
2320 auto output_prim = outputs.begin()->second.get_memory();
2322 auto output_ptr = output_prim.pointer<float>();
// Expected 2x2 spatial output per group, in yxfb order (same values repeated per group/batch).
2324 std::vector<float> expected_output_vec = {
2325 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f,
2326 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f,
2327 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f,
2328 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f,
// Elementwise exact comparison of every output value.
2331 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
2333 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
// Same grouped convolution (groups=16) as the previous test, but with bfyx input layout.
// NOTE(review): original line numbers are non-contiguous — engine creation, the
// convolution primitive construction, and closing braces were elided from this chunk.
2337 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) {
2338 // Test for grouped convolution, there are 16 joined weights and biases (group 16)
2339 // data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthwise_sep_opt_bfyx
2342 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 16, 4, 4 } });
// bfyx order: one full 4x4 plane per feature; the two 4x4 planes alternate across the 16 features.
2345 -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
2346 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
2347 -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
2348 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
2349 -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
2350 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
2351 -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
2352 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
2353 -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
2354 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
2355 -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
2356 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
2357 -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
2358 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
2359 -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
2360 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
2363 topology topology(input_layout("input", input.get_layout()));
// One 2x2 filter per group plus per-group bias, identical to the yxfb variant above.
2365 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 16, 1, 2, 2 } });
2366 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 16, 1 } });
2370 -2.0f, 0.5f, 3.5f, 1.5f,
2371 -1.2f, 1.5f, 0.5f, -0.5f,
2372 -2.0f, 0.5f, 3.5f, 1.5f,
2373 -1.2f, 1.5f, 0.5f, -0.5f,
2374 -2.0f, 0.5f, 3.5f, 1.5f,
2375 -1.2f, 1.5f, 0.5f, -0.5f,
2376 -2.0f, 0.5f, 3.5f, 1.5f,
2377 -1.2f, 1.5f, 0.5f, -0.5f,
2378 -2.0f, 0.5f, 3.5f, 1.5f,
2379 -1.2f, 1.5f, 0.5f, -0.5f,
2380 -2.0f, 0.5f, 3.5f, 1.5f,
2381 -1.2f, 1.5f, 0.5f, -0.5f,
2382 -2.0f, 0.5f, 3.5f, 1.5f,
2383 -1.2f, 1.5f, 0.5f, -0.5f,
2384 -2.0f, 0.5f, 3.5f, 1.5f,
2385 -1.2f, 1.5f, 0.5f, -0.5f
2389 set_values(biases, { 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f});
2392 data("weights", weights),
2393 data("bias", biases)
2408 network network(engine, topology);
2409 network.set_input_data("input", input);
2411 auto outputs = network.execute();
// Exactly one output primitive named "conv" is expected.
2412 EXPECT_EQ(outputs.size(), size_t(1));
2413 EXPECT_EQ(outputs.begin()->first, "conv");
2415 auto output_prim = outputs.begin()->second.get_memory();
2417 auto output_ptr = output_prim.pointer<float>();
// Expected output in bfyx order: each row is one pair of feature planes (2x2 each), repeated per group.
2419 std::vector<float> expected_output_vec = {
2420 8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
2421 8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
2422 8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
2423 8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
2424 8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
2425 8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
2426 8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
2427 8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
// Elementwise exact comparison of every output value.
2430 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
2432 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
// Split convolution (split=2): 1x1 kernels, 1x1x4x1 input split into two 2-feature halves,
// each half convolved with its own weights/biases and the results concatenated on features.
// NOTE(review): the convolution primitive construction between lines 2492 and 2503 is
// only partially visible in this chunk (weights/biases id lists at 2496-2497).
2436 TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x4x1_nopad_split2) {
2470 const auto& engine = get_test_engine();
2472 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 4, 1, 1 } });
2473 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 4 } });
// Two independent weight/bias sets, one per split half (2 output features each).
2474 auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } });
2475 auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 1 } });
2476 auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } });
2477 auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 1 } });
2480 1.5f, 0.5f, 0.0f, -0.5f
2482 set_values(weights1, { -2.0f, -0.5f, 1.0f, 2.0f });
2483 set_values(biases1, { 1.0f, 5.0f });
2484 set_values(weights2, { 4.0f, 1.5f, 2.0f, 0.5f });
2485 set_values(biases2, { -1.0f, 2.5f });
2488 input_layout("input", input.get_layout()),
2489 data("weights1", weights1),
2490 data("biases1", biases1),
2491 data("weights2", weights2),
2492 data("biases2", biases2),
2496 { "weights1", "weights2" },
2497 { "biases1", "biases2" },
2503 network network(engine, topology);
2504 network.set_input_data("input", input);
2506 auto outputs = network.execute();
2507 EXPECT_EQ(outputs.size(), size_t(1));
2508 EXPECT_EQ(outputs.begin()->first, "conv");
2510 auto output_prim = outputs.begin()->second.get_memory();
2512 auto output_ptr = output_prim.pointer<float>();
// Four output features: first two from split 1, last two from split 2.
2514 EXPECT_FLOAT_EQ(-2.25f, get_value<float>(output_ptr, 0));
2515 EXPECT_FLOAT_EQ(7.5f, get_value<float>(output_ptr, 1));
2516 EXPECT_FLOAT_EQ(-1.75f, get_value<float>(output_ptr, 2));
2517 EXPECT_FLOAT_EQ(2.25f, get_value<float>(output_ptr, 3));
// Split convolution (split=2) with a single input feature per split: 1x1 kernels,
// 1x1x2x1 input, each half mapped to two output features by its own weights/biases.
// NOTE(review): input set_values and the convolution primitive construction are
// partially elided from this chunk (non-contiguous original line numbers).
2520 TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x2x1_nopad_split2) {
2552 const auto& engine = get_test_engine();
2554 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 2, 1, 1 } });
2555 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 4 } });
// Each split half has 2 output features computed from its single input feature.
2556 auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
2557 auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 1 } });
2558 auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
2559 auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 1 } });
2564 set_values(weights1, { -2.0f, 1.0f });
2565 set_values(biases1, { 1.0f, 5.0f });
2566 set_values(weights2, { 4.0f, 2.0f });
2567 set_values(biases2, { -1.0f, 2.5f });
2570 input_layout("input", input.get_layout()),
2571 data("weights1", weights1),
2572 data("biases1", biases1),
2573 data("weights2", weights2),
2574 data("biases2", biases2),
2578 { "weights1", "weights2" },
2579 { "biases1", "biases2" },
2585 network network(engine, topology);
2586 network.set_input_data("input", input);
2588 auto outputs = network.execute();
2589 EXPECT_EQ(outputs.size(), size_t(1));
2590 EXPECT_EQ(outputs.begin()->first, "conv");
2592 auto output_prim = outputs.begin()->second.get_memory();
2594 auto output_ptr = output_prim.pointer<float>();
// Four output features: first two from split 1, last two from split 2.
2596 EXPECT_FLOAT_EQ(-2.0f, get_value<float>(output_ptr, 0));
2597 EXPECT_FLOAT_EQ(6.5f, get_value<float>(output_ptr, 1));
2598 EXPECT_FLOAT_EQ(1.0f, get_value<float>(output_ptr, 2));
2599 EXPECT_FLOAT_EQ(3.5f, get_value<float>(output_ptr, 3));
// Split convolution (split=2) with 3 output features per split: 1x1 kernels, 4-feature
// input split 2+2, producing 6 output features total (3 per half).
// NOTE(review): the convolution primitive construction is partially elided from this
// chunk (only the weights/biases id lists at 2666-2667 are visible).
2602 TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x4x1_filter_1x3x2x1x1_nopad_split2) {
2640 const auto& engine = get_test_engine();
2642 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 4, 1, 1 } });
2643 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 6 } });
// 3 output features x 2 input features per split half.
2644 auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 2, 1, 1 } });
2645 auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 1 } });
2646 auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 2, 1, 1 } });
2647 auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 1 } });
2650 1.5f, 0.5f, 2.0f, -1.0f
2652 set_values(weights1, { -2.0f, 1.0f, 1.0f, 3.0f, 0.5f, 8.0f });
2653 set_values(biases1, { 1.0f, 5.0f, 3.0f });
2654 set_values(weights2, { 4.0f, -4.0f, 2.0f, 0.5f, -0.5f, 3.0f });
2655 set_values(biases2, { -1.0f, 2.5f, 2.0f });
2658 input_layout("input", input.get_layout()),
2659 data("weights1", weights1),
2660 data("biases1", biases1),
2661 data("weights2", weights2),
2662 data("biases2", biases2),
2666 { "weights1", "weights2" },
2667 { "biases1", "biases2" },
2673 network network(engine, topology);
2674 network.set_input_data("input", input);
2676 auto outputs = network.execute();
2677 EXPECT_EQ(outputs.size(), size_t(1));
2678 EXPECT_EQ(outputs.begin()->first, "conv");
2680 auto output_prim = outputs.begin()->second.get_memory();
2682 auto output_ptr = output_prim.pointer<float>();
// Six output features: indices 0-2 from split 1, indices 3-5 from split 2.
2684 EXPECT_FLOAT_EQ(-1.5f, get_value<float>(output_ptr, 0));
2685 EXPECT_FLOAT_EQ(8.0f, get_value<float>(output_ptr, 1));
2686 EXPECT_FLOAT_EQ(7.75f, get_value<float>(output_ptr, 2));
2687 EXPECT_FLOAT_EQ(11.0f, get_value<float>(output_ptr, 3));
2688 EXPECT_FLOAT_EQ(6.0f, get_value<float>(output_ptr, 4));
2689 EXPECT_FLOAT_EQ(-2.0f, get_value<float>(output_ptr, 5));
// Convolution followed by ReLU: 2x2 kernel on a 4x4 single-feature input; negative
// pre-activation results are clamped to 0 (see index 1 below).
// NOTE(review): the convolution/activation primitive construction (original lines
// 2737-2748) is elided from this chunk.
2693 TEST(convolution_gpu, trivial_convolution_relu) {
2717 const auto& engine = get_test_engine();
2719 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 4 } });
2720 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 2, 2 }, 1 } });
2721 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
2722 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
// 4x4 input plane, row-major.
2725 -0.5f, 1.0f, 0.5f, 2.0f,
2726 1.5f, -0.5f, 0.0f, -1.0f,
2727 0.5f, 0.5f, -1.0f, 1.0f,
2728 0.5f, 2.0f, 1.5f, -0.5f
2730 set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
2731 set_values(biases, { -2.0f });
2734 input_layout("input", input.get_layout()),
2735 data("weights", weights),
2736 data("biases", biases),
2749 network network(engine, topology);
2750 network.set_input_data("input", input);
2752 auto outputs = network.execute();
2753 EXPECT_EQ(outputs.size(), size_t(1));
2754 EXPECT_EQ(outputs.begin()->first, "conv");
2756 auto output_prim = outputs.begin()->second.get_memory();
2758 auto output_ptr = output_prim.pointer<float>();
// 2x2 output; element 1 is 0.0f because ReLU clamped a negative value.
2760 EXPECT_FLOAT_EQ(4.0f, get_value<float>(output_ptr, 0));
2761 EXPECT_FLOAT_EQ(0.0f, get_value<float>(output_ptr, 1));
2762 EXPECT_FLOAT_EQ(2.0f, get_value<float>(output_ptr, 2));
2763 EXPECT_FLOAT_EQ(5.0f, get_value<float>(output_ptr, 3));
// Same convolution as trivial_convolution_relu, but with leaky ReLU (slope 0.1):
// the negative pre-activation at index 1 becomes -0.35f (= -3.5 * 0.1) instead of 0.
// NOTE(review): the convolution/activation primitive construction (original lines
// 2811-2822) is elided from this chunk.
2766 TEST(convolution_gpu, relu_with_negative_slope) {
2772 // Negative Slope : 0.1
2791 const auto& engine = get_test_engine();
2793 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 4 } });
2794 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 2, 2 }, 1 } });
2795 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
2796 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
// Identical input/weights/bias to trivial_convolution_relu.
2799 -0.5f, 1.0f, 0.5f, 2.0f,
2800 1.5f, -0.5f, 0.0f, -1.0f,
2801 0.5f, 0.5f, -1.0f, 1.0f,
2802 0.5f, 2.0f, 1.5f, -0.5f
2804 set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
2805 set_values(biases, { -2.0f });
2808 input_layout("input", input.get_layout()),
2809 data("weights", weights),
2810 data("biases", biases),
2823 network network(engine, topology);
2824 network.set_input_data("input", input);
2826 auto outputs = network.execute();
2827 EXPECT_EQ(outputs.size(), size_t(1));
2828 EXPECT_EQ(outputs.begin()->first, "conv");
2830 auto output_prim = outputs.begin()->second.get_memory();
2832 auto output_ptr = output_prim.pointer<float>();
// Index 1 shows the leaky slope applied to the negative value.
2834 EXPECT_FLOAT_EQ(4.0f, get_value<float>(output_ptr, 0));
2835 EXPECT_FLOAT_EQ(-0.35f, get_value<float>(output_ptr, 1));
2836 EXPECT_FLOAT_EQ(2.0f, get_value<float>(output_ptr, 2));
2837 EXPECT_FLOAT_EQ(5.0f, get_value<float>(output_ptr, 3));
// DISABLED: chains two 1x1 convolutions with random inputs and compares against a
// precomputed golden result (conv_1x1_output, defined in another translation unit).
// NOTE(review): parts of the convolution constructions and loop braces are elided
// from this chunk (non-contiguous original line numbers).
2840 TEST(convolution_gpu, DISABLED_two_1x1_kernels_after_each_other) {
2842 const auto& engine = get_test_engine();
// Golden output produced offline; declared extern and defined elsewhere.
2844 extern const std::vector<float> conv_1x1_output;
2846 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 16, 8, 16, 16 } });
2847 auto weights_conv_1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 8, 8, 1, 1 } });
2848 auto weights_conv_2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 8, 1, 1 } });
// Deterministic pseudo-random fill (see test_utils) so the golden data stays valid.
2850 set_random_values<float>(input);
2851 set_random_values<float>(weights_conv_1);
2852 set_random_values<float>(weights_conv_2);
2854 auto inp_lay = input_layout("input", input.get_layout());
2855 auto conv_1 = convolution(
2858 { "weights_conv_1" });
2859 auto conv_2 = convolution(
2862 { "weights_conv_2" });
2866 data("weights_conv_1", weights_conv_1),
2868 data("weights_conv_2", weights_conv_2),
2873 bo.set_option(build_option::optimize_data(true));
2874 network network(engine, topology, bo);
2875 network.set_input_data("input", input);
2877 auto outputs = network.execute();
2878 EXPECT_EQ(outputs.size(), size_t(1));
2880 auto output_prim = outputs.at("conv_2").get_memory();
2882 auto output_ptr = output_prim.pointer<float>();
2883 auto output_layout = output_prim.get_layout();
// Flatten bfyx indices manually to compare every element against the golden data.
2885 int y_size = output_layout.size.spatial[1];
2886 int x_size = output_layout.size.spatial[0];
2887 int f_size = output_layout.size.feature[0];
2888 int b_size = output_layout.size.batch[0];
2889 int f_offset = y_size * x_size;
2890 int b_offset = f_size * f_offset;
2891 for (int b = 0; b < b_size; ++b)
2893 for (int f = 0; f < f_size; ++f)
2895 for (int y = 0; y < y_size; ++y)
2897 for (int x = 0; x < x_size; ++x)
2899 int idx = b * b_offset + f * f_offset + y * x_size + x;
2900 EXPECT_TRUE(are_equal(conv_1x1_output[idx], get_value<float>(output_ptr, idx)));
// Parameterized-by-constants convolution test: batch 16, 2 input features, 16 output
// features, 4x4 input, 2x2 weights, stride 2x2, no padding. Input/weight/expected-output
// tensors are generated procedurally from small templates, then compared elementwise.
// NOTE(review): the weight/output value templates (original lines 2970-2972, 3015-3017)
// and several braces are elided from this chunk.
2907 TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp32)
// Compile-time switch between legacy bfyx and yxfb weight ordering; both fill the
// same logical values, only the iteration order below differs.
2909 #define USE_OLD_WEIGHTS_FORMAT 0
2911 const auto input_format = format::yxfb;
2912 #if USE_OLD_WEIGHTS_FORMAT
2913 const auto weights_format = format::bfyx;
2915 const auto weights_format = format::yxfb;
2917 const auto biases_format = format::bfyx;
2919 const int32_t batch_size = 16;
2920 const int32_t input_feature_count = 2;
2921 const int32_t output_feature_count = 16;
2923 const int32_t stride_x = 2;
2924 const int32_t stride_y = 2;
2926 const int32_t input_x = 4;
2927 const int32_t input_y = 4;
2928 const int32_t weights_x = 2;
2929 const int32_t weights_y = 2;
// Standard no-padding output size formula.
2930 const int32_t output_x = (input_x - weights_x) / stride_x + 1;
2931 const int32_t output_y = (input_y - weights_y) / stride_y + 1;
2933 const auto& engine = get_test_engine();
2935 auto input_size = tensor( batch_size, input_feature_count, input_x, input_y );
2936 auto input = memory::allocate(engine, { data_types::f32, input_format, input_size });
2937 auto weights_size = tensor( output_feature_count, input_feature_count, weights_x, weights_y );
2938 auto weights = memory::allocate(engine, { data_types::f32, weights_format, weights_size });
2939 auto biases = memory::allocate(engine, { data_types::f32, biases_format, {1,1,output_feature_count,1}});
2941 //auto output = memory::allocate({output_format, {batch_size, {output_x, output_y}, output_feature_count}});
// Input: a fixed 4x4 spatial template scaled by (bi * ifc + ifi + 1) per (batch, feature),
// laid out in yxfb iteration order.
2945 std::vector<float> input_vals_template {
2946 0.25f, 0.50f, 0.75f, 1.00f,
2947 1.25f, 1.50f, 1.75f, 2.00f,
2948 2.25f, 2.50f, 2.75f, 3.00f,
2949 3.25f, 3.50f, 3.75f, 4.00f,
2951 input_vals_template.resize(input_y * input_x);
2953 std::vector<float> input_vals;
2954 input_vals.reserve(input_y * input_x * input_feature_count * batch_size);
2955 for (uint32_t yxi = 0; yxi < input_y * input_x; ++yxi)
2957 for (uint32_t ifi = 0; ifi < input_feature_count; ++ifi)
2959 for (uint32_t bi = 0; bi < batch_size; ++bi)
2961 input_vals.push_back((bi * input_feature_count + ifi + 1) * input_vals_template[yxi]);
2965 set_values(input, input_vals);
// Weights: same scaling scheme; the two #if branches differ only in loop nesting order
// to match the chosen weights_format memory layout.
2969 std::vector<float> weights_vals_template {
2973 weights_vals_template.resize(weights_y * weights_x);
2975 std::vector<float> weights_vals;
2976 weights_vals.reserve(weights_y * weights_x * input_feature_count * output_feature_count);
2977 #if USE_OLD_WEIGHTS_FORMAT
2978 for (uint32_t ofi = 0; ofi < output_feature_count; ++ofi)
2980 for (uint32_t ifi = 0; ifi < input_feature_count; ++ifi)
2982 for (uint32_t yxi = 0; yxi < weights_y * weights_x; ++yxi)
2984 weights_vals.push_back((ofi * input_feature_count + ifi + 1) * weights_vals_template[yxi]);
2989 for (uint32_t yxi = 0; yxi < weights_y * weights_x; ++yxi)
2991 for (uint32_t ifi = 0; ifi < input_feature_count; ++ifi)
2993 for (uint32_t ofi = 0; ofi < output_feature_count; ++ofi)
2995 weights_vals.push_back((ofi * input_feature_count + ifi + 1) * weights_vals_template[yxi]);
3000 set_values(weights, weights_vals);
// Bias for output feature ofi is simply ofi.
3004 std::vector<float> biases_vals;
3005 biases_vals.reserve(output_feature_count);
3006 for (uint32_t ofi = 0; ofi < output_feature_count; ++ofi)
3008 biases_vals.push_back(ofi * 1.0f);
3010 set_values(biases, biases_vals);
// Expected output: closed-form factor derived from the input/weight scaling scheme
// (sums over input features collapse to the polynomial below), plus the bias.
3014 std::vector<float> output_vals_template {
3018 output_vals_template.resize(output_y * output_x);
3020 std::vector<float> output_vals;
3021 output_vals.reserve(output_y * output_x * output_feature_count * batch_size);
3022 for (uint32_t yxi = 0; yxi < output_y * output_x; ++yxi)
3024 for (uint32_t ofi = 0; ofi < output_feature_count; ++ofi)
3026 for (uint32_t bi = 0; bi < batch_size; ++bi)
3028 uint32_t template_factor = input_feature_count * input_feature_count * input_feature_count * bi * ofi +
3029 input_feature_count * input_feature_count * (input_feature_count + 1) / 2 * (bi + ofi) +
3030 input_feature_count * (input_feature_count + 1) * (2 * input_feature_count + 1) / 6;
3031 float bias_factor = ofi * 1.0f;
3033 output_vals.push_back(template_factor * output_vals_template[yxi] + bias_factor);
3038 // Computing convolution.
3040 input_layout("input", input.get_layout()),
3041 data("weights", weights),
3042 data("biases", biases),
3048 { 1,1,stride_x,stride_y },
3055 network network(engine, topology);
3056 network.set_input_data("input", input);
3058 auto outputs = network.execute();
3059 EXPECT_EQ(outputs.size(), size_t(1));
3060 EXPECT_EQ(outputs.begin()->first, "conv");
3062 auto output_prim = outputs.begin()->second.get_memory();
3064 auto output_ptr = output_prim.pointer<float>();
// Compare every element; on mismatch print the logical coordinates to ease debugging.
3068 for (uint32_t yxi = 0; yxi < output_y * output_x; ++yxi)
3070 for (uint32_t ofi = 0; ofi < output_feature_count; ++ofi)
3072 for (uint32_t bi = 0; bi < batch_size; ++bi, ++i)
3074 auto equal = are_equal(output_vals[i], get_value<float>(output_ptr, i));
3078 std::cout << "Failed at position (" << yxi << ", output feature = " << ofi << ", batch = " << bi << "): "
3079 << output_vals[i] << " != " << get_value<float>(output_ptr, i) << std::endl;
3086 #undef USE_OLD_WEIGHTS_FORMAT
// Quantizes weights in place to the int8 range: for each output feature map (ofm),
// finds the max absolute weight, scales all of that ofm's weights to [-127, 127]
// (rounded), and stores the per-ofm dequantization factor max/127 into w_qf.
// NOTE(review): loop braces and the max initialization are elided from this chunk;
// `max` is presumably reset per ofm — confirm against the full file.
3089 template<typename T>
3090 void quantize_weights(cldnn::memory& weights, cldnn::memory& w_qf)
3094 auto batch_pitch = weights.get_layout().get_pitches().batch[0];
3095 auto ptr = weights.pointer<T>();
3096 auto wqf_ptr = w_qf.pointer<float>();
3098 for (int ofm = 0; ofm < weights.get_layout().size.batch[0]; ofm++)
// First pass: find the per-ofm max absolute weight.
3101 for (int w = 0; w < batch_pitch; w++)
3102 if (max < abs(ptr[ofm* batch_pitch + w]))
3103 max = abs(ptr[ofm* batch_pitch + w]);
// Guard: an all-zero (or degenerate) ofm would divide by zero — skip quantization.
3106 max = (T)1; // do not quantize
// Second pass: scale to int8 range and record the dequantization factor.
3108 for (int w = 0; w < batch_pitch; w++)
3109 ptr[ofm* batch_pitch + w] = (T)round((float)ptr[ofm* batch_pitch + w] * 127.0f / (float)max);
3110 wqf_ptr[ofm] = max/127.0f;
// Computes per-output-feature calibration factors: for each feature map of `output`,
// finds the max absolute value and stores 127/max into `calibrations`, mapping that
// feature's dynamic range onto the int8 range.
// NOTE(review): loop braces and the max initialization are elided from this chunk.
3113 template<typename T>
3114 void calibrate(const cldnn::memory& output, cldnn::memory& calibrations)
3118 auto feature_pitch = output.get_layout().get_pitches().feature[0];
3119 auto ptr = output.pointer<T>();
3120 auto calibrations_ptr = calibrations.pointer<float>();
3122 for (int ofm = 0; ofm < output.get_layout().size.feature[0]; ofm++)
// Scan the whole feature plane for its max absolute value.
3125 for (int w = 0; w < feature_pitch; w++)
3126 if (max < abs(ptr[ofm* feature_pitch + w]))
3127 max = abs(ptr[ofm* feature_pitch + w]);
3128 calibrations_ptr[ofm] = 127.0f / max;
// Returns the maximum absolute value over all elements of `mem`.
// NOTE(review): the scan loop and return statement (original lines 3139-3144)
// are elided from this chunk.
3132 template<typename T>
3133 T max_abs(const cldnn::memory& mem)
3138 auto ptr = mem.pointer<T>();
// Divides each weight in place by the quantization factor of its *input* feature map
// (wqf_ptr is indexed by ifm), folding a previous layer's calibration into the weights.
// NOTE(review): the computation of `index` (between original lines 3155 and 3157)
// is elided from this chunk — confirm the indexing against the full file.
3145 template<typename T>
3146 void apply_calibration_on_weights(cldnn::memory& weights, cldnn::memory& qf)
3148 auto batch_pitch = weights.get_layout().get_pitches().batch[0];
3149 auto ptr = weights.pointer<T>();
3150 auto wqf_ptr = qf.pointer<float>();
3151 tensor w_size = weights.get_layout().size;
3153 for (int ofm = 0; ofm < w_size.batch[0]; ofm++)
3154 for (int ifm = 0; ifm < w_size.feature[0]; ifm++)
3155 for (int xy = 0; xy < w_size.spatial[0] * w_size.spatial[1]; xy++)
3157 ptr[index] = ptr[index] / wqf_ptr[ifm];
// Allocates an i8 memory with the same layout as `in_weights` and copies the float
// values over with a narrowing char cast (values are assumed to already be in int8 range).
// NOTE(review): the copy loop header (between original lines 3167 and 3170) is elided
// from this chunk; `a` and `indx` come from the missing lines.
3162 cldnn::memory create_int8_weights(engine engine, cldnn::memory& in_weights)
3164 auto layout = in_weights.get_layout();
3165 auto out_weights = memory::allocate(engine, { data_types::i8, layout.format, layout.size });
3166 auto in = in_weights.pointer<float>();
3167 auto out = out_weights.pointer<char>();
3170 out[indx++] = (char) a;
// Shared setup helper for the int8 tests below: allocates int8 weights, f32 biases and
// per-ofm quantization factors, and appends them plus a strided (1x2) int8 convolution
// with activation enabled (final `true` argument) to the given topology.
3174 void add_primitives(const engine& engine, topology& topology)
3176 auto weights = memory::allocate(engine, { data_types::i8, format::bfyx,{ 2, 1, 3, 2 } });
3178 std::vector<char> weights_values = { 1, 2, 1, 2, 1, 2, 19, 17, -1, -10, 32, 23 };
3179 set_values<char>(weights, weights_values);
3180 cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
3181 auto weigths_qfs = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
3182 set_values(biases, { 1.0f, -8.0f });
3185 data("weights", weights),
3186 data("biases", biases),
3187 data("w_qfs", weigths_qfs),
// Stride {0,0,1,2}, no input offset, dilation 1; trailing `true` enables fused activation.
3188 convolution("conv", "input", { "weights" }, { "biases" }, { 0, 0, 1, 2 }, { 0, 0, 0, 0 }, { 1, 1, 1, 1 }, true));
// Int8 convolution with fused activation on signed-byte input: builds the topology via
// add_primitives() above and checks a 2x3x2 bfyx output within a tolerance of 3.
// NOTE(review): the expected output_vec initializer rows (original lines 3224-3233)
// are elided from this chunk.
3191 TEST(convolution_f32_fw_gpu, byte_activation) {
// Engine configured to dump kernels ("kernels" directory) for this test.
3219 engine_configuration eng_conf(false, false, false, "", "", true, "", "kernels");
3220 engine engine{ eng_conf };
3221 auto input = memory::allocate(engine, { data_types::i8, format::bfyx,{ 1, 1, 5, 4 } });
3223 VVVF<char> output_vec = {
3234 opts.set_option(build_option::optimize_data(true));
3235 opts.set_option(build_option::graph_dumps_dir("graph"));
3237 set_values<char>(input, { 1, 2, -3, 4, -5, 2, -2, 3, -4, 6, -3, 3, -3, 5, -1, -1, -1, -1, -1, -1 });
3240 input_layout("input", input.get_layout()));
3241 add_primitives(engine, topology);
3242 network network(engine, topology, opts);
3243 network.set_input_data("input", input);
3245 auto outputs = network.execute();
3246 EXPECT_EQ(outputs.begin()->first, "conv");
3248 auto output_memory = outputs.at("conv").get_memory();
3249 auto output_layout = output_memory.get_layout();
3250 auto output_ptr = output_memory.pointer<char>();
// Validate layout and dimensions before comparing values.
3252 int y_size = output_layout.size.spatial[1];
3253 int x_size = output_layout.size.spatial[0];
3254 int f_size = output_layout.size.feature[0];
3255 int b_size = output_layout.size.batch[0];
3256 EXPECT_EQ(output_layout.format, format::bfyx);
3257 EXPECT_EQ(y_size, 2);
3258 EXPECT_EQ(x_size, 3);
3259 EXPECT_EQ(f_size, 2);
3260 EXPECT_EQ(b_size, 1);
// Loose tolerance (3.0) because int8 quantization introduces rounding error.
3261 for (int f = 0; f < f_size; f++)
3262 for (int y = 0; y < y_size; ++y) {
3263 for (int x = 0; x < x_size; ++x) {
3264 EXPECT_NEAR(output_vec[f][y][x], ((float)output_ptr[f*y_size*x_size + y * x_size + x]), 3.0f);
// Quantized convolution with a single output quantization factor: first runs the f32
// reference network, derives o_qf from the f32 output's max |value|, then runs the int8
// network and checks the dequantized output against the reference within tolerance 3.
// NOTE(review): the definition of `i_qf` used at original line 3353 is in lines elided
// from this chunk — confirm against the full file.
3269 TEST(convolution_f32_fw_gpu, quantized_convolution_low_prec_single_ofq) {
3298 const auto& engine = get_test_engine();
// --- f32 reference pipeline ---
3300 auto input_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 4 } });
3301 auto weights_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 2 } });
3302 cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
3303 auto weigths_qfs = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
3305 std::vector<float> weights_values_f = { 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 19.0, 17.0, -1.0, -10.0, 32.0, 23.0 };
3306 set_values<float>(input_f, { 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 2.0, 3.0, 4.0, 6.0, 3.0, 3.0, 3.0, 5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 });
3307 set_values<float>(weights_f, weights_values_f);
3309 set_values(biases, { 1.0f, -8.0f });
// Expected f32 results per output feature (2 features x 2x3 spatial).
3310 VVVF<float> output_vec = {
3312 { 21.0f, 28.0f, 39.0f },
3313 { 18.0f, 20.0f, 20.0f }
3316 { 155.0f, 245.0f, 348.0f },
3317 { 142.0f, 140.0f, 178.0f }
3320 topology topology_f(
3321 input_layout("input_f", input_f.get_layout()),
3322 data("weights_f", weights_f),
3323 data("biases", biases),
3324 convolution("conv_f", "input_f", { "weights_f" }, { "biases" }, { 0, 0, 1, 2 }));
3327 opts.set_option(build_option::optimize_data(true));
3328 network network_f(engine, topology_f, opts);
3329 network_f.set_input_data("input_f", input_f);
3331 auto outputs_f = network_f.execute();
3332 EXPECT_EQ(outputs_f.begin()->first, "conv_f");
3334 auto output_memory_f = outputs_f.at("conv_f").get_memory();
3335 auto output_ptr_f = output_memory_f.pointer<float>();
// --- int8 quantized pipeline ---
3337 auto input = memory::allocate(engine, { data_types::i8, format::bfyx,{ 1, 1, 5, 4 } });
3338 auto weights = memory::allocate(engine, { data_types::i8, format::bfyx,{ 2, 1, 3, 2 } });
// Single output quantization factor derived from the f32 reference output.
3340 float o_qf = 127.0f / max_abs<float>(output_memory_f);
3342 std::vector<char> weights_values = { 1, 2, 1, 2, 1, 2, 19, 17, -1, -10, 32, 23 };
3343 set_values<char>(input, { 1, 2, 3, 4, 5, 2, 2, 3, 4, 6, 3, 3, 3, 5, 1, 1, 1, 1, 1, 1 });
3344 set_values<char>(weights, weights_values);
3345 set_values<float>(weigths_qfs, { 1.0f, 1.0f });
// Quantize weights in place and fill weigths_qfs with per-ofm dequantization factors.
3346 quantize_weights<char>(weights, weigths_qfs);
3349 input_layout("input", input.get_layout()),
3350 data("weights", weights),
3351 data("biases", biases),
3352 data("w_qfs",weigths_qfs),
3353 convolution("conv", "input", { "weights" }, { "biases" }, { "w_qfs" },i_qf, o_qf, { 0, 0, 1, 2 }));
3355 network network(engine, topology, opts);
3356 network.set_input_data("input", input);
3358 auto outputs = network.execute();
3359 EXPECT_EQ(outputs.begin()->first, "conv");
3361 auto output_memory = outputs.at("conv").get_memory();
3362 auto output_layout = output_memory.get_layout();
3363 auto output_ptr = output_memory.pointer<char>();
3365 int y_size = output_layout.size.spatial[1];
3366 int x_size = output_layout.size.spatial[0];
3367 int f_size = output_layout.size.feature[0];
3368 int b_size = output_layout.size.batch[0];
3369 EXPECT_EQ(output_layout.format, format::bfyx);
3370 EXPECT_EQ(y_size, 2);
3371 EXPECT_EQ(x_size, 3);
3372 EXPECT_EQ(f_size, 2);
3373 EXPECT_EQ(b_size, 1);
// Dequantize (divide by o_qf) and compare against the f32 reference within tolerance 3.
3374 for (int f = 0; f < f_size; f++)
3375 for (int y = 0; y < y_size; ++y) {
3376 for (int x = 0; x < x_size; ++x) {
3377 EXPECT_NEAR(output_vec[f][y][x], ((float)output_ptr[f*y_size*x_size + y * x_size + x]) / o_qf, 3.0f);
// Checks an i8 quantized convolution with per-output-feature-map (per-OFM)
// calibration against an f32 reference: the f32 network "conv_f" is executed
// first, its output feeds calibrate<float>() to derive per-OFM factors, and the
// dequantized i8 output of "conv" must match the hard-coded expected values
// within an absolute tolerance of 3.0.
// NOTE(review): this excerpt is elided — declarations of `opts`, `i_qf` and
// the `topology` for the i8 network exist in the full file but are not visible here.
3383 TEST(convolution_f32_fw_gpu, quantized_convolution_high_prec_calib_per_ofm) {
3411 const auto& engine = get_test_engine();
// f32 reference buffers: 1x1x5x4 input, 2 output features, 3x2 kernel.
3413 auto input_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 4 } });
3414 auto weights_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 2 } });
3415 cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
// Per-OFM weight quantization factors and output calibration factors (one per feature).
3416 auto weigths_qfs = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
3417 auto output_calibrations = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
3419 std::vector<float> weights_values_f = { 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 19.0, 17.0, -1.0, -10.0, 32.0, 23.0 };
3420 set_values<float>(input_f, { 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 2.0, 3.0, 4.0, 6.0, 3.0, 3.0, 3.0, 5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 });
3421 set_values<float>(weights_f, weights_values_f);
3422 set_values(biases, { 1.0f, -8.0f });
// Expected (dequantized) output: [feature][y][x], 2 features x 2 rows x 3 cols.
3423 VVVF<float> output_vec = {
3425 { 21.0f, 28.0f, 39.0f },
3426 { 18.0f, 20.0f, 20.0f }
3429 { 155.0f, 245.0f, 348.0f },
3430 { 142.0f, 140.0f, 178.0f }
// f32 reference network; stride of { 0, 0, 1, 2 } is (x=1, y=2) in bfyx order.
3433 topology topology_f(
3434 input_layout("input_f", input_f.get_layout()),
3435 data("weights_f", weights_f),
3436 data("biases", biases),
3437 convolution("conv_f", "input_f", { "weights_f" }, { "biases" }, { 0, 0, 1, 2 }));
3440 opts.set_option(build_option::optimize_data(true));
3441 network network_f(engine, topology_f, opts);
3442 network_f.set_input_data("input_f", input_f);
3444 auto outputs_f = network_f.execute();
3445 EXPECT_EQ(outputs_f.begin()->first, "conv_f");
3447 auto output_memory_f = outputs_f.at("conv_f").get_memory();
3448 auto output_ptr_f = output_memory_f.pointer<float>();
// i8 network: same shapes/values as the f32 path, but int8 data.
3450 auto input = memory::allocate(engine, { data_types::i8, format::bfyx,{ 1, 1, 5, 4 } });
3451 auto weights = memory::allocate(engine, { data_types::i8, format::bfyx,{ 2, 1, 3, 2 } });
3454 std::vector<char> weights_values = { 1, 2, 1, 2, 1, 2, 19, 17, -1, -10, 32, 23 };
3455 set_values<char>(input, { 1, 2, 3, 4, 5, 2, 2, 3, 4, 6, 3, 3, 3, 5, 1, 1, 1, 1, 1, 1 });
3456 set_values<char>(weights, weights_values);
3457 set_values<float>(weigths_qfs, { 1.0f, 1.0f });
// Quantize i8 weights, then compute per-OFM calibration from the f32 output.
3458 quantize_weights<char>(weights, weigths_qfs);
3459 calibrate<float>(output_memory_f, output_calibrations);
3462 input_layout("input", input.get_layout()),
3463 data("weights", weights),
3464 data("biases", biases),
3465 data("w_qfs", weigths_qfs),
3466 data("out_calibrations", output_calibrations),
// i_qf is the input quantization factor — declared on an elided line above.
3467 convolution( "conv", "input", { "weights" }, { "biases" },
3468 { "w_qfs" }, { "out_calibrations" }, i_qf, { 0, 0, 1, 2 }));
3470 network network(engine, topology, opts);
3471 network.set_input_data("input", input);
3473 auto outputs = network.execute();
3474 EXPECT_EQ(outputs.begin()->first, "conv");
3476 auto output_memory = outputs.at("conv").get_memory();
3477 auto output_layout = output_memory.get_layout();
3478 auto output_ptr = output_memory.pointer<char>();
3479 auto o_qf = output_calibrations.pointer<float>();
// Sanity-check the output layout before elementwise comparison.
3480 int y_size = output_layout.size.spatial[1];
3481 int x_size = output_layout.size.spatial[0];
3482 int f_size = output_layout.size.feature[0];
3483 int b_size = output_layout.size.batch[0];
3484 EXPECT_EQ(output_layout.format, format::bfyx);
3485 EXPECT_EQ(y_size, 2);
3486 EXPECT_EQ(x_size, 3);
3487 EXPECT_EQ(f_size, 2);
3488 EXPECT_EQ(b_size, 1);
3489 for (int f = 0; f < f_size; f++)
3490 for (int y = 0; y < y_size; ++y) {
3491 for (int x = 0; x < x_size; ++x) {
// Dequantize with the per-feature factor o_qf[f]; allow +/-3.0 quantization error.
3492 EXPECT_NEAR(output_vec[f][y][x], ((float)output_ptr[f*y_size*x_size + y * x_size + x]) / o_qf[f], 3.0f);
// End-to-end calibration test over a two-convolution chain: an f32 reference
// network ("conv_f" -> "conv_f_2") is executed, per-OFM calibration factors are
// derived from both outputs, the second layer's f32 weights are rescaled by the
// first layer's calibration, both weight sets are quantized to i8, and the i8
// chain's dequantized output is compared to the f32 reference within +/-3.0.
// NOTE(review): elided excerpt — the declarations of `opts` and the i8
// `topology` (and the `i_qf` constant, if any) are on lines not visible here.
3496 TEST(convolution_f32_fw_gpu, calibration_advance) {
3536 //     313.32  217.43  118.10
3544 const auto& engine = get_test_engine();
// Layer 1: 1x1x5x4 input, 2 OFM, 3x2 kernel. Layer 2: 3 OFM, 2 IFM, 3x2 kernel.
3546 auto input_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 4 } });
3547 auto weights_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 2 } });
3548 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
3549 auto w_qf = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
3550 auto weights_f_2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 3, 2, 3, 2 } });
3551 auto biases_2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
3552 auto w_qf_2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
3554 std::vector<float> weights_values_f = { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, 1.9f, 1.7f, -1.0f, -1.0f, 3.2f, 2.3f };
3555 std::vector<float> weights_values_f_2 = {
3556 1.5f, 2.3f, -1.0f, 3.0f, 5.6f, -1.0f,
3557 3.0f, 5.6f, -1.0f, 1.0f, 2.0f, 3.0f,
3559 1.9f, 1.7f, -1.0f, 1.9f, 1.7f, -1.0f,
3560 -1.0f, 3.2f, 2.3f, -1.0f, 3.2f, 2.3f,
3562 1.0f, 2.0f, -1.0f, 2.0f, 1.0f, -1.0f,
3563 -1.0f, 2.0f, 1.0f, 1.0f, 2.0f, -1.0f,};
3565 set_values<float>(input_f, { 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 2.0, 3.0, 4.0, 6.0, 3.0, 3.0, 3.0, 5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 });
3566 set_values<float>(weights_f, weights_values_f);
3567 set_values<float>(weights_f_2, weights_values_f_2);
3568 set_values(biases, { 1.0f, -8.0f });
3569 set_values(biases_2, { 2.0f, 4.0f, 0.0f });
// f32 reference chain; both intermediate and final outputs are requested.
3571 topology topology_f(
3572 input_layout("input_f", input_f.get_layout()),
3573 data("weights_f", weights_f),
3574 data("biases", biases),
3575 data("weights_f_2", weights_f_2),
3576 data("biases_2", biases_2),
3577 convolution("conv_f", "input_f", { "weights_f" }, { "biases" }, { 0, 0, 1, 2 }),
3578 convolution("conv_f_2", "conv_f", { "weights_f_2" }, { "biases_2" }, { 0, 0, 1, 1 }));
3581 opts.set_option(build_option::optimize_data(true));
3582 opts.set_option(build_option::outputs({ "conv_f", "conv_f_2" }));
3583 network network_f(engine, topology_f, opts);
3584 network_f.set_input_data("input_f", input_f);
3586 auto outputs_f = network_f.execute();
3587 auto output_memory_f = outputs_f.at("conv_f").get_memory();
3588 auto output_memory_f_2 = outputs_f.at("conv_f_2").get_memory();
3589 auto output_calibrations = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
3590 auto output_calibrations_2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
// Derive calibration from both f32 outputs, then fold layer-1 calibration into
// layer-2 weights before quantizing both weight sets.
3592 calibrate<float>(output_memory_f, output_calibrations);
3593 calibrate<float>(output_memory_f_2, output_calibrations_2);
3594 apply_calibration_on_weights<float>(weights_f_2, output_calibrations);
3595 quantize_weights<float>(weights_f, w_qf);
3596 quantize_weights<float>(weights_f_2, w_qf_2);
// Convert the (now scaled/quantized) f32 buffers into i8 memories.
3598 auto weights = create_int8_weights(engine, weights_f);
3599 auto weigths_2 = create_int8_weights(engine, weights_f_2);
3600 auto input = create_int8_weights(engine, input_f);
3603 input_layout("input", input.get_layout()),
3604 data("weights", weights),
3605 data("biases", biases),
3606 data("weights_2", weigths_2),
3607 data("biases_2", biases_2),
3609 data("w_qf_2", w_qf_2),
3610 data("calib", output_calibrations),
3611 data("calib_2", output_calibrations_2),
// Input quantization factor 1.0f for both quantized convolutions.
3612 convolution("conv", "input", { "weights" }, { "biases" }, { "w_qf" }, { "calib" }, 1.0f, { 0, 0, 1, 2 }),
3613 convolution("conv_2", "conv", { "weights_2" }, { "biases_2" }, { "w_qf_2" }, { "calib_2" }, 1.0f, { 0, 0, 1, 1 }));
3615 build_options opts_2;
3616 opts_2.set_option(build_option::optimize_data(true));
3617 opts_2.set_option(build_option::outputs({ "conv", "conv_2" }));
3618 cldnn::network network(engine, topology, opts_2);
3619 network.set_input_data("input", input);
3620 auto outputs = network.execute();
3621 auto output_memory = outputs.at("conv_2").get_memory();
3622 auto ref_ptr = output_memory_f_2.pointer<float>();
3623 auto test_ptr = output_memory.pointer<char>();
3624 auto& out_size = output_memory.get_layout().size;
3625 auto o_qf = output_calibrations_2.pointer<float>();
// Compare the dequantized i8 chain output with the f32 reference, per element.
3627 for (int f = 0; f < out_size.feature[0]; f++)
3629 for (int y = 0; y < out_size.spatial[1]; ++y)
3631 for (int x = 0; x < out_size.spatial[0]; ++x)
3633 EXPECT_NEAR(ref_ptr[x + out_size.spatial[0]
3634 * (y + out_size.spatial[1] * f)], ((float)test_ptr[x + out_size.spatial[0]
3635 * (y + out_size.spatial[1] * f)]) / o_qf[f], 3.0f);
// Basic locally-connected convolution: weights use the bf_lyx_yx format, i.e. a
// distinct 2x2 kernel per (3x3) output position. The f32 output is compared
// exactly (EXPECT_FLOAT_EQ) against a flat expected vector.
// NOTE(review): elided excerpt — the weight/expected-value literals and the
// `opts` declaration are on lines not visible here.
3641 TEST(convolution_f32_fw_gpu, local_basic) {
3642 // Filter : 3x3x2x2 - local sizes
3670 const auto& engine = get_test_engine();
// tensor(b, f, kx, ky, lx, ly): 2x2 kernels at each of 3x3 local positions.
3671 tensor local_size = tensor(1,1,2,2,3,3);
3672 auto input_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 4 } });
3673 auto weights_f = memory::allocate(engine, { data_types::f32, format::bf_lyx_yx, local_size });
3674 cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
3676 std::vector<float> weights_values_f = {
3689 set_values<float>(input_f, { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0 });
3690 set_values<float>(weights_f, weights_values_f);
3691 set_values(biases, { 0.0f });
3692 std::vector<float> output_vec =
3695 18.0f, 24.0f, 30.0f,
3699 topology topology_f(
3700 input_layout("input_f", input_f.get_layout()),
3701 data("weights_f", weights_f),
3702 data("biases", biases),
3703 convolution("conv_f", "input_f", { "weights_f" }, { "biases" }, { 0, 0, 1, 1 }));
3706 opts.set_option(build_option::optimize_data(true));
3707 network network_f(engine, topology_f, opts);
3708 network_f.set_input_data("input_f", input_f);
3710 auto outputs_f = network_f.execute();
3711 EXPECT_EQ(outputs_f.begin()->first, "conv_f");
3713 auto output_memory_f = outputs_f.at("conv_f").get_memory();
3714 auto output_ptr_f = output_memory_f.pointer<float>();
// Flat elementwise comparison against the expected vector.
3715 unsigned int cntr = 0;
3716 for (auto fl : output_ptr_f)
3717 EXPECT_FLOAT_EQ(fl, output_vec[cntr++]);
// Locally-connected convolution with 3 output feature maps: weights are
// bf_lyx_yx with batch (OFM) = 3, one 2x2 kernel per 3x3 output position per
// OFM. Output is compared exactly against a flat expected vector.
// NOTE(review): elided excerpt — weight/expected literals and the `opts`
// declaration are on lines not visible here.
3721 TEST(convolution_f32_fw_gpu, local_multi_out_features) {
3722 // Filter : 3x1x3x3x2x2 - local sizes
3735 //   0  0  1  1  2  2  --- 1 ofm
3744 //   0  0  0  0  0  0  --- 2 ofm
3753 //   0  0  2  2  4  4  --- 3 ofm
3778 const auto& engine = get_test_engine();
// tensor(b=3 OFM, f=1 IFM, kx=2, ky=2, lx=3, ly=3).
3779 tensor local_size = tensor(3,1,2,2,3,3);
3780 auto input_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 4 } });
3781 auto weights_f = memory::allocate(engine, { data_types::f32, format::bf_lyx_yx, local_size });
3782 cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
3784 std::vector<float> weights_values_f = {
3821 set_values<float>(input_f, { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0 });
3822 set_values<float>(weights_f, weights_values_f);
3823 set_values(biases, { 0.0f, 0.0f, 0.0f });
3824 std::vector<float> output_vec =
3827 18.0f, 24.0f, 30.0f,
3828 48.0f, 56.0f, 64.0f,
3836 24.0f, 40.0f, 56.0f,
3839 topology topology_f(
3840 input_layout("input_f", input_f.get_layout()),
3841 data("weights_f", weights_f),
3842 data("biases", biases),
3843 convolution("conv_f", "input_f", { "weights_f" }, { "biases" }, { 0, 0, 1, 1 }));
3846 opts.set_option(build_option::optimize_data(true));
3847 network network_f(engine, topology_f, opts);
3848 network_f.set_input_data("input_f", input_f);
3850 auto outputs_f = network_f.execute();
3851 EXPECT_EQ(outputs_f.begin()->first, "conv_f");
3853 auto output_memory_f = outputs_f.at("conv_f").get_memory();
3854 auto output_ptr_f = output_memory_f.pointer<float>();
// Flat elementwise comparison against the expected vector.
3855 unsigned int cntr = 0;
3856 for (auto fl : output_ptr_f)
3858 EXPECT_FLOAT_EQ(fl, output_vec[cntr++]);
// Locally-connected convolution with 3 input feature maps: weights are
// bf_lyx_yx with feature (IFM) = 3; the three input planes hold constant
// values 0, 1 and 2 so accumulation across IFMs is easy to verify. Output is
// compared exactly against a flat expected vector.
// NOTE(review): elided excerpt — weight/expected literals and the `opts`
// declaration are on lines not visible here.
3862 TEST(convolution_f32_fw_gpu, local_multi_input_features) {
3863 // Filter : 1x3x3x3x2x2 - local sizes
3919 const auto& engine = get_test_engine();
// tensor(b=1 OFM, f=3 IFM, kx=2, ky=2, lx=3, ly=3).
3920 tensor local_size = tensor(1,3,2,2,3,3);
3921 auto input_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 3, 4, 4 } });
3922 auto weights_f = memory::allocate(engine, { data_types::f32, format::bf_lyx_yx, local_size });
3923 cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
3925 std::vector<float> weights_values_f = {
// Input planes: feature 0 all zeros, feature 1 all ones, feature 2 all twos.
3962 set_values<float>(input_f, {
3963 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
3964 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
3965 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0 });
3966 set_values<float>(weights_f, weights_values_f);
3967 set_values(biases, { 0.0f });
3968 std::vector<float> output_vec =
3970 60.0f, 72.0f, 84.0f,
3971 24.0f, 36.0f, 48.0f,
3975 topology topology_f(
3976 input_layout("input_f", input_f.get_layout()),
3977 data("weights_f", weights_f),
3978 data("biases", biases),
3979 convolution("conv_f", "input_f", { "weights_f" }, { "biases" }, { 0, 0, 1, 1 }));
3982 opts.set_option(build_option::optimize_data(true));
3983 network network_f(engine, topology_f, opts);
3984 network_f.set_input_data("input_f", input_f);
3986 auto outputs_f = network_f.execute();
3987 EXPECT_EQ(outputs_f.begin()->first, "conv_f");
3989 auto output_memory_f = outputs_f.at("conv_f").get_memory();
3990 auto output_ptr_f = output_memory_f.pointer<float>();
// Flat elementwise comparison against the expected vector.
3991 unsigned int cntr = 0;
3992 for (auto fl : output_ptr_f)
3993 EXPECT_FLOAT_EQ(fl, output_vec[cntr++]);
// fp16 convolution test: builds f32 input/weights/biases, reorders them to f16
// inside the network, convolves (16 batches, 2 IFM -> 16 OFM, 2x2 kernel,
// stride 2x2), reorders the result back to f32 and compares against analytically
// generated expected values with tolerance 0.002. Skipped when the device does
// not support cl_khr_fp16.
// NOTE(review): elided excerpt — several lines (e.g. the skip `return`, the
// literal weight/output templates, the convolution primitive in the topology,
// and some loop braces) are not visible here.
3997 TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp16)
// Toggle between old (bfyx) and new (yxfb) weight layouts for this test.
3999 #define USE_OLD_WEIGHTS_FORMAT 0
4001 const auto& engine = get_test_engine();
// Skip gracefully on devices without half-precision support.
4003 if (!engine.get_info().supports_fp16)
4005 std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." << std::endl;
4011 const auto input_format = format::yxfb;
4012 #if USE_OLD_WEIGHTS_FORMAT
4013 const auto weights_format = format::bfyx;
4015 const auto weights_format = format::yxfb;
4017 const auto biases_format = format::bfyx;
4018 const auto output_format = input_format;
4020 const int32_t batch_size = 16;
4021 const int32_t input_feature_count = 2;
4022 const int32_t output_feature_count = 16;
4024 const int32_t stride_x = 2;
4025 const int32_t stride_y = 2;
4027 const int32_t input_x = 4;
4028 const int32_t input_y = 4;
4029 const int32_t weights_x = 2;
4030 const int32_t weights_y = 2;
// No padding: standard "valid" convolution output size.
4031 const int32_t output_x = (input_x - weights_x) / stride_x + 1;
4032 const int32_t output_y = (input_y - weights_y) / stride_y + 1;
4035 auto input_size = tensor( batch_size, input_feature_count, input_x, input_y );
4036 auto input = memory::allocate(engine, { data_types::f32, input_format, input_size });
4037 auto weights_size = tensor( output_feature_count, input_feature_count, weights_x, weights_y );
4038 auto weights = memory::allocate(engine, { data_types::f32, weights_format, weights_size });
4039 auto biases_size = tensor( 1,1,output_feature_count,1 );
4040 auto biases = memory::allocate(engine, { data_types::f32, biases_format, biases_size });
4041 auto output_size = tensor( batch_size, output_feature_count, output_x, output_y );
4042 //auto output = memory::allocate({output_format, {batch_size, {output_x, output_y}, output_feature_count}});
4044 //auto input_cvtd = memory::allocate(engine, { data_types::f16, input_size });
4045 //auto weights_cvtd = memory::allocate(engine, { data_types::f16, weights_size });
4046 //auto biases_cvtd = memory::allocate(engine, { data_types::f16, biases_size });
4047 //auto output_cvtd = memory::allocate({output_cvt_format, {batch_size, {output_x, output_y}, output_feature_count}});
// Input values: a fixed 4x4 spatial template, scaled per (batch, feature).
4051 std::vector<float> input_vals_template {
4052 0.25f, 0.50f, 0.75f, 1.00f,
4053 1.25f, 1.50f, 1.75f, 2.00f,
4054 2.25f, 2.50f, 2.75f, 3.00f,
4055 3.25f, 3.50f, 3.75f, 4.00f,
4057 input_vals_template.resize(input_y * input_x);
// yxfb ordering: spatial outermost, then feature, then batch innermost.
4059 std::vector<float> input_vals;
4060 input_vals.reserve(input_y * input_x * input_feature_count * batch_size);
4061 for (uint32_t yxi = 0; yxi < input_y * input_x; ++yxi)
4063 for (uint32_t ifi = 0; ifi < input_feature_count; ++ifi)
4065 for (uint32_t bi = 0; bi < batch_size; ++bi)
4067 input_vals.push_back((bi * input_feature_count + ifi + 1) * input_vals_template[yxi]);
4071 set_values(input, input_vals);
4075 std::vector<float> weights_vals_template {
4079 weights_vals_template.resize(weights_y * weights_x);
// Weight values use the same template/scale scheme, ordered per the chosen format.
4081 std::vector<float> weights_vals;
4082 weights_vals.reserve(weights_y * weights_x * input_feature_count * output_feature_count);
4083 #if USE_OLD_WEIGHTS_FORMAT
4084 for (uint32_t ofi = 0; ofi < output_feature_count; ++ofi)
4086 for (uint32_t ifi = 0; ifi < input_feature_count; ++ifi)
4088 for (uint32_t yxi = 0; yxi < weights_y * weights_x; ++yxi)
4090 weights_vals.push_back((ofi * input_feature_count + ifi + 1) * weights_vals_template[yxi]);
4095 for (uint32_t yxi = 0; yxi < weights_y * weights_x; ++yxi)
4097 for (uint32_t ifi = 0; ifi < input_feature_count; ++ifi)
4099 for (uint32_t ofi = 0; ofi < output_feature_count; ++ofi)
4101 weights_vals.push_back((ofi * input_feature_count + ifi + 1) * weights_vals_template[yxi]);
4106 set_values(weights, weights_vals);
// Bias for OFM i is simply i.
4110 std::vector<float> biases_vals;
4111 biases_vals.reserve(output_feature_count);
4112 for (uint32_t ofi = 0; ofi < output_feature_count; ++ofi)
4114 biases_vals.push_back(ofi * 1.0f);
4116 set_values(biases, biases_vals);
// Expected output computed in closed form from the scale factors above.
4120 std::vector<float> output_vals_template {
4124 output_vals_template.resize(output_y * output_x);
4126 std::vector<float> output_vals;
4127 output_vals.reserve(output_y * output_x * output_feature_count * batch_size);
4128 for (uint32_t yxi = 0; yxi < output_y * output_x; ++yxi)
4130 for (uint32_t ofi = 0; ofi < output_feature_count; ++ofi)
4132 for (uint32_t bi = 0; bi < batch_size; ++bi)
// Closed-form sum over input features of (bi*F+ifi+1)*(ofi*F+ifi+1).
4134 uint32_t template_factor = input_feature_count * input_feature_count * input_feature_count * bi * ofi +
4135 input_feature_count * input_feature_count * (input_feature_count + 1) / 2 * (bi + ofi) +
4136 input_feature_count * (input_feature_count + 1) * (2 * input_feature_count + 1) / 6;
4137 float bias_factor = ofi * 1.0f;
4139 output_vals.push_back(template_factor * output_vals_template[yxi] + bias_factor);
4144 //auto expected_float = memory::allocate(engine, { data_types::f32,{ format::x,{ static_cast<int32_t>(output_vals.size()) } } });
4145 //auto expected_half = memory::allocate(engine, { data_types::f16,{ format::x,{ static_cast<int32_t>(output_vals.size()) } } });
4146 //auto expected = memory::allocate(engine, { data_types::f32,{ format::x,{ static_cast<int32_t>(output_vals.size()) } } });
4148 //    set_values(expected_float, output_vals);
4149 //    auto cvt_expected_f32_f16 = reorder::create({expected_float, expected_half});
4150 //    auto cvt_expected_f16_f32 = reorder::create({expected_half, expected});
4151 //    execute({cvt_expected_f32_f16, cvt_expected_f16_f32}).wait();
4153 //    auto expected_ptr = expected.as<const memory&>().pointer<float>();
4156 // Computing convolution.
// f32 -> f16 reorders feed the convolution; a final reorder converts back to f32.
4158 input_layout("input", input.get_layout()),
4159 reorder("cvt_input", "input", {data_types::f16, input_format, input_size}),
4160 data("weights", weights),
4161 reorder("cvt_weights", "weights", {data_types::f16, weights_format, weights_size}),
4162 data("biases", biases),
4163 reorder("cvt_biases", "biases", {data_types::f16, biases_format, biases_size}),
4169 { 1,1,stride_x,stride_y }),
4170 reorder("output", "conv", {data_types::f32, output_format, output_size})
4173 network network(engine, topology);
4174 network.set_input_data("input", input);
4176 auto outputs = network.execute();
4177 EXPECT_EQ(outputs.size(), size_t(1));
4178 EXPECT_EQ(outputs.begin()->first, "output");
4180 auto output_prim = outputs.begin()->second.get_memory();
4182 auto output_ptr = output_prim.pointer<float>();
// Compare against the analytic expectation; 0.002 absorbs fp16 rounding.
4186 for (uint32_t yxi = 0; yxi < output_y * output_x; ++yxi)
4188 for (uint32_t ofi = 0; ofi < output_feature_count; ++ofi)
4190 for (uint32_t bi = 0; bi < batch_size; ++bi, ++i)
4192 auto equal = are_equal(output_vals[i] /*get_value(expected_ptr, i)*/, output_ptr[i], 0.002f);
4196 std::cout << "Failed at position (" << yxi << ", output feature = " << ofi << ", batch = " << bi << "): "
4197 << output_vals[i] /*get_value(expected_ptr, i)*/ << " != " << output_ptr[i] << std::endl;
4204 #undef USE_OLD_WEIGHTS_FORMAT
// Parameter tuple for the value-parameterized convolution tests below:
// (filter size, input features, stride, output padding, with bias).
4207 using TestParamType_convolution_gpu = ::testing::tuple<int,   // 0 - Filter size
4208 int,   // 1 - Input features
4210 int,   // 3 - Output padding
4211 bool>; // 4 - With bias
// Fixture for TEST_P convolution tests; provides a human-readable test-name
// generator so each parameter combination is identifiable in gtest output.
4213 struct convolution_gpu : public ::testing::TestWithParam<TestParamType_convolution_gpu>
// Builds names like "3x3_f32_stride1_pad0_bias" from the parameter tuple.
4216 PrintToStringParamName(testing::TestParamInfo<TestParamType_convolution_gpu> param_info)
4218 // construct a readable name
4219 return std::to_string(testing::get<0>(param_info.param))
4220 + 'x' + std::to_string(testing::get<0>(param_info.param))
4221 + "_f" + std::to_string(testing::get<1>(param_info.param))
4222 + "_stride" + std::to_string(testing::get<2>(param_info.param))
4223 + "_pad" + std::to_string(testing::get<3>(param_info.param))
4224 + (testing::get<4>(param_info.param) ? "_bias" : "");
// Parameterized IMAD test: runs the same i8 convolution twice — a "gold" path
// on plain bfyx input and an "imad" path on input reordered to b_fs_yx_fsv4 —
// and requires the two outputs to be byte-identical. Parameters select filter
// size, input features, stride, output padding and whether bias/calibration/
// quantization inputs are attached.
// NOTE(review): elided excerpt — declarations of `engine`, `in_B`, `W_F`, `W_Y`,
// the `weights_gold`/`weights_imad` variable names, the lambda bodies for the
// std::generate calls and the `if (with_bias)` branch structure are on lines
// not visible here.
4228 TEST_P(convolution_gpu, b_fs_yx_fsv4)
4231 const int in_X = 56;
4232 const int in_Y = 56;
4233 const int _OuD = 32;  // number of output features (and weight batches)
4234 const int W_B = _OuD;
4237 int W_X = testing::get<0>(GetParam());
4240 // Convolution offset (centers the kernel around each output position).
4241 int offSet = -(W_X / 2);
4244 int in_F = testing::get<1>(GetParam());
4248 int stride = testing::get<2>(GetParam());
4251 int output_padding = testing::get<3>(GetParam());
4254 bool with_bias = testing::get<4>(GetParam());
// Deterministic input: sequential bytes 0,1,2,... over the whole input tensor.
4259 std::vector<char> Data(in_B * in_F * in_X * in_Y);
4260 std::iota(Data.begin(), Data.end(), 0);
4261 auto input = memory::allocate(engine, {data_types::i8, format::bfyx, {in_B, in_F, in_X, in_Y}});
4262 set_values(input, std::move(Data));
4264 // Create a topology
4265 topology topology(input_layout("input", input.get_layout()));
// The imad path consumes the input reordered into the fsv4-blocked layout.
4268 topology.add(reorder("reorder_in",
4270 layout(data_types::i8, format::b_fs_yx_fsv4, {in_B, in_F, in_X, in_Y})));
// Identical sequential weights for both paths.
4273 std::vector<char> Weights(W_B * W_F * W_X * W_Y);
4274 std::iota(Weights.begin(), Weights.end(), 0);
4276 memory::allocate(engine, {data_types::i8, format::bfyx, {W_B, W_F, W_X, W_Y}});
4278 memory::allocate(engine, {data_types::i8, format::bfyx, {W_B, W_F, W_X, W_Y}});
4279 set_values(weights_gold, Weights);
4280 set_values(weights_imad, std::move(Weights));
4281 topology.add(data("weights_gold", weights_gold), data("weights_imad", weights_imad));
4285 // Bias, Calibration, Quantization
// Per-OFM vectors generated with a stateful lambda (bodies elided here);
// duplicated into *_gold and *_imad memories so both paths get identical data.
4286 std::vector<float> vB(_OuD), vC(_OuD), vQ(_OuD);
4288 std::generate(vB.begin(), vB.end(), [x]() mutable {
4295 std::generate(vC.begin(), vC.end(), [x]() mutable {
4302 std::generate(vQ.begin(), vQ.end(), [x]() mutable {
4308 auto bias_gold = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, _OuD, 1}});
4309 auto bias_imad = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, _OuD, 1}});
4310 auto callib_gold = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, _OuD, 1}});
4311 auto callib_imad = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, _OuD, 1}});
4312 auto quant_gold = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, _OuD, 1}});
4313 auto quant_imad = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, _OuD, 1}});
4314 set_values(bias_gold, vB);
4315 set_values(bias_imad, std::move(vB));
4316 set_values(callib_gold, vC);
4317 set_values(callib_imad, std::move(vC));
4318 set_values(quant_gold, vQ);
4319 set_values(quant_imad, std::move(vQ));
4320 topology.add(data("bias_gold", bias_gold),
4321 data("callib_gold", callib_gold),
4322 data("quant_gold", quant_gold));
4323 topology.add(data("bias_imad", bias_imad),
4324 data("callib_imad", callib_imad),
4325 data("quant_imad", quant_imad));
// with_bias == true branch: calibrated/quantized convolutions (primitive
// argument lists partially elided).
4328 convolution conv_gold("conv_gold",
4335 {1, 1, stride, stride},
4336 {0, 0, offSet, offSet});
4337 convolution conv_imad("conv_imad",
4344 {1, 1, stride, stride},
4345 {0, 0, offSet, offSet});
4346 conv_gold.output_padding = padding({0, 0, output_padding, output_padding}, 0.0f);
4347 conv_imad.output_padding = padding({0, 0, output_padding, output_padding}, 0.0f);
4348 topology.add(conv_gold, conv_imad);
// with_bias == false branch: plain i8 convolutions without bias inputs.
4353 convolution conv_gold(
4354 "conv_gold", "input", {"weights_gold"}, {1, 1, stride, stride}, {0, 0, offSet, offSet});
4355 convolution conv_imad(
4356 "conv_imad", "reorder_in", {"weights_imad"}, {1, 1, stride, stride}, {0, 0, offSet, offSet});
4357 conv_gold.output_padding = padding({0, 0, output_padding, output_padding}, 0.0f);
4358 conv_imad.output_padding = padding({0, 0, output_padding, output_padding}, 0.0f);
4359 topology.add(conv_gold, conv_imad);
// Reorder the imad result back to a comparable layout, preserving padding.
4363 topology.add(reorder("reorder_out",
4365 layout(data_types::i8,
4367 {in_B, W_B, (in_X + stride - 1) / stride, (in_Y + stride - 1) / stride},
4368 padding({0, 0, output_padding, output_padding}, 0.0f))));
4371 build_options build_opt;
4372 build_opt.set_option(build_option::optimize_data(true));
4373 network network(engine, topology, build_opt);
4375 // Network execution
4376 network.set_input_data("input", input);
4377 auto outputs = network.execute();
4379 auto out_gold = outputs.find("conv_gold");
4380 auto out_test = outputs.find("reorder_out");
4381 ASSERT_NE(out_gold, outputs.end());
4382 ASSERT_NE(out_test, outputs.end());
4384 auto gold_ptr = out_gold->second.get_memory().pointer<char>();
4385 auto test_ptr = out_test->second.get_memory().pointer<char>();
// Exact (bitwise) equality between the reference and IMAD outputs.
4387 ASSERT_EQ(gold_ptr.size(), test_ptr.size());
4388 for (size_t i = 0; i < gold_ptr.size(); i++)
4390 ASSERT_EQ(gold_ptr[i], test_ptr[i]);
4394 // Select particular test cases
// Explicit parameter list for the IMAD TEST_P above; a Combine()-based
// all-combinations variant is kept commented out below for reference.
4395 INSTANTIATE_TEST_CASE_P(convolution_gpu_imad,
4398 //                        Filter size, Input features, Stride, Output padding, With bias
4399 TestParamType_convolution_gpu(1, 32, 1, 0, false),
4400 TestParamType_convolution_gpu(3, 32, 1, 0, false),
4401 TestParamType_convolution_gpu(7, 3, 1, 0, false),
4402 TestParamType_convolution_gpu(1, 32, 1, 0, true),
4403 TestParamType_convolution_gpu(3, 32, 1, 0, true),
4404 TestParamType_convolution_gpu(7, 3, 1, 0, true),
4405 TestParamType_convolution_gpu(1, 32, 1, 1, false),
4406 TestParamType_convolution_gpu(3, 32, 1, 1, false),
4407 TestParamType_convolution_gpu(7, 3, 1, 1, false),
4408 TestParamType_convolution_gpu(1, 32, 2, 0, false),
4409 TestParamType_convolution_gpu(3, 32, 2, 0, false),
4410 TestParamType_convolution_gpu(7, 3, 2, 0, false)),
4411 convolution_gpu::PrintToStringParamName);
4412 //// or test all combinations
4413 //INSTANTIATE_TEST_CASE_P(convolution_gpu_imad,
4415 // ::testing::Combine(::testing::Values(1, 3, 7), // Filter size
4416 // ::testing::Values(3, 32), // Input features
4417 // ::testing::Values(1, 2), // Stride
4418 // ::testing::Values(0, 1), // Output padding
4419 // ::testing::Values(false, true) // With bias
4421 // convolution_gpu::PrintToStringParamName);
4423 class convolution_test : public tests::generic_test
// Frees the raw-owned test parameter objects accumulated by
// generate_all_test_params(); runs once after the whole test case.
4428 static void TearDownTestCase()
4430 for (auto generic_params : all_generic_params)
4432 delete generic_params;
4435 for (auto layer_params : all_layer_params)
4437 delete layer_params;
// Populates all_layer_params with heap-allocated convolution primitives covering
// combinations of stride, dilation, input offset, activation, and input/output
// padding. "input0"/"reorder0" name the primitive each convolution consumes;
// "input1"/"input2" are the weight and bias inputs. Ownership of the raw
// pointers is released in TearDownTestCase().
4441 static std::vector<cldnn::primitive*> generate_specific_test_params()
4443 // TODO: check split
4445 // TODO: check convolution without bias
4447 const std::vector<primitive_id>& weights = { "input1" };
4448 const std::vector<primitive_id>& bias = { "input2" };
4450 std::vector<tensor> stride_sizes = { tensor(1, 1, 1, 1), tensor(1, 1, 2, 3), tensor(1, 1, 4, 1), tensor(1, 1, 5, 5) };
4451 std::vector<tensor> dilation_sizes = { tensor(1, 1, 1, 1), tensor(1, 1, 5, 4), tensor(1, 1, 1, 3), tensor(1, 1, 7, 2) };
4452 std::vector<tensor> input_offset_sizes = { tensor(0, 0, 0, 0), tensor(0, 0, 2, 2), tensor(0, 0, -5, -2), tensor(0, 0, 3, -3) };
4454 std::vector<bool> activations = { false, true };
4455 std::vector<float> activation_slopes = { 0.f, -2.3f };
// No padding.
4458 all_layer_params.push_back(new convolution("convolution", "input0", weights, bias, stride_sizes[0], input_offset_sizes[0], dilation_sizes[0], activations[0], activation_slopes[0]));
4459 all_layer_params.push_back(new convolution("convolution", "input0", weights, bias, stride_sizes[1], input_offset_sizes[1], dilation_sizes[1], activations[0], activation_slopes[0]));
4460 all_layer_params.push_back(new convolution("convolution", "input0", weights, bias, stride_sizes[2], input_offset_sizes[2], dilation_sizes[2], activations[1], activation_slopes[0]));
4461 all_layer_params.push_back(new convolution("convolution", "input0", weights, bias, stride_sizes[3], input_offset_sizes[3], dilation_sizes[3], activations[1], activation_slopes[1]));
// Input padding only (convolution reads from "reorder0").
4464 all_layer_params.push_back(new convolution("convolution", "reorder0", weights, bias, stride_sizes[1], input_offset_sizes[1], dilation_sizes[1], activations[0], activation_slopes[0]));
4465 all_layer_params.push_back(new convolution("convolution", "reorder0", weights, bias, stride_sizes[3], input_offset_sizes[3], dilation_sizes[3], activations[1], activation_slopes[1]));
// Output padding only.
4468 all_layer_params.push_back(new convolution("convolution", "input0", weights, bias, stride_sizes[1], input_offset_sizes[1], dilation_sizes[1], activations[0], activation_slopes[0], { { 0, 0, 2, 4 },{ 0, 0, 0, 19 } }));
4469 all_layer_params.push_back(new convolution("convolution", "input0", weights, bias, stride_sizes[2], input_offset_sizes[2], dilation_sizes[2], activations[1], activation_slopes[0], { { 0, 0, 1, 0 },{ 0, 0, 13, 9 } }));
4471 // Input + Output padding
4472 all_layer_params.push_back(new convolution("convolution", "reorder0", weights, bias, stride_sizes[0], input_offset_sizes[0], dilation_sizes[0], activations[0], activation_slopes[0], { { 0, 0, 1, 5 },{ 0, 0, 19, 4 } }));
4473 all_layer_params.push_back(new convolution("convolution", "reorder0", weights, bias, stride_sizes[3], input_offset_sizes[3], dilation_sizes[3], activations[1], activation_slopes[1], { { 0, 0, 1, 2 },{ 0, 0, 3, 4 } }));
4475 return all_layer_params;
// Builds the full cross product of generic test parameters (data type, input
// format, weights format, input size, kernel size, output feature count) and
// pairs each with every layer primitive from generate_specific_test_params().
// Each test_params is heap-allocated; freed in TearDownTestCase().
4478 static std::vector<std::tuple<tests::test_params*, cldnn::primitive*>> generate_all_test_params()
4480 generate_specific_test_params();
4482 std::vector<cldnn::format> input_formats = { cldnn::format::bfyx, cldnn::format::yxfb };
4483 std::vector<cldnn::format> weights_formats = { cldnn::format::bfyx, cldnn::format::yxfb };
4485 std::vector<int32_t> output_features_sizes = { 1, 3, 16 };
4486 std::vector<cldnn::tensor> kernel_sizes = { tensor(1, 1, 1, 1), tensor(1, 1, 4, 7), tensor(1, 1, 5, 3) };
4488 std::vector<tensor> input_tensor_size = { tensor(1, 5, 59, 72), tensor(8, 3, 63, 56), tensor(16, 2, 50, 50), tensor(32, 1, 44, 62) };
4490 auto data_types = test_data_types();
4492 for (cldnn::data_types data_type : data_types)
4494 for (cldnn::format input_format : input_formats)
4496 for (cldnn::format weights_format : weights_formats)
4498 cldnn::build_options network_build_options;
// optimize_data is only exercised for the bfyx input format.
4499 if (input_format == cldnn::format::bfyx)
4501 network_build_options.set_option(cldnn::build_option::optimize_data(true));
4503 for (cldnn::tensor input_size : input_tensor_size)
4505 for (cldnn::tensor kernel_size : kernel_sizes)
4507 for (auto output_features : output_features_sizes)
// input_layouts[0] = data input; [1] = weights; [2] = biases (1 x 1 x OFM x 1).
4509 test_params* params = new test_params(data_type, input_format, input_size.batch[0], input_size.feature[0], tensor(1, 1, input_size.spatial[0], input_size.spatial[1]), network_build_options);
4510 int input_features = params->input_layouts[0].size.feature[0];
4511 params->input_layouts.push_back(cldnn::layout(params->data_type, weights_format, cldnn::tensor(output_features, input_features, kernel_size.spatial[0], kernel_size.spatial[1]))); // weights
4512 params->input_layouts.push_back(cldnn::layout(params->data_type, params->fmt, cldnn::tensor(1, 1, output_features, 1))); // biases
4513 all_generic_params.push_back(params);
4521 // Create all the combinations for the test.
4522 for (cldnn::primitive* layer_param : all_layer_params)
4524 for (tests::test_params* test_param : all_generic_params)
4526 all_test_params.push_back(std::make_tuple(test_param, layer_param));
4530 return all_test_params;
// Generic-test hook: this suite only runs on bfyx and yxfb input formats.
4533 virtual bool is_format_supported(cldnn::format format)
4535 return ((format == cldnn_format_type::cldnn_format_bfyx) || (format == cldnn_format_type::cldnn_format_yxfb));
// Computes the expected convolution output tensor from the current layer
// parameters: standard output-size formula with dilation, stride and input
// offset (note: offset is subtracted twice, i.e. negative offsets enlarge the
// output). Output feature count comes from the weights' batch dimension.
4538 virtual cldnn::tensor get_expected_output_tensor()
4540 const cldnn::convolution* convolution = (cldnn::convolution*)layer_params;
4541 tensor input_size = generic_params->input_layouts[0].size;
4542 tensor dilation = convolution->dilation;
4543 tensor stride = convolution->stride;
4544 tensor input_offset = convolution->input_offset;
4545 tensor weights_size = generic_params->input_layouts[1].size;
// Effective (dilated) kernel footprint in each spatial dimension.
4547 int kernel_extent_y = dilation.spatial[1] * (weights_size.spatial[1] - 1) + 1;
4548 int kernel_extent_x = dilation.spatial[0] * (weights_size.spatial[0] - 1) + 1;
4550 // Calculate output size
4551 int output_size_y = 1 + (input_size.spatial[1] - kernel_extent_y - 2 * input_offset.spatial[1]) / stride.spatial[1];
4552 int output_size_x = 1 + (input_size.spatial[0] - kernel_extent_x - 2 * input_offset.spatial[0]) / stride.spatial[0];
4553 int output_features = weights_size.batch[0];
4555 return cldnn::tensor(input_size.batch[0], output_features, output_size_x, output_size_y);
// Dispatches input preparation on the test's data type: f32 -> float path,
// otherwise the FLOAT16 path.
4558 virtual void prepare_input_for_test(std::vector<cldnn::memory>& inputs)
4560 if (generic_params->data_type == data_types::f32)
4562 prepare_input_for_test_typed<float>(inputs);
4566 prepare_input_for_test_typed<FLOAT16>(inputs);
// Fills the three test inputs (data, weights, biases) with random values in
// [-2, 2]. `k` controls the random-value granularity: finer (8) for f32,
// coarser (4) for fp16 so quantization error stays bounded.
4570 template<typename Type>
4571 void prepare_input_for_test_typed(std::vector<cldnn::memory>& inputs)
4573 int k = (generic_params->data_type == data_types::f32) ? 8 : 4;
// inputs[0]: data tensor.
4576 auto input = inputs[0];
4577 auto input_size = inputs[0].get_layout().size;
4578 VVVVF<Type> input_rnd = generate_random_4d<Type>(input_size.batch[0], input_size.feature[0], input_size.spatial[1], input_size.spatial[0], -2, 2, k);
4579 VF<Type> input_rnd_vec = flatten_4d<Type>(input.get_layout().format, input_rnd);
4580 set_values(input, input_rnd_vec);
// inputs[1]: weights tensor.
4583 auto weight_input = inputs[1];
4584 auto weight_size = inputs[1].get_layout().size;
4585 VVVVF<Type> weight_rnd = generate_random_4d<Type>(weight_size.batch[0], weight_size.feature[0], weight_size.spatial[1], weight_size.spatial[0], -2, 2, k);
4586 VF<Type> weight_rnd_vec = flatten_4d<Type>(weight_input.get_layout().format, weight_rnd);
4587 set_values(weight_input, weight_rnd_vec);
// inputs[2]: biases (1-D, one value per output feature, stored along spatial x).
4590 auto bias_input = inputs[2];
4591 auto bias_size = inputs[2].get_layout().size;
4592 VF<Type> bias_rnd = generate_random_1d<Type>(bias_size.spatial[0], -2, 2, k);
4593 set_values(bias_input, bias_rnd);
// Computes the CPU reference result of the tested convolution on `inputs`
// (input / weights / biases) and returns it as a freshly allocated bfyx memory.
// NOTE(review): several interior lines of this method (local index
// declarations such as output_bi/input_bi, continue statements, the
// is_relu_fused guard and the final return) appear elided in this view;
// the code is left untouched.
template<typename Type>
memory generate_reference_typed(const std::vector<cldnn::memory>& inputs)
// Output reference is always bfyx.
const cldnn::convolution* convolution = (cldnn::convolution*)layer_params;
data_types dt = inputs[0].get_layout().data_type;
tensor input_size = inputs[0].get_layout().size;
tensor dilation = convolution->dilation;
tensor stride = convolution->stride;
// Fused leaky-ReLU parameters taken from the primitive under test.
bool is_relu_fused = convolution->with_activation;
float activation_slope = convolution->activation_negative_slope;
tensor input_offset = convolution->input_offset;
tensor weights_size = inputs[1].get_layout().size;
padding output_padding = convolution->output_padding;
tensor output_size = get_expected_output_tensor();
// Calculate output size
int output_size_y = output_size.spatial[1];
int output_size_x = output_size.spatial[0];
int output_features = weights_size.batch[0];
int input_features = weights_size.feature[0];
// Reference output buffer, always bfyx, carrying the primitive's padding.
auto output = memory::allocate( engine, cldnn::layout(dt, cldnn::format::bfyx, output_size, output_padding) );
auto input_mem = inputs[0].pointer<Type>();
auto weights_mem = inputs[1].pointer<Type>();
auto bias_mem = inputs[2].pointer<Type>();
auto output_mem = output.pointer<Type>();
// Buffer size includes output padding, hence the explicit index math below.
tensor output_buffer_size = output.get_layout().get_buffer_size();
// Initialized output with zeros.
std::fill(output_mem.begin(), output_mem.end(), static_cast<Type>(0));
// Seed every output element with its feature's bias value.
for (int b = 0; b < input_size.batch[0]; b++)
for (int out_f = 0; out_f < output_features; out_f++)
for (int y = 0; y < output_size_y; y++)
for (int x = 0; x < output_size_x; x++)
// Linear bfyx index into the padded output buffer.
int output_index = (b * output_buffer_size.feature[0] + out_f) * output_buffer_size.spatial[1] * output_buffer_size.spatial[0];
tensor lower_output_padding = convolution->output_padding.lower_size();
output_index += (lower_output_padding.spatial[1] + y) * output_buffer_size.spatial[0] + lower_output_padding.spatial[0] + x;
output_mem[output_index] += bias_mem[out_f];
// Cached linear-memory descriptors so per-element index math is cheap.
const auto input0_desc = get_linear_memory_desc(inputs[0].get_layout());
const auto input1_desc = get_linear_memory_desc(inputs[1].get_layout());
// Convolve with weights
for (int b = 0; b < input_size.batch[0]; b++)
for (int out_f = 0; out_f < output_features; out_f++)
for (int in_f = 0; in_f < input_features; in_f++)
int input_fi = in_f;
for (int y = 0; y < output_size_y; y++)
for (int x = 0; x < output_size_x; x++)
int output_fi = out_f;
int output_index = (output_bi * output_buffer_size.feature[0] + output_fi) * output_buffer_size.spatial[1] * output_buffer_size.spatial[0];
tensor lower_output_padding = convolution->output_padding.lower_size();
output_index += (lower_output_padding.spatial[1] + output_yi) * output_buffer_size.spatial[0] + lower_output_padding.spatial[0] + output_xi;
for (int kernel_y = 0; kernel_y < weights_size.spatial[1]; kernel_y++)
// input_offset is non-positive for padding; taps that land outside the
// real input are skipped (implicit zero padding).
int input_yi = y * stride.spatial[1] + input_offset.spatial[1] + kernel_y * dilation.spatial[1];
if ((input_yi < 0) || (input_yi >= input_size.spatial[1]))
for (int kernel_x = 0; kernel_x < weights_size.spatial[0]; kernel_x++)
int input_xi = x * stride.spatial[0] + input_offset.spatial[0] + kernel_x * dilation.spatial[0];
if ((input_xi < 0) || (input_xi >= input_size.spatial[0]))
size_t input_index = get_linear_index(inputs[0].get_layout(), input_bi, input_fi, input_yi, input_xi, input0_desc);
int weight_bi = out_f;
int weight_fi = in_f;
int weight_yi = kernel_y;
int weight_xi = kernel_x;
size_t weight_index = get_linear_index(inputs[1].get_layout(), weight_bi, weight_fi, weight_yi, weight_xi, input1_desc);
output_mem[output_index] += input_mem[input_index] * weights_mem[weight_index];
// Elementwise leaky-ReLU: keep positives, scale negatives by the slope.
// NOTE(review): presumably executed only when is_relu_fused -- the guard
// line is not visible in this chunk.
for (int i = 0; i < (int)output_buffer_size.count(); i++)
output_mem[i] = (output_mem[i] > 0.f) ? output_mem[i] : (output_mem[i] * (Type)activation_slope);
4721 virtual memory generate_reference(const std::vector<cldnn::memory>& inputs)
4723 if (generic_params->data_type == data_types::f32)
4725 return generate_reference_typed<float>(inputs);
4729 return generate_reference_typed<FLOAT16>(inputs);
// Parameter pools shared by every instance of the parameterized test;
// populated once by generate_all_test_params() and joined pairwise into
// all_test_params.
static std::vector<tests::test_params*> all_generic_params;
static std::vector<cldnn::primitive*> all_layer_params;
static std::vector<std::tuple<tests::test_params*, cldnn::primitive*>> all_test_params;
// Out-of-class definitions of the static parameter containers declared in
// convolution_test; empty until generate_all_test_params() fills them.
std::vector<tests::test_params*> convolution_test::all_generic_params = {};
std::vector<cldnn::primitive*> convolution_test::all_layer_params = {};
std::vector<std::tuple<tests::test_params*, cldnn::primitive*>> convolution_test::all_test_params = {};
// Parameterized test entry point for all generated convolution cases
// (the test body is not visible in this chunk).
TEST_P(convolution_test, CONVOLUTION)
// Instantiates the suite over every (generic_params, layer_params) pair.
// The DISABLED_ prefix keeps this exhaustive combination run out of the
// default test execution (gtest convention).
INSTANTIATE_TEST_CASE_P(DISABLED_CONVOLUTION,
    ::testing::ValuesIn(convolution_test::generate_all_test_params()),
    tests::generic_test::custom_param_name_functor());