2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
19 #include <gtest/gtest.h>
20 #include "api/CPP/memory.hpp"
21 #include <api/CPP/input_layout.hpp>
22 #include "api/CPP/convolution.hpp"
23 #include <api/CPP/topology.hpp>
24 #include <api/CPP/network.hpp>
25 #include <api/CPP/engine.hpp>
26 #include "test_utils/test_utils.h"
27 #include "test_utils/float16.h"
28 #include <api/CPP/data.hpp>
35 #include <api/CPP/reorder.hpp>
37 using namespace cldnn;
38 using namespace tests;
// Trait specialization mapping the test helper half-precision type FLOAT16 to
// cldnn's f16 data type, so templated tests can deduce data_types from the C++
// element type. constexpr (rather than const) makes the mapping a guaranteed
// compile-time constant.
43 template<> struct type_to_data_type<FLOAT16> { static constexpr data_types value = data_types::f16; };
// Compensated (Kahan) summation over `input`: accumulates the products of the
// reference convolution with reduced floating-point rounding error.
// NOTE(review): only the signature is visible in this view; the body is elided.
49 T kahan_summation(std::vector<T> &input) {
// CPU reference 2D convolution for a single batch: convolves one input (as
// VVVF: feature x y x x) with one output-feature's filter stack, adding `bias`
// to every output element. Supports stride, dilation, symmetric input padding
// (zero-filled, implemented by skipping out-of-range taps) and output padding
// (border of bias-valued elements). `f_begin` offsets the input feature index,
// which lets split/grouped convolutions reuse this helper.
62 VVF<T> reference_convolve(VVVF<T> &input, VVVF<T> &filter, int stride_y, int stride_x, float bias, int dilation_y = 1, int dilation_x = 1,
63 int input_padding_y = 0, int input_padding_x = 0, int output_padding_y = 0,
64 int output_padding_x = 0, size_t f_begin = 0)
// Effective kernel footprint after dilation.
66 size_t kernel_extent_y = dilation_y * (filter[0].size() - 1) + 1;
67 size_t kernel_extent_x = dilation_x * (filter[0][0].size() - 1) + 1;
// Standard conv output size formula, plus the requested output border.
68 size_t output_y = 1 + (input[0].size() - kernel_extent_y + 2 * input_padding_y) / stride_y + 2 * output_padding_y;
69 size_t output_x = 1 + (input[0][0].size() - kernel_extent_x + 2 * input_padding_x) / stride_x + 2 * output_padding_x;
// Every element (including the output-padding border) starts at `bias`.
70 VVF<T> output(output_y, VF<T>(output_x, bias));
71 for (size_t f = 0; f < filter.size(); ++f) {
72 for (size_t y = 0; y < (output_y - 2 * output_padding_y); ++y) {
73 for (size_t x = 0; x < (output_x - 2 * output_padding_x); ++x) {
75 values.reserve(filter[0].size() * filter[0][0].size());
76 for (size_t yf = 0; yf < filter[0].size(); ++yf) {
// Input coordinate for this tap; negative / past-the-end taps fall in the
// zero padding and are simply skipped (they contribute nothing).
77 int yi = -input_padding_y + (int)yf * dilation_y + stride_y * (int)y;
78 if (yi < 0 || (int)input[0].size() <= yi) continue;
79 for (size_t xf = 0; xf < filter[0][0].size(); ++xf) {
80 int xi = -input_padding_x + (int)xf * dilation_x + stride_x * (int)x;
81 if (xi < 0 || (int)input[0][0].size() <= xi) continue;
82 values.push_back(input[f_begin + f][yi][xi] * filter[f][yf][xf]);
// Sum all taps with Kahan compensation for numerical stability.
85 output[y + output_padding_y][x + output_padding_x] += kahan_summation<T>(values);
// Debug helper: writes the contents of a float memory buffer to a text file
// named `name`, one feature-map row per line, preceded by the layout's size
// and padding description. Uses the layout's pitches so padded buffers are
// indexed correctly. Assumes the buffer elements are float.
92 void dump_buffer(memory const& mem, std::string const& name)
94 std::ofstream out(name);
95 auto size = mem.get_layout().get_buffer_size();
96 auto ptr = mem.pointer<const float>();
97 auto pitches = mem.get_layout().get_pitches();
98 out << "Data size: " << mem.get_layout().size << "\n";
99 out << "Lower padding: " << mem.get_layout().data_padding.lower_size() << "\n";
100 out << "Upper padding: " << mem.get_layout().data_padding.upper_size() << "\n";
103 for (int b = 0; b < size.batch[0]; ++b)
105 out << " ================ BATCH " << b << " =================\n\n";
106 for (int f = 0; f < size.feature[0]; ++f)
108 out << "feature " << f << ":\n";
109 for (int y = 0; y < size.spatial[1]; ++y)
111 for (int x = 0; x < size.spatial[0]; ++x)
// Linear offset computed from per-dimension pitches (handles padding).
113 size_t idx = b * pitches.batch[0] + f * pitches.feature[0] + y * pitches.spatial[1] + x * pitches.spatial[0];
114 out << ptr[idx] << " ";
// Bias-less f32 convolution: 1x1x5x4 yxfb input, single 1x1x3x2 bfyx filter,
// stride {1,1,1,2} (x stride 1, y stride 2). Expected output is the 2x3
// hard-coded reference in output_vec, compared element-wise.
127 TEST(convolution_f32_fw_gpu, basic_convolution_no_bias) {
149 auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ 1, 1, 5, 4 } });
150 auto weights = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 3, 2 } });
152 set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, 6.0f, 3.0f, 3.0f, 3.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f });
153 set_values(weights, { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f });
154 VVF<float> output_vec = {
155 { 20.0f, 27.0f, 38.0f },
156 { 17.0f, 19.0f, 19.0f } };
159 input_layout("input", input.get_layout()),
160 data("weights", weights),
161 convolution("conv", "input", { "weights" }, { 1,1,1,2 }));
163 network network(engine, topology);
164 network.set_input_data("input", input);
166 auto outputs = network.execute();
// Exactly one output primitive, and it is the convolution itself.
167 EXPECT_EQ(outputs.size(), size_t(1));
168 EXPECT_EQ(outputs.begin()->first, "conv");
170 auto output_memory = outputs.at("conv").get_memory();
171 auto output_layout = output_memory.get_layout();
172 auto output_ptr = output_memory.pointer<float>();
174 int y_size = output_layout.size.spatial[1];
175 int x_size = output_layout.size.spatial[0];
176 int f_size = output_layout.size.feature[0];
177 int b_size = output_layout.size.batch[0];
// Output shape: (5-3)/1+1 = 3 in x, (4-2)/2+1 = 2 in y.
178 EXPECT_EQ(output_layout.format, format::yxfb);
179 EXPECT_EQ(y_size, 2);
180 EXPECT_EQ(x_size, 3);
181 EXPECT_EQ(f_size, 1);
182 EXPECT_EQ(b_size, 1);
183 for (int y = 0; y < y_size; ++y) {
184 for (int x = 0; x < x_size; ++x) {
185 EXPECT_EQ(output_vec[y][x], output_ptr[y * x_size + x]);
// Commented-out debug dump of the actual output, kept for local debugging.
189 //VVF temp_vec(y_size, VF(x_size, 0.0f));
190 //for (int y = 0; y < y_size; ++y) {
191 // for (int x = 0; x < x_size; ++x) {
192 // temp_vec[y][x] = output_ptr[y * x_size + x];
195 //print_2d(temp_vec);
// int8 convolution without bias: the f32 input (non-integral values) is
// reordered to i8 (truncating to 1,2,3,... — the same integers as the f32
// test above), convolved with i8 weights, then reordered back to f32 so the
// result can be compared against the same 2x3 reference as the float test.
199 TEST(convolution_f32_fw_gpu, basic_convolution_int8_no_bias) {
221 auto input = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 5, 4 } });
222 auto weights = memory::allocate(engine, { data_types::i8,format::bfyx,{ 1, 1, 3, 2 } });
224 set_values(input, { 1.1f, 2.4f, 3.5f, 4.5f, 5.8f, 2.9f, 2.3f, 3.5f, 4.4f, 6.6f, 3.8f, 3.9f, 3.4f, 5.1f, 1.4f, 1.8f, 1.1f, 1.2f, 1.2f, 1.9f });
225 set_values<char>(weights, { 1, 2, 1, 2, 1, 2 });
226 VVF<float> output_vec = {
227 { 20.0f, 27.0f, 38.0f },
228 { 17.0f, 19.0f, 19.0f } };
231 input_layout("input", input.get_layout()),
// f32 -> i8 reorder feeds the convolution; a final reorder converts back.
232 reorder("to_int","input", { data_types::i8,format::bfyx,{ 1, 1, 5, 4 } }),
233 data("weights", weights),
234 convolution("conv", "to_int", { "weights" }, { 1,1,1,2 }),
235 reorder("output", "conv", { data_types::f32,format::bfyx,{ 1, 1, 3, 2 } }));
237 network network(engine, topology);
238 network.set_input_data("input", input);
240 auto outputs = network.execute();
241 EXPECT_EQ(outputs.size(), size_t(1));
242 EXPECT_EQ(outputs.begin()->first, "output");
244 auto output_memory = outputs.at("output").get_memory();
245 auto output_layout = output_memory.get_layout();
246 auto output_ptr = output_memory.pointer<float>();
248 int y_size = output_layout.size.spatial[1];
249 int x_size = output_layout.size.spatial[0];
250 int f_size = output_layout.size.feature[0];
251 int b_size = output_layout.size.batch[0];
252 EXPECT_EQ(output_layout.format, format::bfyx);
253 EXPECT_EQ(y_size, 2);
254 EXPECT_EQ(x_size, 3);
255 EXPECT_EQ(f_size, 1);
256 EXPECT_EQ(b_size, 1);
257 for (int y = 0; y < y_size; ++y) {
258 for (int x = 0; x < x_size; ++x) {
259 EXPECT_EQ(output_vec[y][x], output_ptr[y * x_size + x]);
// Same data as basic_convolution_no_bias, plus a single bias of 1.0 — every
// expected element is therefore one greater than in the no-bias test.
265 TEST(convolution_f32_fw_gpu, basic_convolution) {
290 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 5, 4 } });
291 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 2 } });
292 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
294 set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, 6.0f, 3.0f, 3.0f, 3.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f });
295 set_values(weights, { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f });
296 set_values(biases, { 1.0f });
297 VVF<float> output_vec = {
298 { 21.0f, 28.0f, 39.0f },
299 { 18.0f, 20.0f, 20.0f } };
302 input_layout("input", input.get_layout()),
303 data("weights", weights),
304 data("biases", biases),
// NOTE(review): stride here is { 0,0,1,2 } where the no-bias variant uses
// { 1,1,1,2 } — batch/feature stride components appear to be ignored by the
// primitive; confirm against the convolution primitive's documentation.
305 convolution( "conv", "input", { "weights" }, { "biases" }, { 0,0,1,2 }));
307 network network(engine, topology);
308 network.set_input_data("input", input);
310 auto outputs = network.execute();
311 EXPECT_EQ(outputs.size(), size_t(1));
312 EXPECT_EQ(outputs.begin()->first, "conv");
314 auto output_memory = outputs.at("conv").get_memory();
315 auto output_layout = output_memory.get_layout();
316 auto output_ptr = output_memory.pointer<float>();
318 int y_size = output_layout.size.spatial[1];
319 int x_size = output_layout.size.spatial[0];
320 int f_size = output_layout.size.feature[0];
321 int b_size = output_layout.size.batch[0];
322 EXPECT_EQ(output_layout.format, format::yxfb);
323 EXPECT_EQ(y_size, 2);
324 EXPECT_EQ(x_size, 3);
325 EXPECT_EQ(f_size, 1);
326 EXPECT_EQ(b_size, 1);
327 for (int y = 0; y < y_size; ++y) {
328 for (int x = 0; x < x_size; ++x) {
329 EXPECT_EQ(output_vec[y][x], output_ptr[y * x_size + x]);
// Variant of basic_convolution: bfyx data layout, optimize_data enabled, and
// weights/biases supplied as input_layout primitives (set at runtime via
// set_input_data) instead of constant data primitives. Exercises the
// optimization pass with non-constant weights.
334 TEST(convolution_f32_fw_gpu, basic_convolution_bfyx_weights_as_input_layout) {
335 //Same params as convolution_f32_fw_gpu, basic_convolution but with bfyx optimized data and weights set as input_layout
337 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,
340 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,
343 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,
347 { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, 6.0f, 3.0f, 3.0f, 3.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }
350 { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f }
355 VVF<float> output_vec = {
356 { 21.0f, 28.0f, 39.0f }
358 { 18.0f, 20.0f, 20.0f }
361 input_layout("input", input.get_layout()),
// Weights and biases as runtime inputs rather than compile-time constants.
362 input_layout("weights", weights.get_layout()),
363 input_layout("biases", biases.get_layout()),
364 convolution("conv", "input",
// Graph-level data optimization enabled for this build.
371 cldnn::build_options options;
372 options.set_option(cldnn::build_option::optimize_data(true));
373 network network(engine, topology, options);
374 network.set_input_data("input", input);
375 network.set_input_data("weights", weights);
376 network.set_input_data("biases", biases);
377 auto outputs = network.execute();
378 EXPECT_EQ(outputs.size(), size_t(1));
379 EXPECT_EQ(outputs.begin()->first, "conv");
381 auto output_memory = outputs.at("conv").get_memory();
382 auto output_layout = output_memory.get_layout();
383 auto output_ptr = output_memory.pointer<float>();
385 int y_size = output_layout.size.spatial[1];
386 int x_size = output_layout.size.spatial[0];
387 int f_size = output_layout.size.feature[0];
388 int b_size = output_layout.size.batch[0];
389 EXPECT_EQ(output_layout.format, format::bfyx);
390 EXPECT_EQ(y_size, 2);
391 EXPECT_EQ(x_size, 3);
392 EXPECT_EQ(f_size, 1);
393 EXPECT_EQ(b_size, 1);
394 for (int y = 0; y < y_size; ++y) {
395 for (int x = 0; x < x_size; ++x) {
396 EXPECT_EQ(output_vec[y][x], output_ptr[y * x_size + x]);
// Convolution with input padding 2x1 on a 1x1x4x3 input and 2x2 all-ones
// filter, bias 1. The expected 6x5 output_vec includes the bias-only border
// rows/columns produced where the filter overlaps only padding. Output has no
// extra padding (padding{{0,0,0,0},0}).
401 TEST(convolution_f32_fw_gpu, basic_convolution_input_padding) {
405 // Input padding : 2x1
435 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 3 } });
436 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
437 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
439 set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f });
440 set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f });
441 set_values(biases, { 1.0f });
442 VVF<float> output_vec = {
443 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
444 { 2.0f, 4.0f, 6.0f, 8.0f, 5.0f },
445 { 4.0f, 8.0f, 11.0f, 15.0f, 9.0f },
446 { 6.0f, 11.0f, 12.0f, 16.0f, 10.0f },
447 { 4.0f, 7.0f, 7.0f, 9.0f, 6.0f },
448 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f } };
451 input_layout("input", input.get_layout()),
452 data("weights", weights),
453 data("biases", biases),
// No output padding requested on the convolution result.
464 padding{ { 0,0,0,0 }, 0 })
467 network network(engine, topology);
468 network.set_input_data("input", input);
470 auto outputs = network.execute();
471 EXPECT_EQ(outputs.size(), size_t(1));
472 EXPECT_EQ(outputs.begin()->first, "conv");
474 auto output_memory = outputs.at("conv").get_memory();
475 auto output_layout = output_memory.get_layout();
476 auto output_ptr = output_memory.pointer<float>();
478 int y_size = output_layout.size.spatial[1];
479 int x_size = output_layout.size.spatial[0];
480 int f_size = output_layout.size.feature[0];
481 int b_size = output_layout.size.batch[0];
482 EXPECT_EQ(output_layout.format, format::yxfb);
483 EXPECT_EQ(y_size, 6);
484 EXPECT_EQ(x_size, 5);
485 EXPECT_EQ(f_size, 1);
486 EXPECT_EQ(b_size, 1);
488 for (int y = 0; y < y_size; ++y) {
489 for (int x = 0; x < x_size; ++x) {
490 EXPECT_EQ(output_vec[y][x], output_ptr[y * x_size + x]);
// Commented-out debug dump of the actual output, kept for local debugging.
494 //VVF temp_vec(y_size, VF(x_size, 0.0f));
495 //for (int y = 0; y < y_size; ++y) {
496 // for (int x = 0; x < x_size; ++x) {
497 // temp_vec[y][x] = output_ptr[y * x_size + x];
500 //print_2d(temp_vec);
// Like basic_convolution_input_padding but with output padding as well
// (negative offsets in the padding spec grow the output buffer). The checked
// region excludes the x_pad/y_pad border; buffer size assertions use
// get_buffer_size() since the logical size and buffer size differ when padded.
503 TEST(convolution_f32_fw_gpu, basic_convolution_input_and_output_padding) {
507 // Input padding : 2x1
528 // 1 1 4 8 11 15 9 1 1
529 // 1 1 6 11 12 16 10 1 1
539 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 3 } });
540 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
541 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
543 set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f });
544 set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f });
545 set_values(biases, { 1.0f });
// 8x9 expected buffer: the 6x5 padded-input result surrounded by an
// additional output-padding border of bias-valued (1.0) elements.
546 VVF<float> output_vec = {
547 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
548 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
549 { 1.0f, 1.0f, 2.0f, 4.0f, 6.0f, 8.0f, 5.0f, 1.0f, 1.0f },
550 { 1.0f, 1.0f, 4.0f, 8.0f, 11.0f, 15.0f, 9.0f, 1.0f, 1.0f },
551 { 1.0f, 1.0f, 6.0f, 11.0f, 12.0f, 16.0f, 10.0f, 1.0f, 1.0f },
552 { 1.0f, 1.0f, 4.0f, 7.0f, 7.0f, 9.0f, 6.0f, 1.0f, 1.0f },
553 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
554 { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f } };
559 input_layout("input", input.get_layout()),
560 data("weights", weights),
561 data("biases", biases),
// Negative lower offsets request an output-padding border of x_pad by y_pad.
572 padding{ { 0,0,-x_pad,-y_pad }, 0 })
575 network network(engine, topology);
576 network.set_input_data("input", input);
578 auto outputs = network.execute();
579 EXPECT_EQ(outputs.size(), size_t(1));
580 EXPECT_EQ(outputs.begin()->first, "conv");
582 auto output_memory = outputs.at("conv").get_memory();
583 auto output_layout = output_memory.get_layout();
584 auto output_size = output_layout.get_buffer_size();
585 auto output_ptr = output_memory.pointer<float>();
587 int y_size = output_size.spatial[1];
588 int x_size = output_size.spatial[0];
589 int f_size = output_size.feature[0];
590 int b_size = output_size.batch[0];
591 EXPECT_EQ(output_layout.format, format::yxfb);
592 EXPECT_EQ(y_size, 8);
593 EXPECT_EQ(x_size, 9);
594 EXPECT_EQ(f_size, 1);
595 EXPECT_EQ(b_size, 1);
// Only the interior (non-border) region is value-checked.
597 for (int y = y_pad; y < y_size - y_pad; ++y)
599 for (int x = x_pad; x < x_size - x_pad; ++x)
601 EXPECT_EQ(output_vec[y][x], output_ptr[y * x_size + x]);
// Commented-out debug dump of the actual output, kept for local debugging.
605 //VVF temp_vec(y_size, VF(x_size, 0.0f));
606 //for (int y = 0; y < y_size; ++y) {
607 // for (int x = 0; x < x_size; ++x) {
608 // temp_vec[y][x] = output_ptr[y * x_size + x];
611 //print_2d(temp_vec);
// Randomized check: 4x4 input, 2x2 filter, stride 2x2, no padding. The GPU
// result is compared against reference_convolve() output; on mismatch the
// random seed is printed so the failure can be reproduced.
614 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x1x1_nopad_random) {
637 size_t batch = 1, input_f = 1, input_y = 4, input_x = 4;
639 VVVVF<float> input_rnd = generate_random_4d<float>(batch, input_f, input_y, input_x, -10, 10);
640 VF<float> input_rnd_vec = flatten_4d<float>(format::yxfb, input_rnd);
641 VVVVF<float> filter_rnd = generate_random_4d<float>(1, 1, 2, 2, -10, 10);
642 VF<float> filter_rnd_vec = flatten_4d<float>(format::bfyx, filter_rnd);
643 VF<float> bias_rnd = generate_random_1d<float>(1, -10, 10);
// Reference output: one VVF per (batch, output feature) pair.
644 VVVVF<float> output_rnd(batch, VVVF<float>(filter_rnd.size()));
645 for (size_t b = 0; b < output_rnd.size(); ++b) {
646 for (size_t of = 0; of < filter_rnd.size(); ++of) {
647 output_rnd[b][of] = reference_convolve<float>(input_rnd[b], filter_rnd[of], 2, 2, bias_rnd[of]);
650 VF<float> output_rnd_vec = flatten_4d<float>(format::yxfb, output_rnd);
654 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 4 } });
655 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 2, 2 }, 1 } });
656 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
657 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
659 set_values(input, input_rnd_vec);
660 set_values(weights, filter_rnd_vec);
661 set_values(biases, bias_rnd);
664 input_layout("input", input.get_layout()),
665 data("weights", weights),
666 data("biases", biases),
667 convolution("conv", "input", {"weights"}, {"biases"}, {1,1,2,2})
670 network network(engine, topology);
671 network.set_input_data("input", input);
673 auto outputs = network.execute();
674 EXPECT_EQ(outputs.size(), size_t(1));
675 EXPECT_EQ(outputs.begin()->first, "conv");
677 auto output_prim = outputs.begin()->second.get_memory();
679 auto output_ptr = output_prim.pointer<float>();
// NOTE(review): loop bound is output_rnd.size() — the batch count (1) — but
// output_rnd_vec holds 4 elements (2x2 output). Only the first element is
// compared; the intended bound is likely output_rnd_vec.size(). Confirm and fix.
681 for (size_t i = 0; i < output_rnd.size(); ++i) {
682 float x = float_round(output_rnd_vec[i]), y = float_round(output_ptr[i]);
683 EXPECT_FLOAT_EQ(x, y) << "random seed = " << random_seed << std::endl;
// Randomized check with batch 2: 2x2 input per batch, 2x2 filter, stride 2x2
// (single 1x1 output per batch). GPU result is compared to reference_convolve.
687 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in2x2x1x2_nopad_random) {
707 size_t batch = 2, input_f = 1, input_y = 2, input_x = 2;
709 VVVVF<float> input_rnd = generate_random_4d<float>(batch, input_f, input_y, input_x, -10, 10);
710 VF<float> input_rnd_vec = flatten_4d<float>(format::yxfb, input_rnd);
711 VVVVF<float> filter_rnd = generate_random_4d<float>(1, 1, 2, 2, -10, 10);
712 VF<float> filter_rnd_vec = flatten_4d<float>(format::bfyx, filter_rnd);
713 VF<float> bias_rnd = generate_random_1d<float>(1, -10, 10);
714 VVVVF<float> output_rnd(batch, VVVF<float>(filter_rnd.size()));
715 for (size_t b = 0; b < output_rnd.size(); ++b) {
716 for (size_t of = 0; of < filter_rnd.size(); ++of) {
717 output_rnd[b][of] = reference_convolve<float>(input_rnd[b], filter_rnd[of], 2, 2, bias_rnd[of]);
720 VF<float> output_rnd_vec = flatten_4d<float>(format::yxfb, output_rnd);
724 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 1, 2, 2 } });
725 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 2,{ 1, 1 }, 1 } });
726 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
727 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
729 set_values(input, input_rnd_vec);
730 set_values(weights, filter_rnd_vec);
731 set_values(biases, bias_rnd);
734 input_layout("input", input.get_layout()),
735 data("weights", weights),
736 data("biases", biases),
737 convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 })
740 network network(engine, topology);
741 network.set_input_data("input", input);
743 auto outputs = network.execute();
744 EXPECT_EQ(outputs.size(), size_t(1));
745 EXPECT_EQ(outputs.begin()->first, "conv");
747 auto output_prim = outputs.begin()->second.get_memory();
749 auto output_ptr = output_prim.pointer<float>();
// NOTE(review): the bound output_rnd.size() equals the batch count (2), which
// here coincides with output_rnd_vec.size() (2 outputs of 1x1) — correct only
// by coincidence. output_rnd_vec.size() would be the robust bound.
751 for (size_t i = 0; i < output_rnd.size(); ++i) {
752 float x = float_round(output_rnd_vec[i]), y = float_round(output_ptr[i]);
753 EXPECT_FLOAT_EQ(x, y) << "random seed = " << random_seed << std::endl;
// Deterministic 4x4 input, 2x2 filter, stride 2x2, bias 2.0 — hand-computed
// 2x2 output checked element-by-element.
757 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x1x1_nopad) {
782 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 4 } });
783 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 2, 2 }, 1 } });
784 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
785 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
787 set_values(input, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f });
788 set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
789 set_values(biases, { 2.0f });
792 input_layout("input", input.get_layout()),
793 data("weights", weights),
794 data("biases", biases),
795 convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 })
798 network network(engine, topology);
799 network.set_input_data("input", input);
801 auto outputs = network.execute();
802 EXPECT_EQ(outputs.size(), size_t(1));
803 EXPECT_EQ(outputs.begin()->first, "conv");
805 auto output_prim = outputs.begin()->second.get_memory();
807 auto output_ptr = output_prim.pointer<float>();
// Expected values are hand-computed dot products plus the 2.0 bias.
809 EXPECT_FLOAT_EQ(8.0f, output_ptr[0]);
810 EXPECT_FLOAT_EQ(0.5f, output_ptr[1]);
811 EXPECT_FLOAT_EQ(6.0f, output_ptr[2]);
812 EXPECT_FLOAT_EQ(9.0f, output_ptr[3]);
// Deterministic batch-2 case: each 2x2 input collapses to a single output
// value (filter 2x2, stride 2x2), one expected value per batch.
815 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in2x2x1x2_nopad) {
836 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 1, 2, 2 } });
837 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 2,{ 1, 1 }, 1 } });
838 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
839 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
// yxfb interleaves batches: values alternate b0, b1 per spatial position.
841 set_values(input, { 0.5f, 2.3f, 1.5f, -0.4f, 2.0f, 1.0f, -4.0f, 3.0f });
842 set_values(weights, { -1.2f, 1.5f, 0.5f, -0.5f });
843 set_values(biases, { -1.0f });
846 input_layout("input", input.get_layout()),
847 data("weights", weights),
848 data("biases", biases),
849 convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 } )
852 network network(engine, topology);
853 network.set_input_data("input", input);
855 auto outputs = network.execute();
856 EXPECT_EQ(outputs.size(), size_t(1));
857 EXPECT_EQ(outputs.begin()->first, "conv");
859 auto output_prim = outputs.begin()->second.get_memory();
861 auto output_ptr = output_prim.pointer<float>();
863 EXPECT_FLOAT_EQ(3.65f, output_ptr[0]);
864 EXPECT_FLOAT_EQ(-5.36f, output_ptr[1]);
// Two output feature maps from one input feature: 1x2 input column, weights
// shaped {2 ofm, 1 ifm, 1x2}, one bias per ofm. Verifies per-ofm dot product
// plus bias. Oversized stride (5,5) is harmless since only one window fits.
867 TEST(convolution_f32_fw_gpu, basic_ofm_wsiz2x1x2x1_in1x2x1_nopad) {
868 // Filter : 1x2x1x2x1
888 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 1, 2 } });
889 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 1, 1 }, 2 } });
890 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 2 } });
891 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 1 } });
893 set_values(input, { 1.0f, 2.0f });
894 set_values(weights, { 1.0f, 2.0f, -1.0f, -2.0f });
895 set_values(biases, { 0.1f, -0.2f });
898 input_layout("input", input.get_layout()),
899 data("weights", weights),
900 data("biases", biases),
901 convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,5,5 })
904 network network(engine, topology);
905 network.set_input_data("input", input);
907 auto outputs = network.execute();
908 EXPECT_EQ(outputs.size(), size_t(1));
909 EXPECT_EQ(outputs.begin()->first, "conv");
911 auto output_prim = outputs.begin()->second.get_memory();
913 auto output_ptr = output_prim.pointer<float>();
// ofm0: 1*1 + 2*2 + 0.1 = 5.1; ofm1: -1*1 + -2*2 - 0.2 = -5.2
915 EXPECT_FLOAT_EQ(5.1f, output_ptr[0]);
916 EXPECT_FLOAT_EQ(-5.2f, output_ptr[1]);
// Three output features from two input features: weights {3 ofm, 2 ifm, 1x2},
// one bias per ofm. Checks accumulation across input features per ofm.
919 TEST(convolution_f32_fw_gpu, basic_ofm_wsiz3x2x2x1_in2x2x1_nopad) {
920 // Filter : 1x3x2x2x1
929 // 1.0 2.0 ifm=0 ofm=0
932 // 5.0 6.0 ifm=0 ofm=1
935 // 9.0 10.0 ifm=0 ofm=2
947 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 2, 1, 2 } });
948 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 1, 1 }, 3 } });
949 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 2, 1, 2 } });
950 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 1 } });
952 set_values(input, { 1.0f, 3.0f, 2.0f, 4.0f });
953 set_values(weights, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f });
954 set_values(biases, { -5.0f, -6.0f, -7.0f });
957 input_layout("input", input.get_layout()),
958 data("weights", weights),
959 data("biases", biases),
960 convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,5,5 })
963 network network(engine, topology);
964 network.set_input_data("input", input);
966 auto outputs = network.execute();
967 EXPECT_EQ(outputs.size(), size_t(1));
968 EXPECT_EQ(outputs.begin()->first, "conv");
970 auto output_prim = outputs.begin()->second.get_memory();
972 auto output_ptr = output_prim.pointer<float>();
// One value per output feature map (sum over both ifm windows plus bias).
974 EXPECT_FLOAT_EQ(25.0f, output_ptr[0]);
975 EXPECT_FLOAT_EQ(64.0f, output_ptr[1]);
976 EXPECT_FLOAT_EQ(103.0f, output_ptr[2]);
// Three 2x2 filters over one 2x2 input (stride 2x2): each ofm produces a
// single value. Results compared with are_equal (tolerant float comparison).
979 TEST(convolution_f32_fw_gpu, basic_wsiz2x2x1x3_wstr2x2_in2x2x1x1_nopad) {
990 // -1.1 1.5 0.1 0.2 2.0 -1.0
991 // 0.5 -0.5 0.4 0.7 2.5 -1.5
1003 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
1004 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 1, 1 }, 3 } });
1005 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 2, 2 } });
1006 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 1 } });
1008 set_values(input, { -2.3f, -0.1f, 3.1f, 1.9f });
1009 set_values(weights, { -1.1f, 1.5f, 0.5f, -0.5f, 0.1f, 0.2f, 0.4f, 0.7f, 2.0f, -1.0f, 2.5f, -1.5f });
1010 set_values(biases, { 0.1f, -0.2f, 0.3f });
1013 input_layout("input", input.get_layout()),
1014 data("weights", weights),
1015 data("biases", biases),
1016 convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 })
1019 network network(engine, topology);
1020 network.set_input_data("input", input);
1022 auto outputs = network.execute();
1023 EXPECT_EQ(outputs.size(), size_t(1));
1024 EXPECT_EQ(outputs.begin()->first, "conv");
1026 auto output_prim = outputs.begin()->second.get_memory();
1028 auto output_ptr = output_prim.pointer<float>();
1030 EXPECT_TRUE(are_equal(3.08f, output_ptr[0]));
1031 EXPECT_TRUE(are_equal(2.12f, output_ptr[1]));
1032 EXPECT_TRUE(are_equal(0.7f, output_ptr[2]));
// Filter (3x3) larger than input (2x2): taps outside the input read implicit
// zero padding, so only the overlapping 2x2 region contributes. Single output.
1035 TEST(convolution_f32_fw_gpu, wsiz3x3_wstr2x2_in2x2x1x1_zeropad) {
1059 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
1060 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 1 } });
1061 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 3 } });
1062 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
1064 set_values(input, { -0.5f, 1.0f, 0.5f, 2.0f });
1065 set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f, 4.0f, -5.0f, 0.5f, 1.5f, -1.5f });
1066 set_values(biases, { 2.0f });
1069 input_layout("input", input.get_layout()),
1070 data("weights", weights),
1071 data("biases", biases),
1072 convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 })
1075 network network(engine, topology);
1076 network.set_input_data("input", input);
1078 auto outputs = network.execute();
1079 EXPECT_EQ(outputs.size(), size_t(1));
1080 EXPECT_EQ(outputs.begin()->first, "conv");
1082 auto output_prim = outputs.begin()->second.get_memory();
1084 auto output_ptr = output_prim.pointer<float>();
1086 EXPECT_FLOAT_EQ(12.25f, output_ptr[0]);
// Input offset -1x-1 and output padding 1x1 with a 3x3 filter over a 2x2
// input: the single computed value lands at the center of a 3x3 padded output
// buffer, hence the check at linear index 4.
1089 TEST(convolution_f32_fw_gpu, offsets_wsiz3x3_wstr2x2_in2x2x1x1_zeropad) {
1093 // Input offset : -1x-1
1095 // Output offset: 1x1
1116 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
1117 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 2, 2 }, 1 } });
1118 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 3 } });
1119 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
1121 set_values(input, { -0.5f, 1.0f, 0.5f, 2.0f });
1122 set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f, 4.0f, -5.0f, 0.5f, 1.5f, -1.5f });
1123 set_values(biases, { 2.0f });
1126 input_layout("input", input.get_layout()),
1127 data("weights", weights),
1128 data("biases", biases),
// Output padding of 1 in x and y surrounds the computed element.
1139 padding{ { 0,0,1,1 }, 0 })
1142 network network(engine, topology);
1143 network.set_input_data("input", input);
1145 auto outputs = network.execute();
1146 EXPECT_EQ(outputs.size(), size_t(1));
1147 EXPECT_EQ(outputs.begin()->first, "conv");
1149 auto output_prim = outputs.begin()->second.get_memory();
1151 auto output_ptr = output_prim.pointer<float>();
// Index 4 = center of the 3x3 padded output buffer.
1153 EXPECT_FLOAT_EQ(-7.25f, output_ptr[4]);
// Split-2 (grouped) convolution: 2 input features split into two groups, each
// with its own 2x2 filter and bias. Feature 0 reuses the data/expectations of
// the in4x4x1x1 test, feature 1 those of the in2x2x1x2 test; outputs of the
// two groups interleave per yxfb feature ordering.
1156 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_split2) {
1168 // f1: 0.5 1.5 2.3 -0.4
1193 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 2, 4, 4 } });
1194 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 2, 2 }, 2 } });
1195 auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
1196 auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
1197 auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
1198 auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
// yxfb: features interleaved per spatial position (f0, f1, f0, f1, ...).
1201 -0.5f, 0.5f, 1.0f, 1.5f, 0.5f, 2.3f, 2.0f, -0.4f,
1202 1.5f, 2.0f, -0.5f, -4.0f, 0.0f, 1.0f, -1.0f, 3.0f,
1203 0.5f, 0.5f, 0.5f, 1.5f, -1.0f, 2.3f, 1.0f, -0.4f,
1204 0.5f, 2.0f, 2.0f, -4.0f, 1.5f, 1.0f, -0.5f, 3.0f
1206 set_values(weights1, { -2.0f, 0.5f, 3.5f, 1.5f });
1207 set_values(biases1, { 2.0f });
1208 set_values(weights2, { -1.2f, 1.5f, 0.5f, -0.5f });
1209 set_values(biases2, { -1.0f });
1212 input_layout("input", input.get_layout()),
1213 data("weights1", weights1),
1214 data("biases1", biases1),
1215 data("weights2", weights2),
1216 data("biases2", biases2),
// Two weight/bias sets -> split 2: each filter sees one input feature.
1220 { "weights1", "weights2" },
1221 { "biases1", "biases2" },
1227 network network(engine, topology);
1228 network.set_input_data("input", input);
1230 auto outputs = network.execute();
1231 EXPECT_EQ(outputs.size(), size_t(1));
1232 EXPECT_EQ(outputs.begin()->first, "conv");
1234 auto output_prim = outputs.begin()->second.get_memory();
1236 auto output_ptr = output_prim.pointer<float>();
// Interleaved expectations: even indices = group 0 results, odd = group 1.
1238 EXPECT_FLOAT_EQ(8.0f, get_value<float>(output_ptr, 0));
1239 EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 1));
1240 EXPECT_FLOAT_EQ(0.5f, get_value<float>(output_ptr, 2));
1241 EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 3));
1242 EXPECT_FLOAT_EQ(6.0f, get_value<float>(output_ptr, 4));
1243 EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 5));
1244 EXPECT_FLOAT_EQ(9.0f, get_value<float>(output_ptr, 6));
1245 EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 7));
// Split-2 convolution with batch 2: both batches carry identical data (values
// duplicated pairwise in the yxfb stream), so each expected value appears
// twice — once per batch — at consecutive indices.
1248 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2) {
1255 // f0b0: -0.5 1 0.5 2
1260 // f0b1: -0.5 1 0.5 2
1265 // f1b0: 0.5 1.5 2.3 -0.4
1270 // f1b1: 0.5 1.5 2.3 -0.4
1291 // 8 8 3.65 3.65 0.5 0.5 -5.36 -5.36
1292 // 6 6 3.65 3.65 9 9 -5.36 -5.36
1296 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 4, 4 } });
1297 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 2,{ 2, 2 }, 2 } });
1298 auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
1299 auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
1300 auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
1301 auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
// yxfb with batch 2: each value duplicated (b0, b1) before the next feature.
1304 -0.5f, -0.5f, 0.5f, 0.5f, 1.0f, 1.0f, 1.5f, 1.5f, 0.5f, 0.5f, 2.3f, 2.3f, 2.0f, 2.0f, -0.4f, -0.4f,
1305 1.5f, 1.5f, 2.0f, 2.0f, -0.5f, -0.5f, -4.0f, -4.0f, 0.0f, 0.0f, 1.0f, 1.0f, -1.0f, -1.0f, 3.0f, 3.0f,
1306 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 1.5f, 1.5f, -1.0f, -1.0f, 2.3f, 2.3f, 1.0f, 1.0f, -0.4f, -0.4f,
1307 0.5f, 0.5f, 2.0f, 2.0f, 2.0f, 2.0f, -4.0f, -4.0f, 1.5f, 1.5f, 1.0f, 1.0f, -0.5f, -0.5f, 3.0f, 3.0f,
1309 set_values(weights1, { -2.0f, 0.5f, 3.5f, 1.5f });
1310 set_values(biases1, { 2.0f });
1311 set_values(weights2, { -1.2f, 1.5f, 0.5f, -0.5f });
1312 set_values(biases2, { -1.0f });
1315 input_layout("input", input.get_layout()),
1316 data("weights1", weights1),
1317 data("biases1", biases1),
1318 data("weights2", weights2),
1319 data("biases2", biases2),
// Two weight/bias sets -> split 2: each filter sees one input feature.
1323 { "weights1", "weights2" },
1324 { "biases1", "biases2" },
1330 network network(engine, topology);
1331 network.set_input_data("input", input);
1333 auto outputs = network.execute();
1334 EXPECT_EQ(outputs.size(), size_t(1));
1335 EXPECT_EQ(outputs.begin()->first, "conv");
1337 auto output_prim = outputs.begin()->second.get_memory();
1339 auto output_ptr = output_prim.pointer<float>();
// Pairs of equal values: identical batches produce identical results.
1341 EXPECT_FLOAT_EQ(8.0f, get_value<float>(output_ptr, 0));
1342 EXPECT_FLOAT_EQ(8.0f, get_value<float>(output_ptr, 1));
1343 EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 2));
1344 EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 3));
1345 EXPECT_FLOAT_EQ(0.5f, get_value<float>(output_ptr, 4));
1346 EXPECT_FLOAT_EQ(0.5f, get_value<float>(output_ptr, 5));
1347 EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 6));
1348 EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 7));
1349 EXPECT_FLOAT_EQ(6.0f, get_value<float>(output_ptr, 8));
1350 EXPECT_FLOAT_EQ(6.0f, get_value<float>(output_ptr, 9));
1351 EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 10));
1352 EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 11));
1353 EXPECT_FLOAT_EQ(9.0f, get_value<float>(output_ptr, 12));
1354 EXPECT_FLOAT_EQ(9.0f, get_value<float>(output_ptr, 13));
1355 EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 14));
1356 EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 15));
// Depthwise-separable optimization path: 16 features / split 16 (8 loop
// iterations, two weight/bias pairs each), same per-group data as the
// split-2 test above but replicated across features.
// NOTE(review): this excerpt elides source lines between the numbered lines.
1359 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthwise_sep_opt) {
1360     //  Test for depthwise separable optimization, there are 16 weights and biases (split 16)
1361     //  data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2 but with batch 1
1365     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 16, 4, 4 } });
1368         -0.5f, -0.5f,  0.5f,  0.5f, -0.5f, -0.5f,  0.5f,  0.5f, -0.5f, -0.5f,  0.5f,  0.5f, -0.5f, -0.5f,  0.5f,  0.5f, -0.5f, -0.5f,  0.5f,  0.5f, -0.5f, -0.5f,  0.5f,  0.5f, -0.5f, -0.5f,  0.5f,  0.5f, -0.5f, -0.5f,  0.5f,  0.5f,
1369         1.0f,  1.0f,  1.5f,  1.5f, 1.0f,  1.0f,  1.5f,  1.5f, 1.0f,  1.0f,  1.5f,  1.5f, 1.0f,  1.0f,  1.5f,  1.5f, 1.0f,  1.0f,  1.5f,  1.5f, 1.0f,  1.0f,  1.5f,  1.5f, 1.0f,  1.0f,  1.5f,  1.5f, 1.0f,  1.0f,  1.5f,  1.5f,
1370         0.5f,  0.5f,  2.3f,  2.3f, 0.5f,  0.5f,  2.3f,  2.3f, 0.5f,  0.5f,  2.3f,  2.3f, 0.5f,  0.5f,  2.3f,  2.3f, 0.5f,  0.5f,  2.3f,  2.3f, 0.5f,  0.5f,  2.3f,  2.3f, 0.5f,  0.5f,  2.3f,  2.3f, 0.5f,  0.5f,  2.3f,  2.3f,
1371         2.0f,  2.0f, -0.4f, -0.4f, 2.0f,  2.0f, -0.4f, -0.4f, 2.0f,  2.0f, -0.4f, -0.4f, 2.0f,  2.0f, -0.4f, -0.4f, 2.0f,  2.0f, -0.4f, -0.4f, 2.0f,  2.0f, -0.4f, -0.4f, 2.0f,  2.0f, -0.4f, -0.4f, 2.0f,  2.0f, -0.4f, -0.4f,
1372         1.5f,  1.5f,  2.0f,  2.0f, 1.5f,  1.5f,  2.0f,  2.0f, 1.5f,  1.5f,  2.0f,  2.0f, 1.5f,  1.5f,  2.0f,  2.0f, 1.5f,  1.5f,  2.0f,  2.0f, 1.5f,  1.5f,  2.0f,  2.0f, 1.5f,  1.5f,  2.0f,  2.0f, 1.5f,  1.5f,  2.0f,  2.0f,
1373         -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f,
1374         0.0f,  0.0f,  1.0f,  1.0f, 0.0f,  0.0f,  1.0f,  1.0f, 0.0f,  0.0f,  1.0f,  1.0f, 0.0f,  0.0f,  1.0f,  1.0f, 0.0f,  0.0f,  1.0f,  1.0f, 0.0f,  0.0f,  1.0f,  1.0f, 0.0f,  0.0f,  1.0f,  1.0f, 0.0f,  0.0f,  1.0f,  1.0f,
1375         -1.0f, -1.0f,  3.0f,  3.0f, -1.0f, -1.0f,  3.0f,  3.0f, -1.0f, -1.0f,  3.0f,  3.0f, -1.0f, -1.0f,  3.0f,  3.0f, -1.0f, -1.0f,  3.0f,  3.0f, -1.0f, -1.0f,  3.0f,  3.0f, -1.0f, -1.0f,  3.0f,  3.0f, -1.0f, -1.0f,  3.0f,  3.0f,
1376         0.5f,  0.5f,  0.5f,  0.5f, 0.5f,  0.5f,  0.5f,  0.5f, 0.5f,  0.5f,  0.5f,  0.5f, 0.5f,  0.5f,  0.5f,  0.5f, 0.5f,  0.5f,  0.5f,  0.5f, 0.5f,  0.5f,  0.5f,  0.5f, 0.5f,  0.5f,  0.5f,  0.5f, 0.5f,  0.5f,  0.5f,  0.5f,
1377         0.5f,  0.5f,  1.5f,  1.5f, 0.5f,  0.5f,  1.5f,  1.5f, 0.5f,  0.5f,  1.5f,  1.5f, 0.5f,  0.5f,  1.5f,  1.5f, 0.5f,  0.5f,  1.5f,  1.5f, 0.5f,  0.5f,  1.5f,  1.5f, 0.5f,  0.5f,  1.5f,  1.5f, 0.5f,  0.5f,  1.5f,  1.5f,
1378         -1.0f, -1.0f,  2.3f,  2.3f, -1.0f, -1.0f,  2.3f,  2.3f, -1.0f, -1.0f,  2.3f,  2.3f, -1.0f, -1.0f,  2.3f,  2.3f, -1.0f, -1.0f,  2.3f,  2.3f, -1.0f, -1.0f,  2.3f,  2.3f, -1.0f, -1.0f,  2.3f,  2.3f, -1.0f, -1.0f,  2.3f,  2.3f,
1379         1.0f,  1.0f, -0.4f, -0.4f, 1.0f,  1.0f, -0.4f, -0.4f, 1.0f,  1.0f, -0.4f, -0.4f, 1.0f,  1.0f, -0.4f, -0.4f, 1.0f,  1.0f, -0.4f, -0.4f, 1.0f,  1.0f, -0.4f, -0.4f, 1.0f,  1.0f, -0.4f, -0.4f, 1.0f,  1.0f, -0.4f, -0.4f,
1380         0.5f,  0.5f,  2.0f,  2.0f, 0.5f,  0.5f,  2.0f,  2.0f, 0.5f,  0.5f,  2.0f,  2.0f, 0.5f,  0.5f,  2.0f,  2.0f, 0.5f,  0.5f,  2.0f,  2.0f, 0.5f,  0.5f,  2.0f,  2.0f, 0.5f,  0.5f,  2.0f,  2.0f, 0.5f,  0.5f,  2.0f,  2.0f,
1381         2.0f,  2.0f, -4.0f, -4.0f, 2.0f,  2.0f, -4.0f, -4.0f, 2.0f,  2.0f, -4.0f, -4.0f, 2.0f,  2.0f, -4.0f, -4.0f, 2.0f,  2.0f, -4.0f, -4.0f, 2.0f,  2.0f, -4.0f, -4.0f, 2.0f,  2.0f, -4.0f, -4.0f, 2.0f,  2.0f, -4.0f, -4.0f,
1382         1.5f,  1.5f,  1.0f,  1.0f, 1.5f,  1.5f,  1.0f,  1.0f, 1.5f,  1.5f,  1.0f,  1.0f, 1.5f,  1.5f,  1.0f,  1.0f, 1.5f,  1.5f,  1.0f,  1.0f, 1.5f,  1.5f,  1.0f,  1.0f, 1.5f,  1.5f,  1.0f,  1.0f, 1.5f,  1.5f,  1.0f,  1.0f,
1383         -0.5f, -0.5f,  3.0f,  3.0f, -0.5f, -0.5f,  3.0f,  3.0f, -0.5f, -0.5f,  3.0f,  3.0f, -0.5f, -0.5f,  3.0f,  3.0f, -0.5f, -0.5f,  3.0f,  3.0f, -0.5f, -0.5f,  3.0f,  3.0f, -0.5f, -0.5f,  3.0f,  3.0f, -0.5f, -0.5f,  3.0f,  3.0f,
1386     topology topology(input_layout("input", input.get_layout()));
// Collect the 16 generated weight/bias primitive ids; passed to the split
// convolution (elided here) so the depthwise-separable path is exercised.
1388     std::vector<primitive_id> weights_vec;
1389     std::vector<primitive_id> bias_vec;
1391     for (uint32_t i = 0; i < 8; i++)
1393         auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
1394         auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
1395         auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
1396         auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
1398         set_values(weights1, { -2.0f, 0.5f, 3.5f, 1.5f });
1399         set_values(biases1, { 2.0f });
1400         set_values(weights2, { -1.2f, 1.5f, 0.5f, -0.5f });
1401         set_values(biases2, { -1.0f });
1403         primitive_id weights_id = "weights_" + std::to_string(i);
1404         primitive_id weights2_id = "weights2_" + std::to_string(i);
1405         primitive_id bias_id = "biases_" + std::to_string(i);
1406         primitive_id bias2_id = "biases2_" + std::to_string(i);
1408         weights_vec.push_back(weights_id);
1409         weights_vec.push_back(weights2_id);
1410         bias_vec.push_back(bias_id);
1411         bias_vec.push_back(bias2_id);
1414             data(weights_id, weights1),
1415             data(bias_id, biases1),
1416             data(weights2_id, weights2),
1417             data(bias2_id, biases2)
1433     network network(engine, topology);
1434     network.set_input_data("input", input);
1436     auto outputs = network.execute();
1437     EXPECT_EQ(outputs.size(), size_t(1));
1438     EXPECT_EQ(outputs.begin()->first, "conv");
1440     auto output_prim = outputs.begin()->second.get_memory();
1442     auto output_ptr = output_prim.pointer<float>();
// Expected values are the split-2 results replicated across all 16 features.
1444     std::vector<float> expected_output_vec = {
1445         8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f,
1446         0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f,
1447         6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f,
1448         9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f,
1451     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1453         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
// Same depthwise-separable split-16 scenario as the test above, but with a
// bfyx input layout; expected values are therefore laid out per-batch rows
// instead of interleaved yxfb order.
// NOTE(review): this excerpt elides source lines between the numbered lines.
1457 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthwise_sep_opt_bfyx) {
1458     //  Test for depthwise separable optimization, there are 16 weights and biases (split 16)
1459     //  data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2 but with batch 1
1462     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 16, 4, 4 } });
1465         -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
1466         0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
1467         -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
1468         0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
1469         -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
1470         0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
1471         -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
1472         0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
1473         -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
1474         0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
1475         -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
1476         0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
1477         -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
1478         0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
1479         -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
1480         0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
1483     topology topology(input_layout("input", input.get_layout()));
// 8 iterations x 2 pairs = 16 weight/bias primitives for the split-16 conv
// (the convolution primitive itself is elided from this excerpt).
1485     std::vector<primitive_id> weights_vec;
1486     std::vector<primitive_id> bias_vec;
1488     for (uint32_t i = 0; i < 8; i++)
1490         auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
1491         auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
1492         auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
1493         auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
1495         set_values(weights1, { -2.0f, 0.5f, 3.5f, 1.5f });
1496         set_values(biases1, { 2.0f });
1497         set_values(weights2, { -1.2f, 1.5f, 0.5f, -0.5f });
1498         set_values(biases2, { -1.0f });
1500         primitive_id weights_id = "weights_" + std::to_string(i);
1501         primitive_id weights2_id = "weights2_" + std::to_string(i);
1502         primitive_id bias_id = "biases_" + std::to_string(i);
1503         primitive_id bias2_id = "biases2_" + std::to_string(i);
1505         weights_vec.push_back(weights_id);
1506         weights_vec.push_back(weights2_id);
1507         bias_vec.push_back(bias_id);
1508         bias_vec.push_back(bias2_id);
1511             data(weights_id, weights1),
1512             data(bias_id, biases1),
1513             data(weights2_id, weights2),
1514             data(bias2_id, biases2)
1530     network network(engine, topology);
1531     network.set_input_data("input", input);
1533     auto outputs = network.execute();
1534     EXPECT_EQ(outputs.size(), size_t(1));
1535     EXPECT_EQ(outputs.begin()->first, "conv");
1537     auto output_prim = outputs.begin()->second.get_memory();
1539     auto output_ptr = output_prim.pointer<float>();
// One row per feature pair: group-0 results (8, 0.5, 6, 9) then group-1
// results (3.65, -5.36, 3.65, -5.36), repeated for each of the 8 pairs.
1541     std::vector<float> expected_output_vec = {
1542         8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
1543         8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
1544         8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
1545         8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
1546         8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
1547         8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
1548         8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
1549         8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
1552     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1554         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
// 1x1-kernel split-2 convolution: 4 input features split into two groups of
// 2, each group producing 2 output features (weights are 2x2x1x1 bfyx).
// NOTE(review): this excerpt elides source lines between the numbered lines.
1558 TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x4x1_nopad_split2) {
1594     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 4, 1, 1 } });
1595     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 4 } });
1596     auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } });
1597     auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 1 } });
1598     auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } });
1599     auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 1 } });
// Four feature values: first two go to split group 0, last two to group 1.
1602         1.5f, 0.5f, 0.0f, -0.5f
1604     set_values(weights1, { -2.0f, -0.5f, 1.0f, 2.0f });
1605     set_values(biases1, { 1.0f, 5.0f });
1606     set_values(weights2, { 4.0f, 1.5f, 2.0f, 0.5f });
1607     set_values(biases2, { -1.0f, 2.5f });
1610         input_layout("input", input.get_layout()),
1611         data("weights1", weights1),
1612         data("biases1", biases1),
1613         data("weights2", weights2),
1614         data("biases2", biases2),
1618             { "weights1", "weights2" },
1619             { "biases1", "biases2" },
1625     network network(engine, topology);
1626     network.set_input_data("input", input);
1628     auto outputs = network.execute();
1629     EXPECT_EQ(outputs.size(), size_t(1));
1630     EXPECT_EQ(outputs.begin()->first, "conv");
1632     auto output_prim = outputs.begin()->second.get_memory();
1634     auto output_ptr = output_prim.pointer<float>();
1636     EXPECT_FLOAT_EQ(-2.25f, get_value<float>(output_ptr, 0));
1637     EXPECT_FLOAT_EQ(7.5f, get_value<float>(output_ptr, 1));
1638     EXPECT_FLOAT_EQ(-1.75f, get_value<float>(output_ptr, 2));
1639     EXPECT_FLOAT_EQ(2.25f, get_value<float>(output_ptr, 3));
// 1x1-kernel split-2 with only 2 input features: each split group sees a
// single input feature (weights 2x1x1x1) and yields 2 output features.
// NOTE(review): this excerpt elides source lines between the numbered lines.
1642 TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x2x1_nopad_split2) {
1676     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 2, 1, 1 } });
1677     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 4 } });
1678     auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
1679     auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 1 } });
1680     auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
1681     auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 1 } });
1686     set_values(weights1, { -2.0f, 1.0f });
1687     set_values(biases1, { 1.0f, 5.0f });
1688     set_values(weights2, { 4.0f, 2.0f });
1689     set_values(biases2, { -1.0f, 2.5f });
1692         input_layout("input", input.get_layout()),
1693         data("weights1", weights1),
1694         data("biases1", biases1),
1695         data("weights2", weights2),
1696         data("biases2", biases2),
1700             { "weights1", "weights2" },
1701             { "biases1", "biases2" },
1707     network network(engine, topology);
1708     network.set_input_data("input", input);
1710     auto outputs = network.execute();
1711     EXPECT_EQ(outputs.size(), size_t(1));
1712     EXPECT_EQ(outputs.begin()->first, "conv");
1714     auto output_prim = outputs.begin()->second.get_memory();
1716     auto output_ptr = output_prim.pointer<float>();
1718     EXPECT_FLOAT_EQ(-2.0f, get_value<float>(output_ptr, 0));
1719     EXPECT_FLOAT_EQ(6.5f, get_value<float>(output_ptr, 1));
1720     EXPECT_FLOAT_EQ(1.0f, get_value<float>(output_ptr, 2));
1721     EXPECT_FLOAT_EQ(3.5f, get_value<float>(output_ptr, 3));
// 1x1-kernel split-2 with asymmetric feature counts: each group consumes 2
// input features and produces 3 output features (weights 3x2x1x1), giving
// 6 outputs in total.
// NOTE(review): this excerpt elides source lines between the numbered lines.
1724 TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x4x1_filter_1x3x2x1x1_nopad_split2) {
1764     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 4, 1, 1 } });
1765     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 6 } });
1766     auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 2, 1, 1 } });
1767     auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 1 } });
1768     auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 2, 1, 1 } });
1769     auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 1 } });
1772         1.5f, 0.5f, 2.0f, -1.0f
1774     set_values(weights1, { -2.0f, 1.0f, 1.0f, 3.0f, 0.5f, 8.0f });
1775     set_values(biases1, { 1.0f, 5.0f, 3.0f });
1776     set_values(weights2, { 4.0f, -4.0f, 2.0f, 0.5f, -0.5f, 3.0f });
1777     set_values(biases2, { -1.0f, 2.5f, 2.0f });
1780         input_layout("input", input.get_layout()),
1781         data("weights1", weights1),
1782         data("biases1", biases1),
1783         data("weights2", weights2),
1784         data("biases2", biases2),
1788             { "weights1", "weights2" },
1789             { "biases1", "biases2" },
1795     network network(engine, topology);
1796     network.set_input_data("input", input);
1798     auto outputs = network.execute();
1799     EXPECT_EQ(outputs.size(), size_t(1));
1800     EXPECT_EQ(outputs.begin()->first, "conv");
1802     auto output_prim = outputs.begin()->second.get_memory();
1804     auto output_ptr = output_prim.pointer<float>();
// Outputs 0-2 come from group 0, outputs 3-5 from group 1.
1806     EXPECT_FLOAT_EQ(-1.5f, get_value<float>(output_ptr, 0));
1807     EXPECT_FLOAT_EQ(8.0f, get_value<float>(output_ptr, 1));
1808     EXPECT_FLOAT_EQ(7.75f, get_value<float>(output_ptr, 2));
1809     EXPECT_FLOAT_EQ(11.0f, get_value<float>(output_ptr, 3));
1810     EXPECT_FLOAT_EQ(6.0f, get_value<float>(output_ptr, 4));
1811     EXPECT_FLOAT_EQ(-2.0f, get_value<float>(output_ptr, 5));
1815 TEST(convolution_gpu, trivial_convolution_relu) {
1841 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 4 } });
1842 //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 2, 2 }, 1 } });
1843 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
1844 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
1847 -0.5f, 1.0f, 0.5f, 2.0f,
1848 1.5f, -0.5f, 0.0f, -1.0f,
1849 0.5f, 0.5f, -1.0f, 1.0f,
1850 0.5f, 2.0f, 1.5f, -0.5f
1852 set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
1853 set_values(biases, { -2.0f });
1856 input_layout("input", input.get_layout()),
1857 data("weights", weights),
1858 data("biases", biases),
1871 network network(engine, topology);
1872 network.set_input_data("input", input);
1874 auto outputs = network.execute();
1875 EXPECT_EQ(outputs.size(), size_t(1));
1876 EXPECT_EQ(outputs.begin()->first, "conv");
1878 auto output_prim = outputs.begin()->second.get_memory();
1880 auto output_ptr = output_prim.pointer<float>();
1882 EXPECT_FLOAT_EQ(4.0f, get_value<float>(output_ptr, 0));
1883 EXPECT_FLOAT_EQ(0.0f, get_value<float>(output_ptr, 1));
1884 EXPECT_FLOAT_EQ(2.0f, get_value<float>(output_ptr, 2));
1885 EXPECT_FLOAT_EQ(5.0f, get_value<float>(output_ptr, 3));
// Leaky-ReLU variant of trivial_convolution_relu: negative slope 0.1, so the
// pre-activation -3.5 becomes -0.35 instead of being clamped to 0.
// NOTE(review): this excerpt elides source lines between the numbered lines.
1888 TEST(convolution_gpu, relu_with_negative_slope) {
1894     //  Negative Slope: 0.1
1915     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 4 } });
1916     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 2, 2 }, 1 } });
1917     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
1918     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
1921         -0.5f,  1.0f,  0.5f,  2.0f,
1922         1.5f, -0.5f,  0.0f, -1.0f,
1923         0.5f,  0.5f, -1.0f,  1.0f,
1924         0.5f,  2.0f,  1.5f, -0.5f
1926     set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
1927     set_values(biases, { -2.0f });
1930         input_layout("input", input.get_layout()),
1931         data("weights", weights),
1932         data("biases", biases),
1945     network network(engine, topology);
1946     network.set_input_data("input", input);
1948     auto outputs = network.execute();
1949     EXPECT_EQ(outputs.size(), size_t(1));
1950     EXPECT_EQ(outputs.begin()->first, "conv");
1952     auto output_prim = outputs.begin()->second.get_memory();
1954     auto output_ptr = output_prim.pointer<float>();
// -0.35f = 0.1 (negative slope) * the raw negative convolution result.
1956     EXPECT_FLOAT_EQ(4.0f, get_value<float>(output_ptr, 0));
1957     EXPECT_FLOAT_EQ(-0.35f, get_value<float>(output_ptr, 1));
1958     EXPECT_FLOAT_EQ(2.0f, get_value<float>(output_ptr, 2));
1959     EXPECT_FLOAT_EQ(5.0f, get_value<float>(output_ptr, 3));
// Disabled test: two chained 1x1 convolutions with random inputs, compared
// element-wise against a precomputed reference (external conv_1x1_output).
// NOTE(review): this excerpt elides source lines between the numbered lines.
1962 TEST(convolution_gpu, DISABLED_two_1x1_kernels_after_each_other) {
// Reference output is defined in another translation unit.
1966     extern const std::vector<float> conv_1x1_output;
1968     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 16, 8, 16, 16 } });
1969     auto weights_conv_1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 8, 8, 1, 1 } });
1970     auto weights_conv_2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 8, 1, 1 } });
// Random values must match whatever seed/sequence produced conv_1x1_output —
// presumably set_random_values is deterministic; TODO confirm.
1972     set_random_values<float>(input);
1973     set_random_values<float>(weights_conv_1);
1974     set_random_values<float>(weights_conv_2);
1976     auto inp_lay = input_layout("input", input.get_layout());
1977     auto conv_1 = convolution(
1980         { "weights_conv_1" });
1981     auto conv_2 = convolution(
1984         { "weights_conv_2" });
1988         data("weights_conv_1", weights_conv_1),
1990         data("weights_conv_2", weights_conv_2),
1995     bo.set_option(build_option::optimize_data(true));
1996     network network(engine, topology, bo);
1997     network.set_input_data("input", input);
1999     auto outputs = network.execute();
2000     EXPECT_EQ(outputs.size(), size_t(1));
2002     auto output_prim = outputs.at("conv_2").get_memory();
2004     auto output_ptr = output_prim.pointer<float>();
2005     auto output_layout = output_prim.get_layout();
// Walk the bfyx output linearly and compare to the reference at each index.
2007     int y_size = output_layout.size.spatial[1];
2008     int x_size = output_layout.size.spatial[0];
2009     int f_size = output_layout.size.feature[0];
2010     int b_size = output_layout.size.batch[0];
2011     int f_offset = y_size * x_size;
2012     int b_offset = f_size * f_offset;
2013     for (int b = 0; b < b_size; ++b)
2015         for (int f = 0; f < f_size; ++f)
2017             for (int y = 0; y < y_size; ++y)
2019                 for (int x = 0; x < x_size; ++x)
2021                     int idx = b * b_offset + f * f_offset + y * x_size + x;
2022                     EXPECT_TRUE(are_equal(conv_1x1_output[idx], get_value<float>(output_ptr, idx)));
// Parameterized fp32 convolution test: batch 16, 2 input features, 16 output
// features, 4x4 input, 2x2 weights, stride 2x2. Input/weight/bias/expected
// values are all generated programmatically from small templates so the
// reference output can be computed in closed form.
// NOTE(review): this excerpt elides source lines between the numbered lines.
2029 TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp32)
// Compile-time switch to exercise the legacy bfyx weights layout.
2031 #define USE_OLD_WEIGHTS_FORMAT 0
2033     const auto input_format   = format::yxfb;
2034 #if USE_OLD_WEIGHTS_FORMAT
2035     const auto weights_format = format::bfyx;
2037     const auto weights_format = format::yxfb;
2039     const auto biases_format = format::bfyx;
2041     const int32_t batch_size = 16;
2042     const int32_t input_feature_count = 2;
2043     const int32_t output_feature_count = 16;
2045     const int32_t stride_x = 2;
2046     const int32_t stride_y = 2;
2048     const int32_t input_x = 4;
2049     const int32_t input_y = 4;
2050     const int32_t weights_x = 2;
2051     const int32_t weights_y = 2;
// Standard no-padding output size formula.
2052     const int32_t output_x = (input_x - weights_x) / stride_x + 1;
2053     const int32_t output_y = (input_y - weights_y) / stride_y + 1;
2057     auto input_size = tensor( batch_size, input_feature_count, input_x, input_y );
2058     auto input = memory::allocate(engine, { data_types::f32, input_format, input_size });
2059     auto weights_size = tensor( output_feature_count, input_feature_count, weights_x, weights_y );
2060     auto weights = memory::allocate(engine, { data_types::f32, weights_format, weights_size });
2061     auto biases = memory::allocate(engine, { data_types::f32, biases_format, {1,1,output_feature_count,1}});
2063     //auto output = memory::allocate({output_format, {batch_size, {output_x, output_y}, output_feature_count}});
// Input: the 4x4 template scaled per (batch, feature) so each slice is unique.
2067     std::vector<float> input_vals_template {
2068         0.25f, 0.50f, 0.75f, 1.00f,
2069         1.25f, 1.50f, 1.75f, 2.00f,
2070         2.25f, 2.50f, 2.75f, 3.00f,
2071         3.25f, 3.50f, 3.75f, 4.00f,
2073     input_vals_template.resize(input_y * input_x);
2075     std::vector<float> input_vals;
2076     input_vals.reserve(input_y * input_x * input_feature_count * batch_size);
// yxfb order: spatial outermost, batch innermost.
2077     for (uint32_t yxi = 0; yxi < input_y * input_x; ++yxi)
2079         for (uint32_t ifi = 0; ifi < input_feature_count; ++ifi)
2081             for (uint32_t bi = 0; bi < batch_size; ++bi)
2083                 input_vals.push_back((bi * input_feature_count + ifi + 1) * input_vals_template[yxi]);
2087     set_values(input, input_vals);
2091     std::vector<float> weights_vals_template {
2095     weights_vals_template.resize(weights_y * weights_x);
2097     std::vector<float> weights_vals;
2098     weights_vals.reserve(weights_y * weights_x * input_feature_count * output_feature_count);
// Loop nesting order differs per weights layout; the generated value for a
// given (ofi, ifi, yxi) is identical either way.
2099 #if USE_OLD_WEIGHTS_FORMAT
2100     for (uint32_t ofi = 0; ofi < output_feature_count; ++ofi)
2102         for (uint32_t ifi = 0; ifi < input_feature_count; ++ifi)
2104             for (uint32_t yxi = 0; yxi < weights_y * weights_x; ++yxi)
2106                 weights_vals.push_back((ofi * input_feature_count + ifi + 1) * weights_vals_template[yxi]);
2111     for (uint32_t yxi = 0; yxi < weights_y * weights_x; ++yxi)
2113         for (uint32_t ifi = 0; ifi < input_feature_count; ++ifi)
2115             for (uint32_t ofi = 0; ofi < output_feature_count; ++ofi)
2117                 weights_vals.push_back((ofi * input_feature_count + ifi + 1) * weights_vals_template[yxi]);
2122     set_values(weights, weights_vals);
// Bias for output feature i is simply i.
2126     std::vector<float> biases_vals;
2127     biases_vals.reserve(output_feature_count);
2128     for (uint32_t ofi = 0; ofi < output_feature_count; ++ofi)
2130         biases_vals.push_back(ofi * 1.0f);
2132     set_values(biases, biases_vals);
// Expected outputs: closed-form factor derived from the linear input/weight
// generators above (sum over input features folds into these polynomials).
2136     std::vector<float> output_vals_template {
2140     output_vals_template.resize(output_y * output_x);
2142     std::vector<float> output_vals;
2143     output_vals.reserve(output_y * output_x * output_feature_count * batch_size);
2144     for (uint32_t yxi = 0; yxi < output_y * output_x; ++yxi)
2146         for (uint32_t ofi = 0; ofi < output_feature_count; ++ofi)
2148             for (uint32_t bi = 0; bi < batch_size; ++bi)
2150                 uint32_t template_factor = input_feature_count * input_feature_count * input_feature_count * bi * ofi +
2151                     input_feature_count * input_feature_count * (input_feature_count + 1) / 2 * (bi + ofi) +
2152                     input_feature_count * (input_feature_count + 1) * (2 * input_feature_count + 1) / 6;
2153                 float bias_factor = ofi * 1.0f;
2155                 output_vals.push_back(template_factor * output_vals_template[yxi] + bias_factor);
2160     // Computing convolution.
2162         input_layout("input", input.get_layout()),
2163         data("weights", weights),
2164         data("biases", biases),
2170             { 1,1,stride_x,stride_y },
2177     network network(engine, topology);
2178     network.set_input_data("input", input);
2180     auto outputs = network.execute();
2181     EXPECT_EQ(outputs.size(), size_t(1));
2182     EXPECT_EQ(outputs.begin()->first, "conv");
2184     auto output_prim = outputs.begin()->second.get_memory();
2186     auto output_ptr = output_prim.pointer<float>();
// Compare in yxfb order; on mismatch print the logical coordinates to aid
// debugging instead of just the flat index.
2190     for (uint32_t yxi = 0; yxi < output_y * output_x; ++yxi)
2192         for (uint32_t ofi = 0; ofi < output_feature_count; ++ofi)
2194             for (uint32_t bi = 0; bi < batch_size; ++bi, ++i)
2196                 auto equal = are_equal(output_vals[i], get_value<float>(output_ptr, i));
2200                     std::cout << "Failed at position (" << yxi << ", output feature = " << ofi << ", batch = " << bi << "): "
2201                         << output_vals[i] << " != " << get_value<float>(output_ptr, i) << std::endl;
2208 #undef USE_OLD_WEIGHTS_FORMAT
// Quantizes weights in place, per output feature map (batch dimension of the
// weights layout): scales each OFM's weights so its max |w| maps to 127, and
// records the per-OFM dequantization factor (max/127) into w_qf.
// NOTE(review): this excerpt elides source lines between the numbered lines.
2211 template<typename T>
2212 void quantize_weights(cldnn::memory& weights, cldnn::memory& w_qf)
2216     auto batch_pitch = weights.get_layout().get_pitches().batch[0];
2217     auto ptr = weights.pointer<T>();
2218     auto wqf_ptr = w_qf.pointer<float>();
2220     for (int ofm = 0; ofm < weights.get_layout().size.batch[0]; ofm++)
// Find the largest absolute weight in this OFM's slice.
2223         for (int w = 0; w < batch_pitch; w++)
2224             if (max < abs(ptr[ofm* batch_pitch + w]))
2225                 max = abs(ptr[ofm* batch_pitch + w]);
// Guard against an all-zero slice: a unit scale leaves values untouched.
2228             max = (T)1; // do not quantize
2230         for (int w = 0; w < batch_pitch; w++)
2231             ptr[ofm* batch_pitch + w] = (T)round((float)ptr[ofm* batch_pitch + w] * 127.0f / (float)max);
2232         wqf_ptr[ofm] = max/127.0f;
// Computes per-output-feature calibration factors: for each feature map,
// scans its slice for the max absolute value and stores 127/max, so that
// subsequent int8 requantization maps the observed range onto [-127, 127].
// NOTE(review): this excerpt elides source lines between the numbered lines.
2235 template<typename T>
2236 void calibrate(const cldnn::memory& output, cldnn::memory& calibrations)
2240     auto feature_pitch = output.get_layout().get_pitches().feature[0];
2241     auto ptr = output.pointer<T>();
2242     auto calibrations_ptr = calibrations.pointer<float>();
2244     for (int ofm = 0; ofm < output.get_layout().size.feature[0]; ofm++)
2247         for (int w = 0; w < feature_pitch; w++)
2248             if (max < abs(ptr[ofm* feature_pitch + w]))
2249                 max = abs(ptr[ofm* feature_pitch + w]);
// NOTE(review): unlike quantize_weights, no guard against max == 0 is
// visible here — a zero slice would divide by zero; confirm upstream data.
2250         calibrations_ptr[ofm] = 127.0f / max;
// Returns the maximum absolute value found in the memory buffer — presumably
// a linear scan over all elements; the loop body is elided from this excerpt.
2254 template<typename T>
2255 T max_abs(const cldnn::memory& mem)
2260     auto ptr = mem.pointer<T>();
// Divides each weight by its input-feature-map quantization factor (qf is
// indexed by ifm here, unlike quantize_weights which is per-ofm), folding a
// previous layer's calibration into these weights in place.
// NOTE(review): this excerpt elides source lines between the numbered lines
// (the `index` variable's declaration/increment is not visible).
2267 template<typename T>
2268 void apply_calibration_on_weights(cldnn::memory& weights, cldnn::memory& qf)
2270     auto batch_pitch = weights.get_layout().get_pitches().batch[0];
2271     auto ptr = weights.pointer<T>();
2272     auto wqf_ptr = qf.pointer<float>();
2273     tensor w_size = weights.get_layout().size;
2275     for (int ofm = 0; ofm < w_size.batch[0]; ofm++)
2276         for (int ifm = 0; ifm < w_size.feature[0]; ifm++)
2277             for (int xy = 0; xy < w_size.spatial[0] * w_size.spatial[1]; xy++)
2279                 ptr[index] = ptr[index] / wqf_ptr[ifm];
// Creates an i8 copy of a float weights buffer with the same format/size,
// narrowing each element to char. The per-element conversion loop (and the
// declarations of `a`/`indx`) is elided from this excerpt.
2284 cldnn::memory create_int8_weights(engine engine, cldnn::memory& in_weights)
2286     auto layout = in_weights.get_layout();
2287     auto out_weights = memory::allocate(engine, { data_types::i8, layout.format, layout.size });
2288     auto in = in_weights.pointer<float>();
2289     auto out = out_weights.pointer<char>();
2292         out[indx++] = (char) a;
// Shared fixture helper: appends i8 weights, f32 biases, quantization
// factors, and an int8 convolution ("conv" over "input", stride {1,2},
// activation enabled — last argument) to the given topology.
// NOTE(review): this excerpt elides source lines between the numbered lines.
2296 void add_primitives(const engine& engine, topology& topology)
2298     auto weights = memory::allocate(engine, { data_types::i8, format::bfyx,{ 2, 1, 3, 2 } });
2300     std::vector<char> weights_values = { 1, 2, 1, 2, 1, 2, 19, 17, -1, -10, 32, 23 };
2301     set_values<char>(weights, weights_values);
2302     cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
2303     auto weigths_qfs = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
2304     set_values(biases, { 1.0f, -8.0f });
2307         data("weights", weights),
2308         data("biases", biases),
2309         data("w_qfs", weigths_qfs),
2310         convolution("conv", "input", { "weights" }, { "biases" }, { 0, 0, 1, 2 }, { 0, 0, 0, 0 }, { 1, 1, 1, 1 }, true));
// Int8 convolution with activation on char data: builds the net via
// add_primitives() above and checks a 1x2x2x3 bfyx output against the
// (elided) reference grid within an absolute tolerance of 3.
// NOTE(review): this excerpt elides source lines between the numbered lines.
2313 TEST(convolution_f32_fw_gpu, byte_activation) {
// Engine configured to dump kernels ("kernels" directory) for debugging.
2341     engine_configuration eng_conf(false, false, false, "", "", true, "", "kernels");
2342     engine engine{ eng_conf };
2343     auto input = memory::allocate(engine, { data_types::i8, format::bfyx,{ 1, 1, 5, 4 } });
2345     VVVF<char> output_vec = {
2356     opts.set_option(build_option::optimize_data(true));
2357     opts.set_option(build_option::graph_dumps_dir("graph"));
2359     set_values<char>(input, { 1, 2, -3, 4, -5, 2, -2, 3, -4, 6, -3, 3, -3, 5, -1, -1, -1, -1, -1, -1 });
2362         input_layout("input", input.get_layout()));
2363     add_primitives(engine, topology);
2364     network network(engine, topology, opts);
2365     network.set_input_data("input", input);
2367     auto outputs = network.execute();
2368     EXPECT_EQ(outputs.begin()->first, "conv");
2370     auto output_memory = outputs.at("conv").get_memory();
2371     auto output_layout = output_memory.get_layout();
2372     auto output_ptr = output_memory.pointer<char>();
2374     int y_size = output_layout.size.spatial[1];
2375     int x_size = output_layout.size.spatial[0];
2376     int f_size = output_layout.size.feature[0];
2377     int b_size = output_layout.size.batch[0];
2378     EXPECT_EQ(output_layout.format, format::bfyx);
2379     EXPECT_EQ(y_size, 2);
2380     EXPECT_EQ(x_size, 3);
2381     EXPECT_EQ(f_size, 2);
2382     EXPECT_EQ(b_size, 1);
// Loose tolerance (3.0) accommodates int8 quantization/rounding error.
2383     for (int f = 0; f < f_size; f++)
2384         for (int y = 0; y < y_size; ++y) {
2385             for (int x = 0; x < x_size; ++x) {
2386                 EXPECT_NEAR(output_vec[f][y][x], ((float)output_ptr[f*y_size*x_size + y * x_size + x]), 3.0f);
2391 TEST(convolution_f32_fw_gpu, quantized_convolution_low_prec_single_ofq) {
// Checks int8 (low-precision) convolution against an f32 reference using a
// single output quantization factor (o_qf) shared by all output features.
// --- f32 reference pass: 1x1x5x4 input, two 1x3x2 filters, bias per OFM ---
2422 auto input_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 4 } });
2423 auto weights_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 2 } });
2424 cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
2425 auto weigths_qfs = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
2427 std::vector<float> weights_values_f = { 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 19.0, 17.0, -1.0, -10.0, 32.0, 23.0 };
2428 set_values<float>(input_f, { 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 2.0, 3.0, 4.0, 6.0, 3.0, 3.0, 3.0, 5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 });
2429 set_values<float>(weights_f, weights_values_f);
2431 set_values(biases, { 1.0f, -8.0f });
// Hand-computed expected f32 results: one 2x3 output map per output feature.
2432 VVVF<float> output_vec = {
2434 { 21.0f, 28.0f, 39.0f },
2435 { 18.0f, 20.0f, 20.0f }
2438 { 155.0f, 245.0f, 348.0f },
2439 { 142.0f, 140.0f, 178.0f }
// f32 network; last argument of convolution() is the stride tensor.
2442 topology topology_f(
2443 input_layout("input_f", input_f.get_layout()),
2444 data("weights_f", weights_f),
2445 data("biases", biases),
2446 convolution("conv_f", "input_f", { "weights_f" }, { "biases" }, { 0, 0, 1, 2 }));
2449 opts.set_option(build_option::optimize_data(true));
2450 network network_f(engine, topology_f, opts);
2451 network_f.set_input_data("input_f", input_f);
2453 auto outputs_f = network_f.execute();
2454 EXPECT_EQ(outputs_f.begin()->first, "conv_f");
2456 auto output_memory_f = outputs_f.at("conv_f").get_memory();
2457 auto output_ptr_f = output_memory_f.pointer<float>();
// --- int8 pass: same data quantized to i8; o_qf maps the f32 output range
// onto [-127, 127] so the char output can be dequantized by dividing by it.
2459 auto input = memory::allocate(engine, { data_types::i8, format::bfyx,{ 1, 1, 5, 4 } });
2460 auto weights = memory::allocate(engine, { data_types::i8, format::bfyx,{ 2, 1, 3, 2 } });
2462 float o_qf = 127.0f / max_abs<float>(output_memory_f);
2464 std::vector<char> weights_values = { 1, 2, 1, 2, 1, 2, 19, 17, -1, -10, 32, 23 };
2465 set_values<char>(input, { 1, 2, 3, 4, 5, 2, 2, 3, 4, 6, 3, 3, 3, 5, 1, 1, 1, 1, 1, 1 });
2466 set_values<char>(weights, weights_values);
// Per-OFM weight quantization factors start at 1.0; quantize_weights may
// rescale them together with the i8 weights (helper defined elsewhere).
2467 set_values<float>(weigths_qfs, { 1.0f, 1.0f });
2468 quantize_weights<char>(weights, weigths_qfs);
// Quantized convolution variant: weight qfs + input (i_qf, defined in an
// elided line above) and output (o_qf) quantization factors.
2471 input_layout("input", input.get_layout()),
2472 data("weights", weights),
2473 data("biases", biases),
2474 data("w_qfs",weigths_qfs),
2475 convolution("conv", "input", { "weights" }, { "biases" }, { "w_qfs" },i_qf, o_qf, { 0, 0, 1, 2 }));
2477 network network(engine, topology, opts);
2478 network.set_input_data("input", input);
2480 auto outputs = network.execute();
2481 EXPECT_EQ(outputs.begin()->first, "conv");
2483 auto output_memory = outputs.at("conv").get_memory();
2484 auto output_layout = output_memory.get_layout();
2485 auto output_ptr = output_memory.pointer<char>();
2487 int y_size = output_layout.size.spatial[1];
2488 int x_size = output_layout.size.spatial[0];
2489 int f_size = output_layout.size.feature[0];
2490 int b_size = output_layout.size.batch[0];
2491 EXPECT_EQ(output_layout.format, format::bfyx);
2492 EXPECT_EQ(y_size, 2);
2493 EXPECT_EQ(x_size, 3);
2494 EXPECT_EQ(f_size, 2);
2495 EXPECT_EQ(b_size, 1);
// Dequantize each i8 result (divide by o_qf) and compare to the f32
// reference with a coarse +-3.0 tolerance to absorb quantization error.
2496 for (int f = 0; f < f_size; f++)
2497 for (int y = 0; y < y_size; ++y) {
2498 for (int x = 0; x < x_size; ++x) {
2499 EXPECT_NEAR(output_vec[f][y][x], ((float)output_ptr[f*y_size*x_size + y * x_size + x]) / o_qf, 3.0f);
2505 TEST(convolution_f32_fw_gpu, quantized_convolution_high_prec_calib_per_ofm) {
// Like the single-o_qf test above, but uses a per-output-feature-map
// calibration memory ("out_calibrations") computed from the f32 output,
// so each OFM gets its own dequantization factor.
2535 auto input_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 4 } });
2536 auto weights_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 2 } });
2537 cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
2538 auto weigths_qfs = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
// One calibration factor per output feature (2 OFMs).
2539 auto output_calibrations = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
2541 std::vector<float> weights_values_f = { 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 19.0, 17.0, -1.0, -10.0, 32.0, 23.0 };
2542 set_values<float>(input_f, { 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 2.0, 3.0, 4.0, 6.0, 3.0, 3.0, 3.0, 5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 });
2543 set_values<float>(weights_f, weights_values_f);
2544 set_values(biases, { 1.0f, -8.0f });
// Hand-computed expected f32 results: one 2x3 output map per output feature.
2545 VVVF<float> output_vec = {
2547 { 21.0f, 28.0f, 39.0f },
2548 { 18.0f, 20.0f, 20.0f }
2551 { 155.0f, 245.0f, 348.0f },
2552 { 142.0f, 140.0f, 178.0f }
// --- f32 reference network ---
2555 topology topology_f(
2556 input_layout("input_f", input_f.get_layout()),
2557 data("weights_f", weights_f),
2558 data("biases", biases),
2559 convolution("conv_f", "input_f", { "weights_f" }, { "biases" }, { 0, 0, 1, 2 }));
2562 opts.set_option(build_option::optimize_data(true));
2563 network network_f(engine, topology_f, opts);
2564 network_f.set_input_data("input_f", input_f);
2566 auto outputs_f = network_f.execute();
2567 EXPECT_EQ(outputs_f.begin()->first, "conv_f");
2569 auto output_memory_f = outputs_f.at("conv_f").get_memory();
2570 auto output_ptr_f = output_memory_f.pointer<float>();
// --- int8 pass with per-OFM calibration ---
2572 auto input = memory::allocate(engine, { data_types::i8, format::bfyx,{ 1, 1, 5, 4 } });
2573 auto weights = memory::allocate(engine, { data_types::i8, format::bfyx,{ 2, 1, 3, 2 } });
2576 std::vector<char> weights_values = { 1, 2, 1, 2, 1, 2, 19, 17, -1, -10, 32, 23 };
2577 set_values<char>(input, { 1, 2, 3, 4, 5, 2, 2, 3, 4, 6, 3, 3, 3, 5, 1, 1, 1, 1, 1, 1 });
2578 set_values<char>(weights, weights_values);
2579 set_values<float>(weigths_qfs, { 1.0f, 1.0f });
2580 quantize_weights<char>(weights, weigths_qfs);
// Derive per-OFM calibration factors from the f32 reference output.
2581 calibrate<float>(output_memory_f, output_calibrations);
// Calibrated-convolution variant: weight qfs + per-OFM output calibration
// + input quantization factor (i_qf, defined in an elided line above).
2584 input_layout("input", input.get_layout()),
2585 data("weights", weights),
2586 data("biases", biases),
2587 data("w_qfs", weigths_qfs),
2588 data("out_calibrations", output_calibrations),
2589 convolution( "conv", "input", { "weights" }, { "biases" },
2590 { "w_qfs" }, { "out_calibrations" }, i_qf, { 0, 0, 1, 2 }));
2592 network network(engine, topology, opts);
2593 network.set_input_data("input", input);
2595 auto outputs = network.execute();
2596 EXPECT_EQ(outputs.begin()->first, "conv");
2598 auto output_memory = outputs.at("conv").get_memory();
2599 auto output_layout = output_memory.get_layout();
2600 auto output_ptr = output_memory.pointer<char>();
// o_qf indexes the per-OFM calibration factors for dequantization below.
2601 auto o_qf = output_calibrations.pointer<float>();
2602 int y_size = output_layout.size.spatial[1];
2603 int x_size = output_layout.size.spatial[0];
2604 int f_size = output_layout.size.feature[0];
2605 int b_size = output_layout.size.batch[0];
2606 EXPECT_EQ(output_layout.format, format::bfyx);
2607 EXPECT_EQ(y_size, 2);
2608 EXPECT_EQ(x_size, 3);
2609 EXPECT_EQ(f_size, 2);
2610 EXPECT_EQ(b_size, 1);
// Dequantize each i8 result by its feature map's factor and compare with
// a coarse +-3.0 tolerance to absorb quantization error.
2611 for (int f = 0; f < f_size; f++)
2612 for (int y = 0; y < y_size; ++y) {
2613 for (int x = 0; x < x_size; ++x) {
2614 EXPECT_NEAR(output_vec[f][y][x], ((float)output_ptr[f*y_size*x_size + y * x_size + x]) / o_qf[f], 3.0f);
2618 TEST(convolution_f32_fw_gpu, calibration_advance) {
// End-to-end calibration test for a two-layer convolution chain:
// run both convs in f32, calibrate each conv's output, propagate the first
// layer's calibration into the second layer's weights, quantize everything
// to int8, and check the dequantized int8 chain against the f32 reference.
2658 // 313.32 217.43 118.10
// --- f32 memories: conv1 (2 OFMs, 1 IFM) feeds conv2 (3 OFMs, 2 IFMs) ---
2668 auto input_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 4 } });
2669 auto weights_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 2 } });
2670 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
2671 auto w_qf = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
2672 auto weights_f_2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 3, 2, 3, 2 } });
2673 auto biases_2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
2674 auto w_qf_2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
2676 std::vector<float> weights_values_f = { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, 1.9f, 1.7f, -1.0f, -1.0f, 3.2f, 2.3f };
2677 std::vector<float> weights_values_f_2 = {
2678 1.5f, 2.3f, -1.0f, 3.0f, 5.6f, -1.0f,
2679 3.0f, 5.6f, -1.0f, 1.0f, 2.0f, 3.0f,
2681 1.9f, 1.7f, -1.0f, 1.9f, 1.7f, -1.0f,
2682 -1.0f, 3.2f, 2.3f, -1.0f, 3.2f, 2.3f,
2684 1.0f, 2.0f, -1.0f, 2.0f, 1.0f, -1.0f,
2685 -1.0f, 2.0f, 1.0f, 1.0f, 2.0f, -1.0f,};
2687 set_values<float>(input_f, { 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 2.0, 3.0, 4.0, 6.0, 3.0, 3.0, 3.0, 5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 });
2688 set_values<float>(weights_f, weights_values_f);
2689 set_values<float>(weights_f_2, weights_values_f_2);
2690 set_values(biases, { 1.0f, -8.0f });
2691 set_values(biases_2, { 2.0f, 4.0f, 0.0f });
// --- f32 reference chain: conv_f -> conv_f_2; both outputs are kept ---
2693 topology topology_f(
2694 input_layout("input_f", input_f.get_layout()),
2695 data("weights_f", weights_f),
2696 data("biases", biases),
2697 data("weights_f_2", weights_f_2),
2698 data("biases_2", biases_2),
2699 convolution("conv_f", "input_f", { "weights_f" }, { "biases" }, { 0, 0, 1, 2 }),
2700 convolution("conv_f_2", "conv_f", { "weights_f_2" }, { "biases_2" }, { 0, 0, 1, 1 }));
2703 opts.set_option(build_option::optimize_data(true));
2704 opts.set_option(build_option::outputs({ "conv_f", "conv_f_2" }));
2705 network network_f(engine, topology_f, opts);
2706 network_f.set_input_data("input_f", input_f);
2708 auto outputs_f = network_f.execute();
2709 auto output_memory_f = outputs_f.at("conv_f").get_memory();
2710 auto output_memory_f_2 = outputs_f.at("conv_f_2").get_memory();
2711 auto output_calibrations = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
2712 auto output_calibrations_2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
// Calibrate both layers from the f32 outputs, then fold layer-1's output
// calibration into layer-2's weights before quantizing both weight sets.
2714 calibrate<float>(output_memory_f, output_calibrations);
2715 calibrate<float>(output_memory_f_2, output_calibrations_2);
2716 apply_calibration_on_weights<float>(weights_f_2, output_calibrations);
2717 quantize_weights<float>(weights_f, w_qf);
2718 quantize_weights<float>(weights_f_2, w_qf_2);
// Convert the prepared f32 data to i8 memories (helper defined elsewhere).
2720 auto weights = create_int8_weights(engine, weights_f);
2721 auto weigths_2 = create_int8_weights(engine, weights_f_2);
2722 auto input = create_int8_weights(engine, input_f);
// --- int8 chain mirroring the f32 topology (i_qf fixed to 1.0f) ---
2725 input_layout("input", input.get_layout()),
2726 data("weights", weights),
2727 data("biases", biases),
2728 data("weights_2", weigths_2),
2729 data("biases_2", biases_2),
2731 data("w_qf_2", w_qf_2),
2732 data("calib", output_calibrations),
2733 data("calib_2", output_calibrations_2),
2734 convolution("conv", "input", { "weights" }, { "biases" }, { "w_qf" }, { "calib" }, 1.0f, { 0, 0, 1, 2 }),
2735 convolution("conv_2", "conv", { "weights_2" }, { "biases_2" }, { "w_qf_2" }, { "calib_2" }, 1.0f, { 0, 0, 1, 1 }));
2737 build_options opts_2;
2738 opts_2.set_option(build_option::optimize_data(true));
2739 opts_2.set_option(build_option::outputs({ "conv", "conv_2" }));
2740 cldnn::network network(engine, topology, opts_2);
2741 network.set_input_data("input", input);
2742 auto outputs = network.execute();
2743 auto output_memory = outputs.at("conv_2").get_memory();
2744 auto ref_ptr = output_memory_f_2.pointer<float>();
2745 auto test_ptr = output_memory.pointer<char>();
2746 auto& out_size = output_memory.get_layout().size;
2747 auto o_qf = output_calibrations_2.pointer<float>();
// Compare the final layer only: dequantize each i8 value by its OFM's
// calibration factor and allow a coarse +-3.0 quantization tolerance.
2749 for (int f = 0; f < out_size.feature[0]; f++)
2750 for (int y = 0; y < out_size.spatial[1]; ++y) {
2751 for (int x = 0; x < out_size.spatial[0]; ++x) {
2752 EXPECT_NEAR(ref_ptr[x + out_size.spatial[0] * (y + out_size.spatial[1]*f)], ((float)test_ptr[x + out_size.spatial[0] * (y + out_size.spatial[1] * f)]) / o_qf[f], 3.0f);
2758 TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp16)
// fp16 convolution test: data is generated in f32, reordered to f16 for the
// convolution, and reordered back to f32 for comparison. The input values
// are chosen so results stay exactly representable in half precision.
2760 #define USE_OLD_WEIGHTS_FORMAT 0
// Skip (not fail) on devices without the cl_khr_fp16 extension.
2764 if (!engine.get_info().supports_fp16)
2766 std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." << std::endl;
2772 const auto input_format = format::yxfb;
2773 #if USE_OLD_WEIGHTS_FORMAT
2774 const auto weights_format = format::bfyx;
2776 const auto weights_format = format::yxfb;
2778 const auto biases_format = format::bfyx;
2779 const auto output_format = input_format;
// Geometry: batch 16, 2 input features -> 16 output features,
// 4x4 input, 2x2 kernel, stride 2x2, no padding -> 2x2 output.
2781 const int32_t batch_size = 16;
2782 const int32_t input_feature_count = 2;
2783 const int32_t output_feature_count = 16;
2785 const int32_t stride_x = 2;
2786 const int32_t stride_y = 2;
2788 const int32_t input_x = 4;
2789 const int32_t input_y = 4;
2790 const int32_t weights_x = 2;
2791 const int32_t weights_y = 2;
2792 const int32_t output_x = (input_x - weights_x) / stride_x + 1;
2793 const int32_t output_y = (input_y - weights_y) / stride_y + 1;
2796 auto input_size = tensor( batch_size, input_feature_count, input_x, input_y );
2797 auto input = memory::allocate(engine, { data_types::f32, input_format, input_size });
2798 auto weights_size = tensor( output_feature_count, input_feature_count, weights_x, weights_y );
2799 auto weights = memory::allocate(engine, { data_types::f32, weights_format, weights_size });
2800 auto biases_size = tensor( 1,1,output_feature_count,1 );
2801 auto biases = memory::allocate(engine, { data_types::f32, biases_format, biases_size });
2802 auto output_size = tensor( batch_size, output_feature_count, output_x, output_y );
2803 //auto output = memory::allocate({output_format, {batch_size, {output_x, output_y}, output_feature_count}});
2805 //auto input_cvtd = memory::allocate(engine, { data_types::f16, input_size });
2806 //auto weights_cvtd = memory::allocate(engine, { data_types::f16, weights_size });
2807 //auto biases_cvtd = memory::allocate(engine, { data_types::f16, biases_size });
2808 //auto output_cvtd = memory::allocate({output_cvt_format, {batch_size, {output_x, output_y}, output_feature_count}});
// Input: a 4x4 template scaled per (batch, feature) so every slice differs.
2812 std::vector<float> input_vals_template {
2813 0.25f, 0.50f, 0.75f, 1.00f,
2814 1.25f, 1.50f, 1.75f, 2.00f,
2815 2.25f, 2.50f, 2.75f, 3.00f,
2816 3.25f, 3.50f, 3.75f, 4.00f,
2818 input_vals_template.resize(input_y * input_x);
// Loop nesting (yx -> f -> b) matches the yxfb memory layout order.
2820 std::vector<float> input_vals;
2821 input_vals.reserve(input_y * input_x * input_feature_count * batch_size);
2822 for (uint32_t yxi = 0; yxi < input_y * input_x; ++yxi)
2824 for (uint32_t ifi = 0; ifi < input_feature_count; ++ifi)
2826 for (uint32_t bi = 0; bi < batch_size; ++bi)
2828 input_vals.push_back((bi * input_feature_count + ifi + 1) * input_vals_template[yxi]);
2832 set_values(input, input_vals);
// Weights: template scaled per (output feature, input feature); the loop
// order differs between the old bfyx and current yxfb weight layouts.
2836 std::vector<float> weights_vals_template {
2840 weights_vals_template.resize(weights_y * weights_x);
2842 std::vector<float> weights_vals;
2843 weights_vals.reserve(weights_y * weights_x * input_feature_count * output_feature_count);
2844 #if USE_OLD_WEIGHTS_FORMAT
2845 for (uint32_t ofi = 0; ofi < output_feature_count; ++ofi)
2847 for (uint32_t ifi = 0; ifi < input_feature_count; ++ifi)
2849 for (uint32_t yxi = 0; yxi < weights_y * weights_x; ++yxi)
2851 weights_vals.push_back((ofi * input_feature_count + ifi + 1) * weights_vals_template[yxi]);
2856 for (uint32_t yxi = 0; yxi < weights_y * weights_x; ++yxi)
2858 for (uint32_t ifi = 0; ifi < input_feature_count; ++ifi)
2860 for (uint32_t ofi = 0; ofi < output_feature_count; ++ofi)
2862 weights_vals.push_back((ofi * input_feature_count + ifi + 1) * weights_vals_template[yxi]);
2867 set_values(weights, weights_vals);
// Bias: simply the output feature index.
2871 std::vector<float> biases_vals;
2872 biases_vals.reserve(output_feature_count);
2873 for (uint32_t ofi = 0; ofi < output_feature_count; ++ofi)
2875 biases_vals.push_back(ofi * 1.0f);
2877 set_values(biases, biases_vals);
// Expected output: closed-form factor derived from the scaled-template
// construction of inputs and weights, plus the per-OFM bias.
2881 std::vector<float> output_vals_template {
2885 output_vals_template.resize(output_y * output_x);
2887 std::vector<float> output_vals;
2888 output_vals.reserve(output_y * output_x * output_feature_count * batch_size);
2889 for (uint32_t yxi = 0; yxi < output_y * output_x; ++yxi)
2891 for (uint32_t ofi = 0; ofi < output_feature_count; ++ofi)
2893 for (uint32_t bi = 0; bi < batch_size; ++bi)
2895 uint32_t template_factor = input_feature_count * input_feature_count * input_feature_count * bi * ofi +
2896 input_feature_count * input_feature_count * (input_feature_count + 1) / 2 * (bi + ofi) +
2897 input_feature_count * (input_feature_count + 1) * (2 * input_feature_count + 1) / 6;
2898 float bias_factor = ofi * 1.0f;
2900 output_vals.push_back(template_factor * output_vals_template[yxi] + bias_factor);
2905 //auto expected_float = memory::allocate(engine, { data_types::f32,{ format::x,{ static_cast<int32_t>(output_vals.size()) } } });
2906 //auto expected_half = memory::allocate(engine, { data_types::f16,{ format::x,{ static_cast<int32_t>(output_vals.size()) } } });
2907 //auto expected = memory::allocate(engine, { data_types::f32,{ format::x,{ static_cast<int32_t>(output_vals.size()) } } });
2909 // set_values(expected_float, output_vals);
2910 // auto cvt_expected_f32_f16 = reorder::create({expected_float, expected_half});
2911 // auto cvt_expected_f16_f32 = reorder::create({expected_half, expected});
2912 // execute({cvt_expected_f32_f16, cvt_expected_f16_f32}).wait();
2914 // auto expected_ptr = expected.as<const memory&>().pointer<float>();
2917 // Computing convolution.
// Topology: reorder all f32 inputs to f16, convolve in f16 (the convolution
// primitive itself sits in an elided line), reorder result back to f32.
2919 input_layout("input", input.get_layout()),
2920 reorder("cvt_input", "input", {data_types::f16, input_format, input_size}),
2921 data("weights", weights),
2922 reorder("cvt_weights", "weights", {data_types::f16, weights_format, weights_size}),
2923 data("biases", biases),
2924 reorder("cvt_biases", "biases", {data_types::f16, biases_format, biases_size}),
2930 { 1,1,stride_x,stride_y }),
2931 reorder("output", "conv", {data_types::f32, output_format, output_size})
2934 network network(engine, topology);
2935 network.set_input_data("input", input);
2937 auto outputs = network.execute();
2938 EXPECT_EQ(outputs.size(), size_t(1));
2939 EXPECT_EQ(outputs.begin()->first, "output");
2941 auto output_prim = outputs.begin()->second.get_memory();
2943 auto output_ptr = output_prim.pointer<float>();
// Validate element-by-element; tolerance 0.002 covers f32->f16->f32
// round-tripping. Position is logged on the first mismatch for debugging.
2947 for (uint32_t yxi = 0; yxi < output_y * output_x; ++yxi)
2949 for (uint32_t ofi = 0; ofi < output_feature_count; ++ofi)
2951 for (uint32_t bi = 0; bi < batch_size; ++bi, ++i)
2953 auto equal = are_equal(output_vals[i] /*get_value(expected_ptr, i)*/, output_ptr[i], 0.002f);
2957 std::cout << "Failed at position (" << yxi << ", output feature = " << ofi << ", batch = " << bi << "): "
2958 << output_vals[i] /*get_value(expected_ptr, i)*/ << " != " << output_ptr[i] << std::endl;
2965 #undef USE_OLD_WEIGHTS_FORMAT
// Parameterized convolution test fixture: generates the cross product of
// layer parameters (stride/dilation/offset/activation/padding variants) and
// generic parameters (data type, formats, tensor sizes), and provides a CPU
// reference implementation for result checking.
2968 class convolution_test : public tests::generic_test
// Frees the heap-allocated parameter objects accumulated by the static
// generator methods below (raw owning pointers, deleted manually).
2973 static void TearDownTestCase()
2975 for (auto generic_params : all_generic_params)
2977 delete generic_params;
2980 for (auto layer_params : all_layer_params)
2982 delete layer_params;
// Builds the list of convolution primitives to test. "input0" is the data
// input; "input1"/"input2" are the weight/bias inputs wired by the harness.
2986 static std::vector<cldnn::primitive*> generate_specific_test_params()
2988 // TODO: check split
2990 // TODO: check convolution without bias
2992 const std::vector<primitive_id>& weights = { "input1" };
2993 const std::vector<primitive_id>& bias = { "input2" };
2995 std::vector<tensor> stride_sizes = { tensor(1, 1, 1, 1), tensor(1, 1, 2, 3), tensor(1, 1, 4, 1), tensor(1, 1, 5, 5) };
2996 std::vector<tensor> dilation_sizes = { tensor(1, 1, 1, 1), tensor(1, 1, 5, 4), tensor(1, 1, 1, 3), tensor(1, 1, 7, 2) };
2997 std::vector<tensor> input_offset_sizes = { tensor(0, 0, 0, 0), tensor(0, 0, 2, 2), tensor(0, 0, -5, -2), tensor(0, 0, 3, -3) };
2999 std::vector<bool> activations = { false, true };
3000 std::vector<float> activation_slopes = { 0.f, -2.3f };
// No padding, input taken directly.
3003 all_layer_params.push_back(new convolution("convolution", "input0", weights, bias, stride_sizes[0], input_offset_sizes[0], dilation_sizes[0], activations[0], activation_slopes[0]));
3004 all_layer_params.push_back(new convolution("convolution", "input0", weights, bias, stride_sizes[1], input_offset_sizes[1], dilation_sizes[1], activations[0], activation_slopes[0]));
3005 all_layer_params.push_back(new convolution("convolution", "input0", weights, bias, stride_sizes[2], input_offset_sizes[2], dilation_sizes[2], activations[1], activation_slopes[0]));
3006 all_layer_params.push_back(new convolution("convolution", "input0", weights, bias, stride_sizes[3], input_offset_sizes[3], dilation_sizes[3], activations[1], activation_slopes[1]));
// Input fed through a reorder ("reorder0") first.
3009 all_layer_params.push_back(new convolution("convolution", "reorder0", weights, bias, stride_sizes[1], input_offset_sizes[1], dilation_sizes[1], activations[0], activation_slopes[0]));
3010 all_layer_params.push_back(new convolution("convolution", "reorder0", weights, bias, stride_sizes[3], input_offset_sizes[3], dilation_sizes[3], activations[1], activation_slopes[1]));
// With explicit output padding (lower/upper size pair).
3013 all_layer_params.push_back(new convolution("convolution", "input0", weights, bias, stride_sizes[1], input_offset_sizes[1], dilation_sizes[1], activations[0], activation_slopes[0], { { 0, 0, 2, 4 },{ 0, 0, 0, 19 } }));
3014 all_layer_params.push_back(new convolution("convolution", "input0", weights, bias, stride_sizes[2], input_offset_sizes[2], dilation_sizes[2], activations[1], activation_slopes[0], { { 0, 0, 1, 0 },{ 0, 0, 13, 9 } }));
3016 // Input + Output padding
3017 all_layer_params.push_back(new convolution("convolution", "reorder0", weights, bias, stride_sizes[0], input_offset_sizes[0], dilation_sizes[0], activations[0], activation_slopes[0], { { 0, 0, 1, 5 },{ 0, 0, 19, 4 } }));
3018 all_layer_params.push_back(new convolution("convolution", "reorder0", weights, bias, stride_sizes[3], input_offset_sizes[3], dilation_sizes[3], activations[1], activation_slopes[1], { { 0, 0, 1, 2 },{ 0, 0, 3, 4 } }));
3020 return all_layer_params;
// Combines every layer-parameter set with every generic-parameter set
// (data type x input format x weight format x sizes) into test tuples.
3023 static std::vector<std::tuple<tests::test_params*, cldnn::primitive*>> generate_all_test_params()
3025 generate_specific_test_params();
3027 std::vector<cldnn::format> input_formats = { cldnn::format::bfyx, cldnn::format::yxfb };
3028 std::vector<cldnn::format> weights_formats = { cldnn::format::bfyx, cldnn::format::yxfb };
3030 std::vector<int32_t> output_features_sizes = { 1, 3, 16 };
3031 std::vector<cldnn::tensor> kernel_sizes = { tensor(1, 1, 1, 1), tensor(1, 1, 4, 7), tensor(1, 1, 5, 3) };
3033 std::vector<tensor> input_tensor_size = { tensor(1, 5, 59, 72), tensor(8, 3, 63, 56), tensor(16, 2, 50, 50), tensor(32, 1, 44, 62) };
3035 for (cldnn::data_types data_type : test_data_types())
3037 for (cldnn::format input_format : input_formats)
3039 for (cldnn::format weights_format : weights_formats)
3041 cldnn::build_options network_build_options;
// optimize_data is only enabled for bfyx inputs here — presumably the
// only combination the optimization path supports in this suite.
3042 if (input_format == cldnn::format::bfyx)
3044 network_build_options.set_option(cldnn::build_option::optimize_data(true));
3046 for (cldnn::tensor input_size : input_tensor_size)
3048 for (cldnn::tensor kernel_size : kernel_sizes)
3050 for (auto output_features : output_features_sizes)
// input_layouts: [0]=data, [1]=weights (OFM x IFM x kernel), [2]=biases.
3052 test_params* params = new test_params(data_type, input_format, input_size.batch[0], input_size.feature[0], tensor(1, 1, input_size.spatial[0], input_size.spatial[1]), network_build_options);
3053 int input_features = params->input_layouts[0].size.feature[0];
3054 params->input_layouts.push_back(cldnn::layout(params->data_type, weights_format, cldnn::tensor(output_features, input_features, kernel_size.spatial[0], kernel_size.spatial[1]))); // weights
3055 params->input_layouts.push_back(cldnn::layout(params->data_type, params->fmt, cldnn::tensor(1, 1, output_features, 1))); // biases
3056 all_generic_params.push_back(params);
3064 // Create all the combinations for the test.
3065 for (cldnn::primitive* layer_param : all_layer_params)
3067 for (tests::test_params* test_param : all_generic_params)
3069 all_test_params.push_back(std::make_tuple(test_param, layer_param));
3073 return all_test_params;
// Only the two layouts exercised by this suite are supported.
3076 virtual bool is_format_supported(cldnn::format format)
3078 return ((format == cldnn_format_type::cldnn_format_bfyx) || (format == cldnn_format_type::cldnn_format_yxfb));
// Computes the convolution output tensor from input size, kernel size,
// stride, dilation and (negative-capable) input offset.
3081 virtual cldnn::tensor get_expected_output_tensor()
3083 const cldnn::convolution* convolution = (cldnn::convolution*)layer_params;
3084 tensor input_size = generic_params->input_layouts[0].size;
3085 tensor dilation = convolution->dilation;
3086 tensor stride = convolution->stride;
3087 tensor input_offset = convolution->input_offset;
3088 tensor weights_size = generic_params->input_layouts[1].size;
// Effective kernel footprint once dilation is applied.
3090 int kernel_extent_y = dilation.spatial[1] * (weights_size.spatial[1] - 1) + 1;
3091 int kernel_extent_x = dilation.spatial[0] * (weights_size.spatial[0] - 1) + 1;
3093 // Calculate output size
3094 int output_size_y = 1 + (input_size.spatial[1] - kernel_extent_y - 2 * input_offset.spatial[1]) / stride.spatial[1];
3095 int output_size_x = 1 + (input_size.spatial[0] - kernel_extent_x - 2 * input_offset.spatial[0]) / stride.spatial[0];
3096 int output_features = weights_size.batch[0];
3098 return cldnn::tensor(input_size.batch[0], output_features, output_size_x, output_size_y);
// Dispatches random-input generation on the run's data type.
3101 virtual void prepare_input_for_test(std::vector<cldnn::memory>& inputs)
3103 if (generic_params->data_type == data_types::f32)
3105 prepare_input_for_test_typed<float>(inputs);
3109 prepare_input_for_test_typed<FLOAT16>(inputs);
// Fills data/weights/bias inputs with random values in [-2, 2].
3113 template<typename Type>
3114 void prepare_input_for_test_typed(std::vector<cldnn::memory>& inputs)
// k controls value granularity; coarser steps for f16 keep results exact.
3116 int k = (generic_params->data_type == data_types::f32) ? 8 : 4;
3119 auto input = inputs[0];
3120 auto input_size = inputs[0].get_layout().size;
3121 VVVVF<Type> input_rnd = generate_random_4d<Type>(input_size.batch[0], input_size.feature[0], input_size.spatial[1], input_size.spatial[0], -2, 2, k);
3122 VF<Type> input_rnd_vec = flatten_4d<Type>(input.get_layout().format, input_rnd);
3123 set_values(input, input_rnd_vec);
3126 auto weight_input = inputs[1];
3127 auto weight_size = inputs[1].get_layout().size;
3128 VVVVF<Type> weight_rnd = generate_random_4d<Type>(weight_size.batch[0], weight_size.feature[0], weight_size.spatial[1], weight_size.spatial[0], -2, 2, k);
3129 VF<Type> weight_rnd_vec = flatten_4d<Type>(weight_input.get_layout().format, weight_rnd);
3130 set_values(weight_input, weight_rnd_vec);
3133 auto bias_input = inputs[2];
3134 auto bias_size = inputs[2].get_layout().size;
3135 VF<Type> bias_rnd = generate_random_1d<Type>(bias_size.spatial[0], -2, 2, k);
3136 set_values(bias_input, bias_rnd);
// CPU reference convolution: bias first, then the dilated/strided
// multiply-accumulate, then the optional leaky-ReLU activation.
3139 template<typename Type>
3140 memory generate_reference_typed(const std::vector<cldnn::memory>& inputs)
3142 // Output reference is always bfyx.
3144 const cldnn::convolution* convolution = (cldnn::convolution*)layer_params;
3146 data_types dt = inputs[0].get_layout().data_type;
3148 tensor input_size = inputs[0].get_layout().size;
3149 tensor dilation = convolution->dilation;
3150 tensor stride = convolution->stride;
3151 bool is_relu_fused = convolution->with_activation;
3152 float activation_slope = convolution->activation_negative_slope;
3153 tensor input_offset = convolution->input_offset;
3154 tensor weights_size = inputs[1].get_layout().size;
3155 padding output_padding = convolution->output_padding;
3157 tensor output_size = get_expected_output_tensor();
3159 // Calculate output size
3160 int output_size_y = output_size.spatial[1];
3161 int output_size_x = output_size.spatial[0];
3162 int output_features = weights_size.batch[0];
3163 int input_features = weights_size.feature[0];
3165 auto output = memory::allocate( engine, cldnn::layout(dt, cldnn::format::bfyx, output_size, output_padding) );
3167 auto input_mem = inputs[0].pointer<Type>();
3168 auto weights_mem = inputs[1].pointer<Type>();
3169 auto bias_mem = inputs[2].pointer<Type>();
3170 auto output_mem = output.pointer<Type>();
// Buffer size includes the output padding, hence the explicit zero-fill.
3172 tensor output_buffer_size = output.get_layout().get_buffer_size();
3174 // Initialized output with zeros.
3175 std::fill(output_mem.begin(), output_mem.end(), static_cast<Type>(0));
// Seed every output element with its feature's bias.
3178 for (int b = 0; b < input_size.batch[0]; b++)
3180 for (int out_f = 0; out_f < output_features; out_f++)
3182 for (int y = 0; y < output_size_y; y++)
3184 for (int x = 0; x < output_size_x; x++)
// Output index accounts for lower output padding in both spatial dims.
3186 int output_index = (b * output_buffer_size.feature[0] + out_f) * output_buffer_size.spatial[1] * output_buffer_size.spatial[0];
3187 tensor lower_output_padding = convolution->output_padding.lower_size();
3188 output_index += (lower_output_padding.spatial[1] + y) * output_buffer_size.spatial[0] + lower_output_padding.spatial[0] + x;
3190 output_mem[output_index] += bias_mem[out_f];
// Precompute linear-index descriptors so per-element lookups are cheap.
3196 const auto input0_desc = get_linear_memory_desc(inputs[0].get_layout());
3197 const auto input1_desc = get_linear_memory_desc(inputs[1].get_layout());
3199 // Convolve with weights
3200 for (int b = 0; b < input_size.batch[0]; b++)
3203 for (int out_f = 0; out_f < output_features; out_f++)
3205 for (int in_f = 0; in_f < input_features; in_f++)
3207 int input_fi = in_f;
3208 for (int y = 0; y < output_size_y; y++)
3210 for (int x = 0; x < output_size_x; x++)
3213 int output_fi = out_f;
3216 int output_index = (output_bi * output_buffer_size.feature[0] + output_fi) * output_buffer_size.spatial[1] * output_buffer_size.spatial[0];
3217 tensor lower_output_padding = convolution->output_padding.lower_size();
3218 output_index += (lower_output_padding.spatial[1] + output_yi) * output_buffer_size.spatial[0] + lower_output_padding.spatial[0] + output_xi;
// Kernel window walk; positions falling outside the input (possible
// because input_offset may be negative) contribute nothing.
3220 for (int kernel_y = 0; kernel_y < weights_size.spatial[1]; kernel_y++)
3222 int input_yi = y * stride.spatial[1] + input_offset.spatial[1] + kernel_y * dilation.spatial[1];
3223 if ((input_yi < 0) || (input_yi >= input_size.spatial[1]))
3228 for (int kernel_x = 0; kernel_x < weights_size.spatial[0]; kernel_x++)
3230 int input_xi = x * stride.spatial[0] + input_offset.spatial[0] + kernel_x * dilation.spatial[0];
3231 if ((input_xi < 0) || (input_xi >= input_size.spatial[0]))
3236 size_t input_index = get_linear_index(inputs[0].get_layout(), input_bi, input_fi, input_yi, input_xi, input0_desc);
3238 int weight_bi = out_f;
3239 int weight_fi = in_f;
3240 int weight_yi = kernel_y;
3241 int weight_xi = kernel_x;
3242 size_t weight_index = get_linear_index(inputs[1].get_layout(), weight_bi, weight_fi, weight_yi, weight_xi, input1_desc);
3243 output_mem[output_index] += input_mem[input_index] * weights_mem[weight_index];
// Fused leaky-ReLU over the whole buffer (padding included — padding
// elements are zero so the activation leaves them unchanged).
3255 for (int i = 0; i < (int)output_buffer_size.count(); i++)
3257 output_mem[i] = (output_mem[i] > 0.f) ? output_mem[i] : (output_mem[i] * (Type)activation_slope);
// Dispatches the reference computation on the run's data type.
3264 virtual memory generate_reference(const std::vector<cldnn::memory>& inputs)
3266 if (generic_params->data_type == data_types::f32)
3268 return generate_reference_typed<float>(inputs);
3272 return generate_reference_typed<FLOAT16>(inputs);
// Static parameter stores shared across all instantiations; owned raw
// pointers are released in TearDownTestCase above.
3278 static std::vector<tests::test_params*> all_generic_params;
3279 static std::vector<cldnn::primitive*> all_layer_params;
3280 static std::vector<std::tuple<tests::test_params*, cldnn::primitive*>> all_test_params;
// Out-of-class definitions for convolution_test's static parameter stores
// (populated lazily by generate_all_test_params at instantiation time).
3283 std::vector<tests::test_params*> convolution_test::all_generic_params = {};
3284 std::vector<cldnn::primitive*> convolution_test::all_layer_params = {};
3285 std::vector<std::tuple<tests::test_params*, cldnn::primitive*>> convolution_test::all_test_params = {};
// Parameterized entry point; the body (elided here) runs the generic-test
// harness for one (test_params, primitive) tuple.
3287 TEST_P(convolution_test, CONVOLUTION)
// DISABLED_ prefix keeps this exhaustive suite out of default runs; it can
// still be executed explicitly via --gtest_also_run_disabled_tests.
3292 INSTANTIATE_TEST_CASE_P(DISABLED_CONVOLUTION,
3294 ::testing::ValuesIn(convolution_test::generate_all_test_params()),
3295 tests::generic_test::custom_param_name_functor());