2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
19 #include <gtest/gtest.h>
20 #include "api/memory.hpp"
21 #include <api/input_layout.hpp>
22 #include "api/deconvolution.hpp"
23 #include <api/data.hpp>
24 #include <api/topology.hpp>
25 #include <api/network.hpp>
26 #include <api/engine.hpp>
27 #include "test_utils/test_utils.h"
28 #include "test_utils/float16.h"
29 #include "api/reorder.hpp"
31 using namespace cldnn;
32 using namespace tests;
// Deconvolution forward, f32: 2x2 filter over a 1x1x2x2 yxfb input,
// stride {1,1,1,1}, no padding, single bias term. Verifies exactly one
// output ("deconv") and compares it element-wise with expected values.
// NOTE(review): this chunk is a fragmentary extraction -- each line carries
// a stale embedded line number, and the topology constructor, expected
// values and closing braces are not visible here; code left byte-identical.
34 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad) {
55 const auto& engine = get_test_engine();
// Input and weights in yxfb layout; bias is a single bfyx scalar.
57 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
58 auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
59 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
61 set_values(input, { 8.f, 0.5f, 6.f, 9.f });
62 set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
63 set_values(biases, { 2.0f });
// Topology primitives (the enclosing `topology topology(...)` call is
// outside this view).
66 input_layout("input", input.get_layout()),
67 data("weights", weights),
68 data("biases", biases),
69 deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1,1,1,1 })
72 network network(engine, topology);
73 network.set_input_data("input", input);
75 auto outputs = network.execute();
76 EXPECT_EQ(outputs.size(), size_t(1));
77 EXPECT_EQ(outputs.begin()->first, "deconv");
79 auto output_prim = outputs.begin()->second.get_memory();
81 auto output_ptr = output_prim.pointer<float>();
// The expected values themselves are missing from this view.
83 std::vector<float> expected_output_vec = {
89 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
91 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
// Same as basic_wsiz2x2_in2x2x1x1_nopad but without a bias operand --
// exercises the deconvolution overload that takes weights only.
// NOTE(review): fragmentary extraction; topology ctor / expected values
// are not visible here. Code left byte-identical.
95 TEST(deconvolution_f32_fw_gpu, no_bias_basic_wsiz2x2_in2x2x1x1_nopad) {
116 const auto& engine = get_test_engine();
118 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
119 auto weights = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
121 set_values(input, { 8.f, 0.5f, 6.f, 9.f });
122 set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
125 input_layout("input", input.get_layout()),
126 data("weights", weights),
// No bias list: default stride/offset overload.
127 deconvolution("deconv", "input", { "weights" })
130 network network(engine, topology);
131 network.set_input_data("input", input);
133 auto outputs = network.execute();
134 EXPECT_EQ(outputs.size(), size_t(1));
135 EXPECT_EQ(outputs.begin()->first, "deconv");
137 auto output_prim = outputs.begin()->second.get_memory();
139 auto output_ptr = output_prim.pointer<float>();
141 std::vector<float> expected_output_vec = {
147 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
149 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
// Variant of the no-pad test with the weights stored in bfyx layout
// (input stays yxfb) -- same data, same stride {1,1,1,1}.
// NOTE(review): fragmentary extraction; topology ctor / expected values
// are not visible here. Code left byte-identical.
153 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad_bfyx) { // Filter : 2x2
173 const auto& engine = get_test_engine();
175 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
176 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
177 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
179 set_values(input, { 8.f, 0.5f, 6.f, 9.f });
180 set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
181 set_values(biases, { 2.0f });
184 input_layout("input", input.get_layout()),
185 data("weights", weights),
186 data("biases", biases),
187 deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1,1,1,1 })
190 network network(engine, topology);
191 network.set_input_data("input", input);
193 auto outputs = network.execute();
194 EXPECT_EQ(outputs.size(), size_t(1));
195 EXPECT_EQ(outputs.begin()->first, "deconv");
197 auto output_prim = outputs.begin()->second.get_memory();
199 auto output_ptr = output_prim.pointer<float>();
201 std::vector<float> expected_output_vec = {
207 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
209 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
// Padded case: input offset {0,0,-1,-1} -- presumably pad=1 on x/y, as the
// test name suggests (TODO confirm against deconvolution API docs). Output
// shrinks to a single value, so only output_ptr[0] is checked.
// NOTE(review): fragmentary extraction; topology ctor not visible here.
213 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_pad1) {
233 const auto& engine = get_test_engine();
235 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
236 auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
237 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
239 set_values(input, { 8.f, 0.5f, 6.f, 9.f });
240 set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
241 set_values(biases, { 2.0f });
244 input_layout("input", input.get_layout()),
245 data("weights", weights),
246 data("biases", biases),
247 deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 1, 1 }, { 0, 0, -1, -1 })
250 network network(engine, topology);
251 network.set_input_data("input", input);
253 auto outputs = network.execute();
254 EXPECT_EQ(outputs.size(), size_t(1));
255 EXPECT_EQ(outputs.begin()->first, "deconv");
257 auto output_prim = outputs.begin()->second.get_memory();
259 auto output_ptr = output_prim.pointer<float>();
// Single-element comparison: 8*1.5 + 0.5*3.5 + 6*0.5 + 9*(-2) + 2 = 0.75.
261 EXPECT_FLOAT_EQ(0.75f, output_ptr[0]);
// Stride-2 deconvolution, no padding: 2x2 input upsampled to a 4x4 output.
// The full 4x4 expected grid is listed below.
// NOTE(review): fragmentary extraction; topology ctor and the vector's
// closing brace are not visible here. Code left byte-identical.
264 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride2_nopad) {
284 const auto& engine = get_test_engine();
286 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
287 auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
288 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
290 set_values(input, { 8.f, 0.5f, 6.f, 9.f });
291 set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
292 set_values(biases, { 1.0f });
295 input_layout("input", input.get_layout()),
296 data("weights", weights),
297 data("biases", biases),
// Stride {b,f,x,y} = {1,1,2,2}.
298 deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1,1,2,2 })
301 network network(engine, topology);
302 network.set_input_data("input", input);
304 auto outputs = network.execute();
305 EXPECT_EQ(outputs.size(), size_t(1));
306 EXPECT_EQ(outputs.begin()->first, "deconv");
308 auto output_prim = outputs.begin()->second.get_memory();
310 auto output_ptr = output_prim.pointer<float>();
312 std::vector<float> expected_output_vec = {
313 -15.f, 5.f, 0.f, 1.25f,
// NOTE(review): 1.75 lacks the `f` suffix (harmless implicit conversion).
314 29.f, 13.f, 2.75f, 1.75,
315 -11.f, 4.f, -17.f, 5.5f,
316 22.f, 10.f, 32.5f, 14.5f
319 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
321 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
// 3x3 filter, stride 4, input offset {0,0,-2,-2} (presumably pad=2, per
// the test name -- TODO confirm). Bias is zero so the output is the raw
// deconvolution result.
// NOTE(review): fragmentary extraction; topology ctor / expected values
// are not visible here. Code left byte-identical.
325 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride4_pad2) {
349 const auto& engine = get_test_engine();
351 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
352 auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } });
353 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
355 set_values(input, { 8.f, 0.5f, 6.f, 9.f });
356 set_values(weights, { -2.0f, 0.5f, 1.f, 3.5f, 1.5f, 2.f, 3.f, 4.f, 5.f });
357 set_values(biases, { 0.0f });
360 input_layout("input", input.get_layout()),
361 data("weights", weights),
362 data("biases", biases),
363 deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 4, 4 }, { 0, 0, -2, -2 })
366 network network(engine, topology);
367 network.set_input_data("input", input);
369 auto outputs = network.execute();
370 EXPECT_EQ(outputs.size(), size_t(1));
371 EXPECT_EQ(outputs.begin()->first, "deconv");
373 auto output_prim = outputs.begin()->second.get_memory();
375 auto output_ptr = output_prim.pointer<float>();
377 std::vector<float> expected_output_vec = {
383 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
385 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
// Batch-2 case (yxfb input {2,1,2,2}): stride 2 with offset {0,0,-1,-1}.
// Expected output interleaves the two batches per yxfb ordering.
// NOTE(review): fragmentary extraction; topology ctor / closing braces
// are not visible here. Code left byte-identical.
389 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_stride2_pad1) {
411 const auto& engine = get_test_engine();
413 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 1, 2, 2 } });
414 auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
415 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
// yxfb: values alternate batch0/batch1 per spatial location.
417 set_values(input, { 8.f, 1.f, 0.5f, 3.f, 6.f, 2.f, 9.f, 4.f });
418 set_values(weights, { -2.f, 2.f, 7.f, -0.5f });
419 set_values(biases, { 1.0f });
422 input_layout("input", input.get_layout()),
423 data("weights", weights),
424 data("biases", biases),
425 deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
428 network network(engine, topology);
429 network.set_input_data("input", input);
431 auto outputs = network.execute();
432 EXPECT_EQ(outputs.size(), size_t(1));
433 EXPECT_EQ(outputs.begin()->first, "deconv");
435 auto output_prim = outputs.begin()->second.get_memory();
437 auto output_ptr = output_prim.pointer<float>();
439 std::vector<float> expected_output_vec = {
440 -3.f, 0.5f, 4.5f, 22.f,
441 13.f, 5.f, -17.f, -7.f
444 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
446 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
// Two output feature maps: weights {2,1,2,2} and a per-feature bias pair
// {1, 5}; stride 2 with offset {0,0,-1,-1}.
// NOTE(review): fragmentary extraction; topology ctor / closing braces
// are not visible here. Code left byte-identical.
450 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1) {
476 const auto& engine = get_test_engine();
478 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
479 auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 1, 2, 2 } });
480 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } });
482 set_values(input, { 8.f, 0.5f, 6.f, 9.f });
// yxfb weights: each value duplicated for the two output features.
483 set_values(weights, { -2.f, -2.f, 2.f, 2.f, 7.f, 7.f, -0.5f, -0.5f });
484 set_values(biases, { 1.0f, 5.0f });
487 input_layout("input", input.get_layout()),
488 data("weights", weights),
489 data("biases", biases),
490 deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
493 network network(engine, topology);
494 network.set_input_data("input", input);
496 auto outputs = network.execute();
497 EXPECT_EQ(outputs.size(), size_t(1));
498 EXPECT_EQ(outputs.begin()->first, "deconv");
500 auto output_prim = outputs.begin()->second.get_memory();
502 auto output_ptr = output_prim.pointer<float>();
504 std::vector<float> expected_output_vec = {
505 -3.f, 1.f, 4.5f, 8.5f,
506 13.f, 17.f, -17.f, -13.f
509 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
511 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
// bfyx input, batch 2: same math as the yxfb batch-2 test but with
// batch-major data ordering; stride 2, offset {0,0,-1,-1}.
// NOTE(review): fragmentary extraction; topology ctor and part of the
// expected vector are not visible here. Code left byte-identical.
515 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad1) {
537 const auto& engine = get_test_engine();
539 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 2, 2 } });
540 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
541 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
// bfyx: first four values are batch 0, last four are batch 1.
543 set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f });
544 set_values(weights, { -2.f, 2.f, 7.f, -0.5f });
545 set_values(biases, { 1.0f });
548 input_layout("input", input.get_layout()),
549 data("weights", weights),
550 data("biases", biases),
551 deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
554 network network(engine, topology);
555 network.set_input_data("input", input);
557 auto outputs = network.execute();
558 EXPECT_EQ(outputs.size(), size_t(1));
559 EXPECT_EQ(outputs.begin()->first, "deconv");
561 auto output_prim = outputs.begin()->second.get_memory();
563 auto output_ptr = output_prim.pointer<float>();
565 std::vector<float> expected_output_vec = {
566 -3.f, 4.5f, 13.f, -17.f,
570 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
572 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
// Same as the bfyx stride-2 test, but the input is first run through a
// reorder that adds explicit layout padding {x=1, y=2}; the result must
// be unaffected by the padding.
// NOTE(review): fragmentary extraction; topology ctor and part of the
// expected vector are not visible here. Code left byte-identical.
576 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad1_input_padding) {
582 // Input Padding : 2x1 (with reorder)
599 const auto& engine = get_test_engine();
601 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
602 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
603 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
605 set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f });
606 set_values(weights, { -2.f, 2.f, 7.f, -0.5f });
607 set_values(biases, { 1.0f });
610 input_layout("input", input.get_layout()),
// Reorder only adds padding to the layout; data is unchanged.
611 reorder("reorder", "input", input.get_layout().with_padding(padding{ { 0, 0, 1, 2 }, 0 })),
612 data("weights", weights),
613 data("biases", biases),
// Deconvolution consumes the padded reorder output, not the raw input.
614 deconvolution("deconv", "reorder", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
617 network network(engine, topology);
618 network.set_input_data("input", input);
620 auto outputs = network.execute();
621 EXPECT_EQ(outputs.size(), size_t(1));
622 EXPECT_EQ(outputs.begin()->first, "deconv");
624 auto output_prim = outputs.begin()->second.get_memory();
626 auto output_ptr = output_prim.pointer<float>();
628 std::vector<float> expected_output_vec = {
629 -3.f, 4.5f, 13.f, -17.f,
633 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
635 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
// Two-output-feature variant with explicit input padding added via a
// reorder ({x=1, y=2}); expected output matches the unpadded
// basic_wsiz2x2x2_in2x2x1x1_stride2_pad1 test.
// NOTE(review): fragmentary extraction; topology ctor / closing braces
// are not visible here. Code left byte-identical.
639 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1_input_padding) {
645 // Input Padding : 2x1 (with reorder)
666 const auto& engine = get_test_engine();
668 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
669 auto weights = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 1, 2, 2 } });
670 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
672 set_values(input, { 8.f, 0.5f, 6.f, 9.f });
673 set_values(weights, { -2.f, -2.f, 2.f, 2.f, 7.f, 7.f, -0.5f, -0.5f });
674 set_values(biases, { 1.0f, 5.0f });
677 input_layout("input", input.get_layout()),
678 reorder("reorder", "input", input.get_layout().with_padding(padding{ { 0, 0, 1, 2 }, 0 })),
679 data("weights", weights),
680 data("biases", biases),
681 deconvolution("deconv", "reorder", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
684 network network(engine, topology);
685 network.set_input_data("input", input);
687 auto outputs = network.execute();
688 EXPECT_EQ(outputs.size(), size_t(1));
689 EXPECT_EQ(outputs.begin()->first, "deconv");
691 auto output_prim = outputs.begin()->second.get_memory();
693 auto output_ptr = output_prim.pointer<float>();
695 std::vector<float> expected_output_vec = {
696 -3.f, 1.f, 4.5f, 8.5f,
697 13.f, 17.f, -17.f, -13.f
700 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
702 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
// Mixed-layout case: bfyx input with yxfb weights -- exercises the
// implicit weight-layout handling; otherwise identical to the bfyx
// stride-2 batch-2 test.
// NOTE(review): fragmentary extraction; topology ctor and part of the
// expected vector are not visible here. Code left byte-identical.
706 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_yxfb_stride2_pad1) {
728 const auto& engine = get_test_engine();
730 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 2, 2 } });
731 auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
732 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
734 set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f });
735 set_values(weights, { -2.f, 2.f, 7.f, -0.5f });
736 set_values(biases, { 1.0f });
739 input_layout("input", input.get_layout()),
740 data("weights", weights),
741 data("biases", biases),
742 deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
745 network network(engine, topology);
746 network.set_input_data("input", input);
748 auto outputs = network.execute();
749 EXPECT_EQ(outputs.size(), size_t(1));
750 EXPECT_EQ(outputs.begin()->first, "deconv");
752 auto output_prim = outputs.begin()->second.get_memory();
754 auto output_ptr = output_prim.pointer<float>();
756 std::vector<float> expected_output_vec = {
757 -3.f, 4.5f, 13.f, -17.f,
761 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
763 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
// fp16 input with fp32 weights/bias and optimize_data enabled -- verifies
// the mixed-precision path produces the same values as the f32 test.
// Output is read as raw uint16_t and converted via float16_to_float32
// before comparison.
// NOTE(review): fragmentary extraction; topology ctor and part of the
// expected vector are not visible here. Code left byte-identical.
767 TEST(deconvolution_f16_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_yxfb_stride2_pad1) {
789 const auto& engine = get_test_engine();
791 auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 2, 1, 2, 2 } });
792 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
793 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
// optimize_data lets the graph compiler fuse/reorder (incl. precision
// conversions) before execution.
795 cldnn::build_options options;
796 options.set_option(cldnn::build_option::optimize_data(true));
798 set_values(input, { FLOAT16(8.f), FLOAT16(0.5f), FLOAT16(6.f), FLOAT16(9.f),
799 FLOAT16(1.f), FLOAT16(3.f), FLOAT16(2.f), FLOAT16(4.f) });
800 set_values(weights, { -2.f, 2.f, 7.f, -0.5f});
801 set_values(biases, { 1.0f });
804 input_layout("input", input.get_layout()),
805 data("weights", weights),
806 data("biases", biases),
807 deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
810 network network(engine, topology, options);
811 network.set_input_data("input", input);
813 auto outputs = network.execute();
814 EXPECT_EQ(outputs.size(), size_t(1));
815 EXPECT_EQ(outputs.begin()->first, "deconv");
817 auto output_prim = outputs.begin()->second.get_memory();
// fp16 output viewed as raw 16-bit words; converted below for comparison.
819 auto output_ptr = output_prim.pointer<uint16_t>();
821 std::vector<float> expected_output_vec = {
822 -3.f, 4.5f, 13.f, -17.f,
826 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
828 EXPECT_FLOAT_EQ(expected_output_vec[i], float16_to_float32(output_ptr[i]));
// Split-2 deconvolution: two feature groups, each with its own 2x2 weight
// set and scalar bias, passed as parallel primitive-id lists. Each group
// processes one input feature independently.
// NOTE(review): fragmentary extraction; topology ctor / closing braces
// are not visible here. Code left byte-identical.
832 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2) {
861 const auto& engine = get_test_engine();
863 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 2, 2 } });
864 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
865 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
866 auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
867 auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
869 set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f });
870 set_values(weights, { -2.f, 2.f, 7.f, -0.5f });
871 set_values(biases, { 1.0f });
872 set_values(weights2, { -4.f, 1.f, -9.f, -7.f });
873 set_values(biases2, { -1.0f });
876 input_layout("input", input.get_layout()),
877 data("weights", weights),
878 data("biases", biases),
879 data("weights2", weights2),
880 data("biases2", biases2),
// Two weight ids + two bias ids => split factor of 2.
881 deconvolution("deconv", "input", { "weights", "weights2" }, { "biases", "biases2" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
884 network network(engine, topology);
885 network.set_input_data("input", input);
887 auto outputs = network.execute();
888 EXPECT_EQ(outputs.size(), size_t(1));
889 EXPECT_EQ(outputs.begin()->first, "deconv");
891 auto output_prim = outputs.begin()->second.get_memory();
893 auto output_ptr = output_prim.pointer<float>();
// First row = group 1 output, second row = group 2 output.
895 std::vector<float> expected_output_vec = {
896 -3.f, 4.5f, 13.f, -17.f,
897 -8.f, -28.f, 1.f, -17.f
900 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
902 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
// Grouped (groups=2) equivalent of the split2 test: a single joined
// weight tensor {2,1,2,2} and bias {1,2,1,1} with an explicit group count
// instead of per-group primitive lists. Expected output is identical.
// NOTE(review): fragmentary extraction; the engine declaration, topology
// ctor and closing braces are not visible here. Code left byte-identical.
906 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group2) {
907 // data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2
911 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 2, 2 } });
912 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
913 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
915 set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f });
// Both groups' filters packed into one tensor.
916 set_values(weights, {
917 -2.f, 2.f, 7.f, -0.5f,
918 -4.f, 1.f, -9.f, -7.f
920 set_values(biases, { 1.0f, -1.0f });
923 input_layout("input", input.get_layout()),
924 data("weights", weights),
925 data("biases", biases),
// The `2` argument is the group count.
926 deconvolution("deconv", "input", { "weights" }, { "biases" }, 2, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
929 network network(engine, topology);
930 network.set_input_data("input", input);
932 auto outputs = network.execute();
933 EXPECT_EQ(outputs.size(), size_t(1));
934 EXPECT_EQ(outputs.begin()->first, "deconv");
936 auto output_prim = outputs.begin()->second.get_memory();
938 auto output_ptr = output_prim.pointer<float>();
940 std::vector<float> expected_output_vec = {
941 -3.f, 4.5f, 13.f, -17.f,
942 -8.f, -28.f, 1.f, -17.f
945 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
947 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
// Depthwise-separable optimization, split 16: the loop below creates 8
// pairs of (weights, weights2) / (biases, biases2) data primitives --
// 16 splits total -- all fed to one deconvolution via id vectors. Every
// split uses the same two filters, so the 16-feature output repeats the
// same two 2x2 result tiles.
// NOTE(review): fragmentary extraction; some braces/topology.add calls
// are not visible here. Code left byte-identical.
951 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2_depthwise_sep_opt) {
952 // Test for depthwise separable optimization, there are 16 weights and biases (split 16)
953 // data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2
955 const auto& engine = get_test_engine();
957 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 2, 2 } });
// 16 features, each carrying the same 2x2 pattern (alternating per pair).
959 { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
960 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
961 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
962 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
963 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
964 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
965 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
966 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f
969 topology topology(input_layout("input", input.get_layout()));
971 std::vector<primitive_id> weights_vec;
972 std::vector<primitive_id> bias_vec;
// Build 8 x 2 = 16 uniquely-named weight/bias data primitives.
974 for (uint32_t i = 0; i < 8; i++)
976 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
977 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
978 auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
979 auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
981 set_values(weights, { -2.f, 2.f, 7.f, -0.5f });
982 set_values(biases, { 1.0f });
983 set_values(weights2, { -4.f, 1.f, -9.f, -7.f });
984 set_values(biases2, { -1.0f });
986 primitive_id weights_id = "weights_" + std::to_string(i);
987 primitive_id weights2_id = "weights2_" + std::to_string(i);
988 primitive_id bias_id = "biases_" + std::to_string(i);
989 primitive_id bias2_id = "biases2_" + std::to_string(i);
991 weights_vec.push_back(weights_id);
992 weights_vec.push_back(weights2_id);
993 bias_vec.push_back(bias_id);
994 bias_vec.push_back(bias2_id);
997 data(weights_id, weights),
998 data(bias_id, biases),
999 data(weights2_id, weights2),
1000 data(bias2_id, biases2)
1004 topology.add(deconvolution("deconv", "input", weights_vec, bias_vec, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }));
1006 network network(engine, topology);
1007 network.set_input_data("input", input);
1009 auto outputs = network.execute();
1010 EXPECT_EQ(outputs.size(), size_t(1));
1011 EXPECT_EQ(outputs.begin()->first, "deconv");
1013 auto output_prim = outputs.begin()->second.get_memory();
1015 auto output_ptr = output_prim.pointer<float>();
// 8 repeated rows: [split-A tile | split-B tile] per weight pair.
1017 std::vector<float> expected_output_vec = {
1018 -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1019 -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1020 -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1021 -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1022 -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1023 -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1024 -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1025 -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1028 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1030 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
// Grouped (groups=16) equivalent of the split-16 depthwise test: one
// joined weight tensor {16,1,2,2} and bias {1,16,1,1} with alternating
// filter pairs; expected output matches the split version exactly.
// NOTE(review): fragmentary extraction; the engine declaration, some
// set_values/topology.add calls and braces are not visible here. Code
// left byte-identical.
1034 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group16) {
1035 // Test for depthwise separable optimization, there are 16 joined weights and biases (group 16)
1036 // data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2_depthwise_sep_opt
1040 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 2, 2 } });
1042 { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1043 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1044 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1045 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1046 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1047 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1048 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1049 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f
1052 topology topology(input_layout("input", input.get_layout()));
1054 std::vector<primitive_id> weights_vec;
1055 std::vector<primitive_id> bias_vec;
// Single joined weight/bias tensors covering all 16 groups.
1057 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 16, 1, 2, 2 } });
1058 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 1, 1 } });
// Filters alternate between the two patterns used by the split test.
1062 -2.f, 2.f, 7.f, -0.5f,
1063 -4.f, 1.f, -9.f, -7.f,
1064 -2.f, 2.f, 7.f, -0.5f,
1065 -4.f, 1.f, -9.f, -7.f,
1066 -2.f, 2.f, 7.f, -0.5f,
1067 -4.f, 1.f, -9.f, -7.f,
1068 -2.f, 2.f, 7.f, -0.5f,
1069 -4.f, 1.f, -9.f, -7.f,
1070 -2.f, 2.f, 7.f, -0.5f,
1071 -4.f, 1.f, -9.f, -7.f,
1072 -2.f, 2.f, 7.f, -0.5f,
1073 -4.f, 1.f, -9.f, -7.f,
1074 -2.f, 2.f, 7.f, -0.5f,
1075 -4.f, 1.f, -9.f, -7.f,
1076 -2.f, 2.f, 7.f, -0.5f,
1077 -4.f, 1.f, -9.f, -7.f
1080 set_values(biases, { 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f });
1082 data("weights", weights),
1083 data("bias", biases)
// The `16` argument is the group count.
1086 topology.add(deconvolution("deconv", "input", { "weights" }, { "bias" }, 16, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }));
1088 network network(engine, topology);
1089 network.set_input_data("input", input);
1091 auto outputs = network.execute();
1092 EXPECT_EQ(outputs.size(), size_t(1));
1093 EXPECT_EQ(outputs.begin()->first, "deconv");
1095 auto output_prim = outputs.begin()->second.get_memory();
1097 auto output_ptr = output_prim.pointer<float>();
1099 std::vector<float> expected_output_vec = {
1100 -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1101 -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1102 -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1103 -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1104 -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1105 -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1106 -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1107 -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1110 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1112 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
// Depthwise-separable optimization with 2 output features per split:
// like split2_depthwise_sep_opt but each of the 16 splits uses weights
// {2,1,2,2} and bias {1,2,1,1}, so every result tile appears twice in
// the 32-feature output.
// NOTE(review): fragmentary extraction; some braces/topology.add calls
// are not visible here. Code left byte-identical.
1116 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2_depthwise_sep_opt_ofm2) {
1117 // Test for depthwise separable optimization, there are 16 weights and biases (split 16)
1118 // data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2
1120 const auto& engine = get_test_engine();
1122 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 2, 2 } });
1124 { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1125 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1126 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1127 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1128 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1129 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1130 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1131 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f
1134 topology topology(input_layout("input", input.get_layout()));
1136 std::vector<primitive_id> weights_vec;
1137 std::vector<primitive_id> bias_vec;
// Build 16 splits, each with 2 output feature maps (duplicated filters).
1139 for (uint32_t i = 0; i < 8; i++)
1141 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
1142 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
1143 auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
1144 auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
1146 set_values(weights, { -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f });
1147 set_values(biases, { 1.0f, 1.0f });
1148 set_values(weights2, { -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f });
1149 set_values(biases2, { -1.0f, -1.0f });
1151 primitive_id weights_id = "weights_" + std::to_string(i);
1152 primitive_id weights2_id = "weights2_" + std::to_string(i);
1153 primitive_id bias_id = "biases_" + std::to_string(i);
1154 primitive_id bias2_id = "biases2_" + std::to_string(i);
1156 weights_vec.push_back(weights_id);
1157 weights_vec.push_back(weights2_id);
1158 bias_vec.push_back(bias_id);
1159 bias_vec.push_back(bias2_id);
1162 data(weights_id, weights),
1163 data(bias_id, biases),
1164 data(weights2_id, weights2),
1165 data(bias2_id, biases2)
1169 topology.add(deconvolution("deconv", "input", weights_vec, bias_vec, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }));
1171 network network(engine, topology);
1172 network.set_input_data("input", input);
1174 auto outputs = network.execute();
1175 EXPECT_EQ(outputs.size(), size_t(1));
1176 EXPECT_EQ(outputs.begin()->first, "deconv");
1178 auto output_prim = outputs.begin()->second.get_memory();
1180 auto output_ptr = output_prim.pointer<float>();
// Each tile is doubled because every split emits 2 identical features.
1182 std::vector<float> expected_output_vec = {
1183 -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1184 -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1185 -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1186 -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1187 -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1188 -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1189 -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1190 -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1193 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1195 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1199 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group16_ofm2) {
1200 // Test for depthwise separable optimization, there are 16 joined weights and biases (group 16)
1201 // data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2_depthwise_sep_opt_ofm2
1205 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 2, 2 } });
1207 { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1208 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1209 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1210 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1211 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1212 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1213 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1214 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f
1217 topology topology(input_layout("input", input.get_layout()));
1219 std::vector<primitive_id> weights_vec;
1220 std::vector<primitive_id> bias_vec;
1222 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 32, 1, 2, 2 } });
1223 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 32, 1, 1 } });
1227 -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1228 -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1229 -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1230 -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1231 -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1232 -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1233 -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1234 -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1235 -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1236 -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1237 -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1238 -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1239 -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1240 -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1241 -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1242 -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1248 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f,
1249 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f
1254 data("weights", weights),
1255 data("bias", biases)
1258 topology.add(deconvolution("deconv", "input", { "weights" }, { "bias" }, 16, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }));
1260 network network(engine, topology);
1261 network.set_input_data("input", input);
1263 auto outputs = network.execute();
1264 EXPECT_EQ(outputs.size(), size_t(1));
1265 EXPECT_EQ(outputs.begin()->first, "deconv");
1267 auto output_prim = outputs.begin()->second.get_memory();
1269 auto output_ptr = output_prim.pointer<float>();
1271 std::vector<float> expected_output_vec = {
1272 -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1273 -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1274 -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1275 -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1276 -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1277 -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1278 -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1279 -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1282 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1284 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1288 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_split2_ofm3) {
1325 const auto& engine = get_test_engine();
1327 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 4, 1, 1 } });
1328 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 3, 2, 1, 1 } });
1329 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 3, 1, 1 } });
1330 auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 3, 2, 1, 1 } });
1331 auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 3, 1, 1 } });
1334 1.5f, 0.5f, 2.0f, -1.0f
1336 set_values(weights, { -2.0f, 1.0f, 1.0f, 3.0f, 0.5f, 8.0f });
1337 set_values(biases, { 1.0f, 5.0f, 3.0f });
1338 set_values(weights2, { 4.0f, -4.0f, 2.0f, 0.5f, -0.5f, 3.0f });
1339 set_values(biases2, { -1.0f, 2.5f, 2.0f });
1342 input_layout("input", input.get_layout()),
1343 data("weights", weights),
1344 data("biases", biases),
1345 data("weights2", weights2),
1346 data("biases2", biases2),
1347 deconvolution("deconv", "input", { "weights", "weights2" }, { "biases", "biases2" }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 })
1350 network network(engine, topology);
1351 network.set_input_data("input", input);
1353 auto outputs = network.execute();
1354 EXPECT_EQ(outputs.size(), size_t(1));
1355 EXPECT_EQ(outputs.begin()->first, "deconv");
1357 auto output_prim = outputs.begin()->second.get_memory();
1359 auto output_ptr = output_prim.pointer<float>();
1361 std::vector<float> expected_output_vec = {
1362 -1.5f, 8.0f, 7.75f, 11.0f, 6.0f, -2.0f
1364 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1366 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1370 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_group2_ofm3) {
1371 // data is similar as in basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_split2_ofm3
1375 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 4, 1, 1 } });
1376 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 6, 2, 1, 1 } });
1377 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 6, 1, 1 } });
1380 1.5f, 0.5f, 2.0f, -1.0f
1382 set_values(weights, {
1383 -2.0f, 1.0f, 1.0f, 3.0f, 0.5f, 8.0f,
1384 4.0f, -4.0f, 2.0f, 0.5f, -0.5f, 3.0f
1386 set_values(biases, {
1392 input_layout("input", input.get_layout()),
1393 data("weights", weights),
1394 data("biases", biases),
1395 deconvolution("deconv", "input", { "weights" }, { "biases" }, 2, { 1, 1, 1, 1 }, { 0, 0, 0, 0 })
1398 network network(engine, topology);
1399 network.set_input_data("input", input);
1401 auto outputs = network.execute();
1402 EXPECT_EQ(outputs.size(), size_t(1));
1403 EXPECT_EQ(outputs.begin()->first, "deconv");
1405 auto output_prim = outputs.begin()->second.get_memory();
1407 auto output_ptr = output_prim.pointer<float>();
1409 std::vector<float> expected_output_vec = {
1410 -1.5f, 8.0f, 7.75f, 11.0f, 6.0f, -2.0f
1412 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1414 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1417 TEST(deconvolution_f32_fw_gpu, basic3D_wsiz2x2x1_in1x1x2x2x1_nopad) {
1438 const auto& engine = get_test_engine();
1440 auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 2, 2, 1 } });
1441 auto weights = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 2, 2, 1 } });
1442 auto biases = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 1, 1, 1 } });
1444 set_values(input, { 8.f, 0.5f, 6.f, 9.f });
1445 set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
1446 set_values(biases, { 2.0f });
1449 input_layout("input", input.get_layout()),
1450 data("weights", weights),
1451 data("biases", biases),
1452 deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1,1,1,1,1 })
1455 network network(engine, topology);
1456 network.set_input_data("input", input);
1458 auto outputs = network.execute();
1459 EXPECT_EQ(outputs.size(), size_t(1));
1460 EXPECT_EQ(outputs.begin()->first, "deconv");
1462 auto output_prim = outputs.begin()->second.get_memory();
1464 auto output_ptr = output_prim.pointer<float>();
1466 std::vector<float> expected_output_vec = {
1472 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1474 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1478 TEST(deconvolution_f32_fw_gpu, basic3D_wsiz3x3x3_in1x1x4x4x4_nopad) {
1564 const auto& engine = get_test_engine();
1566 auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 4, 4, 4 } });
1567 auto weights = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 3, 3, 3 } });
1571 1.0f, 1.0f, 1.0f, 1.0f,
1572 1.0f, 1.0f, 1.0f, 1.0f,
1573 1.0f, 1.0f, 1.0f, 1.0f,
1574 1.0f, 1.0f, 1.0f, 1.0f,
1575 1.0f, 1.0f, 1.0f, 1.0f,
1576 1.0f, 1.0f, 1.0f, 1.0f,
1577 1.0f, 1.0f, 1.0f, 1.0f,
1578 1.0f, 1.0f, 1.0f, 1.0f,
1579 1.0f, 1.0f, 1.0f, 1.0f,
1580 1.0f, 1.0f, 1.0f, 1.0f,
1581 1.0f, 1.0f, 1.0f, 1.0f,
1582 1.0f, 1.0f, 1.0f, 1.0f,
1583 1.0f, 1.0f, 1.0f, 1.0f,
1584 1.0f, 1.0f, 1.0f, 1.0f,
1585 1.0f, 1.0f, 1.0f, 1.0f,
1586 1.0f, 1.0f, 1.0f, 1.0f
1588 set_values(weights, {
1601 input_layout("input", input.get_layout()),
1602 data("weights", weights),
1603 deconvolution("deconv", "input", { "weights" })
1606 network network(engine, topology);
1607 network.set_input_data("input", input);
1609 auto outputs = network.execute();
1610 EXPECT_EQ(outputs.size(), size_t(1));
1611 EXPECT_EQ(outputs.begin()->first, "deconv");
1613 auto output_prim = outputs.begin()->second.get_memory();
1615 auto output_ptr = output_prim.pointer<float>();
1617 std::vector<float> expected_output_vec = {
1618 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f,
1619 2.0f, 4.0f, 6.0f, 6.0f, 4.0f, 2.0f,
1620 3.0f, 6.0f, 9.0f, 9.0f, 6.0f, 3.0f,
1621 3.0f, 6.0f, 9.0f, 9.0f, 6.0f, 3.0f,
1622 2.0f, 4.0f, 6.0f, 6.0f, 4.0f, 2.0f,
1623 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f,
1625 2.0f, 4.0f, 6.0f, 6.0f, 4.0f, 2.0f,
1626 4.0f, 8.0f, 12.0f, 12.0f, 8.0f, 4.0f,
1627 6.0f, 12.0f, 18.0f, 18.0f, 12.0f, 6.0f,
1628 6.0f, 12.0f, 18.0f, 18.0f, 12.0f, 6.0f,
1629 4.0f, 8.0f, 12.0f, 12.0f, 8.0f, 4.0f,
1630 2.0f, 4.0f, 6.0f, 6.0f, 4.0f, 2.0f,
1632 3.0f, 6.0f, 9.0f, 9.0f, 6.0f, 3.0f,
1633 6.0f, 12.0f, 18.0f, 18.0f, 12.0f, 6.0f,
1634 9.0f, 18.0f, 27.0f, 27.0f, 18.0f, 9.0f,
1635 9.0f, 18.0f, 27.0f, 27.0f, 18.0f, 9.0f,
1636 6.0f, 12.0f, 18.0f, 18.0f, 12.0f, 6.0f,
1637 3.0f, 6.0f, 9.0f, 9.0f, 6.0f, 3.0f,
1639 3.0f, 6.0f, 9.0f, 9.0f, 6.0f, 3.0f,
1640 6.0f, 12.0f, 18.0f, 18.0f, 12.0f, 6.0f,
1641 9.0f, 18.0f, 27.0f, 27.0f, 18.0f, 9.0f,
1642 9.0f, 18.0f, 27.0f, 27.0f, 18.0f, 9.0f,
1643 6.0f, 12.0f, 18.0f, 18.0f, 12.0f, 6.0f,
1644 3.0f, 6.0f, 9.0f, 9.0f, 6.0f, 3.0f,
1646 2.0f, 4.0f, 6.0f, 6.0f, 4.0f, 2.0f,
1647 4.0f, 8.0f, 12.0f, 12.0f, 8.0f, 4.0f,
1648 6.0f, 12.0f, 18.0f, 18.0f, 12.0f, 6.0f,
1649 6.0f, 12.0f, 18.0f, 18.0f, 12.0f, 6.0f,
1650 4.0f, 8.0f, 12.0f, 12.0f, 8.0f, 4.0f,
1651 2.0f, 4.0f, 6.0f, 6.0f, 4.0f, 2.0f,
1653 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f,
1654 2.0f, 4.0f, 6.0f, 6.0f, 4.0f, 2.0f,
1655 3.0f, 6.0f, 9.0f, 9.0f, 6.0f, 3.0f,
1656 3.0f, 6.0f, 9.0f, 9.0f, 6.0f, 3.0f,
1657 2.0f, 4.0f, 6.0f, 6.0f, 4.0f, 2.0f,
1658 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f,
1661 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1663 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1667 TEST(deconvolution_f32_fw_gpu, basic3D_wsiz2x2x2_in1x1x2x2x2_stride2_nopad) {
1686 const auto& engine = get_test_engine();
1688 auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 2, 2, 2 } });
1689 auto weights = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 2, 2, 2 } });
1691 set_values(input, { 8.f, 0.5f, 6.f, 9.f, 8.f, 0.5f, 6.f, 9.f });
1692 set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f, -2.0f, 0.5f, 3.5f, 1.5f });
1693 //set_values(input, { 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f });
1694 //set_values(weights, { 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f });
1697 input_layout("input", input.get_layout()),
1698 data("weights", weights),
1699 deconvolution("deconv", "input", { "weights" }, { 1,1,2,2,2 })
1702 network network(engine, topology);
1703 network.set_input_data("input", input);
1705 auto outputs = network.execute();
1706 EXPECT_EQ(outputs.size(), size_t(1));
1707 EXPECT_EQ(outputs.begin()->first, "deconv");
1709 auto output_prim = outputs.begin()->second.get_memory();
1710 auto output_ptr = output_prim.pointer<float>();
1712 std::vector<float> expected_output_vec = {
1713 -16.f, 4.f, -1.f, 0.25f,
1714 28.f, 12.f, 1.75f, 0.75f,
1715 -12.f, 3.f, -18.f, 4.5f,
1716 21.f, 9.f, 31.5f, 13.5f,
1717 -16.f, 4.f, -1.f, 0.25f,
1718 28.f, 12.f, 1.75f, 0.75f,
1719 -12.f, 3.f, -18.f, 4.5f,
1720 21.f, 9.f, 31.5f, 13.5f,
1721 -16.f, 4.f, -1.f, 0.25f,
1722 28.f, 12.f, 1.75f, 0.75f,
1723 -12.f, 3.f, -18.f, 4.5f,
1724 21.f, 9.f, 31.5f, 13.5f,
1725 -16.f, 4.f, -1.f, 0.25f,
1726 28.f, 12.f, 1.75f, 0.75f,
1727 -12.f, 3.f, -18.f, 4.5f,
1728 21.f, 9.f, 31.5f, 13.5f
1731 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1733 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1737 TEST(deconvolution_f32_fw_gpu, basic3D_wsiz2x2x2_in1x1x2x2x2_stride2_pad1) {
1761 const auto& engine = get_test_engine();
1763 auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 2, 2, 2 } });
1764 auto weights = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 2, 2, 2 } });
1766 set_values(input, { 8.f, 0.5f, 6.f, 9.f, 8.f, 0.5f, 6.f, 9.f });
1767 set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f, -2.0f, 0.5f, 3.5f, 1.5f });
1770 input_layout("input", input.get_layout()),
1771 data("weights", weights),
1772 deconvolution("deconv", "input", { "weights" }, { 1,1,2,2,2 }, { 0, 0, -1, -1, -1 })
1775 network network(engine, topology);
1776 network.set_input_data("input", input);
1778 auto outputs = network.execute();
1779 EXPECT_EQ(outputs.size(), size_t(1));
1780 EXPECT_EQ(outputs.begin()->first, "deconv");
1782 auto output_prim = outputs.begin()->second.get_memory();
1783 auto output_ptr = output_prim.pointer<float>();
1785 std::vector<float> expected_output_vec = {
1786 12.f, 1.75f, 3.f, -18.f,
1787 12.f, 1.75f, 3.f, -18.f
1790 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1792 EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);