inference-engine/thirdparty/clDNN/tests/test_cases/deconvolution_gpu_test.cpp

   1 /*
   2 // Copyright (c) 2016 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 ///////////////////////////////////////////////////////////////////////////////////////////////////
  18
  19 #include <gtest/gtest.h>
  20 #include "api/memory.hpp"
  21 #include <api/input_layout.hpp>
  22 #include "api/deconvolution.hpp"
  23 #include <api/data.hpp>
  24 #include <api/topology.hpp>
  25 #include <api/network.hpp>
  26 #include <api/engine.hpp>
  27 #include "test_utils/test_utils.h"
  28 #include "test_utils/float16.h"
  29 #include "api/reorder.hpp"
  30
  31 using namespace cldnn;
  32 using namespace tests;
  33
  34 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad) {
  35     //  Filter : 2x2
  36     //  Input  : 2x2
  37     //  Output : 3x3
  38     //
  39     //  Input:
  40     //  8  0.5
  41     //  6  9
  42     //
  43     //  Filter
  44     //  -2   0.5
  45     //   3.5 1.5
  46     //
  47     //  Bias
  48     //  2
  49     //
  50     //  Output:
  51     //  -14    5     2.25
  52     //   18    0.75  7.25
  53     //   23    42.5  15.5
  54
  55     const auto& engine = get_test_engine();
  56
  57     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
  58     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
  59     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
  60
  61     set_values(input, { 8.f, 0.5f, 6.f, 9.f });
  62     set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
  63     set_values(biases, { 2.0f });
  64
  65     topology topology(
  66         input_layout("input", input.get_layout()),
  67         data("weights", weights),
  68         data("biases", biases),
  69         deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1,1,1,1 })
  70     );
  71
  72     network network(engine, topology);
  73     network.set_input_data("input", input);
  74
  75     auto outputs = network.execute();
  76     EXPECT_EQ(outputs.size(), size_t(1));
  77     EXPECT_EQ(outputs.begin()->first, "deconv");
  78
  79     auto output_prim = outputs.begin()->second.get_memory();
  80
  81     auto output_ptr = output_prim.pointer<float>();
  82
  83     std::vector<float> expected_output_vec = {
  84         -14.f, 5.f, 2.25f,
  85         18.f, 0.75f, 7.25f,
  86         23.f, 42.5f, 15.5f
  87     };
  88
  89     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
  90     {
  91         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
  92     }
  93 }
  94
  95 TEST(deconvolution_f32_fw_gpu, no_bias_basic_wsiz2x2_in2x2x1x1_nopad) {
  96     //  Filter : 2x2
  97     //  Input  : 2x2
  98     //  Output : 3x3
  99     //
 100     //  Input:
 101     //  8  0.5
 102     //  6  9
 103     //
 104     //  Filter
 105     //  -2   0.5
 106     //   3.5 1.5
 107     //
 108     //  no bias
 109     //
 110     //
 111     //  Output:
 112     //  -14    5     2.25
 113     //   18    0.75  7.25
 114     //   23    42.5  15.5
 115
 116     const auto& engine = get_test_engine();
 117
 118     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
 119     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
 120
 121     set_values(input, { 8.f, 0.5f, 6.f, 9.f });
 122     set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
 123
 124     topology topology(
 125         input_layout("input", input.get_layout()),
 126         data("weights", weights),
 127         deconvolution("deconv", "input", { "weights" })
 128     );
 129
 130     network network(engine, topology);
 131     network.set_input_data("input", input);
 132
 133     auto outputs = network.execute();
 134     EXPECT_EQ(outputs.size(), size_t(1));
 135     EXPECT_EQ(outputs.begin()->first, "deconv");
 136
 137     auto output_prim = outputs.begin()->second.get_memory();
 138
 139     auto output_ptr = output_prim.pointer<float>();
 140
 141     std::vector<float> expected_output_vec = {
 142         -16.f, 3.f, 0.25f,
 143         16.f, -1.25f, 5.25f,
 144         21.f, 40.5f, 13.5f
 145     };
 146
 147     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 148     {
 149         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 150     }
 151 }
 152
 153 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad_bfyx) {    //  Filter : 2x2
 154     //  Input  : 2x2
 155     //  Output : 3x3
 156     //
 157     //  Input:
 158     //  8  0.5
 159     //  6  9
 160     //
 161     //  Filter
 162     //  -2   0.5
 163     //   3.5 1.5
 164     //
 165     //  Bias
 166     //  2
 167     //
 168     //  Output:
 169     //  -14    5     2.25
 170     //   18    0.75  7.25
 171     //   23    42.5  15.5
 172
 173     const auto& engine = get_test_engine();
 174
 175     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
 176     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
 177     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
 178
 179     set_values(input, { 8.f, 0.5f, 6.f, 9.f });
 180     set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
 181     set_values(biases, { 2.0f });
 182
 183     topology topology(
 184         input_layout("input", input.get_layout()),
 185         data("weights", weights),
 186         data("biases", biases),
 187         deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1,1,1,1 })
 188     );
 189
 190     network network(engine, topology);
 191     network.set_input_data("input", input);
 192
 193     auto outputs = network.execute();
 194     EXPECT_EQ(outputs.size(), size_t(1));
 195     EXPECT_EQ(outputs.begin()->first, "deconv");
 196
 197     auto output_prim = outputs.begin()->second.get_memory();
 198
 199     auto output_ptr = output_prim.pointer<float>();
 200
 201     std::vector<float> expected_output_vec = {
 202         -14.f, 5.f, 2.25f,
 203         18.f, 0.75f, 7.25f,
 204         23.f, 42.5f, 15.5f
 205     };
 206
 207     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 208     {
 209         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 210     }
 211 }
 212
 213 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_pad1) {
 214     //  Filter : 2x2
 215     //  Input  : 2x2
 216     //  Output : 1x1
 217     //  Pad    : 1x1
 218     //
 219     //  Input:
 220     //  8  0.5
 221     //  6  9
 222     //
 223     //  Filter
 224     //  -2   0.5
 225     //   3.5 1.5
 226     //
 227     //  Bias
 228     //  2
 229     //
 230     //  Output:
 231     //  0.75
 232
 233     const auto& engine = get_test_engine();
 234
 235     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
 236     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
 237     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
 238
 239     set_values(input, { 8.f, 0.5f, 6.f, 9.f });
 240     set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
 241     set_values(biases, { 2.0f });
 242
 243     topology topology(
 244         input_layout("input", input.get_layout()),
 245         data("weights", weights),
 246         data("biases", biases),
 247         deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 1, 1 }, { 0, 0, -1, -1 })
 248     );
 249
 250     network network(engine, topology);
 251     network.set_input_data("input", input);
 252
 253     auto outputs = network.execute();
 254     EXPECT_EQ(outputs.size(), size_t(1));
 255     EXPECT_EQ(outputs.begin()->first, "deconv");
 256
 257     auto output_prim = outputs.begin()->second.get_memory();
 258
 259     auto output_ptr = output_prim.pointer<float>();
 260
 261     EXPECT_FLOAT_EQ(0.75f, output_ptr[0]);
 262 }
 263
 264 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride2_nopad) {
 265     //  Filter : 2x2
 266     //  Input  : 2x2
 267     //  Output : 1x1
 268     //  Stride : 2x2
 269     //
 270     //  Input:
 271     //  8  0.5
 272     //  6  9
 273     //
 274     //  Filter
 275     //  -2   0.5
 276     //   3.5 1.5
 277     //
 278     //  Bias
 279     //  1
 280     //
 281     //  Output:
 282     //  0.75
 283
 284     const auto& engine = get_test_engine();
 285
 286     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
 287     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
 288     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
 289
 290     set_values(input, { 8.f, 0.5f, 6.f, 9.f });
 291     set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
 292     set_values(biases, { 1.0f });
 293
 294     topology topology(
 295         input_layout("input", input.get_layout()),
 296         data("weights", weights),
 297         data("biases", biases),
 298         deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1,1,2,2 })
 299     );
 300
 301     network network(engine, topology);
 302     network.set_input_data("input", input);
 303
 304     auto outputs = network.execute();
 305     EXPECT_EQ(outputs.size(), size_t(1));
 306     EXPECT_EQ(outputs.begin()->first, "deconv");
 307
 308     auto output_prim = outputs.begin()->second.get_memory();
 309
 310     auto output_ptr = output_prim.pointer<float>();
 311
 312     std::vector<float> expected_output_vec = {
 313         -15.f, 5.f, 0.f, 1.25f,
 314         29.f, 13.f, 2.75f, 1.75,
 315         -11.f, 4.f, -17.f, 5.5f,
 316         22.f, 10.f, 32.5f, 14.5f
 317     };
 318
 319     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 320     {
 321         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 322     }
 323 }
 324
 325 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride4_pad2) {
 326     //  Filter : 3x3
 327     //  Input  : 2x2
 328     //  Output : 1x1
 329     //  Stride : 4x4
 330     //  Pad    : 2x2
 331     //
 332     //  Input:
 333     //  8  0.5
 334     //  6  9
 335     //
 336     //  Filter
 337     //  -2   0.5   1
 338     //   3.5 1.5   2
 339     //   3   4     5
 340     //
 341     //  Bias
 342     //  0
 343     //
 344     //  Output:
 345     //  40   0    1.5
 346     //  0    0    0
 347     //  6    0   -18
 348
 349     const auto& engine = get_test_engine();
 350
 351     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
 352     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } });
 353     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
 354
 355     set_values(input, { 8.f, 0.5f, 6.f, 9.f });
 356     set_values(weights, { -2.0f, 0.5f, 1.f, 3.5f, 1.5f, 2.f, 3.f, 4.f, 5.f });
 357     set_values(biases, { 0.0f });
 358
 359     topology topology(
 360         input_layout("input", input.get_layout()),
 361         data("weights", weights),
 362         data("biases", biases),
 363         deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 4, 4 }, { 0, 0, -2, -2 })
 364     );
 365
 366     network network(engine, topology);
 367     network.set_input_data("input", input);
 368
 369     auto outputs = network.execute();
 370     EXPECT_EQ(outputs.size(), size_t(1));
 371     EXPECT_EQ(outputs.begin()->first, "deconv");
 372
 373     auto output_prim = outputs.begin()->second.get_memory();
 374
 375     auto output_ptr = output_prim.pointer<float>();
 376
 377     std::vector<float> expected_output_vec = {
 378         40.f, 0.f, 1.5f,
 379         0.f, 0.f, 0.f,
 380         6.f, 0.f, -18.f
 381     };
 382
 383     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 384     {
 385         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 386     }
 387 }
 388
 389 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_stride2_pad1) {
 390     //  Filter : 2x2
 391     //  Input  : 2x2x1x2
 392     //  Output : 2x2x1x2
 393     //  Stride : 2x2
 394     //  Pad    : 1x1
 395     //
 396     //  Input:
 397     //  8  0.5    1   3
 398     //  6  9      2   4
 399     //
 400     //  Filter
 401     //  -2   2
 402     //   7  -0.5
 403     //
 404     //  Bias
 405     //  1
 406     //
 407     //  Output:
 408     //  -3    4.5    0.5   22
 409     //   13  -17     5    -7
 410
 411     const auto& engine = get_test_engine();
 412
 413     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 1, 2, 2 } });
 414     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
 415     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
 416
 417     set_values(input, { 8.f, 1.f, 0.5f, 3.f, 6.f, 2.f, 9.f, 4.f });
 418     set_values(weights, { -2.f, 2.f, 7.f, -0.5f });
 419     set_values(biases, { 1.0f });
 420
 421     topology topology(
 422         input_layout("input", input.get_layout()),
 423         data("weights", weights),
 424         data("biases", biases),
 425         deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
 426     );
 427
 428     network network(engine, topology);
 429     network.set_input_data("input", input);
 430
 431     auto outputs = network.execute();
 432     EXPECT_EQ(outputs.size(), size_t(1));
 433     EXPECT_EQ(outputs.begin()->first, "deconv");
 434
 435     auto output_prim = outputs.begin()->second.get_memory();
 436
 437     auto output_ptr = output_prim.pointer<float>();
 438
 439     std::vector<float> expected_output_vec = {
 440         -3.f, 0.5f, 4.5f, 22.f,
 441         13.f, 5.f, -17.f, -7.f
 442     };
 443
 444     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 445     {
 446         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 447     }
 448 }
 449
 450 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1) {
 451     //  Filter : 2x2
 452     //  Input  : 2x2x1x1
 453     //  Output : 2x2x1x1
 454     //  Stride : 2x2
 455     //  Pad    : 1x1
 456     //
 457     //  Input:
 458     //  8  0.5
 459     //  6  9
 460     //
 461     //  Filter
 462     //  f0:-2   2
 463     //  f0: 7  -0.5
 464     //  f1:-2   2
 465     //  f1: 7  -0.5
 466     //
 467     //  Bias
 468     //  1  5
 469     //
 470     //  Output:
 471     //  f0: -3   4.5
 472     //  f0: 13   -17
 473     //  f1: 1    8.5
 474     //  f1: 17 - 13
 475
 476     const auto& engine = get_test_engine();
 477
 478     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
 479     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 1, 2, 2 } });
 480     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } });
 481
 482     set_values(input, { 8.f, 0.5f, 6.f, 9.f });
 483     set_values(weights, { -2.f, -2.f, 2.f, 2.f, 7.f, 7.f, -0.5f, -0.5f });
 484     set_values(biases, { 1.0f, 5.0f });
 485
 486     topology topology(
 487         input_layout("input", input.get_layout()),
 488         data("weights", weights),
 489         data("biases", biases),
 490         deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
 491     );
 492
 493     network network(engine, topology);
 494     network.set_input_data("input", input);
 495
 496     auto outputs = network.execute();
 497     EXPECT_EQ(outputs.size(), size_t(1));
 498     EXPECT_EQ(outputs.begin()->first, "deconv");
 499
 500     auto output_prim = outputs.begin()->second.get_memory();
 501
 502     auto output_ptr = output_prim.pointer<float>();
 503
 504     std::vector<float> expected_output_vec = {
 505         -3.f, 1.f, 4.5f, 8.5f,
 506         13.f, 17.f, -17.f, -13.f
 507     };
 508
 509     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 510     {
 511         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 512     }
 513 }
 514
 515 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad1) {
 516     //  Filter : 2x2
 517     //  Input  : 2x2x1x2
 518     //  Output : 2x2x1x2
 519     //  Stride : 2x2
 520     //  Pad    : 1x1
 521     //
 522     //  Input:
 523     //  8  0.5    1   3
 524     //  6  9      2   4
 525     //
 526     //  Filter
 527     //  -2   2
 528     //   7  -0.5
 529     //
 530     //  Bias
 531     //  1
 532     //
 533     //  Output:
 534     //  -3    4.5    0.5   22
 535     //   13  -17     5    -7
 536
 537     const auto& engine = get_test_engine();
 538
 539     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 2, 2 } });
 540     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
 541     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
 542
 543     set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f });
 544     set_values(weights, { -2.f, 2.f, 7.f, -0.5f });
 545     set_values(biases, { 1.0f });
 546
 547     topology topology(
 548         input_layout("input", input.get_layout()),
 549         data("weights", weights),
 550         data("biases", biases),
 551         deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
 552     );
 553
 554     network network(engine, topology);
 555     network.set_input_data("input", input);
 556
 557     auto outputs = network.execute();
 558     EXPECT_EQ(outputs.size(), size_t(1));
 559     EXPECT_EQ(outputs.begin()->first, "deconv");
 560
 561     auto output_prim = outputs.begin()->second.get_memory();
 562
 563     auto output_ptr = output_prim.pointer<float>();
 564
 565     std::vector<float> expected_output_vec = {
 566         -3.f, 4.5f, 13.f, -17.f,
 567         .5f, 22.f, 5.f, -7.f
 568     };
 569
 570     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 571     {
 572         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 573     }
 574 }
 575
 576 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad1_input_padding) {
 577     //  Filter : 2x2
 578     //  Input  : 2x2x1x2
 579     //  Output : 2x2x1x2
 580     //  Stride : 2x2
 581     //  Out Padding   : 1x1
 582     //  Input Padding : 2x1 (with reorder)
 583     //
 584     //  Input:
 585     //  8  0.5    1   3
 586     //  6  9      2   4
 587     //
 588     //  Filter
 589     //  -2   2
 590     //   7  -0.5
 591     //
 592     //  Bias
 593     //  1
 594     //
 595     //  Output:
 596     //  -3    4.5    0.5   22
 597     //   13  -17     5    -7
 598
 599     const auto& engine = get_test_engine();
 600
 601     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
 602     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
 603     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
 604
 605     set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f });
 606     set_values(weights, { -2.f, 2.f, 7.f, -0.5f });
 607     set_values(biases, { 1.0f });
 608
 609     topology topology(
 610         input_layout("input", input.get_layout()),
 611         reorder("reorder", "input", input.get_layout().with_padding(padding{ { 0, 0, 1, 2 }, 0 })),
 612         data("weights", weights),
 613         data("biases", biases),
 614         deconvolution("deconv", "reorder", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
 615     );
 616
 617     network network(engine, topology);
 618     network.set_input_data("input", input);
 619
 620     auto outputs = network.execute();
 621     EXPECT_EQ(outputs.size(), size_t(1));
 622     EXPECT_EQ(outputs.begin()->first, "deconv");
 623
 624     auto output_prim = outputs.begin()->second.get_memory();
 625
 626     auto output_ptr = output_prim.pointer<float>();
 627
 628     std::vector<float> expected_output_vec = {
 629         -3.f, 4.5f, 13.f, -17.f,
 630         .5f, 22.f, 5.f, -7.f
 631     };
 632
 633     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 634     {
 635         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 636     }
 637 }
 638
 639 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1_input_padding) {
 640     //  Filter : 2x2
 641     //  Input  : 2x2x1x1
 642     //  Output : 2x2x1x1
 643     //  Stride : 2x2
 644     //  Out Padding   : 1x1
 645     //  Input Padding : 2x1 (with reorder)
 646     //
 647     //  Input:
 648     //  8  0.5
 649     //  6  9
 650     //
 651     //  Filter
 652     //  f0:-2   2
 653     //  f0: 7  -0.5
 654     //  f1:-2   2
 655     //  f1: 7  -0.5
 656     //
 657     //  Bias
 658     //  1  5
 659     //
 660     //  Output:
 661     //  f0: -3   4.5
 662     //  f0: 13   -17
 663     //  f1: 1    8.5
 664     //  f1: 17 - 13
 665
 666     const auto& engine = get_test_engine();
 667
 668     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
 669     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 1, 2, 2 } });
 670     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
 671
 672     set_values(input, { 8.f, 0.5f, 6.f, 9.f });
 673     set_values(weights, { -2.f, -2.f, 2.f, 2.f, 7.f, 7.f, -0.5f, -0.5f });
 674     set_values(biases, { 1.0f, 5.0f });
 675
 676     topology topology(
 677         input_layout("input", input.get_layout()),
 678         reorder("reorder", "input", input.get_layout().with_padding(padding{ { 0, 0, 1, 2 }, 0 })),
 679         data("weights", weights),
 680         data("biases", biases),
 681         deconvolution("deconv", "reorder", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
 682     );
 683
 684     network network(engine, topology);
 685     network.set_input_data("input", input);
 686
 687     auto outputs = network.execute();
 688     EXPECT_EQ(outputs.size(), size_t(1));
 689     EXPECT_EQ(outputs.begin()->first, "deconv");
 690
 691     auto output_prim = outputs.begin()->second.get_memory();
 692
 693     auto output_ptr = output_prim.pointer<float>();
 694
 695     std::vector<float> expected_output_vec = {
 696         -3.f, 1.f, 4.5f, 8.5f,
 697         13.f, 17.f, -17.f, -13.f
 698     };
 699
 700     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 701     {
 702         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 703     }
 704 }
 705
 706 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_yxfb_stride2_pad1) {
 707     //  Filter : 2x2
 708     //  Input  : 2x2x1x2
 709     //  Output : 2x2x1x2
 710     //  Stride : 2x2
 711     //  Pad    : 1x1
 712     //
 713     //  Input:
 714     //  8  0.5    1   3
 715     //  6  9      2   4
 716     //
 717     //  Filter
 718     //  -2   2
 719     //   7  -0.5
 720     //
 721     //  Bias
 722     //  1
 723     //
 724     //  Output:
 725     //  -3    4.5    0.5   22
 726     //   13  -17     5    -7
 727
 728     const auto& engine = get_test_engine();
 729
 730     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 2, 2 } });
 731     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
 732     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
 733
 734     set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f });
 735     set_values(weights, { -2.f, 2.f, 7.f, -0.5f });
 736     set_values(biases, { 1.0f });
 737
 738     topology topology(
 739         input_layout("input", input.get_layout()),
 740         data("weights", weights),
 741         data("biases", biases),
 742         deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
 743     );
 744
 745     network network(engine, topology);
 746     network.set_input_data("input", input);
 747
 748     auto outputs = network.execute();
 749     EXPECT_EQ(outputs.size(), size_t(1));
 750     EXPECT_EQ(outputs.begin()->first, "deconv");
 751
 752     auto output_prim = outputs.begin()->second.get_memory();
 753
 754     auto output_ptr = output_prim.pointer<float>();
 755
 756     std::vector<float> expected_output_vec = {
 757         -3.f, 4.5f, 13.f, -17.f,
 758         .5f, 22.f, 5.f, -7.f
 759     };
 760
 761     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 762     {
 763         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 764     }
 765 }
 766
 767 TEST(deconvolution_f16_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_yxfb_stride2_pad1) {
 768     //  Filter : 2x2
 769     //  Input  : 2x2x1x2
 770     //  Output : 2x2x1x2
 771     //  Stride : 2x2
 772     //  Pad    : 1x1
 773     //
 774     //  Input:
 775     //  8  0.5    1   3
 776     //  6  9      2   4
 777     //
 778     //  Filter
 779     //  -2   2
 780     //   7  -0.5
 781     //
 782     //  Bias
 783     //  1
 784     //
 785     //  Output:
 786     //  -3    4.5    0.5   22
 787     //   13  -17     5    -7
 788
 789     const auto& engine = get_test_engine();
 790
 791     auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 2, 1, 2, 2 } });
 792     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
 793     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
 794
 795     cldnn::build_options options;
 796     options.set_option(cldnn::build_option::optimize_data(true));
 797
 798     set_values(input, { FLOAT16(8.f), FLOAT16(0.5f), FLOAT16(6.f), FLOAT16(9.f),
 799         FLOAT16(1.f), FLOAT16(3.f), FLOAT16(2.f), FLOAT16(4.f) });
 800     set_values(weights, { -2.f, 2.f, 7.f, -0.5f});
 801     set_values(biases, { 1.0f });
 802
 803     topology topology(
 804         input_layout("input", input.get_layout()),
 805         data("weights", weights),
 806         data("biases", biases),
 807         deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
 808     );
 809
 810     network network(engine, topology, options);
 811     network.set_input_data("input", input);
 812
 813     auto outputs = network.execute();
 814     EXPECT_EQ(outputs.size(), size_t(1));
 815     EXPECT_EQ(outputs.begin()->first, "deconv");
 816
 817     auto output_prim = outputs.begin()->second.get_memory();
 818
 819     auto output_ptr = output_prim.pointer<uint16_t>();
 820
 821     std::vector<float> expected_output_vec = {
 822         -3.f, 4.5f, 13.f, -17.f,
 823         .5f, 22.f, 5.f, -7.f
 824     };
 825
 826     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 827     {
 828         EXPECT_FLOAT_EQ(expected_output_vec[i], float16_to_float32(output_ptr[i]));
 829     }
 830 }
 831
 832 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2) {
 833     //  Filter : 2x2x2x2
 834     //  Input  : 2x2x1x2
 835     //  Output : 2x2x1x2
 836     //  Stride : 2x2
 837     //  Pad    : 1x1
 838     //
 839     //  Input:
 840     //  8  0.5    1   3
 841     //  6  9      2   4
 842     //
 843     //  Filter1
 844     //  -2   2
 845     //   7  -0.5
 846     //
 847     //  Bias
 848     //  1
 849     //
 850     //  Filter2
 851     //  -4   1
 852     //  -9  -7
 853     //
 854     //  Bias
 855     //  -1
 856     //
 857     //  Output:
 858     //  -3    4.5    -8   -28
 859     //   13  -17     1    -17
 860
 861     const auto& engine = get_test_engine();
 862
 863     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 2, 2 } });
 864     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
 865     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
 866     auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
 867     auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
 868
 869     set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f });
 870     set_values(weights, { -2.f, 2.f, 7.f, -0.5f });
 871     set_values(biases, { 1.0f });
 872     set_values(weights2, { -4.f, 1.f, -9.f, -7.f });
 873     set_values(biases2, { -1.0f });
 874
 875     topology topology(
 876         input_layout("input", input.get_layout()),
 877         data("weights", weights),
 878         data("biases", biases),
 879         data("weights2", weights2),
 880         data("biases2", biases2),
 881         deconvolution("deconv", "input", { "weights", "weights2" }, { "biases", "biases2" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
 882     );
 883
 884     network network(engine, topology);
 885     network.set_input_data("input", input);
 886
 887     auto outputs = network.execute();
 888     EXPECT_EQ(outputs.size(), size_t(1));
 889     EXPECT_EQ(outputs.begin()->first, "deconv");
 890
 891     auto output_prim = outputs.begin()->second.get_memory();
 892
 893     auto output_ptr = output_prim.pointer<float>();
 894
 895     std::vector<float> expected_output_vec = {
 896         -3.f, 4.5f, 13.f, -17.f,
 897         -8.f, -28.f, 1.f, -17.f
 898     };
 899
 900     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 901     {
 902         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 903     }
 904 }
 905
 906 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group2) {
 907     //  data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2
 908
 909     engine engine;
 910
 911     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 2, 2 } });
 912     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
 913     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
 914
 915     set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f });
 916     set_values(weights, {
 917         -2.f, 2.f, 7.f, -0.5f,
 918         -4.f, 1.f, -9.f, -7.f
 919     });
 920     set_values(biases, { 1.0f, -1.0f });
 921
 922     topology topology(
 923         input_layout("input", input.get_layout()),
 924         data("weights", weights),
 925         data("biases", biases),
 926         deconvolution("deconv", "input", { "weights" }, { "biases" }, 2, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
 927     );
 928
 929     network network(engine, topology);
 930     network.set_input_data("input", input);
 931
 932     auto outputs = network.execute();
 933     EXPECT_EQ(outputs.size(), size_t(1));
 934     EXPECT_EQ(outputs.begin()->first, "deconv");
 935
 936     auto output_prim = outputs.begin()->second.get_memory();
 937
 938     auto output_ptr = output_prim.pointer<float>();
 939
 940     std::vector<float> expected_output_vec = {
 941         -3.f, 4.5f, 13.f, -17.f,
 942         -8.f, -28.f, 1.f, -17.f
 943     };
 944
 945     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 946     {
 947         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 948     }
 949 }
 950
 951 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2_depthwise_sep_opt) {
 952     //  Test for depthwise separable optimization, there are 16 weights and biases (split 16)
 953     //  data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2
 954
 955     const auto& engine = get_test_engine();
 956
 957     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 2, 2 } });
 958     set_values(input,
 959     { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
 960         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
 961         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
 962         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
 963         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
 964         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
 965         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
 966         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f
 967     });
 968
 969     topology topology(input_layout("input", input.get_layout()));
 970
 971     std::vector<primitive_id> weights_vec;
 972     std::vector<primitive_id> bias_vec;
 973
 974     for (uint32_t i = 0; i < 8; i++)
 975     {
 976         auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
 977         auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
 978         auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
 979         auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
 980
 981         set_values(weights, { -2.f, 2.f, 7.f, -0.5f });
 982         set_values(biases, { 1.0f });
 983         set_values(weights2, { -4.f, 1.f, -9.f, -7.f });
 984         set_values(biases2, { -1.0f });
 985
 986         primitive_id weights_id = "weights_" + std::to_string(i);
 987         primitive_id weights2_id = "weights2_" + std::to_string(i);
 988         primitive_id bias_id = "biases_" + std::to_string(i);
 989         primitive_id bias2_id = "biases2_" + std::to_string(i);
 990
 991         weights_vec.push_back(weights_id);
 992         weights_vec.push_back(weights2_id);
 993         bias_vec.push_back(bias_id);
 994         bias_vec.push_back(bias2_id);
 995
 996         topology.add(
 997             data(weights_id, weights),
 998             data(bias_id, biases),
 999             data(weights2_id, weights2),
1000             data(bias2_id, biases2)
1001             );
1002     }
1003
1004     topology.add(deconvolution("deconv", "input", weights_vec, bias_vec, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }));
1005
1006     network network(engine, topology);
1007     network.set_input_data("input", input);
1008
1009     auto outputs = network.execute();
1010     EXPECT_EQ(outputs.size(), size_t(1));
1011     EXPECT_EQ(outputs.begin()->first, "deconv");
1012
1013     auto output_prim = outputs.begin()->second.get_memory();
1014
1015     auto output_ptr = output_prim.pointer<float>();
1016
1017     std::vector<float> expected_output_vec = {
1018         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1019         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1020         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1021         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1022         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1023         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1024         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1025         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1026     };
1027
1028     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1029     {
1030         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1031     }
1032 }
1033
1034 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group16) {
1035     //  Test for depthwise separable optimization, there are 16 joined weights and biases (group 16)
1036     //  data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2_depthwise_sep_opt
1037
1038     engine engine;
1039
1040     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 2, 2 } });
1041     set_values(input,
1042     { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1043         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1044         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1045         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1046         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1047         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1048         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1049         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f
1050     });
1051
1052     topology topology(input_layout("input", input.get_layout()));
1053
1054     std::vector<primitive_id> weights_vec;
1055     std::vector<primitive_id> bias_vec;
1056
1057     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 16, 1, 2, 2 } });
1058     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 1, 1 } });
1059
1060     set_values(weights,
1061         {
1062             -2.f, 2.f, 7.f, -0.5f,
1063             -4.f, 1.f, -9.f, -7.f,
1064             -2.f, 2.f, 7.f, -0.5f,
1065             -4.f, 1.f, -9.f, -7.f,
1066             -2.f, 2.f, 7.f, -0.5f,
1067             -4.f, 1.f, -9.f, -7.f,
1068             -2.f, 2.f, 7.f, -0.5f,
1069             -4.f, 1.f, -9.f, -7.f,
1070             -2.f, 2.f, 7.f, -0.5f,
1071             -4.f, 1.f, -9.f, -7.f,
1072             -2.f, 2.f, 7.f, -0.5f,
1073             -4.f, 1.f, -9.f, -7.f,
1074             -2.f, 2.f, 7.f, -0.5f,
1075             -4.f, 1.f, -9.f, -7.f,
1076             -2.f, 2.f, 7.f, -0.5f,
1077             -4.f, 1.f, -9.f, -7.f
1078         }
1079     );
1080     set_values(biases, { 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f });
1081     topology.add(
1082         data("weights", weights),
1083         data("bias", biases)
1084     );
1085
1086     topology.add(deconvolution("deconv", "input", { "weights" }, { "bias" }, 16, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }));
1087
1088     network network(engine, topology);
1089     network.set_input_data("input", input);
1090
1091     auto outputs = network.execute();
1092     EXPECT_EQ(outputs.size(), size_t(1));
1093     EXPECT_EQ(outputs.begin()->first, "deconv");
1094
1095     auto output_prim = outputs.begin()->second.get_memory();
1096
1097     auto output_ptr = output_prim.pointer<float>();
1098
1099     std::vector<float> expected_output_vec = {
1100         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1101         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1102         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1103         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1104         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1105         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1106         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1107         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1108     };
1109
1110     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1111     {
1112         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1113     }
1114 }
1115
1116 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2_depthwise_sep_opt_ofm2) {
1117     //  Test for depthwise separable optimization, there are 16 weights and biases (split 16)
1118     //  data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2
1119
1120     const auto& engine = get_test_engine();
1121
1122     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 2, 2 } });
1123     set_values(input,
1124     { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1125         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1126         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1127         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1128         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1129         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1130         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1131         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f
1132     });
1133
1134     topology topology(input_layout("input", input.get_layout()));
1135
1136     std::vector<primitive_id> weights_vec;
1137     std::vector<primitive_id> bias_vec;
1138
1139     for (uint32_t i = 0; i < 8; i++)
1140     {
1141         auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
1142         auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
1143         auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
1144         auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
1145
1146         set_values(weights, { -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f });
1147         set_values(biases, { 1.0f, 1.0f });
1148         set_values(weights2, { -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f });
1149         set_values(biases2, { -1.0f, -1.0f });
1150
1151         primitive_id weights_id = "weights_" + std::to_string(i);
1152         primitive_id weights2_id = "weights2_" + std::to_string(i);
1153         primitive_id bias_id = "biases_" + std::to_string(i);
1154         primitive_id bias2_id = "biases2_" + std::to_string(i);
1155
1156         weights_vec.push_back(weights_id);
1157         weights_vec.push_back(weights2_id);
1158         bias_vec.push_back(bias_id);
1159         bias_vec.push_back(bias2_id);
1160
1161         topology.add(
1162             data(weights_id, weights),
1163             data(bias_id, biases),
1164             data(weights2_id, weights2),
1165             data(bias2_id, biases2)
1166         );
1167     }
1168
1169     topology.add(deconvolution("deconv", "input", weights_vec, bias_vec, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }));
1170
1171     network network(engine, topology);
1172     network.set_input_data("input", input);
1173
1174     auto outputs = network.execute();
1175     EXPECT_EQ(outputs.size(), size_t(1));
1176     EXPECT_EQ(outputs.begin()->first, "deconv");
1177
1178     auto output_prim = outputs.begin()->second.get_memory();
1179
1180     auto output_ptr = output_prim.pointer<float>();
1181
1182     std::vector<float> expected_output_vec = {
1183         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1184         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1185         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1186         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1187         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1188         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1189         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1190         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1191     };
1192
1193     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1194     {
1195         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1196     }
1197 }
1198
1199 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group16_ofm2) {
1200     //  Test for depthwise separable optimization, there are 16 joined weights and biases (group 16)
1201     //  data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2_depthwise_sep_opt_ofm2
1202
1203     engine engine;
1204
1205     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 2, 2 } });
1206     set_values(input,
1207     { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1208         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1209         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1210         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1211         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1212         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1213         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1214         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f
1215     });
1216
1217     topology topology(input_layout("input", input.get_layout()));
1218
1219     std::vector<primitive_id> weights_vec;
1220     std::vector<primitive_id> bias_vec;
1221
1222     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 32, 1, 2, 2 } });
1223     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 32, 1, 1 } });
1224
1225     set_values(weights,
1226         {
1227             -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1228             -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1229             -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1230             -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1231             -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1232             -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1233             -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1234             -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1235             -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1236             -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1237             -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1238             -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1239             -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1240             -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1241             -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1242             -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1243         }
1244     );
1245
1246     set_values(biases,
1247         {
1248             1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f,
1249             1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f
1250         }
1251     );
1252
1253     topology.add(
1254         data("weights", weights),
1255         data("bias", biases)
1256     );
1257
1258     topology.add(deconvolution("deconv", "input", { "weights" }, { "bias" }, 16, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }));
1259
1260     network network(engine, topology);
1261     network.set_input_data("input", input);
1262
1263     auto outputs = network.execute();
1264     EXPECT_EQ(outputs.size(), size_t(1));
1265     EXPECT_EQ(outputs.begin()->first, "deconv");
1266
1267     auto output_prim = outputs.begin()->second.get_memory();
1268
1269     auto output_ptr = output_prim.pointer<float>();
1270
1271     std::vector<float> expected_output_vec = {
1272         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1273         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1274         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1275         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1276         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1277         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1278         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1279         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1280     };
1281
1282     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1283     {
1284         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1285     }
1286 }
1287
1288 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_split2_ofm3) {
1289     //  Filter : 1x1
1290     //  Stride : 1x1
1291     //  Input  : 1x1x4
1292     //  Output : 1x1x6
1293     //
1294     //  Input:
1295     //  f0:  1.5
1296     //  f1:  0.5
1297     //
1298     //  f2:  2
1299     //  f3: -1.0
1300     //
1301     //  Filter1:
1302     //  -2   1   ofm=0
1303     //   1   3   ofm=1
1304     //   0.5 8   ofm=2
1305     //  Bias1:
1306     //   1   5   3
1307     //
1308     //  Filter2:
1309     //   4  -4   ofm=3
1310     //   2   0.5 ofm=4
1311     //  -0.5 3   ofm=5
1312     //
1313     //  Bias2:
1314     //  -1   2.5 2
1315     //
1316     //  Output:
1317     //  -1.5
1318     //   8
1319     //   7.75
1320     //
1321     //   11
1322     //   6
1323     //  -2
1324
1325     const auto& engine = get_test_engine();
1326
1327     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 4, 1, 1 } });
1328     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 3, 2, 1, 1 } });
1329     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 3, 1, 1 } });
1330     auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 3, 2, 1, 1 } });
1331     auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 3, 1, 1 } });
1332
1333     set_values(input, {
1334         1.5f, 0.5f, 2.0f, -1.0f
1335     });
1336     set_values(weights, { -2.0f, 1.0f, 1.0f, 3.0f, 0.5f, 8.0f });
1337     set_values(biases, { 1.0f, 5.0f, 3.0f });
1338     set_values(weights2, { 4.0f, -4.0f, 2.0f, 0.5f, -0.5f, 3.0f });
1339     set_values(biases2, { -1.0f, 2.5f, 2.0f });
1340
1341     topology topology(
1342         input_layout("input", input.get_layout()),
1343         data("weights", weights),
1344         data("biases", biases),
1345         data("weights2", weights2),
1346         data("biases2", biases2),
1347         deconvolution("deconv", "input", { "weights", "weights2" }, { "biases", "biases2" }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 })
1348     );
1349
1350     network network(engine, topology);
1351     network.set_input_data("input", input);
1352
1353     auto outputs = network.execute();
1354     EXPECT_EQ(outputs.size(), size_t(1));
1355     EXPECT_EQ(outputs.begin()->first, "deconv");
1356
1357     auto output_prim = outputs.begin()->second.get_memory();
1358
1359     auto output_ptr = output_prim.pointer<float>();
1360
1361     std::vector<float> expected_output_vec = {
1362         -1.5f, 8.0f, 7.75f, 11.0f, 6.0f, -2.0f
1363     };
1364     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1365     {
1366         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1367     }
1368 }
1369
1370 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_group2_ofm3) {
1371     //  data is similar as in basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_split2_ofm3
1372
1373     engine engine;
1374
1375     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 4, 1, 1 } });
1376     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 6, 2, 1, 1 } });
1377     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 6, 1, 1 } });
1378
1379     set_values(input, {
1380         1.5f, 0.5f, 2.0f, -1.0f
1381     });
1382     set_values(weights, {
1383         -2.0f, 1.0f, 1.0f, 3.0f, 0.5f, 8.0f,
1384         4.0f, -4.0f, 2.0f, 0.5f, -0.5f, 3.0f
1385     });
1386     set_values(biases, {
1387         1.0f, 5.0f, 3.0f,
1388         -1.0f, 2.5f, 2.0f
1389     });
1390
1391     topology topology(
1392         input_layout("input", input.get_layout()),
1393         data("weights", weights),
1394         data("biases", biases),
1395         deconvolution("deconv", "input", { "weights" }, { "biases" }, 2, { 1, 1, 1, 1 }, { 0, 0, 0, 0 })
1396     );
1397
1398     network network(engine, topology);
1399     network.set_input_data("input", input);
1400
1401     auto outputs = network.execute();
1402     EXPECT_EQ(outputs.size(), size_t(1));
1403     EXPECT_EQ(outputs.begin()->first, "deconv");
1404
1405     auto output_prim = outputs.begin()->second.get_memory();
1406
1407     auto output_ptr = output_prim.pointer<float>();
1408
1409     std::vector<float> expected_output_vec = {
1410         -1.5f, 8.0f, 7.75f, 11.0f, 6.0f, -2.0f
1411     };
1412     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1413     {
1414         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1415     }
1416 }
1417 TEST(deconvolution_f32_fw_gpu, basic3D_wsiz2x2x1_in1x1x2x2x1_nopad) {
1418     //  Filter : 2x2x1
1419     //  Input  : 2x2x1
1420     //  Output : 3x3x1
1421     //
1422     //  Input:
1423     //  8  0.5
1424     //  6  9
1425     //
1426     //  Filter
1427     //  -2   0.5
1428     //   3.5 1.5
1429     //
1430     //  Bias
1431     //  2
1432     //
1433     //  Output:
1434     //  -14    5     2.25
1435     //   18    0.75  7.25
1436     //   23    42.5  15.5
1437
1438     const auto& engine = get_test_engine();
1439
1440     auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 2, 2, 1 } });
1441     auto weights = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 2, 2, 1 } });
1442     auto biases = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 1, 1, 1 } });
1443
1444     set_values(input, { 8.f, 0.5f, 6.f, 9.f });
1445     set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
1446     set_values(biases, { 2.0f });
1447
1448     topology topology(
1449         input_layout("input", input.get_layout()),
1450         data("weights", weights),
1451         data("biases", biases),
1452         deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1,1,1,1,1 })
1453     );
1454
1455     network network(engine, topology);
1456     network.set_input_data("input", input);
1457
1458     auto outputs = network.execute();
1459     EXPECT_EQ(outputs.size(), size_t(1));
1460     EXPECT_EQ(outputs.begin()->first, "deconv");
1461
1462     auto output_prim = outputs.begin()->second.get_memory();
1463
1464     auto output_ptr = output_prim.pointer<float>();
1465
1466     std::vector<float> expected_output_vec = {
1467         -14.f, 5.f, 2.25f,
1468         18.f, 0.75f, 7.25f,
1469         23.f, 42.5f, 15.5f
1470     };
1471
1472     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1473     {
1474         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1475     }
1476 }
1477
1478 TEST(deconvolution_f32_fw_gpu, basic3D_wsiz3x3x3_in1x1x4x4x4_nopad) {
1479     //  Filter : 3x3x3
1480     //  Input  : 3x3x3
1481     //  Output : 6x6x6
1482     //
1483     //  Input:
1484     //  1  1  1  1
1485     //  1  1  1  1
1486     //  1  1  1  1
1487     //  1  1  1  1
1488     //
1489     //  1  1  1  1
1490     //  1  1  1  1
1491     //  1  1  1  1
1492     //  1  1  1  1
1493     //
1494     //  1  1  1  1
1495     //  1  1  1  1
1496     //  1  1  1  1
1497     //  1  1  1  1
1498     //
1499     //  1  1  1  1
1500     //  1  1  1  1
1501     //  1  1  1  1
1502     //  1  1  1  1
1503     //
1504     //
1505     //  Filter
1506     //  1  1  1
1507     //  1  1  1
1508     //  1  1  1
1509     //
1510     //  1  1  1
1511     //  1  1  1
1512     //  1  1  1
1513     //
1514     //  1  1  1
1515     //  1  1  1
1516     //  1  1  1
1517     //
1518     //
1519     //  Output:
1520     //
1521     //  1  2  3  3  2  1
1522     //  2  4  6  6  4  2
1523     //  3  6  9  9  6  3
1524     //  3  6  9  9  6  3
1525     //  2  4  6  6  4  2
1526     //  1  2  3  3  2  1
1527     //
1528     //  2   4   6   6   4  2
1529     //  4   8  12  12   8  4
1530     //  6  12  18  18  12  6
1531     //  6  12  18  18  12  6
1532     //  4   8  12  12   8  4
1533     //  2   4   6   6   4  2
1534     //
1535     //  3   6   9   9   6  3
1536     //  6  12  18  18  12  6
1537     //  9  18  27  27  18  9
1538     //  9  18  27  27  18  9
1539     //  6  12  18  18  12  6
1540     //  3   6   9   9   6  3
1541     //
1542     //  3   6   9   9   6  3
1543     //  6  12  18  18  12  6
1544     //  9  18  27  27  18  9
1545     //  9  18  27  27  18  9
1546     //  6  12  18  18  12  6
1547     //  3   6   9   9   6  3
1548     //
1549     //  2   4   6   6   4  2
1550     //  4   8  12  12   8  4
1551     //  6  12  18  18  12  6
1552     //  6  12  18  18  12  6
1553     //  4   8  12  12   8  4
1554     //  2   4   6   6   4  2
1555     //
1556     //  1  2  3  3  2  1
1557     //  2  4  6  6  4  2
1558     //  3  6  9  9  6  3
1559     //  3  6  9  9  6  3
1560     //  2  4  6  6  4  2
1561     //  1  2  3  3  2  1
1562     //
1563
1564     const auto& engine = get_test_engine();
1565
1566     auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 4, 4, 4 } });
1567     auto weights = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 3, 3, 3 } });
1568
1569     set_values(input,
1570     {
1571         1.0f, 1.0f, 1.0f, 1.0f,
1572         1.0f, 1.0f, 1.0f, 1.0f,
1573         1.0f, 1.0f, 1.0f, 1.0f,
1574         1.0f, 1.0f, 1.0f, 1.0f,
1575         1.0f, 1.0f, 1.0f, 1.0f,
1576         1.0f, 1.0f, 1.0f, 1.0f,
1577         1.0f, 1.0f, 1.0f, 1.0f,
1578         1.0f, 1.0f, 1.0f, 1.0f,
1579         1.0f, 1.0f, 1.0f, 1.0f,
1580         1.0f, 1.0f, 1.0f, 1.0f,
1581         1.0f, 1.0f, 1.0f, 1.0f,
1582         1.0f, 1.0f, 1.0f, 1.0f,
1583         1.0f, 1.0f, 1.0f, 1.0f,
1584         1.0f, 1.0f, 1.0f, 1.0f,
1585         1.0f, 1.0f, 1.0f, 1.0f,
1586         1.0f, 1.0f, 1.0f, 1.0f
1587     });
1588     set_values(weights, {
1589         1.0f,  1.0f, 1.0f,
1590         1.0f,  1.0f, 1.0f,
1591         1.0f,  1.0f, 1.0f,
1592         1.0f,  1.0f, 1.0f,
1593         1.0f,  1.0f, 1.0f,
1594         1.0f,  1.0f, 1.0f,
1595         1.0f,  1.0f, 1.0f,
1596         1.0f,  1.0f, 1.0f,
1597         1.0f,  1.0f, 1.0f
1598     });
1599
1600     topology topology(
1601         input_layout("input", input.get_layout()),
1602         data("weights", weights),
1603         deconvolution("deconv", "input", { "weights" })
1604     );
1605
1606     network network(engine, topology);
1607     network.set_input_data("input", input);
1608
1609     auto outputs = network.execute();
1610     EXPECT_EQ(outputs.size(), size_t(1));
1611     EXPECT_EQ(outputs.begin()->first, "deconv");
1612
1613     auto output_prim = outputs.begin()->second.get_memory();
1614
1615     auto output_ptr = output_prim.pointer<float>();
1616
1617     std::vector<float> expected_output_vec = {
1618         1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f,
1619         2.0f, 4.0f, 6.0f, 6.0f, 4.0f, 2.0f,
1620         3.0f, 6.0f, 9.0f, 9.0f, 6.0f, 3.0f,
1621         3.0f, 6.0f, 9.0f, 9.0f, 6.0f, 3.0f,
1622         2.0f, 4.0f, 6.0f, 6.0f, 4.0f, 2.0f,
1623         1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f,
1624
1625         2.0f, 4.0f, 6.0f, 6.0f, 4.0f, 2.0f,
1626         4.0f, 8.0f, 12.0f, 12.0f, 8.0f, 4.0f,
1627         6.0f, 12.0f, 18.0f, 18.0f, 12.0f, 6.0f,
1628         6.0f, 12.0f, 18.0f, 18.0f, 12.0f, 6.0f,
1629         4.0f, 8.0f, 12.0f, 12.0f, 8.0f, 4.0f,
1630         2.0f, 4.0f, 6.0f, 6.0f, 4.0f, 2.0f,
1631
1632         3.0f, 6.0f, 9.0f, 9.0f, 6.0f, 3.0f,
1633         6.0f, 12.0f, 18.0f, 18.0f, 12.0f, 6.0f,
1634         9.0f, 18.0f, 27.0f, 27.0f, 18.0f, 9.0f,
1635         9.0f, 18.0f, 27.0f, 27.0f, 18.0f, 9.0f,
1636         6.0f, 12.0f, 18.0f, 18.0f, 12.0f, 6.0f,
1637         3.0f, 6.0f, 9.0f, 9.0f, 6.0f, 3.0f,
1638
1639         3.0f, 6.0f, 9.0f, 9.0f, 6.0f, 3.0f,
1640         6.0f, 12.0f, 18.0f, 18.0f, 12.0f, 6.0f,
1641         9.0f, 18.0f, 27.0f, 27.0f, 18.0f, 9.0f,
1642         9.0f, 18.0f, 27.0f, 27.0f, 18.0f, 9.0f,
1643         6.0f, 12.0f, 18.0f, 18.0f, 12.0f, 6.0f,
1644         3.0f, 6.0f, 9.0f, 9.0f, 6.0f, 3.0f,
1645
1646         2.0f, 4.0f, 6.0f, 6.0f, 4.0f, 2.0f,
1647         4.0f, 8.0f, 12.0f, 12.0f, 8.0f, 4.0f,
1648         6.0f, 12.0f, 18.0f, 18.0f, 12.0f, 6.0f,
1649         6.0f, 12.0f, 18.0f, 18.0f, 12.0f, 6.0f,
1650         4.0f, 8.0f, 12.0f, 12.0f, 8.0f, 4.0f,
1651         2.0f, 4.0f, 6.0f, 6.0f, 4.0f, 2.0f,
1652
1653         1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f,
1654         2.0f, 4.0f, 6.0f, 6.0f, 4.0f, 2.0f,
1655         3.0f, 6.0f, 9.0f, 9.0f, 6.0f, 3.0f,
1656         3.0f, 6.0f, 9.0f, 9.0f, 6.0f, 3.0f,
1657         2.0f, 4.0f, 6.0f, 6.0f, 4.0f, 2.0f,
1658         1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f,
1659     };
1660
1661     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1662     {
1663         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1664     }
1665 }
1666
1667 TEST(deconvolution_f32_fw_gpu, basic3D_wsiz2x2x2_in1x1x2x2x2_stride2_nopad) {
1668     //  Filter : 2x2x2
1669     //  Input  : 2x2x2
1670     //  Output : 1x1
1671     //  Stride : 2x2x2
1672     //
1673     //  Input:
1674     //  8  0.5
1675     //  6  9
1676     //  8  0.5
1677     //  6  9
1678     //
1679     //  Filter
1680     //  -2   0.5
1681     //   3.5 1.5
1682     //  -2   0.5
1683     //   3.5 1.5
1684     //
1685
1686     const auto& engine = get_test_engine();
1687
1688     auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 2, 2, 2 } });
1689     auto weights = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 2, 2, 2 } });
1690
1691     set_values(input, { 8.f, 0.5f, 6.f, 9.f, 8.f, 0.5f, 6.f, 9.f });
1692     set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f, -2.0f, 0.5f, 3.5f, 1.5f });
1693     //set_values(input, { 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f });
1694     //set_values(weights, { 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f });
1695
1696     topology topology(
1697         input_layout("input", input.get_layout()),
1698         data("weights", weights),
1699         deconvolution("deconv", "input", { "weights" }, { 1,1,2,2,2 })
1700     );
1701
1702     network network(engine, topology);
1703     network.set_input_data("input", input);
1704
1705     auto outputs = network.execute();
1706     EXPECT_EQ(outputs.size(), size_t(1));
1707     EXPECT_EQ(outputs.begin()->first, "deconv");
1708
1709     auto output_prim = outputs.begin()->second.get_memory();
1710     auto output_ptr = output_prim.pointer<float>();
1711
1712     std::vector<float> expected_output_vec = {
1713         -16.f, 4.f, -1.f, 0.25f,
1714         28.f, 12.f, 1.75f, 0.75f,
1715         -12.f, 3.f, -18.f, 4.5f,
1716         21.f, 9.f, 31.5f, 13.5f,
1717         -16.f, 4.f, -1.f, 0.25f,
1718         28.f, 12.f, 1.75f, 0.75f,
1719         -12.f, 3.f, -18.f, 4.5f,
1720         21.f, 9.f, 31.5f, 13.5f,
1721         -16.f, 4.f, -1.f, 0.25f,
1722         28.f, 12.f, 1.75f, 0.75f,
1723         -12.f, 3.f, -18.f, 4.5f,
1724         21.f, 9.f, 31.5f, 13.5f,
1725         -16.f, 4.f, -1.f, 0.25f,
1726         28.f, 12.f, 1.75f, 0.75f,
1727         -12.f, 3.f, -18.f, 4.5f,
1728         21.f, 9.f, 31.5f, 13.5f
1729     };
1730
1731     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1732     {
1733         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1734     }
1735 }
1736
1737 TEST(deconvolution_f32_fw_gpu, basic3D_wsiz2x2x2_in1x1x2x2x2_stride2_pad1) {
1738     //  Filter : 2x2x2
1739     //  Input  : 2x2x2
1740     //  Output : 1x1
1741     //  Stride : 2x2x2
1742     //
1743     //  Input:
1744     //  8  0.5
1745     //  6  9
1746     //  8  0.5
1747     //  6  9
1748     //
1749     //  Filter
1750     //  -2   0.5
1751     //   3.5 1.5
1752     //  -2   0.5
1753     //   3.5 1.5
1754     //
1755     //  Output:
1756     //  12 1.75
1757     //   3 -18
1758     //  12 1.75
1759     //   3 -18
1760
1761     const auto& engine = get_test_engine();
1762
1763     auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 2, 2, 2 } });
1764     auto weights = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 2, 2, 2 } });
1765
1766     set_values(input, { 8.f, 0.5f, 6.f, 9.f, 8.f, 0.5f, 6.f, 9.f });
1767     set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f, -2.0f, 0.5f, 3.5f, 1.5f });
1768
1769     topology topology(
1770         input_layout("input", input.get_layout()),
1771         data("weights", weights),
1772         deconvolution("deconv", "input", { "weights" }, { 1,1,2,2,2 }, { 0, 0, -1, -1, -1 })
1773     );
1774
1775     network network(engine, topology);
1776     network.set_input_data("input", input);
1777
1778     auto outputs = network.execute();
1779     EXPECT_EQ(outputs.size(), size_t(1));
1780     EXPECT_EQ(outputs.begin()->first, "deconv");
1781
1782     auto output_prim = outputs.begin()->second.get_memory();
1783     auto output_ptr = output_prim.pointer<float>();
1784
1785     std::vector<float> expected_output_vec = {
1786         12.f, 1.75f, 3.f, -18.f,
1787         12.f, 1.75f, 3.f, -18.f
1788     };
1789
1790     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1791     {
1792         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1793     }
1794
1795 }