inference-engine/thirdparty/clDNN/tests/test_cases/deconvolution_gpu_test.cpp

   1 /*
   2 // Copyright (c) 2016 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 ///////////////////////////////////////////////////////////////////////////////////////////////////
  18
  19 #include <gtest/gtest.h>
  20 #include "api/CPP/memory.hpp"
  21 #include <api/CPP/input_layout.hpp>
  22 #include "api/CPP/deconvolution.hpp"
  23 #include <api/CPP/data.hpp>
  24 #include <api/CPP/topology.hpp>
  25 #include <api/CPP/network.hpp>
  26 #include <api/CPP/engine.hpp>
  27 #include "test_utils/test_utils.h"
  28 #include "test_utils/float16.h"
  29 #include "api/CPP/reorder.hpp"
  30
  31
  32 using namespace cldnn;
  33 using namespace tests;
  34
  35 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad) {
  36     //  Filter : 2x2
  37     //  Input  : 2x2
  38     //  Output : 3x3
  39     //
  40     //  Input:
  41     //  8  0.5
  42     //  6  9
  43     //
  44     //  Filter
  45     //  -2   0.5
  46     //   3.5 1.5
  47     //
  48     //  Bias
  49     //  2
  50     //
  51     //  Output:
  52     //  -14    5     2.25
  53     //   18    0.75  7.25
  54     //   23    42.5  15.5
  55
  56     const auto& engine = get_test_engine();
  57
  58     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
  59     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
  60     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
  61
  62     set_values(input, { 8.f, 0.5f, 6.f, 9.f });
  63     set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
  64     set_values(biases, { 2.0f });
  65
  66     topology topology(
  67         input_layout("input", input.get_layout()),
  68         data("weights", weights),
  69         data("biases", biases),
  70         deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1,1,1,1 })
  71     );
  72
  73     network network(engine, topology);
  74     network.set_input_data("input", input);
  75
  76     auto outputs = network.execute();
  77     EXPECT_EQ(outputs.size(), size_t(1));
  78     EXPECT_EQ(outputs.begin()->first, "deconv");
  79
  80     auto output_prim = outputs.begin()->second.get_memory();
  81
  82     auto output_ptr = output_prim.pointer<float>();
  83
  84     std::vector<float> expected_output_vec = {
  85         -14.f, 5.f, 2.25f,
  86         18.f, 0.75f, 7.25f,
  87         23.f, 42.5f, 15.5f
  88     };
  89
  90     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
  91     {
  92         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
  93     }
  94 }
  95
  96
  97 TEST(deconvolution_f32_fw_gpu, no_bias_basic_wsiz2x2_in2x2x1x1_nopad) {
  98     //  Filter : 2x2
  99     //  Input  : 2x2
 100     //  Output : 3x3
 101     //
 102     //  Input:
 103     //  8  0.5
 104     //  6  9
 105     //
 106     //  Filter
 107     //  -2   0.5
 108     //   3.5 1.5
 109     //
 110     //  no bias
 111     //
 112     //
 113     //  Output:
 114     //  -14    5     2.25
 115     //   18    0.75  7.25
 116     //   23    42.5  15.5
 117
 118     const auto& engine = get_test_engine();
 119
 120     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
 121     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
 122
 123     set_values(input, { 8.f, 0.5f, 6.f, 9.f });
 124     set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
 125
 126     topology topology(
 127         input_layout("input", input.get_layout()),
 128         data("weights", weights),
 129         deconvolution("deconv", "input", { "weights" })
 130     );
 131
 132     network network(engine, topology);
 133     network.set_input_data("input", input);
 134
 135     auto outputs = network.execute();
 136     EXPECT_EQ(outputs.size(), size_t(1));
 137     EXPECT_EQ(outputs.begin()->first, "deconv");
 138
 139     auto output_prim = outputs.begin()->second.get_memory();
 140
 141     auto output_ptr = output_prim.pointer<float>();
 142
 143     std::vector<float> expected_output_vec = {
 144         -16.f, 3.f, 0.25f,
 145         16.f, -1.25f, 5.25f,
 146         21.f, 40.5f, 13.5f
 147     };
 148
 149     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 150     {
 151         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 152     }
 153 }
 154
 155
 156 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad_bfyx) {    //  Filter : 2x2
 157     //  Input  : 2x2
 158     //  Output : 3x3
 159     //
 160     //  Input:
 161     //  8  0.5
 162     //  6  9
 163     //
 164     //  Filter
 165     //  -2   0.5
 166     //   3.5 1.5
 167     //
 168     //  Bias
 169     //  2
 170     //
 171     //  Output:
 172     //  -14    5     2.25
 173     //   18    0.75  7.25
 174     //   23    42.5  15.5
 175
 176     const auto& engine = get_test_engine();
 177
 178     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
 179     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
 180     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
 181
 182     set_values(input, { 8.f, 0.5f, 6.f, 9.f });
 183     set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
 184     set_values(biases, { 2.0f });
 185
 186     topology topology(
 187         input_layout("input", input.get_layout()),
 188         data("weights", weights),
 189         data("biases", biases),
 190         deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1,1,1,1 })
 191     );
 192
 193     network network(engine, topology);
 194     network.set_input_data("input", input);
 195
 196     auto outputs = network.execute();
 197     EXPECT_EQ(outputs.size(), size_t(1));
 198     EXPECT_EQ(outputs.begin()->first, "deconv");
 199
 200     auto output_prim = outputs.begin()->second.get_memory();
 201
 202     auto output_ptr = output_prim.pointer<float>();
 203
 204     std::vector<float> expected_output_vec = {
 205         -14.f, 5.f, 2.25f,
 206         18.f, 0.75f, 7.25f,
 207         23.f, 42.5f, 15.5f
 208     };
 209
 210     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 211     {
 212         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 213     }
 214 }
 215
 216 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_pad1) {
 217     //  Filter : 2x2
 218     //  Input  : 2x2
 219     //  Output : 1x1
 220     //  Pad    : 1x1
 221     //
 222     //  Input:
 223     //  8  0.5
 224     //  6  9
 225     //
 226     //  Filter
 227     //  -2   0.5
 228     //   3.5 1.5
 229     //
 230     //  Bias
 231     //  2
 232     //
 233     //  Output:
 234     //  0.75
 235
 236     const auto& engine = get_test_engine();
 237
 238     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
 239     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
 240     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
 241
 242     set_values(input, { 8.f, 0.5f, 6.f, 9.f });
 243     set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
 244     set_values(biases, { 2.0f });
 245
 246     topology topology(
 247         input_layout("input", input.get_layout()),
 248         data("weights", weights),
 249         data("biases", biases),
 250         deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 1, 1 }, { 0, 0, -1, -1 })
 251     );
 252
 253     network network(engine, topology);
 254     network.set_input_data("input", input);
 255
 256     auto outputs = network.execute();
 257     EXPECT_EQ(outputs.size(), size_t(1));
 258     EXPECT_EQ(outputs.begin()->first, "deconv");
 259
 260     auto output_prim = outputs.begin()->second.get_memory();
 261
 262     auto output_ptr = output_prim.pointer<float>();
 263
 264     EXPECT_FLOAT_EQ(0.75f, output_ptr[0]);
 265 }
 266
 267 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride2_nopad) {
 268     //  Filter : 2x2
 269     //  Input  : 2x2
 270     //  Output : 1x1
 271     //  Stride : 2x2
 272     //
 273     //  Input:
 274     //  8  0.5
 275     //  6  9
 276     //
 277     //  Filter
 278     //  -2   0.5
 279     //   3.5 1.5
 280     //
 281     //  Bias
 282     //  1
 283     //
 284     //  Output:
 285     //  0.75
 286
 287     const auto& engine = get_test_engine();
 288
 289     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
 290     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
 291     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
 292
 293     set_values(input, { 8.f, 0.5f, 6.f, 9.f });
 294     set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
 295     set_values(biases, { 1.0f });
 296
 297     topology topology(
 298         input_layout("input", input.get_layout()),
 299         data("weights", weights),
 300         data("biases", biases),
 301         deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1,1,2,2 })
 302     );
 303
 304     network network(engine, topology);
 305     network.set_input_data("input", input);
 306
 307     auto outputs = network.execute();
 308     EXPECT_EQ(outputs.size(), size_t(1));
 309     EXPECT_EQ(outputs.begin()->first, "deconv");
 310
 311     auto output_prim = outputs.begin()->second.get_memory();
 312
 313     auto output_ptr = output_prim.pointer<float>();
 314
 315     std::vector<float> expected_output_vec = {
 316         -15.f, 5.f, 0.f, 1.25f,
 317         29.f, 13.f, 2.75f, 1.75,
 318         -11.f, 4.f, -17.f, 5.5f,
 319         22.f, 10.f, 32.5f, 14.5f
 320     };
 321
 322     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 323     {
 324         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 325     }
 326 }
 327
 328 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride4_pad2) {
 329     //  Filter : 3x3
 330     //  Input  : 2x2
 331     //  Output : 1x1
 332     //  Stride : 4x4
 333     //  Pad    : 2x2
 334     //
 335     //  Input:
 336     //  8  0.5
 337     //  6  9
 338     //
 339     //  Filter
 340     //  -2   0.5   1
 341     //   3.5 1.5   2
 342     //   3   4     5
 343     //
 344     //  Bias
 345     //  0
 346     //
 347     //  Output:
 348     //  40   0    1.5
 349     //  0    0    0
 350     //  6    0   -18
 351
 352     const auto& engine = get_test_engine();
 353
 354     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
 355     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } });
 356     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
 357
 358     set_values(input, { 8.f, 0.5f, 6.f, 9.f });
 359     set_values(weights, { -2.0f, 0.5f, 1.f, 3.5f, 1.5f, 2.f, 3.f, 4.f, 5.f });
 360     set_values(biases, { 0.0f });
 361
 362     topology topology(
 363         input_layout("input", input.get_layout()),
 364         data("weights", weights),
 365         data("biases", biases),
 366         deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 4, 4 }, { 0, 0, -2, -2 })
 367     );
 368
 369     network network(engine, topology);
 370     network.set_input_data("input", input);
 371
 372     auto outputs = network.execute();
 373     EXPECT_EQ(outputs.size(), size_t(1));
 374     EXPECT_EQ(outputs.begin()->first, "deconv");
 375
 376     auto output_prim = outputs.begin()->second.get_memory();
 377
 378     auto output_ptr = output_prim.pointer<float>();
 379
 380     std::vector<float> expected_output_vec = {
 381         40.f, 0.f, 1.5f,
 382         0.f, 0.f, 0.f,
 383         6.f, 0.f, -18.f
 384     };
 385
 386     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 387     {
 388         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 389     }
 390 }
 391
 392 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_stride2_pad1) {
 393     //  Filter : 2x2
 394     //  Input  : 2x2x1x2
 395     //  Output : 2x2x1x2
 396     //  Stride : 2x2
 397     //  Pad    : 1x1
 398     //
 399     //  Input:
 400     //  8  0.5    1   3
 401     //  6  9      2   4
 402     //
 403     //  Filter
 404     //  -2   2
 405     //   7  -0.5
 406     //
 407     //  Bias
 408     //  1
 409     //
 410     //  Output:
 411     //  -3    4.5    0.5   22
 412     //   13  -17     5    -7
 413
 414     const auto& engine = get_test_engine();
 415
 416     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 1, 2, 2 } });
 417     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
 418     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
 419
 420     set_values(input, { 8.f, 1.f, 0.5f, 3.f, 6.f, 2.f, 9.f, 4.f });
 421     set_values(weights, { -2.f, 2.f, 7.f, -0.5f });
 422     set_values(biases, { 1.0f });
 423
 424     topology topology(
 425         input_layout("input", input.get_layout()),
 426         data("weights", weights),
 427         data("biases", biases),
 428         deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
 429     );
 430
 431     network network(engine, topology);
 432     network.set_input_data("input", input);
 433
 434     auto outputs = network.execute();
 435     EXPECT_EQ(outputs.size(), size_t(1));
 436     EXPECT_EQ(outputs.begin()->first, "deconv");
 437
 438     auto output_prim = outputs.begin()->second.get_memory();
 439
 440     auto output_ptr = output_prim.pointer<float>();
 441
 442     std::vector<float> expected_output_vec = {
 443         -3.f, 0.5f, 4.5f, 22.f,
 444         13.f, 5.f, -17.f, -7.f
 445     };
 446
 447     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 448     {
 449         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 450     }
 451 }
 452
 453 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1) {
 454     //  Filter : 2x2
 455     //  Input  : 2x2x1x1
 456     //  Output : 2x2x1x1
 457     //  Stride : 2x2
 458     //  Pad    : 1x1
 459     //
 460     //  Input:
 461     //  8  0.5
 462     //  6  9
 463     //
 464     //  Filter
 465     //  f0:-2   2
 466     //  f0: 7  -0.5
 467     //  f1:-2   2
 468     //  f1: 7  -0.5
 469     //
 470     //  Bias
 471     //  1  5
 472     //
 473     //  Output:
 474     //  f0: -3   4.5
 475     //  f0: 13   -17
 476     //  f1: 1    8.5
 477     //  f1: 17 - 13
 478
 479     const auto& engine = get_test_engine();
 480
 481     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
 482     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 1, 2, 2 } });
 483     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 1 } });
 484
 485     set_values(input, { 8.f, 0.5f, 6.f, 9.f });
 486     set_values(weights, { -2.f, -2.f, 2.f, 2.f, 7.f, 7.f, -0.5f, -0.5f });
 487     set_values(biases, { 1.0f, 5.0f });
 488
 489     topology topology(
 490         input_layout("input", input.get_layout()),
 491         data("weights", weights),
 492         data("biases", biases),
 493         deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
 494     );
 495
 496     network network(engine, topology);
 497     network.set_input_data("input", input);
 498
 499     auto outputs = network.execute();
 500     EXPECT_EQ(outputs.size(), size_t(1));
 501     EXPECT_EQ(outputs.begin()->first, "deconv");
 502
 503     auto output_prim = outputs.begin()->second.get_memory();
 504
 505     auto output_ptr = output_prim.pointer<float>();
 506
 507     std::vector<float> expected_output_vec = {
 508         -3.f, 1.f, 4.5f, 8.5f,
 509         13.f, 17.f, -17.f, -13.f
 510     };
 511
 512     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 513     {
 514         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 515     }
 516 }
 517
 518 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad1) {
 519     //  Filter : 2x2
 520     //  Input  : 2x2x1x2
 521     //  Output : 2x2x1x2
 522     //  Stride : 2x2
 523     //  Pad    : 1x1
 524     //
 525     //  Input:
 526     //  8  0.5    1   3
 527     //  6  9      2   4
 528     //
 529     //  Filter
 530     //  -2   2
 531     //   7  -0.5
 532     //
 533     //  Bias
 534     //  1
 535     //
 536     //  Output:
 537     //  -3    4.5    0.5   22
 538     //   13  -17     5    -7
 539
 540     const auto& engine = get_test_engine();
 541
 542     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 2, 2 } });
 543     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
 544     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
 545
 546     set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f });
 547     set_values(weights, { -2.f, 2.f, 7.f, -0.5f });
 548     set_values(biases, { 1.0f });
 549
 550     topology topology(
 551         input_layout("input", input.get_layout()),
 552         data("weights", weights),
 553         data("biases", biases),
 554         deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
 555     );
 556
 557     network network(engine, topology);
 558     network.set_input_data("input", input);
 559
 560     auto outputs = network.execute();
 561     EXPECT_EQ(outputs.size(), size_t(1));
 562     EXPECT_EQ(outputs.begin()->first, "deconv");
 563
 564     auto output_prim = outputs.begin()->second.get_memory();
 565
 566     auto output_ptr = output_prim.pointer<float>();
 567
 568     std::vector<float> expected_output_vec = {
 569         -3.f, 4.5f, 13.f, -17.f,
 570         .5f, 22.f, 5.f, -7.f
 571     };
 572
 573     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 574     {
 575         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 576     }
 577 }
 578
 579 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad1_input_padding) {
 580     //  Filter : 2x2
 581     //  Input  : 2x2x1x2
 582     //  Output : 2x2x1x2
 583     //  Stride : 2x2
 584     //  Out Padding   : 1x1
 585     //  Input Padding : 2x1 (with reorder)
 586     //
 587     //  Input:
 588     //  8  0.5    1   3
 589     //  6  9      2   4
 590     //
 591     //  Filter
 592     //  -2   2
 593     //   7  -0.5
 594     //
 595     //  Bias
 596     //  1
 597     //
 598     //  Output:
 599     //  -3    4.5    0.5   22
 600     //   13  -17     5    -7
 601
 602     const auto& engine = get_test_engine();
 603
 604     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
 605     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
 606     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
 607
 608     set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f });
 609     set_values(weights, { -2.f, 2.f, 7.f, -0.5f });
 610     set_values(biases, { 1.0f });
 611
 612     topology topology(
 613         input_layout("input", input.get_layout()),
 614         reorder("reorder", "input", input.get_layout().with_padding({ { 0, 0, 1, 2 }, 0 })),
 615         data("weights", weights),
 616         data("biases", biases),
 617         deconvolution("deconv", "reorder", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
 618     );
 619
 620     network network(engine, topology);
 621     network.set_input_data("input", input);
 622
 623     auto outputs = network.execute();
 624     EXPECT_EQ(outputs.size(), size_t(1));
 625     EXPECT_EQ(outputs.begin()->first, "deconv");
 626
 627     auto output_prim = outputs.begin()->second.get_memory();
 628
 629     auto output_ptr = output_prim.pointer<float>();
 630
 631     std::vector<float> expected_output_vec = {
 632         -3.f, 4.5f, 13.f, -17.f,
 633         .5f, 22.f, 5.f, -7.f
 634     };
 635
 636     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 637     {
 638         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 639     }
 640 }
 641
 642 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1_input_padding) {
 643     //  Filter : 2x2
 644     //  Input  : 2x2x1x1
 645     //  Output : 2x2x1x1
 646     //  Stride : 2x2
 647     //  Out Padding   : 1x1
 648     //  Input Padding : 2x1 (with reorder)
 649     //
 650     //  Input:
 651     //  8  0.5
 652     //  6  9
 653     //
 654     //  Filter
 655     //  f0:-2   2
 656     //  f0: 7  -0.5
 657     //  f1:-2   2
 658     //  f1: 7  -0.5
 659     //
 660     //  Bias
 661     //  1  5
 662     //
 663     //  Output:
 664     //  f0: -3   4.5
 665     //  f0: 13   -17
 666     //  f1: 1    8.5
 667     //  f1: 17 - 13
 668
 669     const auto& engine = get_test_engine();
 670
 671     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
 672     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 1, 2, 2 } });
 673     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
 674
 675     set_values(input, { 8.f, 0.5f, 6.f, 9.f });
 676     set_values(weights, { -2.f, -2.f, 2.f, 2.f, 7.f, 7.f, -0.5f, -0.5f });
 677     set_values(biases, { 1.0f, 5.0f });
 678
 679     topology topology(
 680         input_layout("input", input.get_layout()),
 681         reorder("reorder", "input", input.get_layout().with_padding({ { 0, 0, 1, 2 }, 0 })),
 682         data("weights", weights),
 683         data("biases", biases),
 684         deconvolution("deconv", "reorder", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
 685     );
 686
 687     network network(engine, topology);
 688     network.set_input_data("input", input);
 689
 690     auto outputs = network.execute();
 691     EXPECT_EQ(outputs.size(), size_t(1));
 692     EXPECT_EQ(outputs.begin()->first, "deconv");
 693
 694     auto output_prim = outputs.begin()->second.get_memory();
 695
 696     auto output_ptr = output_prim.pointer<float>();
 697
 698     std::vector<float> expected_output_vec = {
 699         -3.f, 1.f, 4.5f, 8.5f,
 700         13.f, 17.f, -17.f, -13.f
 701     };
 702
 703     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 704     {
 705         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 706     }
 707 }
 708
 709 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_yxfb_stride2_pad1) {
 710     //  Filter : 2x2
 711     //  Input  : 2x2x1x2
 712     //  Output : 2x2x1x2
 713     //  Stride : 2x2
 714     //  Pad    : 1x1
 715     //
 716     //  Input:
 717     //  8  0.5    1   3
 718     //  6  9      2   4
 719     //
 720     //  Filter
 721     //  -2   2
 722     //   7  -0.5
 723     //
 724     //  Bias
 725     //  1
 726     //
 727     //  Output:
 728     //  -3    4.5    0.5   22
 729     //   13  -17     5    -7
 730
 731     const auto& engine = get_test_engine();
 732
 733     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 2, 2 } });
 734     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
 735     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
 736
 737     set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f });
 738     set_values(weights, { -2.f, 2.f, 7.f, -0.5f });
 739     set_values(biases, { 1.0f });
 740
 741     topology topology(
 742         input_layout("input", input.get_layout()),
 743         data("weights", weights),
 744         data("biases", biases),
 745         deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
 746     );
 747
 748     network network(engine, topology);
 749     network.set_input_data("input", input);
 750
 751     auto outputs = network.execute();
 752     EXPECT_EQ(outputs.size(), size_t(1));
 753     EXPECT_EQ(outputs.begin()->first, "deconv");
 754
 755     auto output_prim = outputs.begin()->second.get_memory();
 756
 757     auto output_ptr = output_prim.pointer<float>();
 758
 759     std::vector<float> expected_output_vec = {
 760         -3.f, 4.5f, 13.f, -17.f,
 761         .5f, 22.f, 5.f, -7.f
 762     };
 763
 764     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 765     {
 766         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 767     }
 768 }
 769
 770 TEST(deconvolution_f16_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_yxfb_stride2_pad1) {
 771     //  Filter : 2x2
 772     //  Input  : 2x2x1x2
 773     //  Output : 2x2x1x2
 774     //  Stride : 2x2
 775     //  Pad    : 1x1
 776     //
 777     //  Input:
 778     //  8  0.5    1   3
 779     //  6  9      2   4
 780     //
 781     //  Filter
 782     //  -2   2
 783     //   7  -0.5
 784     //
 785     //  Bias
 786     //  1
 787     //
 788     //  Output:
 789     //  -3    4.5    0.5   22
 790     //   13  -17     5    -7
 791
 792     const auto& engine = get_test_engine();
 793
 794     auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 2, 1, 2, 2 } });
 795     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
 796     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
 797
 798     cldnn::build_options options;
 799     options.set_option(cldnn::build_option::optimize_data(true));
 800
 801     set_values(input, { FLOAT16(8.f), FLOAT16(0.5f), FLOAT16(6.f), FLOAT16(9.f),
 802         FLOAT16(1.f), FLOAT16(3.f), FLOAT16(2.f), FLOAT16(4.f) });
 803     set_values(weights, { -2.f, 2.f, 7.f, -0.5f});
 804     set_values(biases, { 1.0f });
 805
 806     topology topology(
 807         input_layout("input", input.get_layout()),
 808         data("weights", weights),
 809         data("biases", biases),
 810         deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
 811     );
 812
 813     network network(engine, topology, options);
 814     network.set_input_data("input", input);
 815
 816     auto outputs = network.execute();
 817     EXPECT_EQ(outputs.size(), size_t(1));
 818     EXPECT_EQ(outputs.begin()->first, "deconv");
 819
 820     auto output_prim = outputs.begin()->second.get_memory();
 821
 822     auto output_ptr = output_prim.pointer<uint16_t>();
 823
 824     std::vector<float> expected_output_vec = {
 825         -3.f, 4.5f, 13.f, -17.f,
 826         .5f, 22.f, 5.f, -7.f
 827     };
 828
 829     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 830     {
 831         EXPECT_FLOAT_EQ(expected_output_vec[i], float16_to_float32(output_ptr[i]));
 832     }
 833 }
 834
 835 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2) {
 836     //  Filter : 2x2x2x2
 837     //  Input  : 2x2x1x2
 838     //  Output : 2x2x1x2
 839     //  Stride : 2x2
 840     //  Pad    : 1x1
 841     //
 842     //  Input:
 843     //  8  0.5    1   3
 844     //  6  9      2   4
 845     //
 846     //  Filter1
 847     //  -2   2
 848     //   7  -0.5
 849     //
 850     //  Bias
 851     //  1
 852     //
 853     //  Filter2
 854     //  -4   1
 855     //  -9  -7
 856     //
 857     //  Bias
 858     //  -1
 859     //
 860     //  Output:
 861     //  -3    4.5    -8   -28
 862     //   13  -17     1    -17
 863
 864     const auto& engine = get_test_engine();
 865
 866     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 2, 2 } });
 867     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
 868     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
 869     auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
 870     auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
 871
 872     set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f });
 873     set_values(weights, { -2.f, 2.f, 7.f, -0.5f });
 874     set_values(biases, { 1.0f });
 875     set_values(weights2, { -4.f, 1.f, -9.f, -7.f });
 876     set_values(biases2, { -1.0f });
 877
 878     topology topology(
 879         input_layout("input", input.get_layout()),
 880         data("weights", weights),
 881         data("biases", biases),
 882         data("weights2", weights2),
 883         data("biases2", biases2),
 884         deconvolution("deconv", "input", { "weights", "weights2" }, { "biases", "biases2" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
 885     );
 886
 887     network network(engine, topology);
 888     network.set_input_data("input", input);
 889
 890     auto outputs = network.execute();
 891     EXPECT_EQ(outputs.size(), size_t(1));
 892     EXPECT_EQ(outputs.begin()->first, "deconv");
 893
 894     auto output_prim = outputs.begin()->second.get_memory();
 895
 896     auto output_ptr = output_prim.pointer<float>();
 897
 898     std::vector<float> expected_output_vec = {
 899         -3.f, 4.5f, 13.f, -17.f,
 900         -8.f, -28.f, 1.f, -17.f
 901     };
 902
 903     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 904     {
 905         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 906     }
 907 }
 908
 909 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group2) {
 910     //  data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2
 911
 912     engine engine;
 913
 914     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 2, 2 } });
 915     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
 916     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
 917
 918     set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f });
 919     set_values(weights, {
 920         -2.f, 2.f, 7.f, -0.5f,
 921         -4.f, 1.f, -9.f, -7.f
 922     });
 923     set_values(biases, { 1.0f, -1.0f });
 924
 925     topology topology(
 926         input_layout("input", input.get_layout()),
 927         data("weights", weights),
 928         data("biases", biases),
 929         deconvolution("deconv", "input", { "weights" }, { "biases" }, 2, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
 930     );
 931
 932     network network(engine, topology);
 933     network.set_input_data("input", input);
 934
 935     auto outputs = network.execute();
 936     EXPECT_EQ(outputs.size(), size_t(1));
 937     EXPECT_EQ(outputs.begin()->first, "deconv");
 938
 939     auto output_prim = outputs.begin()->second.get_memory();
 940
 941     auto output_ptr = output_prim.pointer<float>();
 942
 943     std::vector<float> expected_output_vec = {
 944         -3.f, 4.5f, 13.f, -17.f,
 945         -8.f, -28.f, 1.f, -17.f
 946     };
 947
 948     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
 949     {
 950         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
 951     }
 952 }
 953
 954 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2_depthwise_sep_opt) {
 955     //  Test for depthwise separable optimization, there are 16 weights and biases (split 16)
 956     //  data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2
 957
 958     const auto& engine = get_test_engine();
 959
 960     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 2, 2 } });
 961     set_values(input,
 962     { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
 963         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
 964         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
 965         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
 966         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
 967         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
 968         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
 969         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f
 970     });
 971
 972     topology topology(input_layout("input", input.get_layout()));
 973
 974     std::vector<primitive_id> weights_vec;
 975     std::vector<primitive_id> bias_vec;
 976
 977     for (uint32_t i = 0; i < 8; i++)
 978     {
 979         auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
 980         auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
 981         auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
 982         auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
 983
 984         set_values(weights, { -2.f, 2.f, 7.f, -0.5f });
 985         set_values(biases, { 1.0f });
 986         set_values(weights2, { -4.f, 1.f, -9.f, -7.f });
 987         set_values(biases2, { -1.0f });
 988
 989         primitive_id weights_id = "weights_" + std::to_string(i);
 990         primitive_id weights2_id = "weights2_" + std::to_string(i);
 991         primitive_id bias_id = "biases_" + std::to_string(i);
 992         primitive_id bias2_id = "biases2_" + std::to_string(i);
 993
 994         weights_vec.push_back(weights_id);
 995         weights_vec.push_back(weights2_id);
 996         bias_vec.push_back(bias_id);
 997         bias_vec.push_back(bias2_id);
 998
 999         topology.add(
1000             data(weights_id, weights),
1001             data(bias_id, biases),
1002             data(weights2_id, weights2),
1003             data(bias2_id, biases2)
1004             );
1005     }
1006
1007     topology.add(deconvolution("deconv", "input", weights_vec, bias_vec, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }));
1008
1009     network network(engine, topology);
1010     network.set_input_data("input", input);
1011
1012     auto outputs = network.execute();
1013     EXPECT_EQ(outputs.size(), size_t(1));
1014     EXPECT_EQ(outputs.begin()->first, "deconv");
1015
1016     auto output_prim = outputs.begin()->second.get_memory();
1017
1018     auto output_ptr = output_prim.pointer<float>();
1019
1020     std::vector<float> expected_output_vec = {
1021         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1022         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1023         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1024         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1025         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1026         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1027         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1028         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1029     };
1030
1031     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1032     {
1033         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1034     }
1035 }
1036
1037 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group16) {
1038     //  Test for depthwise separable optimization, there are 16 joined weights and biases (group 16)
1039     //  data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2_depthwise_sep_opt
1040
1041     engine engine;
1042
1043     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 2, 2 } });
1044     set_values(input,
1045     { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1046         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1047         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1048         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1049         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1050         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1051         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1052         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f
1053     });
1054
1055     topology topology(input_layout("input", input.get_layout()));
1056
1057     std::vector<primitive_id> weights_vec;
1058     std::vector<primitive_id> bias_vec;
1059
1060     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 16, 1, 2, 2 } });
1061     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 16, 1 } });
1062
1063     set_values(weights,
1064         {
1065             -2.f, 2.f, 7.f, -0.5f,
1066             -4.f, 1.f, -9.f, -7.f,
1067             -2.f, 2.f, 7.f, -0.5f,
1068             -4.f, 1.f, -9.f, -7.f,
1069             -2.f, 2.f, 7.f, -0.5f,
1070             -4.f, 1.f, -9.f, -7.f,
1071             -2.f, 2.f, 7.f, -0.5f,
1072             -4.f, 1.f, -9.f, -7.f,
1073             -2.f, 2.f, 7.f, -0.5f,
1074             -4.f, 1.f, -9.f, -7.f,
1075             -2.f, 2.f, 7.f, -0.5f,
1076             -4.f, 1.f, -9.f, -7.f,
1077             -2.f, 2.f, 7.f, -0.5f,
1078             -4.f, 1.f, -9.f, -7.f,
1079             -2.f, 2.f, 7.f, -0.5f,
1080             -4.f, 1.f, -9.f, -7.f
1081         }
1082     );
1083     set_values(biases, { 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f });
1084     topology.add(
1085         data("weights", weights),
1086         data("bias", biases)
1087     );
1088
1089     topology.add(deconvolution("deconv", "input", { "weights" }, { "bias" }, 16, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }));
1090
1091     network network(engine, topology);
1092     network.set_input_data("input", input);
1093
1094     auto outputs = network.execute();
1095     EXPECT_EQ(outputs.size(), size_t(1));
1096     EXPECT_EQ(outputs.begin()->first, "deconv");
1097
1098     auto output_prim = outputs.begin()->second.get_memory();
1099
1100     auto output_ptr = output_prim.pointer<float>();
1101
1102     std::vector<float> expected_output_vec = {
1103         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1104         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1105         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1106         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1107         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1108         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1109         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1110         -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1111     };
1112
1113     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1114     {
1115         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1116     }
1117 }
1118
1119 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2_depthwise_sep_opt_ofm2) {
1120     //  Test for depthwise separable optimization, there are 16 weights and biases (split 16)
1121     //  data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2
1122
1123     const auto& engine = get_test_engine();
1124
1125     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 2, 2 } });
1126     set_values(input,
1127     { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1128         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1129         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1130         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1131         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1132         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1133         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1134         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f
1135     });
1136
1137     topology topology(input_layout("input", input.get_layout()));
1138
1139     std::vector<primitive_id> weights_vec;
1140     std::vector<primitive_id> bias_vec;
1141
1142     for (uint32_t i = 0; i < 8; i++)
1143     {
1144         auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
1145         auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
1146         auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
1147         auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
1148
1149         set_values(weights, { -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f });
1150         set_values(biases, { 1.0f, 1.0f });
1151         set_values(weights2, { -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f });
1152         set_values(biases2, { -1.0f, -1.0f });
1153
1154         primitive_id weights_id = "weights_" + std::to_string(i);
1155         primitive_id weights2_id = "weights2_" + std::to_string(i);
1156         primitive_id bias_id = "biases_" + std::to_string(i);
1157         primitive_id bias2_id = "biases2_" + std::to_string(i);
1158
1159         weights_vec.push_back(weights_id);
1160         weights_vec.push_back(weights2_id);
1161         bias_vec.push_back(bias_id);
1162         bias_vec.push_back(bias2_id);
1163
1164         topology.add(
1165             data(weights_id, weights),
1166             data(bias_id, biases),
1167             data(weights2_id, weights2),
1168             data(bias2_id, biases2)
1169         );
1170     }
1171
1172     topology.add(deconvolution("deconv", "input", weights_vec, bias_vec, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }));
1173
1174     network network(engine, topology);
1175     network.set_input_data("input", input);
1176
1177     auto outputs = network.execute();
1178     EXPECT_EQ(outputs.size(), size_t(1));
1179     EXPECT_EQ(outputs.begin()->first, "deconv");
1180
1181     auto output_prim = outputs.begin()->second.get_memory();
1182
1183     auto output_ptr = output_prim.pointer<float>();
1184
1185     std::vector<float> expected_output_vec = {
1186         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1187         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1188         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1189         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1190         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1191         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1192         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1193         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1194     };
1195
1196     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1197     {
1198         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1199     }
1200 }
1201
1202 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group16_ofm2) {
1203     //  Test for depthwise separable optimization, there are 16 joined weights and biases (group 16)
1204     //  data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2_depthwise_sep_opt_ofm2
1205
1206     engine engine;
1207
1208     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 2, 2 } });
1209     set_values(input,
1210     { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1211         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1212         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1213         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1214         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1215         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1216         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
1217         8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f
1218     });
1219
1220     topology topology(input_layout("input", input.get_layout()));
1221
1222     std::vector<primitive_id> weights_vec;
1223     std::vector<primitive_id> bias_vec;
1224
1225     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 32, 1, 2, 2 } });
1226     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 32, 1 } });
1227
1228     set_values(weights,
1229         {
1230             -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1231             -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1232             -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1233             -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1234             -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1235             -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1236             -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1237             -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1238             -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1239             -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1240             -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1241             -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1242             -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1243             -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1244             -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
1245             -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
1246         }
1247     );
1248
1249     set_values(biases,
1250         {
1251             1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f,
1252             1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f
1253         }
1254     );
1255
1256     topology.add(
1257         data("weights", weights),
1258         data("bias", biases)
1259     );
1260
1261     topology.add(deconvolution("deconv", "input", { "weights" }, { "bias" }, 16, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }));
1262
1263     network network(engine, topology);
1264     network.set_input_data("input", input);
1265
1266     auto outputs = network.execute();
1267     EXPECT_EQ(outputs.size(), size_t(1));
1268     EXPECT_EQ(outputs.begin()->first, "deconv");
1269
1270     auto output_prim = outputs.begin()->second.get_memory();
1271
1272     auto output_ptr = output_prim.pointer<float>();
1273
1274     std::vector<float> expected_output_vec = {
1275         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1276         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1277         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1278         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1279         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1280         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1281         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1282         -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
1283     };
1284
1285     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1286     {
1287         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1288     }
1289 }
1290
1291
1292 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_split2_ofm3) {
1293     //  Filter : 1x1
1294     //  Stride : 1x1
1295     //  Input  : 1x1x4
1296     //  Output : 1x1x6
1297     //
1298     //  Input:
1299     //  f0:  1.5
1300     //  f1:  0.5
1301     //
1302     //  f2:  2
1303     //  f3: -1.0
1304     //
1305     //  Filter1:
1306     //  -2   1   ofm=0
1307     //   1   3   ofm=1
1308     //   0.5 8   ofm=2
1309     //  Bias1:
1310     //   1   5   3
1311     //
1312     //  Filter2:
1313     //   4  -4   ofm=3
1314     //   2   0.5 ofm=4
1315     //  -0.5 3   ofm=5
1316     //
1317     //  Bias2:
1318     //  -1   2.5 2
1319     //
1320     //  Output:
1321     //  -1.5
1322     //   8
1323     //   7.75
1324     //
1325     //   11
1326     //   6
1327     //  -2
1328
1329     const auto& engine = get_test_engine();
1330
1331     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 4, 1, 1 } });
1332     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 3, 2, 1, 1 } });
1333     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
1334     auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 3, 2, 1, 1 } });
1335     auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
1336
1337     set_values(input, {
1338         1.5f, 0.5f, 2.0f, -1.0f
1339     });
1340     set_values(weights, { -2.0f, 1.0f, 1.0f, 3.0f, 0.5f, 8.0f });
1341     set_values(biases, { 1.0f, 5.0f, 3.0f });
1342     set_values(weights2, { 4.0f, -4.0f, 2.0f, 0.5f, -0.5f, 3.0f });
1343     set_values(biases2, { -1.0f, 2.5f, 2.0f });
1344
1345     topology topology(
1346         input_layout("input", input.get_layout()),
1347         data("weights", weights),
1348         data("biases", biases),
1349         data("weights2", weights2),
1350         data("biases2", biases2),
1351         deconvolution("deconv", "input", { "weights", "weights2" }, { "biases", "biases2" }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 })
1352     );
1353
1354     network network(engine, topology);
1355     network.set_input_data("input", input);
1356
1357     auto outputs = network.execute();
1358     EXPECT_EQ(outputs.size(), size_t(1));
1359     EXPECT_EQ(outputs.begin()->first, "deconv");
1360
1361     auto output_prim = outputs.begin()->second.get_memory();
1362
1363     auto output_ptr = output_prim.pointer<float>();
1364
1365     std::vector<float> expected_output_vec = {
1366         -1.5f, 8.0f, 7.75f, 11.0f, 6.0f, -2.0f
1367     };
1368     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1369     {
1370         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1371     }
1372 }
1373
1374 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_group2_ofm3) {
1375     //  data is similar as in basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_split2_ofm3
1376
1377     engine engine;
1378
1379     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 4, 1, 1 } });
1380     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 6, 2, 1, 1 } });
1381     auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 6, 1 } });
1382
1383     set_values(input, {
1384         1.5f, 0.5f, 2.0f, -1.0f
1385     });
1386     set_values(weights, {
1387         -2.0f, 1.0f, 1.0f, 3.0f, 0.5f, 8.0f,
1388         4.0f, -4.0f, 2.0f, 0.5f, -0.5f, 3.0f
1389     });
1390     set_values(biases, {
1391         1.0f, 5.0f, 3.0f,
1392         -1.0f, 2.5f, 2.0f
1393     });
1394
1395     topology topology(
1396         input_layout("input", input.get_layout()),
1397         data("weights", weights),
1398         data("biases", biases),
1399         deconvolution("deconv", "input", { "weights" }, { "biases" }, 2, { 1, 1, 1, 1 }, { 0, 0, 0, 0 })
1400     );
1401
1402     network network(engine, topology);
1403     network.set_input_data("input", input);
1404
1405     auto outputs = network.execute();
1406     EXPECT_EQ(outputs.size(), size_t(1));
1407     EXPECT_EQ(outputs.begin()->first, "deconv");
1408
1409     auto output_prim = outputs.begin()->second.get_memory();
1410
1411     auto output_ptr = output_prim.pointer<float>();
1412
1413     std::vector<float> expected_output_vec = {
1414         -1.5f, 8.0f, 7.75f, 11.0f, 6.0f, -2.0f
1415     };
1416     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
1417     {
1418         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
1419     }
1420 }