inference-engine/thirdparty/clDNN/tests/test_cases/depth_concatenate_gpu_test.cpp

   1 /*
   2 // Copyright (c) 2016 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 ///////////////////////////////////////////////////////////////////////////////////////////////////
  18
  19 #include <gtest/gtest.h>
  20 #include "api/CPP/memory.hpp"
  21 #include <api/CPP/input_layout.hpp>
  22 #include "api/CPP/concatenation.hpp"
  23 #include "api/CPP/convolution.hpp"
  24 #include "api/CPP/data.hpp"
  25 #include "api/CPP/pooling.hpp"
  26 #include "api/CPP/upsampling.hpp"
  27 #include <api/CPP/topology.hpp>
  28 #include <api/CPP/network.hpp>
  29 #include <api/CPP/engine.hpp>
  30 #include "test_utils/test_utils.h"
  31
  32 using namespace cldnn;
  33 using namespace tests;
  34
  35 template<typename T>
  36 std::vector<T> generate_random_input(size_t b, size_t f, size_t y, size_t x, int min, int max) {
  37     static std::default_random_engine generator(random_seed);
  38     int k = 8; // 1/k is the resolution of the floating point numbers
  39     std::uniform_int_distribution<int> distribution(k * min, k * max);
  40     std::vector<T> v(b*f*x*y);
  41     for (size_t i = 0; i < b*f*x*y; ++i) {
  42         v[i] = (T)distribution(generator);
  43         v[i] /= k;
  44     }
  45     return v;
  46 }
  47
  48 TEST(depth_concatenate_f32_gpu, test01) {
  49     //  Input count : 2
  50     //  Input1 : 2x 1x1 x 2
  51     //  Input2 : 2x 1x1 x 3
  52     //
  53     //  Input1:
  54     //  0.5  0.7  :f0
  55     //  0.2  0.4  :f1
  56     //
  57     //  Input2:
  58     //  1    0.1  :f0
  59     //  0.3 -0.5  :f1
  60     //  0   -0.2  :f2
  61     //
  62     //  Output:
  63     //  0.5  0.7  :f0
  64     //  0.2  0.4  :f1
  65     //  1    0.1  :f2
  66     //  0.3 -0.5  :f3
  67     //  0   -0.2  :f4
  68     //
  69
  70     const auto& engine = get_test_engine();
  71     auto input1 = memory::allocate(engine, {data_types::f32, format::yxfb, { 2,2,1,1 }});
  72     auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2,3,1,1 }});
  73
  74     set_values(input1, { 0.5f, 0.7f, 0.2f, 0.4f });
  75     set_values(input2, { 1.0f, 0.1f, 0.3f, -0.5f, 0.0f, -0.2f });
  76
  77     topology topology;
  78     topology.add(input_layout("input1", input1.get_layout()));
  79     topology.add(input_layout("input2", input2.get_layout()));
  80     topology.add(concatenation("depth1", { "input1", "input2" }, concatenation::along_f));
  81
  82     network network(engine, topology);
  83
  84     network.set_input_data("input1", input1);
  85     network.set_input_data("input2", input2);
  86
  87     auto outputs = network.execute({});
  88     EXPECT_EQ(outputs.size(), size_t(1));
  89     EXPECT_EQ(outputs.begin()->first, "depth1");
  90
  91     auto output = outputs.at("depth1").get_memory();
  92
  93     auto output_ptr = output.pointer<float>();
  94     EXPECT_FLOAT_EQ(0.5f, output_ptr[0]);
  95     EXPECT_FLOAT_EQ(0.7f, output_ptr[1]);
  96     EXPECT_FLOAT_EQ(0.2f, output_ptr[2]);
  97     EXPECT_FLOAT_EQ(0.4f, output_ptr[3]);
  98     EXPECT_FLOAT_EQ(1.0f, output_ptr[4]);
  99     EXPECT_FLOAT_EQ(0.1f, output_ptr[5]);
 100     EXPECT_FLOAT_EQ(0.3f, output_ptr[6]);
 101     EXPECT_FLOAT_EQ(-0.5f, output_ptr[7]);
 102     EXPECT_FLOAT_EQ(0.0f, output_ptr[8]);
 103     EXPECT_FLOAT_EQ(-0.2f, output_ptr[9]);
 104 }
 105
 106 template<data_types DType>
 107 void concat_basic_with_reorder()
 108 {
 109     //  Input count : 2
 110     //  Input1 : 2x 1x1 x 2
 111     //  Input2 : 2x 1x1 x 3
 112     //
 113     //  Input1:
 114     //  2.5  3.7  :f0
 115     //  0.2  1.4  :f1
 116     //
 117     //  Input2:
 118     //  1    4.1  :f0
 119     // -4.3 -7.5  :f1
 120     //  0   -0.2  :f2
 121     //
 122     //  Output:
 123     //  2    3  :f0
 124     //  0    1  :f1
 125     //  1    4  :f2
 126     // -4   -7  :f3
 127     //  0    0  :f4
 128     //
 129
 130     const auto& engine = get_test_engine();
 131     auto input1 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2,2,1,1 } });
 132     auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2,3,1,1 } });
 133     auto outs = { 2.0f, 3.0f, 0.0f, 1.0f, 1.0f, 4.0f, -4.0f, -7.0f, 0.0f, 0.0f };
 134     set_values(input1, { 2.5f, 3.7f, 0.2f, 1.4f });
 135     set_values(input2, { 1.0f, 4.1f, -4.3f, -7.5f, 0.0f, -0.2f });
 136
 137     topology topology;
 138     topology.add(input_layout("input1", input1.get_layout()));
 139     topology.add(input_layout("input2", input2.get_layout()));
 140     topology.add(reorder("to_int1", "input1", { DType, format::yxfb,{ 2,2,1,1 } }));
 141     topology.add(reorder("to_int2", "input2", { DType, format::yxfb,{ 2,3,1,1 } }));
 142     topology.add(concatenation("depth1", { "to_int1", "to_int2" }, concatenation::along_f));
 143     topology.add(reorder("to_float", "depth1", { data_types::f32, format::yxfb,{ 2,5,1,1 } }));
 144
 145     network network(engine, topology);
 146
 147     network.set_input_data("input1", input1);
 148     network.set_input_data("input2", input2);
 149
 150     auto outputs = network.execute({});
 151     ASSERT_EQ(outputs.size(), size_t(1));
 152     EXPECT_EQ(outputs.begin()->first, "to_float");
 153
 154     auto output = outputs.at("to_float").get_memory();
 155
 156     auto output_ptr = output.pointer<float>();
 157     int ptr_cntr = 0;
 158     for (const auto& ref : outs)
 159     {
 160         EXPECT_FLOAT_EQ(ref, output_ptr[ptr_cntr++]);
 161     }
 162 }
 163
 164 TEST(depth_concatenate_int8_gpu, concat_basic) {
 165     concat_basic_with_reorder<data_types::i8>();
 166 }
 167
 168 TEST(depth_concatenate_int32_gpu, concat_basic) {
 169     concat_basic_with_reorder<data_types::i32>();
 170 }
 171
 172 TEST(depth_concatenate_int64_gpu, concat_basic) {
 173     concat_basic_with_reorder<data_types::i64>();
 174 }
 175
 176 TEST(depth_concatenate_f32_gpu, test02) {
 177     //  Input count : 3 (yxfb, yxfb, bfyx)
 178     //  Input1 : 2x 1x1 x 2
 179     //  Input2 : 2x 1x1 x 3
 180     //  Input3 : 2x 1x1 x 3
 181     //
 182     //  Input1 (yxfb):
 183     //  0.5  0.7  :f0
 184     //  0.2  0.4  :f1
 185     //
 186     //  Input2 (yxfb):
 187     //  1    0.1  :f0
 188     //  0.3 -0.5  :f1
 189     //  0   -0.2  :f2
 190     //
 191     //  Input3 (bfyx):
 192     //  1    0.1  :f0
 193     //  0.3 -0.5  :f1
 194     //  0   -0.2  :f2
 195     //
 196     //  Output:
 197     //  0.5  0.7  :f0
 198     //  0.2  0.4  :f1
 199     //  1    0.1  :f2
 200     //  0.3 -0.5  :f3
 201     //  0   -0.2  :f4
 202     //  1    0.1  :f5
 203     //  0.3 -0.5  :f6
 204     //  0   -0.2  :f7
 205     //
 206
 207     const auto& engine = get_test_engine();
 208     auto input1 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2,2,1,1 } });
 209     auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2,3,1,1 } });
 210     auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2,3,1,1 } });
 211
 212     set_values(input1, { 0.5f, 0.7f, 0.2f, 0.4f });
 213     set_values(input2, { 1.0f, 0.1f, 0.3f, -0.5f, 0.0f, -0.2f });
 214     set_values(input3, { 1.0f, 0.3f, 0.0f, 0.1f, -0.5f, -0.2f });
 215
 216     topology topology;
 217     topology.add(input_layout("input1", input1.get_layout()));
 218     topology.add(input_layout("input2", input2.get_layout()));
 219     topology.add(input_layout("input3", input3.get_layout()));
 220     topology.add(concatenation("depth1", { "input1", "input2", "input3" }, concatenation::along_f));
 221
 222     network network(engine, topology);
 223
 224     network.set_input_data("input1", input1);
 225     network.set_input_data("input2", input2);
 226     network.set_input_data("input3", input3);
 227
 228     auto outputs = network.execute({});
 229     EXPECT_EQ(outputs.size(), size_t(1));
 230     EXPECT_EQ(outputs.begin()->first, "depth1");
 231
 232     auto output = outputs.at("depth1").get_memory();
 233
 234     auto output_ptr = output.pointer<float>();
 235     EXPECT_FLOAT_EQ(0.5f, output_ptr[0]);
 236     EXPECT_FLOAT_EQ(0.7f, output_ptr[1]);
 237     EXPECT_FLOAT_EQ(0.2f, output_ptr[2]);
 238     EXPECT_FLOAT_EQ(0.4f, output_ptr[3]);
 239     EXPECT_FLOAT_EQ(1.0f, output_ptr[4]);
 240     EXPECT_FLOAT_EQ(0.1f, output_ptr[5]);
 241     EXPECT_FLOAT_EQ(0.3f, output_ptr[6]);
 242     EXPECT_FLOAT_EQ(-0.5f, output_ptr[7]);
 243     EXPECT_FLOAT_EQ(0.0f, output_ptr[8]);
 244     EXPECT_FLOAT_EQ(-0.2f, output_ptr[9]);
 245     EXPECT_FLOAT_EQ(1.0f, output_ptr[10]);
 246     EXPECT_FLOAT_EQ(0.1f, output_ptr[11]);
 247     EXPECT_FLOAT_EQ(0.3f, output_ptr[12]);
 248     EXPECT_FLOAT_EQ(-0.5f, output_ptr[13]);
 249     EXPECT_FLOAT_EQ(0.0f, output_ptr[14]);
 250     EXPECT_FLOAT_EQ(-0.2f, output_ptr[15]);
 251 }
 252
 253 TEST(concatenate_f32_gpu, test_concatenation_of_pool_and_unpool)
 254 {
 255     engine engine;
 256     auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
 257     auto weights = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 2, 1 } });
 258
 259     set_values(input1, { 16.0f, 32.0f, 128.0f, 256.0f });
 260     set_values(weights, { .1f, .2f });
 261     topology topology;
 262     topology.add(input_layout("input1", input1.get_layout()));
 263     topology.add(pooling("pool1", "input1",
 264         cldnn::pooling_mode::max,
 265         { 1,1,2,1 },          /*kernel*/
 266         { 1,1,1,1 }           /*stride*/
 267     ));
 268     topology.add(upsampling("unpool1", "input1", 1, 0, upsampling_sample_type::nearest));
 269     topology.add(concatenation("concat1", { "pool1", "unpool1" }, cldnn::concatenation::along_x));
 270     topology.add(data("weights", weights)),
 271     topology.add(convolution("conv", "concat1", { "weights" }));
 272
 273     cldnn::build_options options;
 274     options.set_option(cldnn::build_option::optimize_data(true));
 275     network network(engine, topology, options);
 276     network.set_input_data("input1", input1);
 277
 278     auto outputs = network.execute({});
 279     auto output = outputs.at("conv").get_memory();
 280     std::vector<float> out_ref = { 6.4f, 8.f, 51.2f, 64.f };
 281     auto output_ptr = output.pointer<float>();
 282     for (int i=0; i<4; i++)
 283     {
 284         EXPECT_NEAR(output_ptr[i], out_ref[i], 1e-3);
 285     }
 286 }
 287
 288 TEST(depth_concatenate_f32_gpu, test03_cascade_concat_opt) {
 289     //  Test for cascade concatenation optimization.
 290     //  Despite having concatenations one after another and connected to different non padded activation primitives,
 291     //  graph should remove all concatenations from execution.
 292
 293     const auto& engine = get_test_engine();
 294     auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1,2,2,1 } });
 295
 296     set_values(input1, { 16.0f, 32.0f, 128.0f, 256.0f });
 297
 298     topology topology;
 299     topology.add(input_layout("input1", input1.get_layout()));
 300     topology.add(activation("relu1", "input1", activation_relu));
 301     topology.add(activation("relu2", "relu1", activation_sqrt));
 302     topology.add(concatenation("depth1", { "relu2", "relu1" }, concatenation::along_f));
 303     topology.add(activation("relu3", "depth1", activation_sqrt));
 304     topology.add(concatenation("depth2", { "relu3", "depth1" }, concatenation::along_f));
 305     topology.add(activation("relu4", "depth2", activation_sqrt));
 306     topology.add(concatenation("depth3", { "relu4", "depth2" }, concatenation::along_f));
 307     topology.add(activation("relu5", "depth3", activation_relu));
 308
 309     cldnn::build_options options;
 310     options.set_option(cldnn::build_option::optimize_data(true));
 311     network network(engine, topology, options);
 312
 313     network.set_input_data("input1", input1);
 314
 315     auto outputs = network.execute({});
 316     auto output_prim = outputs.begin()->second.get_memory();
 317
 318     auto output_ptr = output_prim.pointer<float>();
 319     auto executed_primitives = network.get_executed_primitives();
 320
 321     EXPECT_TRUE(executed_primitives.count("depth1") == 0);
 322     EXPECT_TRUE(executed_primitives.count("depth2") == 0);
 323     EXPECT_TRUE(executed_primitives.count("depth3") == 0);
 324
 325     EXPECT_NEAR(1.4142f, output_ptr[0], 1e-3);
 326     EXPECT_NEAR(1.5422f, output_ptr[1], 1e-3);
 327     EXPECT_NEAR(1.8340f, output_ptr[2], 1e-3);
 328     EXPECT_NEAR(2.0f, output_ptr[3], 1e-3);
 329     EXPECT_NEAR(2.0f, output_ptr[4], 1e-3);
 330     EXPECT_NEAR(2.3784f, output_ptr[5], 1e-3);
 331     EXPECT_NEAR(3.3635f, output_ptr[6], 1e-3);
 332     EXPECT_NEAR(4.0f, output_ptr[7], 1e-3);
 333     EXPECT_NEAR(2.0f, output_ptr[8], 1e-3);
 334     EXPECT_NEAR(2.3784f, output_ptr[9], 1e-3);
 335     EXPECT_NEAR(3.3635f, output_ptr[10], 1e-3);
 336     EXPECT_NEAR(4.0f, output_ptr[11], 1e-3);
 337     EXPECT_NEAR(4.0f, output_ptr[12], 1e-3);
 338     EXPECT_NEAR(5.6568f, output_ptr[13], 1e-3);
 339     EXPECT_NEAR(11.3137f, output_ptr[14], 1e-3);
 340     EXPECT_NEAR(16.0f, output_ptr[15], 1e-3);
 341
 342 }
 343
 344 TEST(depth_concatenate_f32_gpu, test04_fused_relu) {
 345     // 2 inputs of size 3x10x10 concatenated on f axis with fused relu
 346
 347     const auto& engine = get_test_engine();
 348     auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1,3,10,10 } });
 349     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1,3,10,10 } });
 350
 351     std::vector<float> input1_vec = generate_random_input<float>(1, 3, 10, 10, -10, 10);
 352     set_values(input1, input1_vec);
 353     std::vector<float> input2_vec = generate_random_input<float>(1, 3, 10, 10, -10, 10);
 354     set_values(input2, input2_vec);
 355
 356     topology topology;
 357     topology.add(input_layout("input1", input1.get_layout()));
 358     topology.add(input_layout("input2", input2.get_layout()));
 359     topology.add(concatenation("depth1", { "input1", "input2" }, concatenation::along_f));
 360     topology.add(activation("relu1", "depth1", activation_relu));
 361
 362     cldnn::build_options options;
 363     options.set_option(cldnn::build_option::optimize_data(true));
 364     network network(engine, topology, options);
 365
 366     network.set_input_data("input1", input1);
 367     network.set_input_data("input2", input2);
 368
 369     auto outputs = network.execute({});
 370     EXPECT_EQ(outputs.size(), size_t(1));
 371     EXPECT_EQ(outputs.begin()->first, "relu1");
 372
 373     auto output = outputs.at("relu1").get_memory();
 374
 375     auto output_ptr = output.pointer<float>();
 376     unsigned int elements_count = 600;
 377     unsigned int input_element_count = 300;
 378     for (unsigned int i = 0; i < 600; i++)
 379     {
 380         if(i < input_element_count)
 381             EXPECT_FLOAT_EQ(input1_vec[i] < 0.0f ? 0.0f : input1_vec[i], output_ptr[i]);
 382         else
 383             EXPECT_FLOAT_EQ(input2_vec[i - input_element_count] < 0.0f ? 0.0f : input2_vec[i - input_element_count], output_ptr[i]);
 384     }
 385 }
 386
 387 TEST(depth_concatenate_f32_gpu, test05_different_formats) {
 388     // 2 inputs of size 3x10x10 concatenated on f axis
 389
 390     const auto& engine = get_test_engine();
 391     auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1,3,2,2 } });
 392     auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1,3,2,2 } });
 393
 394     set_values(input1, { 1.0f, 1.0f, 1.0f, 1.0f,
 395                         2.0f, 2.0f, 2.0f, 2.0f,
 396                         3.0f, 3.0f, 3.0f, 3.0f });
 397     set_values(input2, { -1.0f, -2.0f, -3.0f,
 398                          -1.0f, -2.0f, -3.0f,
 399                          -1.0f, -2.0f, -3.0f,
 400                         - 1.0f, -2.0f, -3.0f });
 401
 402     std::vector<float> out_ref = {
 403         1.0f, 1.0f, 1.0f, 1.0f,
 404         2.0f, 2.0f, 2.0f, 2.0f,
 405         3.0f, 3.0f, 3.0f, 3.0f,
 406         -1.0f, -1.0f, -1.0f, -1.0f,
 407         -2.0f, -2.0f, -2.0f, -2.0f,
 408         -3.0f, -3.0f, -3.0f, -3.0f
 409     };
 410
 411     topology topology;
 412     topology.add(input_layout("input1", input1.get_layout()));
 413     topology.add(input_layout("input2", input2.get_layout()));
 414     topology.add(concatenation("depth1", { "input1", "input2" }, concatenation::along_f));
 415
 416     cldnn::build_options options;
 417     options.set_option(cldnn::build_option::optimize_data(true));
 418     network network(engine, topology, options);
 419
 420     network.set_input_data("input1", input1);
 421     network.set_input_data("input2", input2);
 422
 423     auto outputs = network.execute({});
 424     EXPECT_EQ(outputs.size(), size_t(1));
 425     EXPECT_EQ(outputs.begin()->first, "depth1");
 426
 427     auto output = outputs.at("depth1").get_memory();
 428     auto output_ptr = output.pointer<float>();
 429     int cntr = 0;
 430     for (float val : output_ptr)
 431     {
 432         EXPECT_EQ(val, out_ref[cntr++]);
 433     }
 434
 435
 436 }
 437
 438 TEST(depth_concatenate_i32_gpu, optimize_data01) {
 439
 440     const auto& engine = get_test_engine();
 441     build_options build_opt;
 442     auto input = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,1,1 } });
 443
 444     topology topology;
 445     topology.add(
 446         input_layout("input", input.get_layout())
 447     );
 448     topology.add(cldnn::concatenation("int1", { "input" }, cldnn::concatenation::along_f));
 449     topology.add(cldnn::concatenation("result1", { "int1" }, cldnn::concatenation::along_f));
 450     topology.add(cldnn::concatenation("result2", { "int1" }, cldnn::concatenation::along_f));
 451
 452
 453     std::vector<int> input_data = { 4 };
 454     std::vector<int> out_data = { 4 };
 455     set_values(input, input_data);
 456
 457     build_opt.set_option(build_option::optimize_data(true));
 458     network network(engine, topology, build_opt);
 459     network.set_input_data("input", input);
 460     auto outputs = network.execute();
 461
 462     for (auto& it : outputs)
 463     {
 464         auto output_ptr = it.second.get_memory().pointer<int>();
 465         EXPECT_EQ(output_ptr[0], out_data[0]);
 466     }
 467 }
 468
 469 TEST(depth_concatenate_i32_gpu, optimize_data02) {
 470
 471     const auto& engine = get_test_engine();
 472     build_options build_opt;
 473     auto input1 = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,2,2 } });
 474     auto input2 = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,2,2 } });
 475     auto input3 = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,2,2 } });
 476     auto input4 = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,2,2 } });
 477
 478     topology topology;
 479     topology.add(
 480         input_layout("input1", input1.get_layout())
 481     );
 482     topology.add(
 483         input_layout("input2", input2.get_layout())
 484     );
 485     topology.add(
 486         input_layout("input3", input3.get_layout())
 487     );
 488     topology.add(
 489         input_layout("input4", input4.get_layout())
 490     );
 491
 492     topology.add(cldnn::concatenation("concat1", { "input1", "input2" }, cldnn::concatenation::along_x));
 493     topology.add(cldnn::concatenation("concat2", { "input3", "input4" }, cldnn::concatenation::along_x));
 494     topology.add(cldnn::concatenation("concat3", { "input2", "input4" }, cldnn::concatenation::along_x));
 495
 496     topology.add(cldnn::concatenation("concat4", { "concat1", "concat2" }, cldnn::concatenation::along_x));
 497     topology.add(cldnn::concatenation("concat5", { "concat2", "concat3" }, cldnn::concatenation::along_x));
 498
 499     topology.add(cldnn::concatenation("concat6", { "concat4", "concat5" }, cldnn::concatenation::along_x));
 500
 501     std::vector<int> input_data1 =
 502     { 1, 2,
 503       3, 4 };
 504
 505     std::vector<int> input_data2 =
 506     { 5, 6,
 507       7, 8 };
 508
 509     std::vector<int> input_data3 =
 510     { 9, 10,
 511      11, 12 };
 512
 513     std::vector<int> input_data4 =
 514     { 12, 14,
 515       15, 16 };
 516
 517     std::vector<int> c6_data =
 518     { 1, 2, 5, 6,  9, 10, 12, 14,  9, 10, 12, 14, 5, 6, 12, 14,
 519       3, 4, 7, 8, 11, 12, 15, 16, 11, 12, 15, 16, 7, 8, 15, 16 };
 520
 521     set_values(input1, input_data1);
 522     set_values(input2, input_data2);
 523     set_values(input3, input_data3);
 524     set_values(input4, input_data4);
 525
 526     build_opt.set_option(build_option::optimize_data(true));
 527     network network(engine, topology, build_opt);
 528     network.set_input_data("input1", input1);
 529     network.set_input_data("input2", input2);
 530     network.set_input_data("input3", input3);
 531     network.set_input_data("input4", input4);
 532     auto outputs = network.execute();
 533
 534     auto output_concat6 = outputs.at("concat6").get_memory().pointer<int>();
 535
 536     for (size_t i = 0; i < output_concat6.size(); i++) {
 537         EXPECT_EQ(output_concat6[i], c6_data[i]);
 538     }
 539 }
 540
 541 TEST(depth_concatenate_i32_gpu, optimize_data03) {
 542
 543     const auto& engine = get_test_engine();
 544     build_options build_opt;
 545     auto input1 = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,2,2 } });
 546
 547     topology topology;
 548     topology.add(
 549         input_layout("input1", input1.get_layout())
 550     );
 551
 552     topology.add(cldnn::concatenation("concat1", { "input1" }, cldnn::concatenation::along_x));
 553
 554     topology.add(cldnn::concatenation("concat2", { "concat1" }, cldnn::concatenation::along_x));
 555     topology.add(cldnn::concatenation("concat3", { "concat1" }, cldnn::concatenation::along_x));
 556
 557     topology.add(cldnn::concatenation("concat4", { "concat3" }, cldnn::concatenation::along_x));
 558
 559     std::vector<int> input_data1 =
 560     { 1, 2,
 561       3, 4 };
 562
 563     std::vector<int> output_data =
 564     { 1, 2,
 565       3, 4 };
 566
 567     set_values(input1, input_data1);
 568
 569     build_opt.set_option(build_option::optimize_data(true));
 570     network network(engine, topology, build_opt);
 571     network.set_input_data("input1", input1);
 572
 573     auto outputs = network.execute();
 574
 575     for (auto& it : outputs)
 576     {
 577         auto output_ptr = it.second.get_memory().pointer<int>();
 578         for (size_t i = 0; i < output_ptr.size(); i++) {
 579             EXPECT_EQ(output_ptr[i], output_data[i]);
 580         }
 581     }
 582 }
 583
 584 TEST(depth_concatenate_i32_gpu, optimize_data04) {
 585
 586     const auto& engine = get_test_engine();
 587     build_options build_opt;
 588     auto input1 = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,2,2 } });
 589
 590     topology topology;
 591     topology.add(
 592         input_layout("input1", input1.get_layout())
 593     );
 594
 595     topology.add(cldnn::concatenation("concat1", { "input1" }, cldnn::concatenation::along_x));
 596
 597     topology.add(cldnn::concatenation("concat2", { "concat1" }, cldnn::concatenation::along_x));
 598     topology.add(cldnn::concatenation("concat3", { "concat1" }, cldnn::concatenation::along_x));
 599
 600     topology.add(cldnn::concatenation("concat4", { "concat2", "concat3" }, cldnn::concatenation::along_x));
 601
 602     std::vector<int> input_data1 =
 603     { 1, 2,
 604       3, 4 };
 605
 606     std::vector<int> output_data =
 607     { 1, 2, 1, 2,
 608       3, 4, 3, 4 };
 609
 610     set_values(input1, input_data1);
 611
 612     build_opt.set_option(build_option::optimize_data(true));
 613     network network(engine, topology, build_opt);
 614     network.set_input_data("input1", input1);
 615
 616     auto outputs = network.execute();
 617
 618     for (auto& it : outputs)
 619     {
 620         auto output_ptr = it.second.get_memory().pointer<int>();
 621         for (size_t i = 0; i < output_ptr.size(); i++) {
 622             EXPECT_EQ(output_ptr[i], output_data[i]);
 623         }
 624     }
 625 }
 626
 627 TEST(depth_concatenate_i32_gpu, optimize_data05) {
 628
 629     const auto& engine = get_test_engine();
 630     build_options build_opt;
 631     auto input1 = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,2,2 } });
 632
 633     topology topology;
 634     topology.add(
 635         input_layout("input1", input1.get_layout())
 636     );
 637
 638     topology.add(cldnn::concatenation("concat1", { "input1" }, cldnn::concatenation::along_x));
 639
 640     topology.add(cldnn::concatenation("concat2", { "concat1" }, cldnn::concatenation::along_x));
 641     topology.add(cldnn::concatenation("concat3", { "concat1" }, cldnn::concatenation::along_x));
 642
 643     topology.add(cldnn::concatenation("concat4", { "concat2", "concat3" }, cldnn::concatenation::along_x));
 644     topology.add(cldnn::concatenation("concat5", { "concat1", "concat4" }, cldnn::concatenation::along_x));
 645
 646     std::vector<int> input_data1 =
 647     { 1, 2,
 648       3, 4 };
 649
 650     std::vector<int> c5_data =
 651     { 1, 2, 1, 2, 1, 2,
 652       3, 4, 3, 4, 3, 4 };
 653
 654     set_values(input1, input_data1);
 655
 656     build_opt.set_option(build_option::optimize_data(true));
 657     network network(engine, topology, build_opt);
 658     network.set_input_data("input1", input1);
 659
 660     auto outputs = network.execute();
 661
 662     auto output_concat5 = outputs.at("concat5").get_memory().pointer<int>();
 663
 664     for (size_t i = 0; i < output_concat5.size(); i++) {
 665         EXPECT_EQ(output_concat5[i], c5_data[i]);
 666     }
 667 }
 668
 669 //////////////////////////////////////////////////////////////////////////////
 670 //                                                                          //
 671 //                      Exhaustive Negative Matrix tests                    //
 672 //                                                                          //
 673 //////////////////////////////////////////////////////////////////////////////
 674
  675 //TODO: this should be done using TEST_P or some equivalent construct
 676 static network setup_depth_concatatenate_network(const std::vector<data_types> dts, const std::vector<tensor> ts, const std::vector<cldnn::format> fmt)
 677 {
 678     assert(dts.size() == ts.size());
 679     const size_t sz = ts.size();
 680
 681     const auto& engine = get_test_engine();
 682     topology topology;
 683
 684     std::vector<std::string> input_names;
 685     input_names.resize(sz);
 686
 687     for (size_t i = 0; i < sz; ++i)
 688     {
 689         auto input = memory::allocate(engine, { dts[i], fmt[i], ts[i] });
 690
 691         input_names[i] = "input";
 692         input_names[i] += std::to_string(i);
 693
 694         topology.add(input_layout(input_names[i], input.get_layout()));
 695     }
 696     //TODO: ask Uzi if something tests cases where there's missing input_names (nodes not present in the topology, etc.)
 697     topology.add(concatenation("depth_concat_node", input_names, concatenation::along_f));
 698
 699     return network(engine, topology);
 700 }
 701
 702 TEST(NegativeDepthConcatenateTest, DISABLED_TestAll) {
 703     auto d = data_types::f32;
 704     auto od = data_types::f16;
 705
 706     auto f = format::bfyx;
 707
 708     std::vector<int> t { 1, 2, 3, 4 };
 709     std::vector<int> t0 { 7, 2, 3, 4 };
 710     std::vector<int> t1 { 1, 2, 7, 4 };
 711     std::vector<int> t2 { 1, 2, 3, 7 };
 712
 713     //TODO: should be ASSERT_THROW(statement, exception_type) - but what exception type?
 714     ASSERT_ANY_THROW(setup_depth_concatatenate_network({ }, { }, { }));
 715
 716     ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, od }, { tensor(t), tensor(t) }, { f, f }));
 717     ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, d }, { tensor(t), tensor(t0) }, { f, f }));
 718     ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, d }, { tensor(t), tensor(t1) }, { f, f }));
 719     ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, d }, { tensor(t), tensor(t2) }, { f, f }));
 720
 721     ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, od, d }, { tensor(t), tensor(t), tensor(t) }, { f, f, f }));
 722     ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, d, od }, { tensor(t), tensor(t), tensor(t) }, { f, f, f }));
 723     ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, d, d }, { tensor(t), tensor(t0), tensor(t) }, { f, f, f }));
 724     ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, d, d }, { tensor(t), tensor(t1), tensor(t) }, { f, f, f }));
 725     ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, d, d }, { tensor(t), tensor(t2), tensor(t) }, { f, f, f }));
 726     ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, d, d }, { tensor(t), tensor(t), tensor(t0) }, { f, f, f }));
 727     ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, d, d }, { tensor(t), tensor(t), tensor(t1) }, { f, f, f }));
 728     ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, d, d }, { tensor(t), tensor(t), tensor(t2) }, { f, f, f }));
 729 }
 730
 731 //////////////////////////////////////////////////////////////////////////////
 732 //                                                                          //
 733 //                      Exhaustive Positive Matrix tests                    //
 734 //                                                                          //
 735 //////////////////////////////////////////////////////////////////////////////
 736
 737 using namespace cldnn;
 738
 739 class depth_concatenate_test : public tests::generic_test
 740 {
 741
 742 public:
 743
 744     static void TearDownTestCase()
 745     {
 746         for (auto generic_params : all_generic_params)
 747         {
 748             delete generic_params;
 749         }
 750
 751         for (auto layer_params : all_layer_params)
 752         {
 753             delete layer_params;
 754         }
 755     }
 756
 757     static std::vector<cldnn::primitive*> generate_specific_test_params(int i)
 758     {
 759         std::vector<cldnn::primitive*> all_layer_params;
 760
 761         switch(i)
 762         {
 763             case 1 : all_layer_params.push_back(new concatenation("depth_concatenate", {"input0"}, concatenation::along_f)); break;
 764             case 2 : all_layer_params.push_back(new concatenation("depth_concatenate", {"input0", "input1"}, concatenation::along_f)); break;
 765             case 3 : all_layer_params.push_back(new concatenation("depth_concatenate", {"input0", "input1", "input2"}, concatenation::along_f)); break;
 766             default: assert(0);
 767         }
 768
 769         return all_layer_params;
 770     }
 771
 772     static std::vector<tests::test_params*> generate_generic_test_params(int input_count)
 773     {
 774         std::vector<tests::test_params*> all_generic_params;
 775
 776         auto data_types = test_data_types();
 777
 778         for (cldnn::data_types dt : data_types)
 779         for (int32_t b : test_batch_sizes)
 780         for (tensor & t : test_input_sizes)
 781         {
 782             const int w = t.spatial[0];
 783             const int h = t.spatial[1];
 784
 785             switch(input_count)
 786             {
 787                 case 1:
 788                     for(auto f0 : test_feature_sizes)
 789                     {
 790                         test_params * tp = new test_params();
 791                         tp->data_type = dt;
 792
 793                         tp->input_layouts.push_back( cldnn::layout(tp->data_type, tp->fmt, cldnn::tensor(  b, f0, w, h )) );
 794
 795                         all_generic_params.emplace_back(tp);
 796                     }
 797                     break;
 798                 case 2:
 799                     for(auto f0 : test_feature_sizes)
 800                     for(auto f1 : test_feature_sizes)
 801                     {
 802                         test_params * tp = new test_params();
 803                         tp->data_type = dt;
 804
 805                         tp->input_layouts.push_back(cldnn::layout(tp->data_type, tp->fmt, cldnn::tensor(  b, f0, w, h )) );
 806                       tp->input_layouts.push_back(cldnn::layout(tp->data_type, tp->fmt, cldnn::tensor(  b, f1, w, h )) );
 807
 808                         all_generic_params.emplace_back(tp);
 809                     }
 810                     break;
 811                 case 3:
 812                     for(auto f0 : test_feature_sizes)
 813                     for(auto f1 : test_feature_sizes)
 814                     for(auto f2 : test_feature_sizes)
 815                     {
 816                         test_params * tp = new test_params();
 817                         tp->data_type = dt;
 818
 819                         tp->input_layouts.push_back(cldnn::layout(tp->data_type, tp->fmt, cldnn::tensor( b, f0, w, h )) );
 820                         tp->input_layouts.push_back(cldnn::layout(tp->data_type, tp->fmt, cldnn::tensor( b, f1, w, h )) );
 821                         tp->input_layouts.push_back(cldnn::layout(tp->data_type, tp->fmt, cldnn::tensor( b, f2, w, h )) );
 822
 823                         all_generic_params.emplace_back(tp);
 824                     }
 825                     break;
 826                 default:
 827                     assert(0);
 828             }
 829         }
 830
 831         return all_generic_params;
 832     }
 833
 834     static std::vector<std::tuple<test_params*, cldnn::primitive*>> generate_all_test_params()
 835     {
 836         std::vector<std::tuple<test_params*, cldnn::primitive*>> res;
 837
 838         for (int i = 1; i <= 3; ++i)
 839         {
 840             auto tpv = generate_generic_test_params(i);
 841             auto pv = generate_specific_test_params(i);
 842
 843             all_generic_params.insert(all_generic_params.end(), tpv.begin(), tpv.end());
 844             all_layer_params.insert(all_layer_params.end(), pv.begin(), pv.end());
 845
 846             for (auto & tp : tpv)
 847             for (auto & p: pv)
 848                 res.emplace_back(tp, p);
 849         }
 850
 851         return res;
 852     }
 853
 854     virtual bool is_format_supported(cldnn::format format) override
 855     {
 856         return format == cldnn_format_type::cldnn_format_bfyx;
 857     }
 858
 859     virtual cldnn::tensor get_expected_output_tensor() override
 860     {
 861         cldnn::tensor::value_type features = 0;
 862         for (const auto& t : generic_params->input_layouts)
 863         {
 864             features += t.size.feature[0];
 865         }
 866
 867         const auto& t = generic_params->input_layouts[0].size;
 868         return{ t.batch[0], features, t.spatial[0], t.spatial[1] };
 869     }
 870
 871     template<typename Type>
 872     memory generate_reference_typed(const std::vector<memory> & inputs)
 873     {
 874         assert(!inputs.empty());
 875
 876         const int in_b = inputs[0].get_layout().size.batch[0];
 877         const int in_h = inputs[0].get_layout().size.spatial[1];
 878         const int in_w = inputs[0].get_layout().size.spatial[0];
 879
 880         int out_f = 0;
 881
 882         for (const memory & input : inputs)
 883         {
 884             assert(input.get_layout().size.batch[0] == in_b);
 885             assert(input.get_layout().size.spatial[1] == in_h);
 886             assert(input.get_layout().size.spatial[0] == in_w);
 887
 888             out_f += input.get_layout().size.feature[0];
 889
 890             assert(input.get_layout().data_type == inputs[0].get_layout().data_type);
 891             assert(input.get_layout().format == inputs[0].get_layout().format);
 892         }
 893
 894         //Output is bfyx
 895         auto output = memory::allocate(engine, cldnn::layout(inputs[0].get_layout().data_type, cldnn::format::bfyx, tensor( in_b, out_f, in_w, in_h )));
 896         auto out_mem = output.pointer<Type>();
 897
 898         int out_f_off = 0;
 899         for (const memory & input : inputs)
 900         {
 901             const auto input_desc = get_linear_memory_desc(input.get_layout());
 902             const auto output_desc = get_linear_memory_desc(output.get_layout());
 903
 904             const int in_f = input.get_layout().size.feature[0];
 905             const auto in_mem = input.pointer<Type>();
 906
 907             for (int n = 0; n < in_b; ++n)
 908             for (int f = 0; f < in_f; ++f)
 909             for (int y = 0; y < in_h; ++y)
 910             for (int x = 0; x < in_w; ++x)
 911             {
 912                 const size_t in_idx = get_linear_index(input.get_layout(), n, f, y, x, input_desc);
 913                 const size_t out_idx = get_linear_index(output.get_layout(), n, out_f_off + f, y, x, output_desc);
 914
 915                 out_mem[out_idx] = in_mem[in_idx];
 916             }
 917
 918             out_f_off += in_f;
 919         }
 920
 921         return output;
 922     }
 923
 924     virtual memory generate_reference(const std::vector<memory> & inputs) override
 925     {
 926         if (generic_params->data_type == data_types::f32)
 927         {
 928             return generate_reference_typed<float>(inputs);
 929         }
 930         else
 931         {
 932             return generate_reference_typed<FLOAT16>(inputs);
 933         }
 934     }
 935
 936     static std::string custom_param_name(const ::testing::TestParamInfo<std::tuple<test_params*, cldnn::primitive*>>& info)
 937     {
 938         std::stringstream res;
 939
 940         const auto & p = std::get<0>(info.param);
 941
 942         assert (p->data_type == data_types::f32 ||
 943                 p->data_type == data_types::f16);
 944
 945         res << info.index
 946             << "_" << (p->data_type == data_types::f32 ? "f32" : "f16");
 947
 948         for (unsigned i = 0; i < p->input_layouts.size(); ++i)
 949         {
 950             const auto chans = p->fmt.order();
 951
 952             res << "_" << "Input" << i;
 953             for (unsigned int j = 0; j < p->input_layouts[i].size.sizes(p->fmt).size(); ++j)
 954             {
 955                 res << chans[j] << p->input_layouts[i].size.sizes(p->fmt)[j];
 956             }
 957         }
 958
 959         return res.str();
 960     }
 961
 962 private:
 963
 964     static std::vector<tests::test_params*> all_generic_params;
 965     static std::vector<cldnn::primitive*> all_layer_params;
 966
 967 };
 968
 969 std::vector<cldnn::primitive*> depth_concatenate_test::all_layer_params = {};
 970 std::vector<tests::test_params*> depth_concatenate_test::all_generic_params = {};
 971
// Value-parameterized test body: runs once per (test_params*, primitive*)
// tuple supplied by the instantiation below. All work is delegated to
// run_single_test(), which is not defined in this part of the file.
TEST_P(depth_concatenate_test, DEPTHCONCATENATE)
{
    run_single_test();
}
 976
// Instantiates the parameterized test with every combination produced by
// generate_all_test_params(); custom_param_name supplies the per-case name
// suffix.
// NOTE(review): the DISABLED_ prefix on the instantiation name looks like
// gtest's convention for tests that are compiled but skipped by default -
// confirm that leaving this matrix disabled is intentional.
INSTANTIATE_TEST_CASE_P(DISABLED_DEPTHCONCATENATE,
    depth_concatenate_test,
    ::testing::ValuesIn(depth_concatenate_test::generate_all_test_params()),
    depth_concatenate_test::custom_param_name);
 981