2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
19 #include <gtest/gtest.h>
20 #include "api/CPP/memory.hpp"
21 #include <api/CPP/input_layout.hpp>
22 #include "api/CPP/concatenation.hpp"
23 #include "api/CPP/convolution.hpp"
24 #include "api/CPP/data.hpp"
25 #include "api/CPP/pooling.hpp"
26 #include "api/CPP/upsampling.hpp"
27 #include <api/CPP/topology.hpp>
28 #include <api/CPP/network.hpp>
29 #include <api/CPP/engine.hpp>
30 #include "test_utils/test_utils.h"
32 using namespace cldnn;
33 using namespace tests;
// Generates b*f*y*x random values in [min, max] for test inputs.
// Values are drawn as integers scaled by k so that, after division by k
// (outside this visible fragment), they are exact multiples of 1/k and
// float comparisons in the tests stay bit-exact.
// NOTE(review): the `template<typename T>` header and the trailing
// `v[i] /= k; ... return v;` lines fall outside this fragment — confirm
// against the full file.
std::vector<T> generate_random_input(size_t b, size_t f, size_t y, size_t x, int min, int max) {
    static std::default_random_engine generator(random_seed);
    int k = 8; // 1/k is the resolution of the floating point numbers
    std::uniform_int_distribution<int> distribution(k * min, k * max);
    std::vector<T> v(b*f*x*y);
    for (size_t i = 0; i < b*f*x*y; ++i) {
        v[i] = (T)distribution(generator);
// Concatenates two yxfb f32 inputs (2 features and 3 features, batch 2,
// 1x1 spatial) along the feature axis and checks the 2x5 output holds
// input1's values followed by input2's values.
TEST(depth_concatenate_f32_gpu, test01) {
    //  Input1 : 2x 1x1 x 2
    //  Input2 : 2x 1x1 x 3
    const auto& engine = get_test_engine();
    auto input1 = memory::allocate(engine, {data_types::f32, format::yxfb, { 2,2,1,1 }});
    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2,3,1,1 } });
    set_values(input1, { 0.5f, 0.7f, 0.2f, 0.4f });
    set_values(input2, { 1.0f, 0.1f, 0.3f, -0.5f, 0.0f, -0.2f });
    // NOTE(review): the `topology topology;` declaration is outside this
    // visible fragment.
    topology.add(input_layout("input1", input1.get_layout()));
    topology.add(input_layout("input2", input2.get_layout()));
    topology.add(concatenation("depth1", { "input1", "input2" }, concatenation::along_f));
    network network(engine, topology);
    network.set_input_data("input1", input1);
    network.set_input_data("input2", input2);
    auto outputs = network.execute({});
    // Exactly one output node, and it must be the concatenation itself.
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "depth1");
    auto output = outputs.at("depth1").get_memory();
    // yxfb layout: features are the fastest-varying of the checked dims,
    // so input1's 4 values come first, then input2's 6.
    auto output_ptr = output.pointer<float>();
    EXPECT_FLOAT_EQ(0.5f, output_ptr[0]);
    EXPECT_FLOAT_EQ(0.7f, output_ptr[1]);
    EXPECT_FLOAT_EQ(0.2f, output_ptr[2]);
    EXPECT_FLOAT_EQ(0.4f, output_ptr[3]);
    EXPECT_FLOAT_EQ(1.0f, output_ptr[4]);
    EXPECT_FLOAT_EQ(0.1f, output_ptr[5]);
    EXPECT_FLOAT_EQ(0.3f, output_ptr[6]);
    EXPECT_FLOAT_EQ(-0.5f, output_ptr[7]);
    EXPECT_FLOAT_EQ(0.0f, output_ptr[8]);
    EXPECT_FLOAT_EQ(-0.2f, output_ptr[9]);
// Shared body for the int8/int32/int64 concat tests below: reorders two
// f32 yxfb inputs to integer type DType, concatenates along f, reorders
// back to f32, and compares against the values truncated toward zero by
// the f32->int round trip.
template<data_types DType>
void concat_basic_with_reorder()
{
    //  Input1 : 2x 1x1 x 2
    //  Input2 : 2x 1x1 x 3
    const auto& engine = get_test_engine();
    auto input1 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2,2,1,1 } });
    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2,3,1,1 } });
    // Expected output: input values with fractional parts dropped by the
    // f32 -> DType -> f32 conversion.
    auto outs = { 2.0f, 3.0f, 0.0f, 1.0f, 1.0f, 4.0f, -4.0f, -7.0f, 0.0f, 0.0f };
    set_values(input1, { 2.5f, 3.7f, 0.2f, 1.4f });
    set_values(input2, { 1.0f, 4.1f, -4.3f, -7.5f, 0.0f, -0.2f });
    // NOTE(review): the `topology topology;` declaration is outside this
    // visible fragment.
    topology.add(input_layout("input1", input1.get_layout()));
    topology.add(input_layout("input2", input2.get_layout()));
    topology.add(reorder("to_int1", "input1", { DType, format::yxfb,{ 2,2,1,1 } }));
    topology.add(reorder("to_int2", "input2", { DType, format::yxfb,{ 2,3,1,1 } }));
    topology.add(concatenation("depth1", { "to_int1", "to_int2" }, concatenation::along_f));
    topology.add(reorder("to_float", "depth1", { data_types::f32, format::yxfb,{ 2,5,1,1 } }));
    network network(engine, topology);
    network.set_input_data("input1", input1);
    network.set_input_data("input2", input2);
    auto outputs = network.execute({});
    ASSERT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "to_float");
    auto output = outputs.at("to_float").get_memory();
    auto output_ptr = output.pointer<float>();
    // NOTE(review): ptr_cntr's declaration is outside this fragment.
    for (const auto& ref : outs)
        EXPECT_FLOAT_EQ(ref, output_ptr[ptr_cntr++]);
// int8 instantiation of the reorder+concat round-trip test.
TEST(depth_concatenate_int8_gpu, concat_basic) {
    concat_basic_with_reorder<data_types::i8>();
// int32 instantiation of the reorder+concat round-trip test.
TEST(depth_concatenate_int32_gpu, concat_basic) {
    concat_basic_with_reorder<data_types::i32>();
// int64 instantiation of the reorder+concat round-trip test.
TEST(depth_concatenate_int64_gpu, concat_basic) {
    concat_basic_with_reorder<data_types::i64>();
// Concatenates three inputs along f where the third input uses a
// different memory format (bfyx vs yxfb) — exercises the implicit
// reorder path inside concatenation.
TEST(depth_concatenate_f32_gpu, test02) {
    //  Input count : 3 (yxfb, yxfb, bfyx)
    //  Input1 : 2x 1x1 x 2
    //  Input2 : 2x 1x1 x 3
    //  Input3 : 2x 1x1 x 3
    const auto& engine = get_test_engine();
    auto input1 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2,2,1,1 } });
    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2,3,1,1 } });
    auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2,3,1,1 } });
    set_values(input1, { 0.5f, 0.7f, 0.2f, 0.4f });
    set_values(input2, { 1.0f, 0.1f, 0.3f, -0.5f, 0.0f, -0.2f });
    // input3 is bfyx, so these values are laid out batch-major; after the
    // internal reorder they interleave to match the yxfb expectations below.
    set_values(input3, { 1.0f, 0.3f, 0.0f, 0.1f, -0.5f, -0.2f });
    // NOTE(review): the `topology topology;` declaration is outside this
    // visible fragment.
    topology.add(input_layout("input1", input1.get_layout()));
    topology.add(input_layout("input2", input2.get_layout()));
    topology.add(input_layout("input3", input3.get_layout()));
    topology.add(concatenation("depth1", { "input1", "input2", "input3" }, concatenation::along_f));
    network network(engine, topology);
    network.set_input_data("input1", input1);
    network.set_input_data("input2", input2);
    network.set_input_data("input3", input3);
    auto outputs = network.execute({});
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "depth1");
    auto output = outputs.at("depth1").get_memory();
    auto output_ptr = output.pointer<float>();
    EXPECT_FLOAT_EQ(0.5f, output_ptr[0]);
    EXPECT_FLOAT_EQ(0.7f, output_ptr[1]);
    EXPECT_FLOAT_EQ(0.2f, output_ptr[2]);
    EXPECT_FLOAT_EQ(0.4f, output_ptr[3]);
    EXPECT_FLOAT_EQ(1.0f, output_ptr[4]);
    EXPECT_FLOAT_EQ(0.1f, output_ptr[5]);
    EXPECT_FLOAT_EQ(0.3f, output_ptr[6]);
    EXPECT_FLOAT_EQ(-0.5f, output_ptr[7]);
    EXPECT_FLOAT_EQ(0.0f, output_ptr[8]);
    EXPECT_FLOAT_EQ(-0.2f, output_ptr[9]);
    EXPECT_FLOAT_EQ(1.0f, output_ptr[10]);
    EXPECT_FLOAT_EQ(0.1f, output_ptr[11]);
    EXPECT_FLOAT_EQ(0.3f, output_ptr[12]);
    EXPECT_FLOAT_EQ(-0.5f, output_ptr[13]);
    EXPECT_FLOAT_EQ(0.0f, output_ptr[14]);
    EXPECT_FLOAT_EQ(-0.2f, output_ptr[15]);
253 TEST(concatenate_f32_gpu, test_concatenation_of_pool_and_unpool)
256 auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
257 auto weights = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 2, 1 } });
259 set_values(input1, { 16.0f, 32.0f, 128.0f, 256.0f });
260 set_values(weights, { .1f, .2f });
262 topology.add(input_layout("input1", input1.get_layout()));
263 topology.add(pooling("pool1", "input1",
264 cldnn::pooling_mode::max,
265 { 1,1,2,1 }, /*kernel*/
266 { 1,1,1,1 } /*stride*/
268 topology.add(upsampling("unpool1", "input1", 1, 0, upsampling_sample_type::nearest));
269 topology.add(concatenation("concat1", { "pool1", "unpool1" }, cldnn::concatenation::along_x));
270 topology.add(data("weights", weights)),
271 topology.add(convolution("conv", "concat1", { "weights" }));
273 cldnn::build_options options;
274 options.set_option(cldnn::build_option::optimize_data(true));
275 network network(engine, topology, options);
276 network.set_input_data("input1", input1);
278 auto outputs = network.execute({});
279 auto output = outputs.at("conv").get_memory();
280 std::vector<float> out_ref = { 6.4f, 8.f, 51.2f, 64.f };
281 auto output_ptr = output.pointer<float>();
282 for (int i=0; i<4; i++)
284 EXPECT_NEAR(output_ptr[i], out_ref[i], 1e-3);
// Test for cascade concatenation optimization.
// Despite having concatenations one after another and connected to different non padded activation primitives,
// graph should remove all concatenations from execution.
TEST(depth_concatenate_f32_gpu, test03_cascade_concat_opt) {
    const auto& engine = get_test_engine();
    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1,2,2,1 } });
    set_values(input1, { 16.0f, 32.0f, 128.0f, 256.0f });
    // NOTE(review): the `topology topology;` declaration is outside this
    // visible fragment.
    // Chain: relu -> sqrt, then three rounds of (concat with previous, sqrt),
    // final relu. Each concat doubles the feature count: 2 -> 4 -> 8 -> 16.
    topology.add(input_layout("input1", input1.get_layout()));
    topology.add(activation("relu1", "input1", activation_relu));
    topology.add(activation("relu2", "relu1", activation_sqrt));
    topology.add(concatenation("depth1", { "relu2", "relu1" }, concatenation::along_f));
    topology.add(activation("relu3", "depth1", activation_sqrt));
    topology.add(concatenation("depth2", { "relu3", "depth1" }, concatenation::along_f));
    topology.add(activation("relu4", "depth2", activation_sqrt));
    topology.add(concatenation("depth3", { "relu4", "depth2" }, concatenation::along_f));
    topology.add(activation("relu5", "depth3", activation_relu));
    cldnn::build_options options;
    options.set_option(cldnn::build_option::optimize_data(true));
    network network(engine, topology, options);
    network.set_input_data("input1", input1);
    auto outputs = network.execute({});
    auto output_prim = outputs.begin()->second.get_memory();
    auto output_ptr = output_prim.pointer<float>();
    auto executed_primitives = network.get_executed_primitives();
    // The optimizer must have folded every concatenation into its users,
    // so none of them appears in the executed-primitives list.
    EXPECT_TRUE(executed_primitives.count("depth1") == 0);
    EXPECT_TRUE(executed_primitives.count("depth2") == 0);
    EXPECT_TRUE(executed_primitives.count("depth3") == 0);
    // Numeric reference: repeated sqrt applications of {16,32,128,256}.
    EXPECT_NEAR(1.4142f, output_ptr[0], 1e-3);
    EXPECT_NEAR(1.5422f, output_ptr[1], 1e-3);
    EXPECT_NEAR(1.8340f, output_ptr[2], 1e-3);
    EXPECT_NEAR(2.0f, output_ptr[3], 1e-3);
    EXPECT_NEAR(2.0f, output_ptr[4], 1e-3);
    EXPECT_NEAR(2.3784f, output_ptr[5], 1e-3);
    EXPECT_NEAR(3.3635f, output_ptr[6], 1e-3);
    EXPECT_NEAR(4.0f, output_ptr[7], 1e-3);
    EXPECT_NEAR(2.0f, output_ptr[8], 1e-3);
    EXPECT_NEAR(2.3784f, output_ptr[9], 1e-3);
    EXPECT_NEAR(3.3635f, output_ptr[10], 1e-3);
    EXPECT_NEAR(4.0f, output_ptr[11], 1e-3);
    EXPECT_NEAR(4.0f, output_ptr[12], 1e-3);
    EXPECT_NEAR(5.6568f, output_ptr[13], 1e-3);
    EXPECT_NEAR(11.3137f, output_ptr[14], 1e-3);
    EXPECT_NEAR(16.0f, output_ptr[15], 1e-3);
344 TEST(depth_concatenate_f32_gpu, test04_fused_relu) {
345 // 2 inputs of size 3x10x10 concatenated on f axis with fused relu
347 const auto& engine = get_test_engine();
348 auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1,3,10,10 } });
349 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1,3,10,10 } });
351 std::vector<float> input1_vec = generate_random_input<float>(1, 3, 10, 10, -10, 10);
352 set_values(input1, input1_vec);
353 std::vector<float> input2_vec = generate_random_input<float>(1, 3, 10, 10, -10, 10);
354 set_values(input2, input2_vec);
357 topology.add(input_layout("input1", input1.get_layout()));
358 topology.add(input_layout("input2", input2.get_layout()));
359 topology.add(concatenation("depth1", { "input1", "input2" }, concatenation::along_f));
360 topology.add(activation("relu1", "depth1", activation_relu));
362 cldnn::build_options options;
363 options.set_option(cldnn::build_option::optimize_data(true));
364 network network(engine, topology, options);
366 network.set_input_data("input1", input1);
367 network.set_input_data("input2", input2);
369 auto outputs = network.execute({});
370 EXPECT_EQ(outputs.size(), size_t(1));
371 EXPECT_EQ(outputs.begin()->first, "relu1");
373 auto output = outputs.at("relu1").get_memory();
375 auto output_ptr = output.pointer<float>();
376 unsigned int elements_count = 600;
377 unsigned int input_element_count = 300;
378 for (unsigned int i = 0; i < 600; i++)
380 if(i < input_element_count)
381 EXPECT_FLOAT_EQ(input1_vec[i] < 0.0f ? 0.0f : input1_vec[i], output_ptr[i]);
383 EXPECT_FLOAT_EQ(input2_vec[i - input_element_count] < 0.0f ? 0.0f : input2_vec[i - input_element_count], output_ptr[i]);
// Concatenates a bfyx input with a yxfb input (both 1x3x2x2) along f and
// checks the result in the output's linear order.
TEST(depth_concatenate_f32_gpu, test05_different_formats) {
    //  2 inputs of 3 features x 2x2 spatial, in different formats
    //  (bfyx and yxfb), concatenated on the f axis
    const auto& engine = get_test_engine();
    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1,3,2,2 } });
    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1,3,2,2 } });
    set_values(input1, { 1.0f, 1.0f, 1.0f, 1.0f,
        2.0f, 2.0f, 2.0f, 2.0f,
        3.0f, 3.0f, 3.0f, 3.0f });
    // input2 is yxfb, so features interleave per pixel here.
    // NOTE(review): two data lines of this initializer are outside this
    // visible fragment.
    set_values(input2, { -1.0f, -2.0f, -3.0f,
        - 1.0f, -2.0f, -3.0f });
    std::vector<float> out_ref = {
        1.0f, 1.0f, 1.0f, 1.0f,
        2.0f, 2.0f, 2.0f, 2.0f,
        3.0f, 3.0f, 3.0f, 3.0f,
        -1.0f, -1.0f, -1.0f, -1.0f,
        -2.0f, -2.0f, -2.0f, -2.0f,
        -3.0f, -3.0f, -3.0f, -3.0f
    // NOTE(review): the `topology topology;` declaration is outside this
    // visible fragment.
    topology.add(input_layout("input1", input1.get_layout()));
    topology.add(input_layout("input2", input2.get_layout()));
    topology.add(concatenation("depth1", { "input1", "input2" }, concatenation::along_f));
    cldnn::build_options options;
    options.set_option(cldnn::build_option::optimize_data(true));
    network network(engine, topology, options);
    network.set_input_data("input1", input1);
    network.set_input_data("input2", input2);
    auto outputs = network.execute({});
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "depth1");
    auto output = outputs.at("depth1").get_memory();
    auto output_ptr = output.pointer<float>();
    // NOTE(review): cntr's declaration is outside this fragment.
    for (float val : output_ptr)
        EXPECT_EQ(val, out_ref[cntr++]);
// Single-input concatenations are no-ops; with optimize_data the graph
// should still produce the input value at both fan-out results.
TEST(depth_concatenate_i32_gpu, optimize_data01) {
    const auto& engine = get_test_engine();
    build_options build_opt;
    auto input = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,1,1 } });
    // NOTE(review): this line is part of a `topology topology(...)`
    // constructor call whose surrounding lines are outside this fragment.
        input_layout("input", input.get_layout())
    topology.add(cldnn::concatenation("int1", { "input" }, cldnn::concatenation::along_f));
    // Two consumers of the same single-input concat — exercises fan-out.
    topology.add(cldnn::concatenation("result1", { "int1" }, cldnn::concatenation::along_f));
    topology.add(cldnn::concatenation("result2", { "int1" }, cldnn::concatenation::along_f));
    std::vector<int> input_data = { 4 };
    std::vector<int> out_data = { 4 };
    set_values(input, input_data);
    build_opt.set_option(build_option::optimize_data(true));
    network network(engine, topology, build_opt);
    network.set_input_data("input", input);
    auto outputs = network.execute();
    // Every output (result1 and result2) must carry the untouched value.
    for (auto& it : outputs)
        auto output_ptr = it.second.get_memory().pointer<int>();
        EXPECT_EQ(output_ptr[0], out_data[0]);
// Diamond of concatenations over four 2x2 inputs: concat1..3 feed
// concat4/concat5 which feed concat6; checks the final 32-element buffer
// with optimize_data enabled (inputs may be reused by several concats).
TEST(depth_concatenate_i32_gpu, optimize_data02) {
    const auto& engine = get_test_engine();
    build_options build_opt;
    auto input1 = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,2,2 } });
    auto input2 = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,2,2 } });
    auto input3 = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,2,2 } });
    auto input4 = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,2,2 } });
    // NOTE(review): these input_layout lines are arguments of a
    // `topology topology(...)` constructor whose surrounding lines are
    // outside this fragment.
        input_layout("input1", input1.get_layout())
        input_layout("input2", input2.get_layout())
        input_layout("input3", input3.get_layout())
        input_layout("input4", input4.get_layout())
    // First tier: pairwise concats along x; note input2 and input4 are
    // consumed twice (by concat1/concat3 and concat2/concat3).
    topology.add(cldnn::concatenation("concat1", { "input1", "input2" }, cldnn::concatenation::along_x));
    topology.add(cldnn::concatenation("concat2", { "input3", "input4" }, cldnn::concatenation::along_x));
    topology.add(cldnn::concatenation("concat3", { "input2", "input4" }, cldnn::concatenation::along_x));
    // Second tier: concat of concats.
    topology.add(cldnn::concatenation("concat4", { "concat1", "concat2" }, cldnn::concatenation::along_x));
    topology.add(cldnn::concatenation("concat5", { "concat2", "concat3" }, cldnn::concatenation::along_x));
    // Final output.
    topology.add(cldnn::concatenation("concat6", { "concat4", "concat5" }, cldnn::concatenation::along_x));
    // NOTE(review): the initializer data lines of input_data1..4 are
    // outside this visible fragment.
    std::vector<int> input_data1 =
    std::vector<int> input_data2 =
    std::vector<int> input_data3 =
    std::vector<int> input_data4 =
    std::vector<int> c6_data =
    { 1, 2, 5, 6, 9, 10, 12, 14, 9, 10, 12, 14, 5, 6, 12, 14,
      3, 4, 7, 8, 11, 12, 15, 16, 11, 12, 15, 16, 7, 8, 15, 16 };
    set_values(input1, input_data1);
    set_values(input2, input_data2);
    set_values(input3, input_data3);
    set_values(input4, input_data4);
    build_opt.set_option(build_option::optimize_data(true));
    network network(engine, topology, build_opt);
    network.set_input_data("input1", input1);
    network.set_input_data("input2", input2);
    network.set_input_data("input3", input3);
    network.set_input_data("input4", input4);
    auto outputs = network.execute();
    auto output_concat6 = outputs.at("concat6").get_memory().pointer<int>();
    for (size_t i = 0; i < output_concat6.size(); i++) {
        EXPECT_EQ(output_concat6[i], c6_data[i]);
// Chain of single-input concatenations (concat1 -> concat2/concat3 ->
// concat4): all are identity ops; output must equal the input data.
TEST(depth_concatenate_i32_gpu, optimize_data03) {
    const auto& engine = get_test_engine();
    build_options build_opt;
    auto input1 = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,2,2 } });
    // NOTE(review): part of a `topology topology(...)` constructor whose
    // surrounding lines are outside this fragment.
        input_layout("input1", input1.get_layout())
    topology.add(cldnn::concatenation("concat1", { "input1" }, cldnn::concatenation::along_x));
    // concat1 fans out to two single-input concats...
    topology.add(cldnn::concatenation("concat2", { "concat1" }, cldnn::concatenation::along_x));
    topology.add(cldnn::concatenation("concat3", { "concat1" }, cldnn::concatenation::along_x));
    // ...and concat3 is chained one level deeper.
    topology.add(cldnn::concatenation("concat4", { "concat3" }, cldnn::concatenation::along_x));
    // NOTE(review): the initializer data lines are outside this fragment.
    std::vector<int> input_data1 =
    std::vector<int> output_data =
    set_values(input1, input_data1);
    build_opt.set_option(build_option::optimize_data(true));
    network network(engine, topology, build_opt);
    network.set_input_data("input1", input1);
    auto outputs = network.execute();
    for (auto& it : outputs)
        auto output_ptr = it.second.get_memory().pointer<int>();
        for (size_t i = 0; i < output_ptr.size(); i++) {
            EXPECT_EQ(output_ptr[i], output_data[i]);
// Two single-input concats over the same source merged by a two-input
// concat: concat4 = concat(concat2, concat3), i.e. the input duplicated
// along x. Checks outputs against the expected duplicated data.
TEST(depth_concatenate_i32_gpu, optimize_data04) {
    const auto& engine = get_test_engine();
    build_options build_opt;
    auto input1 = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,2,2 } });
    // NOTE(review): part of a `topology topology(...)` constructor whose
    // surrounding lines are outside this fragment.
        input_layout("input1", input1.get_layout())
    topology.add(cldnn::concatenation("concat1", { "input1" }, cldnn::concatenation::along_x));
    topology.add(cldnn::concatenation("concat2", { "concat1" }, cldnn::concatenation::along_x));
    topology.add(cldnn::concatenation("concat3", { "concat1" }, cldnn::concatenation::along_x));
    topology.add(cldnn::concatenation("concat4", { "concat2", "concat3" }, cldnn::concatenation::along_x));
    // NOTE(review): the initializer data lines are outside this fragment.
    std::vector<int> input_data1 =
    std::vector<int> output_data =
    set_values(input1, input_data1);
    build_opt.set_option(build_option::optimize_data(true));
    network network(engine, topology, build_opt);
    network.set_input_data("input1", input1);
    auto outputs = network.execute();
    for (auto& it : outputs)
        auto output_ptr = it.second.get_memory().pointer<int>();
        for (size_t i = 0; i < output_ptr.size(); i++) {
            EXPECT_EQ(output_ptr[i], output_data[i]);
// Like optimize_data04, plus concat5 mixing a first-tier concat
// (concat1) with a second-tier one (concat4) — verifies the optimizer
// handles concats whose inputs sit at different depths of the chain.
TEST(depth_concatenate_i32_gpu, optimize_data05) {
    const auto& engine = get_test_engine();
    build_options build_opt;
    auto input1 = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,2,2 } });
    // NOTE(review): part of a `topology topology(...)` constructor whose
    // surrounding lines are outside this fragment.
        input_layout("input1", input1.get_layout())
    topology.add(cldnn::concatenation("concat1", { "input1" }, cldnn::concatenation::along_x));
    topology.add(cldnn::concatenation("concat2", { "concat1" }, cldnn::concatenation::along_x));
    topology.add(cldnn::concatenation("concat3", { "concat1" }, cldnn::concatenation::along_x));
    topology.add(cldnn::concatenation("concat4", { "concat2", "concat3" }, cldnn::concatenation::along_x));
    topology.add(cldnn::concatenation("concat5", { "concat1", "concat4" }, cldnn::concatenation::along_x));
    // NOTE(review): the initializer data lines are outside this fragment.
    std::vector<int> input_data1 =
    std::vector<int> c5_data =
    set_values(input1, input_data1);
    build_opt.set_option(build_option::optimize_data(true));
    network network(engine, topology, build_opt);
    network.set_input_data("input1", input1);
    auto outputs = network.execute();
    auto output_concat5 = outputs.at("concat5").get_memory().pointer<int>();
    for (size_t i = 0; i < output_concat5.size(); i++) {
        EXPECT_EQ(output_concat5[i], c5_data[i]);
669 //////////////////////////////////////////////////////////////////////////////
671 // Exhaustive Negative Matrix tests //
673 //////////////////////////////////////////////////////////////////////////////
//TODO: this should be done using TEST_P or some equivalent construct
// Builds a network with one input per (data type, tensor, format) triple,
// all feeding a single along_f concatenation node. The negative tests
// below call this expecting construction to throw for mismatched inputs.
// (Name keeps the historical "concatatenate" spelling because the tests
// below reference it.)
static network setup_depth_concatatenate_network(const std::vector<data_types> dts, const std::vector<tensor> ts, const std::vector<cldnn::format> fmt)
    assert(dts.size() == ts.size());
    const size_t sz = ts.size();
    const auto& engine = get_test_engine();
    std::vector<std::string> input_names;
    input_names.resize(sz);
    for (size_t i = 0; i < sz; ++i)
        auto input = memory::allocate(engine, { dts[i], fmt[i], ts[i] });
        // Inputs are named "input0", "input1", ...
        input_names[i] = "input";
        input_names[i] += std::to_string(i);
        topology.add(input_layout(input_names[i], input.get_layout()));
    //TODO: check whether any test covers missing input_names (nodes not present in the topology, etc.)
    topology.add(concatenation("depth_concat_node", input_names, concatenation::along_f));
    return network(engine, topology);
// Negative matrix: every invalid combination (mixed data types, or a
// tensor differing from the others in any non-feature dimension) must
// make network construction throw. Currently DISABLED_.
TEST(NegativeDepthConcatenateTest, DISABLED_TestAll) {
    auto d = data_types::f32;
    auto od = data_types::f16;  // "other" data type, to force a mismatch
    auto f = format::bfyx;
    std::vector<int> t { 1, 2, 3, 4 };   // reference shape
    std::vector<int> t0 { 7, 2, 3, 4 };  // batch mismatch
    std::vector<int> t1 { 1, 2, 7, 4 };  // spatial mismatch
    std::vector<int> t2 { 1, 2, 3, 7 };  // spatial mismatch
    //TODO: should be ASSERT_THROW(statement, exception_type) - but what exception type?
    ASSERT_ANY_THROW(setup_depth_concatatenate_network({ }, { }, { }));  // empty input list
    ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, od }, { tensor(t), tensor(t) }, { f, f }));
    ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, d }, { tensor(t), tensor(t0) }, { f, f }));
    ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, d }, { tensor(t), tensor(t1) }, { f, f }));
    ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, d }, { tensor(t), tensor(t2) }, { f, f }));
    ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, od, d }, { tensor(t), tensor(t), tensor(t) }, { f, f, f }));
    ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, d, od }, { tensor(t), tensor(t), tensor(t) }, { f, f, f }));
    ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, d, d }, { tensor(t), tensor(t0), tensor(t) }, { f, f, f }));
    ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, d, d }, { tensor(t), tensor(t1), tensor(t) }, { f, f, f }));
    ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, d, d }, { tensor(t), tensor(t2), tensor(t) }, { f, f, f }));
    ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, d, d }, { tensor(t), tensor(t), tensor(t0) }, { f, f, f }));
    ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, d, d }, { tensor(t), tensor(t), tensor(t1) }, { f, f, f }));
    ASSERT_ANY_THROW(setup_depth_concatatenate_network({ d, d, d }, { tensor(t), tensor(t), tensor(t2) }, { f, f, f }));
731 //////////////////////////////////////////////////////////////////////////////
733 // Exhaustive Positive Matrix tests //
735 //////////////////////////////////////////////////////////////////////////////
737 using namespace cldnn;
// Parameterized "positive matrix" fixture: generates every combination of
// 1/2/3 inputs over the generic test sizes/types and checks the GPU
// concatenation result against a CPU reference. Owns the generated
// params/primitives via raw pointers freed in TearDownTestCase.
class depth_concatenate_test : public tests::generic_test
    // Frees the raw-pointer test parameters allocated by the generators.
    static void TearDownTestCase()
        for (auto generic_params : all_generic_params)
            delete generic_params;
        for (auto layer_params : all_layer_params)
    // Returns the concatenation primitive(s) for an i-input test case.
    static std::vector<cldnn::primitive*> generate_specific_test_params(int i)
        std::vector<cldnn::primitive*> all_layer_params;
        case 1 : all_layer_params.push_back(new concatenation("depth_concatenate", {"input0"}, concatenation::along_f)); break;
        case 2 : all_layer_params.push_back(new concatenation("depth_concatenate", {"input0", "input1"}, concatenation::along_f)); break;
        case 3 : all_layer_params.push_back(new concatenation("depth_concatenate", {"input0", "input1", "input2"}, concatenation::along_f)); break;
        return all_layer_params;
    // Cross-product of data types, batch sizes, spatial sizes and one
    // feature size per input; layouts only differ in feature count.
    static std::vector<tests::test_params*> generate_generic_test_params(int input_count)
        std::vector<tests::test_params*> all_generic_params;
        auto data_types = test_data_types();
        for (cldnn::data_types dt : data_types)
        for (int32_t b : test_batch_sizes)
        for (tensor & t : test_input_sizes)
            const int w = t.spatial[0];
            const int h = t.spatial[1];
            // One input: a single feature-size loop.
            for(auto f0 : test_feature_sizes)
                test_params * tp = new test_params();
                tp->input_layouts.push_back( cldnn::layout(tp->data_type, tp->fmt, cldnn::tensor( b, f0, w, h )) );
                all_generic_params.emplace_back(tp);
            // Two inputs: all (f0, f1) pairs.
            for(auto f0 : test_feature_sizes)
            for(auto f1 : test_feature_sizes)
                test_params * tp = new test_params();
                tp->input_layouts.push_back(cldnn::layout(tp->data_type, tp->fmt, cldnn::tensor( b, f0, w, h )) );
                tp->input_layouts.push_back(cldnn::layout(tp->data_type, tp->fmt, cldnn::tensor( b, f1, w, h )) );
                all_generic_params.emplace_back(tp);
            // Three inputs: all (f0, f1, f2) triples.
            for(auto f0 : test_feature_sizes)
            for(auto f1 : test_feature_sizes)
            for(auto f2 : test_feature_sizes)
                test_params * tp = new test_params();
                tp->input_layouts.push_back(cldnn::layout(tp->data_type, tp->fmt, cldnn::tensor( b, f0, w, h )) );
                tp->input_layouts.push_back(cldnn::layout(tp->data_type, tp->fmt, cldnn::tensor( b, f1, w, h )) );
                tp->input_layouts.push_back(cldnn::layout(tp->data_type, tp->fmt, cldnn::tensor( b, f2, w, h )) );
                all_generic_params.emplace_back(tp);
        return all_generic_params;
    // Pairs every generic param set with its matching primitive and also
    // records both in the static vectors for later cleanup.
    static std::vector<std::tuple<test_params*, cldnn::primitive*>> generate_all_test_params()
        std::vector<std::tuple<test_params*, cldnn::primitive*>> res;
        for (int i = 1; i <= 3; ++i)
            auto tpv = generate_generic_test_params(i);
            auto pv = generate_specific_test_params(i);
            all_generic_params.insert(all_generic_params.end(), tpv.begin(), tpv.end());
            all_layer_params.insert(all_layer_params.end(), pv.begin(), pv.end());
            for (auto & tp : tpv)
                res.emplace_back(tp, p);
    // Reference path only supports bfyx.
    virtual bool is_format_supported(cldnn::format format) override
        return format == cldnn_format_type::cldnn_format_bfyx;
    // Output shape = input shape with features summed across all inputs.
    virtual cldnn::tensor get_expected_output_tensor() override
        cldnn::tensor::value_type features = 0;
        for (const auto& t : generic_params->input_layouts)
            features += t.size.feature[0];
        const auto& t = generic_params->input_layouts[0].size;
        return{ t.batch[0], features, t.spatial[0], t.spatial[1] };
    // CPU reference: copies each input into the output at a growing
    // feature offset; asserts all inputs agree on b/h/w/type/format.
    template<typename Type>
    memory generate_reference_typed(const std::vector<memory> & inputs)
        assert(!inputs.empty());
        const int in_b = inputs[0].get_layout().size.batch[0];
        const int in_h = inputs[0].get_layout().size.spatial[1];
        const int in_w = inputs[0].get_layout().size.spatial[0];
        for (const memory & input : inputs)
            assert(input.get_layout().size.batch[0] == in_b);
            assert(input.get_layout().size.spatial[1] == in_h);
            assert(input.get_layout().size.spatial[0] == in_w);
            out_f += input.get_layout().size.feature[0];
            assert(input.get_layout().data_type == inputs[0].get_layout().data_type);
            assert(input.get_layout().format == inputs[0].get_layout().format);
        auto output = memory::allocate(engine, cldnn::layout(inputs[0].get_layout().data_type, cldnn::format::bfyx, tensor( in_b, out_f, in_w, in_h )));
        auto out_mem = output.pointer<Type>();
        // NOTE(review): out_f_off's declaration/increment is outside this
        // visible fragment.
        for (const memory & input : inputs)
            const auto input_desc = get_linear_memory_desc(input.get_layout());
            const auto output_desc = get_linear_memory_desc(output.get_layout());
            const int in_f = input.get_layout().size.feature[0];
            const auto in_mem = input.pointer<Type>();
            for (int n = 0; n < in_b; ++n)
            for (int f = 0; f < in_f; ++f)
            for (int y = 0; y < in_h; ++y)
            for (int x = 0; x < in_w; ++x)
                const size_t in_idx = get_linear_index(input.get_layout(), n, f, y, x, input_desc);
                const size_t out_idx = get_linear_index(output.get_layout(), n, out_f_off + f, y, x, output_desc);
                out_mem[out_idx] = in_mem[in_idx];
    // Dispatches the reference computation on the element type.
    virtual memory generate_reference(const std::vector<memory> & inputs) override
        if (generic_params->data_type == data_types::f32)
            return generate_reference_typed<float>(inputs);
        return generate_reference_typed<FLOAT16>(inputs);
    // Builds a readable gtest parameter name: data type + per-input dims.
    static std::string custom_param_name(const ::testing::TestParamInfo<std::tuple<test_params*, cldnn::primitive*>>& info)
        std::stringstream res;
        const auto & p = std::get<0>(info.param);
        assert (p->data_type == data_types::f32 ||
                p->data_type == data_types::f16);
            << "_" << (p->data_type == data_types::f32 ? "f32" : "f16");
        for (unsigned i = 0; i < p->input_layouts.size(); ++i)
            const auto chans = p->fmt.order();
            res << "_" << "Input" << i;
            for (unsigned int j = 0; j < p->input_layouts[i].size.sizes(p->fmt).size(); ++j)
                res << chans[j] << p->input_layouts[i].size.sizes(p->fmt)[j];
    // Shared storage for generated params so TearDownTestCase can free them.
    static std::vector<tests::test_params*> all_generic_params;
    static std::vector<cldnn::primitive*> all_layer_params;
// Out-of-class definitions for the fixture's static parameter storage.
std::vector<cldnn::primitive*> depth_concatenate_test::all_layer_params = {};
std::vector<tests::test_params*> depth_concatenate_test::all_generic_params = {};
// Runs the generic_test body over every generated (params, primitive)
// pair. The instantiation is DISABLED_ by default (exhaustive = slow).
TEST_P(depth_concatenate_test, DEPTHCONCATENATE)
INSTANTIATE_TEST_CASE_P(DISABLED_DEPTHCONCATENATE,
    depth_concatenate_test,
    ::testing::ValuesIn(depth_concatenate_test::generate_all_test_params()),
    depth_concatenate_test::custom_param_name);