inference-engine/thirdparty/clDNN/tests/test_cases/split_gpu_test.cpp

   1 /*
   2 // Copyright (c) 2017 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 ///////////////////////////////////////////////////////////////////////////////////////////////////
  18 #include <gtest/gtest.h>
  19 #include "api/CPP/memory.hpp"
  20 #include <api/CPP/input_layout.hpp>
  21 #include "api/CPP/split.hpp"
  22 #include "api/CPP/scale.hpp"
  23 #include <api/CPP/topology.hpp>
  24 #include <api/CPP/network.hpp>
  25 #include <api/CPP/engine.hpp>
  26 #include <api/CPP/reorder.hpp>
  27 #include "test_utils/test_utils.h"
  28
  29 #include <sstream>
  30 #include <iomanip>
  31
  32 using namespace cldnn;
  33 using namespace tests;
  34
  35 template<typename T>
  36 std::vector<T> generate_random_input(size_t b, size_t f, size_t y, size_t x, int min, int max) {
  37     static std::default_random_engine generator(random_seed);
  38     int k = 8; // 1/k is the resolution of the floating point numbers
  39     std::uniform_int_distribution<int> distribution(k * min, k * max);
  40     std::vector<T> v(b*f*x*y);
  41     for (size_t i = 0; i < b*f*x*y; ++i) {
  42         v[i] = (T)distribution(generator);
  43         v[i] /= k;
  44     }
  45     return v;
  46 }
  47
  48 template<typename T>
  49 void check_feature_map(cldnn::pointer<T> output_ptr, std::vector<T> &input_vec, size_t batch_num, size_t feature_num, size_t y_size, size_t x_size, size_t feature_id, size_t factor)
  50 {
  51     for (size_t b = 0; b < batch_num; ++b) { //B
  52         for (size_t y = 0; y < y_size; ++y) { //Y
  53             for (size_t x = 0; x < x_size; ++x) { //X
  54                 auto linear_id = x + x_size * (y + y_size * (feature_id + feature_num * b));
  55                 auto output_linear_id = x + x_size * (y + y_size * b);
  56                 EXPECT_EQ(output_ptr[output_linear_id], input_vec[linear_id] * factor);
  57             }
  58         }
  59     }
  60 }
  61
  62 template<typename T>
  63 void split_test(int batch_num, int feature_num, int x_size, int y_size, std::vector<cldnn::tensor> split_offsets)
  64 {
  65     const auto& engine = get_test_engine();
  66     cldnn::tensor reference_input_size = { batch_num, feature_num, x_size, y_size };
  67
  68     cldnn::memory input = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, reference_input_size });
  69     std::vector<std::pair<primitive_id, cldnn::tensor> > input_ids_offsets;
  70
  71     topology topology;
  72     topology.add(input_layout("input", input.get_layout()));
  73
  74     // lambda exoression to create the primitive id for the splits
  75     auto create_split_id = [](size_t splitNum) {
  76         std::stringstream ss;
  77         ss << std::setw(5) << std::setfill('0') << splitNum;
  78
  79         return ss.str();
  80     };
  81
  82     // Create the splits with the split ids for the topology
  83     for (size_t splitNum = 0; splitNum < split_offsets.size(); splitNum++)
  84     {
  85         input_ids_offsets.push_back({ create_split_id(splitNum), split_offsets[splitNum]});
  86     }
  87
  88     topology.add(split("split", "input", input_ids_offsets));
  89
  90     std::vector<T> input_vec = generate_random_input<T>(batch_num, feature_num, y_size, x_size, -10, 10);
  91     set_values(input, input_vec);
  92
  93     network network(engine, topology);
  94     network.set_input_data("input", input);
  95
  96     auto outputs = network.execute();
  97
  98     // The number of splits should match the expected number of splits
  99     EXPECT_EQ(outputs.size(), size_t(split_offsets.size()));
 100
 101     std::vector<cldnn::tensor> expected_sizes;
 102     for (size_t splitNum = 0; splitNum < split_offsets.size(); splitNum++)  // Calculate the expected sizes
 103     {
 104         cldnn::tensor size;
 105
 106         if (splitNum < (split_offsets.size() - 1))
 107         {
 108             size = split_offsets[splitNum + 1] - split_offsets[splitNum];
 109         }
 110         else
 111         {
 112             size = reference_input_size - split_offsets[splitNum];
 113         }
 114
 115         // For all the other dimensions, copy from the split_input
 116         for (int dimension = 0; dimension < CLDNN_TENSOR_DIM_MAX; dimension++)
 117         {
 118             size.raw[dimension]
 119                 = (size.raw[dimension] == 0) ? reference_input_size.raw[dimension] : size.raw[dimension];
 120         }
 121
 122         expected_sizes.push_back(size);
 123     }
 124
 125     pointer<T> input_ptr = input.pointer<T>();
 126
 127     for (size_t splitNum = 0; splitNum < split_offsets.size(); splitNum++)
 128     {
 129         primitive_id split_id = "split:" + create_split_id(splitNum);
 130         cldnn::memory output = outputs.at(split_id).get_memory();
 131         auto prim = output.get_layout();
 132         EXPECT_EQ(prim.size, expected_sizes[splitNum]);
 133         auto output_ptr = output.pointer<T>();
 134
 135         // Output tensor size
 136         auto output_batch = prim.size.batch[0];
 137         auto output_feature = prim.size.feature[0];
 138         auto output_x = prim.size.spatial[0];
 139         auto output_y = prim.size.spatial[1];
 140
 141         // Input offsets, starting from which we will compare the output
 142         auto input_batch_offset = split_offsets[splitNum].batch[0];
 143         auto input_feature_offset = split_offsets[splitNum].feature[0];
 144         auto input_y_offset = split_offsets[splitNum].spatial[1];
 145         auto input_x_offset = split_offsets[splitNum].spatial[0];
 146
 147         // iterator to iterate through input buffer
 148         auto input_batch_itr = input_batch_offset;
 149         auto input_feature_itr = input_feature_offset;
 150         auto input_y_itr = input_y_offset;
 151         auto input_x_itr = input_x_offset;
 152
 153         for (auto b = 0; b < output_batch; ++b) {  // B
 154
 155                 // reset the input feature iterator
 156             input_feature_itr = input_feature_offset;
 157             for (auto f = 0; f < output_feature; f++) {  // F
 158
 159                 // reset the input y iterator
 160                 input_y_itr = input_y_offset;
 161                 for (auto y = 0; y < output_y; y++) {  // Y
 162
 163                     // reset the input x iterator
 164                     input_x_itr = input_x_offset;
 165                     for (auto x = 0; x < output_x; x++) {  // X
 166                         auto linear_id = input_x_itr + x_size * (input_y_itr + y_size * (input_feature_itr + feature_num * input_batch_itr)); // index in input
 167                         auto output_linear_id = x + output_x * (y + output_y * (f + output_feature * b)); // index in output
 168                         EXPECT_EQ(output_ptr[output_linear_id], input_vec[linear_id]);
 169                         input_x_itr++;  // update the input x iterator
 170                     }
 171                     input_y_itr++;  // update the input y iterator
 172                 }
 173                 input_feature_itr++;  // update the input feature iterator
 174             }
 175             input_batch_itr++;  // update the input batch iterator
 176         }
 177     }
 178 }
 179
 180 TEST(split_gpu, split_1d_uneven_2_splits) {
 181
 182     //  Input      : 2x4x3x3
 183     //  Output1    : 2x1x3x3
 184     //  Output2    : 2x3x3x3
 185     //  Split params:
 186     //  id: "out0", offsets: { 0, 0, 0, 0 }
 187     //  id: "out1", offsets: { 0, 1, 0, 0 }
 188
 189     auto batch_num = 2;
 190     auto feature_num = 4;
 191     auto x_size = 3;
 192     auto y_size = 3;
 193     std::vector<cldnn::tensor> split_offsets = {
 194                                                 {0, 0, 0, 0},
 195                                                 {0, 1, 0, 0}
 196                                                };
 197
 198     split_test<float>(batch_num, feature_num, x_size, y_size, split_offsets);
 199 }
 200
 201
 202 TEST(split_gpu, basic_split_concat_optimization) {
 203
 204     const auto& engine = get_test_engine();
 205
 206     auto input = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 25, 1, 256 } });
 207     tests::set_random_values<float>(input);
 208
 209     topology topology;
 210     topology.add(input_layout("input", input.get_layout()));
 211     std::vector<std::pair<primitive_id, tensor>> offsets;
 212     std::vector<primitive_id> ids;
 213     for (int i = 0; i < 25; i++)
 214     {
 215         auto id = "crop_" + std::to_string(i);
 216         ids.push_back("split:" + id);
 217         offsets.push_back({ id, {0, i, 0, 0} });
 218     }
 219
 220     topology.add(split("split", "input", offsets));
 221     topology.add(concatenation("concat", ids, concatenation::along_f));
 222     topology.add(reorder("output", "concat", format::bfyx, data_types::f32));
 223
 224     build_options opts;
 225     opts.set_option(build_option::optimize_data(true));
 226     network network(engine, topology, opts);
 227
 228     network.set_input_data("input", input);
 229
 230     auto outputs = network.execute();
 231
 232     auto output = outputs.at("output").get_memory();
 233     auto output_ptr = output.pointer<float>();
 234     auto input_ptr = input.pointer<float>();
 235
 236     for (int i = 0; i < 25*256; ++i)
 237     {
 238         EXPECT_EQ(output_ptr[i], input_ptr[i]);
 239     }
 240 }
 241
 242 TEST(split_gpu, split_1d_uneven_3_splits) {
 243
 244     //  Input      : 2x8x3x3
 245     //  Output1    : 2x1x3x3
 246     //  Output2    : 2x3x3x3
 247     //  Output3    : 2x4x3x3
 248     //  Split params:
 249     //  id: "out0", offsets: { 0, 0, 0, 0 }
 250     //  id: "out1", offsets: { 0, 1, 0, 0 }
 251     //  id: "out2", offsets: { 0, 4, 0, 0 }
 252
 253     auto batch_num = 2;
 254     auto feature_num = 8;
 255     auto x_size = 3;
 256     auto y_size = 3;
 257     std::vector<cldnn::tensor> split_offsets = {
 258                                                 {0, 0, 0, 0},
 259                                                 {0, 1, 0, 0},
 260                                                 {0, 4, 0, 0},
 261                                                };
 262
 263     split_test<float>(batch_num, feature_num, x_size, y_size, split_offsets);
 264 }
 265
 266 TEST(split_gpu, split_2d_uneven_2_splits) {
 267
 268     //  Input      : 2x8x10x3
 269     //  Output1    : 2x1x4x3
 270     //  Output2    : 2x3x6x3
 271     //  Split params:
 272     //  id: "out0", offsets: { 0, 0, 0, 0 }
 273     //  id: "out1", offsets: { 0, 1, 4, 0 }
 274
 275     auto batch_num = 2;
 276     auto feature_num = 8;
 277     auto x_size = 10;
 278     auto y_size = 3;
 279     std::vector<cldnn::tensor> split_offsets = {
 280                                                 {0, 0, 0, 0},
 281                                                 {0, 1, 4, 0}
 282                                                };
 283
 284     split_test<float>(batch_num, feature_num, x_size, y_size, split_offsets);
 285 }
 286
 287 TEST(split_gpu, split_2d_uneven_3_split3) {
 288
 289     //  Input      : 2x8x10x3
 290     //  Output1    : 2x1x4x3
 291     //  Output2    : 2x3x3x3
 292     //  Output3    : 2x4x3x3
 293     //  Split params:
 294     //  id: "out0", offsets: { 0, 0, 0, 0 }
 295     //  id: "out1", offsets: { 0, 1, 4, 0 }
 296     //  id: "out2", offsets: { 0, 4, 7, 0 }
 297
 298     auto batch_num = 2;
 299     auto feature_num = 8;
 300     auto x_size = 10;
 301     auto y_size = 3;
 302     std::vector<cldnn::tensor> split_offsets = {
 303                                                 {0, 0, 0, 0},
 304                                                 {0, 1, 4, 0},
 305                                                 {0, 4, 7, 0},
 306                                                };
 307
 308     split_test<float>(batch_num, feature_num, x_size, y_size, split_offsets);
 309 }
 310
 311 TEST(split_gpu, split_3d_uneven_2_splits) {
 312
 313     //  Input      : 2x8x10x3
 314     //  Output1    : 2x1x4x1
 315     //  Output2    : 2x7x6x2
 316     //  Split params:
 317     //  id: "out0", offsets: { 0, 0, 0, 0 }
 318     //  id: "out1", offsets: { 0, 1, 4, 1 }
 319
 320     auto batch_num = 2;
 321     auto feature_num = 8;
 322     auto x_size = 10;
 323     auto y_size = 3;
 324     std::vector<cldnn::tensor> split_offsets = {
 325                                                 {0, 0, 0, 0},
 326                                                 {0, 1, 4, 1}
 327                                                };
 328
 329     split_test<float>(batch_num, feature_num, x_size, y_size, split_offsets);
 330 }
 331
 332 TEST(split_gpu, split_3d_uneven_3_splits) {
 333
 334     //  Input      : 2x8x10x5
 335     //  Output1    : 2x1x4x1
 336     //  Output2    : 2x6x4x1
 337     //  Output3    : 2x1x2x1
 338     //  Split params:
 339     //  id: "out0", offsets: { 0, 0, 0, 0 }
 340     //  id: "out1", offsets: { 0, 1, 4, 1 }
 341     //  id: "out2", offsets: { 0, 7, 8, 2 }
 342
 343     auto batch_num = 2;
 344     auto feature_num = 8;
 345     auto x_size = 10;
 346     auto y_size = 3;
 347     std::vector<cldnn::tensor> split_offsets = {
 348                                                 {0, 0, 0, 0},
 349                                                 {0, 1, 4, 1},
 350                                                 {0, 7, 8, 2}
 351                                                };
 352
 353     split_test<float>(batch_num, feature_num, x_size, y_size, split_offsets);
 354 }
 355
 356 TEST(split_gpu, basic_in2x3x2x2_split_feature_bfyx) {
 357     //  Input      : 6x3x4x3
 358     //  3 x Outputs: 6x1x4x3
 359     //  Split params:
 360     //  id: "out0", offsets: { 0, 0, 0, 0 }
 361     //  id: "out1", offsets: { 0, 1, 0, 0 }
 362     //  id: "out2", offsets: { 0, 2, 0, 0 }
 363
 364     const auto& engine = get_test_engine();
 365
 366     auto batch_num = 6;
 367     auto feature_num = 3;
 368     auto x_size = 4;
 369     auto y_size = 3;
 370
 371     auto input = memory::allocate(engine, { data_types::f32,format::bfyx,{ batch_num, feature_num, x_size, y_size } });
 372
 373     topology topology;
 374     topology.add(input_layout("input", input.get_layout()));
 375     topology.add(split("split", "input",
 376     {
 377         { "out0", { 0, 0, 0, 0 } },
 378         { "out1", { 0, 1, 0, 0 } },
 379         { "out2", { 0, 2, 0, 0 } }
 380     } ));
 381
 382     std::vector<float> input_vec = generate_random_input<float>(batch_num, feature_num, y_size, x_size, -10, 10);
 383     set_values(input, input_vec);
 384
 385     network network(engine, topology);
 386
 387     network.set_input_data("input", input);
 388
 389     auto outputs = network.execute();
 390
 391     EXPECT_EQ(outputs.size(), size_t(3));
 392
 393     for (unsigned int i = 0; i < 3; i++)
 394     {
 395         auto split_id = "split:out" + std::to_string(i);
 396         auto output = outputs.at(split_id).get_memory();
 397         auto output_ptr = output.pointer<float>();
 398         check_feature_map<float>(output_ptr, input_vec, batch_num, feature_num, y_size, x_size, i, 1);
 399     }
 400 }
 401
 402 TEST(split_gpu, basic_in2x3x2x2_split_scale_feature_bfyx) {
 403     //  Input      : 6x3x4x3
 404     //  3 x Outputs: 6x1x4x3
 405     //  Split params:
 406     //  id: "out0", offsets: { 0, 0, 0, 0 }
 407     //  id: "out1", offsets: { 0, 1, 0, 0 }
 408     //  id: "out2", offsets: { 0, 2, 0, 0 }
 409     //  Additional scale layer at the end
 410
 411     const auto& engine = get_test_engine();
 412
 413     auto batch_num = 6;
 414     auto feature_num = 3;
 415     auto x_size = 4;
 416     auto y_size = 3;
 417
 418     auto input = memory::allocate(engine, { data_types::f32,format::bfyx,{ batch_num, feature_num, x_size, y_size } });
 419     auto scale_input0 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
 420     auto scale_input1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
 421     auto scale_input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
 422
 423     topology topology;
 424     topology.add(input_layout("input", input.get_layout()));
 425     topology.add(input_layout("scale_input0", scale_input0.get_layout()));
 426     topology.add(input_layout("scale_input1", scale_input1.get_layout()));
 427     topology.add(input_layout("scale_input2", scale_input2.get_layout()));
 428     topology.add(split("split", "input",
 429     {
 430         { "out0",{ 0, 0, 0, 0 } },
 431         { "out1",{ 0, 1, 0, 0 } },
 432         { "out2",{ 0, 2, 0, 0 } }
 433     }));
 434     topology.add(scale("scale0", "split:out0", "scale_input0"));
 435     topology.add(scale("scale1", "split:out1", "scale_input1"));
 436     topology.add(scale("scale2", "split:out2", "scale_input2"));
 437
 438     std::vector<float> scale_input_vec0 = { 1.f };
 439     set_values(scale_input0, scale_input_vec0);
 440     std::vector<float> scale_input_vec1 = { 2.f };
 441     set_values(scale_input1, scale_input_vec1);
 442     std::vector<float> scale_input_vec2 = { 3.f };
 443     set_values(scale_input2, scale_input_vec2);
 444
 445     std::vector<float> input_vec = generate_random_input<float>(batch_num, feature_num, y_size, x_size, -10, 10);
 446     set_values(input, input_vec);
 447
 448     network network(engine, topology);
 449
 450     network.set_input_data("input", input);
 451     network.set_input_data("scale_input0", scale_input0);
 452     network.set_input_data("scale_input1", scale_input1);
 453     network.set_input_data("scale_input2", scale_input2);
 454
 455     auto outputs = network.execute();
 456
 457     EXPECT_EQ(outputs.size(), size_t(3));
 458
 459     for (unsigned int i = 0; i < 3; i++)
 460     {
 461         auto split_id = "scale" + std::to_string(i);
 462         auto output = outputs.at(split_id).get_memory();
 463         auto output_ptr = output.pointer<float>();
 464         check_feature_map<float>(output_ptr, input_vec, batch_num, feature_num, y_size, x_size, i, i + 1);
 465     }
 466 }