inference-engine/thirdparty/clDNN/tests/test_cases/gather_gpu_test.cpp

   1 // Copyright (c) 2019 Intel Corporation
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //      http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15
  16
  17 ///////////////////////////////////////////////////////////////////////////////////////////////////
  18 #include <gtest/gtest.h>
  19
  20 #include <api/CPP/input_layout.hpp>
  21 #include <api/CPP/memory.hpp>
  22 #include <api/CPP/gather.hpp>
  23 #include <api/CPP/topology.hpp>
  24 #include <api/CPP/network.hpp>
  25
  26 #include <cstddef>
  27 #include <tests/test_utils/test_utils.h>
  28
  29 using namespace cldnn;
  30 using namespace ::tests;
  31
  32 TEST(gather_gpu_fp16, d14_axisB) {
  33     //  Indexes  : 2x2x1x1
  34     //  Dictionary : 1x4x1x1
  35     //  Axis : 0
  36     //  Output : 1x4x2x1
  37     //  Input values in fp16
  38
  39     //  Indexes:
  40     //  0.f, 1.f, 1.f, 0.f
  41     //
  42     //  Dictionary:
  43     //  1.f, 2.f, 3.f, 4.f
  44     //
  45     //  Output:
  46     //  1.f, 2.f, 3.f, 4.f, 3.f, 4.f, 1.f, 2.f
  47
  48     engine engine;
  49
  50     auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // Dictionary
  51     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 1, 1 } }); // Indexes
  52     auto axis = cldnn::gather::gather_axis::along_b;
  53
  54     set_values(input1, {
  55         FLOAT16(1.0f), FLOAT16(2.0f),
  56         FLOAT16(3.0f), FLOAT16(4.0f)
  57     });
  58
  59     set_values(input2, {
  60         0.f, 1.f,
  61         1.f, 0.f
  62     });
  63
  64     topology topology;
  65     topology.add(input_layout("InputDictionary", input1.get_layout()));
  66     topology.add(input_layout("InputText", input2.get_layout()));
  67     topology.add(
  68         gather("gather", "InputDictionary", "InputText", axis, tensor(1, 4, 1, 2))
  69     );
  70
  71     network network(engine, topology);
  72
  73     network.set_input_data("InputDictionary", input1);
  74     network.set_input_data("InputText", input2);
  75
  76     auto outputs = network.execute();
  77
  78     auto output = outputs.at("gather").get_memory();
  79     auto output_ptr = output.pointer<uint16_t>();
  80
  81     std::vector<float> expected_results = {
  82         1.f, 2.f, 3.f, 4.f, 3.f, 4.f, 1.f, 2.f
  83     };
  84
  85     for (size_t i = 0; i < expected_results.size(); ++i) {
  86         EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
  87     }
  88 }
  89
  90 TEST(gather_gpu_fp16, d222_axisB) {
  91     //  Indexes  : 3x2x2x1
  92     //  Dictionary : 2x2x1x1
  93     //  Axis : 0
  94     //  Output : 2x2x2x2
  95     //  Input values in fp16
  96
  97     //  Indexes:
  98     //  0.f, 1.f, 2.f, 1.f
  99     //
 100     //  Dictionary:
 101     //  1.f, 2.f, 3.f, 4.f, 5.f, 6.f,
 102     //  7.f, 8.f, 9.f, 10.f, 11.f, 12.f
 103     //
 104     //  Output:
 105     //  1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 5.f, 6.f, 7.f, 8.f
 106
 107     engine engine;
 108
 109     auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 1, 2 } }); // Dictionary
 110     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes
 111     auto axis = cldnn::gather::gather_axis::along_b;
 112
 113     set_values(input1, {
 114         FLOAT16(1.f), FLOAT16(2.f), FLOAT16(3.f),
 115         FLOAT16(4.f), FLOAT16(5.f), FLOAT16(6.f),
 116
 117         FLOAT16(7.f), FLOAT16(8.f), FLOAT16(9.f),
 118         FLOAT16(10.f), FLOAT16(11.f), FLOAT16(12.f)
 119     });
 120
 121     set_values(input2, {
 122         0.f, 1.f,
 123         2.f, 1.f
 124     });
 125
 126     topology topology;
 127     topology.add(input_layout("InputDictionary", input1.get_layout()));
 128     topology.add(input_layout("InputText", input2.get_layout()));
 129     topology.add(
 130         gather("gather", "InputDictionary", "InputText", axis, tensor(2, 2, 2, 2))
 131     );
 132
 133     network network(engine, topology);
 134
 135     network.set_input_data("InputDictionary", input1);
 136     network.set_input_data("InputText", input2);
 137
 138     auto outputs = network.execute();
 139
 140     auto output = outputs.at("gather").get_memory();
 141     auto output_ptr = output.pointer<uint16_t>();
 142
 143     std::vector<float> expected_results = {
 144         1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 5.f, 6.f, 7.f, 8.f
 145     };
 146
 147     for (size_t i = 0; i < expected_results.size(); ++i) {
 148         EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
 149     }
 150 }
 151
 152 TEST(gather_gpu_fp16, d22_axisY) {
 153     //  Indexes  : 2x2x3x1
 154     //  Dictionary : 2x2x1x1
 155     //  Axis : 2
 156     //  Output : 2x2x2x2
 157     //  Input values in fp16
 158
 159     //  Indexes:
 160     //  0.f, 1.f, 2.f, 1.f
 161     //
 162     //  Dictionary:
 163     //  1.f, 2.f, 3.f, 4.f, 5.f, 6.f,
 164     //  7.f, 8.f, 9.f, 10.f, 11.f, 12.f
 165     //
 166     //  Output:
 167     //  1.f, 2.f, 3.f, 2.f, 4.f, 5.f, 6.f, 5.f, 7.f, 8.f, 9.f, 8.f, 10.f, 11.f, 12.f, 11.f
 168
 169     engine engine;
 170
 171     auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 3 } }); // Dictionary
 172     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes
 173     auto axis = cldnn::gather::gather_axis::along_y;
 174
 175     set_values(input1, {
 176         FLOAT16(1.f), FLOAT16(2.f), FLOAT16(3.f),
 177         FLOAT16(4.f), FLOAT16(5.f), FLOAT16(6.f),
 178
 179         FLOAT16(7.f), FLOAT16(8.f), FLOAT16(9.f),
 180         FLOAT16(10.f), FLOAT16(11.f), FLOAT16(12.f)
 181     });
 182
 183     set_values(input2, {
 184         0.f, 1.f, 2.f, 1.f
 185     });
 186
 187     topology topology;
 188     topology.add(input_layout("InputDictionary", input1.get_layout()));
 189     topology.add(input_layout("InputText", input2.get_layout()));
 190     topology.add(
 191         gather("gather", "InputDictionary", "InputText", axis, tensor(2, 2, 2, 2))
 192     );
 193
 194     network network(engine, topology);
 195
 196     network.set_input_data("InputDictionary", input1);
 197     network.set_input_data("InputText", input2);
 198
 199     auto outputs = network.execute();
 200
 201     auto output = outputs.at("gather").get_memory();
 202     auto output_ptr = output.pointer<uint16_t>();
 203
 204     std::vector<float> expected_results = {
 205         1.f, 2.f, 3.f, 2.f, 4.f, 5.f, 6.f, 5.f, 7.f, 8.f, 9.f, 8.f, 10.f, 11.f, 12.f, 11.f
 206     };
 207
 208     for (size_t i = 0; i < expected_results.size(); ++i) {
 209         EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
 210     }
 211 }
 212
 213 TEST(gather_gpu_fp16, d22_axisF) {
 214     //  Indexes  : 2x3x2x1
 215     //  Dictionary : 2x2x1x1
 216     //  Axis : 2
 217     //  Output : 2x2x2x2
 218     //  Input values in fp16
 219
 220     //  Indexes:
 221     //  0.f, 1.f, 2.f, 1.f
 222     //
 223     //  Dictionary:
 224     //  1.f, 2.f, 3.f, 4.f, 5.f, 6.f,
 225     //  7.f, 8.f, 9.f, 10.f, 11.f, 12.f
 226     //
 227     //  Output:
 228     //  1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 3.f, 4.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 9.f, 10.f
 229
 230     engine engine;
 231
 232     auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 3, 1, 2 } }); // Dictionary
 233     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes
 234     auto axis = cldnn::gather::gather_axis::along_f;
 235
 236     set_values(input1, {
 237             FLOAT16(1.f), FLOAT16(2.f), FLOAT16(3.f),
 238             FLOAT16(4.f), FLOAT16(5.f), FLOAT16(6.f),
 239
 240             FLOAT16(7.f), FLOAT16(8.f), FLOAT16(9.f),
 241             FLOAT16(10.f), FLOAT16(11.f), FLOAT16(12.f)
 242     });
 243
 244     set_values(input2, {
 245             0.f, 1.f, 2.f, 1.f
 246     });
 247
 248     topology topology;
 249     topology.add(input_layout("InputDictionary", input1.get_layout()));
 250     topology.add(input_layout("InputText", input2.get_layout()));
 251     topology.add(
 252             gather("gather", "InputDictionary", "InputText", axis, tensor(2, 2, 2, 2))
 253     );
 254
 255     network network(engine, topology);
 256
 257     network.set_input_data("InputDictionary", input1);
 258     network.set_input_data("InputText", input2);
 259
 260     auto outputs = network.execute();
 261
 262     auto output = outputs.at("gather").get_memory();
 263     auto output_ptr = output.pointer<uint16_t>();
 264
 265     std::vector<float> expected_results = {
 266             1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 3.f, 4.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 9.f, 10.f
 267     };
 268
 269     for (size_t i = 0; i < expected_results.size(); ++i) {
 270         EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
 271     }
 272 }
 273
 274 TEST(gather_gpu_fp32, d14_axisB) {
 275     //  Indexes  : 2x2x1x1
 276     //  Dictionary : 1x4x1x1
 277     //  Axis : 0
 278     //  Output : 1x4x2x1
 279     //  Input values in fp32
 280
 281     //  Indexes:
 282     //  0.f, 1.f, 1.f, 0.f
 283     //
 284     //  Dictionary:
 285     //  1.f, 2.f, 3.f, 4.f
 286     //
 287     //  Output:
 288     //  1.f, 2.f, 3.f, 4.f, 3.f, 4.f, 1.f, 2.f
 289
 290     engine engine;
 291
 292     auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Dictionary
 293     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 1, 1 } }); // Indexes
 294     auto axis = cldnn::gather::gather_axis::along_b;
 295
 296     set_values(input1, {
 297         1.0f, 2.0f,
 298         3.0f, 4.0f
 299     });
 300
 301     set_values(input2, {
 302         0.f, 1.f,
 303         1.f, 0.f
 304     });
 305
 306     topology topology;
 307     topology.add(input_layout("InputDictionary", input1.get_layout()));
 308     topology.add(input_layout("InputText", input2.get_layout()));
 309     topology.add(
 310         gather("gather", "InputDictionary", "InputText", axis, tensor(1, 4, 1, 2))
 311     );
 312
 313     network network(engine, topology);
 314
 315     network.set_input_data("InputDictionary", input1);
 316     network.set_input_data("InputText", input2);
 317
 318     auto outputs = network.execute();
 319
 320     auto output = outputs.at("gather").get_memory();
 321     auto output_ptr = output.pointer<float>();
 322
 323     std::vector<float> expected_results = {
 324         1.f, 2.f, 3.f, 4.f, 3.f, 4.f, 1.f, 2.f
 325     };
 326
 327     for (size_t i = 0; i < expected_results.size(); ++i) {
 328         EXPECT_EQ(expected_results[i], output_ptr[i]);
 329     }
 330 }
 331
 332 TEST(gather_gpu_fp32, d222_axisB) {
 333     //  Indexes  : 3x2x2x1
 334     //  Dictionary : 2x2x1x1
 335     //  Axis : 0
 336     //  Output : 2x2x2x2
 337     //  Input values in fp32
 338
 339     //  Indexes:
 340     //  0.f, 1.f, 2.f, 1.f
 341     //
 342     //  Dictionary:
 343     //  1.f, 2.f, 3.f, 4.f, 5.f, 6.f,
 344     //  7.f, 8.f, 9.f, 10.f, 11.f, 12.f
 345     //
 346     //  Output:
 347     //  1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 5.f, 6.f, 7.f, 8.f
 348
 349     engine engine;
 350
 351     auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 2, 1, 2 } }); // Dictionary
 352     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes
 353     auto axis = cldnn::gather::gather_axis::along_b;
 354
 355     set_values(input1, {
 356         1.f, 2.f, 3.f,
 357         4.f, 5.f, 6.f,
 358
 359         7.f, 8.f, 9.f,
 360         10.f, 11.f, 12.f
 361     });
 362
 363     set_values(input2, {
 364         0.f, 1.f, 2.f, 1.f
 365     });
 366
 367     topology topology;
 368     topology.add(input_layout("InputDictionary", input1.get_layout()));
 369     topology.add(input_layout("InputText", input2.get_layout()));
 370     topology.add(
 371         gather("gather", "InputDictionary", "InputText", axis, tensor(2, 2, 2, 2))
 372     );
 373
 374     network network(engine, topology);
 375
 376     network.set_input_data("InputDictionary", input1);
 377     network.set_input_data("InputText", input2);
 378
 379     auto outputs = network.execute();
 380
 381     auto output = outputs.at("gather").get_memory();
 382     auto output_ptr = output.pointer<float>();
 383
 384     std::vector<float> expected_results = {
 385         1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 5.f, 6.f, 7.f, 8.f
 386     };
 387
 388     for (size_t i = 0; i < expected_results.size(); ++i) {
 389         EXPECT_EQ(expected_results[i], output_ptr[i]);
 390     }
 391 }
 392
 393 TEST(gather_gpu_fp32, d22_axisY) {
 394     //  Indexes  : 2x2x3x1
 395     //  Dictionary : 2x2x1x1
 396     //  Axis : 2
 397     //  Output : 2x2x2x2
 398     //  Input values in fp32
 399
 400     //  Indexes:
 401     //  0.f, 1.f, 2.f, 1.f
 402     //
 403     //  Dictionary:
 404     //  1.f, 2.f, 3.f, 4.f, 5.f, 6.f,
 405     //  7.f, 8.f, 9.f, 10.f, 11.f, 12.f
 406     //
 407     //  Output:
 408     //  1.f, 2.f, 3.f, 2.f, 4.f, 5.f, 6.f, 5.f, 7.f, 8.f, 9.f, 8.f, 10.f, 11.f, 12.f, 11.f
 409
 410     engine engine;
 411
 412     auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 3 } }); // Dictionary
 413     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes
 414     auto axis = cldnn::gather::gather_axis::along_y;
 415
 416     set_values(input1, {
 417         1.f, 2.f, 3.f,
 418         4.f, 5.f, 6.f,
 419
 420         7.f, 8.f, 9.f,
 421         10.f, 11.f, 12.f
 422     });
 423
 424     set_values(input2, {
 425         0.f, 1.f, 2.f, 1.f
 426     });
 427
 428     topology topology;
 429     topology.add(input_layout("InputDictionary", input1.get_layout()));
 430     topology.add(input_layout("InputText", input2.get_layout()));
 431     topology.add(
 432         gather("gather", "InputDictionary", "InputText", axis, tensor(2, 2, 2, 2))
 433     );
 434
 435     network network(engine, topology);
 436
 437     network.set_input_data("InputDictionary", input1);
 438     network.set_input_data("InputText", input2);
 439
 440     auto outputs = network.execute();
 441
 442     auto output = outputs.at("gather").get_memory();
 443     auto output_ptr = output.pointer<float>();
 444
 445     std::vector<float> expected_results = {
 446         1.f, 2.f, 3.f, 2.f, 4.f, 5.f, 6.f, 5.f, 7.f, 8.f, 9.f, 8.f, 10.f, 11.f, 12.f, 11.f
 447     };
 448
 449     for (size_t i = 0; i < expected_results.size(); ++i) {
 450         EXPECT_EQ(expected_results[i], output_ptr[i]);
 451     }
 452 }
 453
 454 TEST(gather_gpu_fp32, d22_axisF) {
 455     //  Indexes  : 2x3x2x1
 456     //  Dictionary : 2x2x1x1
 457     //  Axis : 1
 458     //  Output : 2x2x2x2
 459     //  Input values in fp32
 460
 461     //  Indexes:
 462     //  0.f, 1.f, 2.f, 1.f
 463     //
 464     //  Dictionary:
 465     //  1.f, 2.f, 3.f, 4.f, 5.f, 6.f,
 466     //  7.f, 8.f, 9.f, 10.f, 11.f, 12.f
 467     //
 468     //  Output:
 469     //  1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 3.f, 4.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 9.f, 10.f
 470
 471     engine engine;
 472
 473     auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 3, 1, 2 } }); // Dictionary
 474     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes
 475     auto axis = cldnn::gather::gather_axis::along_f;
 476
 477     set_values(input1, {
 478             1.f, 2.f, 3.f,
 479             4.f, 5.f, 6.f,
 480
 481             7.f, 8.f, 9.f,
 482             10.f, 11.f, 12.f
 483     });
 484
 485     set_values(input2, {
 486             0.f, 1.f, 2.f, 1.f
 487     });
 488
 489     topology topology;
 490     topology.add(input_layout("InputDictionary", input1.get_layout()));
 491     topology.add(input_layout("InputText", input2.get_layout()));
 492     topology.add(
 493             gather("gather", "InputDictionary", "InputText", axis, tensor(2, 2, 2, 2))
 494     );
 495
 496     network network(engine, topology);
 497
 498     network.set_input_data("InputDictionary", input1);
 499     network.set_input_data("InputText", input2);
 500
 501     auto outputs = network.execute();
 502
 503     auto output = outputs.at("gather").get_memory();
 504     auto output_ptr = output.pointer<float>();
 505
 506     std::vector<float> expected_results = {
 507             1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 3.f, 4.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 9.f, 10.f
 508     };
 509
 510     for (size_t i = 0; i < expected_results.size(); ++i) {
 511         EXPECT_EQ(expected_results[i], output_ptr[i]);
 512     }
 513 }