1 // Copyright (c) 2019 Intel Corporation
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
18 #include <gtest/gtest.h>
20 #include <api/CPP/input_layout.hpp>
21 #include <api/CPP/memory.hpp>
22 #include <api/CPP/gather.hpp>
23 #include <api/CPP/topology.hpp>
24 #include <api/CPP/network.hpp>
27 #include <tests/test_utils/test_utils.h>
29 using namespace cldnn;
30 using namespace ::tests;
32 TEST(gather_gpu_fp16, d14_axisB) {
// Purpose: run a network containing a single "gather" primitive (axis = along_b) on an
// f16 dictionary buffer and compare each output element with a hard-coded expectation.
34 // Dictionary : 1x4x1x1
// NOTE(review): the buffer tagged "Dictionary" below is allocated with { 2, 2, 1, 1 };
// { 1, 4, 1, 1 } is the size of the buffer tagged "Indexes". This label looks swapped - confirm.
37 // Input values in fp16
// (Only the dictionary buffer is f16; the index buffer below is allocated as f32.)
46 // 1.f, 2.f, 3.f, 4.f, 3.f, 4.f, 1.f, 2.f
// The line above repeats the contents of expected_results further down.
// input1 = values to gather from (f16), input2 = positions to gather (f32), both in bfyx format.
50 auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // Dictionary
51 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 1, 1 } }); // Indexes
52 auto axis = cldnn::gather::gather_axis::along_b;
// Dictionary contents: four half-precision values, 1 through 4.
55 FLOAT16(1.0f), FLOAT16(2.0f),
56 FLOAT16(3.0f), FLOAT16(4.0f)
// Topology: two input layouts (taken from the allocated buffers) feeding the gather primitive.
65 topology.add(input_layout("InputDictionary", input1.get_layout()));
66 topology.add(input_layout("InputText", input2.get_layout()));
// Arguments: primitive id, dictionary input id, indexes input id, gather axis, and a tensor(1, 4, 1, 2)
// (presumably the output shape - confirm against the gather primitive's constructor).
68 gather("gather", "InputDictionary", "InputText", axis, tensor(1, 4, 1, 2))
71 network network(engine, topology);
// Bind the allocated buffers to the input ids declared in the topology, then run.
73 network.set_input_data("InputDictionary", input1);
74 network.set_input_data("InputText", input2);
76 auto outputs = network.execute();
// Fetch the result of the primitive with id "gather" and view it as raw 16-bit words.
78 auto output = outputs.at("gather").get_memory();
79 auto output_ptr = output.pointer<uint16_t>();
81 std::vector<float> expected_results = {
82 1.f, 2.f, 3.f, 4.f, 3.f, 4.f, 1.f, 2.f
// Each 16-bit word is converted with float16_to_float32 and compared for exact equality.
// The loop is bounded by expected_results.size().
85 for (size_t i = 0; i < expected_results.size(); ++i) {
86 EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
90 TEST(gather_gpu_fp16, d222_axisB) {
// Purpose: run a single "gather" primitive (axis = along_b) on a 12-element f16 dictionary
// and compare the 16 output elements with a hard-coded expectation.
92 // Dictionary : 2x2x1x1
// NOTE(review): the buffer tagged "Dictionary" below is allocated with { 3, 2, 1, 2 };
// { 2, 2, 1, 1 } is the size of the buffer tagged "Indexes". This label looks swapped - confirm.
95 // Input values in fp16
// (Only the dictionary buffer is f16; the index buffer below is allocated as f32.)
// The next two lines repeat the twelve FLOAT16 dictionary values listed further down.
101 // 1.f, 2.f, 3.f, 4.f, 5.f, 6.f,
102 // 7.f, 8.f, 9.f, 10.f, 11.f, 12.f
// The next line repeats the contents of expected_results.
105 // 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 5.f, 6.f, 7.f, 8.f
// input1 = values to gather from (f16), input2 = positions to gather (f32), both in bfyx format.
109 auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 1, 2 } }); // Dictionary
110 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes
111 auto axis = cldnn::gather::gather_axis::along_b;
// Dictionary contents: twelve half-precision values, 1 through 12.
114 FLOAT16(1.f), FLOAT16(2.f), FLOAT16(3.f),
115 FLOAT16(4.f), FLOAT16(5.f), FLOAT16(6.f),
117 FLOAT16(7.f), FLOAT16(8.f), FLOAT16(9.f),
118 FLOAT16(10.f), FLOAT16(11.f), FLOAT16(12.f)
// Topology: two input layouts (taken from the allocated buffers) feeding the gather primitive.
127 topology.add(input_layout("InputDictionary", input1.get_layout()));
128 topology.add(input_layout("InputText", input2.get_layout()));
// Arguments: primitive id, dictionary input id, indexes input id, gather axis, and a tensor(2, 2, 2, 2)
// (presumably the output shape - confirm against the gather primitive's constructor).
130 gather("gather", "InputDictionary", "InputText", axis, tensor(2, 2, 2, 2))
133 network network(engine, topology);
// Bind the allocated buffers to the input ids declared in the topology, then run.
135 network.set_input_data("InputDictionary", input1);
136 network.set_input_data("InputText", input2);
138 auto outputs = network.execute();
// Fetch the result of the primitive with id "gather" and view it as raw 16-bit words.
140 auto output = outputs.at("gather").get_memory();
141 auto output_ptr = output.pointer<uint16_t>();
143 std::vector<float> expected_results = {
144 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 5.f, 6.f, 7.f, 8.f
// Each 16-bit word is converted with float16_to_float32 and compared for exact equality.
// The loop is bounded by expected_results.size().
147 for (size_t i = 0; i < expected_results.size(); ++i) {
148 EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
152 TEST(gather_gpu_fp16, d22_axisY) {
// Purpose: run a single "gather" primitive (axis = along_y) on a 12-element f16 dictionary
// and compare the 16 output elements with a hard-coded expectation.
154 // Dictionary : 2x2x1x1
// NOTE(review): the buffer tagged "Dictionary" below is allocated with { 2, 2, 1, 3 };
// { 2, 2, 1, 1 } is the size of the buffer tagged "Indexes". This label looks swapped - confirm.
157 // Input values in fp16
// (Only the dictionary buffer is f16; the index buffer below is allocated as f32.)
// Four values, matching the element count of the index buffer (presumably its contents - confirm).
160 // 0.f, 1.f, 2.f, 1.f
// The next two lines repeat the twelve FLOAT16 dictionary values listed further down.
163 // 1.f, 2.f, 3.f, 4.f, 5.f, 6.f,
164 // 7.f, 8.f, 9.f, 10.f, 11.f, 12.f
// The next line repeats the contents of expected_results.
167 // 1.f, 2.f, 3.f, 2.f, 4.f, 5.f, 6.f, 5.f, 7.f, 8.f, 9.f, 8.f, 10.f, 11.f, 12.f, 11.f
// input1 = values to gather from (f16), input2 = positions to gather (f32), both in bfyx format.
171 auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 3 } }); // Dictionary
172 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes
173 auto axis = cldnn::gather::gather_axis::along_y;
// Dictionary contents: twelve half-precision values, 1 through 12.
176 FLOAT16(1.f), FLOAT16(2.f), FLOAT16(3.f),
177 FLOAT16(4.f), FLOAT16(5.f), FLOAT16(6.f),
179 FLOAT16(7.f), FLOAT16(8.f), FLOAT16(9.f),
180 FLOAT16(10.f), FLOAT16(11.f), FLOAT16(12.f)
// Topology: two input layouts (taken from the allocated buffers) feeding the gather primitive.
188 topology.add(input_layout("InputDictionary", input1.get_layout()));
189 topology.add(input_layout("InputText", input2.get_layout()));
// Arguments: primitive id, dictionary input id, indexes input id, gather axis, and a tensor(2, 2, 2, 2)
// (presumably the output shape - confirm against the gather primitive's constructor).
191 gather("gather", "InputDictionary", "InputText", axis, tensor(2, 2, 2, 2))
194 network network(engine, topology);
// Bind the allocated buffers to the input ids declared in the topology, then run.
196 network.set_input_data("InputDictionary", input1);
197 network.set_input_data("InputText", input2);
199 auto outputs = network.execute();
// Fetch the result of the primitive with id "gather" and view it as raw 16-bit words.
201 auto output = outputs.at("gather").get_memory();
202 auto output_ptr = output.pointer<uint16_t>();
204 std::vector<float> expected_results = {
205 1.f, 2.f, 3.f, 2.f, 4.f, 5.f, 6.f, 5.f, 7.f, 8.f, 9.f, 8.f, 10.f, 11.f, 12.f, 11.f
// Each 16-bit word is converted with float16_to_float32 and compared for exact equality.
// The loop is bounded by expected_results.size().
208 for (size_t i = 0; i < expected_results.size(); ++i) {
209 EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
213 TEST(gather_gpu_fp16, d22_axisF) {
// Purpose: run a single "gather" primitive (axis = along_f) on a 12-element f16 dictionary
// and compare the 16 output elements with a hard-coded expectation.
215 // Dictionary : 2x2x1x1
// NOTE(review): the buffer tagged "Dictionary" below is allocated with { 2, 3, 1, 2 };
// { 2, 2, 1, 1 } is the size of the buffer tagged "Indexes". This label looks swapped - confirm.
218 // Input values in fp16
// (Only the dictionary buffer is f16; the index buffer below is allocated as f32.)
// Four values, matching the element count of the index buffer (presumably its contents - confirm).
221 // 0.f, 1.f, 2.f, 1.f
// The next two lines repeat the twelve FLOAT16 dictionary values listed further down.
224 // 1.f, 2.f, 3.f, 4.f, 5.f, 6.f,
225 // 7.f, 8.f, 9.f, 10.f, 11.f, 12.f
// The next line repeats the contents of expected_results.
228 // 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 3.f, 4.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 9.f, 10.f
// input1 = values to gather from (f16), input2 = positions to gather (f32), both in bfyx format.
232 auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 3, 1, 2 } }); // Dictionary
233 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes
234 auto axis = cldnn::gather::gather_axis::along_f;
// Dictionary contents: twelve half-precision values, 1 through 12.
237 FLOAT16(1.f), FLOAT16(2.f), FLOAT16(3.f),
238 FLOAT16(4.f), FLOAT16(5.f), FLOAT16(6.f),
240 FLOAT16(7.f), FLOAT16(8.f), FLOAT16(9.f),
241 FLOAT16(10.f), FLOAT16(11.f), FLOAT16(12.f)
// Topology: two input layouts (taken from the allocated buffers) feeding the gather primitive.
249 topology.add(input_layout("InputDictionary", input1.get_layout()));
250 topology.add(input_layout("InputText", input2.get_layout()));
// Arguments: primitive id, dictionary input id, indexes input id, gather axis, and a tensor(2, 2, 2, 2)
// (presumably the output shape - confirm against the gather primitive's constructor).
252 gather("gather", "InputDictionary", "InputText", axis, tensor(2, 2, 2, 2))
255 network network(engine, topology);
// Bind the allocated buffers to the input ids declared in the topology, then run.
257 network.set_input_data("InputDictionary", input1);
258 network.set_input_data("InputText", input2);
260 auto outputs = network.execute();
// Fetch the result of the primitive with id "gather" and view it as raw 16-bit words.
262 auto output = outputs.at("gather").get_memory();
263 auto output_ptr = output.pointer<uint16_t>();
265 std::vector<float> expected_results = {
266 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 3.f, 4.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 9.f, 10.f
// Each 16-bit word is converted with float16_to_float32 and compared for exact equality.
// The loop is bounded by expected_results.size().
269 for (size_t i = 0; i < expected_results.size(); ++i) {
270 EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
274 TEST(gather_gpu_fp32, d14_axisB) {
// Purpose: f32 counterpart of gather_gpu_fp16.d14_axisB - run a single "gather" primitive
// (axis = along_b) on an f32 dictionary and compare the output with a hard-coded expectation.
276 // Dictionary : 1x4x1x1
// NOTE(review): the buffer tagged "Dictionary" below is allocated with { 2, 2, 1, 1 };
// { 1, 4, 1, 1 } is the size of the buffer tagged "Indexes". This label looks swapped - confirm.
279 // Input values in fp32
// Four values each, matching the element counts of the two buffers allocated below
// (presumably index values first, then dictionary values - confirm).
282 // 0.f, 1.f, 1.f, 0.f
285 // 1.f, 2.f, 3.f, 4.f
// The next line repeats the contents of expected_results.
288 // 1.f, 2.f, 3.f, 4.f, 3.f, 4.f, 1.f, 2.f
// input1 = values to gather from, input2 = positions to gather; both f32 in bfyx format.
292 auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Dictionary
293 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 1, 1 } }); // Indexes
294 auto axis = cldnn::gather::gather_axis::along_b;
// Topology: two input layouts (taken from the allocated buffers) feeding the gather primitive.
307 topology.add(input_layout("InputDictionary", input1.get_layout()));
308 topology.add(input_layout("InputText", input2.get_layout()));
// Arguments: primitive id, dictionary input id, indexes input id, gather axis, and a tensor(1, 4, 1, 2)
// (presumably the output shape - confirm against the gather primitive's constructor).
310 gather("gather", "InputDictionary", "InputText", axis, tensor(1, 4, 1, 2))
313 network network(engine, topology);
// Bind the allocated buffers to the input ids declared in the topology, then run.
315 network.set_input_data("InputDictionary", input1);
316 network.set_input_data("InputText", input2);
318 auto outputs = network.execute();
// Fetch the result of the primitive with id "gather" and read it directly as floats.
320 auto output = outputs.at("gather").get_memory();
321 auto output_ptr = output.pointer<float>();
323 std::vector<float> expected_results = {
324 1.f, 2.f, 3.f, 4.f, 3.f, 4.f, 1.f, 2.f
// Element-wise exact comparison (EXPECT_EQ, no tolerance); bounded by expected_results.size().
327 for (size_t i = 0; i < expected_results.size(); ++i) {
328 EXPECT_EQ(expected_results[i], output_ptr[i]);
332 TEST(gather_gpu_fp32, d222_axisB) {
// Purpose: f32 counterpart of gather_gpu_fp16.d222_axisB - run a single "gather" primitive
// (axis = along_b) on an f32 dictionary and compare the 16 output elements with a hard-coded expectation.
334 // Dictionary : 2x2x1x1
// NOTE(review): the buffer tagged "Dictionary" below is allocated with { 3, 2, 1, 2 };
// { 2, 2, 1, 1 } is the size of the buffer tagged "Indexes". This label looks swapped - confirm.
337 // Input values in fp32
// Four values, matching the element count of the index buffer (presumably its contents - confirm).
340 // 0.f, 1.f, 2.f, 1.f
// Twelve values, matching the element count of the dictionary buffer (presumably its contents - confirm).
343 // 1.f, 2.f, 3.f, 4.f, 5.f, 6.f,
344 // 7.f, 8.f, 9.f, 10.f, 11.f, 12.f
// The next line repeats the contents of expected_results.
347 // 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 5.f, 6.f, 7.f, 8.f
// input1 = values to gather from, input2 = positions to gather; both f32 in bfyx format.
351 auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 2, 1, 2 } }); // Dictionary
352 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes
353 auto axis = cldnn::gather::gather_axis::along_b;
// Topology: two input layouts (taken from the allocated buffers) feeding the gather primitive.
368 topology.add(input_layout("InputDictionary", input1.get_layout()));
369 topology.add(input_layout("InputText", input2.get_layout()));
// Arguments: primitive id, dictionary input id, indexes input id, gather axis, and a tensor(2, 2, 2, 2)
// (presumably the output shape - confirm against the gather primitive's constructor).
371 gather("gather", "InputDictionary", "InputText", axis, tensor(2, 2, 2, 2))
374 network network(engine, topology);
// Bind the allocated buffers to the input ids declared in the topology, then run.
376 network.set_input_data("InputDictionary", input1);
377 network.set_input_data("InputText", input2);
379 auto outputs = network.execute();
// Fetch the result of the primitive with id "gather" and read it directly as floats.
381 auto output = outputs.at("gather").get_memory();
382 auto output_ptr = output.pointer<float>();
384 std::vector<float> expected_results = {
385 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 5.f, 6.f, 7.f, 8.f
// Element-wise exact comparison (EXPECT_EQ, no tolerance); bounded by expected_results.size().
388 for (size_t i = 0; i < expected_results.size(); ++i) {
389 EXPECT_EQ(expected_results[i], output_ptr[i]);
393 TEST(gather_gpu_fp32, d22_axisY) {
// Purpose: f32 counterpart of gather_gpu_fp16.d22_axisY - run a single "gather" primitive
// (axis = along_y) on an f32 dictionary and compare the 16 output elements with a hard-coded expectation.
395 // Dictionary : 2x2x1x1
// NOTE(review): the buffer tagged "Dictionary" below is allocated with { 2, 2, 1, 3 };
// { 2, 2, 1, 1 } is the size of the buffer tagged "Indexes". This label looks swapped - confirm.
398 // Input values in fp32
// Four values, matching the element count of the index buffer (presumably its contents - confirm).
401 // 0.f, 1.f, 2.f, 1.f
// Twelve values, matching the element count of the dictionary buffer (presumably its contents - confirm).
404 // 1.f, 2.f, 3.f, 4.f, 5.f, 6.f,
405 // 7.f, 8.f, 9.f, 10.f, 11.f, 12.f
// The next line repeats the contents of expected_results.
408 // 1.f, 2.f, 3.f, 2.f, 4.f, 5.f, 6.f, 5.f, 7.f, 8.f, 9.f, 8.f, 10.f, 11.f, 12.f, 11.f
// input1 = values to gather from, input2 = positions to gather; both f32 in bfyx format.
412 auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 3 } }); // Dictionary
413 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes
414 auto axis = cldnn::gather::gather_axis::along_y;
// Topology: two input layouts (taken from the allocated buffers) feeding the gather primitive.
429 topology.add(input_layout("InputDictionary", input1.get_layout()));
430 topology.add(input_layout("InputText", input2.get_layout()));
// Arguments: primitive id, dictionary input id, indexes input id, gather axis, and a tensor(2, 2, 2, 2)
// (presumably the output shape - confirm against the gather primitive's constructor).
432 gather("gather", "InputDictionary", "InputText", axis, tensor(2, 2, 2, 2))
435 network network(engine, topology);
// Bind the allocated buffers to the input ids declared in the topology, then run.
437 network.set_input_data("InputDictionary", input1);
438 network.set_input_data("InputText", input2);
440 auto outputs = network.execute();
// Fetch the result of the primitive with id "gather" and read it directly as floats.
442 auto output = outputs.at("gather").get_memory();
443 auto output_ptr = output.pointer<float>();
445 std::vector<float> expected_results = {
446 1.f, 2.f, 3.f, 2.f, 4.f, 5.f, 6.f, 5.f, 7.f, 8.f, 9.f, 8.f, 10.f, 11.f, 12.f, 11.f
// Element-wise exact comparison (EXPECT_EQ, no tolerance); bounded by expected_results.size().
449 for (size_t i = 0; i < expected_results.size(); ++i) {
450 EXPECT_EQ(expected_results[i], output_ptr[i]);
454 TEST(gather_gpu_fp32, d22_axisF) {
// Purpose: f32 counterpart of gather_gpu_fp16.d22_axisF - run a single "gather" primitive
// (axis = along_f) on an f32 dictionary and compare the 16 output elements with a hard-coded expectation.
456 // Dictionary : 2x2x1x1
// NOTE(review): the buffer tagged "Dictionary" below is allocated with { 2, 3, 1, 2 };
// { 2, 2, 1, 1 } is the size of the buffer tagged "Indexes". This label looks swapped - confirm.
459 // Input values in fp32
// Four values, matching the element count of the index buffer (presumably its contents - confirm).
462 // 0.f, 1.f, 2.f, 1.f
// Twelve values, matching the element count of the dictionary buffer (presumably its contents - confirm).
465 // 1.f, 2.f, 3.f, 4.f, 5.f, 6.f,
466 // 7.f, 8.f, 9.f, 10.f, 11.f, 12.f
// The next line repeats the contents of expected_results.
469 // 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 3.f, 4.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 9.f, 10.f
// input1 = values to gather from, input2 = positions to gather; both f32 in bfyx format.
473 auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 3, 1, 2 } }); // Dictionary
474 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes
475 auto axis = cldnn::gather::gather_axis::along_f;
// Topology: two input layouts (taken from the allocated buffers) feeding the gather primitive.
490 topology.add(input_layout("InputDictionary", input1.get_layout()));
491 topology.add(input_layout("InputText", input2.get_layout()));
// Arguments: primitive id, dictionary input id, indexes input id, gather axis, and a tensor(2, 2, 2, 2)
// (presumably the output shape - confirm against the gather primitive's constructor).
493 gather("gather", "InputDictionary", "InputText", axis, tensor(2, 2, 2, 2))
496 network network(engine, topology);
// Bind the allocated buffers to the input ids declared in the topology, then run.
498 network.set_input_data("InputDictionary", input1);
499 network.set_input_data("InputText", input2);
501 auto outputs = network.execute();
// Fetch the result of the primitive with id "gather" and read it directly as floats.
503 auto output = outputs.at("gather").get_memory();
504 auto output_ptr = output.pointer<float>();
506 std::vector<float> expected_results = {
507 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 3.f, 4.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 9.f, 10.f
// Element-wise exact comparison (EXPECT_EQ, no tolerance); bounded by expected_results.size().
510 for (size_t i = 0; i < expected_results.size(); ++i) {
511 EXPECT_EQ(expected_results[i], output_ptr[i]);