// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "api/CPP/memory.hpp"
#include <api/CPP/input_layout.hpp>
#include "api/CPP/arg_max_min.hpp"
#include <api/CPP/topology.hpp>
#include <api/CPP/network.hpp>
#include <api/CPP/engine.hpp>
#include "test_utils/test_utils.h"
using namespace cldnn;
using namespace std;
using namespace tests;
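
// arg_max over the flattened f*y*x block of each batch: with the default
// top_k of 1 the output holds a single flat index per batch.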
TEST(arg_max_gpu, base) {
    static const int32_t x_size = 2, y_size = 2, feature_num = 3, batch_num = 2;
    engine engine;
    topology topology;
    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size, y_size } });
    topology.add(input_layout("input", input.get_layout()));
    topology.add(arg_max_min("arg_max", "input", arg_max_min::max));

    vector<float> input_vec = {
        //y0x0 y0x1 y1x0 y1x1
        /*b0f0*/0.1f, -0.1f, 0.9f, 1.5f,
        /*b0f1*/0.2f, 0.2f, -10.f, 5.2f,
        /*b0f2*/0.2f, 0.2f, -10.f, 5.2f,

        /*b1f0*/3.f, 0.5f, 7.f, 10.f,
        /*b1f1*/4.f, 0.5f, 8.f, 8.2f,
        /*b1f2*/0.2f, 0.2f, -10.f, 5.2f
    };
    set_values(input, input_vec);

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "arg_max");

    auto output = outputs.at("arg_max").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[batch_num];
    for (uint32_t i = 0; i < batch_num; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    int size = x_size * y_size * feature_num;
    int index;
    float value;
    for (int i = 0; i < batch_num; i++) {
        EXPECT_GE(out_buffer[i], 0);
        EXPECT_LT(out_buffer[i], size);
        index = (int)out_buffer[i];
        value = input_vec[i*size + index];
        // Every element of this batch must be <= the value at the returned index.
        for (int j = 0; j < size; j++)
        {
            EXPECT_LE(input_vec[i*size + j], value);
        }
    }
}
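
// The top_k tests below validate the output without assuming any particular
// order among equal values: "count" totals, over the returned top_k entries,
// how many input elements beat each returned value, while "amount" recomputes
// that total from the runs of equal values inside the returned block. The two
// totals agree exactly for any valid top_k selection, however ties are broken.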
TEST(arg_max_gpu_batch_one, base) {
    static const int32_t x_size = 2, y_size = 2, feature_num = 5, batch_num = 1, top_k = 8;
    engine engine;
    topology topology;
    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size, y_size } });
    topology.add(input_layout("input", input.get_layout()));
    topology.add(arg_max_min("arg_max", "input", arg_max_min::max, top_k));

    vector<float> input_vec = {
        //y0x0 y0x1 y1x0 y1x1
        /*b0f0*/0.1f, -0.1f, 0.9f, 1.5f,
        /*b0f1*/0.2f, 0.2f, -10.f, 5.2f,
        /*b0f2*/0.2f, 0.2f, -10.f, 5.2f,
        /*b0f3*/0.2f, 0.2f, -10.f, 4.2f,
        /*b0f4*/0.1f, 0.3f, -11.f, 15.0f
    };
    set_values(input, input_vec);

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "arg_max");

    auto output = outputs.at("arg_max").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[batch_num * top_k];
    for (uint32_t i = 0; i < batch_num * top_k; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    int size = x_size * y_size * feature_num;
    int index;
    float value;
    for (int i = 0; i < batch_num; i++) {
        int count = 0;
        int amount = 0;
        int same_values = 1;
        int j;
        for (j = 0; j < top_k; j++) {
            EXPECT_GE((int)out_buffer[i*top_k + j], 0);
            EXPECT_LT((int)out_buffer[i*top_k + j], size);
            if (top_k - 1 == j) {
                if (input_vec[i*size + (int)out_buffer[i*top_k + j]] != input_vec[i*size + (int)out_buffer[i*top_k + j - 1]])
                    amount += j;
                else
                    amount += same_values * (j - same_values + 1);
            }
            else if (input_vec[i*size + (int)out_buffer[i*top_k + j]] != input_vec[i*size + (int)out_buffer[i*top_k + j + 1]]) {
                if (same_values != j + 1) {
                    amount += same_values * (j - same_values + 1);
                    same_values = 1;
                }
            }
            else
                same_values++;
        }
        EXPECT_GE(out_buffer[i*top_k + top_k - 1], 0);
        EXPECT_LT(out_buffer[i*top_k + top_k - 1], size);
        for (int j = 0; j < top_k; j++) {
            index = (int)out_buffer[i*top_k + j];
            value = input_vec[i*size + index];
            for (int k = 0; k < size; k++)
                if (input_vec[i*size + k] > value)
                    count++;
        }
        EXPECT_EQ(count, amount);
    }
}
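
// Same top_k validation as above, extended to a second batch.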
TEST(arg_max_gpu_top_k, base) {
    static const int32_t x_size = 2, y_size = 2, feature_num = 5, batch_num = 2, top_k = 8;
    engine engine;
    topology topology;
    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size, y_size } });
    topology.add(input_layout("input", input.get_layout()));
    topology.add(arg_max_min("arg_max", "input", arg_max_min::max, top_k));

    vector<float> input_vec = {
        //y0x0 y0x1 y1x0 y1x1
        /*b0f0*/0.1f, -0.1f, 0.9f, 1.5f,
        /*b0f1*/0.2f, 0.2f, -10.f, 5.2f,
        /*b0f2*/0.2f, 0.2f, -10.f, 5.2f,
        /*b0f3*/0.2f, 0.2f, -10.f, 4.2f,
        /*b0f4*/0.1f, 0.3f, -11.f, 15.0f,

        /*b1f0*/3.f, 0.5f, 7.f, 10.f,
        /*b1f1*/4.f, 0.5f, 8.f, 8.2f,
        /*b1f2*/0.2f, 0.2f, -10.f, 5.2f,
        /*b1f3*/4.f, 0.5f, 8.f, 8.2f,
        /*b1f4*/0.1f, 0.3f, -11.f, 15.0f
    };
    set_values(input, input_vec);

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "arg_max");

    auto output = outputs.at("arg_max").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[batch_num * top_k];
    for (uint32_t i = 0; i < batch_num * top_k; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    int size = x_size * y_size * feature_num;
    int index;
    float value;
    for (int i = 0; i < batch_num; i++) {
        int count = 0;
        int amount = 0;
        int same_values = 1;
        int j;
        for (j = 0; j < top_k; j++) {
            EXPECT_GE((int)out_buffer[i*top_k + j], 0);
            EXPECT_LT((int)out_buffer[i*top_k + j], size);
            if (top_k - 1 == j) {
                if (input_vec[i*size + (int)out_buffer[i*top_k + j]] != input_vec[i*size + (int)out_buffer[i*top_k + j - 1]])
                    amount += j;
                else
                    amount += same_values * (j - same_values + 1);
            }
            else if (input_vec[i*size + (int)out_buffer[i*top_k + j]] != input_vec[i*size + (int)out_buffer[i*top_k + j + 1]]) {
                if (same_values != j + 1) {
                    amount += same_values * (j - same_values + 1);
                    same_values = 1;
                }
            }
            else
                same_values++;
        }
        EXPECT_GE(out_buffer[i*top_k + top_k - 1], 0);
        EXPECT_LT(out_buffer[i*top_k + top_k - 1], size);
        for (int j = 0; j < top_k; j++) {
            index = (int)out_buffer[i*top_k + j];
            value = input_vec[i*size + index];
            for (int k = 0; k < size; k++)
                if (input_vec[i*size + k] > value)
                    count++;
        }
        EXPECT_EQ(count, amount);
    }
}
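
// arg_max_min::min with the default top_k of 1: one index of a minimal
// element per batch.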
TEST(arg_max_gpu_min, base) {
    static const int32_t x_size = 2, y_size = 2, feature_num = 4, batch_num = 2;
    engine engine;
    topology topology;
    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size, y_size } });
    topology.add(input_layout("input", input.get_layout()));
    topology.add(arg_max_min("arg_max", "input", arg_max_min::min));

    vector<float> input_vec = {
        //y0x0 y0x1 y1x0 y1x1
        /*b0f0*/0.1f, -0.1f, 0.9f, 1.5f,
        /*b0f1*/0.2f, 0.2f, -10.f, 5.2f,
        /*b0f2*/0.2f, 0.2f, -10.f, 5.2f,
        /*b0f3*/0.2f, 0.2f, -10.f, 5.2f,

        /*b1f0*/3.f, 0.5f, 7.f, 10.f,
        /*b1f1*/4.f, 0.5f, 8.f, 8.2f,
        /*b1f2*/0.2f, 0.2f, -10.f, 5.2f,
        /*b1f3*/0.2f, 0.2f, -10.f, 5.2f
    };
    set_values(input, input_vec);

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "arg_max");

    auto output = outputs.at("arg_max").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[batch_num];
    for (uint32_t i = 0; i < batch_num; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    int size = x_size * y_size * feature_num;
    int index;
    float value;
    for (int i = 0; i < batch_num; i++) {
        EXPECT_GE(out_buffer[i], 0);
        EXPECT_LT(out_buffer[i], size);
        index = (int)out_buffer[i];
        value = input_vec[i*size + index];
        // Every element of this batch must be >= the value at the returned index.
        for (int j = 0; j < size; j++)
        {
            EXPECT_GE(input_vec[i*size + j], value);
        }
    }
}
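
// top_k variant of the min test, checked with the same count/amount scheme;
// here "beats" means strictly smaller.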
TEST(arg_max_gpu_min_top_k, base) {
    static const int32_t x_size = 2, y_size = 2, feature_num = 4, batch_num = 2, top_k = 8;
    engine engine;
    topology topology;
    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size, y_size } });
    topology.add(input_layout("input", input.get_layout()));
    topology.add(arg_max_min("arg_max", "input", arg_max_min::min, top_k));

    vector<float> input_vec = {
        //y0x0 y0x1 y1x0 y1x1
        /*b0f0*/0.1f, -0.1f, 0.9f, 1.5f,
        /*b0f1*/0.2f, 0.2f, -10.f, 5.2f,
        /*b0f2*/0.2f, 0.2f, -10.f, 5.2f,
        /*b0f3*/0.2f, 0.2f, -10.f, 4.2f,

        /*b1f0*/3.f, 0.5f, 7.f, 10.f,
        /*b1f1*/4.f, 0.5f, 8.f, 8.2f,
        /*b1f2*/0.2f, 0.2f, -10.f, 5.2f,
        /*b1f3*/4.f, 0.5f, 8.f, 8.2f
    };
    set_values(input, input_vec);

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "arg_max");

    auto output = outputs.at("arg_max").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[batch_num * top_k];
    for (uint32_t i = 0; i < batch_num * top_k; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    int size = x_size * y_size * feature_num;
    int index;
    float value;
    for (int i = 0; i < batch_num; i++) {
        int count = 0;
        int amount = 0;
        int same_values = 1;
        int j;
        for (j = 0; j < top_k; j++) {
            EXPECT_GE((int)out_buffer[i*top_k + j], 0);
            EXPECT_LT((int)out_buffer[i*top_k + j], size);
            if (top_k - 1 == j) {
                if (input_vec[i*size + (int)out_buffer[i*top_k + j]] != input_vec[i*size + (int)out_buffer[i*top_k + j - 1]])
                    amount += j;
                else
                    amount += same_values * (j - same_values + 1);
            }
            else if (input_vec[i*size + (int)out_buffer[i*top_k + j]] != input_vec[i*size + (int)out_buffer[i*top_k + j + 1]]) {
                if (same_values != j + 1) {
                    amount += same_values * (j - same_values + 1);
                    same_values = 1;
                }
            }
            else
                same_values++;
        }
        EXPECT_GE(out_buffer[i*top_k + top_k - 1], 0);
        EXPECT_LT(out_buffer[i*top_k + top_k - 1], size);
        for (int j = 0; j < top_k; j++) {
            index = (int)out_buffer[i*top_k + j];
            value = input_vec[i*size + index];
            for (int k = 0; k < size; k++)
                if (input_vec[i*size + k] < value)
                    count++;
        }
        EXPECT_EQ(count, amount);
    }
}
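
// Reduces along the batch axis instead of the default per-batch f*y*x block:
// for every (f, y, x) position the batch indices are ranked by value. Batch 0
// never exceeds batch 1 here (ties resolve to the lower batch index), so the
// min ordering at every position is {0, 1}.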
TEST(arg_max_gpu_min_axis_batch, base) {
    static const int32_t x_size = 2, y_size = 2, feature_num = 4, batch_num = 2, top_k = 2;
    engine engine;
    topology topology;
    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size, y_size } });
    topology.add(input_layout("input", input.get_layout()));
    topology.add(arg_max_min("arg_max", "input", arg_max_min::min, top_k, arg_max_min::batch));

    vector<float> input_vec = {
        //y0x0 y0x1 y1x0 y1x1
        /*b0f0*/0.1f, -0.1f, 0.9f, 1.5f,
        /*b0f1*/0.2f, 0.2f, -10.f, 5.2f,
        /*b0f2*/0.2f, 0.2f, -10.f, 5.2f,
        /*b0f3*/0.2f, 0.2f, -10.f, 4.2f,

        /*b1f0*/3.f, 0.5f, 7.f, 10.f,
        /*b1f1*/4.f, 0.5f, 8.f, 8.2f,
        /*b1f2*/0.2f, 0.2f, -10.f, 5.2f,
        /*b1f3*/4.f, 0.5f, 8.f, 8.2f
    };
    set_values(input, input_vec);

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "arg_max");
    const int out_size = y_size * feature_num * x_size * top_k;
    auto output = outputs.at("arg_max").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[out_size];
    for (int i = 0; i < out_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    for (int i = 0; i < out_size; i++)
    {
        EXPECT_EQ(out_buffer[i], i % 2 == 0 ? 0 : 1);
    }
}