Publishing R3
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / tests / test_cases / arg_max_gpu_test.cpp
1 /*
2 // Copyright (c) 2018 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include <gtest/gtest.h>
18 #include "api/CPP/memory.hpp"
19 #include <api/CPP/input_layout.hpp>
20 #include "api/CPP/arg_max_min.hpp"
21 #include <api/CPP/topology.hpp>
22 #include <api/CPP/network.hpp>
23 #include <api/CPP/engine.hpp>
24 #include "test_utils/test_utils.h"
25
26 using namespace cldnn;
27 using namespace std;
28 using namespace tests;
29
30
31
32
33 TEST(arg_max_gpu, base) {
34         //  Input  : 2x3x2x2
35         static const int32_t x_size = 2, y_size = 2, feature_num = 3, batch_num = 2;
36         engine engine;
37
38         auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
39         topology topology;
40         topology.add(input_layout("input", input.get_layout()));
41         topology.add(arg_max_min("arg_max", "input", arg_max_min::max));
42
43         vector<float> input_vec = {
44                         //y0x0 y0x1 y1x0 y1x1
45                 /*b0f0*/0.1f, -0.1f, 0.9f,  1.5f,
46                 /*b0f1*/0.2f, 0.2f,  -10.f, 5.2f,
47                 /*b0f2*/0.2f, 0.2f,  -10.f, 5.2f,
48
49                 /*b1f0*/3.f,  0.5f,  7.f,   10.f,
50                 /*b1f1*/4.f,  0.5f,  8.f,   8.2f,
51                 /*b1f2*/0.2f, 0.2f,  -10.f, 5.2f
52         };
53         set_values(input, input_vec);
54
55         network network(engine, topology);
56
57         network.set_input_data("input", input);
58         auto outputs = network.execute();
59
60         EXPECT_EQ(outputs.size(), size_t(1));
61         EXPECT_EQ(outputs.begin()->first, "arg_max");
62
63         auto output = outputs.at("arg_max").get_memory();
64         auto output_ptr = output.pointer<float>();
65         float out_buffer[batch_num];
66         for (uint32_t i = 0; i < batch_num; i++)
67         {
68                 out_buffer[i] = get_value<float>(output_ptr, i);
69         }       
70         int size = x_size * y_size * feature_num;
71         int index;
72         float value;
73         for (int i = 0; i < batch_num; i++) {
74                 EXPECT_GE(out_buffer[i], 0);
75                 EXPECT_LT(out_buffer[i], size);
76                 index = (int)out_buffer[i];
77                 value = input_vec[i*size + (int)index];
78                 for (int j = 0; j < size; j++)
79                 {
80                         EXPECT_LE(input_vec[i*size + j], value);
81                 }
82         }
83 }
84
TEST(arg_max_gpu_batch_one, base) {
    //  Input  : 1x5x2x2 (bfyx). arg_max with top_k = 8 of the 20 elements,
    //  single batch; output holds 8 flattened indices (stored as floats).
    static const int32_t x_size = 2, y_size = 2, feature_num = 5, batch_num = 1, top_k = 8;
    engine engine;

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(arg_max_min("arg_max", "input", arg_max_min::max, top_k));

    vector<float> input_vec = {
        //y0x0 y0x1 y1x0 y1x1
        /*b0f0*/0.1f, -0.1f, 0.9f,  1.5f,
        /*b0f1*/0.2f, 0.2f,  -10.f, 5.2f,
        /*b0f2*/0.2f, 0.2f,  -10.f, 5.2f,
        /*b0f3*/0.2f, 0.2f,  -10.f, 4.2f,
        /*b0f4*/0.1f, 0.3f,  -11.f, 15.0f
    };
    set_values(input, input_vec);

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "arg_max");

    auto output = outputs.at("arg_max").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[batch_num * top_k];
    for (uint32_t i = 0; i < batch_num * top_k; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
     int size = x_size * y_size * feature_num;
     int index;
     float value;
     // For each batch: `amount` accumulates, over the returned (descending)
     // indices, how many input elements are strictly greater than each
     // returned value, tracking runs of equal values via `same_values` so
     // ties contribute the rank of the start of their run. `count` then
     // recomputes the same quantity by brute force; the two must agree.
     for (int i = 0; i < batch_num; i++) {
         int count = 0;
         int amount = 0;
         int same_values = 1;
         int j;
         for (j = 0; j < top_k; j++) {
             EXPECT_GE((int)out_buffer[i*top_k + j], 0);
             EXPECT_LT((int)out_buffer[i*top_k + j], size);
             if (top_k - 1 == j) {
                 // Last returned index: close out the current run of equal values.
                 if (input_vec[i*size + (int)out_buffer[i*top_k + j]] != input_vec[i*size + (int)out_buffer[i*top_k + j - 1]]) {
                     amount += j;
                 }
                 else
                     amount += same_values * (j - same_values + 1);
             }
             else if (input_vec[i*size + (int)out_buffer[i*top_k + j]] != input_vec[i*size + (int)out_buffer[i*top_k + j + 1]]) {
                 if (same_values != j + 1) {
                     amount += same_values * (j - same_values + 1);
                     same_values = 1;
                 }
             }
             else
                 same_values++;
         }
         EXPECT_GE(out_buffer[i*top_k + top_k - 1], 0);
         EXPECT_LT(out_buffer[i*top_k + top_k - 1], size);
         // Brute-force cross-check: for every returned value, count the
         // strictly greater elements in the batch.
         for (int j = 0; j < top_k; j++)
         {
             index = (int)out_buffer[i*top_k + j];
             value = input_vec[i*size + index];
             for (int k = 0; k < size; k++)
             {
                 if (input_vec[i*size + k] > value)
                     count++;
             }
         }
         EXPECT_EQ(count, amount);
     }
}
162
163
164 TEST(arg_max_gpu_top_k, base) {
165         //  Input  : 2x3x2x2
166         static const int32_t x_size = 2, y_size = 2, feature_num = 5, batch_num = 2;
167         engine engine;
168         const int top_k = 8;
169         auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
170         topology topology;
171         topology.add(input_layout("input", input.get_layout()));
172         topology.add(arg_max_min("arg_max", "input", arg_max_min::max, top_k));
173
174         vector<float> input_vec = {
175                 //y0x0 y0x1 y1x0 y1x1
176                 /*b0f0*/0.1f, -0.1f, 0.9f,  1.5f,
177                 /*b0f1*/0.2f, 0.2f,  -10.f, 5.2f,
178                 /*b0f2*/0.2f, 0.2f,  -10.f, 5.2f,
179                 /*b0f3*/0.2f, 0.2f,  -10.f, 4.2f,
180                 /*b0f3*/0.1f, 0.3f,  -11.f, 15.0f,
181
182                 /*b1f0*/3.f,  0.5f,  7.f,   10.f,
183                 /*b1f1*/4.f,  0.5f,  8.f,   8.2f,
184                 /*b1f2*/0.2f, 0.2f,  -10.f, 5.2f,
185                 /*b1f3*/4.f,  0.5f,  8.f,   8.2f,
186                 /*b0f3*/0.1f, 0.3f,  -11.f, 15.0f,
187         };
188         set_values(input, input_vec);
189
190         network network(engine, topology);
191
192         network.set_input_data("input", input);
193         auto outputs = network.execute();
194
195         EXPECT_EQ(outputs.size(), size_t(1));
196         EXPECT_EQ(outputs.begin()->first, "arg_max");
197
198         auto output = outputs.at("arg_max").get_memory();
199         auto output_ptr = output.pointer<float>();
200         float out_buffer[batch_num * top_k];
201         for (uint32_t i = 0; i < batch_num * top_k; i++)
202         {
203                 out_buffer[i] = get_value<float>(output_ptr, i);
204         }
205         int size = x_size * y_size * feature_num;
206         int index;
207         float value;
208         for (int i = 0; i < batch_num; i++) {
209                 int count = 0;
210                 int amount = 0;
211                 int same_values = 1;
212                 int j;
213                 for (j = 0; j < top_k; j++) {
214                         EXPECT_GE((int)out_buffer[i*top_k + j], 0);
215                         EXPECT_LT((int)out_buffer[i*top_k + j], size);
216                         if (top_k - 1 == j) {
217                                 if (input_vec[i*size + (int)(int)out_buffer[i*top_k + j]] != input_vec[i*size + (int)(int)out_buffer[i*top_k + j - 1]]) {
218                                         amount += j;
219                                 }
220                                 else
221                                         amount += same_values * (j - same_values + 1);
222                         }
223                         else if (input_vec[i*size + (int)(int)out_buffer[i*top_k + j]] != input_vec[i*size + (int)(int)out_buffer[i*top_k + j + 1]]) {
224                                 if (same_values != j+1) {
225                                         amount += same_values * (j - same_values + 1);
226                                         same_values = 1;
227                                 }
228                         }
229                         else
230                                 same_values++;
231                 }
232                 EXPECT_GE(out_buffer[i*top_k + top_k - 1], 0);
233                 EXPECT_LT(out_buffer[i*top_k + top_k - 1], size);
234                 for (int j = 0; j < top_k; j++)
235                 {
236                         index = (int)out_buffer[i*top_k + j];
237                         value = input_vec[i*size + index];
238                         for (int k = 0; k < size; k++)
239                         {
240                                 if (input_vec[i*size + k] > value)
241                                         count++;
242                         }
243                 }
244                 EXPECT_EQ(count, amount);
245         }
246 }
247
TEST(arg_max_gpu_min, base) {
        //  Input  : 2x4x2x2 (bfyx). arg_min with default top_k = 1, so the
        //  output holds one flattened index (stored as float) per batch.
        static const int32_t x_size = 2, y_size = 2, feature_num = 4,
                batch_num = 2;
        engine engine;

        auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
        topology topology;
        topology.add(input_layout("input", input.get_layout()));
        topology.add(arg_max_min("arg_max", "input", arg_max_min::min));

        vector<float> input_vec = {
                //y0x0 y0x1 y1x0 y1x1
                /*b0f0*/0.1f, -0.1f, 0.9f,  1.5f,
                /*b0f1*/0.2f, 0.2f,  -10.f, 5.2f,
                /*b0f2*/0.2f, 0.2f,  -10.f, 5.2f,
                /*b0f3*/0.2f, 0.2f,  -10.f, 5.2f,

                /*b1f0*/3.f,  0.5f,  7.f,   10.f,
                /*b1f1*/4.f,  0.5f,  8.f,   8.2f,
                /*b1f2*/0.2f, 0.2f,  -10.f, 5.2f,
                /*b1f3*/0.2f, 0.2f,  -10.f, 5.2f
        };
        set_values(input, input_vec);

        network network(engine, topology);

        network.set_input_data("input", input);
        auto outputs = network.execute();

        EXPECT_EQ(outputs.size(), size_t(1));
        EXPECT_EQ(outputs.begin()->first, "arg_max");

        auto output = outputs.at("arg_max").get_memory();
        auto output_ptr = output.pointer<float>();
        float out_buffer[batch_num];
        for (uint32_t i = 0; i < batch_num; i++)
        {
                out_buffer[i] = get_value<float>(output_ptr, i);
        }
        int size = x_size * y_size * feature_num;
        int index;
        float value;
        // Verify each returned index is in range and points at a minimal
        // element of its batch. Ties (equal minima) make an exact index check
        // impossible, so we only assert no element is strictly smaller.
        for (int i = 0; i < batch_num; i++) {
                EXPECT_GE(out_buffer[i], 0);
                EXPECT_LT(out_buffer[i], size);
                index = (int)out_buffer[i];
                value = input_vec[i*size + index];
                for (int j = 0; j < size; j++)
                {
                        EXPECT_GE(input_vec[i*size + j], value);
                }
        }
}
302
TEST(arg_max_gpu_min_top_k, base) {
        //  Input  : 2x4x2x2 (bfyx). arg_min with top_k = 3 of the 16 elements
        //  per batch; output holds 3 flattened indices (stored as floats) per batch.
        static const int32_t x_size = 2, y_size = 2, feature_num = 4, batch_num = 2;
        engine engine;
        const int top_k = 3;
        auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
        topology topology;
        topology.add(input_layout("input", input.get_layout()));
        topology.add(arg_max_min("arg_max", "input", arg_max_min::min, top_k));

        vector<float> input_vec = {
                        //y0x0 y0x1 y1x0 y1x1
                /*b0f0*/0.1f, -0.1f, 0.9f,  1.5f,
                /*b0f1*/0.2f, 0.2f,  -10.f, 5.2f,
                /*b0f2*/0.2f, 0.2f,  -10.f, 5.2f,
                /*b0f3*/0.2f, 0.2f,  -10.f, 4.2f,

                /*b1f0*/3.f,  0.5f,  7.f,   10.f,
                /*b1f1*/4.f,  0.5f,  8.f,   8.2f,
                /*b1f2*/0.2f, 0.2f,  -10.f, 5.2f,
                /*b1f3*/4.f,  0.5f,  8.f,   8.2f
        };
        set_values(input, input_vec);

        network network(engine, topology);

        network.set_input_data("input", input);
        auto outputs = network.execute();

        EXPECT_EQ(outputs.size(), size_t(1));
        EXPECT_EQ(outputs.begin()->first, "arg_max");

        auto output = outputs.at("arg_max").get_memory();
        auto output_ptr = output.pointer<float>();
        float out_buffer[batch_num * top_k];
        for (uint32_t i = 0; i < batch_num * top_k; i++)
        {
                out_buffer[i] = get_value<float>(output_ptr, i);
        }
        int size = x_size * y_size * feature_num;
        int index;
        float value;
        // For each batch: `amount` accumulates, over the returned (ascending)
        // indices, how many input elements are strictly smaller than each
        // returned value, tracking runs of equal values via `same_values` so
        // ties contribute the rank of the start of their run. `count` then
        // recomputes the same quantity by brute force; the two must agree.
        for (int i = 0; i < batch_num; i++) {
                int count = 0;
                int amount = 0;
                int same_values = 1;
                int j;
                for (j = 0; j < top_k; j++) {
                        EXPECT_GE((int)out_buffer[i*top_k + j], 0);
                        EXPECT_LT((int)out_buffer[i*top_k + j], size);
                        if (top_k - 1 == j) {
                                // Last returned index: close out the current run of equal values.
                                if (input_vec[i*size + (int)out_buffer[i*top_k + j]] != input_vec[i*size + (int)out_buffer[i*top_k + j - 1]]) {
                                        amount += j;
                                }
                                else
                                        amount += same_values * (j - same_values + 1);
                        }
                        else if (input_vec[i*size + (int)out_buffer[i*top_k + j]] != input_vec[i*size + (int)out_buffer[i*top_k + j + 1]]) {
                                if (same_values != j + 1) {
                                        amount += same_values * (j - same_values + 1);
                                        same_values = 1;
                                }
                        }
                        else
                                same_values++;
                }
                EXPECT_GE(out_buffer[i*top_k + top_k - 1], 0);
                EXPECT_LT(out_buffer[i*top_k + top_k - 1], size);
                // Brute-force cross-check: for every returned value, count the
                // strictly smaller elements in the batch.
                for (int j = 0; j < top_k; j++)
                {
                        index = (int)out_buffer[i*top_k + j];
                        value = input_vec[i*size + index];
                        for (int k = 0; k < size; k++)
                        {
                                if (input_vec[i*size + k] < value)
                                        count++;
                        }
                }
                EXPECT_EQ(count, amount);
        }
}
384
385 TEST(arg_max_gpu_min_axis_batch, base) {
386     //  Input  : 2x3x2x2
387     static const int32_t x_size = 2, y_size = 2, feature_num = 4, batch_num = 2;
388     engine engine;
389     const int top_k = 2;
390     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
391     topology topology;
392     topology.add(input_layout("input", input.get_layout()));
393     topology.add(arg_max_min("arg_max", "input", arg_max_min::min, top_k, arg_max_min::batch));
394
395     vector<float> input_vec = {
396         //y0x0 y0x1 y1x0 y1x1
397         /*b0f0*/0.1f, -0.1f, 0.9f,  1.5f,
398         /*b0f1*/0.2f, 0.2f,  -10.f, 5.2f,
399         /*b0f2*/0.2f, 0.2f,  -10.f, 5.2f,
400         /*b0f3*/0.2f, 0.2f,  -10.f, 4.2f,
401
402         /*b1f0*/3.f,  0.5f,  7.f,   10.f,
403         /*b1f1*/4.f,  0.5f,  8.f,   8.2f,
404         /*b1f2*/0.2f, 0.2f,  -10.f, 5.2f,
405         /*b1f3*/4.f,  0.5f,  8.f,   8.2f
406     };
407     set_values(input, input_vec);
408
409     network network(engine, topology);
410
411     network.set_input_data("input", input);
412     auto outputs = network.execute();
413
414     EXPECT_EQ(outputs.size(), size_t(1));
415     EXPECT_EQ(outputs.begin()->first, "arg_max");
416     const int out_size = y_size * feature_num * x_size * top_k;
417     auto output = outputs.at("arg_max").get_memory();
418     auto output_ptr = output.pointer<float>();
419     float out_buffer[out_size];
420     for (uint32_t i = 0; i < out_size; i++)
421     {
422         out_buffer[i] = get_value<float>(output_ptr, i);
423     }
424     for (int i = 0; i < out_size; i++)
425     {
426         EXPECT_EQ(out_buffer[i], i % 2 == 0 ? 0 : 1);
427     }
428 }