/*
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/

#include <gtest/gtest.h>
#include <cmath>    // for NAN, used to pre-fill output buffers below
#include "api/CPP/memory.hpp"
#include <api/CPP/input_layout.hpp>
#include "api/CPP/softmax.hpp"
#include <api/CPP/topology.hpp>
#include <api/CPP/network.hpp>
#include <api/CPP/engine.hpp>
#include "test_utils/test_utils.h"

using namespace cldnn;
using namespace std;
using namespace tests;


class softmax_gpu_xb_f32_test_fixture: public ::testing::Test {
public:
    static const int32_t
        output_x  = 10, output_b  = 2,  // size of whole output buffer
        input_x   = 10, input_b   = 2,  // size of whole input buffer
        in_size   = input_x*input_b,
        out_size  = output_x*output_b;


    float in_buffer[in_size];
    float out_buffer[out_size];
    float expected_buffer[out_size];

    const cldnn::engine& engine;
    cldnn::memory input;

    //neural::primitive output = memory::allocate({ memory::format::xb_f32, {output_b, {{output_x}}, 1}});

    softmax_gpu_xb_f32_test_fixture()
        : engine(get_test_engine())
        ,input(memory::allocate(engine, { data_types::f32, format::yxfb, { input_b, 1, input_x, 1}}))
    {}

    void compare_out_buffer_with_expected() {
        for(size_t i = 0; i < out_size; ++i) {
            // does output have expected values
            EXPECT_TRUE(are_equal(out_buffer[i], expected_buffer[i]))
                << "At ["<< i <<  "] Expected : " << expected_buffer[i] << " actual : " << out_buffer[i];
        }
    }

    void compare_out_buffer_with_expected_batch_wise() {
        for(size_t b = 0; b < output_b; ++b) {
            float batch_wise_sum = 0;
            for(size_t x = 0; x < output_x; ++x) {
                auto idx = b+x*output_b;
                batch_wise_sum += out_buffer[idx];
                // does output have expected values
                EXPECT_TRUE(are_equal(out_buffer[idx], expected_buffer[idx]))
                    << "At ["<< idx <<  "] Expected : " << expected_buffer[idx] << " actual : " << out_buffer[idx];
            }
            // does it sum to 1 batch wise
            EXPECT_TRUE(are_equal(batch_wise_sum, 1.0f))
                << "Expected : " << 1.0f << " actual : " << batch_wise_sum;
        }
    }
};
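
// Note on layout: with the xb_f32 / yxfb storage used here the batch index is
// innermost, so element (b, x) sits at linear index b + x * output_b; the
// batch-wise checks above rely on that mapping when summing each batch to 1.0f.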

TEST_F(softmax_gpu_xb_f32_test_fixture, input_same_values) {
// in_buffer filled with the same value (1.0f); softmax of a constant input is
// uniform, so every output is expected to be 1/input_x == 0.1f
    for(uint32_t i = 0; i < out_size; ++i) {
              in_buffer[i] = 1.0f;
        expected_buffer[i] = 0.1f;
    }
    std::vector<float> in_b(std::begin(in_buffer), std::end(in_buffer));

    set_values(input, in_b);

    network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input")));
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output_prim = outputs.begin()->second.get_memory();

    auto output_ptr = output_prim.pointer<float>();
    for (uint32_t i = 0; i < out_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    compare_out_buffer_with_expected();
}

TEST_F(softmax_gpu_xb_f32_test_fixture, input_same_values_batch_wise) {
// each batch is filled with a constant value (1.0f for batch 0, 2.0f for batch 1),
// so the softmax along x is uniform within every batch
    for(size_t i = 0; i < output_x; ++i) {
        for(size_t j = 0; j < output_b; ++j)
            in_buffer[j+i*output_b] = (j+i*output_b) % 2 +1.0f;
    }

    std::vector<float> in_b(std::begin(in_buffer), std::end(in_buffer));
    set_values(input, in_b);
    // fill the expected buffer with the uniform value 0.1f
    for(size_t i = 0; i < out_size; ++i)
        expected_buffer[i] = 0.1f;

    network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input")));
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output_prim = outputs.begin()->second.get_memory();

    auto output_ptr = output_prim.pointer<float>();
    for (uint32_t i = 0; i < out_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    compare_out_buffer_with_expected_batch_wise();
}

TEST_F(softmax_gpu_xb_f32_test_fixture, values_batch_wise) {

    float in_buf[in_size] = {
       //b0  b1
        2.0f, 2.0f, //x0
        2.0f, 2.0f, //x1
        2.0f, 2.0f, //x2
        3.0f, 3.0f, //x3
        5.0f, 5.0f, //x4
        4.0f, 4.0f, //x5
        3.0f, 3.0f, //x6
        2.0f, 2.0f, //x7
        2.0f, 2.0f, //x8
        2.0f, 2.0f  //x9
    };

    float exp_buf[out_size] = {
        0.02569957f,     0.02569957f,
        0.02569957f,     0.02569957f,
        0.02569957f,     0.02569957f,
        0.069858674f,    0.069858674f,
        0.516189665f,    0.516189665f,
        0.189895565f,    0.189895565f,
        0.069858674f,    0.069858674f,
        0.02569957f,     0.02569957f,
        0.02569957f,     0.02569957f,
        0.02569957f,     0.02569957f
    };
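    // The expected values are the softmax along x within each batch. With the
    // maximum input equal to 5.0f the normalizer is
    //     Z = 6*exp(-3) + 2*exp(-2) + exp(-1) + exp(0) ~= 1.937272,
    // so exp(2-5)/Z ~= 0.0257, exp(3-5)/Z ~= 0.0699, exp(4-5)/Z ~= 0.1899 and
    // exp(5-5)/Z ~= 0.5162, matching exp_buf above.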

    std::vector<float> in_b(std::begin(in_buf), std::end(in_buf));
    set_values(input, in_b);
    std::copy(exp_buf, exp_buf+in_size, expected_buffer);

    // out_buffer filled with non-signaling NaN
    for(size_t i = 0; i < out_size; ++i)
        out_buffer[i] = NAN;

    network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input")));
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output_prim = outputs.begin()->second.get_memory();

    auto output_ptr = output_prim.pointer<float>();
    for (uint32_t i = 0; i < out_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    compare_out_buffer_with_expected_batch_wise();
}

TEST(softmax_gpu_bfyx_f32, normalize_fyx) {
    //  Input  : 2x3x2x2
    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size*y_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input"));

    set_values(input, {  //bfyx
             //y0x0  y0x1   y1x0    y1x1
        /*b0f0*/0.1f, -0.1f, 0.9f,  1.5f,
        /*b0f1*/0.2f, 0.2f,  -10.f, 5.2f,
        /*b0f2*/0.2f, 0.2f,  -10.f, 5.2f,
        /*b1f0*/3.f,  0.5f,  7.f,   12.f,
        /*b1f1*/4.f,  0.5f,  8.f,   8.2f,
        /*b1f2*/0.2f, 0.2f,  -10.f, 5.2f
    });

    float expected_max_values[2] = {
        0.481618381f, 0.953259517f
    };

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }

    float sum = 0;
    float expected_sum = 1.0f;

    float temp_max = 0;
    int max_value_buffer_index = 0;

    for (uint32_t i = 0; i < batch_num; i++) // these loops accumulate the softmax outputs over f, y and x; each batch should sum to 1.0f and its maximum should match expected_max_values
    {
        for (uint32_t j = 0; j < y_size; j++)
        {
            for (uint32_t k = 0; k < x_size; k++)
            {
                for (uint32_t l = 0; l < feature_num; l++)
                {
                    int index = i * feature_num * x_size * y_size + j * x_size + k + l * x_size * y_size;
                    sum += out_buffer[index];
                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }
                }
            }
        }

        EXPECT_EQ(true, are_equal(sum, expected_sum));
        sum = 0.0f;
        EXPECT_EQ(true, are_equal(temp_max, expected_max_values[max_value_buffer_index]));
        temp_max = 0;
        max_value_buffer_index++;
    }
}

TEST(softmax_gpu_bfyx_f32, normalize_y) {
    //  Input  : 2x3x2x2
    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size*y_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_y));

    vector<float> input_vec = {
              //y0x0  y0x1   y1x0    y1x1
        /*b0f0*/0.1f, -0.1f, 0.9f,  1.5f,
        /*b0f1*/0.2f, 0.2f,  -10.f, 5.2f,
        /*b0f2*/0.2f, 0.2f,  -10.f, 5.2f,

        /*b1f0*/3.f,  0.5f,  7.f,   12.f,
        /*b1f1*/4.f,  0.5f,  8.f,   8.2f,
        /*b1f2*/0.2f, 0.2f,  -10.f, 5.2f
    };
    set_values(input, input_vec);

    float expected_max_values[12] = {
        0.689974481f,   //b=0, f=0, x=0
        0.832018385f,   //b=0, f=0, x=1

        0.999962831f,   //b=0, f=1, x=0
        0.993307149f,   //b=0, f=1, x=1

        0.999962831f,   //b=0, f=2, x=0
        0.993307149f,   //b=0, f=2, x=1

        0.98201379f,    //b=1, f=0, x=0
        0.99998987f,    //b=1, f=0, x=1

        0.98201379f,    //b=1, f=1, x=0
        0.999547378f,   //b=1, f=1, x=1

        0.999962831f,   //b=1, f=2, x=0
        0.993307149f    //b=1, f=2, x=1
    };
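    // With normalize_y every (b, f, x) column of two y values is normalized
    // independently, so each expected maximum is a two-element softmax,
    // e.g. for b=0, f=0, x=0: exp(0.9) / (exp(0.1) + exp(0.9)) ~= 0.689974.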

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }

    float temp_max = 0;
    float expected_sum = 1.0f;
    int max_value_buffer_index = 0;
    for (uint32_t i = 0; i < batch_num; i++) // for every (b, f, x) column the y-wise sum should equal 1.0f and the maximum should match expected_max_values
    {
        for (uint32_t l = 0; l < feature_num; l++)
        {
            for (uint32_t k = 0; k < x_size; k++)
            {
                float sum = 0.0f;
                for (uint32_t j = 0; j < y_size; j++)
                {
                    int index = i * feature_num * x_size * y_size +
                        l * x_size * y_size +
                        j * x_size +
                        k;

                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }

                    sum += out_buffer[index];
                }
                EXPECT_EQ(true, are_equal(temp_max, expected_max_values[max_value_buffer_index]));
                temp_max = 0;
                max_value_buffer_index++;

                EXPECT_EQ(true, are_equal(sum, expected_sum));
                sum = 0.0f;
            }
        }
    }
}

TEST(softmax_gpu_bfyx_f32, normalize_f) {
    //  Input  : 2x3x2x2
    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size*y_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_f));

    vector<float> input_vec = {
        //y0x0  y0x1   y1x0    y1x1
        /*b0f0*/0.1f, -0.1f, 0.9f,  1.5f,
        /*b0f1*/0.2f, 0.2f,  -10.f, 5.2f,
        /*b0f2*/0.2f, 0.2f,  -10.f, 5.2f,

        /*b1f0*/3.f,  0.5f,  7.f,   12.f,
        /*b1f1*/4.f,  0.5f,  8.f,   8.2f,
        /*b1f2*/0.2f, 0.2f,  -10.f, 5.2f
    };
    set_values(input, input_vec);

    float expected_max_values[8] = {
        0.344253346f, //b=0, y=0, x=0
        0.364854551f, //b=0, y=0, x=1

        0.999963085f, //b=0, y=1, x=0
        0.493894592f, //b=0, y=1, x=1

        0.719294981f, //b=1, y=0, x=0
        0.364854551f, //b=1, y=0, x=1

        0.73105857f, //b=1, y=1, x=0
        0.977054322f //b=1, y=1, x=1
    };

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }

    float temp_max = 0;
    float expected_sum = 1.0f;
    int max_value_buffer_index = 0;
    for (uint32_t i = 0; i < batch_num; i++) // for every (b, y, x) position the feature-wise sum should equal 1.0f and the maximum should match expected_max_values
    {
        for (uint32_t j = 0; j < y_size; j++)
        {
            for (uint32_t k = 0; k < x_size; k++)
            {
                float sum = 0.0f;
                for (uint32_t l = 0; l < feature_num; l++)
                {
                    int index = i * feature_num * x_size * y_size +
                        l * x_size * y_size +
                        j * x_size +
                        k;

                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }

                    sum += out_buffer[index];
                }
                EXPECT_EQ(true, are_equal(temp_max, expected_max_values[max_value_buffer_index]));
                temp_max = 0;
                max_value_buffer_index++;

                EXPECT_EQ(true, are_equal(sum, expected_sum));
                sum = 0.0f;
            }
        }
    }
}

TEST(softmax_gpu_yxfb_f32, normalize_f) {

    static const int32_t x_size = 1, y_size = 2, feature_num = 1,
        batch_num = 12, buf_size = x_size*y_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ batch_num, feature_num, y_size , x_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_fyx));

    set_values(input, {  //yxfb
                //f0b0  f0b1  f0b2  f0b3  f0b4    f0b5    f0b6   f0b7   f0b8    f0b9   f0b10  f0b11
        /*y0x0*/ 0.1f, -0.1f, 0.9f, 1.5f, 0.15f, -0.01f, 0.19f,  0.45f, 0.41f, -0.12f, 0.39f, 0.65f,
        /*y1x0*/ 0.2f, 0.2f, -10.f, 5.2f, 0.01f, 0.015f, 0.29f,  0.05f, 0.41f, -0.31f, 0.29f, 1.35f
    });

    float expected_max_values[batch_num * feature_num * x_size] = {
        0.524979174f,
        0.574442506f,
        0.999981523f,
        0.975872993f,
        0.534942925f,
        0.506249666f,
        0.524979174f,
        0.598687649f,
        0.500000000f,
        0.547357619f,
        0.524979174f,
        0.668187797f
    };

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }

    float expected_sum = 1.0f;

    float temp_max = 0;

    for (uint32_t b = 0; b < batch_num; b++)
    {
        for (uint32_t f = 0; f < feature_num; f++)
        {
            for (uint32_t x = 0; x < x_size; x++)
            {
                float sum = 0.0f;
                for (uint32_t y = 0; y < y_size; y++)
                {
                    int index = b + y * batch_num + f * feature_num + x * x_size;
                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }
                    sum += out_buffer[index];
                }
                EXPECT_EQ(true, are_equal(temp_max, expected_max_values[b * feature_num * x_size + f * x_size + x]));
                temp_max = 0;
                EXPECT_EQ(true, are_equal(sum, expected_sum));
                sum = 0.0f;
            }
        }
    }
}


//////////////////////////////////////////////////////////////////////////////
//                                                                          //
//                      Exhaustive Negative Matrix tests                    //
//                                                                          //
//////////////////////////////////////////////////////////////////////////////

//TODO:
//TEST(NegativeSoftmaxTest, DISABLED_TestAll) {
//}

//////////////////////////////////////////////////////////////////////////////
//                                                                          //
//                      Exhaustive Positive Matrix tests                    //
//                                                                          //
//////////////////////////////////////////////////////////////////////////////

using namespace cldnn;

class softmax_test : public tests::generic_test
{

public:
    softmax_test() : tests::generic_test()
    {
    }

    virtual void SetUp() override
    {
        max_ulps_diff_allowed = 6;
    }

    static void TearDownTestCase()
    {
        for (auto generic_params : all_generic_params)
        {
            delete generic_params;
        }

        for (auto layer_params : all_layer_params)
        {
            delete layer_params;
        }
    }

    static std::vector<cldnn::primitive*> generate_specific_test_params()
    {
        all_layer_params.push_back(new softmax("softmax", "input0", softmax::normalize_f));

        //The test checks only valid combinations.
        //TODO: add more combinations.

        return all_layer_params;
    }

    static std::vector<tests::test_params*> generate_generic_test_params()
    {
        return generic_test::generate_generic_test_params(all_generic_params);
    }

    virtual bool is_format_supported(cldnn::format format) override
    {
        return
            format == cldnn_format_type::cldnn_format_yxfb ||
            format == cldnn_format_type::cldnn_format_bfyx;
    }

    template<typename Type>
    memory generate_reference_typed(const std::vector<memory> & inputs)
    {
        assert(inputs.size() == 1);
        const memory & input = inputs[0];

        //Output uses the same layout as the input
        auto output = memory::allocate(engine, cldnn::layout(input.get_layout().data_type, input.get_layout().format, input.get_layout().size));

//        const auto params = static_cast<cldnn::softmax *>(layer_parmas);

        const auto in0_mem = input.pointer<Type>();
        auto out_mem = output.pointer<Type>();

        const int in0_b = input.get_layout().size.sizes()[0];
        const int in0_f = input.get_layout().size.sizes()[1];
        const int in0_h = input.get_layout().size.sizes()[3];
        const int in0_w = input.get_layout().size.sizes()[2];

//        const int out_b = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[0];
//        const int out_f = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[1];
//        const int out_h = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[2];
//        const int out_w = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[3];

//        assert(in0_b == out_b);
//        assert(in0_f == out_f);
//        assert(in0_h == out_h);
//        assert(in0_w == out_w);

        std::vector<float> cached_exp_vals;
        cached_exp_vals.resize(in0_f);

        const auto input_desc = get_linear_memory_desc(input.get_layout());

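        // Reference softmax over the feature axis, computed in the usual
        // numerically stable way: subtract the per-(n, y, x) maximum before
        // exponentiating, then normalize by the sum of exponentials Z.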
        for (int n = 0; n < in0_b; ++n)
        for (int y = 0; y < in0_h; ++y)
        for (int x = 0; x < in0_w; ++x)
        {
            float max_val = -std::numeric_limits<float>::infinity();

            for (int c = 0; c < in0_f; ++c)
            {
                const size_t in0_idx = get_linear_index(input.get_layout(), n, c, y, x, input_desc);

                max_val = std::max(max_val, static_cast<float>(in0_mem[in0_idx]));
            }

            float Z = 0;

            for (int c = 0; c < in0_f; ++c)
            {
                const size_t in0_idx = get_linear_index(input.get_layout(), n, c, y, x, input_desc);

                float tmp = static_cast<float>((Type)std::exp(static_cast<float>(in0_mem[in0_idx]) - max_val));
                Z += tmp;
                cached_exp_vals[c] = tmp;
            }

            for (int c = 0; c < in0_f; ++c)
            {
                const size_t out_idx = get_linear_index(output.get_layout(), n, c, y, x, input_desc);
                out_mem[out_idx] = (Type)(cached_exp_vals[c] / Z);
            }
        }

        return output;
    }

    virtual memory generate_reference(const std::vector<memory> & inputs) override
    {
        if (generic_params->data_type == data_types::f32)
        {
            return generate_reference_typed<float>(inputs);
        }
        else
        {
            return generate_reference_typed<FLOAT16>(inputs);
        }
    }

    static std::string custom_param_name(const ::testing::TestParamInfo<std::tuple<test_params*, cldnn::primitive*>>& info)
    {
        std::stringstream res;

        const auto & p = std::get<0>(info.param);

        assert (p->data_type == data_types::f32 ||
                p->data_type == data_types::f16);

        res << info.index
            << "_" << (p->data_type == data_types::f32 ? "f32" : "f16");

        for (unsigned i = 0; i < p->input_layouts.size(); ++i)
        {
            const auto chans = format::traits(p->fmt).order;

            res << "_" << "Input" << i;
            for (unsigned int j = 0; j < p->input_layouts[i].size.sizes(p->fmt).size(); ++j)
            {
                res << chans[j] << p->input_layouts[i].size.sizes(p->fmt)[j];
            }
        }

        return res.str();
    }

private:

    static std::vector<tests::test_params*> all_generic_params;
    static std::vector<cldnn::primitive*> all_layer_params;

};

std::vector<cldnn::primitive*> softmax_test::all_layer_params = {};
std::vector<tests::test_params*> softmax_test::all_generic_params = {};

TEST_P(softmax_test, SOFTMAX)
{
    run_single_test();
}

INSTANTIATE_TEST_CASE_P(DISABLED_SOFTMAX,
    softmax_test,
    ::testing::Combine(::testing::ValuesIn(softmax_test::generate_generic_test_params()), ::testing::ValuesIn(softmax_test::generate_specific_test_params())),
    softmax_test::custom_param_name);