inference-engine/thirdparty/clDNN/tests/test_cases/softmax_gpu_test.cpp (platform/upstream/dldt.git)
/*
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/

#include <gtest/gtest.h>
#include "api/CPP/memory.hpp"
#include <api/CPP/input_layout.hpp>
#include "api/CPP/softmax.hpp"
#include <api/CPP/topology.hpp>
#include <api/CPP/network.hpp>
#include <api/CPP/engine.hpp>
#include "test_utils/test_utils.h"

#include <algorithm>
#include <cmath>
#include <limits>
#include <sstream>
#include <vector>

using namespace cldnn;
using namespace std;
using namespace tests;

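// Fixture for the xb_f32 softmax tests below: a 10x2 (x * batch) buffer is pushed
// through softmax, and the outputs are checked element-wise and per batch (each
// batch of 10 values should sum to 1).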
class softmax_gpu_xb_f32_test_fixture: public ::testing::Test {
public:
    static const int32_t
        output_x  = 10, output_b  = 2,  // size of whole output buffer
        input_x   = 10, input_b   = 2,  // size of whole input buffer
        in_size   = input_x*input_b,
        out_size  = output_x*output_b;


    float in_buffer[in_size];
    float out_buffer[out_size];
    float expected_buffer[out_size];

    cldnn::engine engine;
    cldnn::memory input;
    //neural::primitive output = memory::allocate({ memory::format::xb_f32, {output_b, {{output_x}}, 1}});

    softmax_gpu_xb_f32_test_fixture()
        :engine()
        ,input(memory::allocate(engine, { data_types::f32, format::yxfb, { input_b, 1, input_x, 1}}))
    {}

    void compare_out_buffer_with_expected() {
        for(size_t i = 0; i < out_size; ++i) {
            // does output have expected values
            EXPECT_TRUE(are_equal(out_buffer[i], expected_buffer[i]))
                << "At ["<< i <<  "] Expected : " << expected_buffer[i] << " actual : " << out_buffer[i];
        }
    }

    void compare_out_buffer_with_expected_batch_wise() {
        for(size_t b = 0; b < output_b; ++b) {
            float batch_wise_sum = 0;
            for(size_t x = 0; x < output_x; ++x) {
                auto idx = b+x*output_b;
                batch_wise_sum += out_buffer[idx];
                // does output have expected values
                EXPECT_TRUE(are_equal(out_buffer[idx], expected_buffer[idx]))
                    << "At ["<< idx <<  "] Expected : " << expected_buffer[idx] << " actual : " << out_buffer[idx];
            }
            // does it sum to 1 batch wise
            EXPECT_TRUE(are_equal(batch_wise_sum, 1.0f))
                << "Expected : " << 1.0f << " actual : " << batch_wise_sum;
        }
    }
};

TEST_F(softmax_gpu_xb_f32_test_fixture, input_same_values) {
// in_buffer filled with same value == 1.0f
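// With every input equal, softmax is uniform: each of the input_x = 10 outputs per batch is 1/10 = 0.1f.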
    for(uint32_t i = 0; i < out_size; ++i) {
              in_buffer[i] = 1.0f;
        expected_buffer[i] = 0.1f;
    }
    std::vector<float> in_b(std::begin(in_buffer), std::end(in_buffer));

    set_values(input, in_b);

    network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input")));
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output_prim = outputs.begin()->second.get_memory();

    auto output_ptr = output_prim.pointer<float>();
    for (uint32_t i = 0; i < out_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    compare_out_buffer_with_expected();
}

TEST_F(softmax_gpu_xb_f32_test_fixture, input_same_values_batch_wise) {
// in_buffer filled with a constant value per batch: 1.0f for batch 0 and 2.0f for batch 1 (xb_f32 layout)
    for(size_t i = 0; i < output_x; ++i) {
        for(size_t j = 0; j < output_b; ++j)
            in_buffer[j+i*output_b] = (j+i*output_b) % 2 +1.0f;
    }

    std::vector<float> in_b(std::begin(in_buffer), std::end(in_buffer));
    set_values(input, in_b);
    // each batch is constant, so softmax is again uniform: expected value is 1/output_x = 0.1f
    for(size_t i = 0; i < out_size; ++i)
        expected_buffer[i] = 0.1f;

    network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input")));
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output_prim = outputs.begin()->second.get_memory();

    auto output_ptr = output_prim.pointer<float>();
    for (uint32_t i = 0; i < out_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    compare_out_buffer_with_expected_batch_wise();
}

TEST_F(softmax_gpu_xb_f32_test_fixture, values_batch_wise) {

    float in_buf[in_size] = {
       //b0  b1
        2.0f, 2.0f, //x0
        2.0f, 2.0f, //x1
        2.0f, 2.0f, //x2
        3.0f, 3.0f, //x3
        5.0f, 5.0f, //x4
        4.0f, 4.0f, //x5
        3.0f, 3.0f, //x6
        2.0f, 2.0f, //x7
        2.0f, 2.0f, //x8
        2.0f, 2.0f  //x9
    };

    float exp_buf[out_size] = {
        0.02569957f,     0.02569957f,
        0.02569957f,     0.02569957f,
        0.02569957f,     0.02569957f,
        0.069858674f,    0.069858674f,
        0.516189665f,    0.516189665f,
        0.189895565f,    0.189895565f,
        0.069858674f,    0.069858674f,
        0.02569957f,     0.02569957f,
        0.02569957f,     0.02569957f,
        0.02569957f,     0.02569957f
    };

    std::vector<float> in_b(std::begin(in_buf), std::end(in_buf));
    set_values(input, in_b);
    std::copy(exp_buf, exp_buf+in_size, expected_buffer);

    // out_buffer filled with non-signaling NaN
    for(size_t i = 0; i < out_size; ++i)
        out_buffer[i] = NAN;

    network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input")));
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output_prim = outputs.begin()->second.get_memory();

    auto output_ptr = output_prim.pointer<float>();
    for (uint32_t i = 0; i < out_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    compare_out_buffer_with_expected_batch_wise();
}

TEST(softmax_gpu_bfyx_f32, normalize_fyx) {
    //  Input  : 2x3x2x2
    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size*y_size * batch_num * feature_num;
    engine engine;

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input"));

    set_values(input, {  //bfyx
             //y0x0  y0x1   y1x0    y1x1
        /*b0f0*/0.1f, -0.1f, 0.9f,  1.5f,
        /*b0f1*/0.2f, 0.2f,  -10.f, 5.2f,
        /*b0f2*/0.2f, 0.2f,  -10.f, 5.2f,
        /*b1f0*/3.f,  0.5f,  7.f,   12.f,
        /*b1f1*/4.f,  0.5f,  8.f,   8.2f,
        /*b1f2*/0.2f, 0.2f,  -10.f, 5.2f
    });

    float expected_max_values[2] = {
        0.481618381f, 0.953259517f
    };

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }

    float sum = 0;
    float expected_sum = 1.0f;

    float temp_max = 0;
    int max_value_buffer_index = 0;

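    // bfyx linear index of element (b=i, f=l, y=j, x=k):
    //   index = b*(F*Y*X) + f*(Y*X) + y*X + x
    // which is what the expression inside the innermost loop computes.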
    for (uint32_t i = 0; i < batch_num; i++) // sum all outputs within one batch; softmax normalizes over f, y and x, so each batch should sum to 1.0f
    {
        for (uint32_t j = 0; j < y_size; j++)
        {
            for (uint32_t k = 0; k < x_size; k++)
            {
                for (uint32_t l = 0; l < feature_num; l++)
                {
                    int index = i * feature_num * x_size * y_size + j * x_size + k + l * x_size * y_size;
                    sum += out_buffer[index];
                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }
                }
            }
        }

        EXPECT_EQ(true, are_equal(sum, expected_sum));
        sum = 0.0f;
        EXPECT_EQ(true, are_equal(temp_max, expected_max_values[max_value_buffer_index]));
        temp_max = 0;
        max_value_buffer_index++;
    }
}

TEST(softmax_gpu_bfyx_f32, normalize_y) {
    //  Input  : 2x3x2x2
    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size*y_size * batch_num * feature_num;
    engine engine;

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_y));

    vector<float> input_vec = {
              //y0x0  y0x1   y1x0    y1x1
        /*b0f0*/0.1f, -0.1f, 0.9f,  1.5f,
        /*b0f1*/0.2f, 0.2f,  -10.f, 5.2f,
        /*b0f2*/0.2f, 0.2f,  -10.f, 5.2f,

        /*b1f0*/3.f,  0.5f,  7.f,   12.f,
        /*b1f1*/4.f,  0.5f,  8.f,   8.2f,
        /*b1f2*/0.2f, 0.2f,  -10.f, 5.2f
    };
    set_values(input, input_vec);

    float expected_max_values[12] = {
        0.689974481f,   //b=0, f=0, x=0
        0.832018385f,   //b=0, f=0, x=1

        0.999962831f,   //b=0, f=1, x=0
        0.993307149f,   //b=0, f=1, x=1

        0.999962831f,   //b=0, f=2, x=0
        0.993307149f,   //b=0, f=2, x=1


        0.98201379f,    //b=1, f=0, x=0
        0.99998987f,    //b=1, f=0, x=1

        0.98201379f,    //b=1, f=1, x=0
        0.999547378f,   //b=1, f=1, x=1

        0.999962831f,   //b=1, f=2, x=0
        0.993307149f    //b=1, f=2, x=1
    };

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }

    float temp_max = 0;
    float expected_sum = 1.0f;
    int max_value_buffer_index = 0;
    for (uint32_t i = 0; i < batch_num; i++) // for each (batch, feature, x) column the outputs over y should sum to 1.0f
    {
        for (uint32_t l = 0; l < feature_num; l++)
        {
            for (uint32_t k = 0; k < x_size; k++)
            {
                float sum = 0.0f;
                for (uint32_t j = 0; j < y_size; j++)
                {
                    int index = i * feature_num * x_size * y_size +
                        l * x_size * y_size +
                        j * x_size +
                        k;

                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }

                    sum += out_buffer[index];
                }
                EXPECT_EQ(true, are_equal(temp_max, expected_max_values[max_value_buffer_index]));
                temp_max = 0;
                max_value_buffer_index++;

                EXPECT_EQ(true, are_equal(sum, expected_sum));
                sum = 0.0f;
            }
        }
    }
}

TEST(softmax_gpu_bfyx_f32, normalize_f) {
    //  Input  : 2x3x2x2
    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size*y_size * batch_num * feature_num;
    engine engine;

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_f));

    vector<float> input_vec = {
        //y0x0  y0x1   y1x0    y1x1
        /*b0f0*/0.1f, -0.1f, 0.9f,  1.5f,
        /*b0f1*/0.2f, 0.2f,  -10.f, 5.2f,
        /*b0f2*/0.2f, 0.2f,  -10.f, 5.2f,

        /*b1f0*/3.f,  0.5f,  7.f,   12.f,
        /*b1f1*/4.f,  0.5f,  8.f,   8.2f,
        /*b1f2*/0.2f, 0.2f,  -10.f, 5.2f
    };
    set_values(input, input_vec);

    float expected_max_values[8] = {
        0.344253346f, //b=0, y=0, x=0
        0.364854551f, //b=0, y=0, x=1

        0.999963085f, //b=0, y=1, x=0
        0.493894592f, //b=0, y=1, x=1

        0.719294981f, //b=1, y=0, x=0
        0.364854551f, //b=1, y=0, x=1

        0.73105857f, //b=1, y=1, x=0
        0.977054322f //b=1, y=1, x=1
    };

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }

    float temp_max = 0;
    float expected_sum = 1.0f;
    int max_value_buffer_index = 0;
    for (uint32_t i = 0; i < batch_num; i++) // for each (batch, y, x) location the outputs over features should sum to 1.0f
    {
        for (uint32_t j = 0; j < y_size; j++)
        {
            for (uint32_t k = 0; k < x_size; k++)
            {
                float sum = 0.0f;
                for (uint32_t l = 0; l < feature_num; l++)
                {
                    int index = i * feature_num * x_size * y_size +
                        l * x_size * y_size +
                        j * x_size +
                        k;

                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }

                    sum += out_buffer[index];
                }
                EXPECT_EQ(true, are_equal(temp_max, expected_max_values[max_value_buffer_index]));
                temp_max = 0;
                max_value_buffer_index++;

                EXPECT_EQ(true, are_equal(sum, expected_sum));
                sum = 0.0f;
            }
        }
    }
}

TEST(softmax_gpu_yxfb_f32, normalize_f) {

    static const int32_t x_size = 1, y_size = 2, feature_num = 1,
        batch_num = 12, buf_size = x_size*y_size * batch_num * feature_num;
    engine engine;

    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ batch_num, feature_num, y_size , x_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_fyx));

    set_values(input, {  //yxfb
                //f0b0  f0b1  f0b2  f0b3  f0b4    f0b5    f0b6   f0b7   f0b8    f0b9   f0b10  f0b11
        /*y0x0*/ 0.1f, -0.1f, 0.9f, 1.5f, 0.15f, -0.01f, 0.19f,  0.45f, 0.41f, -0.12f, 0.39f, 0.65f,
        /*y1x0*/ 0.2f, 0.2f, -10.f, 5.2f, 0.01f, 0.015f, 0.29f,  0.05f, 0.41f, -0.31f, 0.29f, 1.35f
    });

    float expected_max_values[batch_num * feature_num * x_size] = {
        0.524979174f,
        0.574442506f,
        0.999981523f,
        0.975872993f,
        0.534942925f,
        0.506249666f,
        0.524979174f,
        0.598687649f,
        0.500000000f,
        0.547357619f,
        0.524979174f,
        0.668187797f
    };

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }

    float sum = 0;
    float expected_sum = 1.0f;

    float temp_max = 0;

    for (uint32_t b = 0; b < batch_num; b++)
    {
        for (uint32_t f = 0; f < feature_num; f++)
        {
            for (uint32_t x = 0; x < x_size; x++)
            {
                float sum = 0.0f;
                for (uint32_t y = 0; y < y_size; y++)
                {
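                    // yxfb stores batch innermost; since feature_num == 1 and x_size == 1,
                    // the index below reduces to b + y * batch_num.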
                    int index = b + y * batch_num + f * feature_num + x * x_size;
                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }
                    sum += out_buffer[index];
                }
                EXPECT_EQ(true, are_equal(temp_max, expected_max_values[b * feature_num * x_size + f * x_size + x]));
                temp_max = 0;
                EXPECT_EQ(true, are_equal(sum, expected_sum));
                sum = 0.0f;
            }
        }
    }
}


//////////////////////////////////////////////////////////////////////////////
//                                                                          //
//                      Exhaustive Negative Matrix tests                    //
//                                                                          //
//////////////////////////////////////////////////////////////////////////////

//TODO:
//TEST(NegativeSoftmaxTest, DISABLED_TestAll) {
//}

//////////////////////////////////////////////////////////////////////////////
//                                                                          //
//                      Exhaustive Positive Matrix tests                    //
//                                                                          //
//////////////////////////////////////////////////////////////////////////////

using namespace cldnn;

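// Parametrized generic test: the GPU softmax result for generated input layouts is
// compared by the generic_test harness against the CPU reference produced in
// generate_reference_typed, with max_ulps_diff_allowed (6) as the tolerance.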
class softmax_test : public tests::generic_test
{

public:
    softmax_test() : tests::generic_test()
    {
    }

    virtual void SetUp() override
    {
        max_ulps_diff_allowed = 6;
    }

    static void TearDownTestCase()
    {
        for (auto generic_params : all_generic_params)
        {
            delete generic_params;
        }

        for (auto layer_params : all_layer_params)
        {
            delete layer_params;
        }
    }

    static std::vector<cldnn::primitive*> generate_specific_test_params()
    {
        all_layer_params.push_back(new softmax("softmax", "input0", softmax::normalize_f));

        //The test checks only valid combinations.
        //TODO: add more combinations.

        return all_layer_params;
    }

    static std::vector<tests::test_params*> generate_generic_test_params()
    {
        return generic_test::generate_generic_test_params(all_generic_params);
    }

    virtual bool is_format_supported(cldnn::format format) override
    {
        return
            format == cldnn_format_type::cldnn_format_yxfb ||
            format == cldnn_format_type::cldnn_format_bfyx;
    }

    template<typename Type>
    memory generate_reference_typed(const std::vector<memory> & inputs)
    {
        assert(inputs.size() == 1);
        const memory & input = inputs[0];

        //Output is bfyx
        auto output = memory::allocate(engine, cldnn::layout(input.get_layout().data_type, input.get_layout().format, input.get_layout().size));

//        const auto params = static_cast<cldnn::softmax *>(layer_parmas);

        const auto in0_mem = input.pointer<Type>();
        auto out_mem = output.pointer<Type>();

        const int in0_b = input.get_layout().size.sizes()[0];
        const int in0_f = input.get_layout().size.sizes()[1];
        const int in0_h = input.get_layout().size.sizes()[3];
        const int in0_w = input.get_layout().size.sizes()[2];

//        const int out_b = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[0];
//        const int out_f = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[1];
//        const int out_h = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[2];
//        const int out_w = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[3];

//        assert(in0_b == out_b);
//        assert(in0_f == out_f);
//        assert(in0_h == out_h);
//        assert(in0_w == out_w);

        std::vector<float> cached_exp_vals;
        cached_exp_vals.resize(in0_f);

        const auto input_desc = get_linear_memory_desc(input.get_layout());

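        // Numerically stable softmax over the feature axis: for every (n, y, x)
        // location, subtract the per-location maximum before exponentiating,
        // then normalize each exponential by the sum Z.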
        for (int n = 0; n < in0_b; ++n)
        for (int y = 0; y < in0_h; ++y)
        for (int x = 0; x < in0_w; ++x)
        {
            float max_val = -std::numeric_limits<float>::infinity();

            for (int c = 0; c < in0_f; ++c)
            {
                const size_t in0_idx = get_linear_index(input.get_layout(), n, c, y, x, input_desc);

                max_val = std::max(max_val, static_cast<float>(in0_mem[in0_idx]));
            }

            float Z = 0;

            for (int c = 0; c < in0_f; ++c)
            {
                const size_t in0_idx = get_linear_index(input.get_layout(), n, c, y, x, input_desc);

                float tmp = static_cast<float>((Type)std::exp(static_cast<float>(in0_mem[in0_idx]) - max_val));
                Z += tmp;
                cached_exp_vals[c] = tmp;
            }

            for (int c = 0; c < in0_f; ++c)
            {
                const size_t out_idx = get_linear_index(output.get_layout(), n, c, y, x, input_desc);
                out_mem[out_idx] = (Type)(cached_exp_vals[c] / Z);
            }
        }

        return output;
    }

    virtual memory generate_reference(const std::vector<memory> & inputs) override
    {
        if (generic_params->data_type == data_types::f32)
        {
            return generate_reference_typed<float>(inputs);
        }
        else
        {
            return generate_reference_typed<FLOAT16>(inputs);
        }
    }

    static std::string custom_param_name(const ::testing::TestParamInfo<std::tuple<test_params*, cldnn::primitive*>>& info)
    {
        std::stringstream res;

        const auto & p = std::get<0>(info.param);

        assert (p->data_type == data_types::f32 ||
                p->data_type == data_types::f16);

        res << info.index
            << "_" << (p->data_type == data_types::f32 ? "f32" : "f16");

        for (unsigned i = 0; i < p->input_layouts.size(); ++i)
        {
            const auto chans = format::traits(p->fmt).order;

            res << "_" << "Input" << i;
            for (unsigned int j = 0; j < p->input_layouts[i].size.sizes(p->fmt).size(); ++j)
            {
                res << chans[j] << p->input_layouts[i].size.sizes(p->fmt)[j];
            }
        }

        return res.str();
    }

private:

    static std::vector<tests::test_params*> all_generic_params;
    static std::vector<cldnn::primitive*> all_layer_params;

};

std::vector<cldnn::primitive*> softmax_test::all_layer_params = {};
std::vector<tests::test_params*> softmax_test::all_generic_params = {};

TEST_P(softmax_test, SOFTMAX)
{
    run_single_test();
}

INSTANTIATE_TEST_CASE_P(DISABLED_SOFTMAX,
    softmax_test,
    ::testing::Combine(::testing::ValuesIn(softmax_test::generate_generic_test_params()), ::testing::ValuesIn(softmax_test::generate_specific_test_params())),
    softmax_test::custom_param_name);