// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "api/CPP/memory.hpp"
#include <api/CPP/input_layout.hpp>
#include "api/CPP/softmax.hpp"
#include <api/CPP/topology.hpp>
#include <api/CPP/network.hpp>
#include <api/CPP/engine.hpp>
#include "test_utils/test_utils.h"

#include <algorithm>
#include <cmath>
#include <limits>
#include <sstream>
using namespace cldnn;
using namespace tests;
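// Unit tests for the clDNN softmax GPU primitive.
//
//     softmax(x_i) = exp(x_i - max_j x_j) / sum_k exp(x_k - max_j x_j)
//
// The normalization axis depends on the dimension argument (normalize_fyx,
// normalize_y, normalize_f, ...); every slice along that axis must sum to 1.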
class softmax_gpu_xb_f32_test_fixture: public ::testing::Test {
public:
    static const int32_t
        output_x  = 10, output_b  = 2,  // size of whole output buffer
        input_x   = 10, input_b   = 2,  // size of whole input buffer
        in_size   = input_x*input_b,
        out_size  = output_x*output_b;

    float in_buffer[in_size];
    float out_buffer[out_size];
    float expected_buffer[out_size];

    const cldnn::engine& engine;
    cldnn::memory input;

    //neural::primitive output = memory::allocate({ memory::format::xb_f32, {output_b, {{output_x}}, 1}});
    softmax_gpu_xb_f32_test_fixture()
        : engine(get_test_engine())
        , input(memory::allocate(engine, { data_types::f32, format::yxfb, { input_b, 1, input_x, 1}}))
    {}
    void compare_out_buffer_with_expected() {
        for(size_t i = 0; i < out_size; ++i) {
            // does output have expected values
            EXPECT_TRUE(are_equal(out_buffer[i], expected_buffer[i]))
                << "At [" << i << "] Expected : " << expected_buffer[i] << " actual : " << out_buffer[i];
        }
    }
    void compare_out_buffer_with_expected_batch_wise() {
        for(size_t b = 0; b < output_b; ++b) {
            float batch_wise_sum = 0;
            for(size_t x = 0; x < output_x; ++x) {
                auto idx = b + x*output_b;
                batch_wise_sum += out_buffer[idx];
                // does output have expected values
                EXPECT_TRUE(are_equal(out_buffer[idx], expected_buffer[idx]))
                    << "At [" << idx << "] Expected : " << expected_buffer[idx] << " actual : " << out_buffer[idx];
            }
            // does it sum to 1 batch wise
            EXPECT_TRUE(are_equal(batch_wise_sum, 1.0f))
                << "Expected : " << 1.0f << " actual : " << batch_wise_sum;
        }
    }
};
TEST_F(softmax_gpu_xb_f32_test_fixture, input_same_values) {
    // in_buffer filled with same value == 1.0f
    for(uint32_t i = 0; i < out_size; ++i) {
        in_buffer[i] = 1.0f;
        expected_buffer[i] = 0.1f;
    }
    std::vector<float> in_b(std::begin(in_buffer), std::end(in_buffer));
    set_values(input, in_b);

    network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input")));
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output_prim = outputs.begin()->second.get_memory();

    auto output_ptr = output_prim.pointer<float>();
    for (uint32_t i = 0; i < out_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    compare_out_buffer_with_expected();
}
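// Softmax is shift-invariant (softmax(x + c) == softmax(x)), so a batch filled
// with the constant 2.0f produces the same uniform 0.1f output as a batch
// filled with 1.0f; the next test relies on exactly this property.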
TEST_F(softmax_gpu_xb_f32_test_fixture, input_same_values_batch_wise) {
    // in_buffer filled with a constant value per batch: 1.0f for batch 0 and
    // 2.0f for batch 1 (in xb_f32 the batch index varies fastest)
    for(size_t i = 0; i < output_x; ++i) {
        for(size_t j = 0; j < output_b; ++j)
            in_buffer[j + i*output_b] = (j + i*output_b) % 2 + 1.0f;
    }

    std::vector<float> in_b(std::begin(in_buffer), std::end(in_buffer));
    set_values(input, in_b);

    // fill expected buffer with the expected 0.1f value
    for(size_t i = 0; i < out_size; ++i)
        expected_buffer[i] = 0.1f;

    network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input")));
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output_prim = outputs.begin()->second.get_memory();

    auto output_ptr = output_prim.pointer<float>();
    for (uint32_t i = 0; i < out_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    compare_out_buffer_with_expected_batch_wise();
}
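// Worked expectation for the next test: with per-batch inputs {2,2,2,3,5,4,3,2,2,2},
// shifting by the maximum (5) gives exponents {-3,-3,-3,-2,0,-1,-2,-3,-3,-3}, so
// Z = 6*e^-3 + 2*e^-2 + e^-1 + 1 ≈ 1.937272, and e.g. e^0/Z ≈ 0.516190,
// e^-1/Z ≈ 0.189896, e^-2/Z ≈ 0.069859, e^-3/Z ≈ 0.025700.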
TEST_F(softmax_gpu_xb_f32_test_fixture, values_batch_wise) {
    // NOTE: the original input literals were lost from this file; the x-profile
    // below is reconstructed from exp_buf (softmax determines the inputs up to
    // an additive constant, and both batches share the same profile).
    float in_buf[in_size] = {
       //b0    b1
        2.0f,  2.0f, //x0
        2.0f,  2.0f, //x1
        2.0f,  2.0f, //x2
        3.0f,  3.0f, //x3
        5.0f,  5.0f, //x4
        4.0f,  4.0f, //x5
        3.0f,  3.0f, //x6
        2.0f,  2.0f, //x7
        2.0f,  2.0f, //x8
        2.0f,  2.0f  //x9
    };

    float exp_buf[out_size] = {
        0.02569957f,  0.02569957f,
        0.02569957f,  0.02569957f,
        0.02569957f,  0.02569957f,
        0.069858674f, 0.069858674f,
        0.516189665f, 0.516189665f,
        0.189895565f, 0.189895565f,
        0.069858674f, 0.069858674f,
        0.02569957f,  0.02569957f,
        0.02569957f,  0.02569957f,
        0.02569957f,  0.02569957f
    };
    std::vector<float> in_b(std::begin(in_buf), std::end(in_buf));
    set_values(input, in_b);
    std::copy(exp_buf, exp_buf + out_size, expected_buffer);

    // out_buffer filled with non-signaling NaN
    for(size_t i = 0; i < out_size; ++i)
        out_buffer[i] = NAN;

    network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input")));
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output_prim = outputs.begin()->second.get_memory();

    auto output_ptr = output_prim.pointer<float>();
    for (uint32_t i = 0; i < out_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    compare_out_buffer_with_expected_batch_wise();
}
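// For the default normalize_fyx mode used below, softmax runs jointly over
// f, y and x, so the 12 (= 3*2*2) outputs of each batch sum to 1 and each
// batch contributes a single expected maximum.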
TEST(softmax_gpu_bfyx_f32, normalize_fyx) {
    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size*y_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch_num, feature_num, x_size, y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input"));
    set_values(input, {  //bfyx
            //y0x0  y0x1   y1x0   y1x1
    /*b0f0*/ 0.1f, -0.1f,  0.9f,  1.5f,
    /*b0f1*/ 0.2f,  0.2f, -10.f,  5.2f,
    /*b0f2*/ 0.2f,  0.2f, -10.f,  5.2f,
    /*b1f0*/ 3.f,   0.5f,  7.f,   12.f,
    /*b1f1*/ 4.f,   0.5f,  8.f,   8.2f,
    /*b1f2*/ 0.2f,  0.2f, -10.f,  5.2f
    });

    float expected_max_values[2] = {
        0.481618381f, 0.953259517f
    };
    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    float sum = 0;
    float expected_sum = 1.0f;

    float temp_max = 0;
    int max_value_buffer_index = 0;

    for (uint32_t i = 0; i < batch_num; i++) //this loop sums all results within a batch; each per-batch sum should be 1.0f
    {
        for (uint32_t j = 0; j < y_size; j++)
        {
            for (uint32_t k = 0; k < x_size; k++)
            {
                for (uint32_t l = 0; l < feature_num; l++)
                {
                    int index = i * feature_num * x_size * y_size + j * x_size + k + l * x_size * y_size;
                    sum += out_buffer[index];
                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }
                }
            }
        }

        EXPECT_EQ(true, are_equal(sum, expected_sum));
        sum = 0.0f;

        EXPECT_EQ(true, are_equal(temp_max, expected_max_values[max_value_buffer_index]));
        temp_max = 0;
        max_value_buffer_index++;
    }
}
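// normalize_y: softmax runs along y independently for every (b, f, x) column.
// With y_size == 2 the larger of the two outputs is 1/(1 + exp(-|y1 - y0|)),
// e.g. for b=0, f=0, x=0 the inputs 0.1 and 0.9 give 1/(1 + e^-0.8) ≈ 0.689974.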
TEST(softmax_gpu_bfyx_f32, normalize_y) {
    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size*y_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch_num, feature_num, x_size, y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_y));
    std::vector<float> input_vec = {
               //y0x0  y0x1   y1x0   y1x1
        /*b0f0*/ 0.1f, -0.1f,  0.9f,  1.5f,
        /*b0f1*/ 0.2f,  0.2f, -10.f,  5.2f,
        /*b0f2*/ 0.2f,  0.2f, -10.f,  5.2f,

        /*b1f0*/ 3.f,   0.5f,  7.f,   12.f,
        /*b1f1*/ 4.f,   0.5f,  8.f,   8.2f,
        /*b1f2*/ 0.2f,  0.2f, -10.f,  5.2f
    };
    set_values(input, input_vec);
    float expected_max_values[12] = {
        0.689974481f, //b=0, f=0, x=0
        0.832018385f, //b=0, f=0, x=1

        0.999962831f, //b=0, f=1, x=0
        0.993307149f, //b=0, f=1, x=1

        0.999962831f, //b=0, f=2, x=0
        0.993307149f, //b=0, f=2, x=1

        0.98201379f,  //b=1, f=0, x=0
        0.99998987f,  //b=1, f=0, x=1

        0.98201379f,  //b=1, f=1, x=0
        0.999547378f, //b=1, f=1, x=1

        0.999962831f, //b=1, f=2, x=0
        0.993307149f  //b=1, f=2, x=1
    };
    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    float temp_max = 0;
    float expected_sum = 1.0f;
    int max_value_buffer_index = 0;
    for (uint32_t i = 0; i < batch_num; i++) //this loop checks each (b, f, x) column; each per-column sum along y should be 1.0f
    {
        for (uint32_t l = 0; l < feature_num; l++)
        {
            for (uint32_t k = 0; k < x_size; k++)
            {
                float sum = 0.0f;
                for (uint32_t j = 0; j < y_size; j++)
                {
                    int index = i * feature_num * x_size * y_size +
                        l * x_size * y_size +
                        j * x_size +
                        k;

                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }

                    sum += out_buffer[index];
                }
                EXPECT_EQ(true, are_equal(temp_max, expected_max_values[max_value_buffer_index]));
                temp_max = 0;
                max_value_buffer_index++;

                EXPECT_EQ(true, are_equal(sum, expected_sum));
            }
        }
    }
}
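// normalize_f: softmax runs across the feature axis independently for every
// (b, y, x) position. For example, at b=0, y=0, x=0 the three feature values
// are (0.1, 0.2, 0.2), so the maximum output is e^0.2 / (e^0.1 + 2*e^0.2) ≈ 0.344253.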
TEST(softmax_gpu_bfyx_f32, normalize_f) {
    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size*y_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch_num, feature_num, x_size, y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_f));
    std::vector<float> input_vec = {
               //y0x0  y0x1   y1x0   y1x1
        /*b0f0*/ 0.1f, -0.1f,  0.9f,  1.5f,
        /*b0f1*/ 0.2f,  0.2f, -10.f,  5.2f,
        /*b0f2*/ 0.2f,  0.2f, -10.f,  5.2f,

        /*b1f0*/ 3.f,   0.5f,  7.f,   12.f,
        /*b1f1*/ 4.f,   0.5f,  8.f,   8.2f,
        /*b1f2*/ 0.2f,  0.2f, -10.f,  5.2f
    };
    set_values(input, input_vec);
    float expected_max_values[8] = {
        0.344253346f, //b=0, y=0, x=0
        0.364854551f, //b=0, y=0, x=1

        0.999963085f, //b=0, y=1, x=0
        0.493894592f, //b=0, y=1, x=1

        0.719294981f, //b=1, y=0, x=0
        0.364854551f, //b=1, y=0, x=1

        0.73105857f,  //b=1, y=1, x=0
        0.977054322f  //b=1, y=1, x=1
    };
    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    float temp_max = 0;
    float expected_sum = 1.0f;
    int max_value_buffer_index = 0;
    for (uint32_t i = 0; i < batch_num; i++) //this loop checks each (b, y, x) position; each per-position sum across features should be 1.0f
    {
        for (uint32_t j = 0; j < y_size; j++)
        {
            for (uint32_t k = 0; k < x_size; k++)
            {
                float sum = 0.0f;
                for (uint32_t l = 0; l < feature_num; l++)
                {
                    int index = i * feature_num * x_size * y_size +
                        l * x_size * y_size +
                        j * x_size +
                        k;

                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }

                    sum += out_buffer[index];
                }
                EXPECT_EQ(true, are_equal(temp_max, expected_max_values[max_value_buffer_index]));
                temp_max = 0;
                max_value_buffer_index++;

                EXPECT_EQ(true, are_equal(sum, expected_sum));
            }
        }
    }
}
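// In the yxfb test below feature_num == 1 and x_size == 1, so normalize_fyx
// reduces to a softmax over the two y values of each batch; each per-batch
// maximum is therefore 1/(1 + exp(-|y1 - y0|)), i.e. sigmoid of the y gap.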
TEST(softmax_gpu_yxfb_f32, normalize_f) {
    static const int32_t x_size = 1, y_size = 2, feature_num = 1,
        batch_num = 12, buf_size = x_size*y_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { batch_num, feature_num, y_size, x_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_fyx));
    set_values(input, {  //yxfb
              //f0b0   f0b1   f0b2   f0b3   f0b4    f0b5    f0b6   f0b7   f0b8   f0b9    f0b10  f0b11
        /*y0x0*/ 0.1f, -0.1f,  0.9f,  1.5f,  0.15f, -0.01f,  0.19f, 0.45f, 0.41f, -0.12f, 0.39f, 0.65f,
        /*y1x0*/ 0.2f,  0.2f, -10.f,  5.2f,  0.01f,  0.015f, 0.29f, 0.05f, 0.41f, -0.31f, 0.29f, 1.35f
    });

    // NOTE: the original literals were lost from this file; these values are
    // reconstructed analytically as 1/(1 + exp(-|y1 - y0|)) per batch from the
    // inputs above.
    float expected_max_values[batch_num * feature_num * x_size] = {
        0.52497919f,  //b=0:  |0.2 - 0.1|     = 0.1
        0.57444252f,  //b=1:  |0.2 - -0.1|    = 0.3
        0.99998154f,  //b=2:  |-10 - 0.9|     = 10.9
        0.97587298f,  //b=3:  |5.2 - 1.5|     = 3.7
        0.53494295f,  //b=4:  |0.01 - 0.15|   = 0.14
        0.50624967f,  //b=5:  |0.015 - -0.01| = 0.025
        0.52497919f,  //b=6:  |0.29 - 0.19|   = 0.1
        0.59868766f,  //b=7:  |0.05 - 0.45|   = 0.4
        0.5f,         //b=8:  |0.41 - 0.41|   = 0
        0.54735762f,  //b=9:  |-0.31 - -0.12| = 0.19
        0.52497919f,  //b=10: |0.29 - 0.39|   = 0.1
        0.66818777f   //b=11: |1.35 - 0.65|   = 0.7
    };
    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    float expected_sum = 1.0f;
    float temp_max = 0;

    for (uint32_t b = 0; b < batch_num; b++)
    {
        for (uint32_t f = 0; f < feature_num; f++)
        {
            for (uint32_t x = 0; x < x_size; x++)
            {
                float sum = 0.0f;
                for (uint32_t y = 0; y < y_size; y++)
                {
                    // this indexing is only valid because feature_num == 1 and x_size == 1
                    int index = b + y * batch_num + f * feature_num + x * x_size;
                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }
                    sum += out_buffer[index];
                }
                EXPECT_EQ(true, are_equal(temp_max, expected_max_values[b * feature_num * x_size + f * x_size + x]));
                temp_max = 0;

                EXPECT_EQ(true, are_equal(sum, expected_sum));
            }
        }
    }
}
//////////////////////////////////////////////////////////////////////////////
//                      Exhaustive Negative Matrix tests                    //
//////////////////////////////////////////////////////////////////////////////

//TEST(NegativeSoftmaxTest, DISABLED_TestAll) {
//}

//////////////////////////////////////////////////////////////////////////////
//                      Exhaustive Positive Matrix tests                    //
//////////////////////////////////////////////////////////////////////////////
class softmax_test : public tests::generic_test
{
public:
    softmax_test() : tests::generic_test()
    {
    }

    virtual void SetUp() override
    {
        max_ulps_diff_allowed = 6;
    }

    static void TearDownTestCase()
    {
        for (auto generic_params : all_generic_params)
        {
            delete generic_params;
        }

        for (auto layer_params : all_layer_params)
        {
            delete layer_params;
        }
    }
    static std::vector<cldnn::primitive*> generate_specific_test_params()
    {
        all_layer_params.push_back(new softmax("softmax", "input0", softmax::normalize_f));

        //The test checks only valid combinations.
        //TODO: add more combinations.

        return all_layer_params;
    }

    static std::vector<tests::test_params*> generate_generic_test_params()
    {
        return generic_test::generate_generic_test_params(all_generic_params);
    }
    virtual bool is_format_supported(cldnn::format format) override
    {
        return
            format == cldnn_format_type::cldnn_format_yxfb ||
            format == cldnn_format_type::cldnn_format_bfyx;
    }
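    // Reference implementation used to check the GPU results: a numerically
    // stable softmax along the feature axis. The per-(b, y, x) maximum is
    // subtracted before exponentiating so std::exp cannot overflow, then each
    // exponential is divided by the partition sum Z.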
    template<typename Type>
    memory generate_reference_typed(const std::vector<memory> & inputs)
    {
        assert(inputs.size() == 1);
        const memory & input = inputs[0];

        auto output = memory::allocate(engine, cldnn::layout(input.get_layout().data_type, input.get_layout().format, input.get_layout().size));

        // const auto params = static_cast<cldnn::softmax *>(layer_params);

        const auto in0_mem = input.pointer<Type>();
        auto out_mem = output.pointer<Type>();

        const int in0_b = input.get_layout().size.sizes()[0];
        const int in0_f = input.get_layout().size.sizes()[1];
        const int in0_h = input.get_layout().size.sizes()[3];
        const int in0_w = input.get_layout().size.sizes()[2];
        // const int out_b = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[0];
        // const int out_f = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[1];
        // const int out_h = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[2];
        // const int out_w = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[3];

        // assert(in0_b == out_b);
        // assert(in0_f == out_f);
        // assert(in0_h == out_h);
        // assert(in0_w == out_w);
        std::vector<float> cached_exp_vals;
        cached_exp_vals.resize(in0_f);

        const auto input_desc = get_linear_memory_desc(input.get_layout());

        for (int n = 0; n < in0_b; ++n)
        for (int y = 0; y < in0_h; ++y)
        for (int x = 0; x < in0_w; ++x)
        {
            // pass 1: find the per-(b, y, x) maximum across features
            float max_val = -std::numeric_limits<float>::infinity();

            for (int c = 0; c < in0_f; ++c)
            {
                const size_t in0_idx = get_linear_index(input.get_layout(), n, c, y, x, input_desc);
                max_val = std::max(max_val, static_cast<float>(in0_mem[in0_idx]));
            }

            // pass 2: accumulate the partition sum Z of the shifted exponentials
            float Z = 0;
            for (int c = 0; c < in0_f; ++c)
            {
                const size_t in0_idx = get_linear_index(input.get_layout(), n, c, y, x, input_desc);
                float tmp = static_cast<float>((Type)std::exp(static_cast<float>(in0_mem[in0_idx]) - max_val));
                Z += tmp;
                cached_exp_vals[c] = tmp;
            }

            // pass 3: normalize
            for (int c = 0; c < in0_f; ++c)
            {
                const size_t out_idx = get_linear_index(output.get_layout(), n, c, y, x, input_desc);
                out_mem[out_idx] = (Type)(cached_exp_vals[c] / Z);
            }
        }

        return output;
    }
    virtual memory generate_reference(const std::vector<memory> & inputs) override
    {
        if (generic_params->data_type == data_types::f32)
        {
            return generate_reference_typed<float>(inputs);
        }
        else
        {
            return generate_reference_typed<FLOAT16>(inputs);
        }
    }
    static std::string custom_param_name(const ::testing::TestParamInfo<std::tuple<test_params*, cldnn::primitive*>>& info)
    {
        std::stringstream res;

        const auto & p = std::get<0>(info.param);

        assert (p->data_type == data_types::f32 ||
                p->data_type == data_types::f16);

        res << info.index
            << "_" << (p->data_type == data_types::f32 ? "f32" : "f16");

        for (unsigned i = 0; i < p->input_layouts.size(); ++i)
        {
            const auto chans = format::traits(p->fmt).order;

            res << "_" << "Input" << i;
            for (unsigned int j = 0; j < p->input_layouts[i].size.sizes(p->fmt).size(); ++j)
            {
                res << chans[j] << p->input_layouts[i].size.sizes(p->fmt)[j];
            }
        }

        return res.str();
    }
private:
    static std::vector<tests::test_params*> all_generic_params;
    static std::vector<cldnn::primitive*> all_layer_params;
};

std::vector<cldnn::primitive*> softmax_test::all_layer_params = {};
std::vector<tests::test_params*> softmax_test::all_generic_params = {};
TEST_P(softmax_test, SOFTMAX)
{
    // run_single_test() is inherited from tests::generic_test
    run_single_test();
}

INSTANTIATE_TEST_CASE_P(DISABLED_SOFTMAX,
    softmax_test,
    ::testing::Combine(::testing::ValuesIn(softmax_test::generate_generic_test_params()), ::testing::ValuesIn(softmax_test::generate_specific_test_params())),
    softmax_test::custom_param_name);