/*
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include <gtest/gtest.h>
#include "api/CPP/memory.hpp"
#include <api/CPP/input_layout.hpp>
#include "api/CPP/softmax.hpp"
#include <api/CPP/topology.hpp>
#include <api/CPP/network.hpp>
#include <api/CPP/engine.hpp>
#include "test_utils/test_utils.h"

#include <algorithm> // std::max, std::max_element, std::copy
#include <cmath>     // std::exp
#include <limits>    // std::numeric_limits (NaN fill, -infinity)

using namespace cldnn;
using namespace std;
using namespace tests;
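
// Functional tests for the clDNN softmax GPU primitive. The fixture-based
// group below runs a fixed 10x2 xb_f32 buffer and checks values element-wise;
// the later TEST()s exercise the normalize_fyx / normalize_y / normalize_f
// modes on bfyx and yxfb layouts, checking that every normalized slice sums
// to 1 and that its maximum matches a precomputed value.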
class softmax_gpu_xb_f32_test_fixture : public ::testing::Test {
public:
    static const int32_t
        output_x = 10, output_b = 2, // size of whole output buffer
        input_x  = 10, input_b  = 2, // size of whole input buffer
        in_size  = input_x * input_b,
        out_size = output_x * output_b;

    float in_buffer[in_size];
    float out_buffer[out_size];
    float expected_buffer[out_size];

    cldnn::engine engine;
    cldnn::memory input;
    //neural::primitive output = memory::allocate({ memory::format::xb_f32, {output_b, {{output_x}}, 1}});

    softmax_gpu_xb_f32_test_fixture()
        : engine()
        , input(memory::allocate(engine, { data_types::f32, format::yxfb, { input_b, 1, input_x, 1 } }))
    {}

    void compare_out_buffer_with_expected() {
        for (size_t i = 0; i < out_size; ++i) {
            // does output have expected values
            EXPECT_TRUE(are_equal(out_buffer[i], expected_buffer[i]))
                << "At [" << i << "] Expected : " << expected_buffer[i] << " actual : " << out_buffer[i];
        }
    }

    void compare_out_buffer_with_expected_batch_wise() {
        for (size_t b = 0; b < output_b; ++b) {
            float batch_wise_sum = 0;
            for (size_t x = 0; x < output_x; ++x) {
                auto idx = b + x * output_b;
                batch_wise_sum += out_buffer[idx];
                // does output have expected values
                EXPECT_TRUE(are_equal(out_buffer[idx], expected_buffer[idx]))
                    << "At [" << idx << "] Expected : " << expected_buffer[idx] << " actual : " << out_buffer[idx];
            }
            // does it sum to 1 batch wise
            EXPECT_TRUE(are_equal(batch_wise_sum, 1.0f))
                << "Expected : " << 1.0f << " actual : " << batch_wise_sum;
        }
    }
};
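
// In the xb_f32 buffer used by this fixture (expressed as yxfb with f = y = 1)
// the two batches are interleaved, so element x of batch b lives at linear
// index b + x * output_b -- the indexing the batch-wise comparison above
// relies on.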
TEST_F(softmax_gpu_xb_f32_test_fixture, input_same_values) {
    // in_buffer filled with the same value == 1.0f; softmax of 10 equal
    // values is uniform, so every output should be 1/10 = 0.1f
    for (uint32_t i = 0; i < out_size; ++i) {
        in_buffer[i] = 1.0f;
        expected_buffer[i] = 0.1f;
    }
    std::vector<float> in_b(std::begin(in_buffer), std::end(in_buffer));

    set_values(input, in_b);

    network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input")));
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output_prim = outputs.begin()->second.get_memory();

    auto output_ptr = output_prim.pointer<float>();
    for (uint32_t i = 0; i < out_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    compare_out_buffer_with_expected();
}
TEST_F(softmax_gpu_xb_f32_test_fixture, input_same_values_batch_wise) {
    // in_buffer filled with a constant value per batch: 1.0f for batch 0 and
    // 2.0f for batch 1, interleaved in the xb_f32 layout
    for (size_t i = 0; i < output_x; ++i) {
        for (size_t j = 0; j < output_b; ++j)
            in_buffer[j + i * output_b] = (j + i * output_b) % 2 + 1.0f;
    }

    std::vector<float> in_b(std::begin(in_buffer), std::end(in_buffer));
    set_values(input, in_b);
    // within each batch the values are equal, so the expected softmax output
    // is uniform: 1/10 = 0.1f everywhere
    for (size_t i = 0; i < out_size; ++i)
        expected_buffer[i] = 0.1f;

    network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input")));
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output_prim = outputs.begin()->second.get_memory();

    auto output_ptr = output_prim.pointer<float>();
    for (uint32_t i = 0; i < out_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    compare_out_buffer_with_expected_batch_wise();
}
TEST_F(softmax_gpu_xb_f32_test_fixture, values_batch_wise) {

    // per-batch inputs in the interleaved xb layout; both batches hold the
    // same values. (The original literals were lost; since softmax is
    // shift-invariant, these reconstructed values reproduce exp_buf exactly.)
    float in_buf[in_size] = {
       //b0    b1
        2.0f,  2.0f, //x0
        2.0f,  2.0f, //x1
        2.0f,  2.0f, //x2
        3.0f,  3.0f, //x3
        5.0f,  5.0f, //x4
        4.0f,  4.0f, //x5
        3.0f,  3.0f, //x6
        2.0f,  2.0f, //x7
        2.0f,  2.0f, //x8
        2.0f,  2.0f  //x9
    };

    float exp_buf[out_size] = {
        0.02569957f,  0.02569957f,
        0.02569957f,  0.02569957f,
        0.02569957f,  0.02569957f,
        0.069858674f, 0.069858674f,
        0.516189665f, 0.516189665f,
        0.189895565f, 0.189895565f,
        0.069858674f, 0.069858674f,
        0.02569957f,  0.02569957f,
        0.02569957f,  0.02569957f,
        0.02569957f,  0.02569957f
    };

    std::vector<float> in_b(std::begin(in_buf), std::end(in_buf));
    set_values(input, in_b);
    std::copy(exp_buf, exp_buf + in_size, expected_buffer);

    // out_buffer filled with non-signaling NaN so stale values cannot pass
    for (size_t i = 0; i < out_size; ++i)
        out_buffer[i] = std::numeric_limits<float>::quiet_NaN();

    network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input")));
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output_prim = outputs.begin()->second.get_memory();

    auto output_ptr = output_prim.pointer<float>();
    for (uint32_t i = 0; i < out_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    compare_out_buffer_with_expected_batch_wise();
}
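
// A minimal host-side sketch of the computation being verified above
// (illustration only -- this helper is ours, not part of the clDNN API; the
// generic_test reference at the bottom of this file is what the parameterized
// tests actually use):
inline std::vector<float> softmax_reference_1d(const std::vector<float>& v)
{
    float max_val = *std::max_element(v.begin(), v.end());
    std::vector<float> out(v.size());
    float z = 0.0f; // normalization constant
    for (size_t i = 0; i < v.size(); ++i)
    {
        out[i] = std::exp(v[i] - max_val); // shift by the max for numerical stability
        z += out[i];
    }
    for (auto& e : out)
        e /= z; // outputs now sum to 1
    return out;
}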
TEST(softmax_gpu_bfyx_f32, normalize_fyx) {

    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size * y_size * batch_num * feature_num;

    engine engine;
    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input"));

    set_values(input, { //bfyx
        //y0x0  y0x1   y1x0   y1x1
        /*b0f0*/0.1f, -0.1f,  0.9f,  1.5f,
        /*b0f1*/0.2f,  0.2f, -10.f,  5.2f,
        /*b0f2*/0.2f,  0.2f, -10.f,  5.2f,
        /*b1f0*/3.f,   0.5f,  7.f,  12.f,
        /*b1f1*/4.f,   0.5f,  8.f,   8.2f,
        /*b1f2*/0.2f,  0.2f, -10.f,  5.2f
    });

    float expected_max_values[2] = {
        0.481618381f, 0.953259517f
    };

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }

    float sum = 0;
    float expected_sum = 1.0f;
    float temp_max = 0;
    int max_value_buffer_index = 0;

    for (uint32_t i = 0; i < batch_num; i++) // the default mode normalizes over f, y and x, so each batch must sum to 1.0f
    {
        for (uint32_t j = 0; j < y_size; j++)
        {
            for (uint32_t k = 0; k < x_size; k++)
            {
                for (uint32_t l = 0; l < feature_num; l++)
                {
                    int index = i * feature_num * x_size * y_size + j * x_size + k + l * x_size * y_size;
                    sum += out_buffer[index];
                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }
                }
            }
        }
        EXPECT_EQ(true, are_equal(sum, expected_sum));
        sum = 0.0f;
        EXPECT_EQ(true, are_equal(temp_max, expected_max_values[max_value_buffer_index]));
        temp_max = 0;
        max_value_buffer_index++;
    }
}
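
// Note on the index arithmetic above: for a bfyx buffer the linear offset of
// element (b, f, y, x) is ((b * feature_num + f) * y_size + y) * x_size + x,
// which the nested loops expand term by term (the f and y terms commute since
// they are plain additions).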
TEST(softmax_gpu_bfyx_f32, normalize_y) {

    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size * y_size * batch_num * feature_num;

    engine engine;
    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_y));

    vector<float> input_vec = {
        //y0x0  y0x1   y1x0   y1x1
        /*b0f0*/0.1f, -0.1f,  0.9f,  1.5f,
        /*b0f1*/0.2f,  0.2f, -10.f,  5.2f,
        /*b0f2*/0.2f,  0.2f, -10.f,  5.2f,

        /*b1f0*/3.f,   0.5f,  7.f,  12.f,
        /*b1f1*/4.f,   0.5f,  8.f,   8.2f,
        /*b1f2*/0.2f,  0.2f, -10.f,  5.2f
    };
    set_values(input, input_vec);

    float expected_max_values[12] = {
        0.689974481f,  //b=0, f=0, x=0
        0.832018385f,  //b=0, f=0, x=1

        0.999962831f,  //b=0, f=1, x=0
        0.993307149f,  //b=0, f=1, x=1

        0.999962831f,  //b=0, f=2, x=0
        0.993307149f,  //b=0, f=2, x=1

        0.98201379f,   //b=1, f=0, x=0
        0.99998987f,   //b=1, f=0, x=1

        0.98201379f,   //b=1, f=1, x=0
        0.999547378f,  //b=1, f=1, x=1

        0.999962831f,  //b=1, f=2, x=0
        0.993307149f   //b=1, f=2, x=1
    };

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }

    float expected_sum = 1.0f;
    int max_value_buffer_index = 0;
    for (uint32_t i = 0; i < batch_num; i++) // each (b, f, x) column normalized over y must sum to 1.0f
    {
        for (uint32_t l = 0; l < feature_num; l++)
        {
            for (uint32_t k = 0; k < x_size; k++)
            {
                float temp_max = 0;
                float sum = 0.0f;
                for (uint32_t j = 0; j < y_size; j++)
                {
                    int index = i * feature_num * x_size * y_size +
                        l * x_size * y_size +
                        j * x_size +
                        k;
                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }
                    sum += out_buffer[index];
                }
                EXPECT_EQ(true, are_equal(temp_max, expected_max_values[max_value_buffer_index]));
                max_value_buffer_index++;
                EXPECT_EQ(true, are_equal(sum, expected_sum));
            }
        }
    }
}
TEST(softmax_gpu_bfyx_f32, normalize_f) {

    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size * y_size * batch_num * feature_num;

    engine engine;
    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_f));

    vector<float> input_vec = {
        //y0x0  y0x1   y1x0   y1x1
        /*b0f0*/0.1f, -0.1f,  0.9f,  1.5f,
        /*b0f1*/0.2f,  0.2f, -10.f,  5.2f,
        /*b0f2*/0.2f,  0.2f, -10.f,  5.2f,

        /*b1f0*/3.f,   0.5f,  7.f,  12.f,
        /*b1f1*/4.f,   0.5f,  8.f,   8.2f,
        /*b1f2*/0.2f,  0.2f, -10.f,  5.2f
    };
    set_values(input, input_vec);

    float expected_max_values[8] = {
        0.344253346f, //b=0, y=0, x=0
        0.364854551f, //b=0, y=0, x=1

        0.999963085f, //b=0, y=1, x=0
        0.493894592f, //b=0, y=1, x=1

        0.719294981f, //b=1, y=0, x=0
        0.364854551f, //b=1, y=0, x=1

        0.73105857f,  //b=1, y=1, x=0
        0.977054322f  //b=1, y=1, x=1
    };

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }

    float expected_sum = 1.0f;
    int max_value_buffer_index = 0;
    for (uint32_t i = 0; i < batch_num; i++) // each (b, y, x) pixel normalized over features must sum to 1.0f
    {
        for (uint32_t j = 0; j < y_size; j++)
        {
            for (uint32_t k = 0; k < x_size; k++)
            {
                float temp_max = 0;
                float sum = 0.0f;
                for (uint32_t l = 0; l < feature_num; l++)
                {
                    int index = i * feature_num * x_size * y_size +
                        l * x_size * y_size +
                        j * x_size +
                        k;
                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }
                    sum += out_buffer[index];
                }
                EXPECT_EQ(true, are_equal(temp_max, expected_max_values[max_value_buffer_index]));
                max_value_buffer_index++;
                EXPECT_EQ(true, are_equal(sum, expected_sum));
            }
        }
    }
}
TEST(softmax_gpu_yxfb_f32, normalize_f) {

    static const int32_t x_size = 1, y_size = 2, feature_num = 1,
        batch_num = 12, buf_size = x_size * y_size * batch_num * feature_num;

    engine engine;
    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ batch_num, feature_num, y_size , x_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_fyx));

    set_values(input, { //yxfb
        //f0b0   f0b1   f0b2   f0b3  f0b4    f0b5    f0b6   f0b7   f0b8   f0b9    f0b10  f0b11
        /*y0x0*/ 0.1f, -0.1f,  0.9f, 1.5f,  0.15f,  -0.01f, 0.19f, 0.45f, 0.41f, -0.12f, 0.39f, 0.65f,
        /*y1x0*/ 0.2f,  0.2f, -10.f, 5.2f,  0.01f,   0.015f, 0.29f, 0.05f, 0.41f, -0.31f, 0.29f, 1.35f
    });

    // With f = x = 1, normalize_fyx softmaxes each batch over its two y
    // values, so the expected maximum per batch is sigmoid(|y1 - y0|).
    // (The original literals were elided; these are recomputed from the
    // inputs above.)
    float expected_max_values[batch_num * feature_num * x_size] = {
        0.52497919f,
        0.57444252f,
        0.99998154f,
        0.97587371f,
        0.53494295f,
        0.50624947f,
        0.52497919f,
        0.59868766f,
        0.50000000f,
        0.54735762f,
        0.52497919f,
        0.66818777f
    };

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }

    float expected_sum = 1.0f;

    for (uint32_t b = 0; b < batch_num; b++)
    {
        for (uint32_t f = 0; f < feature_num; f++)
        {
            for (uint32_t x = 0; x < x_size; x++)
            {
                float temp_max = 0;
                float sum = 0.0f;
                for (uint32_t y = 0; y < y_size; y++)
                {
                    // yxfb linear offset; the f and x terms drop out here
                    // because feature_num == 1 and x_size == 1
                    int index = b + y * batch_num + f * feature_num + x * x_size;
                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }
                    sum += out_buffer[index];
                }
                EXPECT_EQ(true, are_equal(temp_max, expected_max_values[b * feature_num * x_size + f * x_size + x]));
                EXPECT_EQ(true, are_equal(sum, expected_sum));
            }
        }
    }
}
//////////////////////////////////////////////////////////////////////////////
//                     Exhaustive Negative Matrix tests                      //
//////////////////////////////////////////////////////////////////////////////

//TODO:
//TEST(NegativeSoftmaxTest, DISABLED_TestAll) {
//}

//////////////////////////////////////////////////////////////////////////////
//                     Exhaustive Positive Matrix tests                      //
//////////////////////////////////////////////////////////////////////////////
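
// The parameterized suite below drives the softmax primitive through the
// tests::generic_test harness: generate_generic_test_params() produces the
// input layouts and data types, generate_reference_typed() computes a CPU
// reference (softmax over the feature axis), and each GPU run is compared
// against it within max_ulps_diff_allowed.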
using namespace cldnn;

class softmax_test : public tests::generic_test
{
public:
    softmax_test() : tests::generic_test()
    {
    }

    virtual void SetUp() override
    {
        max_ulps_diff_allowed = 6;
    }

    static void TearDownTestCase()
    {
        for (auto generic_params : all_generic_params)
        {
            delete generic_params;
        }

        for (auto layer_params : all_layer_params)
        {
            delete layer_params;
        }
    }

    static std::vector<cldnn::primitive*> generate_specific_test_params()
    {
        all_layer_params.push_back(new softmax("softmax", "input0", softmax::normalize_f));

        //The test checks only valid combinations.
        //TODO: add more combinations.

        return all_layer_params;
    }

    static std::vector<tests::test_params*> generate_generic_test_params()
    {
        return generic_test::generate_generic_test_params(all_generic_params);
    }

    virtual bool is_format_supported(cldnn::format format) override
    {
        return
            format == cldnn_format_type::cldnn_format_yxfb ||
            format == cldnn_format_type::cldnn_format_bfyx;
    }

    template<typename Type>
    memory generate_reference_typed(const std::vector<memory> & inputs)
    {
        assert(inputs.size() == 1);
        const memory & input = inputs[0];

        // output has the same layout as the input
        auto output = memory::allocate(engine, cldnn::layout(input.get_layout().data_type, input.get_layout().format, input.get_layout().size));

        // const auto params = static_cast<cldnn::softmax *>(layer_parmas);

        const auto in0_mem = input.pointer<Type>();
        auto out_mem = output.pointer<Type>();

        const int in0_b = input.get_layout().size.sizes()[0];
        const int in0_f = input.get_layout().size.sizes()[1];
        const int in0_h = input.get_layout().size.sizes()[3];
        const int in0_w = input.get_layout().size.sizes()[2];

        // const int out_b = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[0];
        // const int out_f = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[1];
        // const int out_h = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[2];
        // const int out_w = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[3];

        // assert(in0_b == out_b);
        // assert(in0_f == out_f);
        // assert(in0_h == out_h);
        // assert(in0_w == out_w);

        std::vector<float> cached_exp_vals;
        cached_exp_vals.resize(in0_f);

        const auto input_desc = get_linear_memory_desc(input.get_layout());

        for (int n = 0; n < in0_b; ++n)
        for (int y = 0; y < in0_h; ++y)
        for (int x = 0; x < in0_w; ++x)
        {
            // pass 1: find the max over the feature axis (numerical stability)
            float max_val = -std::numeric_limits<float>::infinity();

            for (int c = 0; c < in0_f; ++c)
            {
                const size_t in0_idx = get_linear_index(input.get_layout(), n, c, y, x, input_desc);

                max_val = std::max(max_val, static_cast<float>(in0_mem[in0_idx]));
            }

            // pass 2: exponentiate shifted values and accumulate the normalizer Z
            float Z = 0;

            for (int c = 0; c < in0_f; ++c)
            {
                const size_t in0_idx = get_linear_index(input.get_layout(), n, c, y, x, input_desc);

                float tmp = static_cast<float>((Type)std::exp(static_cast<float>(in0_mem[in0_idx]) - max_val));
                Z += tmp;
                cached_exp_vals[c] = tmp;
            }

            // pass 3: normalize
            for (int c = 0; c < in0_f; ++c)
            {
                const size_t out_idx = get_linear_index(output.get_layout(), n, c, y, x, input_desc);
                out_mem[out_idx] = (Type)(cached_exp_vals[c] / Z);
            }
        }

        return output;
    }

    virtual memory generate_reference(const std::vector<memory> & inputs) override
    {
        if (generic_params->data_type == data_types::f32)
        {
            return generate_reference_typed<float>(inputs);
        }
        else
        {
            return generate_reference_typed<FLOAT16>(inputs);
        }
    }

    static std::string custom_param_name(const ::testing::TestParamInfo<std::tuple<test_params*, cldnn::primitive*>>& info)
    {
        std::stringstream res;

        const auto & p = std::get<0>(info.param);

        assert (p->data_type == data_types::f32 ||
                p->data_type == data_types::f16);

        res << info.index
            << "_" << (p->data_type == data_types::f32 ? "f32" : "f16");

        for (unsigned i = 0; i < p->input_layouts.size(); ++i)
        {
            const auto chans = format::traits(p->fmt).order;

            res << "_" << "Input" << i;
            for (unsigned int j = 0; j < p->input_layouts[i].size.sizes(p->fmt).size(); ++j)
            {
                res << chans[j] << p->input_layouts[i].size.sizes(p->fmt)[j];
            }
        }

        return res.str();
    }

private:
    static std::vector<tests::test_params*> all_generic_params;
    static std::vector<cldnn::primitive*> all_layer_params;
};

std::vector<cldnn::primitive*> softmax_test::all_layer_params = {};
std::vector<tests::test_params*> softmax_test::all_generic_params = {};
TEST_P(softmax_test, SOFTMAX)
{
    run_single_test();
}

INSTANTIATE_TEST_CASE_P(DISABLED_SOFTMAX,
                        softmax_test,
                        ::testing::Combine(::testing::ValuesIn(softmax_test::generate_generic_test_params()), ::testing::ValuesIn(softmax_test::generate_specific_test_params())),
                        softmax_test::custom_param_name);
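
// To run only the suites in this file (assumption: a standard gtest runner
// binary; adjust the executable name to your build):
//   ./tests --gtest_filter='softmax_gpu*'
// The parameterized matrix above is instantiated DISABLED_ by default; pass
// --gtest_also_run_disabled_tests to include it.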