// Copyright (c) 2016-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gtest/gtest.h>
#include "api/memory.hpp"
#include <api/input_layout.hpp>
#include "api/softmax.hpp"
#include <api/topology.hpp>
#include <api/network.hpp>
#include <api/engine.hpp>
#include "test_utils/test_utils.h"

#include <limits>

using namespace cldnn;
using namespace tests;
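
// These tests exercise the cldnn softmax primitive across layouts (yxfb,
// bfyx, bfzyx), data types (f32, f16) and normalization modes, checking
// output values, per-slice maxima, and that each normalized slice sums to 1.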

class softmax_gpu_xb_f32_test_fixture : public ::testing::Test {
public:
    static const int32_t
        output_x = 10, output_b = 2,  // size of whole output buffer
        input_x  = 10, input_b  = 2,  // size of whole input buffer
        in_size  = input_x * input_b,
        out_size = output_x * output_b;

    float in_buffer[in_size];
    float out_buffer[out_size];
    float expected_buffer[out_size];

    const cldnn::engine& engine;
    cldnn::memory input;

    //neural::primitive output = memory::allocate({ memory::format::xb_f32, {output_b, {{output_x}}, 1}});

    softmax_gpu_xb_f32_test_fixture()
        : engine(get_test_engine())
        , input(memory::allocate(engine, { data_types::f32, format::yxfb, { input_b, 1, input_x, 1 } }))
    {}

    void compare_out_buffer_with_expected() {
        for (size_t i = 0; i < out_size; ++i) {
            // does output have expected values
            EXPECT_TRUE(are_equal(out_buffer[i], expected_buffer[i]))
                << "At [" << i << "] Expected : " << expected_buffer[i] << " actual : " << out_buffer[i];
        }
    }

    void compare_out_buffer_with_expected_batch_wise() {
        for (size_t b = 0; b < output_b; ++b) {
            float batch_wise_sum = 0;
            for (size_t x = 0; x < output_x; ++x) {
                auto idx = b + x * output_b;
                batch_wise_sum += out_buffer[idx];
                // does output have expected values
                EXPECT_TRUE(are_equal(out_buffer[idx], expected_buffer[idx]))
                    << "At [" << idx << "] Expected : " << expected_buffer[idx] << " actual : " << out_buffer[idx];
            }
            // does it sum to 1 batch wise
            EXPECT_TRUE(are_equal(batch_wise_sum, 1.0f))
                << "Expected : " << 1.0f << " actual : " << batch_wise_sum;
        }
    }
};

TEST_F(softmax_gpu_xb_f32_test_fixture, input_same_values) {
    // in_buffer filled with same value == 1.0f, so softmax over x is uniform: 1/10
    for (uint32_t i = 0; i < out_size; ++i) {
        in_buffer[i] = 1.0f;
        expected_buffer[i] = 0.1f;
    }
    std::vector<float> in_b(std::begin(in_buffer), std::end(in_buffer));

    set_values(input, in_b);

    network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input")));
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output_prim = outputs.begin()->second.get_memory();

    auto output_ptr = output_prim.pointer<float>();
    for (uint32_t i = 0; i < out_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    compare_out_buffer_with_expected();
}

TEST_F(softmax_gpu_xb_f32_test_fixture, input_same_values_batch_wise) {
    // in_buffer filled with a constant value per batch: 1.0f for batch 0, 2.0f for batch 1
    for (size_t i = 0; i < output_x; ++i) {
        for (size_t j = 0; j < output_b; ++j)
            in_buffer[j + i * output_b] = (j + i * output_b) % 2 + 1.0f;
    }

    std::vector<float> in_b(std::begin(in_buffer), std::end(in_buffer));
    set_values(input, in_b);
    // fill buffer with the expected 0.1f value; a constant input per batch
    // again yields a uniform softmax over the 10 x positions
    for (size_t i = 0; i < out_size; ++i)
        expected_buffer[i] = 0.1f;

    network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input")));
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output_prim = outputs.begin()->second.get_memory();

    auto output_ptr = output_prim.pointer<float>();
    for (uint32_t i = 0; i < out_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    compare_out_buffer_with_expected_batch_wise();
}

TEST_F(softmax_gpu_xb_f32_test_fixture, values_batch_wise) {
    // both batches hold the same x-series; softmax({2,2,2,3,5,4,3,2,2,2})
    // along x produces the expected values in exp_buf below
    float in_buf[in_size] = {
       //b0    b1
        2.0f,  2.0f, //x0
        2.0f,  2.0f, //x1
        2.0f,  2.0f, //x2
        3.0f,  3.0f, //x3
        5.0f,  5.0f, //x4
        4.0f,  4.0f, //x5
        3.0f,  3.0f, //x6
        2.0f,  2.0f, //x7
        2.0f,  2.0f, //x8
        2.0f,  2.0f  //x9
    };

    float exp_buf[out_size] = {
        0.02569957f,  0.02569957f,
        0.02569957f,  0.02569957f,
        0.02569957f,  0.02569957f,
        0.069858674f, 0.069858674f,
        0.516189665f, 0.516189665f,
        0.189895565f, 0.189895565f,
        0.069858674f, 0.069858674f,
        0.02569957f,  0.02569957f,
        0.02569957f,  0.02569957f,
        0.02569957f,  0.02569957f
    };

    std::vector<float> in_b(std::begin(in_buf), std::end(in_buf));
    set_values(input, in_b);
    std::copy(exp_buf, exp_buf + out_size, expected_buffer);

    // out_buffer filled with non-signaling NaN
    for (size_t i = 0; i < out_size; ++i)
        out_buffer[i] = std::numeric_limits<float>::quiet_NaN();

    network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input")));
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output_prim = outputs.begin()->second.get_memory();

    auto output_ptr = output_prim.pointer<float>();
    for (uint32_t i = 0; i < out_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    compare_out_buffer_with_expected_batch_wise();
}

TEST(softmax_gpu_bfyx_f32, normalize_fyx) {
    //  Input  : 2x3x2x2 (bfyx)
    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size * y_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch_num, feature_num, x_size, y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input"));

    set_values(input, { //bfyx
            //y0x0  y0x1  y1x0   y1x1
        /*b0f0*/0.1f, -0.1f,  0.9f,  1.5f,
        /*b0f1*/0.2f,  0.2f, -10.f,  5.2f,
        /*b0f2*/0.2f,  0.2f, -10.f,  5.2f,
        /*b1f0*/3.f,   0.5f,  7.f,   12.f,
        /*b1f1*/4.f,   0.5f,  8.f,   8.2f,
        /*b1f2*/0.2f,  0.2f, -10.f,  5.2f
    });

    float expected_max_values[2] = {
        0.481618381f, 0.953259517f
    };

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }

    float sum = 0;
    float expected_sum = 1.0f;

    float temp_max = 0;
    int max_value_buffer_index = 0;

    for (uint32_t i = 0; i < batch_num; i++) // these loops sum the results over fyx per batch; we expect sum == 1.0f
    {
        for (uint32_t j = 0; j < y_size; j++)
        {
            for (uint32_t k = 0; k < x_size; k++)
            {
                for (uint32_t l = 0; l < feature_num; l++)
                {
                    int index = i * feature_num * x_size * y_size + j * x_size + k + l * x_size * y_size;
                    sum += out_buffer[index];
                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }
                }
            }
        }

        EXPECT_EQ(true, are_equal(sum, expected_sum));
        sum = 0.0f;
        EXPECT_EQ(true, are_equal(temp_max, expected_max_values[max_value_buffer_index]));
        temp_max = 0;
        max_value_buffer_index++;
    }
}

TEST(softmax_gpu_bfyx_f32, normalize_y) {
    //  Input  : 2x3x2x2 (bfyx)
    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size * y_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch_num, feature_num, x_size, y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_y));

    vector<float> input_vec = {
            //y0x0  y0x1  y1x0   y1x1
        /*b0f0*/0.1f, -0.1f,  0.9f,  1.5f,
        /*b0f1*/0.2f,  0.2f, -10.f,  5.2f,
        /*b0f2*/0.2f,  0.2f, -10.f,  5.2f,

        /*b1f0*/3.f,   0.5f,  7.f,   12.f,
        /*b1f1*/4.f,   0.5f,  8.f,   8.2f,
        /*b1f2*/0.2f,  0.2f, -10.f,  5.2f
    };
    set_values(input, input_vec);

    float expected_max_values[12] = {
        0.689974481f, //b=0, f=0, x=0
        0.832018385f, //b=0, f=0, x=1

        0.999962831f, //b=0, f=1, x=0
        0.993307149f, //b=0, f=1, x=1

        0.999962831f, //b=0, f=2, x=0
        0.993307149f, //b=0, f=2, x=1

        0.98201379f,  //b=1, f=0, x=0
        0.99998987f,  //b=1, f=0, x=1

        0.98201379f,  //b=1, f=1, x=0
        0.999547378f, //b=1, f=1, x=1

        0.999962831f, //b=1, f=2, x=0
        0.993307149f  //b=1, f=2, x=1
    };

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }

    float sum = 0;
    float expected_sum = 1.0f;
    int max_value_buffer_index = 0;
    for (uint32_t i = 0; i < batch_num; i++) // these loops sum the results over y per (batch, feature, x); we expect sum == 1.0f
    {
        for (uint32_t l = 0; l < feature_num; l++)
        {
            for (uint32_t k = 0; k < x_size; k++)
            {
                float temp_max = 0;
                for (uint32_t j = 0; j < y_size; j++)
                {
                    int index = i * feature_num * x_size * y_size +
                        l * x_size * y_size +
                        j * x_size +
                        k;

                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }

                    sum += out_buffer[index];
                }
                EXPECT_EQ(true, are_equal(temp_max, expected_max_values[max_value_buffer_index]));
                max_value_buffer_index++;

                EXPECT_EQ(true, are_equal(sum, expected_sum));
                sum = 0.0f;
            }
        }
    }
}

TEST(softmax_gpu_bfyx_f32, normalize_f) {
    //  Input  : 2x3x2x2 (bfyx)
    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size * y_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch_num, feature_num, x_size, y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_f));

    vector<float> input_vec = {
            //y0x0  y0x1  y1x0   y1x1
        /*b0f0*/0.1f, -0.1f,  0.9f,  1.5f,
        /*b0f1*/0.2f,  0.2f, -10.f,  5.2f,
        /*b0f2*/0.2f,  0.2f, -10.f,  5.2f,

        /*b1f0*/3.f,   0.5f,  7.f,   12.f,
        /*b1f1*/4.f,   0.5f,  8.f,   8.2f,
        /*b1f2*/0.2f,  0.2f, -10.f,  5.2f
    };
    set_values(input, input_vec);

    float expected_max_values[8] = {
        0.344253346f, //b=0, y=0, x=0
        0.364854551f, //b=0, y=0, x=1

        0.999963085f, //b=0, y=1, x=0
        0.493894592f, //b=0, y=1, x=1

        0.719294981f, //b=1, y=0, x=0
        0.364854551f, //b=1, y=0, x=1

        0.73105857f,  //b=1, y=1, x=0
        0.977054322f  //b=1, y=1, x=1
    };

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }

    float sum = 0;
    float expected_sum = 1.0f;
    int max_value_buffer_index = 0;
    for (uint32_t i = 0; i < batch_num; i++) // these loops sum the results over f per (batch, y, x); we expect sum == 1.0f
    {
        for (uint32_t j = 0; j < y_size; j++)
        {
            for (uint32_t k = 0; k < x_size; k++)
            {
                float temp_max = 0;
                for (uint32_t l = 0; l < feature_num; l++)
                {
                    int index = i * feature_num * x_size * y_size +
                        l * x_size * y_size +
                        j * x_size +
                        k;

                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }

                    sum += out_buffer[index];
                }
                EXPECT_EQ(true, are_equal(temp_max, expected_max_values[max_value_buffer_index]));
                max_value_buffer_index++;

                EXPECT_EQ(true, are_equal(sum, expected_sum));
                sum = 0.0f;
            }
        }
    }
}

TEST(softmax_gpu_yxfb_f32, normalize_f) {
    static const int32_t x_size = 1, y_size = 2, feature_num = 1,
        batch_num = 12, buf_size = x_size * y_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { batch_num, feature_num, y_size, x_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_fyx));
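    // note: feature_num == 1 and x_size == 1, so normalize_fyx here reduces
    // to a softmax over the two y values within each batch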

    set_values(input, { //yxfb
               //f0b0  f0b1  f0b2  f0b3  f0b4   f0b5    f0b6   f0b7   f0b8   f0b9    f0b10  f0b11
        /*y0x0*/ 0.1f, -0.1f, 0.9f, 1.5f, 0.15f, -0.01f, 0.19f, 0.45f, 0.41f, -0.12f, 0.39f, 0.65f,
        /*y1x0*/ 0.2f,  0.2f, -10.f, 5.2f, 0.01f, 0.015f, 0.29f, 0.05f, 0.41f, -0.31f, 0.29f, 1.35f
    });

    float expected_max_values[batch_num * feature_num * x_size] = {
        // the larger of the two y outputs per batch, i.e. 1/(1 + exp(-|y1 - y0|)),
        // derived from the input values above
        0.5249792f, //b=0
        0.5744425f, //b=1
        0.9999815f, //b=2
        0.9758730f, //b=3
        0.5349430f, //b=4
        0.5062497f, //b=5
        0.5249792f, //b=6
        0.5986877f, //b=7
        0.5000000f, //b=8
        0.5473576f, //b=9
        0.5249792f, //b=10
        0.6681878f  //b=11
    };

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }

    float sum = 0;
    float expected_sum = 1.0f;

    for (uint32_t b = 0; b < batch_num; b++)
    {
        for (uint32_t f = 0; f < feature_num; f++)
        {
            for (uint32_t x = 0; x < x_size; x++)
            {
                float temp_max = 0;
                for (uint32_t y = 0; y < y_size; y++)
                {
                    int index = b + y * batch_num + f * feature_num + x * x_size;
                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }
                    sum += out_buffer[index];
                }
                EXPECT_EQ(true, are_equal(temp_max, expected_max_values[b * feature_num * x_size + f * x_size + x]));
                EXPECT_EQ(true, are_equal(sum, expected_sum));
                sum = 0.0f;
            }
        }
    }
}

TEST(softmax_gpu_bfzyx_f32, normalize_z) {
    //  Input  : 2x3x2x2x2 (bfzyx)
    static const int32_t x_size = 2, y_size = 2, z_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size * y_size * z_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f32, format::bfzyx, { batch_num, feature_num, x_size, y_size, z_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_z));

    vector<float> input_vec = {
            //z0y0x0 z0y0x1 z0y1x0 z0y1x1 z1y0x0 z1y0x1 z1y1x0 z1y1x1
        /*b0f0*/0.1f, -0.1f,  0.9f,  1.5f,  0.2f, -0.2f,  0.9f,  2.5f,
        /*b0f1*/0.2f,  0.2f, -10.f,  5.2f,  0.3f,  0.1f, -11.f,  6.2f,
        /*b0f2*/0.2f,  0.2f, -10.f,  5.2f,  0.1f,  0.3f,  -9.f,  4.2f,

        /*b1f0*/3.f,   0.5f,  7.f,   12.f,  5.f,   0.1f,   6.f,  22.f,
        /*b1f1*/4.f,   0.5f,  8.f,   8.2f,  2.2f,  0.3f,   6.f,  5.2f,
        /*b1f2*/0.2f,  0.2f, -10.f,  5.2f,  1.2f,  0.3f, -12.f,  2.2f
    };
    set_values(input, input_vec);
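
    // z_size == 2, so each softmax normalizes a pair of values; the larger
    // output at every (b, f, y, x) location is 1/(1 + exp(-|z1 - z0|))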
    float expected_max_values[24] = {
        0.524979f, 0.524979f,
        0.500000f, 0.731059f,
        0.524979f, 0.524979f,
        0.731059f, 0.731059f,
        0.524979f, 0.524979f,
        0.731059f, 0.731059f,
        0.880797f, 0.598688f,
        0.731059f, 0.999955f,
        0.858149f, 0.549834f,
        0.880797f, 0.952574f,
        0.731059f, 0.524979f,
        0.880797f, 0.952574f
    };

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }

    float sum = 0;
    float expected_sum = 1.0f;
    int max_value_buffer_index = 0;
    for (uint32_t i = 0; i < batch_num; i++)
    {
        for (uint32_t l = 0; l < feature_num; l++)
        {
            for (uint32_t j = 0; j < y_size; j++)
            {
                for (uint32_t k = 0; k < x_size; k++)
                {
                    float temp_max = 0;
                    for (uint32_t m = 0; m < z_size; m++)
                    {
                        int index = i * feature_num * x_size * y_size * z_size +
                            l * x_size * y_size * z_size +
                            m * x_size * y_size +
                            j * x_size +
                            k;

                        if (out_buffer[index] >= temp_max)
                        {
                            temp_max = out_buffer[index];
                        }

                        sum += out_buffer[index];
                    }
                    EXPECT_EQ(true, are_equal(temp_max, expected_max_values[max_value_buffer_index]));
                    max_value_buffer_index++;
                    EXPECT_EQ(true, are_equal(sum, expected_sum));
                    sum = 0.0f;
                }
            }
        }
    }
}

TEST(softmax_gpu_bfyx_f32, normalize_all) {
    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size * y_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch_num, feature_num, x_size, y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_all));
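
    // normalize_all produces a single distribution over the entire tensor,
    // so this and the following normalize_all tests only check that all
    // outputs sum to 1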
    set_values(input, { //bfyx
             //y0x0  y0x1  y1x0   y1x1
        /*b0f0*/ 0.1f, -0.1f,  0.9f,  1.5f,
        /*b0f1*/ 0.2f,  0.2f, -10.f,  5.2f,
        /*b0f2*/ 0.2f,  0.2f, -10.f,  5.2f,
        /*b1f0*/ 3.f,   0.5f,  7.f,   12.f,
        /*b1f1*/ 4.f,   0.5f,  8.f,   8.2f,
        /*b1f2*/ 0.2f,  0.2f, -10.f,  5.2f});

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();

    float sum = 0.0f;
    float expected_sum = 1.0f;
    for (uint32_t i = 0; i < buf_size; i++) {
        sum += get_value<float>(output_ptr, i);
    }
    EXPECT_EQ(true, are_equal(sum, expected_sum));
}

TEST(softmax_gpu_yxfb_f32, normalize_all) {
    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size * y_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { y_size, x_size, feature_num, batch_num } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_all));

    set_values(input, { //yxfb
             //f0b0  f0b1  f1b0   f1b1
        /*y0x0*/ 0.1f, -0.1f,  0.9f,  1.5f,
        /*y0x1*/ 0.2f,  0.2f, -10.f,  5.2f,
        /*y0x2*/ 0.2f,  0.2f, -10.f,  5.2f,
        /*y1x0*/ 3.f,   0.5f,  7.f,   12.f,
        /*y1x1*/ 4.f,   0.5f,  8.f,   8.2f,
        /*y1x2*/ 0.2f,  0.2f, -10.f,  5.2f});

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();

    float sum = 0.0f;
    float expected_sum = 1.0f;
    for (uint32_t i = 0; i < buf_size; i++) {
        sum += get_value<float>(output_ptr, i);
    }
    EXPECT_EQ(true, are_equal(sum, expected_sum));
}

TEST(softmax_gpu_bfzyx_f32, normalize_all) {
    static const int32_t x_size = 2, y_size = 2, z_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size * y_size * z_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f32, format::bfzyx, { batch_num, feature_num, x_size, y_size, z_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_all));

    set_values(input, { //z0y0x0 z0y0x1 z0y1x0 z0y1x1 z1y0x0 z1y0x1 z1y1x0 z1y1x1
        /*b0f0*/ 0.1f, -0.1f,  0.9f,  1.5f, 0.2f, -0.2f,  0.9f,  2.5f,
        /*b0f1*/ 0.2f,  0.2f, -10.f,  5.2f, 0.3f,  0.1f, -11.f,  6.2f,
        /*b0f2*/ 0.2f,  0.2f, -10.f,  5.2f, 0.1f,  0.3f,  -9.f,  4.2f,

        /*b1f0*/ 3.f,   0.5f,  7.f,   12.f, 5.f,   0.1f,   6.f,  22.f,
        /*b1f1*/ 4.f,   0.5f,  8.f,   8.2f, 2.2f,  0.3f,   6.f,  5.2f,
        /*b1f2*/ 0.2f,  0.2f, -10.f,  5.2f, 1.2f,  0.3f, -12.f,  2.2f});
    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();

    float sum = 0.0f;
    float expected_sum = 1.0f;
    for (uint32_t i = 0; i < buf_size; i++) {
        sum += get_value<float>(output_ptr, i);
    }
    EXPECT_EQ(true, are_equal(sum, expected_sum));
}

TEST(softmax_gpu_bfyx_f16, normalize_all) {
    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size * y_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { batch_num, feature_num, x_size, y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_all));

    set_values(input, { //bfyx
             //y0x0            y0x1             y1x0            y1x1
        /*b0f0*/ FLOAT16(0.1f), FLOAT16(-0.1f), FLOAT16(0.9f),  FLOAT16(1.5f),
        /*b0f1*/ FLOAT16(0.2f), FLOAT16(0.2f),  FLOAT16(-10.f), FLOAT16(5.2f),
        /*b0f2*/ FLOAT16(0.2f), FLOAT16(0.2f),  FLOAT16(-10.f), FLOAT16(5.2f),
        /*b1f0*/ FLOAT16(3.f),  FLOAT16(0.5f),  FLOAT16(7.f),   FLOAT16(12.f),
        /*b1f1*/ FLOAT16(4.f),  FLOAT16(0.5f),  FLOAT16(8.f),   FLOAT16(8.2f),
        /*b1f2*/ FLOAT16(0.2f), FLOAT16(0.2f),  FLOAT16(-10.f), FLOAT16(5.2f)});

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<uint16_t>();
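
    // f16 results come back as raw uint16_t and are widened to float on the host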
    float sum = 0.0f;
    float expected_sum = 1.0f;
    for (uint32_t i = 0; i < buf_size; i++) {
        sum += float16_to_float32(get_value<uint16_t>(output_ptr, i));
    }
    ASSERT_NEAR(sum, expected_sum, 0.001);
}

TEST(softmax_gpu_yxfb_f16, normalize_all) {
    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size * y_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f16, format::yxfb, { y_size, x_size, feature_num, batch_num } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_all));

    set_values(input, { //yxfb
             //f0b0            f0b1            f1b0             f1b1
        /*y0x0*/ FLOAT16(0.1f), FLOAT16(-0.1f), FLOAT16(0.9f),  FLOAT16(1.5f),
        /*y0x1*/ FLOAT16(0.2f), FLOAT16(0.2f),  FLOAT16(-10.f), FLOAT16(5.2f),
        /*y0x2*/ FLOAT16(0.2f), FLOAT16(0.2f),  FLOAT16(-10.f), FLOAT16(5.2f),
        /*y1x0*/ FLOAT16(3.f),  FLOAT16(0.5f),  FLOAT16(7.f),   FLOAT16(12.f),
        /*y1x1*/ FLOAT16(4.f),  FLOAT16(0.5f),  FLOAT16(8.f),   FLOAT16(8.2f),
        /*y1x2*/ FLOAT16(0.2f), FLOAT16(0.2f),  FLOAT16(-10.f), FLOAT16(5.2f)});

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<uint16_t>();

    float sum = 0.0f;
    float expected_sum = 1.0f;
    for (uint32_t i = 0; i < buf_size; i++) {
        sum += float16_to_float32(get_value<uint16_t>(output_ptr, i));
    }
    ASSERT_NEAR(sum, expected_sum, 0.001);
}

TEST(softmax_gpu_bfzyx_f16, normalize_all) {
    static const int32_t x_size = 2, y_size = 2, z_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size * y_size * z_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f16, format::bfzyx, { batch_num, feature_num, x_size, y_size, z_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_all));

    set_values(input, { //z0y0x0 z0y0x1 z0y1x0 z0y1x1 z1y0x0 z1y0x1 z1y1x0 z1y1x1
        /*b0f0*/ FLOAT16(0.1f), FLOAT16(-0.1f), FLOAT16(0.9f),  FLOAT16(1.5f), FLOAT16(0.2f), FLOAT16(-0.2f), FLOAT16(0.9f),  FLOAT16(2.5f),
        /*b0f1*/ FLOAT16(0.2f), FLOAT16(0.2f),  FLOAT16(-10.f), FLOAT16(5.2f), FLOAT16(0.3f), FLOAT16(0.1f),  FLOAT16(-11.f), FLOAT16(6.2f),
        /*b0f2*/ FLOAT16(0.2f), FLOAT16(0.2f),  FLOAT16(-10.f), FLOAT16(5.2f), FLOAT16(0.1f), FLOAT16(0.3f),  FLOAT16(-9.f),  FLOAT16(4.2f),

        /*b1f0*/ FLOAT16(3.f),  FLOAT16(0.5f),  FLOAT16(7.f),   FLOAT16(12.f), FLOAT16(5.f),  FLOAT16(0.1f),  FLOAT16(6.f),   FLOAT16(22.f),
        /*b1f1*/ FLOAT16(4.f),  FLOAT16(0.5f),  FLOAT16(8.f),   FLOAT16(8.2f), FLOAT16(2.2f), FLOAT16(0.3f),  FLOAT16(6.f),   FLOAT16(5.2f),
        /*b1f2*/ FLOAT16(0.2f), FLOAT16(0.2f),  FLOAT16(-10.f), FLOAT16(5.2f), FLOAT16(1.2f), FLOAT16(0.3f),  FLOAT16(-12.f), FLOAT16(2.2f)});
    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<uint16_t>();

    float sum = 0.0f;
    float expected_sum = 1.0f;
    for (uint32_t i = 0; i < buf_size; i++) {
        sum += float16_to_float32(get_value<uint16_t>(output_ptr, i));
    }
    ASSERT_NEAR(sum, expected_sum, 0.001);
}

//////////////////////////////////////////////////////////////////////////////
//                      Exhaustive Negative Matrix tests                    //
//////////////////////////////////////////////////////////////////////////////

//TEST(NegativeSoftmaxTest, DISABLED_TestAll) {
//}

//////////////////////////////////////////////////////////////////////////////
//                      Exhaustive Positive Matrix tests                    //
//////////////////////////////////////////////////////////////////////////////

using namespace cldnn;

class softmax_test : public tests::generic_test
{
public:
    softmax_test() : tests::generic_test()
    {
    }

    virtual void SetUp() override
    {
        // allow up to 6 ULPs of difference between GPU output and the reference
        max_ulps_diff_allowed = 6;
    }

    static void TearDownTestCase()
    {
        for (auto generic_params : all_generic_params)
        {
            delete generic_params;
        }

        all_layer_params.clear();
    }

    static std::vector<std::shared_ptr<cldnn::primitive>> generate_specific_test_params()
    {
        all_layer_params.emplace_back(new softmax("softmax", "input0", softmax::normalize_f));

        //The test checks only valid combinations.
        //TODO: add more combinations.

        return all_layer_params;
    }

    static std::vector<tests::test_params*> generate_generic_test_params()
    {
        return generic_test::generate_generic_test_params(all_generic_params);
    }

    virtual bool is_format_supported(cldnn::format format) override
    {
        return
            format == cldnn::format::yxfb ||
            format == cldnn::format::bfyx;
    }

    template<typename Type>
    memory generate_reference_typed(const std::vector<memory>& inputs)
    {
        assert(inputs.size() == 1);
        const memory& input = inputs[0];

        auto output = memory::allocate(engine, cldnn::layout(input.get_layout().data_type, input.get_layout().format, input.get_layout().size));

        //const auto params = static_cast<cldnn::softmax*>(layer_params);

        const auto in0_mem = input.pointer<Type>();
        auto out_mem = output.pointer<Type>();

        const int in0_b = input.get_layout().size.sizes()[0];
        const int in0_f = input.get_layout().size.sizes()[1];
        const int in0_h = input.get_layout().size.sizes()[3];
        const int in0_w = input.get_layout().size.sizes()[2];

//        const int out_b = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[0];
//        const int out_f = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[1];
//        const int out_h = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[2];
//        const int out_w = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[3];
//
//        assert(in0_b == out_b);
//        assert(in0_f == out_f);
//        assert(in0_h == out_h);
//        assert(in0_w == out_w);

        std::vector<float> cached_exp_vals;
        cached_exp_vals.resize(in0_f);

        const auto input_desc = get_linear_memory_desc(input.get_layout());
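
        // reference softmax along the feature axis: for every (b, y, x)
        // location, subtract the per-location max before exponentiating (the
        // usual overflow guard), accumulate the partition sum Z, then normalize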
        for (int n = 0; n < in0_b; ++n)
        for (int y = 0; y < in0_h; ++y)
        for (int x = 0; x < in0_w; ++x)
        {
            float max_val = -std::numeric_limits<float>::infinity();

            for (int c = 0; c < in0_f; ++c)
            {
                const size_t in0_idx = get_linear_index(input.get_layout(), n, c, y, x, input_desc);
                max_val = std::max(max_val, static_cast<float>(in0_mem[in0_idx]));
            }

            float Z = 0;
            for (int c = 0; c < in0_f; ++c)
            {
                const size_t in0_idx = get_linear_index(input.get_layout(), n, c, y, x, input_desc);
                float tmp = static_cast<float>((Type)std::exp(static_cast<float>(in0_mem[in0_idx]) - max_val));
                Z += tmp;
                cached_exp_vals[c] = tmp;
            }

            for (int c = 0; c < in0_f; ++c)
            {
                const size_t out_idx = get_linear_index(output.get_layout(), n, c, y, x, input_desc);
                out_mem[out_idx] = (Type)(cached_exp_vals[c] / Z);
            }
        }

        return output;
    }

    virtual memory generate_reference(const std::vector<memory>& inputs) override
    {
        if (generic_params->data_type == data_types::f32)
        {
            return generate_reference_typed<float>(inputs);
        }
        else
        {
            return generate_reference_typed<FLOAT16>(inputs);
        }
    }

    static std::string custom_param_name(const ::testing::TestParamInfo<std::tuple<test_params*, std::shared_ptr<cldnn::primitive>>>& info)
    {
        std::stringstream res;

        const auto& p = std::get<0>(info.param);

        assert(p->data_type == data_types::f32 ||
               p->data_type == data_types::f16);

        res << info.index
            << "_" << (p->data_type == data_types::f32 ? "f32" : "f16");

        for (unsigned i = 0; i < p->input_layouts.size(); ++i)
        {
            const auto chans = format::traits(p->fmt).order;

            res << "_" << "Input" << i;
            for (unsigned int j = 0; j < p->input_layouts[i].size.sizes(p->fmt).size(); ++j)
            {
                res << chans[j] << p->input_layouts[i].size.sizes(p->fmt)[j];
            }
        }

        return res.str();
    }

private:
    static std::vector<tests::test_params*> all_generic_params;
    static std::vector<std::shared_ptr<cldnn::primitive>> all_layer_params;
};

std::vector<std::shared_ptr<cldnn::primitive>> softmax_test::all_layer_params = {};
std::vector<tests::test_params*> softmax_test::all_generic_params = {};

TEST_P(softmax_test, SOFTMAX)
{
    run_single_test();
}

INSTANTIATE_TEST_CASE_P(DISABLED_SOFTMAX,
                        softmax_test,
                        ::testing::Combine(::testing::ValuesIn(softmax_test::generate_generic_test_params()), ::testing::ValuesIn(softmax_test::generate_specific_test_params())),
                        softmax_test::custom_param_name);