/*
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include <gtest/gtest.h>
#include "api/CPP/memory.hpp"
#include <api/CPP/input_layout.hpp>
#include "api/CPP/softmax.hpp"
#include <api/CPP/topology.hpp>
#include <api/CPP/network.hpp>
#include <api/CPP/engine.hpp>
#include "test_utils/test_utils.h"

#include <algorithm> // std::max, std::max_element, std::copy
#include <cmath>     // std::exp
#include <limits>    // std::numeric_limits (NaN fill, -infinity)

using namespace cldnn;
using namespace std;
using namespace tests;
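
// Functional tests for the clDNN softmax GPU primitive. The fixture-based
// group below runs a fixed 10x2 xb_f32 buffer and checks values element-wise;
// the later TEST()s exercise the normalize_fyx / normalize_y / normalize_f
// modes on bfyx and yxfb layouts, checking that every normalized slice sums
// to 1 and that its maximum matches a precomputed value.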
class softmax_gpu_xb_f32_test_fixture : public ::testing::Test {
public:
    static const int32_t
        output_x = 10, output_b = 2, // size of whole output buffer
        input_x  = 10, input_b  = 2, // size of whole input buffer
        in_size  = input_x * input_b,
        out_size = output_x * output_b;

    float in_buffer[in_size];
    float out_buffer[out_size];
    float expected_buffer[out_size];

    cldnn::engine engine;
    cldnn::memory input;
    //neural::primitive output = memory::allocate({ memory::format::xb_f32, {output_b, {{output_x}}, 1}});

    softmax_gpu_xb_f32_test_fixture()
        : engine()
        , input(memory::allocate(engine, { data_types::f32, format::yxfb, { input_b, 1, input_x, 1 } }))
    {}

    void compare_out_buffer_with_expected() {
        for (size_t i = 0; i < out_size; ++i) {
            // does output have expected values
            EXPECT_TRUE(are_equal(out_buffer[i], expected_buffer[i]))
                << "At [" << i << "] Expected : " << expected_buffer[i] << " actual : " << out_buffer[i];
        }
    }

    void compare_out_buffer_with_expected_batch_wise() {
        for (size_t b = 0; b < output_b; ++b) {
            float batch_wise_sum = 0;
            for (size_t x = 0; x < output_x; ++x) {
                auto idx = b + x * output_b;
                batch_wise_sum += out_buffer[idx];
                // does output have expected values
                EXPECT_TRUE(are_equal(out_buffer[idx], expected_buffer[idx]))
                    << "At [" << idx << "] Expected : " << expected_buffer[idx] << " actual : " << out_buffer[idx];
            }
            // does it sum to 1 batch wise
            EXPECT_TRUE(are_equal(batch_wise_sum, 1.0f))
                << "Expected : " << 1.0f << " actual : " << batch_wise_sum;
        }
    }
};
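
// In the xb_f32 buffer used by this fixture (expressed as yxfb with f = y = 1)
// the two batches are interleaved, so element x of batch b lives at linear
// index b + x * output_b -- the indexing the batch-wise comparison above
// relies on.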
TEST_F(softmax_gpu_xb_f32_test_fixture, input_same_values) {
    // in_buffer filled with the same value == 1.0f; softmax of 10 equal
    // values is uniform, so every output should be 1/10 = 0.1f
    for (uint32_t i = 0; i < out_size; ++i) {
        in_buffer[i] = 1.0f;
        expected_buffer[i] = 0.1f;
    }
    std::vector<float> in_b(std::begin(in_buffer), std::end(in_buffer));

    set_values(input, in_b);

    network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input")));
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output_prim = outputs.begin()->second.get_memory();

    auto output_ptr = output_prim.pointer<float>();
    for (uint32_t i = 0; i < out_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    compare_out_buffer_with_expected();
}
TEST_F(softmax_gpu_xb_f32_test_fixture, input_same_values_batch_wise) {
    // in_buffer filled with a constant value per batch: 1.0f for batch 0 and
    // 2.0f for batch 1, interleaved in the xb_f32 layout
    for (size_t i = 0; i < output_x; ++i) {
        for (size_t j = 0; j < output_b; ++j)
            in_buffer[j + i * output_b] = (j + i * output_b) % 2 + 1.0f;
    }

    std::vector<float> in_b(std::begin(in_buffer), std::end(in_buffer));
    set_values(input, in_b);
    // within each batch the values are equal, so the expected softmax output
    // is uniform: 1/10 = 0.1f everywhere
    for (size_t i = 0; i < out_size; ++i)
        expected_buffer[i] = 0.1f;

    network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input")));
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output_prim = outputs.begin()->second.get_memory();

    auto output_ptr = output_prim.pointer<float>();
    for (uint32_t i = 0; i < out_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    compare_out_buffer_with_expected_batch_wise();
}
TEST_F(softmax_gpu_xb_f32_test_fixture, values_batch_wise) {

    // per-batch inputs in the interleaved xb layout; both batches hold the
    // same values. (The original literals were lost; since softmax is
    // shift-invariant, these reconstructed values reproduce exp_buf exactly.)
    float in_buf[in_size] = {
       //b0    b1
        2.0f,  2.0f, //x0
        2.0f,  2.0f, //x1
        2.0f,  2.0f, //x2
        3.0f,  3.0f, //x3
        5.0f,  5.0f, //x4
        4.0f,  4.0f, //x5
        3.0f,  3.0f, //x6
        2.0f,  2.0f, //x7
        2.0f,  2.0f, //x8
        2.0f,  2.0f  //x9
    };

    float exp_buf[out_size] = {
        0.02569957f,  0.02569957f,
        0.02569957f,  0.02569957f,
        0.02569957f,  0.02569957f,
        0.069858674f, 0.069858674f,
        0.516189665f, 0.516189665f,
        0.189895565f, 0.189895565f,
        0.069858674f, 0.069858674f,
        0.02569957f,  0.02569957f,
        0.02569957f,  0.02569957f,
        0.02569957f,  0.02569957f
    };

    std::vector<float> in_b(std::begin(in_buf), std::end(in_buf));
    set_values(input, in_b);
    std::copy(exp_buf, exp_buf + in_size, expected_buffer);

    // out_buffer filled with non-signaling NaN so stale values cannot pass
    for (size_t i = 0; i < out_size; ++i)
        out_buffer[i] = std::numeric_limits<float>::quiet_NaN();

    network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input")));
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output_prim = outputs.begin()->second.get_memory();

    auto output_ptr = output_prim.pointer<float>();
    for (uint32_t i = 0; i < out_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    compare_out_buffer_with_expected_batch_wise();
}
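
// A minimal host-side sketch of the computation being verified above
// (illustration only -- this helper is ours, not part of the clDNN API; the
// generic_test reference at the bottom of this file is what the parameterized
// tests actually use):
inline std::vector<float> softmax_reference_1d(const std::vector<float>& v)
{
    float max_val = *std::max_element(v.begin(), v.end());
    std::vector<float> out(v.size());
    float z = 0.0f; // normalization constant
    for (size_t i = 0; i < v.size(); ++i)
    {
        out[i] = std::exp(v[i] - max_val); // shift by the max for numerical stability
        z += out[i];
    }
    for (auto& e : out)
        e /= z; // outputs now sum to 1
    return out;
}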
TEST(softmax_gpu_bfyx_f32, normalize_fyx) {

    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size * y_size * batch_num * feature_num;

    engine engine;
    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input"));

    set_values(input, { //bfyx
        //y0x0  y0x1   y1x0   y1x1
        /*b0f0*/0.1f, -0.1f,  0.9f,  1.5f,
        /*b0f1*/0.2f,  0.2f, -10.f,  5.2f,
        /*b0f2*/0.2f,  0.2f, -10.f,  5.2f,
        /*b1f0*/3.f,   0.5f,  7.f,  12.f,
        /*b1f1*/4.f,   0.5f,  8.f,   8.2f,
        /*b1f2*/0.2f,  0.2f, -10.f,  5.2f
    });

    float expected_max_values[2] = {
        0.481618381f, 0.953259517f
    };

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }

    float sum = 0;
    float expected_sum = 1.0f;
    float temp_max = 0;
    int max_value_buffer_index = 0;

    for (uint32_t i = 0; i < batch_num; i++) // the default mode normalizes over f, y and x, so each batch must sum to 1.0f
    {
        for (uint32_t j = 0; j < y_size; j++)
        {
            for (uint32_t k = 0; k < x_size; k++)
            {
                for (uint32_t l = 0; l < feature_num; l++)
                {
                    int index = i * feature_num * x_size * y_size + j * x_size + k + l * x_size * y_size;
                    sum += out_buffer[index];
                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }
                }
            }
        }
        EXPECT_EQ(true, are_equal(sum, expected_sum));
        sum = 0.0f;
        EXPECT_EQ(true, are_equal(temp_max, expected_max_values[max_value_buffer_index]));
        temp_max = 0;
        max_value_buffer_index++;
    }
}
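
// Note on the index arithmetic above: for a bfyx buffer the linear offset of
// element (b, f, y, x) is ((b * feature_num + f) * y_size + y) * x_size + x,
// which the nested loops expand term by term (the f and y terms commute since
// they are plain additions).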
TEST(softmax_gpu_bfyx_f32, normalize_y) {

    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size * y_size * batch_num * feature_num;

    engine engine;
    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_y));

    vector<float> input_vec = {
        //y0x0  y0x1   y1x0   y1x1
        /*b0f0*/0.1f, -0.1f,  0.9f,  1.5f,
        /*b0f1*/0.2f,  0.2f, -10.f,  5.2f,
        /*b0f2*/0.2f,  0.2f, -10.f,  5.2f,

        /*b1f0*/3.f,   0.5f,  7.f,  12.f,
        /*b1f1*/4.f,   0.5f,  8.f,   8.2f,
        /*b1f2*/0.2f,  0.2f, -10.f,  5.2f
    };
    set_values(input, input_vec);

    float expected_max_values[12] = {
        0.689974481f,  //b=0, f=0, x=0
        0.832018385f,  //b=0, f=0, x=1

        0.999962831f,  //b=0, f=1, x=0
        0.993307149f,  //b=0, f=1, x=1

        0.999962831f,  //b=0, f=2, x=0
        0.993307149f,  //b=0, f=2, x=1

        0.98201379f,   //b=1, f=0, x=0
        0.99998987f,   //b=1, f=0, x=1

        0.98201379f,   //b=1, f=1, x=0
        0.999547378f,  //b=1, f=1, x=1

        0.999962831f,  //b=1, f=2, x=0
        0.993307149f   //b=1, f=2, x=1
    };

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }

    float expected_sum = 1.0f;
    int max_value_buffer_index = 0;
    for (uint32_t i = 0; i < batch_num; i++) // each (b, f, x) column normalized over y must sum to 1.0f
    {
        for (uint32_t l = 0; l < feature_num; l++)
        {
            for (uint32_t k = 0; k < x_size; k++)
            {
                float temp_max = 0;
                float sum = 0.0f;
                for (uint32_t j = 0; j < y_size; j++)
                {
                    int index = i * feature_num * x_size * y_size +
                        l * x_size * y_size +
                        j * x_size +
                        k;
                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }
                    sum += out_buffer[index];
                }
                EXPECT_EQ(true, are_equal(temp_max, expected_max_values[max_value_buffer_index]));
                max_value_buffer_index++;
                EXPECT_EQ(true, are_equal(sum, expected_sum));
            }
        }
    }
}
TEST(softmax_gpu_bfyx_f32, normalize_f) {

    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size * y_size * batch_num * feature_num;

    engine engine;
    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_f));

    vector<float> input_vec = {
        //y0x0  y0x1   y1x0   y1x1
        /*b0f0*/0.1f, -0.1f,  0.9f,  1.5f,
        /*b0f1*/0.2f,  0.2f, -10.f,  5.2f,
        /*b0f2*/0.2f,  0.2f, -10.f,  5.2f,

        /*b1f0*/3.f,   0.5f,  7.f,  12.f,
        /*b1f1*/4.f,   0.5f,  8.f,   8.2f,
        /*b1f2*/0.2f,  0.2f, -10.f,  5.2f
    };
    set_values(input, input_vec);

    float expected_max_values[8] = {
        0.344253346f, //b=0, y=0, x=0
        0.364854551f, //b=0, y=0, x=1

        0.999963085f, //b=0, y=1, x=0
        0.493894592f, //b=0, y=1, x=1

        0.719294981f, //b=1, y=0, x=0
        0.364854551f, //b=1, y=0, x=1

        0.73105857f,  //b=1, y=1, x=0
        0.977054322f  //b=1, y=1, x=1
    };

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }

    float expected_sum = 1.0f;
    int max_value_buffer_index = 0;
    for (uint32_t i = 0; i < batch_num; i++) // each (b, y, x) pixel normalized over features must sum to 1.0f
    {
        for (uint32_t j = 0; j < y_size; j++)
        {
            for (uint32_t k = 0; k < x_size; k++)
            {
                float temp_max = 0;
                float sum = 0.0f;
                for (uint32_t l = 0; l < feature_num; l++)
                {
                    int index = i * feature_num * x_size * y_size +
                        l * x_size * y_size +
                        j * x_size +
                        k;
                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }
                    sum += out_buffer[index];
                }
                EXPECT_EQ(true, are_equal(temp_max, expected_max_values[max_value_buffer_index]));
                max_value_buffer_index++;
                EXPECT_EQ(true, are_equal(sum, expected_sum));
            }
        }
    }
}
TEST(softmax_gpu_yxfb_f32, normalize_f) {

    static const int32_t x_size = 1, y_size = 2, feature_num = 1,
        batch_num = 12, buf_size = x_size * y_size * batch_num * feature_num;

    engine engine;
    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ batch_num, feature_num, y_size , x_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_fyx));

    set_values(input, { //yxfb
        //f0b0   f0b1   f0b2   f0b3  f0b4    f0b5    f0b6   f0b7   f0b8   f0b9    f0b10  f0b11
        /*y0x0*/ 0.1f, -0.1f,  0.9f, 1.5f,  0.15f,  -0.01f, 0.19f, 0.45f, 0.41f, -0.12f, 0.39f, 0.65f,
        /*y1x0*/ 0.2f,  0.2f, -10.f, 5.2f,  0.01f,   0.015f, 0.29f, 0.05f, 0.41f, -0.31f, 0.29f, 1.35f
    });

    // With f = x = 1, normalize_fyx softmaxes each batch over its two y
    // values, so the expected maximum per batch is sigmoid(|y1 - y0|).
    // (The original literals were elided; these are recomputed from the
    // inputs above.)
    float expected_max_values[batch_num * feature_num * x_size] = {
        0.52497919f,
        0.57444252f,
        0.99998154f,
        0.97587371f,
        0.53494295f,
        0.50624947f,
        0.52497919f,
        0.59868766f,
        0.50000000f,
        0.54735762f,
        0.52497919f,
        0.66818777f
    };

    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }

    float expected_sum = 1.0f;

    for (uint32_t b = 0; b < batch_num; b++)
    {
        for (uint32_t f = 0; f < feature_num; f++)
        {
            for (uint32_t x = 0; x < x_size; x++)
            {
                float temp_max = 0;
                float sum = 0.0f;
                for (uint32_t y = 0; y < y_size; y++)
                {
                    // yxfb linear offset; the f and x terms drop out here
                    // because feature_num == 1 and x_size == 1
                    int index = b + y * batch_num + f * feature_num + x * x_size;
                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }
                    sum += out_buffer[index];
                }
                EXPECT_EQ(true, are_equal(temp_max, expected_max_values[b * feature_num * x_size + f * x_size + x]));
                EXPECT_EQ(true, are_equal(sum, expected_sum));
            }
        }
    }
}
//////////////////////////////////////////////////////////////////////////////
//                     Exhaustive Negative Matrix tests                      //
//////////////////////////////////////////////////////////////////////////////

//TODO:
//TEST(NegativeSoftmaxTest, DISABLED_TestAll) {
//}

//////////////////////////////////////////////////////////////////////////////
//                     Exhaustive Positive Matrix tests                      //
//////////////////////////////////////////////////////////////////////////////
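
// The parameterized suite below drives the softmax primitive through the
// tests::generic_test harness: generate_generic_test_params() produces the
// input layouts and data types, generate_reference_typed() computes a CPU
// reference (softmax over the feature axis), and each GPU run is compared
// against it within max_ulps_diff_allowed.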
using namespace cldnn;

class softmax_test : public tests::generic_test
{
public:
    softmax_test() : tests::generic_test()
    {
    }

    virtual void SetUp() override
    {
        max_ulps_diff_allowed = 6;
    }

    static void TearDownTestCase()
    {
        for (auto generic_params : all_generic_params)
        {
            delete generic_params;
        }

        for (auto layer_params : all_layer_params)
        {
            delete layer_params;
        }
    }

    static std::vector<cldnn::primitive*> generate_specific_test_params()
    {
        all_layer_params.push_back(new softmax("softmax", "input0", softmax::normalize_f));

        //The test checks only valid combinations.
        //TODO: add more combinations.

        return all_layer_params;
    }

    static std::vector<tests::test_params*> generate_generic_test_params()
    {
        return generic_test::generate_generic_test_params(all_generic_params);
    }

    virtual bool is_format_supported(cldnn::format format) override
    {
        return
            format == cldnn_format_type::cldnn_format_yxfb ||
            format == cldnn_format_type::cldnn_format_bfyx;
    }

    template<typename Type>
    memory generate_reference_typed(const std::vector<memory> & inputs)
    {
        assert(inputs.size() == 1);
        const memory & input = inputs[0];

        // output has the same layout as the input
        auto output = memory::allocate(engine, cldnn::layout(input.get_layout().data_type, input.get_layout().format, input.get_layout().size));

        // const auto params = static_cast<cldnn::softmax *>(layer_parmas);

        const auto in0_mem = input.pointer<Type>();
        auto out_mem = output.pointer<Type>();

        const int in0_b = input.get_layout().size.sizes()[0];
        const int in0_f = input.get_layout().size.sizes()[1];
        const int in0_h = input.get_layout().size.sizes()[3];
        const int in0_w = input.get_layout().size.sizes()[2];

        // const int out_b = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[0];
        // const int out_f = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[1];
        // const int out_h = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[2];
        // const int out_w = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[3];

        // assert(in0_b == out_b);
        // assert(in0_f == out_f);
        // assert(in0_h == out_h);
        // assert(in0_w == out_w);

        std::vector<float> cached_exp_vals;
        cached_exp_vals.resize(in0_f);

        const auto input_desc = get_linear_memory_desc(input.get_layout());

        for (int n = 0; n < in0_b; ++n)
        for (int y = 0; y < in0_h; ++y)
        for (int x = 0; x < in0_w; ++x)
        {
            // pass 1: find the max over the feature axis (numerical stability)
            float max_val = -std::numeric_limits<float>::infinity();

            for (int c = 0; c < in0_f; ++c)
            {
                const size_t in0_idx = get_linear_index(input.get_layout(), n, c, y, x, input_desc);

                max_val = std::max(max_val, static_cast<float>(in0_mem[in0_idx]));
            }

            // pass 2: exponentiate shifted values and accumulate the normalizer Z
            float Z = 0;

            for (int c = 0; c < in0_f; ++c)
            {
                const size_t in0_idx = get_linear_index(input.get_layout(), n, c, y, x, input_desc);

                float tmp = static_cast<float>((Type)std::exp(static_cast<float>(in0_mem[in0_idx]) - max_val));
                Z += tmp;
                cached_exp_vals[c] = tmp;
            }

            // pass 3: normalize
            for (int c = 0; c < in0_f; ++c)
            {
                const size_t out_idx = get_linear_index(output.get_layout(), n, c, y, x, input_desc);
                out_mem[out_idx] = (Type)(cached_exp_vals[c] / Z);
            }
        }

        return output;
    }

    virtual memory generate_reference(const std::vector<memory> & inputs) override
    {
        if (generic_params->data_type == data_types::f32)
        {
            return generate_reference_typed<float>(inputs);
        }
        else
        {
            return generate_reference_typed<FLOAT16>(inputs);
        }
    }

    static std::string custom_param_name(const ::testing::TestParamInfo<std::tuple<test_params*, cldnn::primitive*>>& info)
    {
        std::stringstream res;

        const auto & p = std::get<0>(info.param);

        assert (p->data_type == data_types::f32 ||
                p->data_type == data_types::f16);

        res << info.index
            << "_" << (p->data_type == data_types::f32 ? "f32" : "f16");

        for (unsigned i = 0; i < p->input_layouts.size(); ++i)
        {
            const auto chans = format::traits(p->fmt).order;

            res << "_" << "Input" << i;
            for (unsigned int j = 0; j < p->input_layouts[i].size.sizes(p->fmt).size(); ++j)
            {
                res << chans[j] << p->input_layouts[i].size.sizes(p->fmt)[j];
            }
        }

        return res.str();
    }

private:
    static std::vector<tests::test_params*> all_generic_params;
    static std::vector<cldnn::primitive*> all_layer_params;
};

std::vector<cldnn::primitive*> softmax_test::all_layer_params = {};
std::vector<tests::test_params*> softmax_test::all_generic_params = {};
TEST_P(softmax_test, SOFTMAX)
{
    run_single_test();
}

INSTANTIATE_TEST_CASE_P(DISABLED_SOFTMAX,
                        softmax_test,
                        ::testing::Combine(::testing::ValuesIn(softmax_test::generate_generic_test_params()), ::testing::ValuesIn(softmax_test::generate_specific_test_params())),
                        softmax_test::custom_param_name);
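
// To run only the suites in this file (assumption: a standard gtest runner
// binary; adjust the executable name to your build):
//   ./tests --gtest_filter='softmax_gpu*'
// The parameterized matrix above is instantiated DISABLED_ by default; pass
// --gtest_also_run_disabled_tests to include it.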