// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "api/CPP/memory.hpp"
#include <api/CPP/input_layout.hpp>
#include "api/CPP/softmax.hpp"
#include <api/CPP/topology.hpp>
#include <api/CPP/network.hpp>
#include <api/CPP/engine.hpp>
#include "test_utils/test_utils.h"

#include <algorithm>
#include <cmath>
#include <limits>
#include <sstream>
using namespace cldnn;
using namespace tests;
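// Unit tests for the clDNN softmax GPU primitive.
//
//     softmax(x_i) = exp(x_i - max_j x_j) / sum_k exp(x_k - max_j x_j)
//
// The normalization axis depends on the dimension argument (normalize_fyx,
// normalize_y, normalize_f, ...); every slice along that axis must sum to 1.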
class softmax_gpu_xb_f32_test_fixture: public ::testing::Test {
public:
    static const int32_t
        output_x  = 10, output_b  = 2,  // size of whole output buffer
        input_x   = 10, input_b   = 2,  // size of whole input buffer
        in_size   = input_x*input_b,
        out_size  = output_x*output_b;

    float in_buffer[in_size];
    float out_buffer[out_size];
    float expected_buffer[out_size];

    const cldnn::engine& engine;
    cldnn::memory input;

    //neural::primitive output = memory::allocate({ memory::format::xb_f32, {output_b, {{output_x}}, 1}});
    softmax_gpu_xb_f32_test_fixture()
        : engine(get_test_engine())
        , input(memory::allocate(engine, { data_types::f32, format::yxfb, { input_b, 1, input_x, 1}}))
    {}
    void compare_out_buffer_with_expected() {
        for(size_t i = 0; i < out_size; ++i) {
            // does output have expected values
            EXPECT_TRUE(are_equal(out_buffer[i], expected_buffer[i]))
                << "At [" << i << "] Expected : " << expected_buffer[i] << " actual : " << out_buffer[i];
        }
    }
    void compare_out_buffer_with_expected_batch_wise() {
        for(size_t b = 0; b < output_b; ++b) {
            float batch_wise_sum = 0;
            for(size_t x = 0; x < output_x; ++x) {
                auto idx = b + x*output_b;
                batch_wise_sum += out_buffer[idx];
                // does output have expected values
                EXPECT_TRUE(are_equal(out_buffer[idx], expected_buffer[idx]))
                    << "At [" << idx << "] Expected : " << expected_buffer[idx] << " actual : " << out_buffer[idx];
            }
            // does it sum to 1 batch wise
            EXPECT_TRUE(are_equal(batch_wise_sum, 1.0f))
                << "Expected : " << 1.0f << " actual : " << batch_wise_sum;
        }
    }
};
TEST_F(softmax_gpu_xb_f32_test_fixture, input_same_values) {
    // in_buffer filled with same value == 1.0f
    for(uint32_t i = 0; i < out_size; ++i) {
        in_buffer[i] = 1.0f;
        expected_buffer[i] = 0.1f;
    }
    std::vector<float> in_b(std::begin(in_buffer), std::end(in_buffer));
    set_values(input, in_b);

    network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input")));
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output_prim = outputs.begin()->second.get_memory();

    auto output_ptr = output_prim.pointer<float>();
    for (uint32_t i = 0; i < out_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    compare_out_buffer_with_expected();
}
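// Softmax is shift-invariant (softmax(x + c) == softmax(x)), so a batch filled
// with the constant 2.0f produces the same uniform 0.1f output as a batch
// filled with 1.0f; the next test relies on exactly this property.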
TEST_F(softmax_gpu_xb_f32_test_fixture, input_same_values_batch_wise) {
    // in_buffer filled with a constant value per batch: 1.0f for batch 0 and
    // 2.0f for batch 1 (in xb_f32 the batch index varies fastest)
    for(size_t i = 0; i < output_x; ++i) {
        for(size_t j = 0; j < output_b; ++j)
            in_buffer[j + i*output_b] = (j + i*output_b) % 2 + 1.0f;
    }

    std::vector<float> in_b(std::begin(in_buffer), std::end(in_buffer));
    set_values(input, in_b);

    // fill expected buffer with the expected 0.1f value
    for(size_t i = 0; i < out_size; ++i)
        expected_buffer[i] = 0.1f;

    network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input")));
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output_prim = outputs.begin()->second.get_memory();

    auto output_ptr = output_prim.pointer<float>();
    for (uint32_t i = 0; i < out_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    compare_out_buffer_with_expected_batch_wise();
}
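// Worked expectation for the next test: with per-batch inputs {2,2,2,3,5,4,3,2,2,2},
// shifting by the maximum (5) gives exponents {-3,-3,-3,-2,0,-1,-2,-3,-3,-3}, so
// Z = 6*e^-3 + 2*e^-2 + e^-1 + 1 ≈ 1.937272, and e.g. e^0/Z ≈ 0.516190,
// e^-1/Z ≈ 0.189896, e^-2/Z ≈ 0.069859, e^-3/Z ≈ 0.025700.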
TEST_F(softmax_gpu_xb_f32_test_fixture, values_batch_wise) {
    // NOTE: the original input literals were lost from this file; the x-profile
    // below is reconstructed from exp_buf (softmax determines the inputs up to
    // an additive constant, and both batches share the same profile).
    float in_buf[in_size] = {
       //b0    b1
        2.0f,  2.0f, //x0
        2.0f,  2.0f, //x1
        2.0f,  2.0f, //x2
        3.0f,  3.0f, //x3
        5.0f,  5.0f, //x4
        4.0f,  4.0f, //x5
        3.0f,  3.0f, //x6
        2.0f,  2.0f, //x7
        2.0f,  2.0f, //x8
        2.0f,  2.0f  //x9
    };

    float exp_buf[out_size] = {
        0.02569957f,  0.02569957f,
        0.02569957f,  0.02569957f,
        0.02569957f,  0.02569957f,
        0.069858674f, 0.069858674f,
        0.516189665f, 0.516189665f,
        0.189895565f, 0.189895565f,
        0.069858674f, 0.069858674f,
        0.02569957f,  0.02569957f,
        0.02569957f,  0.02569957f,
        0.02569957f,  0.02569957f
    };
    std::vector<float> in_b(std::begin(in_buf), std::end(in_buf));
    set_values(input, in_b);
    std::copy(exp_buf, exp_buf + out_size, expected_buffer);

    // out_buffer filled with non-signaling NaN
    for(size_t i = 0; i < out_size; ++i)
        out_buffer[i] = NAN;

    network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input")));
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output_prim = outputs.begin()->second.get_memory();

    auto output_ptr = output_prim.pointer<float>();
    for (uint32_t i = 0; i < out_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    compare_out_buffer_with_expected_batch_wise();
}
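// For the default normalize_fyx mode used below, softmax runs jointly over
// f, y and x, so the 12 (= 3*2*2) outputs of each batch sum to 1 and each
// batch contributes a single expected maximum.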
TEST(softmax_gpu_bfyx_f32, normalize_fyx) {
    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size*y_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch_num, feature_num, x_size, y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input"));
    set_values(input, {  //bfyx
            //y0x0  y0x1   y1x0   y1x1
    /*b0f0*/ 0.1f, -0.1f,  0.9f,  1.5f,
    /*b0f1*/ 0.2f,  0.2f, -10.f,  5.2f,
    /*b0f2*/ 0.2f,  0.2f, -10.f,  5.2f,
    /*b1f0*/ 3.f,   0.5f,  7.f,   12.f,
    /*b1f1*/ 4.f,   0.5f,  8.f,   8.2f,
    /*b1f2*/ 0.2f,  0.2f, -10.f,  5.2f
    });

    float expected_max_values[2] = {
        0.481618381f, 0.953259517f
    };
    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    float sum = 0;
    float expected_sum = 1.0f;

    float temp_max = 0;
    int max_value_buffer_index = 0;

    for (uint32_t i = 0; i < batch_num; i++) //this loop sums all results within a batch; each per-batch sum should be 1.0f
    {
        for (uint32_t j = 0; j < y_size; j++)
        {
            for (uint32_t k = 0; k < x_size; k++)
            {
                for (uint32_t l = 0; l < feature_num; l++)
                {
                    int index = i * feature_num * x_size * y_size + j * x_size + k + l * x_size * y_size;
                    sum += out_buffer[index];
                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }
                }
            }
        }

        EXPECT_EQ(true, are_equal(sum, expected_sum));
        sum = 0.0f;

        EXPECT_EQ(true, are_equal(temp_max, expected_max_values[max_value_buffer_index]));
        temp_max = 0;
        max_value_buffer_index++;
    }
}
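// normalize_y: softmax runs along y independently for every (b, f, x) column.
// With y_size == 2 the larger of the two outputs is 1/(1 + exp(-|y1 - y0|)),
// e.g. for b=0, f=0, x=0 the inputs 0.1 and 0.9 give 1/(1 + e^-0.8) ≈ 0.689974.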
TEST(softmax_gpu_bfyx_f32, normalize_y) {
    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size*y_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch_num, feature_num, x_size, y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_y));
    std::vector<float> input_vec = {
               //y0x0  y0x1   y1x0   y1x1
        /*b0f0*/ 0.1f, -0.1f,  0.9f,  1.5f,
        /*b0f1*/ 0.2f,  0.2f, -10.f,  5.2f,
        /*b0f2*/ 0.2f,  0.2f, -10.f,  5.2f,

        /*b1f0*/ 3.f,   0.5f,  7.f,   12.f,
        /*b1f1*/ 4.f,   0.5f,  8.f,   8.2f,
        /*b1f2*/ 0.2f,  0.2f, -10.f,  5.2f
    };
    set_values(input, input_vec);
    float expected_max_values[12] = {
        0.689974481f, //b=0, f=0, x=0
        0.832018385f, //b=0, f=0, x=1

        0.999962831f, //b=0, f=1, x=0
        0.993307149f, //b=0, f=1, x=1

        0.999962831f, //b=0, f=2, x=0
        0.993307149f, //b=0, f=2, x=1

        0.98201379f,  //b=1, f=0, x=0
        0.99998987f,  //b=1, f=0, x=1

        0.98201379f,  //b=1, f=1, x=0
        0.999547378f, //b=1, f=1, x=1

        0.999962831f, //b=1, f=2, x=0
        0.993307149f  //b=1, f=2, x=1
    };
    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    float temp_max = 0;
    float expected_sum = 1.0f;
    int max_value_buffer_index = 0;
    for (uint32_t i = 0; i < batch_num; i++) //this loop checks each (b, f, x) column; each per-column sum along y should be 1.0f
    {
        for (uint32_t l = 0; l < feature_num; l++)
        {
            for (uint32_t k = 0; k < x_size; k++)
            {
                float sum = 0.0f;
                for (uint32_t j = 0; j < y_size; j++)
                {
                    int index = i * feature_num * x_size * y_size +
                        l * x_size * y_size +
                        j * x_size +
                        k;

                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }

                    sum += out_buffer[index];
                }
                EXPECT_EQ(true, are_equal(temp_max, expected_max_values[max_value_buffer_index]));
                temp_max = 0;
                max_value_buffer_index++;

                EXPECT_EQ(true, are_equal(sum, expected_sum));
            }
        }
    }
}
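// normalize_f: softmax runs across the feature axis independently for every
// (b, y, x) position. For example, at b=0, y=0, x=0 the three feature values
// are (0.1, 0.2, 0.2), so the maximum output is e^0.2 / (e^0.1 + 2*e^0.2) ≈ 0.344253.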
TEST(softmax_gpu_bfyx_f32, normalize_f) {
    static const int32_t x_size = 2, y_size = 2, feature_num = 3,
        batch_num = 2, buf_size = x_size*y_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch_num, feature_num, x_size, y_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_f));
    std::vector<float> input_vec = {
               //y0x0  y0x1   y1x0   y1x1
        /*b0f0*/ 0.1f, -0.1f,  0.9f,  1.5f,
        /*b0f1*/ 0.2f,  0.2f, -10.f,  5.2f,
        /*b0f2*/ 0.2f,  0.2f, -10.f,  5.2f,

        /*b1f0*/ 3.f,   0.5f,  7.f,   12.f,
        /*b1f1*/ 4.f,   0.5f,  8.f,   8.2f,
        /*b1f2*/ 0.2f,  0.2f, -10.f,  5.2f
    };
    set_values(input, input_vec);
    float expected_max_values[8] = {
        0.344253346f, //b=0, y=0, x=0
        0.364854551f, //b=0, y=0, x=1

        0.999963085f, //b=0, y=1, x=0
        0.493894592f, //b=0, y=1, x=1

        0.719294981f, //b=1, y=0, x=0
        0.364854551f, //b=1, y=0, x=1

        0.73105857f,  //b=1, y=1, x=0
        0.977054322f  //b=1, y=1, x=1
    };
    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    float temp_max = 0;
    float expected_sum = 1.0f;
    int max_value_buffer_index = 0;
    for (uint32_t i = 0; i < batch_num; i++) //this loop checks each (b, y, x) position; each per-position sum across features should be 1.0f
    {
        for (uint32_t j = 0; j < y_size; j++)
        {
            for (uint32_t k = 0; k < x_size; k++)
            {
                float sum = 0.0f;
                for (uint32_t l = 0; l < feature_num; l++)
                {
                    int index = i * feature_num * x_size * y_size +
                        l * x_size * y_size +
                        j * x_size +
                        k;

                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }

                    sum += out_buffer[index];
                }
                EXPECT_EQ(true, are_equal(temp_max, expected_max_values[max_value_buffer_index]));
                temp_max = 0;
                max_value_buffer_index++;

                EXPECT_EQ(true, are_equal(sum, expected_sum));
            }
        }
    }
}
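// In the yxfb test below feature_num == 1 and x_size == 1, so normalize_fyx
// reduces to a softmax over the two y values of each batch; each per-batch
// maximum is therefore 1/(1 + exp(-|y1 - y0|)), i.e. sigmoid of the y gap.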
TEST(softmax_gpu_yxfb_f32, normalize_f) {
    static const int32_t x_size = 1, y_size = 2, feature_num = 1,
        batch_num = 12, buf_size = x_size*y_size * batch_num * feature_num;
    const auto& engine = get_test_engine();

    auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { batch_num, feature_num, y_size, x_size } });
    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(softmax("softmax", "input", softmax::normalize_fyx));
    set_values(input, {  //yxfb
              //f0b0   f0b1   f0b2   f0b3   f0b4    f0b5    f0b6   f0b7   f0b8   f0b9    f0b10  f0b11
        /*y0x0*/ 0.1f, -0.1f,  0.9f,  1.5f,  0.15f, -0.01f,  0.19f, 0.45f, 0.41f, -0.12f, 0.39f, 0.65f,
        /*y1x0*/ 0.2f,  0.2f, -10.f,  5.2f,  0.01f,  0.015f, 0.29f, 0.05f, 0.41f, -0.31f, 0.29f, 1.35f
    });

    // NOTE: the original literals were lost from this file; these values are
    // reconstructed analytically as 1/(1 + exp(-|y1 - y0|)) per batch from the
    // inputs above.
    float expected_max_values[batch_num * feature_num * x_size] = {
        0.52497919f,  //b=0:  |0.2 - 0.1|     = 0.1
        0.57444252f,  //b=1:  |0.2 - -0.1|    = 0.3
        0.99998154f,  //b=2:  |-10 - 0.9|     = 10.9
        0.97587298f,  //b=3:  |5.2 - 1.5|     = 3.7
        0.53494295f,  //b=4:  |0.01 - 0.15|   = 0.14
        0.50624967f,  //b=5:  |0.015 - -0.01| = 0.025
        0.52497919f,  //b=6:  |0.29 - 0.19|   = 0.1
        0.59868766f,  //b=7:  |0.05 - 0.45|   = 0.4
        0.5f,         //b=8:  |0.41 - 0.41|   = 0
        0.54735762f,  //b=9:  |-0.31 - -0.12| = 0.19
        0.52497919f,  //b=10: |0.29 - 0.39|   = 0.1
        0.66818777f   //b=11: |1.35 - 0.65|   = 0.7
    };
    network network(engine, topology);

    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "softmax");

    auto output = outputs.at("softmax").get_memory();
    auto output_ptr = output.pointer<float>();
    float out_buffer[buf_size];
    for (uint32_t i = 0; i < buf_size; i++)
    {
        out_buffer[i] = get_value<float>(output_ptr, i);
    }
    float expected_sum = 1.0f;
    float temp_max = 0;

    for (uint32_t b = 0; b < batch_num; b++)
    {
        for (uint32_t f = 0; f < feature_num; f++)
        {
            for (uint32_t x = 0; x < x_size; x++)
            {
                float sum = 0.0f;
                for (uint32_t y = 0; y < y_size; y++)
                {
                    // this indexing is only valid because feature_num == 1 and x_size == 1
                    int index = b + y * batch_num + f * feature_num + x * x_size;
                    if (out_buffer[index] >= temp_max)
                    {
                        temp_max = out_buffer[index];
                    }
                    sum += out_buffer[index];
                }
                EXPECT_EQ(true, are_equal(temp_max, expected_max_values[b * feature_num * x_size + f * x_size + x]));
                temp_max = 0;

                EXPECT_EQ(true, are_equal(sum, expected_sum));
            }
        }
    }
}
//////////////////////////////////////////////////////////////////////////////
//                      Exhaustive Negative Matrix tests                    //
//////////////////////////////////////////////////////////////////////////////

//TEST(NegativeSoftmaxTest, DISABLED_TestAll) {
//}

//////////////////////////////////////////////////////////////////////////////
//                      Exhaustive Positive Matrix tests                    //
//////////////////////////////////////////////////////////////////////////////
class softmax_test : public tests::generic_test
{
public:
    softmax_test() : tests::generic_test()
    {
    }

    virtual void SetUp() override
    {
        max_ulps_diff_allowed = 6;
    }

    static void TearDownTestCase()
    {
        for (auto generic_params : all_generic_params)
        {
            delete generic_params;
        }

        for (auto layer_params : all_layer_params)
        {
            delete layer_params;
        }
    }
    static std::vector<cldnn::primitive*> generate_specific_test_params()
    {
        all_layer_params.push_back(new softmax("softmax", "input0", softmax::normalize_f));

        //The test checks only valid combinations.
        //TODO: add more combinations.

        return all_layer_params;
    }

    static std::vector<tests::test_params*> generate_generic_test_params()
    {
        return generic_test::generate_generic_test_params(all_generic_params);
    }
    virtual bool is_format_supported(cldnn::format format) override
    {
        return
            format == cldnn_format_type::cldnn_format_yxfb ||
            format == cldnn_format_type::cldnn_format_bfyx;
    }
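    // Reference implementation used to check the GPU results: a numerically
    // stable softmax along the feature axis. The per-(b, y, x) maximum is
    // subtracted before exponentiating so std::exp cannot overflow, then each
    // exponential is divided by the partition sum Z.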
    template<typename Type>
    memory generate_reference_typed(const std::vector<memory> & inputs)
    {
        assert(inputs.size() == 1);
        const memory & input = inputs[0];

        auto output = memory::allocate(engine, cldnn::layout(input.get_layout().data_type, input.get_layout().format, input.get_layout().size));

        // const auto params = static_cast<cldnn::softmax *>(layer_params);

        const auto in0_mem = input.pointer<Type>();
        auto out_mem = output.pointer<Type>();

        const int in0_b = input.get_layout().size.sizes()[0];
        const int in0_f = input.get_layout().size.sizes()[1];
        const int in0_h = input.get_layout().size.sizes()[3];
        const int in0_w = input.get_layout().size.sizes()[2];
        // const int out_b = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[0];
        // const int out_f = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[1];
        // const int out_h = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[2];
        // const int out_w = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[3];

        // assert(in0_b == out_b);
        // assert(in0_f == out_f);
        // assert(in0_h == out_h);
        // assert(in0_w == out_w);
        std::vector<float> cached_exp_vals;
        cached_exp_vals.resize(in0_f);

        const auto input_desc = get_linear_memory_desc(input.get_layout());

        for (int n = 0; n < in0_b; ++n)
        for (int y = 0; y < in0_h; ++y)
        for (int x = 0; x < in0_w; ++x)
        {
            // pass 1: find the per-(b, y, x) maximum across features
            float max_val = -std::numeric_limits<float>::infinity();

            for (int c = 0; c < in0_f; ++c)
            {
                const size_t in0_idx = get_linear_index(input.get_layout(), n, c, y, x, input_desc);
                max_val = std::max(max_val, static_cast<float>(in0_mem[in0_idx]));
            }

            // pass 2: accumulate the partition sum Z of the shifted exponentials
            float Z = 0;
            for (int c = 0; c < in0_f; ++c)
            {
                const size_t in0_idx = get_linear_index(input.get_layout(), n, c, y, x, input_desc);
                float tmp = static_cast<float>((Type)std::exp(static_cast<float>(in0_mem[in0_idx]) - max_val));
                Z += tmp;
                cached_exp_vals[c] = tmp;
            }

            // pass 3: normalize
            for (int c = 0; c < in0_f; ++c)
            {
                const size_t out_idx = get_linear_index(output.get_layout(), n, c, y, x, input_desc);
                out_mem[out_idx] = (Type)(cached_exp_vals[c] / Z);
            }
        }

        return output;
    }
    virtual memory generate_reference(const std::vector<memory> & inputs) override
    {
        if (generic_params->data_type == data_types::f32)
        {
            return generate_reference_typed<float>(inputs);
        }
        else
        {
            return generate_reference_typed<FLOAT16>(inputs);
        }
    }
    static std::string custom_param_name(const ::testing::TestParamInfo<std::tuple<test_params*, cldnn::primitive*>>& info)
    {
        std::stringstream res;

        const auto & p = std::get<0>(info.param);

        assert (p->data_type == data_types::f32 ||
                p->data_type == data_types::f16);

        res << info.index
            << "_" << (p->data_type == data_types::f32 ? "f32" : "f16");

        for (unsigned i = 0; i < p->input_layouts.size(); ++i)
        {
            const auto chans = format::traits(p->fmt).order;

            res << "_" << "Input" << i;
            for (unsigned int j = 0; j < p->input_layouts[i].size.sizes(p->fmt).size(); ++j)
            {
                res << chans[j] << p->input_layouts[i].size.sizes(p->fmt)[j];
            }
        }

        return res.str();
    }
private:
    static std::vector<tests::test_params*> all_generic_params;
    static std::vector<cldnn::primitive*> all_layer_params;
};

std::vector<cldnn::primitive*> softmax_test::all_layer_params = {};
std::vector<tests::test_params*> softmax_test::all_generic_params = {};
TEST_P(softmax_test, SOFTMAX)
{
    // run_single_test() is inherited from tests::generic_test
    run_single_test();
}

INSTANTIATE_TEST_CASE_P(DISABLED_SOFTMAX,
    softmax_test,
    ::testing::Combine(::testing::ValuesIn(softmax_test::generate_generic_test_params()), ::testing::ValuesIn(softmax_test::generate_specific_test_params())),
    softmax_test::custom_param_name);