2 // Copyright (c) 2019 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
18 #include <gtest/gtest.h>
19 #include <api/input_layout.hpp>
20 #include "api/reduce.hpp"
21 #include <api/topology.hpp>
22 #include <api/network.hpp>
23 #include <api/engine.hpp>
24 #include "test_utils/test_utils.h"
25 #include <api/data.hpp>
26 #include "test_utils/float16.h"
28 using namespace cldnn;
29 using namespace tests;
31 TEST(reduce_gpu, common_bfyx) {
32 const auto& engine = get_test_engine();
33 auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, 1, 1}});
35 set_values(input, {1.0f});
38 topology.add(input_layout("input", input.get_layout()));
39 topology.add(reduce("reduce", "input", reduce_mode::sum, {cldnn::reduce::along_b}, 0));
41 network network(engine, topology);
43 network.set_input_data("input", input);
45 auto outputs = network.execute();
47 EXPECT_EQ(outputs.size(), size_t(1));
48 EXPECT_EQ(outputs.begin()->first, "reduce");
50 auto output = outputs.at("reduce").get_memory();
52 std::vector<float> ref_data = {1.0f};
54 auto output_ptr = output.pointer<float>();
56 for (size_t i = 0; i < ref_data.size(); ++i) {
57 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
61 TEST(reduce_gpu, common_bfyx_keepdims) {
62 const auto& engine = get_test_engine();
63 auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 3, 4, 1}});
65 set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f});
68 topology.add(input_layout("input", input.get_layout()));
69 topology.add(reduce("reduce", "input", reduce_mode::sum, {cldnn::reduce::along_x, cldnn::reduce::along_y}, 1));
71 network network(engine, topology);
73 network.set_input_data("input", input);
75 auto outputs = network.execute();
77 EXPECT_EQ(outputs.size(), size_t(1));
78 EXPECT_EQ(outputs.begin()->first, "reduce");
80 auto output = outputs.at("reduce").get_memory();
82 std::vector<float> ref_data = {6.0f, 22.0f, 38.0f};
84 auto output_ptr = output.pointer<float>();
86 for (size_t i = 0; i < ref_data.size(); ++i) {
87 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
91 TEST(reduce_gpu, regr_bfyx_keepdims) {
92 const auto& engine = get_test_engine();
93 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {1, 3, 2, 2} });
95 set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f });
98 topology.add(input_layout("input", input.get_layout()));
99 topology.add(reduce("reduce", "input", reduce_mode::sum, { cldnn::reduce::along_b, cldnn::reduce::along_x }, 1));
101 network network(engine, topology);
103 network.set_input_data("input", input);
105 auto outputs = network.execute();
107 EXPECT_EQ(outputs.size(), size_t(1));
108 EXPECT_EQ(outputs.begin()->first, "reduce");
110 auto output = outputs.at("reduce").get_memory();
112 std::vector<float> ref_data = { 1.0f, 5.0f, 9.0f, 13.0f, 17.0f, 21.0f };
114 auto output_ptr = output.pointer<float>();
116 for (size_t i = 0; i < ref_data.size(); ++i) {
117 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
121 TEST(reduce_gpu, common_bfzyx) {
122 const auto& engine = get_test_engine();
123 auto input = memory::allocate(engine, {data_types::f32, format::bfzyx, {1, 1, 1, 1, 1}});
125 set_values(input, {1.0f});
128 topology.add(input_layout("input", input.get_layout()));
129 topology.add(reduce("reduce", "input", reduce_mode::sum, {cldnn::reduce::along_b}, 0));
131 network network(engine, topology);
133 network.set_input_data("input", input);
135 auto outputs = network.execute();
137 EXPECT_EQ(outputs.size(), size_t(1));
138 EXPECT_EQ(outputs.begin()->first, "reduce");
140 auto output = outputs.at("reduce").get_memory();
142 std::vector<float> ref_data = {1.0f};
144 auto output_ptr = output.pointer<float>();
146 for (size_t i = 0; i < ref_data.size(); ++i) {
147 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
151 TEST(reduce_gpu, common_bfzyx_keepdims) {
152 const auto& engine = get_test_engine();
153 auto input = memory::allocate(engine, {data_types::f32, format::bfzyx, {1, 1, 1, 1, 1}});
155 set_values(input, {1.0f});
158 topology.add(input_layout("input", input.get_layout()));
159 topology.add(reduce("reduce", "input", reduce_mode::sum, {cldnn::reduce::along_b}, 1));
161 network network(engine, topology);
163 network.set_input_data("input", input);
165 auto outputs = network.execute();
167 EXPECT_EQ(outputs.size(), size_t(1));
168 EXPECT_EQ(outputs.begin()->first, "reduce");
170 auto output = outputs.at("reduce").get_memory();
172 std::vector<float> ref_data = {1.0f};
174 auto output_ptr = output.pointer<float>();
176 for (size_t i = 0; i < ref_data.size(); ++i) {
177 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
181 TEST(reduce_gpu, common_bfwzyx) {
182 const auto& engine = get_test_engine();
183 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, tensor(format::bfwzyx, {1, 3, 4, 1, 1, 1})});
185 set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f});
188 topology.add(input_layout("input", input.get_layout()));
189 topology.add(reduce("reduce", "input", reduce_mode::sum, {cldnn::reduce::along_w, cldnn::reduce::along_z, cldnn::reduce::along_y, cldnn::reduce::along_x}, 0));
191 network network(engine, topology);
193 network.set_input_data("input", input);
195 auto outputs = network.execute();
197 EXPECT_EQ(outputs.size(), size_t(1));
198 EXPECT_EQ(outputs.begin()->first, "reduce");
200 auto output = outputs.at("reduce").get_memory();
202 std::vector<float> ref_data = {6.0f, 22.0f, 38.0f};
204 auto output_ptr = output.pointer<float>();
206 for (size_t i = 0; i < ref_data.size(); ++i) {
207 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
211 TEST(reduce_gpu, common_bfwzyx_keepdims) {
212 const auto& engine = get_test_engine();
213 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, tensor(format::bfwzyx, {1, 3, 4, 1, 1, 1})});
215 set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f});
218 topology.add(input_layout("input", input.get_layout()));
219 topology.add(reduce("reduce", "input", reduce_mode::sum, {cldnn::reduce::along_f, cldnn::reduce::along_w, cldnn::reduce::along_z}, 1));
221 network network(engine, topology);
223 network.set_input_data("input", input);
225 auto outputs = network.execute();
227 EXPECT_EQ(outputs.size(), size_t(1));
228 EXPECT_EQ(outputs.begin()->first, "reduce");
230 auto output = outputs.at("reduce").get_memory();
232 std::vector<float> ref_data = {66.0f};
234 auto output_ptr = output.pointer<float>();
236 for (size_t i = 0; i < ref_data.size(); ++i) {
237 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
241 TEST(reduce_gpu, common_bfwzyx_max_keepdims) {
242 const auto& engine = get_test_engine();
243 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 4, 1, 1, 1}});
245 set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f,
246 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f});
249 topology.add(input_layout("input", input.get_layout()));
250 topology.add(reduce("reduce", "input", reduce_mode::max, {cldnn::reduce::along_b, cldnn::reduce::along_f}, 1));
252 network network(engine, topology);
254 network.set_input_data("input", input);
256 auto outputs = network.execute();
258 EXPECT_EQ(outputs.size(), size_t(1));
259 EXPECT_EQ(outputs.begin()->first, "reduce");
261 auto output = outputs.at("reduce").get_memory();
263 std::vector<float> ref_data = {20.0f, 21.0f, 22.0f, 23.0f};
265 auto output_ptr = output.pointer<float>();
267 for (size_t i = 0; i < ref_data.size(); ++i) {
268 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
272 TEST(reduce_gpu, common_bfwzyx_min) {
273 const auto& engine = get_test_engine();
274 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}});
276 set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f});
279 topology.add(input_layout("input", input.get_layout()));
280 topology.add(reduce("reduce", "input", reduce_mode::min, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 0));
282 network network(engine, topology);
284 network.set_input_data("input", input);
286 auto outputs = network.execute();
288 EXPECT_EQ(outputs.size(), size_t(1));
289 EXPECT_EQ(outputs.begin()->first, "reduce");
291 auto output = outputs.at("reduce").get_memory();
293 std::vector<float> ref_data = {0.0f, 3.0f};
295 auto output_ptr = output.pointer<float>();
297 for (size_t i = 0; i < ref_data.size(); ++i) {
298 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
302 TEST(reduce_gpu, common_bfwzyx_min_keepdims) {
303 const auto& engine = get_test_engine();
304 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}});
306 set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f});
309 topology.add(input_layout("input", input.get_layout()));
310 topology.add(reduce("reduce", "input", reduce_mode::min, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 1));
312 network network(engine, topology);
314 network.set_input_data("input", input);
316 auto outputs = network.execute();
318 EXPECT_EQ(outputs.size(), size_t(1));
319 EXPECT_EQ(outputs.begin()->first, "reduce");
321 auto output = outputs.at("reduce").get_memory();
323 std::vector<float> ref_data = {0.0f, 3.0f};
325 auto output_ptr = output.pointer<float>();
327 for (size_t i = 0; i < ref_data.size(); ++i) {
328 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
332 TEST(reduce_gpu, common_bfwzyx_mean) {
333 const auto& engine = get_test_engine();
334 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}});
336 set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f});
339 topology.add(input_layout("input", input.get_layout()));
340 topology.add(reduce("reduce", "input", reduce_mode::mean, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 0));
342 network network(engine, topology);
344 network.set_input_data("input", input);
346 auto outputs = network.execute();
348 EXPECT_EQ(outputs.size(), size_t(1));
349 EXPECT_EQ(outputs.begin()->first, "reduce");
351 auto output = outputs.at("reduce").get_memory();
353 std::vector<float> ref_data = {1.0f, 4.0f};
355 auto output_ptr = output.pointer<float>();
357 for (size_t i = 0; i < ref_data.size(); ++i) {
358 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
362 TEST(reduce_gpu, common_bfwzyx_mean_keepdims) {
363 const auto& engine = get_test_engine();
364 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}});
366 set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f});
369 topology.add(input_layout("input", input.get_layout()));
370 topology.add(reduce("reduce", "input", reduce_mode::mean, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 1));
372 network network(engine, topology);
374 network.set_input_data("input", input);
376 auto outputs = network.execute();
378 EXPECT_EQ(outputs.size(), size_t(1));
379 EXPECT_EQ(outputs.begin()->first, "reduce");
381 auto output = outputs.at("reduce").get_memory();
383 std::vector<float> ref_data = {1.0f, 4.0f};
385 auto output_ptr = output.pointer<float>();
387 for (size_t i = 0; i < ref_data.size(); ++i) {
388 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
392 TEST(reduce_gpu, common_bfwzyx_prod) {
393 const auto& engine = get_test_engine();
394 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}});
396 set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f});
399 topology.add(input_layout("input", input.get_layout()));
400 topology.add(reduce("reduce", "input", reduce_mode::prod, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 0));
402 network network(engine, topology);
404 network.set_input_data("input", input);
406 auto outputs = network.execute();
408 EXPECT_EQ(outputs.size(), size_t(1));
409 EXPECT_EQ(outputs.begin()->first, "reduce");
411 auto output = outputs.at("reduce").get_memory();
413 std::vector<float> ref_data = {0.0f, 60.0f};
415 auto output_ptr = output.pointer<float>();
417 for (size_t i = 0; i < ref_data.size(); ++i) {
418 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
422 TEST(reduce_gpu, common_bfwzyx_prod_keepdims) {
423 const auto& engine = get_test_engine();
424 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}});
426 set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f});
429 topology.add(input_layout("input", input.get_layout()));
430 topology.add(reduce("reduce", "input", reduce_mode::prod, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 1));
432 network network(engine, topology);
434 network.set_input_data("input", input);
436 auto outputs = network.execute();
438 EXPECT_EQ(outputs.size(), size_t(1));
439 EXPECT_EQ(outputs.begin()->first, "reduce");
441 auto output = outputs.at("reduce").get_memory();
443 std::vector<float> ref_data = {0.0f, 60.0f};
445 auto output_ptr = output.pointer<float>();
447 for (size_t i = 0; i < ref_data.size(); ++i) {
448 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
452 TEST(reduce_gpu, common_bfwzyx_sum_keepdims) {
453 const auto& engine = get_test_engine();
454 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 4, 1, 1, 1}});
456 set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f,
457 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f});
460 topology.add(input_layout("input", input.get_layout()));
461 topology.add(reduce("reduce", "input", reduce_mode::sum, {cldnn::reduce::along_b, cldnn::reduce::along_f}, 1));
463 network network(engine, topology);
465 network.set_input_data("input", input);
467 auto outputs = network.execute();
469 EXPECT_EQ(outputs.size(), size_t(1));
470 EXPECT_EQ(outputs.begin()->first, "reduce");
472 auto output = outputs.at("reduce").get_memory();
474 std::vector<float> ref_data = {60.0f, 66.0f, 72.0f, 78.0f};
476 auto output_ptr = output.pointer<float>();
478 for (size_t i = 0; i < ref_data.size(); ++i) {
479 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
483 TEST(reduce_gpu, common_bfwzyx_logical_and) {
484 const auto& engine = get_test_engine();
485 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}});
487 set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f});
490 topology.add(input_layout("input", input.get_layout()));
491 topology.add(reduce("reduce", "input", reduce_mode::logical_and, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 0));
493 network network(engine, topology);
495 network.set_input_data("input", input);
497 auto outputs = network.execute();
499 EXPECT_EQ(outputs.size(), size_t(1));
500 EXPECT_EQ(outputs.begin()->first, "reduce");
502 auto output = outputs.at("reduce").get_memory();
504 std::vector<char> ref_data = {0, 1};
506 auto output_ptr = output.pointer<char>();
508 for (size_t i = 0; i < ref_data.size(); ++i) {
509 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
513 TEST(reduce_gpu, common_bfwzyx_logical_and_keepdims) {
514 const auto& engine = get_test_engine();
515 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}});
517 set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f});
520 topology.add(input_layout("input", input.get_layout()));
521 topology.add(reduce("reduce", "input", reduce_mode::logical_and, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 1));
523 network network(engine, topology);
525 network.set_input_data("input", input);
527 auto outputs = network.execute();
529 EXPECT_EQ(outputs.size(), size_t(1));
530 EXPECT_EQ(outputs.begin()->first, "reduce");
532 auto output = outputs.at("reduce").get_memory();
534 std::vector<char> ref_data = {0, 1};
536 auto output_ptr = output.pointer<char>();
538 for (size_t i = 0; i < ref_data.size(); ++i) {
539 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
543 TEST(reduce_gpu, common_bfwzyx_logical_or) {
544 const auto& engine = get_test_engine();
545 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}});
547 set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f});
550 topology.add(input_layout("input", input.get_layout()));
551 topology.add(reduce("reduce", "input", reduce_mode::logical_or, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 0));
553 network network(engine, topology);
555 network.set_input_data("input", input);
557 auto outputs = network.execute();
559 EXPECT_EQ(outputs.size(), size_t(1));
560 EXPECT_EQ(outputs.begin()->first, "reduce");
562 auto output = outputs.at("reduce").get_memory();
564 std::vector<char> ref_data = {1, 1};
566 auto output_ptr = output.pointer<char>();
568 for (size_t i = 0; i < ref_data.size(); ++i) {
569 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
573 TEST(reduce_gpu, common_bfwzyx_logical_or_keepdims) {
574 const auto& engine = get_test_engine();
575 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}});
577 set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f});
580 topology.add(input_layout("input", input.get_layout()));
581 topology.add(reduce("reduce", "input", reduce_mode::logical_or, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 1));
583 network network(engine, topology);
585 network.set_input_data("input", input);
587 auto outputs = network.execute();
589 EXPECT_EQ(outputs.size(), size_t(1));
590 EXPECT_EQ(outputs.begin()->first, "reduce");
592 auto output = outputs.at("reduce").get_memory();
594 std::vector<char> ref_data = {1, 1};
596 auto output_ptr = output.pointer<char>();
598 for (size_t i = 0; i < ref_data.size(); ++i) {
599 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
603 TEST(reduce_gpu, common_bfwzyx_sum_square) {
604 const auto& engine = get_test_engine();
605 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}});
607 set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f});
610 topology.add(input_layout("input", input.get_layout()));
611 topology.add(reduce("reduce", "input", reduce_mode::sum_square, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 0));
613 network network(engine, topology);
615 network.set_input_data("input", input);
617 auto outputs = network.execute();
619 EXPECT_EQ(outputs.size(), size_t(1));
620 EXPECT_EQ(outputs.begin()->first, "reduce");
622 auto output = outputs.at("reduce").get_memory();
624 std::vector<float> ref_data = {5.0f, 50.0f};
626 auto output_ptr = output.pointer<float>();
628 for (size_t i = 0; i < ref_data.size(); ++i) {
629 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
633 TEST(reduce_gpu, common_bfwzyx_sum_square_keepdims) {
634 const auto& engine = get_test_engine();
635 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}});
637 set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f});
640 topology.add(input_layout("input", input.get_layout()));
641 topology.add(reduce("reduce", "input", reduce_mode::sum_square, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 1));
643 network network(engine, topology);
645 network.set_input_data("input", input);
647 auto outputs = network.execute();
649 EXPECT_EQ(outputs.size(), size_t(1));
650 EXPECT_EQ(outputs.begin()->first, "reduce");
652 auto output = outputs.at("reduce").get_memory();
654 std::vector<float> ref_data = {5.0f, 50.0f};
656 auto output_ptr = output.pointer<float>();
658 for (size_t i = 0; i < ref_data.size(); ++i) {
659 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
663 TEST(reduce_gpu, common_bfwzyx_l1) {
664 const auto& engine = get_test_engine();
665 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}});
667 set_values(input, {0.0f, 1.0f, -2.0f, 3.0f, 4.0f, -5.0f});
670 topology.add(input_layout("input", input.get_layout()));
671 topology.add(reduce("reduce", "input", reduce_mode::l1, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 0));
673 network network(engine, topology);
675 network.set_input_data("input", input);
677 auto outputs = network.execute();
679 EXPECT_EQ(outputs.size(), size_t(1));
680 EXPECT_EQ(outputs.begin()->first, "reduce");
682 auto output = outputs.at("reduce").get_memory();
684 std::vector<float> ref_data = {3.0f, 12.0f};
686 auto output_ptr = output.pointer<float>();
688 for (size_t i = 0; i < ref_data.size(); ++i) {
689 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
693 TEST(reduce_gpu, common_bfwzyx_l1_keepdims) {
694 const auto& engine = get_test_engine();
695 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}});
697 set_values(input, {0.0f, 1.0f, -2.0f, 3.0f, 4.0f, -5.0f});
700 topology.add(input_layout("input", input.get_layout()));
701 topology.add(reduce("reduce", "input", reduce_mode::l1, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 1));
703 network network(engine, topology);
705 network.set_input_data("input", input);
707 auto outputs = network.execute();
709 EXPECT_EQ(outputs.size(), size_t(1));
710 EXPECT_EQ(outputs.begin()->first, "reduce");
712 auto output = outputs.at("reduce").get_memory();
714 std::vector<float> ref_data = {3.0f, 12.0f};
716 auto output_ptr = output.pointer<float>();
718 for (size_t i = 0; i < ref_data.size(); ++i) {
719 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
723 TEST(reduce_gpu, common_bfwzyx_l2) {
724 const auto& engine = get_test_engine();
725 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}});
727 set_values(input, {0.0f, 1.0f, -2.0f, 3.0f, 4.0f, -5.0f});
730 topology.add(input_layout("input", input.get_layout()));
731 topology.add(reduce("reduce", "input", reduce_mode::l2, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 0));
733 network network(engine, topology);
735 network.set_input_data("input", input);
737 auto outputs = network.execute();
739 EXPECT_EQ(outputs.size(), size_t(1));
740 EXPECT_EQ(outputs.begin()->first, "reduce");
742 auto output = outputs.at("reduce").get_memory();
744 std::vector<float> ref_data = {2.236067977f, 7.071067812f};
746 auto output_ptr = output.pointer<float>();
748 for (size_t i = 0; i < ref_data.size(); ++i) {
749 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
753 TEST(reduce_gpu, common_bfwzyx_l2_keepdims) {
754 const auto& engine = get_test_engine();
755 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}});
757 set_values(input, {0.0f, 1.0f, -2.0f, 3.0f, 4.0f, -5.0f});
760 topology.add(input_layout("input", input.get_layout()));
761 topology.add(reduce("reduce", "input", reduce_mode::l2, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 1));
763 network network(engine, topology);
765 network.set_input_data("input", input);
767 auto outputs = network.execute();
769 EXPECT_EQ(outputs.size(), size_t(1));
770 EXPECT_EQ(outputs.begin()->first, "reduce");
772 auto output = outputs.at("reduce").get_memory();
774 std::vector<float> ref_data = {2.236067977f, 7.071067812f};
776 auto output_ptr = output.pointer<float>();
778 for (size_t i = 0; i < ref_data.size(); ++i) {
779 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
783 TEST(reduce_gpu, common_bfwzyx_log_sum) {
784 const auto& engine = get_test_engine();
785 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}});
787 set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f});
790 topology.add(input_layout("input", input.get_layout()));
791 topology.add(reduce("reduce", "input", reduce_mode::log_sum, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 0));
793 network network(engine, topology);
795 network.set_input_data("input", input);
797 auto outputs = network.execute();
799 EXPECT_EQ(outputs.size(), size_t(1));
800 EXPECT_EQ(outputs.begin()->first, "reduce");
802 auto output = outputs.at("reduce").get_memory();
804 std::vector<float> ref_data = {1.0986122887f, 2.4849066498f};
806 auto output_ptr = output.pointer<float>();
808 for (size_t i = 0; i < ref_data.size(); ++i) {
809 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
813 TEST(reduce_gpu, common_bfwzyx_log_sum_keepdims) {
814 const auto& engine = get_test_engine();
815 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}});
817 set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f});
820 topology.add(input_layout("input", input.get_layout()));
821 topology.add(reduce("reduce", "input", reduce_mode::log_sum, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 1));
823 network network(engine, topology);
825 network.set_input_data("input", input);
827 auto outputs = network.execute();
829 EXPECT_EQ(outputs.size(), size_t(1));
830 EXPECT_EQ(outputs.begin()->first, "reduce");
832 auto output = outputs.at("reduce").get_memory();
834 std::vector<float> ref_data = {1.0986122887f, 2.4849066498f};
836 auto output_ptr = output.pointer<float>();
838 for (size_t i = 0; i < ref_data.size(); ++i) {
839 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
843 TEST(reduce_gpu, common_bfwzyx_log_sum_exp) {
844 const auto& engine = get_test_engine();
845 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}});
847 set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f});
850 topology.add(input_layout("input", input.get_layout()));
851 topology.add(reduce("reduce", "input", reduce_mode::log_sum_exp, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 0));
853 network network(engine, topology);
855 network.set_input_data("input", input);
857 auto outputs = network.execute();
859 EXPECT_EQ(outputs.size(), size_t(1));
860 EXPECT_EQ(outputs.begin()->first, "reduce");
862 auto output = outputs.at("reduce").get_memory();
864 std::vector<float> ref_data = {2.407605964f, 5.407605964f};
866 auto output_ptr = output.pointer<float>();
868 for (size_t i = 0; i < ref_data.size(); ++i) {
869 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));
873 TEST(reduce_gpu, common_bfwzyx_log_sum_exp_keepdims) {
874 const auto& engine = get_test_engine();
875 auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}});
877 set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f});
880 topology.add(input_layout("input", input.get_layout()));
881 topology.add(reduce("reduce", "input", reduce_mode::log_sum_exp, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 1));
883 network network(engine, topology);
885 network.set_input_data("input", input);
887 auto outputs = network.execute();
889 EXPECT_EQ(outputs.size(), size_t(1));
890 EXPECT_EQ(outputs.begin()->first, "reduce");
892 auto output = outputs.at("reduce").get_memory();
894 std::vector<float> ref_data = {2.407605964f, 5.407605964f};
896 auto output_ptr = output.pointer<float>();
898 for (size_t i = 0; i < ref_data.size(); ++i) {
899 EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i]));