2 // Copyright (c) 2016-2019 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
18 #include <gtest/gtest.h>
19 #include "api/CPP/memory.hpp"
20 #include <api/CPP/input_layout.hpp>
21 #include "api/CPP/eltwise.hpp"
22 #include <api/CPP/topology.hpp>
23 #include <api/CPP/network.hpp>
24 #include <api/CPP/engine.hpp>
25 #include <api/CPP/reorder.hpp>
26 #include <api/CPP/data.hpp>
27 #include "test_utils/test_utils.h"
// Test-local specialization that maps the FLOAT16 type to data_types::f16, so that
// type_to_data_type<T>::value resolves when a templated helper is instantiated with FLOAT16.
31 template<> struct type_to_data_type<FLOAT16> { static const data_types value = data_types::f16; };
34 using namespace cldnn;
35 using namespace tests;
// Host-side reference for one element pair: selects an operation by `mode`, applies it to
// x and y and returns the result as T.
// Only the max, min, pow and mod bodies are visible in this listing. pow and mod are
// evaluated on float-cast operands and the float result is converted back to T on return.
38 T eltwise_execute(cldnn::eltwise_mode mode, T x, T y) {
40 case eltwise_mode::sum:
42 case eltwise_mode::sub:
44 case eltwise_mode::max:
45 return std::max(x, y);
46 case eltwise_mode::prod:
48 case eltwise_mode::div:
50 case eltwise_mode::min:
51 return std::min(x, y);
52 case eltwise_mode::pow:
53 return std::pow((float)x, (float)y);
54 case eltwise_mode::mod:
55 return std::fmod((float)x, (float)y);
// Builds the expected 4D result of an element-wise operation over input1 and input2.
//   input1, input2 - operands indexed as [b][f][y][x]; all loop bounds come from input1.
//   mode           - operation forwarded to eltwise_execute.
//   relu, slope    - activation controls; only the `relu && res < 0` test is visible here,
//                    the statement it guards is not part of this listing.
//   *_padding_y/x  - input and output paddings; their sums enlarge the result grid.
62 VVVVF<T> eltwise_reference(VVVVF<T> &input1, VVVVF<T> &input2,
63 cldnn::eltwise_mode mode, bool relu = false, T slope = 0.0f,
64 int input_padding_y = 0, int input_padding_x = 0,
65 int output_padding_y = 0, int output_padding_x = 0) {
// Input and output paddings are added together and applied on both sides of y and x.
67 size_t padding_y = input_padding_y + output_padding_y;
68 size_t padding_x = input_padding_x + output_padding_x;
69 size_t output_b = input1.size();
70 size_t output_f = input1[0].size();
71 size_t output_y = input1[0][0].size() + 2 * padding_y;
72 size_t output_x = input1[0][0][0].size() + 2 * padding_x;
73 VVVVF<T> output(output_b, VVVF<T>(output_f, VVF<T>(output_y, VF<T>(output_x))));
// Only the un-padded window is computed; cells in the padding border are left as constructed.
76 for (size_t b = 0; b < output_b; ++b) {
77 for (size_t f = 0; f < output_f; ++f) {
78 for (size_t y = 0; y < input1[0][0].size(); ++y) {
79 for (size_t x = 0; x < input1[0][0][0].size(); ++x) {
80 res = eltwise_execute<T>(mode, input1[b][f][y][x], input2[b][f][y][x]);
81 if (relu && res < (T)0)
// Results are stored shifted by the combined padding so they sit inside the border.
83 output[b][f][y + padding_y][x + padding_x] = res;
// Runs one eltwise configuration through a network and compares the result with
// eltwise_reference.
//   test_input_fmt      - memory format used for both inputs and expected for the output.
//   input_b/f/y/x       - extents of the randomly generated operands.
//   mode, relu, slope   - forwarded to the eltwise primitive and to the reference.
//   input_padding_y/x   - padding added to input1 by the "reorder1" primitive.
//   output_padding_y/x  - padding requested on the eltwise output.
92 void generic_eltwise_test(cldnn::format test_input_fmt, int input_b, int input_f, int input_y, int input_x, cldnn::eltwise_mode mode,
93 bool relu, T slope, int input_padding_y, int input_padding_x, int output_padding_y, int output_padding_x) {
// Both operands are generated with the same bounds (-2, 2) passed to generate_random_4d.
95 int min_random = -2, max_random = 2;
96 VVVVF<T> input1_rnd = generate_random_4d<T>(input_b, input_f, input_y, input_x, min_random, max_random);
97 VVVVF<T> input2_rnd = generate_random_4d<T>(input_b, input_f, input_y, input_x, min_random, max_random);
98 VF<T> input1_rnd_vec = flatten_4d<T>(test_input_fmt, input1_rnd);
99 VF<T> input2_rnd_vec = flatten_4d<T>(test_input_fmt, input2_rnd);
101 const auto& engine = get_test_engine();
// Note the argument order: x is passed before y when building the tensor.
102 tensor input_tensor( input_b, input_f, input_x, input_y );
103 auto input1 = memory::allocate(engine, { type_to_data_type<T>::value, test_input_fmt, input_tensor });
104 auto input2 = memory::allocate(engine, { type_to_data_type<T>::value, test_input_fmt, input_tensor });
105 set_values(input1, input1_rnd_vec);
106 set_values(input2, input2_rnd_vec);
109 topology.add(input_layout("input1", input1.get_layout()));
110 topology.add(input_layout("input2", input2.get_layout()));
// Only input1 goes through a reorder that adds the input padding; input2 feeds eltwise directly.
111 topology.add(reorder("reorder1", "input1", input1.get_layout().with_padding({{ 0, 0, input_padding_x, input_padding_y }, 0 })));
112 topology.add(eltwise("eltwise", {"reorder1", "input2"}, mode, relu, slope, { { 0, 0, output_padding_x, output_padding_y }, 0 }));
114 network network(engine, topology);
115 network.set_input_data("input1", input1);
116 network.set_input_data("input2", input2);
117 auto outputs = network.execute();
// The network is expected to expose exactly one output, named "eltwise".
118 EXPECT_EQ(outputs.size(), size_t(1));
119 EXPECT_EQ(outputs.begin()->first, "eltwise");
121 auto output_memory = outputs.at("eltwise").get_memory();
122 auto output_layout = output_memory.get_layout();
123 auto output_ptr = output_memory.pointer<T>();
// Expected values are computed from the same un-flattened random operands.
125 VVVVF<T> output_cpu = eltwise_reference<T>(input1_rnd, input2_rnd, mode, relu, slope, input_padding_y, input_padding_x, output_padding_y, output_padding_x);
126 EXPECT_EQ(output_layout.format.value, test_input_fmt.value);
// The buffer size of the output layout must match the extents of the reference grid.
127 tensor output_tensor = output_layout.get_buffer_size();
128 int y_size = output_tensor.spatial[1];
129 int x_size = output_tensor.spatial[0];
130 int f_size = output_tensor.feature[0];
131 int b_size = output_tensor.batch[0];
132 EXPECT_EQ(y_size, (int)output_cpu[0][0].size());
133 EXPECT_EQ(x_size, (int)output_cpu[0][0][0].size());
134 EXPECT_EQ(f_size, (int)output_cpu[0].size());
135 EXPECT_EQ(b_size, (int)output_cpu.size());
137 bool test_is_correct = true;
138 VF<T> output_cpu_vec = flatten_4d<T>(test_input_fmt, output_cpu);
// Element-by-element comparison of the flattened reference against the output buffer.
// A position where both values are NaN is accepted as a match.
139 for (size_t i = 0; i < output_cpu_vec.size(); ++i) {
140 if (!floating_point_equal(output_cpu_vec[i], output_ptr[i]) && !(std::isnan((float)output_cpu_vec[i]) && std::isnan((float)output_ptr[i]))) {
141 test_is_correct = false;
// On failure, every parameter is streamed into the message so the case can be reproduced.
145 EXPECT_EQ(test_is_correct, true) << std::endl
146 << "failing test parameters:" << std::endl
147 << "input_b = " << input_b << std::endl
148 << "input_f = " << input_f << std::endl
149 << "input_y = " << input_y << std::endl
150 << "input_x = " << input_x << std::endl
151 << "eltwise_mode = " << (int)mode << std::endl
152 << "relu = " << relu << std::endl
153 << "slope = " << (float)slope << std::endl
154 << "input_padding_y = " << input_padding_y << std::endl
155 << "input_padding_x = " << input_padding_x << std::endl
156 << "output_padding_y = " << output_padding_y << std::endl
157 << "output_padding_x = " << output_padding_x << std::endl
// The type label is derived from sizeof(T): 2 bytes is reported as float16, anything else as float32.
158 << "type = " << (sizeof(T) == 2 ? "float16" : "float32") << std::endl;
// eltwise_mode::eq over two f32 bfyx inputs allocated as { 2, 2, 2, 2 }.
// The output is read back as int8_t and compared element by element with `answers`.
161 TEST(eltwise_gpu_f32, equal_in2_float_out1_int) {
167 // 1.f, 2.5f, 5.f, 1.5f,
168 // 2.f, 0.f, 6.f, 5.2f,
169 // 3.f, 0.5f, 7.f, 12.f,
170 // 4.f, 0.f, 8.f, 8.f
173 // 0.5f, 2.5f, 0.5f, 1.5f,
174 // 5.f, 7.f, 6.f, 4.f,
175 // 15.f, 17.f, 8.f, 10.f,
176 // -2.f, 0.f, -0.5f, -2.5f
184 const auto& engine = get_test_engine();
186 auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
187 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
190 1.f, 2.5f, 5.f, 1.5f,
192 3.f, 0.5f, 7.f, 12.f,
197 0.5f, 2.5f, 0.5f, 1.5f,
199 15.f, 17.f, 8.f, 10.f,
200 -2.f, 0.f, -0.5f, -2.5f
204 topology.add(input_layout("input", input1.get_layout()));
205 topology.add(input_layout("input2", input2.get_layout()));
206 topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::eq));
208 network network(engine, topology);
210 network.set_input_data("input", input1);
211 network.set_input_data("input2", input2);
213 auto outputs = network.execute();
// Exactly one output, named after the eltwise primitive, is expected.
215 EXPECT_EQ(outputs.size(), size_t(1));
216 EXPECT_EQ(outputs.begin()->first, "eltwise");
218 auto output = outputs.at("eltwise").get_memory();
// Float inputs, but the result buffer is accessed as int8_t values.
219 auto output_ptr = output.pointer<int8_t>();
221 std::vector<int8_t> answers = { 0, 1, 0, 1,
226 for (size_t i = 0; i < answers.size(); ++i) {
227 EXPECT_EQ(answers[i], output_ptr[i]);
// eltwise_mode::ne over two f32 bfyx inputs allocated as { 2, 2, 2, 2 }.
// The output is read back as int8_t and compared element by element with `answers`.
231 TEST(eltwise_gpu_f32, not_equal_in2_float_out1_int) {
237 // 1.f, 2.5f, 5.f, 1.5f,
238 // 2.f, 0.f, 6.f, 5.2f,
239 // 3.f, 0.5f, 7.f, 12.f,
240 // 4.f, 0.f, 8.f, 8.f
243 // 0.5f, 2.5f, 0.5f, 1.5f,
244 // 5.f, 7.f, 6.f, 4.f,
245 // 15.f, 17.f, 8.f, 10.f,
246 // -2.f, 0.f, -0.5f, -2.5f
254 const auto& engine = get_test_engine();
256 auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
257 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
260 1.f, 2.5f, 5.f, 1.5f,
262 3.f, 0.5f, 7.f, 12.f,
267 0.5f, 2.5f, 0.5f, 1.5f,
269 15.f, 17.f, 8.f, 10.f,
270 -2.f, 0.f, -0.5f, -2.5f
274 topology.add(input_layout("input", input1.get_layout()));
275 topology.add(input_layout("input2", input2.get_layout()));
276 topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::ne));
278 network network(engine, topology);
280 network.set_input_data("input", input1);
281 network.set_input_data("input2", input2);
283 auto outputs = network.execute();
// Exactly one output, named after the eltwise primitive, is expected.
285 EXPECT_EQ(outputs.size(), size_t(1));
286 EXPECT_EQ(outputs.begin()->first, "eltwise");
288 auto output = outputs.at("eltwise").get_memory();
// Float inputs, but the result buffer is accessed as int8_t values.
289 auto output_ptr = output.pointer<int8_t>();
291 std::vector<int8_t> answers = { 1, 0, 1, 0,
296 for (size_t i = 0; i < answers.size(); ++i) {
297 EXPECT_EQ(answers[i], output_ptr[i]);
// eltwise_mode::lt over two f32 bfyx inputs allocated as { 2, 2, 2, 2 }.
// The output is read back as int8_t and compared element by element with `answers`.
301 TEST(eltwise_gpu_f32, less_in2_float_out1_int) {
307 // 1.f, 2.5f, 5.f, 1.5f,
308 // 2.f, 0.f, 6.f, 5.2f,
309 // 3.f, 0.5f, 7.f, 12.f,
310 // 4.f, 0.f, 8.f, 8.f
313 // 0.5f, 2.5f, 0.5f, 1.5f,
314 // 5.f, 7.f, 6.f, 4.f,
315 // 15.f, 17.f, 8.f, 10.f,
316 // -2.f, 0.f, -0.5f, -2.5f
324 const auto& engine = get_test_engine();
326 auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
327 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
330 1.f, 2.5f, 5.f, 1.5f,
332 3.f, 0.5f, 7.f, 12.f,
337 0.5f, 2.5f, 0.5f, 1.5f,
339 15.f, 17.f, 8.f, 10.f,
340 -2.f, 0.f, -0.5f, -2.5f
344 topology.add(input_layout("input", input1.get_layout()));
345 topology.add(input_layout("input2", input2.get_layout()));
346 topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::lt));
348 network network(engine, topology);
350 network.set_input_data("input", input1);
351 network.set_input_data("input2", input2);
353 auto outputs = network.execute();
// Exactly one output, named after the eltwise primitive, is expected.
355 EXPECT_EQ(outputs.size(), size_t(1));
356 EXPECT_EQ(outputs.begin()->first, "eltwise");
358 auto output = outputs.at("eltwise").get_memory();
// Float inputs, but the result buffer is accessed as int8_t values.
359 auto output_ptr = output.pointer<int8_t>();
361 std::vector<int8_t> answers = { 0, 0, 0, 0,
366 for (size_t i = 0; i < answers.size(); ++i) {
367 EXPECT_EQ(answers[i], output_ptr[i]);
// eltwise_mode::le over two f32 bfyx inputs allocated as { 2, 2, 2, 2 }.
// The output is read back as int8_t and compared element by element with `answers`.
371 TEST(eltwise_gpu_f32, less_equal_in2_float_out1_int) {
377 // 1.f, 2.5f, 5.f, 1.5f,
378 // 2.f, 0.f, 6.f, 5.2f,
379 // 3.f, 0.5f, 7.f, 12.f,
380 // 4.f, 0.f, 8.f, 8.f
383 // 0.5f, 2.5f, 0.5f, 1.5f,
384 // 5.f, 7.f, 6.f, 4.f,
385 // 15.f, 17.f, 8.f, 10.f,
386 // -2.f, 0.f, -0.5f, -2.5f
394 const auto& engine = get_test_engine();
396 auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
397 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
400 1.f, 2.5f, 5.f, 1.5f,
402 3.f, 0.5f, 7.f, 12.f,
407 0.5f, 2.5f, 0.5f, 1.5f,
409 15.f, 17.f, 8.f, 10.f,
410 -2.f, 0.f, -0.5f, -2.5f
414 topology.add(input_layout("input", input1.get_layout()));
415 topology.add(input_layout("input2", input2.get_layout()));
416 topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::le));
418 network network(engine, topology);
420 network.set_input_data("input", input1);
421 network.set_input_data("input2", input2);
423 auto outputs = network.execute();
// Exactly one output, named after the eltwise primitive, is expected.
425 EXPECT_EQ(outputs.size(), size_t(1));
426 EXPECT_EQ(outputs.begin()->first, "eltwise");
428 auto output = outputs.at("eltwise").get_memory();
// Float inputs, but the result buffer is accessed as int8_t values.
429 auto output_ptr = output.pointer<int8_t>();
431 std::vector<int8_t> answers = { 0, 1, 0, 1,
436 for (size_t i = 0; i < answers.size(); ++i) {
437 EXPECT_EQ(answers[i], output_ptr[i]);
// eltwise_mode::gt over two f32 bfyx inputs allocated as { 2, 2, 2, 2 }.
// The output is read back as int8_t and compared element by element with `answers`.
441 TEST(eltwise_gpu_f32, greater_in2_float_out1_int) {
447 // 1.f, 2.5f, 5.f, 1.5f,
448 // 2.f, 0.f, 6.f, 5.2f,
449 // 3.f, 0.5f, 7.f, 12.f,
450 // 4.f, 0.f, 8.f, 8.f
453 // 0.5f, 2.5f, 0.5f, 1.5f,
454 // 5.f, 7.f, 6.f, 4.f,
455 // 15.f, 17.f, 8.f, 10.f,
456 // -2.f, 0.f, -0.5f, -2.5f
464 const auto& engine = get_test_engine();
466 auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
467 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
470 1.f, 2.5f, 5.f, 1.5f,
472 3.f, 0.5f, 7.f, 12.f,
477 0.5f, 2.5f, 0.5f, 1.5f,
479 15.f, 17.f, 8.f, 10.f,
480 -2.f, 0.f, -0.5f, -2.5f
484 topology.add(input_layout("input", input1.get_layout()));
485 topology.add(input_layout("input2", input2.get_layout()));
486 topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::gt));
488 network network(engine, topology);
490 network.set_input_data("input", input1);
491 network.set_input_data("input2", input2);
493 auto outputs = network.execute();
// Exactly one output, named after the eltwise primitive, is expected.
495 EXPECT_EQ(outputs.size(), size_t(1));
496 EXPECT_EQ(outputs.begin()->first, "eltwise");
498 auto output = outputs.at("eltwise").get_memory();
// Float inputs, but the result buffer is accessed as int8_t values.
499 auto output_ptr = output.pointer<int8_t>();
501 std::vector<int8_t> answers = { 1, 0, 1, 0,
506 for (size_t i = 0; i < answers.size(); ++i) {
507 EXPECT_EQ(answers[i], output_ptr[i]);
// eltwise_mode::ge over two f32 bfyx inputs allocated as { 2, 2, 2, 2 }.
// The output is read back as int8_t and compared element by element with `answers`.
511 TEST(eltwise_gpu_f32, greater_equal_in2_float_out1_int) {
517 // 1.f, 2.5f, 5.f, 1.5f,
518 // 2.f, 0.f, 6.f, 5.2f,
519 // 3.f, 0.5f, 7.f, 12.f,
520 // 4.f, 0.f, 8.f, 8.f
523 // 0.5f, 2.5f, 0.5f, 1.5f,
524 // 5.f, 7.f, 6.f, 4.f,
525 // 15.f, 17.f, 8.f, 10.f,
526 // -2.f, 0.f, -0.5f, -2.5f
534 const auto& engine = get_test_engine();
536 auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
537 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
540 1.f, 2.5f, 5.f, 1.5f,
542 3.f, 0.5f, 7.f, 12.f,
547 0.5f, 2.5f, 0.5f, 1.5f,
549 15.f, 17.f, 8.f, 10.f,
550 -2.f, 0.f, -0.5f, -2.5f
554 topology.add(input_layout("input", input1.get_layout()));
555 topology.add(input_layout("input2", input2.get_layout()));
556 topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::ge));
558 network network(engine, topology);
560 network.set_input_data("input", input1);
561 network.set_input_data("input2", input2);
563 auto outputs = network.execute();
// Exactly one output, named after the eltwise primitive, is expected.
565 EXPECT_EQ(outputs.size(), size_t(1));
566 EXPECT_EQ(outputs.begin()->first, "eltwise");
568 auto output = outputs.at("eltwise").get_memory();
// Float inputs, but the result buffer is accessed as int8_t values.
569 auto output_ptr = output.pointer<int8_t>();
571 std::vector<int8_t> answers = { 1, 1, 1, 1,
576 for (size_t i = 0; i < answers.size(); ++i) {
577 EXPECT_EQ(answers[i], output_ptr[i]);
// eltwise_mode::logic_and over two f32 bfyx inputs allocated as { 2, 2, 2, 2 }.
// The output is read back as int8_t and compared element by element with `answers`.
581 TEST(eltwise_gpu_f32, logicalAND_in2_float_out1_int) {
587 // 1.f, 2.5f, 5.f, 1.5f,
588 // 2.f, 0.f, 6.f, 5.2f,
589 // 3.f, 0.5f, 7.f, 12.f,
590 // 4.f, 0.f, 8.f, 8.f
593 // 0.5f, 2.5f, 0.5f, 1.5f,
594 // 5.f, 7.f, 6.f, 4.f,
595 // 15.f, 17.f, 8.f, 10.f,
596 // -2.f, 0.f, -0.5f, -2.5f
604 const auto& engine = get_test_engine();
606 auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
607 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
610 1.f, 2.5f, 5.f, 1.5f,
612 3.f, 0.5f, 7.f, 12.f,
617 0.5f, 2.5f, 0.5f, 1.5f,
619 15.f, 17.f, 8.f, 10.f,
620 -2.f, 0.f, -0.5f, -2.5f
624 topology.add(input_layout("input", input1.get_layout()));
625 topology.add(input_layout("input2", input2.get_layout()));
626 topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::logic_and));
628 network network(engine, topology);
630 network.set_input_data("input", input1);
631 network.set_input_data("input2", input2);
633 auto outputs = network.execute();
// Exactly one output, named after the eltwise primitive, is expected.
635 EXPECT_EQ(outputs.size(), size_t(1));
636 EXPECT_EQ(outputs.begin()->first, "eltwise");
638 auto output = outputs.at("eltwise").get_memory();
// Float inputs, but the result buffer is accessed as int8_t values.
639 auto output_ptr = output.pointer<int8_t>();
641 std::vector<int8_t> answers = { 1, 1, 1, 1,
646 for (size_t i = 0; i < answers.size(); ++i) {
647 EXPECT_EQ(answers[i], output_ptr[i]);
// eltwise_mode::logic_and over three f32 bfyx inputs allocated as { 2, 2, 2, 2 }.
// The output is read back as int8_t and compared element by element with `answers`.
651 TEST(eltwise_gpu_f32, logicalAND_in3_float_out1_int) {
658 // 1.f, 2.5f, 5.f, 1.5f,
659 // 2.f, 0.f, 6.f, 5.2f,
660 // 3.f, 0.5f, 7.f, 12.f,
661 // 4.f, 0.f, 8.f, 8.f
664 // 0.5f, 2.5f, 0.5f, 1.5f,
665 // 5.f, 7.f, 6.f, 4.f,
666 // 15.f, 17.f, 8.f, 10.f,
667 // -2.f, 0.f, -0.5f, -2.5f
670 // 0.f, 0.f, 0.f, 0.f,
671 // 0.f, 0.f, 0.f, 0.f,
672 // 1.f, 1.f, 1.f, 1.f,
673 // 1.f, 1.f, 1.f, 1.f
681 const auto& engine = get_test_engine();
683 auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
684 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
685 auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
688 1.f, 2.5f, 5.f, 1.5f,
690 3.f, 0.5f, 7.f, 12.f,
695 0.5f, 2.5f, 0.5f, 1.5f,
697 15.f, 17.f, 8.f, 10.f,
698 -2.f, 0.f, -0.5f, -2.5f
709 topology.add(input_layout("input", input1.get_layout()));
710 topology.add(input_layout("input2", input2.get_layout()));
// Each input_layout is described by the memory that is later bound to it.
711 topology.add(input_layout("input3", input3.get_layout()));
712 topology.add(eltwise("eltwise", {"input", "input2", "input3"}, eltwise_mode::logic_and));
714 network network(engine, topology);
716 network.set_input_data("input", input1);
717 network.set_input_data("input2", input2);
718 network.set_input_data("input3", input3);
720 auto outputs = network.execute();
// Exactly one output, named after the eltwise primitive, is expected.
722 EXPECT_EQ(outputs.size(), size_t(1));
723 EXPECT_EQ(outputs.begin()->first, "eltwise");
725 auto output = outputs.at("eltwise").get_memory();
// Float inputs, but the result buffer is accessed as int8_t values.
726 auto output_ptr = output.pointer<int8_t>();
728 std::vector<int8_t> answers = { 0, 0, 0, 0,
733 for (size_t i = 0; i < answers.size(); ++i) {
734 EXPECT_EQ(answers[i], output_ptr[i]);
// eltwise_mode::logic_or over two f32 bfyx inputs allocated as { 2, 2, 2, 2 }.
// The output is read back as int8_t and compared element by element with `answers`.
738 TEST(eltwise_gpu_f32, logicalOR_in2_float_out1_int) {
744 // 1.f, 2.5f, 5.f, 1.5f,
745 // 2.f, 0.f, 6.f, 5.2f,
746 // 3.f, 0.5f, 7.f, 12.f,
747 // 4.f, 0.f, 8.f, 8.f
750 // 0.5f, 2.5f, 0.5f, 1.5f,
751 // 5.f, 7.f, 6.f, 4.f,
752 // 15.f, 17.f, 8.f, 10.f,
753 // -2.f, 0.f, -0.5f, -2.5f
761 const auto& engine = get_test_engine();
763 auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
764 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
767 1.f, 2.5f, 5.f, 1.5f,
769 3.f, 0.5f, 7.f, 12.f,
774 0.5f, 2.5f, 0.5f, 1.5f,
776 15.f, 17.f, 8.f, 10.f,
777 -2.f, 0.f, -0.5f, -2.5f
781 topology.add(input_layout("input", input1.get_layout()));
782 topology.add(input_layout("input2", input2.get_layout()));
783 topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::logic_or));
785 network network(engine, topology);
787 network.set_input_data("input", input1);
788 network.set_input_data("input2", input2);
790 auto outputs = network.execute();
// Exactly one output, named after the eltwise primitive, is expected.
792 EXPECT_EQ(outputs.size(), size_t(1));
793 EXPECT_EQ(outputs.begin()->first, "eltwise");
795 auto output = outputs.at("eltwise").get_memory();
// Float inputs, but the result buffer is accessed as int8_t values.
796 auto output_ptr = output.pointer<int8_t>();
798 std::vector<int8_t> answers = { 1, 1, 1, 1,
803 for (size_t i = 0; i < answers.size(); ++i) {
804 EXPECT_EQ(answers[i], output_ptr[i]);
// eltwise_mode::logic_or over three f32 bfyx inputs allocated as { 2, 2, 2, 2 }.
// The output is read back as int8_t and compared element by element with `answers`.
808 TEST(eltwise_gpu_f32, logicalOR_in3_float_out1_int) {
815 // 1.f, 2.5f, 5.f, 1.5f,
816 // 2.f, 0.f, 6.f, 5.2f,
817 // 3.f, 0.5f, 7.f, 12.f,
818 // 4.f, 0.f, 8.f, 8.f
821 // 0.5f, 2.5f, 0.5f, 1.5f,
822 // 5.f, 7.f, 6.f, 4.f,
823 // 15.f, 17.f, 8.f, 10.f,
824 // -2.f, 0.f, -0.5f, -2.5f
827 // 0.f, 1.f, 1.f, 1.f,
828 // 0.f, 1.f, 1.f, 0.f,
829 // 1.f, 1.f, 1.f, 1.f,
830 // 1.f, 1.f, 1.f, 1.f
838 const auto& engine = get_test_engine();
840 auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
841 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
842 auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
845 1.f, 2.5f, 5.f, 1.5f,
847 3.f, 0.5f, 7.f, 12.f,
852 0.5f, 2.5f, 0.5f, 1.5f,
854 15.f, 17.f, 8.f, 10.f,
855 -2.f, 0.f, -0.5f, -2.5f
866 topology.add(input_layout("input", input1.get_layout()));
867 topology.add(input_layout("input2", input2.get_layout()));
// Each input_layout is described by the memory that is later bound to it.
868 topology.add(input_layout("input3", input3.get_layout()));
869 topology.add(eltwise("eltwise", {"input", "input2", "input3"}, eltwise_mode::logic_or));
871 network network(engine, topology);
873 network.set_input_data("input", input1);
874 network.set_input_data("input2", input2);
875 network.set_input_data("input3", input3);
877 auto outputs = network.execute();
// Exactly one output, named after the eltwise primitive, is expected.
879 EXPECT_EQ(outputs.size(), size_t(1));
880 EXPECT_EQ(outputs.begin()->first, "eltwise");
882 auto output = outputs.at("eltwise").get_memory();
// Float inputs, but the result buffer is accessed as int8_t values.
883 auto output_ptr = output.pointer<int8_t>();
885 std::vector<int8_t> answers = { 1, 1, 1, 1,
890 for (size_t i = 0; i < answers.size(); ++i) {
891 EXPECT_EQ(answers[i], output_ptr[i]);
// eltwise_mode::logic_xor over two f32 bfyx inputs allocated as { 2, 2, 2, 2 }.
// The output is read back as int8_t and compared element by element with `answers`.
895 TEST(eltwise_gpu_f32, logicalXOR_in2_float_out1_int) {
901 // 1.f, 2.5f, 5.f, 1.5f,
902 // 2.f, 0.f, 6.f, 5.2f,
903 // 3.f, 0.5f, 7.f, 12.f,
904 // 4.f, 0.f, 8.f, 8.f
907 // 0.5f, 2.5f, 0.5f, 1.5f,
908 // 5.f, 7.f, 6.f, 4.f,
909 // 15.f, 17.f, 8.f, 10.f,
910 // -2.f, 0.f, -0.5f, -2.5f
918 const auto& engine = get_test_engine();
920 auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
921 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
924 1.f, 2.5f, 5.f, 1.5f,
926 3.f, 0.5f, 7.f, 12.f,
931 0.5f, 2.5f, 0.5f, 1.5f,
933 15.f, 17.f, 8.f, 10.f,
934 -2.f, 0.f, -0.5f, -2.5f
938 topology.add(input_layout("input", input1.get_layout()));
939 topology.add(input_layout("input2", input2.get_layout()));
940 topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::logic_xor));
942 network network(engine, topology);
944 network.set_input_data("input", input1);
945 network.set_input_data("input2", input2);
947 auto outputs = network.execute();
// Exactly one output, named after the eltwise primitive, is expected.
949 EXPECT_EQ(outputs.size(), size_t(1));
950 EXPECT_EQ(outputs.begin()->first, "eltwise");
952 auto output = outputs.at("eltwise").get_memory();
// Float inputs, but the result buffer is accessed as int8_t values.
953 auto output_ptr = output.pointer<int8_t>();
955 std::vector<int8_t> answers = { 0, 0, 0, 0,
960 for (size_t i = 0; i < answers.size(); ++i) {
961 EXPECT_EQ(answers[i], output_ptr[i]);
// eltwise_mode::sum over two f32 yxfb inputs; the 16 float results are compared with
// `answers` through are_equal.
// NOTE(review): the test name says 4x4x2x2, while both memories are allocated with { 2, 2, 2, 2 }.
965 TEST(eltwise_gpu_f32, add_basic_in4x4x2x2) {
971 // f0: b0: 1 2 b1: 0 0
972 // f0: b0: 3 4 b1: 0.5 -0.5
973 // f1: b0: 5 6 b1: 1.5 5.2
974 // f1: b0: 7 8 b1: 12 8
977 // f0: b0: 0.5 5 b1: 2.5 7
978 // f0: b0: 15 -2 b1: 17 6.5
979 // f1: b0: 0.5 2 b1: 2.5 4
980 // f1: b0: 8 -0.5 b1: 10 -2.5
983 // f0: b0: 1.5 7 b1: 2.5 7
984 // f0: b0: 18 2 b1: 17.5 6
985 // f1: b0: 5.5 8 b1: 4 9.2
986 // f1: b0: 15 7.5 b1: 22 5.5
989 const auto& engine = get_test_engine();
991 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
992 auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
995 topology.add(input_layout("input", input.get_layout()));
996 topology.add(input_layout("input2", input2.get_layout()));
997 topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum));
1000 1.f, 0.f, 5.f, 1.5f,
1001 2.f, 0.f, 6.f, 5.2f,
1002 3.f, 0.5f, 7.f, 12.f,
1003 4.f, -0.5f, 8.f, 8.f
1006 set_values(input2, {
1007 0.5f, 2.5f, 0.5f, 2.5f,
1009 15.f, 17.f, 8.f, 10.f,
1010 -2.f, 6.5f, -0.5f, -2.5f });
1012 network network(engine, topology);
1014 network.set_input_data("input", input);
1015 network.set_input_data("input2", input2);
1016 auto outputs = network.execute();
// Exactly one output, named after the eltwise primitive, is expected.
1018 EXPECT_EQ(outputs.size(), size_t(1));
1019 EXPECT_EQ(outputs.begin()->first, "eltwise");
1021 auto output = outputs.at("eltwise").get_memory();
// Expected sums, listed in the same order as the values passed to set_values.
1023 float answers[16] = { 1.5f, 2.5f, 5.5f, 4.f,
1024 7.f, 7.f, 8.f, 9.2f,
1025 18.f,17.5f, 15.f, 22.f,
1026 2.f, 6.f, 7.5f, 5.5f };
1028 auto output_ptr = output.pointer<float>();
1030 for (int i = 0; i < 16; i++)
1032 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
// eltwise_mode::sum where `input` is allocated as { 2, 1, 2, 2 } and `input2` as { 2, 2, 2, 2 }
// (both f32, bfyx); the 16 float results are compared with `answers` through are_equal.
1036 TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_channel) {
// The second tensor entry differs between the operands (1 vs 2).
1039 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 2, 2 } });
1040 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
1043 topology.add(input_layout("input", input.get_layout()));
1044 topology.add(input_layout("input2", input2.get_layout()));
1045 topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum));
1055 set_values(input2, {
1068 network network(engine, topology);
1070 network.set_input_data("input", input);
1071 network.set_input_data("input2", input2);
1072 auto outputs = network.execute();
// Exactly one output, named after the eltwise primitive, is expected.
1074 EXPECT_EQ(outputs.size(), size_t(1));
1075 EXPECT_EQ(outputs.begin()->first, "eltwise");
1077 auto output = outputs.at("eltwise").get_memory();
1079 float answers[16] = { 1.5f, 2.5f,
1091 auto output_ptr = output.pointer<float>();
1093 for (int i = 0; i < 16; i++)
1095 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
// eltwise_mode::sum where `input` is allocated as { 2, 2, 2, 2 } and `input2` as { 2, 2, 1, 2 }
// (both f32, bfyx); the 16 float results are compared with `answers` through are_equal.
1099 TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_x) {
// The third tensor entry differs between the operands (2 vs 1).
1102 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
1103 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 2 } });
1106 topology.add(input_layout("input", input.get_layout()));
1107 topology.add(input_layout("input2", input2.get_layout()));
1108 topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum));
1124 set_values(input2, {
1138 network network(engine, topology);
1140 network.set_input_data("input", input);
1141 network.set_input_data("input2", input2);
1142 auto outputs = network.execute();
// Exactly one output, named after the eltwise primitive, is expected.
1144 EXPECT_EQ(outputs.size(), size_t(1));
1145 EXPECT_EQ(outputs.begin()->first, "eltwise");
1147 auto output = outputs.at("eltwise").get_memory();
1149 float answers[16] = { 1.5f, 3.5f,
1161 auto output_ptr = output.pointer<float>();
1163 for (int i = 0; i < 16; i++)
1165 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
// eltwise_mode::sum where `input` is allocated as { 2, 2, 2, 2 } and `input2` as { 1, 2, 2, 2 }
// (both f32, bfyx); the 16 float results are compared with `answers` through are_equal.
// NOTE(review): the size-1 entry is the first one; if the braces follow the (b, f, x, y) order
// used by `tensor input_tensor( input_b, input_f, input_x, input_y )` elsewhere in this file,
// that is the batch extent rather than y - confirm against the test name.
1169 TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_y) {
1172 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
1173 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } });
1176 topology.add(input_layout("input", input.get_layout()));
1177 topology.add(input_layout("input2", input2.get_layout()));
1178 topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum));
1194 set_values(input2, {
1202 network network(engine, topology);
1204 network.set_input_data("input", input);
1205 network.set_input_data("input2", input2);
1206 auto outputs = network.execute();
// Exactly one output, named after the eltwise primitive, is expected.
1208 EXPECT_EQ(outputs.size(), size_t(1));
1209 EXPECT_EQ(outputs.begin()->first, "eltwise");
1211 auto output = outputs.at("eltwise").get_memory();
1213 float answers[16] = { 1.5f, 2.5f,
1225 auto output_ptr = output.pointer<float>();
1227 for (int i = 0; i < 16; i++)
1229 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
// eltwise_mode::sum where `input` is allocated as { 2, 2, 2, 2 } and `input2` as { 2, 2, 2, 1 }
// (both f32, bfyx); the 16 float results are compared with `answers` through are_equal.
// NOTE(review): the size-1 entry is the last one; if the braces follow the (b, f, x, y) order
// used by `tensor input_tensor( input_b, input_f, input_x, input_y )` elsewhere in this file,
// that is the y extent rather than batch - confirm against the test name.
1233 TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_batch) {
1236 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
1237 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } });
1240 topology.add(input_layout("input", input.get_layout()));
1241 topology.add(input_layout("input2", input2.get_layout()));
1242 topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum));
1258 set_values(input2, {
1268 network network(engine, topology);
1270 network.set_input_data("input", input);
1271 network.set_input_data("input2", input2);
1272 auto outputs = network.execute();
// Exactly one output, named after the eltwise primitive, is expected.
1274 EXPECT_EQ(outputs.size(), size_t(1));
1275 EXPECT_EQ(outputs.begin()->first, "eltwise");
1277 auto output = outputs.at("eltwise").get_memory();
1279 float answers[16] = { 1.5f, 2.5f,
1291 auto output_ptr = output.pointer<float>();
1293 for (int i = 0; i < 16; i++)
1295 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
// eltwise_mode::sum where `input` is allocated as { 2, 2, 2, 2 } and `input2` as { 1, 2, 1, 1 }
// (both f32, bfyx); the 16 float results are compared with `answers` through are_equal.
1299 TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_multiple_dims) {
// Three of the four tensor entries of input2 are 1; only the second entry matches input.
1302 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
1303 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } });
1306 topology.add(input_layout("input", input.get_layout()));
1307 topology.add(input_layout("input2", input2.get_layout()));
1308 topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum));
1323 set_values(input2, {
1327 network network(engine, topology);
1329 network.set_input_data("input", input);
1330 network.set_input_data("input2", input2);
1331 auto outputs = network.execute();
// Exactly one output, named after the eltwise primitive, is expected.
1333 EXPECT_EQ(outputs.size(), size_t(1));
1334 EXPECT_EQ(outputs.begin()->first, "eltwise");
1336 auto output = outputs.at("eltwise").get_memory();
1338 float answers[16] = { 1.5f, 3.5f,
1350 auto output_ptr = output.pointer<float>();
1352 for (int i = 0; i < 16; i++)
1354 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
// eltwise_mode::pow where `input` is allocated as { 2, 2, 2, 2 } and `input2` as { 1, 1, 1, 1 }
// (both f32, bfyx); the 16 float results are compared with `answers` through are_equal.
1358 TEST(eltwise_gpu_f32, pow_in2x2x2x2_broadcast_all) {
1361 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
1362 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
1365 topology.add(input_layout("input", input.get_layout()));
1366 topology.add(input_layout("input2", input2.get_layout()));
1367 topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::pow));
// input2 holds a single value, 2.0f; presumably it is applied to every element of input
// (the visible answers start with 1.f, 4.f) - confirm against the full data arrays.
1383 set_values(input2, { 2.0f });
1385 network network(engine, topology);
1387 network.set_input_data("input", input);
1388 network.set_input_data("input2", input2);
1389 auto outputs = network.execute();
// Exactly one output, named after the eltwise primitive, is expected.
1391 EXPECT_EQ(outputs.size(), size_t(1));
1392 EXPECT_EQ(outputs.begin()->first, "eltwise");
1394 auto output = outputs.at("eltwise").get_memory();
1396 float answers[16] = { 1.f, 4.f,
1408 auto output_ptr = output.pointer<float>();
1410 for (int i = 0; i < 16; i++)
1412 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
// eltwise_mode::sum over three f32 bfyx inputs: `input` is { 2, 2, 2, 2 }, while `input2` and
// `input3` are both { 2, 2, 2, 1 }. The 16 float results are compared with `answers`.
1416 TEST(eltwise_gpu_f32, add_basic_in2x2x2x2_broadcast_2_inputs_same_dim) {
1419 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
// Both smaller operands have their size-1 entry in the same (last) position.
1420 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } });
1421 auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } });
1424 topology.add(input_layout("input", input.get_layout()));
1425 topology.add(input_layout("input2", input2.get_layout()));
1426 topology.add(input_layout("input3", input3.get_layout()));
1427 topology.add(eltwise("eltwise", {"input", "input2", "input3"}, eltwise_mode::sum));
1443 set_values(input2, {
1453 set_values(input3, {
1463 network network(engine, topology);
1465 network.set_input_data("input", input);
1466 network.set_input_data("input2", input2);
1467 network.set_input_data("input3", input3);
1468 auto outputs = network.execute();
// Exactly one output, named after the eltwise primitive, is expected.
1470 EXPECT_EQ(outputs.size(), size_t(1));
1471 EXPECT_EQ(outputs.begin()->first, "eltwise");
1473 auto output = outputs.at("eltwise").get_memory();
1475 float answers[16] = { 4.5f, 4.5f,
1487 auto output_ptr = output.pointer<float>();
1489 for (int i = 0; i < 16; i++)
1491 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
// eltwise_mode::sum over three f32 bfyx inputs: `input` is { 2, 2, 2, 2 }, `input2` is
// { 2, 2, 2, 1 } and `input3` is { 2, 1, 2, 2 }. The 16 float results are compared with `answers`.
1495 TEST(eltwise_gpu_f32, add_basic_in2x2x2x2_broadcast_2_inputs_diff_dim) {
1498 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
// The two smaller operands have their size-1 entry in different positions (last vs second).
1499 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } });
1500 auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 2, 2 } });
1503 topology.add(input_layout("input", input.get_layout()));
1504 topology.add(input_layout("input2", input2.get_layout()));
1505 topology.add(input_layout("input3", input3.get_layout()));
1506 topology.add(eltwise("eltwise", {"input", "input2", "input3"}, eltwise_mode::sum));
1522 set_values(input2, {
1532 set_values(input3, {
1540 network network(engine, topology);
1542 network.set_input_data("input", input);
1543 network.set_input_data("input2", input2);
1544 network.set_input_data("input3", input3);
1545 auto outputs = network.execute();
// Exactly one output, named after the eltwise primitive, is expected.
1547 EXPECT_EQ(outputs.size(), size_t(1));
1548 EXPECT_EQ(outputs.begin()->first, "eltwise");
1550 auto output = outputs.at("eltwise").get_memory();
1552 float answers[16] = { 4.5f, 4.5f,
1564 auto output_ptr = output.pointer<float>();
1566 for (int i = 0; i < 16; i++)
1568 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
// eltwise_mode::max over two f32 yxfb inputs; the 16 float results are compared with
// `answers` through are_equal.
// NOTE(review): the test name says 4x4x4x4, while both memories are allocated with { 2, 2, 2, 2 }.
1572 TEST(eltwise_gpu_f32, max_basic_in4x4x4x4) {
1578 // f0: b0: 1 2 b1: 0 0
1579 // f0: b0: 3 4 b1: 0.5 -0.5
1580 // f1: b0: 5 6 b1: 1.5 5.2
1581 // f1: b0: 7 8 b1: 12 8
1584 // f0: b0: 0.5 5 b1: 2.5 7
1585 // f0: b0: 15 6 b1: 17 8
1586 // f1: b0: 0.5 2 b1: 2.5 4
1587 // f1: b0: 8 -0.5 b1: 10 -2.5
1590 // f0: b0: 1 5 b1: 2.5 7
1591 // f0: b0: 15 6 b1: 17 8
1592 // f1: b0: 5 6 b1: 2.5 5.2
1593 // f1: b0: 8 8 b1: 12 8
1595 const auto& engine = get_test_engine();
1597 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
1598 auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
1601 topology.add(input_layout("input", input.get_layout()));
1602 topology.add(input_layout("input2", input2.get_layout()));
1603 topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::max));
1606 1.f, 0.f, 5.f, 1.5f,
1607 2.f, 0.f, 6.f, 5.2f,
1608 3.f, 0.5f, 7.f, 12.f,
1609 4.f, -0.5f, 8.f, 8.f
1612 set_values(input2, {
1613 0.5f, 2.5f, 0.5f, 2.5f,
1615 15.f, 17.f, 8.f, 10.f,
1616 6.f, 8.f, -0.5f, -2.5f });
1618 network network(engine, topology);
1620 network.set_input_data("input", input);
1621 network.set_input_data("input2", input2);
1622 auto outputs = network.execute();
// Exactly one output, named after the eltwise primitive, is expected.
1624 EXPECT_EQ(outputs.size(), size_t(1));
1625 EXPECT_EQ(outputs.begin()->first, "eltwise");
1627 auto output = outputs.at("eltwise").get_memory();
// Expected maxima, listed in the same order as the values passed to set_values.
1629 float answers[16] = {
1630 1.f, 2.5f, 5.f, 2.5f,
1631 5.f, 7.f, 6.f, 5.2f,
1632 15.f, 17.f, 8.f, 12.f,
1633 6.f, 8.f, 8.f, 8.f };
1635 auto output_ptr = output.pointer<float>();
1637 for (int i = 0; i < 16; i++)
1639 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
// Element-wise subtraction (input - input2) of two f32 yxfb {2,2,2,2} inputs; the 16
// outputs are compared with a precomputed table. The comment tables below list input,
// input2 and the expected output.
1643 TEST(eltwise_gpu_f32, sub_basic_in4x4x4x4) {
1649 // f0: b0: 1 2 b1: 0 0
1650 // f0: b0: 3 4 b1: 0.5 -0.5
1651 // f1: b0: 5 6 b1: 1.5 5.2
1652 // f1: b0: 7 8 b1: 12 8
1655 // f0: b0: 0.5 5 b1: 2.5 7
1656 // f0: b0: 15 6 b1: 17 8
1657 // f1: b0: 0.5 2 b1: -1 2
1658 // f1: b0: 8 -0.5 b1: 8.5 10.5
1661 // f0: b0: 0.5 -3 b1: -2.5 -7
1662 // f0: b0: -12 -2 b1: -16.5 -8.5
1663 // f1: b0: 4.5 4 b1: 2.5 3.2
1664 // f1: b0: -1 8.5 b1: 3.5 -2.5
1667 const auto& engine = get_test_engine();
1668 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
1669 auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
1672 topology.add(input_layout("input", input.get_layout()));
1673 topology.add(input_layout("input2", input2.get_layout()));
1674 topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sub));
1677 1.f, 0.f, 5.f, 1.5f,
1678 2.f, 0.f, 6.f, 5.2f,
1679 3.f, 0.5f, 7.f, 12.f,
1680 4.f, -0.5f, 8.f, 8.f
1683 set_values(input2, {
1684 0.5f, 2.5f, 0.5f, -1.f,
1686 15.f, 17.f, 8.f, 8.5f,
1687 6.f, 8.f, -0.5f, 10.5f });
1689 network network(engine, topology);
1691 network.set_input_data("input", input);
1692 network.set_input_data("input2", input2);
1693 auto outputs = network.execute();
// The eltwise primitive must be the only network output.
1695 EXPECT_EQ(outputs.size(), size_t(1));
1696 EXPECT_EQ(outputs.begin()->first, "eltwise");
1698 auto output = outputs.at("eltwise").get_memory();
// Expected values, listed in the same element order as the input data above.
1700 float answers[16] = {
1701 0.5f, -2.5f, 4.5f, 2.5f,
1702 -3.f, -7.f, 4.f, 3.2f,
1703 -12.f, -16.5f, -1.f, 3.5f,
1704 -2.f, -8.5f, 8.5f, -2.5f };
1706 auto output_ptr = output.pointer<float>();
1708 for (int i = 0; i < 16; i++)
1710 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
// Runs every listed eltwise mode for every listed integer data type (i8, i32, i64).
// Both inputs are supplied as f32, converted to the integer type under test by reorders,
// combined by the eltwise primitive, and converted back to f32 for comparison with a
// host-side reference computed from the original float vectors.
1714 TEST(eltwise_gpu_int, basic_in4x4x4x4) {
1715 // Same params as in eltwise_gpu_f32, sub_basic_in4x4x4x4 but using int types instead
1717 std::vector<data_types> data_types_to_test = { data_types::i8, data_types::i32, data_types::i64 };
1718 std::vector<eltwise_mode> eltwise_ops_to_test = { eltwise_mode::sum, eltwise_mode::sub, eltwise_mode::div, eltwise_mode::prod, eltwise_mode::min, eltwise_mode::max, eltwise_mode::mod };
1720 for (auto& data_type : data_types_to_test)
1722 for (auto& mode : eltwise_ops_to_test)
1724 const auto& engine = get_test_engine();
1725 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
1726 auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
1729 topology.add(input_layout("input", input.get_layout()));
1730 topology.add(input_layout("input2", input2.get_layout()));
// f32 -> data_type conversion of both inputs, then data_type -> f32 on the result.
1731 topology.add(reorder("input_reorder", "input", { data_type, format::yxfb,{ 2, 2, 2, 2 } }));
1732 topology.add(reorder("input2_reorder", "input2", { data_type, format::yxfb,{ 2, 2, 2, 2 } }));
1733 topology.add(eltwise("eltwise", { "input_reorder", "input2_reorder" }, mode));
1734 topology.add(reorder("eltwise_reorder", "eltwise", { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }));
1736 std::vector<float> input_1_vec = {
1739 3.f, 0.f, 7.f, 12.f,
1742 set_values(input, input_1_vec);
// NOTE(review): input_2_vec contains zeros, so the div and mod references below divide
// by zero for those elements - confirm this is intended.
1744 std::vector<float> input_2_vec = {
1745 0.f, 2.f, 0.f, -1.f,
1747 15.f, 17.f, 8.f, 8.f,
1748 6.f, 8.f, 0.f, 10.f };
1749 set_values(input2, input_2_vec);
1751 network network(engine, topology);
1752 network.set_input_data("input", input);
1753 network.set_input_data("input2", input2);
1754 auto outputs = network.execute();
// The final reorder must be the only network output.
1756 ASSERT_EQ(outputs.size(), size_t(1));
1757 EXPECT_EQ(outputs.begin()->first, "eltwise_reorder");
1759 auto output = outputs.at("eltwise_reorder").get_memory();
1761 auto output_ptr = output.pointer<float>();
1763 for (int i = 0; i < 16; i++)
// Host-side reference for the current mode, computed in float.
1765 float expected = 0.f;
1766 if (mode == eltwise_mode::sum)
1767 expected = input_1_vec[i] + input_2_vec[i];
1768 else if (mode == eltwise_mode::sub)
1769 expected = input_1_vec[i] - input_2_vec[i];
1770 else if (mode == eltwise_mode::prod)
1771 expected = input_1_vec[i] * input_2_vec[i];
1772 else if (mode == eltwise_mode::div)
1773 expected = input_1_vec[i] / input_2_vec[i];
1774 else if (mode == eltwise_mode::min)
1775 expected = std::min(input_1_vec[i], input_2_vec[i]);
1776 else if (mode == eltwise_mode::max)
1777 expected = std::max(input_1_vec[i], input_2_vec[i]);
1778 else if (mode == eltwise_mode::mod) {
1779 expected = std::fmod(input_1_vec[i], input_2_vec[i]);
// The float reference is floored before it is compared with the integer-computed result.
1783 EXPECT_TRUE(are_equal(std::floor(expected), output_ptr[i]));
// Mixed-type variant: for every listed integer type and eltwise mode, only the first
// input is converted to the integer type by a reorder; the second input stays f32.
// The eltwise result is reordered to f32 and compared with a host-side float reference.
1789 TEST(eltwise_gpu_f32_int, basic_in4x4x4x4) {
1790 // Same params as in eltwise_gpu_f32, sub_basic_in4x4x4x4 but using int types for first input.
1792 // Eltwise supports mixed inputs, but only first input can be set as intX.
1794 std::vector<data_types> data_types_to_test = { data_types::i8, data_types::i32, data_types::i64 };
1795 std::vector<eltwise_mode> eltwise_ops_to_test = { eltwise_mode::sum, eltwise_mode::sub, eltwise_mode::div, eltwise_mode::prod, eltwise_mode::min, eltwise_mode::max, eltwise_mode::mod };
1797 for (auto& data_type : data_types_to_test)
1799 for (auto& mode : eltwise_ops_to_test)
1801 const auto& engine = get_test_engine();
1802 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
1803 auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
1806 topology.add(input_layout("input", input.get_layout()));
1807 topology.add(input_layout("input2", input2.get_layout()));
// Only "input" goes through the f32 -> data_type reorder; "input2" feeds eltwise directly.
1808 topology.add(reorder("input_reorder", "input", { data_type, format::yxfb,{ 2, 2, 2, 2 } }));
1809 topology.add(eltwise("eltwise", { "input_reorder", "input2" }, mode));
1810 topology.add(reorder("eltwise_reorder", "eltwise", { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }));
1812 std::vector<float> input_1_vec = {
1815 3.f, 0.f, 7.f, 12.f,
1818 set_values(input, input_1_vec);
// NOTE(review): input_2_vec contains zeros, so the div and mod references below divide
// by zero for those elements - confirm this is intended.
1820 std::vector<float> input_2_vec = {
1821 0.f, 2.f, 0.f, -1.f,
1823 15.f, 17.f, 8.f, 8.f,
1824 6.f, 8.f, 0.f, 10.f };
1825 set_values(input2, input_2_vec);
1827 network network(engine, topology);
1828 network.set_input_data("input", input);
1829 network.set_input_data("input2", input2);
1830 auto outputs = network.execute();
// The final reorder must be the only network output.
1832 ASSERT_EQ(outputs.size(), size_t(1));
1833 EXPECT_EQ(outputs.begin()->first, "eltwise_reorder");
1835 auto output = outputs.at("eltwise_reorder").get_memory();
1837 auto output_ptr = output.pointer<float>();
1839 for (int i = 0; i < 16; i++)
// Host-side reference for the current mode, computed in float.
1841 float expected = 0.f;
1842 if (mode == eltwise_mode::sum)
1843 expected = input_1_vec[i] + input_2_vec[i];
1844 else if (mode == eltwise_mode::sub)
1845 expected = input_1_vec[i] - input_2_vec[i];
1846 else if (mode == eltwise_mode::prod)
1847 expected = input_1_vec[i] * input_2_vec[i];
1848 else if (mode == eltwise_mode::div)
1849 expected = input_1_vec[i] / input_2_vec[i];
1850 else if (mode == eltwise_mode::min)
1851 expected = std::min(input_1_vec[i], input_2_vec[i]);
1852 else if (mode == eltwise_mode::max)
1853 expected = std::max(input_1_vec[i], input_2_vec[i]);
1854 else if (mode == eltwise_mode::mod)
1855 expected = std::fmod(input_1_vec[i], input_2_vec[i]);
// The float reference is floored before it is compared with the network output.
1857 EXPECT_TRUE(are_equal(std::floor(expected), output_ptr[i]));
// Element-wise product of two f32 yxfb {2,2,2,2} inputs; the 16 outputs are compared
// with a precomputed table. The comment tables below list input, input2 and the
// expected output.
1863 TEST(eltwise_gpu_f32, prod_basic_in4x4x4x4) {
1869 // f0: b0: 1 2 b1: 0 0
1870 // f0: b0: 3 4 b1: 0.5 -0.5
1871 // f1: b0: 5 6 b1: 1 5.2
1872 // f1: b0: 7 8 b1: 12 7.5
1875 // f0: b0: 0.5 0.5 b1: 5 2
1876 // f0: b0: 2.5 2.5 b1: 7 4
1877 // f1: b0: 15 8 b1: 6 -0.5
1878 // f1: b0: 17 10 b1: 8 -2.5
1881 // f0: b0: 0.5 1 b1: 0 0
1882 // f0: b0: 7.5 10 b1: 3.5 -2
1883 // f1: b0: 75 48 b1: 6 -2.6
1884 // f1: b0: 119 80 b1: 96 -18.75
1888 const auto& engine = get_test_engine();
1889 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
1890 auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
1892 topology.add(input_layout("input", input.get_layout()));
1893 topology.add(input_layout("input2", input2.get_layout()));
1894 topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::prod));
1898 2.f, 0.f, 6.f, 5.2f,
1899 3.f, 0.5f, 7.f, 12.f,
1900 4.f, -0.5f, 8.f, 7.5f
1903 set_values(input2, {
1904 0.5f, 5.f, 15.f, 6.f,
1905 0.5f, 2.f, 8.f, -0.5f,
1906 2.5f, 7.f, 17.f, 8.f,
1907 2.5f, 4.f, 10.f, -2.5f });
1909 network network(engine, topology);
1911 network.set_input_data("input", input);
1912 network.set_input_data("input2", input2);
1913 auto outputs = network.execute();
// The eltwise primitive must be the only network output.
1915 EXPECT_EQ(outputs.size(), size_t(1));
1916 EXPECT_EQ(outputs.begin()->first, "eltwise");
1918 auto output = outputs.at("eltwise").get_memory();
// Expected values, listed in the same element order as the input data above.
1920 float answers[16] = {
1921 0.5f, 0.0f, 75.f, 6.0f,
1922 1.0f, 0.0f, 48.f, -2.6f,
1923 7.5f, 3.5f, 119.f, 96.0f,
1924 10.0f, -2.0f, 80.f, -18.75f };
1926 auto output_ptr = output.pointer<float>();
1928 for (int i = 0; i < 16; i++)
1930 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
// Element-wise max of two f32 bfyx {2,2,2,2} inputs that are first passed through
// reorders adding padding { 0, 0, 2, 1 }; the 16 outputs are compared with a
// precomputed table. The comment tables below list input, input2 and the expected output.
1934 TEST(eltwise_gpu_f32, max_basic_in4x4x4x4_input_padding) {
1938 // Input Padding: 2x1 (with reorder)
1941 // f0: b0: 1 2 b1: 0 0
1942 // f0: b0: 3 4 b1: 0.5 -0.5
1943 // f1: b0: 5 6 b1: 1.5 5.2
1944 // f1: b0: 7 8 b1: 12 8
1947 // f0: b0: 0.5 5 b1: 2.5 7
1948 // f0: b0: 15 6 b1: 17 8
1949 // f1: b0: 0.5 2 b1: 2.5 4
1950 // f1: b0: 8 -0.5 b1: 10 -2.5
1953 // f0: b0: 1 5 b1: 2.5 7
1954 // f0: b0: 15 6 b1: 17 8
1955 // f1: b0: 5 6 b1: 2.5 5.2
1956 // f1: b0: 8 8 b1: 12 8
1958 const auto& engine = get_test_engine();
1960 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
1961 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
1964 topology.add(input_layout("input", input.get_layout()));
1965 topology.add(input_layout("input2", input2.get_layout()));
// NOTE(review): "reorder2" is built from input's layout rather than input2's; both
// memories are allocated with the same layout, so the result is the same.
1966 topology.add(reorder("reorder", "input", input.get_layout().with_padding({ { 0, 0, 2, 1 }, 0 })));
1967 topology.add(reorder("reorder2", "input2", input.get_layout().with_padding({ { 0, 0, 2, 1 }, 0 })));
1968 topology.add(eltwise("eltwise", {"reorder", "reorder2"}, eltwise_mode::max));
1971 1.f, 0.f, 5.f, 1.5f,
1972 2.f, 0.f, 6.f, 5.2f,
1973 3.f, 0.5f, 7.f, 12.f,
1974 4.f, -0.5f, 8.f, 8.f
1977 set_values(input2, {
1978 0.5f, 2.5f, 0.5f, 2.5f,
1980 15.f, 17.f, 8.f, 10.f,
1981 6.f, 8.f, -0.5f, -2.5f });
1983 network network(engine, topology);
1985 network.set_input_data("input", input);
1986 network.set_input_data("input2", input2);
1987 auto outputs = network.execute();
// The eltwise primitive must be the only network output.
1989 EXPECT_EQ(outputs.size(), size_t(1));
1990 EXPECT_EQ(outputs.begin()->first, "eltwise");
1992 auto output = outputs.at("eltwise").get_memory();
// Expected values, listed in the same element order as the input data above.
1994 float answers[16] = {
1995 1.f, 2.5f, 5.f, 2.5f,
1996 5.f, 7.f, 6.f, 5.2f,
1997 15.f, 17.f, 8.f, 12.f,
1998 6.f, 8.f, 8.f, 8.f };
2000 auto output_ptr = output.pointer<float>();
2002 for (int i = 0; i < 16; i++)
2004 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
// Weighted sum of two f32 yxfb {2,2,2,2} inputs with coefficients {0.5f, 0.5f},
// i.e. 0.5 * input + 0.5 * input2 per element; the 16 outputs are compared with a
// precomputed table. The comment tables below list input, input2 and the expected output.
2008 TEST(eltwise_gpu_f32, add_basic_in4x4x2x2_with_coefficients) {
2014 // f0: b0: 1 2 b1: 0 0
2015 // f0: b0: 3 4 b1: 0.5 -0.5
2016 // f1: b0: 5 6 b1: 1.5 5.2
2017 // f1: b0: 7 8 b1: 12 8
2020 // f0: b0: 0.5 5 b1: 2.5 7
2021 // f0: b0: 15 -2 b1: 17 6.5
2022 // f1: b0: 0.5 2 b1: 2.5 4
2023 // f1: b0: 8 -0.5 b1: 10 -2.5
2026 // f0: b0: 0.75 3.5 b1: 1.25 3.5
2027 // f0: b0: 9 1 b1: 8.75 3
2028 // f1: b0: 2.75 4 b1: 2 4.6
2029 // f1: b0: 7.5 3.75 b1: 11 2.75
2032 const auto& engine = get_test_engine();
2034 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
2035 auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
2038 topology.add(input_layout("input", input.get_layout()));
2039 topology.add(input_layout("input2", input2.get_layout()));
2040 topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum, {0.5f, 0.5f}));
2043 1.f, 0.f, 5.f, 1.5f,
2044 2.f, 0.f, 6.f, 5.2f,
2045 3.f, 0.5f, 7.f, 12.f,
2046 4.f, -0.5f, 8.f, 8.f
2049 set_values(input2, {
2050 0.5f, 2.5f, 0.5f, 2.5f,
2052 15.f, 17.f, 8.f, 10.f,
2053 -2.f, 6.5f, -0.5f, -2.5f });
2055 network network(engine, topology);
2057 network.set_input_data("input", input);
2058 network.set_input_data("input2", input2);
2059 auto outputs = network.execute();
// The eltwise primitive must be the only network output.
2061 EXPECT_EQ(outputs.size(), size_t(1));
2062 EXPECT_EQ(outputs.begin()->first, "eltwise");
2064 auto output = outputs.at("eltwise").get_memory();
// Expected values, listed in the same element order as the input data above.
2066 float answers[16] = { 0.75f, 1.25f, 2.75f, 2.f,
2067 3.5f, 3.5f, 4.f, 4.6f,
2068 9.f, 8.75f, 7.5f, 11.f,
2069 1.f, 3.f, 3.75f, 2.75f };
2071 auto output_ptr = output.pointer<float>();
2073 for (int i = 0; i < 16; i++)
2075 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
// Checks the validation of the coefficient list passed to an eltwise sum:
// a non-empty list whose size differs from the number of inputs must raise
// std::invalid_argument, while an empty list or one coefficient per input is accepted.
2079 TEST(eltwise_gpu_f32, coefficients_count_check) {
2080 const auto& engine = get_test_engine();
2082 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
2083 auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
2084 auto input3 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
2087 topology.add(input_layout("input", input.get_layout()));
2088 topology.add(input_layout("input2", input2.get_layout()));
2089 topology.add(input_layout("input3", input3.get_layout()));
// Coefficient lists of size 0..3 used in the cases below.
2091 std::vector<float> coeffs0 = {};
2092 std::vector<float> coeffs1 = {0.5f};
2093 std::vector<float> coeffs2 = {0.5f, 0.5f};
2094 std::vector<float> coeffs3 = {0.5f, 0.5f, 0.5f};
// Two inputs with 1 or 3 coefficients: rejected.
2096 EXPECT_THROW(topology.add(eltwise("eltwise1", {"input", "input2"}, eltwise_mode::sum, coeffs1)), std::invalid_argument);
2097 EXPECT_THROW(topology.add(eltwise("eltwise2", {"input", "input2"}, eltwise_mode::sum, coeffs3)), std::invalid_argument);
// Three inputs with 1 or 2 coefficients: rejected.
2099 EXPECT_THROW(topology.add(eltwise("eltwise3", {"input", "input2", "input3"}, eltwise_mode::sum, coeffs1)), std::invalid_argument);
2100 EXPECT_THROW(topology.add(eltwise("eltwise4", {"input", "input2", "input3"}, eltwise_mode::sum, coeffs2)), std::invalid_argument);
// Two inputs with 0 or 2 coefficients: accepted.
2102 EXPECT_NO_THROW(topology.add(eltwise("eltwise5", {"input", "input2"}, eltwise_mode::sum, coeffs0)));
2103 EXPECT_NO_THROW(topology.add(eltwise("eltwise6", {"input", "input2"}, eltwise_mode::sum, coeffs2)));
// Three inputs with 0 or 3 coefficients: accepted.
2105 EXPECT_NO_THROW(topology.add(eltwise("eltwise7", {"input", "input2", "input3"}, eltwise_mode::sum, coeffs0)));
2106 EXPECT_NO_THROW(topology.add(eltwise("eltwise8", {"input", "input2", "input3"}, eltwise_mode::sum, coeffs3)));
// Weighted sum of three f32 yxfb {2,2,2,2} inputs with coefficients {0.5f, 0.5f, 0.5f},
// i.e. 0.5 * (input + input2 + input3) per element; the 16 outputs are compared with a
// precomputed table. The comment tables below list input, input2, input3 and the
// expected output.
2109 TEST(eltwise_gpu_f32, add_basic_in4x4x2x2_with_coefficients_3inputs) {
2116 // f0: b0: 1 2 b1: 0 0
2117 // f0: b0: 3 4 b1: 0.5 -0.5
2118 // f1: b0: 5 6 b1: 1.5 5.2
2119 // f1: b0: 7 8 b1: 12 8
2122 // f0: b0: 0.5 5 b1: 2.5 7
2123 // f0: b0: 15 -2 b1: 17 6.5
2124 // f1: b0: 0.5 2 b1: 2.5 4
2125 // f1: b0: 8 -0.5 b1: 10 -2.5
2128 // f0: b0: 8 7 b1: 0 1
2129 // f0: b0: 6 5 b1: 0 1
2130 // f1: b0: 4 3 b1: 0 1
2131 // f1: b0: 2 1 b1: 0 1
2134 // f0: b0: 4.75 7 b1: 1.25 4
2135 // f0: b0: 12 3.5 b1: 8.75 3.5
2136 // f1: b0: 4.75 5.5 b1: 2 5.1
2137 // f1: b0: 8.5 4.25 b1: 11 3.25
2140 const auto& engine = get_test_engine();
2142 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
2143 auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
2144 auto input3 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
2147 topology.add(input_layout("input", input.get_layout()));
2148 topology.add(input_layout("input2", input2.get_layout()));
2149 topology.add(input_layout("input3", input3.get_layout()));
2150 topology.add(eltwise("eltwise", {"input", "input2", "input3"}, eltwise_mode::sum, {0.5f, 0.5f, 0.5f}));
2153 1.f, 0.f, 5.f, 1.5f,
2154 2.f, 0.f, 6.f, 5.2f,
2155 3.f, 0.5f, 7.f, 12.f,
2156 4.f, -0.5f, 8.f, 8.f
2159 set_values(input2, {
2160 0.5f, 2.5f, 0.5f, 2.5f,
2162 15.f, 17.f, 8.f, 10.f,
2163 -2.f, 6.5f, -0.5f, -2.5f });
2165 set_values(input3, {
2169 5.f, 1.f, 1.f, 1.f });
2171 network network(engine, topology);
2173 network.set_input_data("input", input);
2174 network.set_input_data("input2", input2);
2175 network.set_input_data("input3", input3);
2176 auto outputs = network.execute();
// The eltwise primitive must be the only network output.
2178 EXPECT_EQ(outputs.size(), size_t(1));
2179 EXPECT_EQ(outputs.begin()->first, "eltwise");
2181 auto output = outputs.at("eltwise").get_memory();
// Expected values, listed in the same element order as the input data above.
2183 float answers[16] = { 4.75f, 1.25f, 4.75f, 2.f,
2184 7.0f, 4.0f, 5.5f, 5.1f,
2185 12.f, 8.75f, 8.5f, 11.f,
2186 3.5f, 3.5f, 4.25f, 3.25f };
2188 auto output_ptr = output.pointer<float>();
2190 for (int i = 0; i < 16; i++)
2192 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
// Element-wise max of three f32 bfyx {2,2,2,2} inputs, each first passed through a
// reorder adding padding { 0, 0, 2, 1 }; the 16 outputs are compared with a
// precomputed table. The comment tables below list input, input2, input3 and the
// expected output.
2196 TEST(eltwise_gpu_f32, max_3inputs_in4x4x4x4_input_padding) {
2201 // Input Padding: 2x1 (with reorder)
2204 // f0: b0: 1 2 b1: 0 0
2205 // f0: b0: 3 4 b1: 0.5 -0.5
2206 // f1: b0: 5 6 b1: 1.5 5.2
2207 // f1: b0: 7 8 b1: 12 8
2210 // f0: b0: 0.5 5 b1: 2.5 7
2211 // f0: b0: 15 6 b1: 17 8
2212 // f1: b0: 0.5 2 b1: 2.5 4
2213 // f1: b0: 8 -0.5 b1: 10 -2.5
2216 // f0: b0: 1.1 1 b1: 4 0
2217 // f0: b0: 15 -1 b1: 3 6
2218 // f1: b0: 1.5 2 b1: 2 7
2219 // f1: b0: 9 0.5 b1: 1 8
2222 // f0: b0: 1.1 5 b1: 4 7
2223 // f0: b0: 15 6 b1: 17 8
2224 // f1: b0: 5 6 b1: 2.5 7
2225 // f1: b0: 9 8 b1: 12 8
2227 const auto& engine = get_test_engine();
2229 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
2230 auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
2231 auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
2234 topology.add(input_layout("input", input.get_layout()));
2235 topology.add(input_layout("input2", input2.get_layout()));
2236 topology.add(input_layout("input3", input3.get_layout()));
// NOTE(review): "reorder2" and "reorder3" are built from input's layout rather than
// their own inputs'; all three memories are allocated with the same layout, so the
// result is the same.
2237 topology.add(reorder("reorder", "input", input.get_layout().with_padding({ { 0, 0, 2, 1 }, 0 })));
2238 topology.add(reorder("reorder2", "input2", input.get_layout().with_padding({ { 0, 0, 2, 1 }, 0 })));
2239 topology.add(reorder("reorder3", "input3", input.get_layout().with_padding({ { 0, 0, 2, 1 }, 0 })));
2240 topology.add(eltwise("eltwise", {"reorder", "reorder2", "reorder3"}, eltwise_mode::max));
2243 1.f, 0.f, 5.f, 1.5f,
2244 2.f, 0.f, 6.f, 5.2f,
2245 3.f, 0.5f, 7.f, 12.f,
2246 4.f, -0.5f, 8.f, 8.f
2249 set_values(input2, {
2250 0.5f, 2.5f, 0.5f, 2.5f,
2252 15.f, 17.f, 8.f, 10.f,
2253 6.f, 8.f, -0.5f, -2.5f });
2255 set_values(input3, {
2256 1.1f, 4.f, 1.5f, 2.f,
2258 15.f, 3.f, 9.f, 1.f,
2259 -1.f, 6.f, 0.5f, 8.f });
2261 network network(engine, topology);
2263 network.set_input_data("input", input);
2264 network.set_input_data("input2", input2);
2265 network.set_input_data("input3", input3);
2266 auto outputs = network.execute();
// The eltwise primitive must be the only network output.
2268 EXPECT_EQ(outputs.size(), size_t(1));
2269 EXPECT_EQ(outputs.begin()->first, "eltwise");
2271 auto output = outputs.at("eltwise").get_memory();
// Expected values, listed in the same element order as the input data above.
2273 float answers[16] = {
2274 1.1f, 4.f, 5.f, 2.5f,
2276 15.f, 17.f, 9.f, 12.f,
2277 6.f, 8.f, 8.f, 8.f };
2279 auto output_ptr = output.pointer<float>();
2281 for (int i = 0; i < 16; i++)
2283 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
// Element-wise max of a yxfb {2,2,2,2} input and a yxfb {2,2,4,4} input, using the
// eltwise overload that takes a per-input tensor list ({0,0,1,1} for "input" and
// {0,0,2,2} for "input2"). Per the test name and the expected-output table these are
// presumably strides, so every second element of input2 in x and y is used - confirm
// against the eltwise primitive documentation.
2288 TEST(eltwise_gpu_f32, stride_test_2x2) {
2294 // f0: b0: 1 2 b1: 0 0
2295 // f0: b0: 3 4 b1: 0.5 -0.5
2296 // f1: b0: 5 6 b1: 1.5 5.2
2297 // f1: b0: 7 8 b1: 12 8
2300 // f0: b0: 1 2 3 4 b1: 17 18 19 20
2301 // f0: b0: 5 6 7 8 b1: 21 22 23 24
2302 // f0: b0: 9 10 11 12 b1: 25 26 27 28
2303 // f0: b0: 13 14 15 16 b1: 29 30 31 32
2305 // f1: b0: 33 34 35 36 b1: 49 50 51 52
2306 // f1: b0: 37 38 39 40 b1: 53 54 55 56
2307 // f1: b0: 41 42 43 44 b1: 57 58 59 60
2308 // f1: b0: 45 46 47 48 b1: 61 62 63 64
2312 // f0: b0: 1 3 b1: 17 19
2313 // f0: b0: 9 11 b1: 25 27
2314 // f1: b0: 33 35 b1: 49 51
2315 // f1: b0: 41 43 b1: 57 59
2317 const auto& engine = get_test_engine();
2319 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
2320 auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 4, 4 } });
2323 topology.add(input_layout("input", input.get_layout()));
2324 topology.add(input_layout("input2", input2.get_layout()));
2325 topology.add(eltwise("eltwise", "input", "input2", { {0,0,1,1}, {0,0,2,2} }, eltwise_mode::max));
2328 1.f, 0.f, 5.f, 1.5f,
2329 2.f, 0.f, 6.f, 5.2f,
2330 3.f, 0.5f, 7.f, 12.f,
2331 4.f, -0.5f, 8.f, 8.f
2334 set_values<float>(input2, {
2352 network network(engine, topology);
2354 network.set_input_data("input", input);
2355 network.set_input_data("input2", input2);
2356 auto outputs = network.execute();
// The eltwise primitive must be the only network output.
2358 EXPECT_EQ(outputs.size(), size_t(1));
2359 EXPECT_EQ(outputs.begin()->first, "eltwise");
2361 auto output = outputs.at("eltwise").get_memory();
2363 float answers[16] = {
2369 auto output_ptr = output.pointer<float>();
2371 for (int i = 0; i < 16; i++)
2373 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
// Sum of a yxfb {2,2,2,2} input and a yxfb {2,2,1,1} input: input2 holds one value per
// (batch, feature) pair, which is added to every spatial position of input. The 16
// outputs are compared with a precomputed table. The comment tables below list input,
// input2 and the expected output.
2377 TEST(eltwise_gpu_f32, broadcast_test_in4x4x2x2) {
2383 // f0: b0: 1 2 b1: 0 0
2384 // f0: b0: 3 4 b1: 0.5 -0.5
2385 // f1: b0: 5 6 b1: 1.5 5.2
2386 // f1: b0: 7 8 b1: 12 8
2389 // f0: b0: 0.5 b1: 2.5
2390 // f1: b0: 0.5 b1: 2.5
2393 // f0: b0: 1.5 2.5 b1: 2.5 2.5
2394 // f0: b0: 3.5 4.5 b1: 3 2
2395 // f1: b0: 5.5 6.5 b1: 4 7.7
2396 // f1: b0: 7.5 8.5 b1: 14.5 10.5
2399 const auto& engine = get_test_engine();
2401 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
2402 auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 1, 1 } });
2405 topology.add(input_layout("input", input.get_layout()));
2406 topology.add(input_layout("input2", input2.get_layout()));
2407 topology.add(eltwise("eltwise", { "input", "input2" }, eltwise_mode::sum));
2410 1.f, 0.f, 5.f, 1.5f,
2411 2.f, 0.f, 6.f, 5.2f,
2412 3.f, 0.5f, 7.f, 12.f,
2413 4.f, -0.5f, 8.f, 8.f
2416 set_values(input2, {
2417 0.5f, 2.5f, 0.5f, 2.5f
2420 network network(engine, topology);
2422 network.set_input_data("input", input);
2423 network.set_input_data("input2", input2);
2424 auto outputs = network.execute();
// The eltwise primitive must be the only network output.
2426 EXPECT_EQ(outputs.size(), size_t(1));
2427 EXPECT_EQ(outputs.begin()->first, "eltwise");
2429 auto output = outputs.at("eltwise").get_memory();
// Expected values, listed in the same element order as the input data above.
2431 float answers[16] = {
2432 1.5f, 2.5f, 5.5f, 4.f,
2433 2.5f, 2.5f, 6.5f, 7.7f,
2434 3.5f, 3.f, 7.5f, 14.5f,
2435 4.5f, 2.f, 8.5f, 10.5f };
2437 auto output_ptr = output.pointer<float>();
2439 for (int i = 0; i < 16; i++)
2441 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
// Host-side reference for a single boolean/comparison eltwise operation.
// Dispatches on `mode` over the comparison modes (eq, ne, lt, le, gt, ge) and the
// logical modes (logic_and, logic_or) applied to x and y; the result is returned
// as int8_t.
2446 template <typename T>
2447 int8_t eltwise_bool_execute(cldnn::eltwise_mode mode, T x, T y) {
2449 case eltwise_mode::eq:
2451 case eltwise_mode::ne:
2453 case eltwise_mode::lt:
2455 case eltwise_mode::le:
2457 case eltwise_mode::gt:
2459 case eltwise_mode::ge:
2461 case eltwise_mode::logic_and:
2463 case eltwise_mode::logic_or:
// Host-side reference for a boolean/comparison eltwise over two 4D inputs indexed as
// [b][f][y][x].
//   input1, input2       - operands; input2 is indexed with input1's extents
//   mode                 - eltwise mode forwarded to eltwise_bool_execute
//   input_padding_y/x,
//   output_padding_y/x   - summed per axis; the output is enlarged by twice that sum in
//                          y and x, and results are written at an offset of that sum
// Produces a VVVVF<int8_t> of size b x f x (y + 2*padding_y) x (x + 2*padding_x).
2470 template <typename T>
2471 VVVVF<int8_t> eltwise_bool_reference(VVVVF<T> &input1, VVVVF<T> &input2,
2472 cldnn::eltwise_mode mode, int input_padding_y = 0,
2473 int input_padding_x = 0, int output_padding_y = 0,
2474 int output_padding_x = 0) {
2476 size_t padding_y = input_padding_y + output_padding_y;
2477 size_t padding_x = input_padding_x + output_padding_x;
2478 size_t output_b = input1.size();
2479 size_t output_f = input1[0].size();
2480 size_t output_y = input1[0][0].size() + 2 * padding_y;
2481 size_t output_x = input1[0][0][0].size() + 2 * padding_x;
2482 VVVVF<int8_t> output(output_b, VVVF<int8_t>(output_f, VVF<int8_t>(output_y, VF<int8_t>(output_x))));
// Only the unpadded interior is computed; each result lands at the padded offset.
2485 for (size_t b = 0; b < output_b; ++b) {
2486 for (size_t f = 0; f < output_f; ++f) {
2487 for (size_t y = 0; y < input1[0][0].size(); ++y) {
2488 for (size_t x = 0; x < input1[0][0][0].size(); ++x) {
2489 res = eltwise_bool_execute<T>(mode, input1[b][f][y][x], input2[b][f][y][x]);
2490 output[b][f][y + padding_y][x + padding_x] = res;
// Runs one boolean/comparison eltwise mode on the test engine with randomly generated
// inputs and compares the result element by element with eltwise_bool_reference.
//   test_input_fmt       - memory format of both inputs; the output format must match it
//   input_b/f/y/x        - input dimensions
//   mode                 - eltwise mode under test
//   input_padding_y/x    - padding added to the first input through a reorder
//   output_padding_y/x   - output padding requested on the eltwise primitive
// On mismatch the failure message lists all test parameters.
2498 template <typename T>
2499 void generic_eltwise_bool_test(cldnn::format test_input_fmt, int input_b, int input_f, int input_y, int input_x, cldnn::eltwise_mode mode,
2500 int input_padding_y, int input_padding_x, int output_padding_y, int output_padding_x) {
// Random 4D inputs generated with the bounds min_random/max_random, then flattened
// into the memory order of test_input_fmt.
2502 int min_random = -2, max_random = 2;
2503 VVVVF<T> input1_rnd = generate_random_4d<T>(input_b, input_f, input_y, input_x, min_random, max_random);
2504 VVVVF<T> input2_rnd = generate_random_4d<T>(input_b, input_f, input_y, input_x, min_random, max_random);
2505 VF<T> input1_rnd_vec = flatten_4d<T>(test_input_fmt, input1_rnd);
2506 VF<T> input2_rnd_vec = flatten_4d<T>(test_input_fmt, input2_rnd);
2508 const auto& engine = get_test_engine();
// Note the argument order: the tensor is built with x before y.
2509 tensor input_tensor( input_b, input_f, input_x, input_y );
2510 auto input1 = memory::allocate(engine, { type_to_data_type<T>::value, test_input_fmt, input_tensor });
2511 auto input2 = memory::allocate(engine, { type_to_data_type<T>::value, test_input_fmt, input_tensor });
2512 set_values(input1, input1_rnd_vec);
2513 set_values(input2, input2_rnd_vec);
2516 topology.add(input_layout("input1", input1.get_layout()));
2517 topology.add(input_layout("input2", input2.get_layout()));
// Only input1 is padded (via reorder1); input2 is fed to the eltwise directly.
2518 topology.add(reorder("reorder1", "input1", input1.get_layout().with_padding({{ 0, 0, input_padding_x, input_padding_y }, 0 })));
2519 topology.add(eltwise("eltwise", {"reorder1", "input2"}, mode, false, 0.f, { { 0, 0, output_padding_x, output_padding_y }, 0 }));
2521 network network(engine, topology);
2522 network.set_input_data("input1", input1);
2523 network.set_input_data("input2", input2);
2524 auto outputs = network.execute();
// The eltwise primitive must be the only network output.
2525 EXPECT_EQ(outputs.size(), size_t(1));
2526 EXPECT_EQ(outputs.begin()->first, "eltwise");
2528 auto output_memory = outputs.at("eltwise").get_memory();
2529 auto output_layout = output_memory.get_layout();
// The output is read as int8_t regardless of the input type T.
2530 auto output_ptr = output_memory.pointer<int8_t>();
2532 VVVVF<int8_t> output_cpu = eltwise_bool_reference<T>(input1_rnd, input2_rnd, mode, input_padding_y, input_padding_x, output_padding_y, output_padding_x);
2533 EXPECT_EQ(output_layout.format.value, test_input_fmt.value);
// The buffer size of the output layout must match the reference dimensions.
2534 tensor output_tensor = output_layout.get_buffer_size();
2535 int y_size = output_tensor.spatial[1];
2536 int x_size = output_tensor.spatial[0];
2537 int f_size = output_tensor.feature[0];
2538 int b_size = output_tensor.batch[0];
2539 EXPECT_EQ(y_size, (int)output_cpu[0][0].size());
2540 EXPECT_EQ(x_size, (int)output_cpu[0][0][0].size());
2541 EXPECT_EQ(f_size, (int)output_cpu[0].size());
2542 EXPECT_EQ(b_size, (int)output_cpu.size());
// Element-wise comparison of the flattened reference with the GPU output.
2544 bool test_is_correct = true;
2545 VF<int8_t> output_cpu_vec = flatten_4d<int8_t>(test_input_fmt, output_cpu);
2546 for (size_t i = 0; i < output_cpu_vec.size(); ++i) {
2547 if (output_cpu_vec[i] != output_ptr[i]) {
2548 test_is_correct = false;
2552 EXPECT_EQ(test_is_correct, true) << std::endl
2553 << "failing test parameters:" << std::endl
2554 << "input_b = " << input_b << std::endl
2555 << "input_f = " << input_f << std::endl
2556 << "input_y = " << input_y << std::endl
2557 << "input_x = " << input_x << std::endl
2558 << "eltwise_mode = " << (int)mode << std::endl
2559 << "input_padding_y = " << input_padding_y << std::endl
2560 << "input_padding_x = " << input_padding_x << std::endl
2561 << "output_padding_y = " << output_padding_y << std::endl
2562 << "output_padding_x = " << output_padding_x << std::endl
// The type label is derived from sizeof(T): 1 byte prints "int8", anything else "int32".
2563 << "type = " << (sizeof(T) == 1 ? "int8" : "int32") << std::endl;
// Runs generic_eltwise_bool_test for the given mode with int32_t and int8_t inputs,
// using bfyx format, batch 1, feature 1, a 227x227 spatial size and no padding.
2566 void run_eltwise_bool_generic_test(cldnn::eltwise_mode mode)
2568 cldnn::format test_inputs_fmt = cldnn::format::bfyx;
2569 std::pair<int, int> input_size = { 227, 227 };
2571 generic_eltwise_bool_test<int32_t>(test_inputs_fmt, 1, 1, input_size.first, input_size.second, mode, 0, 0, 0, 0);
2572 generic_eltwise_bool_test<int8_t>(test_inputs_fmt, 1, 1, input_size.first, input_size.second, mode, 0, 0, 0, 0);
// Runs the generic boolean eltwise test for the eq mode.
2575 TEST(eltwise_gpu_bool, eltwise_eq) {
2576 run_eltwise_bool_generic_test(cldnn::eltwise_mode::eq);
// Runs the generic boolean eltwise test for the ne mode.
2579 TEST(eltwise_gpu_bool, eltwise_ne) {
2580 run_eltwise_bool_generic_test(cldnn::eltwise_mode::ne);
// Runs the generic boolean eltwise test for the lt mode.
2583 TEST(eltwise_gpu_bool, eltwise_lt) {
2584 run_eltwise_bool_generic_test(cldnn::eltwise_mode::lt);
// Runs the generic boolean eltwise test for the le mode.
2587 TEST(eltwise_gpu_bool, eltwise_le) {
2588 run_eltwise_bool_generic_test(cldnn::eltwise_mode::le);
// Runs the generic boolean eltwise test for the gt mode.
2591 TEST(eltwise_gpu_bool, eltwise_gt) {
2592 run_eltwise_bool_generic_test(cldnn::eltwise_mode::gt);
// Runs the generic boolean eltwise test for the ge mode.
2595 TEST(eltwise_gpu_bool, eltwise_ge) {
2596 run_eltwise_bool_generic_test(cldnn::eltwise_mode::ge);
// Runs the generic boolean eltwise test for the logic_and mode.
2599 TEST(eltwise_gpu_bool, eltwise_and) {
2600 run_eltwise_bool_generic_test(cldnn::eltwise_mode::logic_and);
// Runs the generic boolean eltwise test for the logic_or mode.
2603 TEST(eltwise_gpu_bool, eltwise_or) {
2604 run_eltwise_bool_generic_test(cldnn::eltwise_mode::logic_or);
// Runs generic_eltwise_test for the given mode with float and FLOAT16 inputs, using
// bfyx format, batch 1, feature 1, a 227x227 spatial size, no relu and no padding.
// When the engine reports no fp16 support a "SKIPPED" notice is printed.
// NOTE(review): the FLOAT16 run is presumably guarded by f16_supported - confirm.
2608 void run_eltwise_generic_test(cldnn::eltwise_mode mode)
2610 cldnn::format test_inputs_fmt = cldnn::format::bfyx;
2611 std::pair<int, int> input_size = { 227, 227 };
2613 const auto& engine = get_test_engine();
2614 bool f16_supported = !!engine.get_info().supports_fp16;
2615 if (!f16_supported) {
2616 std::cout << "[ SKIPPED ] float16 combinations are skipped (cl_khr_fp16 is not supported)." << std::endl;
2619 generic_eltwise_test<float>(test_inputs_fmt, 1, 1, input_size.first, input_size.second, mode, false, 0.f, 0, 0, 0, 0);
2621 generic_eltwise_test<FLOAT16>(test_inputs_fmt, 1, 1, input_size.first, input_size.second, mode, false, (FLOAT16)0.f, 0, 0, 0, 0);
// Runs the generic eltwise test for the div mode.
2624 TEST(eltwise_gpu, eltwise_div) {
2625 run_eltwise_generic_test(cldnn::eltwise_mode::div);
// Runs the generic eltwise test for the min mode.
2628 TEST(eltwise_gpu, eltwise_min) {
2629 run_eltwise_generic_test(cldnn::eltwise_mode::min);
// Runs the generic eltwise test for the pow mode.
2632 TEST(eltwise_gpu, eltwise_pow) {
2633 run_eltwise_generic_test(cldnn::eltwise_mode::pow);
// Runs the generic eltwise test for the mod mode.
2636 TEST(eltwise_gpu, eltwise_mod) {
2637 run_eltwise_generic_test(cldnn::eltwise_mode::mod);
// Compares an eltwise sum executed on inputs reordered to i8 b_fs_yx_fsv4 ("eltw_IMAD")
// against a reference eltwise sum ("eltw_GOLD") built from the same input1/input2
// memories, with a per-feature "callib" data primitive added to both topologies.
// The comparison is repeated for every batch/feature/size triple in the tables below
// and the two outputs must match byte for byte.
2641 TEST(eltwise_gpu, b_fs_yx_fsv4_w_callib) {
// The three tables are indexed in lockstep by j; the trailing 0 in F_array is the
// terminator that stops the loop below.
2642 int B_array[] = { 1, 4, 16, 32, 0 }; // Batch
2643 int F_array[] = { 256, 512, 1024, 2048, 0 }; // Features
2644 int I_array[] = { 56, 28, 14, 14, 0 }; // Input MxM data sizes
2646 for (int j = 0; F_array[j]; j++) {
2647 const auto& engine = get_test_engine();
2649 int in_B = B_array[j];
2650 int in_F = F_array[j];
2652 int in_X = I_array[j],
2656 std::vector<char> Data(in_B * in_F * in_X * in_Y);
2657 for (size_t i = 0; i < Data.size(); i++)
2658 Data[i] = static_cast<char>(i); // element index narrowed to char
2659 std::vector<char> DataGold(Data);
2661 // Expected "gold" output and IMAD output.
2662 std::vector<char> vGoldOutput;
2663 std::vector<char> vTestOutput;
2665 // Mem initialization
2666 // This is user data, no kernels here
2667 auto input1 = memory::allocate(engine,
2670 { in_B, in_F, in_X, in_Y } });
2671 std::vector<char> data_i1(DataGold);
2672 set_values(input1, std::move(data_i1));
2673 auto input2 = memory::allocate(engine,
2676 { in_B, in_F, in_X, in_Y } });
2677 std::vector<char> data_i2(DataGold);
2678 set_values(input2, std::move(data_i2));
// "callib" memory: one float per feature (in_F values, dimensions { 1, in_F, 1, 1 }).
2680 auto callib = memory::allocate(engine,
2683 { 1, in_F, 1, 1 } });
2684 std::vector<float> data_c(in_F);
// Entries where (i + 1) % 7 == 0 receive -sign, all others +sign; sign is scaled
// by 1.0123 after every element.
2686 for (size_t i = 0; i < data_c.size(); i++) {
2687 data_c[i] = ((i + 1) % 7) ? sign : -sign;
2688 sign *= (float)1.0123;
2690 set_values(callib, std::move(data_c));
// Reference ("gold") eltwise in sum mode.
2696 auto eltw = eltwise("eltw_GOLD",
2699 eltwise_mode::sum, true);
2701 // Create a topology
2702 topology.add(input_layout("input1", input1.get_layout()),
2703 input_layout("input2", input2.get_layout()),
2706 topology.add(data("callib", callib));
2708 // Network processing
2709 network network(engine, topology);
2710 network.set_input_data("input1", input1);
2711 network.set_input_data("input2", input2);
2712 auto outputs = network.execute();
// NOTE(review): EXPECT_NE is non-fatal, and the memory below is taken from
// outputs.begin() rather than from searchC. This presumably relies on "eltw_GOLD"
// being the first (only) network output; confirm.
2715 auto searchC = outputs.find("eltw_GOLD");
2716 EXPECT_NE(searchC, outputs.end());
2717 auto output = outputs.begin()->second.get_memory();
2718 auto output_ptr = output.pointer<char>();
2719 vGoldOutput.reserve(output_ptr.size());
2720 for (size_t i = 0; i < output_ptr.size(); i++)
2721 vGoldOutput.push_back(output_ptr[i]);
2728 // Reorder (a.k.a. swizzle) the inputs into the i8 b_fs_yx_fsv4 layout used by the MMAD/IMAD eltwise
2729 topology.add(reorder("reorder1_Swizzelled",
2731 layout(data_types::i8,
2732 format::b_fs_yx_fsv4,
2733 { in_B, in_F, in_X, in_Y })),
2734 reorder("reorder2_Swizzelled",
2736 layout(data_types::i8,
2737 format::b_fs_yx_fsv4,
2738 { in_B, in_F, in_X, in_Y })));
// Eltwise under test: same sum mode, fed by the two reordered inputs.
2740 auto eltw = eltwise("eltw_IMAD",
2741 "reorder1_Swizzelled", "reorder2_Swizzelled",
2743 eltwise_mode::sum, true);
2745 topology.add(input_layout("input1", input1.get_layout()),
2746 input_layout("input2", input2.get_layout()),
2749 topology.add(data("callib", callib));
2751 // Back reordering (a.k.a. unswizzling) of the output from the MMAD/IMAD eltwise
2752 topology.add(reorder("reorder_UnSwizzelled",
2754 layout(data_types::i8,
2756 { in_B, in_F, in_X, in_Y })));
2758 // Network processing
2759 network network(engine, topology);
2760 network.set_input_data("input1", input1);
2761 network.set_input_data("input2", input2);
2762 auto outputs = network.execute();
// NOTE(review): same pattern as the gold path. The lookup result is only checked
// with a non-fatal EXPECT_NE and the memory is read from outputs.begin(); confirm
// that "reorder_UnSwizzelled" is the first entry.
2765 auto searchC = outputs.find("reorder_UnSwizzelled");
2766 EXPECT_NE(searchC, outputs.end());
2767 auto output = outputs.begin()->second.get_memory();
2768 auto output_ptr = output.pointer<char>();
2769 vTestOutput.reserve(output_ptr.size());
2770 for (size_t i = 0; i < output_ptr.size(); i++)
2771 vTestOutput.push_back(output_ptr[i]);
2774 // Result validation
// Sizes must match first; ASSERT_TRUE then aborts the test at the first differing byte.
2775 ASSERT_TRUE(vGoldOutput.size() == vTestOutput.size());
2776 for (size_t i = 0; i < vGoldOutput.size(); i++)
2777 ASSERT_TRUE(vTestOutput[i] == vGoldOutput[i]);
2778 } // for (int j = 0; F_array[j]; j++)
// Three-input variant of the b_fs_yx_fsv4 eltwise check without a "callib" primitive:
// a reference eltwise ("eltw_GOLD") over input1/input2/input3 is compared byte for byte
// with an eltwise ("eltw_IMAD") fed by the same inputs reordered to i8 b_fs_yx_fsv4.
// The comparison is repeated once per entry of mode[] (min, max, sum).
2781 TEST(eltwise_gpu, b_fs_yx_fsv4_wo_callib) {
2784 const int BATCH = 1;
2785 const int in_B = BATCH;
2787 const auto& engine = get_test_engine();
2795 std::vector<char> Data(in_B * in_F * in_X * in_Y);
2796 for (size_t i = 0; i < Data.size(); i++)
2797 Data[i] = static_cast<char>(i); // element index narrowed to char
2798 std::vector<char> DataGold(Data);
2800 // Mem initialization
2801 // This is user data, no kernels here
// The three inputs hold the base pattern offset by +1, +2 and +3 respectively,
// so they differ from each other element by element.
2802 auto input1 = memory::allocate(engine,
2805 { in_B, in_F, in_X, in_Y } });
2806 std::vector<char> data_i1(DataGold);
2807 for (size_t i = 0; i < data_i1.size(); i++) data_i1[i] = data_i1[i] + 1;
2808 set_values(input1, std::move(data_i1));
2810 auto input2 = memory::allocate(engine,
2813 { in_B, in_F, in_X, in_Y } });
2814 std::vector<char> data_i2(DataGold);
2815 for (size_t i = 0; i < data_i2.size(); i++) data_i2[i] = data_i2[i] + 2;
2816 set_values(input2, std::move(data_i2));
2818 auto input3 = memory::allocate(engine,
2821 { in_B, in_F, in_X, in_Y } });
2822 std::vector<char> data_i3(DataGold);
2823 for (size_t i = 0; i < data_i3.size(); i++) data_i3[i] = data_i3[i] + 3;
2824 set_values(input3, std::move(data_i3));
// Modes covered by this test; the loop bound 3 below is the number of entries here.
2826 cldnn::eltwise_mode mode[] = { cldnn::eltwise_mode::min,
2827 cldnn::eltwise_mode::max,
2828 cldnn::eltwise_mode::sum };
// NOTE(review): confirm that mode[i] is the mode handed to both eltwise primitives.
2830 for (int i = 0; i < 3; i++) {
2831 // Expected "gold" output and IMAD output.
2832 std::vector<char> vGoldOutput;
2833 std::vector<char> vTestOutput;
// Reference ("gold") eltwise over the three unreordered inputs.
2839 auto eltw = eltwise("eltw_GOLD",
2840 { "input1", "input2", "input3" },
2843 // Create a topology
2844 topology.add(input_layout("input1", input1.get_layout()),
2845 input_layout("input2", input2.get_layout()),
2846 input_layout("input3", input3.get_layout()),
2849 // Network processing
2850 network network(engine, topology);
2851 network.set_input_data("input1", input1);
2852 network.set_input_data("input2", input2);
2853 network.set_input_data("input3", input3);
2854 auto outputs = network.execute();
// NOTE(review): EXPECT_NE is non-fatal, and the memory below is taken from
// outputs.begin() rather than from searchC. This presumably relies on "eltw_GOLD"
// being the first (only) network output; confirm.
2857 auto searchC = outputs.find("eltw_GOLD");
2858 EXPECT_NE(searchC, outputs.end());
2859 auto output = outputs.begin()->second.get_memory();
2860 auto output_ptr = output.pointer<char>();
2861 vGoldOutput.reserve(output_ptr.size());
2862 for (size_t i = 0; i < output_ptr.size(); i++) // this i shadows the mode-loop index
2863 vGoldOutput.push_back(output_ptr[i]);
2870 // Reorder (a.k.a. swizzle) the inputs into the i8 b_fs_yx_fsv4 layout used by the MMAD/IMAD eltwise
2871 topology.add(reorder("reorder1_Swizzelled",
2873 layout(data_types::i8,
2874 format::b_fs_yx_fsv4,
2875 { in_B, in_F, in_X, in_Y })),
2876 reorder("reorder2_Swizzelled",
2878 layout(data_types::i8,
2879 format::b_fs_yx_fsv4,
2880 { in_B, in_F, in_X, in_Y })),
2881 reorder("reorder3_Swizzelled",
2883 layout(data_types::i8,
2884 format::b_fs_yx_fsv4,
2885 { in_B, in_F, in_X, in_Y })));
// Eltwise under test, fed by the three reordered inputs.
2887 auto eltw = eltwise("eltw_IMAD",
2888 { "reorder1_Swizzelled",
2889 "reorder2_Swizzelled",
2890 "reorder3_Swizzelled" },
2893 topology.add(input_layout("input1", input1.get_layout()),
2894 input_layout("input2", input2.get_layout()),
2895 input_layout("input3", input3.get_layout()),
2898 // Back reordering (a.k.a. unswizzling) of the output from the MMAD/IMAD eltwise
2899 topology.add(reorder("reorder_UnSwizzelled",
2901 layout(data_types::i8,
2903 { in_B, in_F, in_X, in_Y })));
2905 // Network processing
2906 network network(engine, topology);
2907 network.set_input_data("input1", input1);
2908 network.set_input_data("input2", input2);
2909 network.set_input_data("input3", input3);
2910 auto outputs = network.execute();
// NOTE(review): same pattern as the gold path. The lookup result is only checked
// with a non-fatal EXPECT_NE and the memory is read from outputs.begin(); confirm
// that "reorder_UnSwizzelled" is the first entry.
2913 auto searchC = outputs.find("reorder_UnSwizzelled");
2914 EXPECT_NE(searchC, outputs.end());
2915 auto output = outputs.begin()->second.get_memory();
2916 auto output_ptr = output.pointer<char>();
2917 vTestOutput.reserve(output_ptr.size());
2918 for (size_t i = 0; i < output_ptr.size(); i++) // this i shadows the mode-loop index
2919 vTestOutput.push_back(output_ptr[i]);
2922 // Result validation
// Sizes must match first; ASSERT_TRUE then aborts the test at the first differing byte.
2923 ASSERT_TRUE(vGoldOutput.size() == vTestOutput.size());
2924 for (size_t i = 0; i < vGoldOutput.size(); i++)
2925 ASSERT_TRUE(vTestOutput[i] == vGoldOutput[i]);
2929 TEST(DISABLED_eltwise_gpu, generic_random) {
2930 VF<cldnn::format> test_inputs_fmts = { cldnn::format::bfyx, cldnn::format::yxfb };
2931 VF<cldnn::eltwise_mode> modes = { cldnn::eltwise_mode::sum, cldnn::eltwise_mode::sub, cldnn::eltwise_mode::max, cldnn::eltwise_mode::prod };
2932 VF<bool> relu = { true, false };
2933 VF<float> slopes = { 0.0f, -0.0f, -17.19f, 1028.8f, std::numeric_limits<float>::infinity(), -std::numeric_limits<float>::infinity() };
2934 std::vector<std::pair<int, int>> input_sizes = { { 100, 100 },{ 227, 227 },{ 400, 600 } };
2936 const auto& engine = get_test_engine();
2937 bool f16_supported = !!engine.get_info().supports_fp16;
2938 if (!f16_supported) {
2939 std::cout << "[ SKIPPED ] float16 combinations are skipped (cl_khr_fp16 is not supported)." << std::endl;
2942 for (cldnn::format test_input_fmt : test_inputs_fmts) {
2943 for (int input_b = 1; input_b <= 16; input_b *= 2) {
2944 for (int input_f = 1; input_f <= 1; ++input_f) {
2945 for (std::pair<int, int> &input_yx : input_sizes) {
2946 for (cldnn::eltwise_mode mode : modes) {
2947 for (bool relu_activated : relu) {
2948 for (float slope : slopes) {
2949 for (int input_padding_y = 0; input_padding_y <= 0; ++input_padding_y) {
2950 for (int input_padding_x = 0; input_padding_x <= 0; ++input_padding_x) {
2951 for (int output_padding_y = 0; output_padding_y <= 1; ++output_padding_y) {
2952 for (int output_padding_x = 0; output_padding_x <= 1; ++output_padding_x) {
2953 generic_eltwise_test<float>(test_input_fmt, input_b, input_f, input_yx.first, input_yx.second, mode, relu_activated, slope, input_padding_y, input_padding_x, output_padding_y, output_padding_x);
2954 if (!f16_supported) continue;
2955 generic_eltwise_test<FLOAT16>(test_input_fmt, input_b, input_f, input_yx.first, input_yx.second, mode, relu_activated, (FLOAT16)slope, input_padding_y, input_padding_x, output_padding_y, output_padding_x);