Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / tests / test_cases / eltwise_gpu_test.cpp
index 750aaa5..417ab07 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -23,6 +23,7 @@
 #include <api/CPP/network.hpp>
 #include <api/CPP/engine.hpp>
 #include <api/CPP/reorder.hpp>
+#include <api/CPP/data.hpp>
 #include "test_utils/test_utils.h"
 
 namespace cldnn
@@ -97,7 +98,7 @@ void generic_eltwise_test(cldnn::format test_input_fmt, int input_b, int input_f
     VF<T> input1_rnd_vec = flatten_4d<T>(test_input_fmt, input1_rnd);
     VF<T> input2_rnd_vec = flatten_4d<T>(test_input_fmt, input2_rnd);
 
-    engine engine;
+    const auto& engine = get_test_engine();
     tensor input_tensor( input_b, input_f, input_x, input_y );
     auto input1 = memory::allocate(engine, { type_to_data_type<T>::value, test_input_fmt, input_tensor });
     auto input2 = memory::allocate(engine, { type_to_data_type<T>::value, test_input_fmt, input_tensor });
@@ -157,707 +158,2283 @@ void generic_eltwise_test(cldnn::format test_input_fmt, int input_b, int input_f
         << "type = " << (sizeof(T) == 2 ? "float16" : "float32") << std::endl;
 }
 
-TEST(eltwise_gpu_f32, add_basic_in4x4x2x2) {
-    //  Input2   : 2x2x2
+TEST(eltwise_gpu_f32, equal_in2_float_out1_int) {
+    //  Input2 : 2x2x2x2
     //  Input  : 2x2x2x2
     //  Output : 2x2x2x2
 
     //  Input:
-    //  f0: b0:  1    2  b1:   0    0       
-    //  f0: b0:  3    4  b1:   0.5 -0.5     
-    //  f1: b0:  5    6  b1:   1.5  5.2     
-    //  f1: b0:  7    8  b1:   12   8       
+    //  1.f,   2.5f,    5.f,    1.5f,
+    //  2.f,   0.f,     6.f,    5.2f,
+    //  3.f,   0.5f,    7.f,    12.f,
+    //  4.f,   0.f,     8.f,    8.f
     //
     //  Input2
-    //  f0: b0: 0.5  5   b1: 2.5  7 
-    //  f0: b0: 15  -2   b1: 17   6.5
-    //  f1: b0: 0.5  2   b1: 2.5  4
-    //  f1: b0: 8   -0.5 b1: 10   -2.5
+    //  0.5f,  2.5f,    0.5f,   1.5f,
+    //  5.f,   7.f,     6.f,    4.f,
+    //  15.f,  17.f,    8.f,    10.f,
+    //  -2.f,  0.f,     -0.5f,  -2.5f
     //
     //  Output:
-    //  f0: b0:   1.5  7    b1:  2.5   7      
-    //  f0: b0:   18   2    b1:  17.5  6     
-    //  f1: b0:   5.5  8    b1:   4    9.2     
-    //  f1: b0:   15  16.5  b1:  22    16.5     
-    //
+    //  0, 1, 0, 1,
+    //  0, 0, 1, 0,
+    //  0, 0, 0, 0,
+    //  0, 1, 0, 0
 
-    engine engine;
-
-    auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
-    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
+    const auto& engine = get_test_engine();
 
-    topology topology;
-    topology.add(input_layout("input", input.get_layout()));
-    topology.add(input_layout("input2", input2.get_layout()));
-    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum));
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
 
-    set_values(input, {
-        1.f,   0.f, 5.f, 1.5f,
-        2.f,   0.f, 6.f, 5.2f,
-        3.f,  0.5f, 7.f, 12.f,
-        4.f, -0.5f, 8.f,  8.f
+    set_values(input1, {
+            1.f,   2.5f, 5.f, 1.5f,
+            2.f,   0.f, 6.f, 5.2f,
+            3.f,  0.5f, 7.f, 12.f,
+            4.f, 0.f, 8.f,  8.f
     });
 
     set_values(input2, {
-        0.5f,   2.5f,  0.5f,  2.5f,
-         5.f,   7.f,    2.f,   4.f,
-        15.f,  17.f,    8.f,  10.f,
-        -2.f,  6.5f,  -0.5f, -2.5f });
+            0.5f,   2.5f,  0.5f,  1.5f,
+            5.f,   7.f,    6.f,   4.f,
+            15.f,  17.f,    8.f,  10.f,
+            -2.f,  0.f,  -0.5f, -2.5f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::eq));
 
     network network(engine, topology);
 
-    network.set_input_data("input", input);
+    network.set_input_data("input", input1);
     network.set_input_data("input2", input2);
+
     auto outputs = network.execute();
 
     EXPECT_EQ(outputs.size(), size_t(1));
     EXPECT_EQ(outputs.begin()->first, "eltwise");
 
     auto output = outputs.at("eltwise").get_memory();
+    auto output_ptr = output.pointer<int8_t>();
 
-    float answers[16] = { 1.5f, 2.5f,   5.5f,    4.f,
-                          7.f,   7.f,    8.f,   9.2f,
-                          18.f,17.5f,   15.f,   22.f,
-                          2.f,   6.f,   7.5f,  5.5f };
-
-    auto output_ptr = output.pointer<float>();
+    std::vector<int8_t> answers = { 0, 1, 0, 1,
+                                    0, 0, 1, 0,
+                                    0, 0, 0, 0,
+                                    0, 1, 0, 0 };
 
-    for (int i = 0; i < 16; i++)
-    {
-        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]);
     }
 }
 
-TEST(eltwise_gpu_f32, max_basic_in4x4x4x4) {
-    //  Input2   : 2x2x2
+TEST(eltwise_gpu_f32, not_equal_in2_float_out1_int) {
+    //  Input2 : 2x2x2x2
     //  Input  : 2x2x2x2
     //  Output : 2x2x2x2
 
     //  Input:
-    //  f0: b0:  1    2  b1:   0    0       
-    //  f0: b0:  3    4  b1:   0.5 -0.5     
-    //  f1: b0:  5    6  b1:   1.5  5.2     
-    //  f1: b0:  7    8  b1:   12   8       
+    //  1.f,   2.5f,    5.f,    1.5f,
+    //  2.f,   0.f,     6.f,    5.2f,
+    //  3.f,   0.5f,    7.f,    12.f,
+    //  4.f,   0.f,     8.f,    8.f
     //
     //  Input2
-    //  f0: b0: 0.5  5   b1: 2.5  7 
-    //  f0: b0: 15   6   b1: 17   8
-    //  f1: b0: 0.5  2   b1: 2.5  4
-    //  f1: b0: 8   -0.5 b1: 10   -2.5
+    //  0.5f,  2.5f,    0.5f,   1.5f,
+    //  5.f,   7.f,     6.f,    4.f,
+    //  15.f,  17.f,    8.f,    10.f,
+    //  -2.f,  0.f,     -0.5f,  -2.5f
     //
     //  Output:
-    //  f0: b0:    1   5    b1:  2.5   7       
-    //  f0: b0:   15   6    b1:  17    8    
-    //  f1: b0:    5   6    b1:  2.5   5.2     
-    //  f1: b0:    8   8    b1:  12    8     
-    //
-    engine engine;
+    //  1, 0, 1, 0,
+    //  1, 1, 0, 1,
+    //  1, 1, 1, 1,
+    //  1, 0, 1, 1
 
-    auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
-    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
+    const auto& engine = get_test_engine();
 
-    topology topology;
-    topology.add(input_layout("input", input.get_layout()));
-    topology.add(input_layout("input2", input2.get_layout()));
-    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::max));
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
 
-    set_values(input, {
-        1.f,   0.f,  5.f,  1.5f,
-        2.f,   0.f,  6.f,  5.2f,
-        3.f,   0.5f, 7.f, 12.f,
-        4.f,  -0.5f, 8.f,  8.f
+    set_values(input1, {
+            1.f,   2.5f, 5.f, 1.5f,
+            2.f,   0.f, 6.f, 5.2f,
+            3.f,  0.5f, 7.f, 12.f,
+            4.f, 0.f, 8.f,  8.f
     });
 
     set_values(input2, {
-        0.5f,  2.5f,  0.5f,  2.5f, 
-         5.f,   7.f,   2.f,   4.f,
-        15.f,  17.f,   8.f,  10.f,
-         6.f,   8.f, -0.5f, -2.5f });
+            0.5f,   2.5f,  0.5f,  1.5f,
+            5.f,   7.f,    6.f,   4.f,
+            15.f,  17.f,    8.f,  10.f,
+            -2.f,  0.f,  -0.5f, -2.5f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::ne));
 
     network network(engine, topology);
 
-    network.set_input_data("input", input);
+    network.set_input_data("input", input1);
     network.set_input_data("input2", input2);
+
     auto outputs = network.execute();
 
     EXPECT_EQ(outputs.size(), size_t(1));
     EXPECT_EQ(outputs.begin()->first, "eltwise");
 
     auto output = outputs.at("eltwise").get_memory();
+    auto output_ptr = output.pointer<int8_t>();
 
-    float answers[16] = {
-         1.f,   2.5f,  5.f,   2.5f,
-         5.f,   7.f,   6.f,   5.2f,
-        15.f,  17.f,   8.f,  12.f,
-         6.f,   8.f,   8.f,   8.f };
-
-    auto output_ptr = output.pointer<float>();
+    std::vector<int8_t> answers = { 1, 0, 1, 0,
+                                    1, 1, 0, 1,
+                                    1, 1, 1, 1,
+                                    1, 0, 1, 1 };
 
-    for (int i = 0; i < 16; i++)
-    {
-        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]);
     }
 }
 
-TEST(eltwise_gpu_f32, sub_basic_in4x4x4x4) {
-    //  Input2   : 2x2x2
+TEST(eltwise_gpu_f32, less_in2_float_out1_int) {
+    //  Input2 : 2x2x2x2
     //  Input  : 2x2x2x2
     //  Output : 2x2x2x2
 
     //  Input:
-    //  f0: b0:  1    2  b1:   0    0       
-    //  f0: b0:  3    4  b1:   0.5 -0.5     
-    //  f1: b0:  5    6  b1:   1.5  5.2     
-    //  f1: b0:  7    8  b1:   12   8       
+    //  1.f,   2.5f,    5.f,    1.5f,
+    //  2.f,   0.f,     6.f,    5.2f,
+    //  3.f,   0.5f,    7.f,    12.f,
+    //  4.f,   0.f,     8.f,    8.f
     //
     //  Input2
-    //  f0: b0: 0.5  5   b1: 2.5  7 
-    //  f0: b0: 15   6   b1: 17   8
-    //  f1: b0: 0.5  2   b1: -1   2
-    //  f1: b0: 8   -0.5 b1: 8.5  10.5
+    //  0.5f,  2.5f,    0.5f,   1.5f,
+    //  5.f,   7.f,     6.f,    4.f,
+    //  15.f,  17.f,    8.f,    10.f,
+    //  -2.f,  0.f,     -0.5f,  -2.5f
     //
     //  Output:
-    //  f0: b0:   0.5  -3    b1:  -2.5  -7       
-    //  f0: b0:   -12  -2    b1:  -16.5 -8.5    
-    //  f1: b0:   4.5   4    b1:  2.5    3.2     
-    //  f1: b0:   -1    8.5  b1:  3.5   -2.5     
-    //
+    //  0, 0, 0, 0,
+    //  1, 1, 0, 0,
+    //  1, 1, 1, 0,
+    //  0, 0, 0, 0
 
-    engine engine;
-    auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
-    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
+    const auto& engine = get_test_engine();
 
-    topology topology;
-    topology.add(input_layout("input", input.get_layout()));
-    topology.add(input_layout("input2", input2.get_layout()));
-    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sub));
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
 
-    set_values(input, {
-        1.f,   0.f,  5.f,  1.5f,
-        2.f,   0.f,  6.f,  5.2f,
-        3.f,   0.5f, 7.f,  12.f,
-        4.f,  -0.5f, 8.f,   8.f
+    set_values(input1, {
+            1.f,   2.5f, 5.f, 1.5f,
+            2.f,   0.f, 6.f, 5.2f,
+            3.f,  0.5f, 7.f, 12.f,
+            4.f, 0.f, 8.f,  8.f
     });
 
     set_values(input2, {
-        0.5f,  2.5f,  0.5f, -1.f,
-        5.f,   7.f,   2.f,   2.f,
-       15.f,  17.f,   8.f,   8.5f,
-        6.f,   8.f, -0.5f,  10.5f });
+            0.5f,   2.5f,  0.5f,  1.5f,
+            5.f,   7.f,    6.f,   4.f,
+            15.f,  17.f,    8.f,  10.f,
+            -2.f,  0.f,  -0.5f, -2.5f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::lt));
 
     network network(engine, topology);
 
-    network.set_input_data("input", input);
+    network.set_input_data("input", input1);
     network.set_input_data("input2", input2);
+
     auto outputs = network.execute();
 
     EXPECT_EQ(outputs.size(), size_t(1));
     EXPECT_EQ(outputs.begin()->first, "eltwise");
 
     auto output = outputs.at("eltwise").get_memory();
+    auto output_ptr = output.pointer<int8_t>();
 
-    float answers[16] = {
-         0.5f,  -2.5f,   4.5f,   2.5f,
-        -3.f,   -7.f,    4.f,    3.2f,
-       -12.f,  -16.5f,  -1.f,    3.5f,
-        -2.f,   -8.5f,   8.5f,  -2.5f };
-
-    auto output_ptr = output.pointer<float>();
+    std::vector<int8_t> answers = { 0, 0, 0, 0,
+                                    1, 1, 0, 0,
+                                    1, 1, 1, 0,
+                                    0, 0, 0, 0 };
 
-    for (int i = 0; i < 16; i++)
-    {
-        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]);
     }
 }
 
-TEST(eltwise_gpu_int, basic_in4x4x4x4) {
-    //  Same params as in eltwise_gpu_f32, sub_basic_in4x4x4x4 but using int types instead
+TEST(eltwise_gpu_f32, less_equal_in2_float_out1_int) {
+    //  Input2 : 2x2x2x2
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
 
-    std::vector<data_types> data_types_to_test = { data_types::i8, data_types::i32, data_types::i64 };
-    std::vector<eltwise_mode> eltwise_ops_to_test = { eltwise_mode::sum, eltwise_mode::sub, eltwise_mode::div, eltwise_mode::prod };
+    //  Input:
+    //  1.f,   2.5f,    5.f,    1.5f,
+    //  2.f,   0.f,     6.f,    5.2f,
+    //  3.f,   0.5f,    7.f,    12.f,
+    //  4.f,   0.f,     8.f,    8.f
+    //
+    //  Input2
+    //  0.5f,  2.5f,    0.5f,   1.5f,
+    //  5.f,   7.f,     6.f,    4.f,
+    //  15.f,  17.f,    8.f,    10.f,
+    //  -2.f,  0.f,     -0.5f,  -2.5f
+    //
+    //  Output:
+    //  0, 1, 0, 1,
+    //  1, 1, 1, 0,
+    //  1, 1, 1, 0,
+    //  0, 1, 0, 0
 
-    for (auto& data_type : data_types_to_test)
-    {
-        for (auto& mode : eltwise_ops_to_test)
-        {
-            engine engine;
-            auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
-            auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    const auto& engine = get_test_engine();
 
-            topology topology;
-            topology.add(input_layout("input", input.get_layout()));
-            topology.add(input_layout("input2", input2.get_layout()));
-            topology.add(reorder("input_reorder", "input", { data_type, format::yxfb,{ 2, 2, 2, 2 } }));
-            topology.add(reorder("input2_reorder", "input2", { data_type, format::yxfb,{ 2, 2, 2, 2 } }));
-            topology.add(eltwise("eltwise", { "input_reorder", "input2_reorder" }, mode));
-            topology.add(reorder("eltwise_reorder", "eltwise", { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }));
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
 
-            std::vector<float> input_1_vec = {
-                1.f,   0.f,  5.f,  1.f,
-                2.f,   0.f,  6.f,  5.f,
-                3.f,   0.f, 7.f,  12.f,
-                4.f,   0.f, 8.f,   8.f
-            };
-            set_values(input, input_1_vec);
+    set_values(input1, {
+            1.f,   2.5f, 5.f, 1.5f,
+            2.f,   0.f, 6.f, 5.2f,
+            3.f,  0.5f, 7.f, 12.f,
+            4.f, 0.f, 8.f,  8.f
+    });
 
-            std::vector<float> input_2_vec = {
-                0.f,  2.f,  0.f, -1.f,
-                5.f,   7.f,   2.f,   2.f,
-                15.f,  17.f,   8.f,   8.f,
-                6.f,   8.f, 0.f,  10.f };
-            set_values(input2, input_2_vec);
+    set_values(input2, {
+            0.5f,   2.5f,  0.5f,  1.5f,
+            5.f,   7.f,    6.f,   4.f,
+            15.f,  17.f,    8.f,  10.f,
+            -2.f,  0.f,  -0.5f, -2.5f
+    });
 
-            network network(engine, topology);
-            network.set_input_data("input", input);
-            network.set_input_data("input2", input2);
-            auto outputs = network.execute();
+    topology topology;
+    topology.add(input_layout("input", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::le));
 
-            ASSERT_EQ(outputs.size(), size_t(1));
-            EXPECT_EQ(outputs.begin()->first, "eltwise_reorder");
+    network network(engine, topology);
 
-            auto output = outputs.at("eltwise_reorder").get_memory();
+    network.set_input_data("input", input1);
+    network.set_input_data("input2", input2);
 
-            auto output_ptr = output.pointer<float>();
+    auto outputs = network.execute();
 
-            for (int i = 0; i < 16; i++)
-            {
-                float expected = 0.f;
-                if (mode == eltwise_mode::sum)
-                    expected = input_1_vec[i] + input_2_vec[i];
-                else if (mode == eltwise_mode::sub)
-                    expected = input_1_vec[i] - input_2_vec[i];
-                else if (mode == eltwise_mode::prod)
-                    expected = input_1_vec[i] * input_2_vec[i];
-                else if (mode == eltwise_mode::div)
-                    expected = input_1_vec[i] / input_2_vec[i];
-                else if (mode == eltwise_mode::min)
-                    expected = std::min(input_1_vec[i], input_2_vec[i]);
-                else if (mode == eltwise_mode::max)
-                    expected = std::max(input_1_vec[i], input_2_vec[i]);
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
 
-                EXPECT_TRUE(are_equal(std::floor(expected), output_ptr[i]));
-            }
-        }
+    auto output = outputs.at("eltwise").get_memory();
+    auto output_ptr = output.pointer<int8_t>();
+
+    std::vector<int8_t> answers = { 0, 1, 0, 1,
+                                    1, 1, 1, 0,
+                                    1, 1, 1, 0,
+                                    0, 1, 0, 0 };
+
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]);
     }
 }
 
-TEST(eltwise_gpu_f32, prod_basic_in4x4x4x4) {
-    //  Input2   : 2x2x2
+TEST(eltwise_gpu_f32, greater_in2_float_out1_int) {
+    //  Input2 : 2x2x2x2
     //  Input  : 2x2x2x2
     //  Output : 2x2x2x2
 
     //  Input:
-    //  f0: b0:  1    2  b1:   0    0       
-    //  f0: b0:  3    4  b1:   0.5 -0.5     
-    //  f1: b0:  5    6  b1:   1    5.2     
-    //  f1: b0:  7    8  b1:   12   7.5       
+    //  1.f,   2.5f,    5.f,    1.5f,
+    //  2.f,   0.f,     6.f,    5.2f,
+    //  3.f,   0.5f,    7.f,    12.f,
+    //  4.f,   0.f,     8.f,    8.f
     //
     //  Input2
-    //  f0: b0: 0.5  0.5   b1: 5  2 
-    //  f0: b0: 2.5  2.5   b1: 7  4
-    //  f1: b0: 15   8     b1: 6  -0.5
-    //  f1: b0: 17   10    b1: 8  -2.5
+    //  0.5f,  2.5f,    0.5f,   1.5f,
+    //  5.f,   7.f,     6.f,    4.f,
+    //  15.f,  17.f,    8.f,    10.f,
+    //  -2.f,  0.f,     -0.5f,  -2.5f
     //
     //  Output:
-    //  f0: b0:   0.5  1     b1:  0      0       
-    //  f0: b0:   7.5  10    b1:  3.5   -2     
-    //  f1: b0:   75   48    b1:  6     -2.6     
-    //  f1: b0:   119  80    b1:  96   -18.75     
-    //
+    //  1, 0, 1, 0,
+    //  0, 0, 0, 1,
+    //  0, 0, 0, 1,
+    //  1, 0, 1, 1
 
+    const auto& engine = get_test_engine();
 
-    engine engine;
-    auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
-    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
-    topology topology;
-    topology.add(input_layout("input", input.get_layout()));
-    topology.add(input_layout("input2", input2.get_layout()));
-    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::prod));
-    
-    set_values(input, {
-        1.f,   0.f,  5.f,  1.f,
-        2.f,   0.f,  6.f,  5.2f,
-        3.f,   0.5f, 7.f, 12.f,
-        4.f,  -0.5f, 8.f,  7.5f
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+
+    set_values(input1, {
+            1.f,   2.5f, 5.f, 1.5f,
+            2.f,   0.f, 6.f, 5.2f,
+            3.f,  0.5f, 7.f, 12.f,
+            4.f, 0.f, 8.f,  8.f
     });
 
     set_values(input2, {
-        0.5f,   5.f,  15.f,    6.f,
-        0.5f,   2.f,   8.f,   -0.5f,
-        2.5f,   7.f,  17.f,    8.f,
-        2.5f,   4.f,  10.f,   -2.5f });
+            0.5f,   2.5f,  0.5f,  1.5f,
+            5.f,   7.f,    6.f,   4.f,
+            15.f,  17.f,    8.f,  10.f,
+            -2.f,  0.f,  -0.5f, -2.5f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::gt));
 
     network network(engine, topology);
 
-    network.set_input_data("input", input);
+    network.set_input_data("input", input1);
     network.set_input_data("input2", input2);
+
     auto outputs = network.execute();
 
     EXPECT_EQ(outputs.size(), size_t(1));
     EXPECT_EQ(outputs.begin()->first, "eltwise");
 
     auto output = outputs.at("eltwise").get_memory();
+    auto output_ptr = output.pointer<int8_t>();
 
-    float answers[16] = {
-        0.5f,   0.0f,    75.f,    6.0f,
-        1.0f,   0.0f,    48.f,   -2.6f,
-        7.5f,   3.5f,   119.f,   96.0f,
-       10.0f,  -2.0f,    80.f, -18.75f };
-    
-    auto output_ptr = output.pointer<float>();
+    std::vector<int8_t> answers = { 1, 0, 1, 0,
+                                    0, 0, 0, 1,
+                                    0, 0, 0, 1,
+                                    1, 0, 1, 1 };
 
-    for (int i = 0; i < 16; i++)
-    {
-        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]);
     }
 }
 
-TEST(eltwise_gpu_f32, max_basic_in4x4x4x4_input_padding) {
-    //  Input2   : 2x2x2
+TEST(eltwise_gpu_f32, greater_equal_in2_float_out1_int) {
+    //  Input2 : 2x2x2x2
     //  Input  : 2x2x2x2
     //  Output : 2x2x2x2
-    //  Input Padding: 2x1 (with reorder)
 
     //  Input:
-    //  f0: b0:  1    2  b1:   0    0       
-    //  f0: b0:  3    4  b1:   0.5 -0.5     
-    //  f1: b0:  5    6  b1:   1.5  5.2     
-    //  f1: b0:  7    8  b1:   12   8       
+    //  1.f,   2.5f,    5.f,    1.5f,
+    //  2.f,   0.f,     6.f,    5.2f,
+    //  3.f,   0.5f,    7.f,    12.f,
+    //  4.f,   0.f,     8.f,    8.f
     //
     //  Input2
-    //  f0: b0: 0.5  5   b1: 2.5  7 
-    //  f0: b0: 15   6   b1: 17   8
-    //  f1: b0: 0.5  2   b1: 2.5  4
-    //  f1: b0: 8   -0.5 b1: 10   -2.5
+    //  0.5f,  2.5f,    0.5f,   1.5f,
+    //  5.f,   7.f,     6.f,    4.f,
+    //  15.f,  17.f,    8.f,    10.f,
+    //  -2.f,  0.f,     -0.5f,  -2.5f
     //
     //  Output:
-    //  f0: b0:    1   5    b1:  2.5   7       
-    //  f0: b0:   15   6    b1:  17    8    
-    //  f1: b0:    5   6    b1:  2.5   5.2     
-    //  f1: b0:    8   8    b1:  12    8     
-    //
-    engine engine;
+    //  1, 1, 1, 1,
+    //  0, 0, 1, 1,
+    //  0, 0, 0, 1,
+    //  1, 1, 1, 1
 
-    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
-    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
+    const auto& engine = get_test_engine();
 
-    topology topology;
-    topology.add(input_layout("input", input.get_layout()));
-    topology.add(input_layout("input2", input2.get_layout()));
-    topology.add(reorder("reorder", "input", input.get_layout().with_padding({ { 0, 0, 2, 1 }, 0 })));
-    topology.add(reorder("reorder2", "input2", input.get_layout().with_padding({ { 0, 0, 2, 1 }, 0 })));
-    topology.add(eltwise("eltwise", {"reorder", "reorder2"}, eltwise_mode::max));
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
 
-    set_values(input, {
-        1.f,   0.f,  5.f,  1.5f,
-        2.f,   0.f,  6.f,  5.2f,
-        3.f,   0.5f, 7.f, 12.f,
-        4.f,  -0.5f, 8.f,  8.f
+    set_values(input1, {
+            1.f,   2.5f, 5.f, 1.5f,
+            2.f,   0.f, 6.f, 5.2f,
+            3.f,  0.5f, 7.f, 12.f,
+            4.f, 0.f, 8.f,  8.f
     });
 
     set_values(input2, {
-        0.5f,  2.5f,  0.5f,  2.5f,
-        5.f,   7.f,   2.f,   4.f,
-        15.f,  17.f,   8.f,  10.f,
-        6.f,   8.f, -0.5f, -2.5f });
+            0.5f,   2.5f,  0.5f,  1.5f,
+            5.f,   7.f,    6.f,   4.f,
+            15.f,  17.f,    8.f,  10.f,
+            -2.f,  0.f,  -0.5f, -2.5f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::ge));
 
     network network(engine, topology);
 
-    network.set_input_data("input", input);
+    network.set_input_data("input", input1);
     network.set_input_data("input2", input2);
+
     auto outputs = network.execute();
 
     EXPECT_EQ(outputs.size(), size_t(1));
     EXPECT_EQ(outputs.begin()->first, "eltwise");
 
     auto output = outputs.at("eltwise").get_memory();
+    auto output_ptr = output.pointer<int8_t>();
 
-    float answers[16] = {
-        1.f,   2.5f,  5.f,   2.5f,
-        5.f,   7.f,   6.f,   5.2f,
-        15.f,  17.f,   8.f,  12.f,
-        6.f,   8.f,   8.f,   8.f };
-
-    auto output_ptr = output.pointer<float>();
+    std::vector<int8_t> answers = { 1, 1, 1, 1,
+                                    0, 0, 1, 1,
+                                    0, 0, 0, 1,
+                                    1, 1, 1, 1 };
 
-    for (int i = 0; i < 16; i++)
-    {
-        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]);
     }
 }
 
-TEST(eltwise_gpu_f32, add_basic_in4x4x2x2_with_coefficients) {
-    //  Input2   : 2x2x2
+TEST(eltwise_gpu_f32, logicalAND_in2_float_out1_int) {
+    //  Input2 : 2x2x2x2
     //  Input  : 2x2x2x2
     //  Output : 2x2x2x2
 
     //  Input:
-    //  f0: b0:  1    2  b1:   0    0
-    //  f0: b0:  3    4  b1:   0.5 -0.5
-    //  f1: b0:  5    6  b1:   1.5  5.2
-    //  f1: b0:  7    8  b1:   12   8
+    //  1.f,   2.5f,    5.f,    1.5f,
+    //  2.f,   0.f,     6.f,    5.2f,
+    //  3.f,   0.5f,    7.f,    12.f,
+    //  4.f,   0.f,     8.f,    8.f
     //
     //  Input2
-    //  f0: b0: 0.5  5   b1: 2.5  7
-    //  f0: b0: 15  -2   b1: 17   6.5
-    //  f1: b0: 0.5  2   b1: 2.5  4
-    //  f1: b0: 8   -0.5 b1: 10   -2.5
+    //  0.5f,  2.5f,    0.5f,   1.5f,
+    //  5.f,   7.f,     6.f,    4.f,
+    //  15.f,  17.f,    8.f,    10.f,
+    //  -2.f,  0.f,     -0.5f,  -2.5f
     //
     //  Output:
-    //  f0: b0:   0.75  3.5  b1:  1.25  3.5
-    //  f0: b0:   9     1    b1:  8.75  3
-    //  f1: b0:   2.75  4    b1:   2    4.6
-    //  f1: b0:   7.5  8.25  b1:  11    8.25
-    //
+    //  1, 1, 1, 1,
+    //  1, 0, 1, 1,
+    //  1, 1, 1, 1,
+    //  1, 0, 1, 1
 
-    engine engine;
-
-    auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
-    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
+    const auto& engine = get_test_engine();
 
-    topology topology;
-    topology.add(input_layout("input", input.get_layout()));
-    topology.add(input_layout("input2", input2.get_layout()));
-    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum, {0.5f, 0.5f}));
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
 
-    set_values(input, {
-            1.f,   0.f, 5.f, 1.5f,
+    set_values(input1, {
+            1.f,   2.5f, 5.f, 1.5f,
             2.f,   0.f, 6.f, 5.2f,
             3.f,  0.5f, 7.f, 12.f,
-            4.f, -0.5f, 8.f,  8.f
+            4.f, 0.f, 8.f,  8.f
     });
 
     set_values(input2, {
-            0.5f,   2.5f,  0.5f,  2.5f,
-            5.f,   7.f,    2.f,   4.f,
+            0.5f,   2.5f,  0.5f,  1.5f,
+            5.f,   7.f,    6.f,   4.f,
             15.f,  17.f,    8.f,  10.f,
-            -2.f,  6.5f,  -0.5f, -2.5f });
+            -2.f,  0.f,  -0.5f, -2.5f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::logic_and));
 
     network network(engine, topology);
 
-    network.set_input_data("input", input);
+    network.set_input_data("input", input1);
     network.set_input_data("input2", input2);
+
     auto outputs = network.execute();
 
     EXPECT_EQ(outputs.size(), size_t(1));
     EXPECT_EQ(outputs.begin()->first, "eltwise");
 
     auto output = outputs.at("eltwise").get_memory();
+    auto output_ptr = output.pointer<int8_t>();
 
-    float answers[16] = { 0.75f, 1.25f, 2.75f,  2.f,
-                          3.5f,  3.5f,   4.f, 4.6f,
-                          9.f, 8.75f,  7.5f, 11.f,
-                          1.f,   3.f, 3.75f, 2.75f };
+    std::vector<int8_t> answers = { 1, 1, 1, 1,
+                                    1, 0, 1, 1,
+                                    1, 1, 1, 1,
+                                    1, 0, 1, 1 };
 
-    auto output_ptr = output.pointer<float>();
-
-    for (int i = 0; i < 16; i++)
-    {
-        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]);
     }
 }
 
-TEST(eltwise_gpu_f32, coefficients_count_check) {
-    engine engine;
+TEST(eltwise_gpu_f32, logicalAND_in3_float_out1_int) {
+    //  Input2 : 2x2x2x2
+    //  Input3 : 2x2x2x2
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
 
-    auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
-    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
-    auto input3 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
+    //  Input:
+    //  1.f,   2.5f,    5.f,    1.5f,
+    //  2.f,   0.f,     6.f,    5.2f,
+    //  3.f,   0.5f,    7.f,    12.f,
+    //  4.f,   0.f,     8.f,    8.f
+    //
+    //  Input2
+    //  0.5f,  2.5f,    0.5f,   1.5f,
+    //  5.f,   7.f,     6.f,    4.f,
+    //  15.f,  17.f,    8.f,    10.f,
+    //  -2.f,  0.f,     -0.5f,  -2.5f
+    //
+    //  Input3
+    //  0.f, 0.f, 0.f, 0.f,
+    //  0.f, 0.f, 0.f, 0.f,
+    //  1.f, 1.f, 1.f, 1.f,
+    //  1.f, 1.f, 1.f, 1.f
+    //
+    //  Output:
+    //  0, 0, 0, 0,
+    //  0, 0, 0, 0,
+    //  1, 1, 1, 1,
+    //  1, 0, 1, 1
+
+    const auto& engine = get_test_engine();
+
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+
+    set_values(input1, {
+            1.f,   2.5f, 5.f, 1.5f,
+            2.f,   0.f, 6.f, 5.2f,
+            3.f,  0.5f, 7.f, 12.f,
+            4.f, 0.f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+            0.5f,   2.5f,  0.5f,  1.5f,
+            5.f,   7.f,    6.f,   4.f,
+            15.f,  17.f,    8.f,  10.f,
+            -2.f,  0.f,  -0.5f, -2.5f
+    });
+
+    set_values(input3, {
+            0.f, 0.f, 0.f, 0.f,
+            0.f, 0.f, 0.f, 0.f,
+            1.f, 1.f, 1.f, 1.f,
+            1.f, 1.f, 1.f, 1.f
+    });
 
     topology topology;
-    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input", input1.get_layout()));
     topology.add(input_layout("input2", input2.get_layout()));
-    topology.add(input_layout("input3", input3.get_layout()));
+    topology.add(input_layout("input3", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2", "input3"}, eltwise_mode::logic_and));
 
-    std::vector<float> coeffs0 = {};
-    std::vector<float> coeffs1 = {0.5f};
-    std::vector<float> coeffs2 = {0.5f, 0.5f};
-    std::vector<float> coeffs3 = {0.5f, 0.5f, 0.5f};
+    network network(engine, topology);
 
-    EXPECT_THROW(topology.add(eltwise("eltwise1", {"input", "input2"}, eltwise_mode::sum, coeffs1)), std::invalid_argument);
-    EXPECT_THROW(topology.add(eltwise("eltwise2", {"input", "input2"}, eltwise_mode::sum, coeffs3)), std::invalid_argument);
+    network.set_input_data("input", input1);
+    network.set_input_data("input2", input2);
+    network.set_input_data("input3", input3);
 
-    EXPECT_THROW(topology.add(eltwise("eltwise3", {"input", "input2", "input3"}, eltwise_mode::sum, coeffs1)), std::invalid_argument);
-    EXPECT_THROW(topology.add(eltwise("eltwise4", {"input", "input2", "input3"}, eltwise_mode::sum, coeffs2)), std::invalid_argument);
+    auto outputs = network.execute();
 
-    EXPECT_NO_THROW(topology.add(eltwise("eltwise5", {"input", "input2"}, eltwise_mode::sum, coeffs0)));
-    EXPECT_NO_THROW(topology.add(eltwise("eltwise6", {"input", "input2"}, eltwise_mode::sum, coeffs2)));
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
 
-    EXPECT_NO_THROW(topology.add(eltwise("eltwise7", {"input", "input2", "input3"}, eltwise_mode::sum, coeffs0)));
-    EXPECT_NO_THROW(topology.add(eltwise("eltwise8", {"input", "input2", "input3"}, eltwise_mode::sum, coeffs3)));
+    auto output = outputs.at("eltwise").get_memory();
+    auto output_ptr = output.pointer<int8_t>();
+
+    std::vector<int8_t> answers = { 0, 0, 0, 0,
+                                    0, 0, 0, 0,
+                                    1, 1, 1, 1,
+                                    1, 0, 1, 1 };
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
 }
 
-TEST(eltwise_gpu_f32, add_basic_in4x4x2x2_with_coefficients_3inputs) {
-    //  Input3   : 2x2x2
-    //  Input2   : 2x2x2
+TEST(eltwise_gpu_f32, logicalOR_in2_float_out1_int) {
+    //  Input2 : 2x2x2x2
     //  Input  : 2x2x2x2
     //  Output : 2x2x2x2
 
     //  Input:
-    //  f0: b0:  1    2  b1:   0    0
-    //  f0: b0:  3    4  b1:   0.5 -0.5
-    //  f1: b0:  5    6  b1:   1.5  5.2
-    //  f1: b0:  7    8  b1:   12   8
+    //  1.f,   2.5f,    5.f,    1.5f,
+    //  2.f,   0.f,     6.f,    5.2f,
+    //  3.f,   0.5f,    7.f,    12.f,
+    //  4.f,   0.f,     8.f,    8.f
     //
     //  Input2
-    //  f0: b0: 0.5  5   b1: 2.5  7
-    //  f0: b0: 15  -2   b1: 17   6.5
-    //  f1: b0: 0.5  2   b1: 2.5  4
-    //  f1: b0: 8   -0.5 b1: 10   -2.5
-    //
-    //  Input3
-    //  f0: b0: 8  7 b1: 0 1
-    //  f0: b0: 6  5 b1: 0 1
-    //  f1: b0: 4  3 b1: 0 1
-    //  f1: b0: 2  1 b1: 0 1
+    //  0.5f,  2.5f,    0.5f,   1.5f,
+    //  5.f,   7.f,     6.f,    4.f,
+    //  15.f,  17.f,    8.f,    10.f,
+    //  -2.f,  0.f,     -0.5f,  -2.5f
     //
     //  Output:
-    //  f0: b0:   4.75  7      b1:  1.25  4
-    //  f0: b0:   12    3.5    b1:  8.75  3.5
-    //  f1: b0:   4.75  5.5    b1:   2    5.1
-    //  f1: b0:   8.5   8.75   b1:  11    8.75
-    //
-
-    engine engine;
+    //  1, 1, 1, 1,
+    //  1, 1, 1, 1,
+    //  1, 1, 1, 1,
+    //  1, 0, 1, 1
 
-    auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
-    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
-    auto input3 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
+    const auto& engine = get_test_engine();
 
-    topology topology;
-    topology.add(input_layout("input", input.get_layout()));
-    topology.add(input_layout("input2", input2.get_layout()));
-    topology.add(input_layout("input3", input3.get_layout()));
-    topology.add(eltwise("eltwise", {"input", "input2", "input3"}, eltwise_mode::sum, {0.5f, 0.5f, 0.5f}));
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
 
-    set_values(input, {
-            1.f,   0.f, 5.f, 1.5f,
+    set_values(input1, {
+            1.f,   2.5f, 5.f, 1.5f,
             2.f,   0.f, 6.f, 5.2f,
             3.f,  0.5f, 7.f, 12.f,
-            4.f, -0.5f, 8.f,  8.f
+            4.f, 0.f, 8.f,  8.f
     });
 
     set_values(input2, {
-            0.5f,   2.5f,  0.5f,  2.5f,
-            5.f,   7.f,    2.f,   4.f,
+            0.5f,   2.5f,  0.5f,  1.5f,
+            5.f,   7.f,    6.f,   4.f,
             15.f,  17.f,    8.f,  10.f,
-            -2.f,  6.5f,  -0.5f, -2.5f });
+            -2.f,  0.f,  -0.5f, -2.5f
+    });
 
-    set_values(input3, {
-            8.f,  0.f,  4.f, 0.f,
-            7.f,  1.f,  3.f, 1.f,
-            6.f,  0.f,  2.f, 0.f,
-            5.f,  1.f,  1.f, 1.f });
+    topology topology;
+    topology.add(input_layout("input", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::logic_or));
 
     network network(engine, topology);
 
-    network.set_input_data("input", input);
+    network.set_input_data("input", input1);
     network.set_input_data("input2", input2);
-    network.set_input_data("input3", input3);
+
     auto outputs = network.execute();
 
     EXPECT_EQ(outputs.size(), size_t(1));
     EXPECT_EQ(outputs.begin()->first, "eltwise");
 
     auto output = outputs.at("eltwise").get_memory();
+    auto output_ptr = output.pointer<int8_t>();
 
-    float answers[16] = { 4.75f, 1.25f, 4.75f,  2.f,
-                          7.0f,  4.0f,  5.5f, 5.1f,
-                          12.f, 8.75f,  8.5f, 11.f,
-                          3.5f,  3.5f, 4.25f, 3.25f };
-
-    auto output_ptr = output.pointer<float>();
+    std::vector<int8_t> answers = { 1, 1, 1, 1,
+                                    1, 1, 1, 1,
+                                    1, 1, 1, 1,
+                                    1, 0, 1, 1 };
 
-    for (int i = 0; i < 16; i++)
-    {
-        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]);
     }
 }
 
-TEST(eltwise_gpu_f32, max_3inputs_in4x4x4x4_input_padding) {
-    //  Input  : 2x2x2x2
-    //  Input2 : 2x2x2x2
+TEST(eltwise_gpu_f32, logicalOR_in3_float_out1_int) {
     //  Input3 : 2x2x2x2
+    //  Input2 : 2x2x2x2
+    //  Input  : 2x2x2x2
     //  Output : 2x2x2x2
-    //  Input Padding: 2x1 (with reorder)
 
     //  Input:
-    //  f0: b0:  1    2  b1:   0    0
-    //  f0: b0:  3    4  b1:   0.5 -0.5
-    //  f1: b0:  5    6  b1:   1.5  5.2
-    //  f1: b0:  7    8  b1:   12   8
+    //  1.f,   2.5f,    5.f,    1.5f,
+    //  2.f,   0.f,     6.f,    5.2f,
+    //  3.f,   0.5f,    7.f,    12.f,
+    //  4.f,   0.f,     8.f,    8.f
     //
     //  Input2
-    //  f0: b0: 0.5  5   b1: 2.5  7
-    //  f0: b0: 15   6   b1: 17   8
-    //  f1: b0: 0.5  2   b1: 2.5  4
-    //  f1: b0: 8   -0.5 b1: 10   -2.5
+    //  0.5f,  2.5f,    0.5f,   1.5f,
+    //  5.f,   7.f,     6.f,    4.f,
+    //  15.f,  17.f,    8.f,    10.f,
+    //  -2.f,  0.f,     -0.5f,  -2.5f
     //
     //  Input3
-    //  f0: b0: 1.1  1   b1: 4  0
-    //  f0: b0: 15  -1   b1: 3  6
-    //  f1: b0: 1.5  2   b1: 2  7
-    //  f1: b0: 9   0.5  b1: 1  8
+    //  0.f, 1.f, 1.f, 1.f,
+    //  0.f, 1.f, 1.f, 0.f,
+    //  1.f, 1.f, 1.f, 1.f,
+    //  1.f, 1.f, 1.f, 1.f
     //
     //  Output:
-    //  f0: b0:   1.1  5    b1:  4   7
-    //  f0: b0:   15   6    b1:  17    8
-    //  f1: b0:    5   6    b1:  2.5   7
-    //  f1: b0:    9   8    b1:  12    8
-    //
-    engine engine;
+    //  1, 1, 1, 1,
+    //  1, 1, 1, 1,
+    //  1, 1, 1, 1,
+    //  1, 1, 1, 1
 
-    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
-    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
-    auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
+    const auto& engine = get_test_engine();
 
-    topology topology;
-    topology.add(input_layout("input", input.get_layout()));
-    topology.add(input_layout("input2", input2.get_layout()));
-    topology.add(input_layout("input3", input3.get_layout()));
-    topology.add(reorder("reorder", "input", input.get_layout().with_padding({ { 0, 0, 2, 1 }, 0 })));
-    topology.add(reorder("reorder2", "input2", input.get_layout().with_padding({ { 0, 0, 2, 1 }, 0 })));
-    topology.add(reorder("reorder3", "input3", input.get_layout().with_padding({ { 0, 0, 2, 1 }, 0 })));
-    topology.add(eltwise("eltwise", {"reorder", "reorder2", "reorder3"}, eltwise_mode::max));
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
 
-    set_values(input, {
-         1.f,  0.f,  5.f,  1.5f,
-         2.f,  0.f,  6.f,  5.2f,
-         3.f,  0.5f, 7.f,  12.f,
-         4.f, -0.5f, 8.f,   8.f
+    set_values(input1, {
+            1.f,   2.5f, 5.f, 1.5f,
+            2.f,   0.f, 6.f, 5.2f,
+            3.f,  0.5f, 7.f, 12.f,
+            4.f, 0.f, 8.f,  8.f
     });
 
     set_values(input2, {
-        0.5f, 2.5f,  0.5f,  2.5f,
-         5.f,  7.f,   2.f,   4.f,
-        15.f, 17.f,   8.f,  10.f,
-         6.f,  8.f, -0.5f, -2.5f });
+            0.5f,   2.5f,  0.5f,  1.5f,
+            5.f,   7.f,    6.f,   4.f,
+            15.f,  17.f,    8.f,  10.f,
+            -2.f,  0.f,  -0.5f, -2.5f
+    });
 
     set_values(input3, {
-        1.1f,  4.f, 1.5f, 2.f,
-         1.f,  0.f,  2.f, 7.f,
-        15.f,  3.f,  9.f, 1.f,
-        -1.f,  6.f, 0.5f, 8.f });
+            0.f, 1.f, 1.f, 1.f,
+            0.f, 1.f, 1.f, 0.f,
+            1.f, 1.f, 1.f, 1.f,
+            1.f, 1.f, 1.f, 1.f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("input3", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2", "input3"}, eltwise_mode::logic_or));
 
     network network(engine, topology);
 
-    network.set_input_data("input", input);
+    network.set_input_data("input", input1);
     network.set_input_data("input2", input2);
     network.set_input_data("input3", input3);
+
     auto outputs = network.execute();
 
     EXPECT_EQ(outputs.size(), size_t(1));
     EXPECT_EQ(outputs.begin()->first, "eltwise");
 
     auto output = outputs.at("eltwise").get_memory();
+    auto output_ptr = output.pointer<int8_t>();
 
-    float answers[16] = {
-        1.1f,   4.f,  5.f,   2.5f,
-        5.f,   7.f,   6.f,   7.f,
-        15.f,  17.f,   9.f,  12.f,
-        6.f,   8.f,   8.f,   8.f };
-
-    auto output_ptr = output.pointer<float>();
+    std::vector<int8_t> answers = { 1, 1, 1, 1,
+                                    1, 1, 1, 1,
+                                    1, 1, 1, 1,
+                                    1, 1, 1, 1 };
+
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
+}
+
+TEST(eltwise_gpu_f32, logicalXOR_in2_float_out1_int) {
+    //  Input2 : 2x2x2x2
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
+
+    //  Input:
+    //  1.f,   2.5f,    5.f,    1.5f,
+    //  2.f,   0.f,     6.f,    5.2f,
+    //  3.f,   0.5f,    7.f,    12.f,
+    //  4.f,   0.f,     8.f,    8.f
+    //
+    //  Input2
+    //  0.5f,  2.5f,    0.5f,   1.5f,
+    //  5.f,   7.f,     6.f,    4.f,
+    //  15.f,  17.f,    8.f,    10.f,
+    //  -2.f,  0.f,     -0.5f,  -2.5f
+    //
+    //  Output:
+    //  0, 0, 0, 0,
+    //  0, 1, 0, 0,
+    //  0, 0, 0, 0,
+    //  0, 0, 0, 0
+
+    const auto& engine = get_test_engine();
+
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+
+    set_values(input1, {
+            1.f,   2.5f, 5.f, 1.5f,
+            2.f,   0.f, 6.f, 5.2f,
+            3.f,  0.5f, 7.f, 12.f,
+            4.f, 0.f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+            0.5f,   2.5f,  0.5f,  1.5f,
+            5.f,   7.f,    6.f,   4.f,
+            15.f,  17.f,    8.f,  10.f,
+            -2.f,  0.f,  -0.5f, -2.5f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::logic_xor));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input1);
+    network.set_input_data("input2", input2);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+    auto output_ptr = output.pointer<int8_t>();
+
+    std::vector<int8_t> answers = { 0, 0, 0, 0,
+                                    0, 1, 0, 0,
+                                    0, 0, 0, 0,
+                                    0, 0, 0, 0 };
+
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
+}
+
+TEST(eltwise_gpu_f32, add_basic_in4x4x2x2) {
+    //  Input2   : 2x2x2
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
+
+    //  Input:
+    //  f0: b0:  1    2  b1:   0    0
+    //  f0: b0:  3    4  b1:   0.5 -0.5
+    //  f1: b0:  5    6  b1:   1.5  5.2
+    //  f1: b0:  7    8  b1:   12   8
+    //
+    //  Input2
+    //  f0: b0: 0.5  5   b1: 2.5  7
+    //  f0: b0: 15  -2   b1: 17   6.5
+    //  f1: b0: 0.5  2   b1: 2.5  4
+    //  f1: b0: 8   -0.5 b1: 10   -2.5
+    //
+    //  Output:
+    //  f0: b0:   1.5  7    b1:  2.5   7
+    //  f0: b0:   18   2    b1:  17.5  6
+    //  f1: b0:   5.5  8    b1:   4    9.2
+    //  f1: b0:   15  16.5  b1:  22    16.5
+    //
+
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum));
+
+    set_values(input, {
+        1.f,   0.f, 5.f, 1.5f,
+        2.f,   0.f, 6.f, 5.2f,
+        3.f,  0.5f, 7.f, 12.f,
+        4.f, -0.5f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+        0.5f,   2.5f,  0.5f,  2.5f,
+         5.f,   7.f,    2.f,   4.f,
+        15.f,  17.f,    8.f,  10.f,
+        -2.f,  6.5f,  -0.5f, -2.5f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = { 1.5f, 2.5f,   5.5f,    4.f,
+                          7.f,   7.f,    8.f,   9.2f,
+                          18.f,17.5f,   15.f,   22.f,
+                          2.f,   6.f,   7.5f,  5.5f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_channel) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum));
+
+    set_values(input, {
+        1.f,   0.f,
+        2.f,   0.f,
+
+        3.f,  0.5f,
+        4.f, -0.5f,
+    });
+
+    set_values(input2, {
+         0.5f,  2.5f,
+         0.5f,  2.5f,
+
+          5.f,   7.f,
+          2.f,   4.f,
+
+         15.f,  17.f,
+          8.f,  10.f,
+
+         -2.f,  6.5f,
+        -0.5f, -2.5f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = { 1.5f, 2.5f,
+                          2.5f, 2.5f,
+
+                          6.f,   7.f,
+                          4.f,   4.f,
+
+                          18.f, 17.5f,
+                          12.f,  9.5f,
+
+                          1.f,   7.f,
+                          3.5f, -3.f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_x) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum));
+
+    set_values(input, {
+            0.5f,  2.5f,
+            0.5f,  2.5f,
+
+            5.f,   7.f,
+            2.f,   4.f,
+
+            15.f,  17.f,
+            8.f,  10.f,
+
+            -2.f,  6.5f,
+            -0.5f, -2.5f });
+
+
+    set_values(input2, {
+        1.f,
+        0.f,
+
+        2.f,
+        0.f,
+
+        3.f,
+        0.5f,
+
+        4.f,
+        -0.5f,
+    });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = { 1.5f, 3.5f,
+                          0.5f, 2.5f,
+
+                          7.f,   9.f,
+                          2.f,   4.f,
+
+                          18.f, 20.f,
+                          8.5f,  10.5f,
+
+                          2.f,   10.5f,
+                          -1.f, -3.f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_y) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum));
+
+    set_values(input, {
+            0.5f,  2.5f,
+            0.5f,  2.5f,
+
+            5.f,   7.f,
+            2.f,   4.f,
+
+            15.f,  17.f,
+            8.f,  10.f,
+
+            -2.f,  6.5f,
+            -0.5f, -2.5f });
+
+
+    set_values(input2, {
+        1.f, 0.f,
+        2.f, 0.f,
+
+        3.f, 0.5f,
+        4.f, -0.5f,
+    });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = { 1.5f, 2.5f,
+                          2.5f, 2.5f,
+
+                          8.f,   7.5f,
+                          6.f,   3.5f,
+
+                          16.f,  17.f,
+                          10.f,  10.f,
+
+                          1.f,   7.f,
+                          3.5f, -3.f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_batch) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum));
+
+    set_values(input, {
+            0.5f,  2.5f,
+            0.5f,  2.5f,
+
+            5.f,   7.f,
+            2.f,   4.f,
+
+            15.f,  17.f,
+            8.f,  10.f,
+
+            -2.f,  6.5f,
+            -0.5f, -2.5f });
+
+
+    set_values(input2, {
+        1.f, 0.f,
+
+        2.f, 0.f,
+
+        3.f, 0.5f,
+
+        4.f, -0.5f,
+    });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = { 1.5f, 2.5f,
+                          1.5f, 2.5f,
+
+                          7.f,   7.f,
+                          4.f,   4.f,
+
+                          18.f,  17.5f,
+                          11.f,  10.5f,
+
+                          2.f,   6.f,
+                          3.5f, -3.f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_multiple_dims) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum));
+
+    set_values(input, {
+            0.5f,  2.5f,
+            0.5f,  2.5f,
+
+            5.f,   7.f,
+            2.f,   4.f,
+
+            15.f,  17.f,
+            8.f,  10.f,
+
+            -2.f,  6.5f,
+            -0.5f, -2.5f });
+
+    set_values(input2, {
+            1.f,
+            2.f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = { 1.5f, 3.5f,
+                          1.5f, 3.5f,
+
+                          7.f,   9.f,
+                          4.f,   6.f,
+
+                          16.f, 18.f,
+                          9.f,  11.f,
+
+                          0.f,   8.5f,
+                          1.5f, -0.5f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, pow_in2x2x2x2_broadcast_all) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::pow));
+
+    set_values(input, {
+            1.f,  2.f,
+            3.f,  4.f,
+
+            5.f,  6.f,
+            7.f,  8.f,
+
+            9.f,  10.f,
+           11.f,  12.f,
+
+            13.f, 14.f,
+            15.f, 16.f });
+
+
+    set_values(input2, { 2.0f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = { 1.f, 4.f,
+                          9.f, 16.f,
+
+                          25.f, 36.f,
+                          49.f, 64.f,
+
+                          81.f,  100.f,
+                          121.f, 144.f,
+
+                          169.f, 196.f,
+                          225.f, 256.f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, add_basic_in2x2x2x2_broadcast_2_inputs_same_dim) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } });
+    auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("input3", input3.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2", "input3"}, eltwise_mode::sum));
+
+    set_values(input, {
+            0.5f,  2.5f,
+            0.5f,  2.5f,
+
+            5.f,   7.f,
+            2.f,   4.f,
+
+            15.f,  17.f,
+            8.f,  10.f,
+
+            -2.f,  6.5f,
+            -0.5f, -2.5f });
+
+
+    set_values(input2, {
+        1.f, 0.f,
+
+        2.f, 0.f,
+
+        3.f, 0.5f,
+
+        4.f, -0.5f,
+    });
+
+    set_values(input3, {
+        3.f, 2.f,
+
+        1.f, 2.f,
+
+        -2.f, 1.5f,
+
+        -4.f, 0.5f,
+    });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("input3", input3);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = { 4.5f, 4.5f,
+                          4.5f, 4.5f,
+
+                          8.f,   9.f,
+                          5.f,   6.f,
+
+                          16.f,  19.f,
+                          9.f,  12.f,
+
+                          -2.f,  6.5f,
+                          -0.5f, -2.5f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, add_basic_in2x2x2x2_broadcast_2_inputs_diff_dim) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } });
+    auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 2, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("input3", input3.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2", "input3"}, eltwise_mode::sum));
+
+    set_values(input, {
+            0.5f,  2.5f,
+            0.5f,  2.5f,
+
+            5.f,   7.f,
+            2.f,   4.f,
+
+            15.f,  17.f,
+            8.f,  10.f,
+
+            -2.f,  6.5f,
+            -0.5f, -2.5f });
+
+
+    set_values(input2, {
+        1.f, 0.f,
+
+        2.f, 0.f,
+
+        3.f, 0.5f,
+
+        4.f, -0.5f,
+    });
+
+    set_values(input3, {
+        3.f, 2.f,
+        1.f, 2.f,
+
+        -2.f, 1.5f,
+        -4.f, 0.5f,
+    });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("input3", input3);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = { 4.5f, 4.5f,
+                          2.5f, 4.5f,
+
+                          10.f,  9.f,
+                          5.f,   6.f,
+
+                          16.f,  19.f,
+                          7.f,  11.f,
+
+                           0.f,   7.5f,
+                          -0.5f, -2.5f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, max_basic_in4x4x4x4) {
+    //  Input2   : 2x2x2
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
+
+    //  Input:
+    //  f0: b0:  1    2  b1:   0    0
+    //  f0: b0:  3    4  b1:   0.5 -0.5
+    //  f1: b0:  5    6  b1:   1.5  5.2
+    //  f1: b0:  7    8  b1:   12   8
+    //
+    //  Input2
+    //  f0: b0: 0.5  5   b1: 2.5  7
+    //  f0: b0: 15   6   b1: 17   8
+    //  f1: b0: 0.5  2   b1: 2.5  4
+    //  f1: b0: 8   -0.5 b1: 10   -2.5
+    //
+    //  Output:
+    //  f0: b0:    1   5    b1:  2.5   7
+    //  f0: b0:   15   6    b1:  17    8
+    //  f1: b0:    5   6    b1:  2.5   5.2
+    //  f1: b0:    8   8    b1:  12    8
+    //
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::max));
+
+    set_values(input, {
+        1.f,   0.f,  5.f,  1.5f,
+        2.f,   0.f,  6.f,  5.2f,
+        3.f,   0.5f, 7.f, 12.f,
+        4.f,  -0.5f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+        0.5f,  2.5f,  0.5f,  2.5f,
+         5.f,   7.f,   2.f,   4.f,
+        15.f,  17.f,   8.f,  10.f,
+         6.f,   8.f, -0.5f, -2.5f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = {
+         1.f,   2.5f,  5.f,   2.5f,
+         5.f,   7.f,   6.f,   5.2f,
+        15.f,  17.f,   8.f,  12.f,
+         6.f,   8.f,   8.f,   8.f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, sub_basic_in4x4x4x4) {
+    //  Input2   : 2x2x2
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
+
+    //  Input:
+    //  f0: b0:  1    2  b1:   0    0
+    //  f0: b0:  3    4  b1:   0.5 -0.5
+    //  f1: b0:  5    6  b1:   1.5  5.2
+    //  f1: b0:  7    8  b1:   12   8
+    //
+    //  Input2
+    //  f0: b0: 0.5  5   b1: 2.5  7
+    //  f0: b0: 15   6   b1: 17   8
+    //  f1: b0: 0.5  2   b1: -1   2
+    //  f1: b0: 8   -0.5 b1: 8.5  10.5
+    //
+    //  Output:
+    //  f0: b0:   0.5  -3    b1:  -2.5  -7
+    //  f0: b0:   -12  -2    b1:  -16.5 -8.5
+    //  f1: b0:   4.5   4    b1:  2.5    3.2
+    //  f1: b0:   -1    8.5  b1:  3.5   -2.5
+    //
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sub));
+
+    set_values(input, {
+        1.f,   0.f,  5.f,  1.5f,
+        2.f,   0.f,  6.f,  5.2f,
+        3.f,   0.5f, 7.f,  12.f,
+        4.f,  -0.5f, 8.f,   8.f
+    });
+
+    set_values(input2, {
+        0.5f,  2.5f,  0.5f, -1.f,
+        5.f,   7.f,   2.f,   2.f,
+       15.f,  17.f,   8.f,   8.5f,
+        6.f,   8.f, -0.5f,  10.5f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = {
+         0.5f,  -2.5f,   4.5f,   2.5f,
+        -3.f,   -7.f,    4.f,    3.2f,
+       -12.f,  -16.5f,  -1.f,    3.5f,
+        -2.f,   -8.5f,   8.5f,  -2.5f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_int, basic_in4x4x4x4) {
+    //  Same params as in eltwise_gpu_f32, sub_basic_in4x4x4x4 but using int types instead
+
+    std::vector<data_types> data_types_to_test = { data_types::i8, data_types::i32, data_types::i64 };
+    std::vector<eltwise_mode> eltwise_ops_to_test = { eltwise_mode::sum, eltwise_mode::sub, eltwise_mode::div, eltwise_mode::prod, eltwise_mode::min, eltwise_mode::max, eltwise_mode::mod };
+
+    for (auto& data_type : data_types_to_test)
+    {
+        for (auto& mode : eltwise_ops_to_test)
+        {
+            const auto& engine = get_test_engine();
+            auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+            auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+
+            topology topology;
+            topology.add(input_layout("input", input.get_layout()));
+            topology.add(input_layout("input2", input2.get_layout()));
+            topology.add(reorder("input_reorder", "input", { data_type, format::yxfb,{ 2, 2, 2, 2 } }));
+            topology.add(reorder("input2_reorder", "input2", { data_type, format::yxfb,{ 2, 2, 2, 2 } }));
+            topology.add(eltwise("eltwise", { "input_reorder", "input2_reorder" }, mode));
+            topology.add(reorder("eltwise_reorder", "eltwise", { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }));
+
+            std::vector<float> input_1_vec = {
+                1.f,   0.f,  5.f,  1.f,
+                2.f,   0.f,  6.f,  5.f,
+                3.f,   0.f, 7.f,  12.f,
+                4.f,   0.f, 8.f,   8.f
+            };
+            set_values(input, input_1_vec);
+
+            std::vector<float> input_2_vec = {
+                0.f,  2.f,  0.f, -1.f,
+                5.f,   7.f,   2.f,   2.f,
+                15.f,  17.f,   8.f,   8.f,
+                6.f,   8.f, 0.f,  10.f };
+            set_values(input2, input_2_vec);
+
+            network network(engine, topology);
+            network.set_input_data("input", input);
+            network.set_input_data("input2", input2);
+            auto outputs = network.execute();
+
+            ASSERT_EQ(outputs.size(), size_t(1));
+            EXPECT_EQ(outputs.begin()->first, "eltwise_reorder");
+
+            auto output = outputs.at("eltwise_reorder").get_memory();
+
+            auto output_ptr = output.pointer<float>();
+
+            for (int i = 0; i < 16; i++)
+            {
+                float expected = 0.f;
+                if (mode == eltwise_mode::sum)
+                    expected = input_1_vec[i] + input_2_vec[i];
+                else if (mode == eltwise_mode::sub)
+                    expected = input_1_vec[i] - input_2_vec[i];
+                else if (mode == eltwise_mode::prod)
+                    expected = input_1_vec[i] * input_2_vec[i];
+                else if (mode == eltwise_mode::div)
+                    expected = input_1_vec[i] / input_2_vec[i];
+                else if (mode == eltwise_mode::min)
+                    expected = std::min(input_1_vec[i], input_2_vec[i]);
+                else if (mode == eltwise_mode::max)
+                    expected = std::max(input_1_vec[i], input_2_vec[i]);
+                else if (mode == eltwise_mode::mod) {
+                    expected = std::fmod(input_1_vec[i], input_2_vec[i]);
+                }
+
+
+                EXPECT_TRUE(are_equal(std::floor(expected), output_ptr[i]));
+            }
+        }
+    }
+}
+
+TEST(eltwise_gpu_f32_int, basic_in4x4x4x4) {
+    // Same params as in eltwise_gpu_f32, sub_basic_in4x4x4x4 but using int types for first input.
+    //
+    // Eltwise supports mixed inputs, but only first input can be set as intX.
+
+    std::vector<data_types> data_types_to_test = { data_types::i8, data_types::i32, data_types::i64 };
+    std::vector<eltwise_mode> eltwise_ops_to_test = { eltwise_mode::sum, eltwise_mode::sub, eltwise_mode::div, eltwise_mode::prod, eltwise_mode::min, eltwise_mode::max, eltwise_mode::mod };
+
+    for (auto& data_type : data_types_to_test)
+    {
+        for (auto& mode : eltwise_ops_to_test)
+        {
+            const auto& engine = get_test_engine();
+            auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+            auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+
+            topology topology;
+            topology.add(input_layout("input", input.get_layout()));
+            topology.add(input_layout("input2", input2.get_layout()));
+            topology.add(reorder("input_reorder", "input", { data_type, format::yxfb,{ 2, 2, 2, 2 } }));
+            topology.add(eltwise("eltwise", { "input_reorder", "input2" }, mode));
+            topology.add(reorder("eltwise_reorder", "eltwise", { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }));
+
+            std::vector<float> input_1_vec = {
+                1.f,   0.f,  5.f,  1.f,
+                2.f,   0.f,  6.f,  5.f,
+                3.f,   0.f, 7.f,  12.f,
+                4.f,   0.f, 8.f,   8.f
+            };
+            set_values(input, input_1_vec);
+
+            std::vector<float> input_2_vec = {
+                0.f,  2.f,  0.f, -1.f,
+                5.f,   7.f,   2.f,   2.f,
+                15.f,  17.f,   8.f,   8.f,
+                6.f,   8.f, 0.f,  10.f };
+            set_values(input2, input_2_vec);
+
+            network network(engine, topology);
+            network.set_input_data("input", input);
+            network.set_input_data("input2", input2);
+            auto outputs = network.execute();
+
+            ASSERT_EQ(outputs.size(), size_t(1));
+            EXPECT_EQ(outputs.begin()->first, "eltwise_reorder");
+
+            auto output = outputs.at("eltwise_reorder").get_memory();
+
+            auto output_ptr = output.pointer<float>();
+
+            for (int i = 0; i < 16; i++)
+            {
+                float expected = 0.f;
+                if (mode == eltwise_mode::sum)
+                    expected = input_1_vec[i] + input_2_vec[i];
+                else if (mode == eltwise_mode::sub)
+                    expected = input_1_vec[i] - input_2_vec[i];
+                else if (mode == eltwise_mode::prod)
+                    expected = input_1_vec[i] * input_2_vec[i];
+                else if (mode == eltwise_mode::div)
+                    expected = input_1_vec[i] / input_2_vec[i];
+                else if (mode == eltwise_mode::min)
+                    expected = std::min(input_1_vec[i], input_2_vec[i]);
+                else if (mode == eltwise_mode::max)
+                    expected = std::max(input_1_vec[i], input_2_vec[i]);
+                else if (mode == eltwise_mode::mod)
+                    expected = std::fmod(input_1_vec[i], input_2_vec[i]);
+
+                EXPECT_TRUE(are_equal(std::floor(expected), output_ptr[i]));
+            }
+        }
+    }
+}
+
+TEST(eltwise_gpu_f32, prod_basic_in4x4x4x4) {
+    //  Input2   : 2x2x2
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
+
+    //  Input:
+    //  f0: b0:  1    2  b1:   0    0
+    //  f0: b0:  3    4  b1:   0.5 -0.5
+    //  f1: b0:  5    6  b1:   1    5.2
+    //  f1: b0:  7    8  b1:   12   7.5
+    //
+    //  Input2
+    //  f0: b0: 0.5  0.5   b1: 5  2
+    //  f0: b0: 2.5  2.5   b1: 7  4
+    //  f1: b0: 15   8     b1: 6  -0.5
+    //  f1: b0: 17   10    b1: 8  -2.5
+    //
+    //  Output:
+    //  f0: b0:   0.5  1     b1:  0      0
+    //  f0: b0:   7.5  10    b1:  3.5   -2
+    //  f1: b0:   75   48    b1:  6     -2.6
+    //  f1: b0:   119  80    b1:  96   -18.75
+    //
+
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::prod));
+
+    set_values(input, {
+        1.f,   0.f,  5.f,  1.f,
+        2.f,   0.f,  6.f,  5.2f,
+        3.f,   0.5f, 7.f, 12.f,
+        4.f,  -0.5f, 8.f,  7.5f
+    });
+
+    set_values(input2, {
+        0.5f,   5.f,  15.f,    6.f,
+        0.5f,   2.f,   8.f,   -0.5f,
+        2.5f,   7.f,  17.f,    8.f,
+        2.5f,   4.f,  10.f,   -2.5f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = {
+        0.5f,   0.0f,    75.f,    6.0f,
+        1.0f,   0.0f,    48.f,   -2.6f,
+        7.5f,   3.5f,   119.f,   96.0f,
+       10.0f,  -2.0f,    80.f, -18.75f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, max_basic_in4x4x4x4_input_padding) {
+    //  Input2   : 2x2x2
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
+    //  Input Padding: 2x1 (with reorder)
+
+    //  Input:
+    //  f0: b0:  1    2  b1:   0    0
+    //  f0: b0:  3    4  b1:   0.5 -0.5
+    //  f1: b0:  5    6  b1:   1.5  5.2
+    //  f1: b0:  7    8  b1:   12   8
+    //
+    //  Input2
+    //  f0: b0: 0.5  5   b1: 2.5  7
+    //  f0: b0: 15   6   b1: 17   8
+    //  f1: b0: 0.5  2   b1: 2.5  4
+    //  f1: b0: 8   -0.5 b1: 10   -2.5
+    //
+    //  Output:
+    //  f0: b0:    1   5    b1:  2.5   7
+    //  f0: b0:   15   6    b1:  17    8
+    //  f1: b0:    5   6    b1:  2.5   5.2
+    //  f1: b0:    8   8    b1:  12    8
+    //
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(reorder("reorder", "input", input.get_layout().with_padding({ { 0, 0, 2, 1 }, 0 })));
+    topology.add(reorder("reorder2", "input2", input.get_layout().with_padding({ { 0, 0, 2, 1 }, 0 })));
+    topology.add(eltwise("eltwise", {"reorder", "reorder2"}, eltwise_mode::max));
+
+    set_values(input, {
+        1.f,   0.f,  5.f,  1.5f,
+        2.f,   0.f,  6.f,  5.2f,
+        3.f,   0.5f, 7.f, 12.f,
+        4.f,  -0.5f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+        0.5f,  2.5f,  0.5f,  2.5f,
+        5.f,   7.f,   2.f,   4.f,
+        15.f,  17.f,   8.f,  10.f,
+        6.f,   8.f, -0.5f, -2.5f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = {
+        1.f,   2.5f,  5.f,   2.5f,
+        5.f,   7.f,   6.f,   5.2f,
+        15.f,  17.f,   8.f,  12.f,
+        6.f,   8.f,   8.f,   8.f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, add_basic_in4x4x2x2_with_coefficients) {
+    //  Input2   : 2x2x2
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
+
+    //  Input:
+    //  f0: b0:  1    2  b1:   0    0
+    //  f0: b0:  3    4  b1:   0.5 -0.5
+    //  f1: b0:  5    6  b1:   1.5  5.2
+    //  f1: b0:  7    8  b1:   12   8
+    //
+    //  Input2
+    //  f0: b0: 0.5  5   b1: 2.5  7
+    //  f0: b0: 15  -2   b1: 17   6.5
+    //  f1: b0: 0.5  2   b1: 2.5  4
+    //  f1: b0: 8   -0.5 b1: 10   -2.5
+    //
+    //  Output:
+    //  f0: b0:   0.75  3.5  b1:  1.25  3.5
+    //  f0: b0:   9     1    b1:  8.75  3
+    //  f1: b0:   2.75  4    b1:   2    4.6
+    //  f1: b0:   7.5  8.25  b1:  11    8.25
+    //
+
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum, {0.5f, 0.5f}));
+
+    set_values(input, {
+            1.f,   0.f, 5.f, 1.5f,
+            2.f,   0.f, 6.f, 5.2f,
+            3.f,  0.5f, 7.f, 12.f,
+            4.f, -0.5f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+            0.5f,   2.5f,  0.5f,  2.5f,
+            5.f,   7.f,    2.f,   4.f,
+            15.f,  17.f,    8.f,  10.f,
+            -2.f,  6.5f,  -0.5f, -2.5f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = { 0.75f, 1.25f, 2.75f,  2.f,
+                          3.5f,  3.5f,   4.f, 4.6f,
+                          9.f, 8.75f,  7.5f, 11.f,
+                          1.f,   3.f, 3.75f, 2.75f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, coefficients_count_check) {
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
+    auto input3 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("input3", input3.get_layout()));
+
+    std::vector<float> coeffs0 = {};
+    std::vector<float> coeffs1 = {0.5f};
+    std::vector<float> coeffs2 = {0.5f, 0.5f};
+    std::vector<float> coeffs3 = {0.5f, 0.5f, 0.5f};
+
+    EXPECT_THROW(topology.add(eltwise("eltwise1", {"input", "input2"}, eltwise_mode::sum, coeffs1)), std::invalid_argument);
+    EXPECT_THROW(topology.add(eltwise("eltwise2", {"input", "input2"}, eltwise_mode::sum, coeffs3)), std::invalid_argument);
+
+    EXPECT_THROW(topology.add(eltwise("eltwise3", {"input", "input2", "input3"}, eltwise_mode::sum, coeffs1)), std::invalid_argument);
+    EXPECT_THROW(topology.add(eltwise("eltwise4", {"input", "input2", "input3"}, eltwise_mode::sum, coeffs2)), std::invalid_argument);
+
+    EXPECT_NO_THROW(topology.add(eltwise("eltwise5", {"input", "input2"}, eltwise_mode::sum, coeffs0)));
+    EXPECT_NO_THROW(topology.add(eltwise("eltwise6", {"input", "input2"}, eltwise_mode::sum, coeffs2)));
+
+    EXPECT_NO_THROW(topology.add(eltwise("eltwise7", {"input", "input2", "input3"}, eltwise_mode::sum, coeffs0)));
+    EXPECT_NO_THROW(topology.add(eltwise("eltwise8", {"input", "input2", "input3"}, eltwise_mode::sum, coeffs3)));
+}
+
+TEST(eltwise_gpu_f32, add_basic_in4x4x2x2_with_coefficients_3inputs) {
+    //  Input3   : 2x2x2
+    //  Input2   : 2x2x2
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
+
+    //  Input:
+    //  f0: b0:  1    2  b1:   0    0
+    //  f0: b0:  3    4  b1:   0.5 -0.5
+    //  f1: b0:  5    6  b1:   1.5  5.2
+    //  f1: b0:  7    8  b1:   12   8
+    //
+    //  Input2
+    //  f0: b0: 0.5  5   b1: 2.5  7
+    //  f0: b0: 15  -2   b1: 17   6.5
+    //  f1: b0: 0.5  2   b1: 2.5  4
+    //  f1: b0: 8   -0.5 b1: 10   -2.5
+    //
+    //  Input3
+    //  f0: b0: 8  7 b1: 0 1
+    //  f0: b0: 6  5 b1: 0 1
+    //  f1: b0: 4  3 b1: 0 1
+    //  f1: b0: 2  1 b1: 0 1
+    //
+    //  Output:
+    //  f0: b0:   4.75  7      b1:  1.25  4
+    //  f0: b0:   12    3.5    b1:  8.75  3.5
+    //  f1: b0:   4.75  5.5    b1:   2    5.1
+    //  f1: b0:   8.5   8.75   b1:  11    8.75
+    //
+
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
+    auto input3 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("input3", input3.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2", "input3"}, eltwise_mode::sum, {0.5f, 0.5f, 0.5f}));
+
+    set_values(input, {
+            1.f,   0.f, 5.f, 1.5f,
+            2.f,   0.f, 6.f, 5.2f,
+            3.f,  0.5f, 7.f, 12.f,
+            4.f, -0.5f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+            0.5f,   2.5f,  0.5f,  2.5f,
+            5.f,   7.f,    2.f,   4.f,
+            15.f,  17.f,    8.f,  10.f,
+            -2.f,  6.5f,  -0.5f, -2.5f });
+
+    set_values(input3, {
+            8.f,  0.f,  4.f, 0.f,
+            7.f,  1.f,  3.f, 1.f,
+            6.f,  0.f,  2.f, 0.f,
+            5.f,  1.f,  1.f, 1.f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("input3", input3);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = { 4.75f, 1.25f, 4.75f,  2.f,
+                          7.0f,  4.0f,  5.5f, 5.1f,
+                          12.f, 8.75f,  8.5f, 11.f,
+                          3.5f,  3.5f, 4.25f, 3.25f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, max_3inputs_in4x4x4x4_input_padding) {
+    //  Input  : 2x2x2x2
+    //  Input2 : 2x2x2x2
+    //  Input3 : 2x2x2x2
+    //  Output : 2x2x2x2
+    //  Input Padding: 2x1 (with reorder)
+
+    //  Input:
+    //  f0: b0:  1    2  b1:   0    0
+    //  f0: b0:  3    4  b1:   0.5 -0.5
+    //  f1: b0:  5    6  b1:   1.5  5.2
+    //  f1: b0:  7    8  b1:   12   8
+    //
+    //  Input2
+    //  f0: b0: 0.5  5   b1: 2.5  7
+    //  f0: b0: 15   6   b1: 17   8
+    //  f1: b0: 0.5  2   b1: 2.5  4
+    //  f1: b0: 8   -0.5 b1: 10   -2.5
+    //
+    //  Input3
+    //  f0: b0: 1.1  1   b1: 4  0
+    //  f0: b0: 15  -1   b1: 3  6
+    //  f1: b0: 1.5  2   b1: 2  7
+    //  f1: b0: 9   0.5  b1: 1  8
+    //
+    //  Output:
+    //  f0: b0:   1.1  5    b1:  4   7
+    //  f0: b0:   15   6    b1:  17    8
+    //  f1: b0:    5   6    b1:  2.5   7
+    //  f1: b0:    9   8    b1:  12    8
+    //
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
+    auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("input3", input3.get_layout()));
+    topology.add(reorder("reorder", "input", input.get_layout().with_padding({ { 0, 0, 2, 1 }, 0 })));
+    topology.add(reorder("reorder2", "input2", input.get_layout().with_padding({ { 0, 0, 2, 1 }, 0 })));
+    topology.add(reorder("reorder3", "input3", input.get_layout().with_padding({ { 0, 0, 2, 1 }, 0 })));
+    topology.add(eltwise("eltwise", {"reorder", "reorder2", "reorder3"}, eltwise_mode::max));
+
+    set_values(input, {
+         1.f,  0.f,  5.f,  1.5f,
+         2.f,  0.f,  6.f,  5.2f,
+         3.f,  0.5f, 7.f,  12.f,
+         4.f, -0.5f, 8.f,   8.f
+    });
+
+    set_values(input2, {
+        0.5f, 2.5f,  0.5f,  2.5f,
+         5.f,  7.f,   2.f,   4.f,
+        15.f, 17.f,   8.f,  10.f,
+         6.f,  8.f, -0.5f, -2.5f });
+
+    set_values(input3, {
+        1.1f,  4.f, 1.5f, 2.f,
+         1.f,  0.f,  2.f, 7.f,
+        15.f,  3.f,  9.f, 1.f,
+        -1.f,  6.f, 0.5f, 8.f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("input3", input3);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = {
+        1.1f,   4.f,  5.f,   2.5f,
+        5.f,   7.f,   6.f,   7.f,
+        15.f,  17.f,   9.f,  12.f,
+        6.f,   8.f,   8.f,   8.f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+
+TEST(eltwise_gpu_f32, stride_test_2x2) {
+    //  Input  : 2x2x2x2
+    //  Input2 : 2x2x4x4
+    //  Output : 2x2x2x2
+
+    //  Input:
+    //  f0: b0:  1    2  b1:   0    0
+    //  f0: b0:  3    4  b1:   0.5 -0.5
+    //  f1: b0:  5    6  b1:   1.5  5.2
+    //  f1: b0:  7    8  b1:   12   8
+    //
+    //  Input2
+    //  f0: b0: 1  2  3  4   b1: 17 18 19 20
+    //  f0: b0: 5  6  7  8   b1: 21 22 23 24
+    //  f0: b0: 9  10 11 12  b1: 25 26 27 28
+    //  f0: b0: 13 14 15 16  b1: 29 30 31 32
+
+    //  f1: b0: 33 34 35 36  b1: 49 50 51 52
+    //  f1: b0: 37 38 39 40  b1: 53 54 55 56
+    //  f1: b0: 41 42 43 44  b1: 57 58 59 60
+    //  f1: b0: 45 46 47 48  b1: 61 62 63 64
+
+    //
+    //  Output:
+    //  f0: b0: 1  3    b1:  17  19
+    //  f0: b0: 9  11   b1:  25  27
+    //  f1: b0: 33 35   b1:  49  51
+    //  f1: b0: 41 43   b1:  57  59
+    //
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 4, 4 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", "input", "input2", { {0,0,1,1}, {0,0,2,2} }, eltwise_mode::max));
+
+    set_values(input, {
+        1.f,   0.f,  5.f,  1.5f,
+        2.f,   0.f,  6.f,  5.2f,
+        3.f,   0.5f, 7.f, 12.f,
+        4.f,  -0.5f, 8.f,  8.f
+        });
+
+    set_values<float>(input2, {
+        1,  17, 33, 49,
+        2,  18, 33, 50,
+        3,  19, 35, 51,
+        4,  20, 36, 52,
+        5,  21, 37, 53,
+        6,  22, 38, 54,
+        7,  23, 39, 55,
+        8,  24, 40, 56,
+        9,  25, 41, 57,
+        10, 26, 42, 58,
+        11, 27, 43, 59,
+        12, 28, 44, 60,
+        13, 29, 45, 61,
+        14, 30, 46, 62,
+        15, 31, 47, 63,
+        16, 32, 48, 64 });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = {
+        1,  17,  33,  49,
+        3,  19,  35,  51,
+        9,  25,  41,  57,
+        11, 27,  43,  59 };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, broadcast_test_in4x4x2x2) {
+    //  Input2   : 2x1x1
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
+
+    //  Input:
+    //  f0: b0:  1    2  b1:   0    0
+    //  f0: b0:  3    4  b1:   0.5 -0.5
+    //  f1: b0:  5    6  b1:   1.5  5.2
+    //  f1: b0:  7    8  b1:   12   8
+    //
+    //  Input2
+    //  f0: b0: 0.5  b1: 2.5
+    //  f1: b0: 0.5  b1: 2.5
+    //
+    //  Output:
+    //  f0: b0:   1.5  7    b1:  2.5   7
+    //  f0: b0:   18   2    b1:  17.5  6
+    //  f1: b0:   5.5  8    b1:   4    9.2
+    //  f1: b0:   15  16.5  b1:  22    16.5
+    //
+
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 1, 1 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", { "input", "input2" }, eltwise_mode::sum));
+
+    set_values(input, {
+        1.f,   0.f, 5.f, 1.5f,
+        2.f,   0.f, 6.f, 5.2f,
+        3.f,  0.5f, 7.f, 12.f,
+        4.f, -0.5f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+        0.5f,   2.5f,  0.5f,  2.5f
+    });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = {
+        1.5f,   2.5f,   5.5f,   4.f,
+        2.5f,   2.5f,   6.5f,   7.7f,
+        3.5f,    3.f,   7.5f,  14.5f,
+        4.5f,    2.f,   8.5f,  10.5f };
+
+    auto output_ptr = output.pointer<float>();
 
     for (int i = 0; i < 16; i++)
     {
@@ -866,12 +2443,174 @@ TEST(eltwise_gpu_f32, max_3inputs_in4x4x4x4_input_padding) {
 }
 
 
+template <typename T>
+int8_t eltwise_bool_execute(cldnn::eltwise_mode mode, T x, T y) {
+    switch (mode) {
+    case eltwise_mode::eq:
+        return x == y;
+    case eltwise_mode::ne:
+        return x != y;
+    case eltwise_mode::lt:
+        return x < y;
+    case eltwise_mode::le:
+        return x <= y;
+    case eltwise_mode::gt:
+        return x > y;
+    case eltwise_mode::ge:
+        return x >= y;
+    case eltwise_mode::logic_and:
+        return x && y;
+    case eltwise_mode::logic_or:
+        return x || y;
+    default:
+        return (int8_t)0;
+    }
+}
+
+template <typename T>
+VVVVF<int8_t> eltwise_bool_reference(VVVVF<T> &input1, VVVVF<T> &input2,
+    cldnn::eltwise_mode mode, int input_padding_y = 0,
+    int input_padding_x = 0, int output_padding_y = 0,
+    int output_padding_x = 0) {
+
+    size_t padding_y = input_padding_y + output_padding_y;
+    size_t padding_x = input_padding_x + output_padding_x;
+    size_t output_b = input1.size();
+    size_t output_f = input1[0].size();
+    size_t output_y = input1[0][0].size() + 2 * padding_y;
+    size_t output_x = input1[0][0][0].size() + 2 * padding_x;
+    VVVVF<int8_t> output(output_b, VVVF<int8_t>(output_f, VVF<int8_t>(output_y, VF<int8_t>(output_x))));
+
+    T res;
+    for (size_t b = 0; b < output_b; ++b) {
+        for (size_t f = 0; f < output_f; ++f) {
+            for (size_t y = 0; y < input1[0][0].size(); ++y) {
+                for (size_t x = 0; x < input1[0][0][0].size(); ++x) {
+                    res = eltwise_bool_execute<T>(mode, input1[b][f][y][x], input2[b][f][y][x]);
+                    output[b][f][y + padding_y][x + padding_x] = res;
+                }
+            }
+        }
+    }
+    return output;
+}
+
+template <typename T>
+void generic_eltwise_bool_test(cldnn::format test_input_fmt, int input_b, int input_f, int input_y, int input_x, cldnn::eltwise_mode mode,
+    int input_padding_y, int input_padding_x, int output_padding_y, int output_padding_x) {
+
+    int min_random = -2, max_random = 2;
+    VVVVF<T> input1_rnd = generate_random_4d<T>(input_b, input_f, input_y, input_x, min_random, max_random);
+    VVVVF<T> input2_rnd = generate_random_4d<T>(input_b, input_f, input_y, input_x, min_random, max_random);
+    VF<T> input1_rnd_vec = flatten_4d<T>(test_input_fmt, input1_rnd);
+    VF<T> input2_rnd_vec = flatten_4d<T>(test_input_fmt, input2_rnd);
+
+    const auto& engine = get_test_engine();
+    tensor input_tensor( input_b, input_f, input_x, input_y );
+    auto input1 = memory::allocate(engine, { type_to_data_type<T>::value, test_input_fmt, input_tensor });
+    auto input2 = memory::allocate(engine, { type_to_data_type<T>::value, test_input_fmt, input_tensor });
+    set_values(input1, input1_rnd_vec);
+    set_values(input2, input2_rnd_vec);
+
+    topology topology;
+    topology.add(input_layout("input1", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(reorder("reorder1", "input1", input1.get_layout().with_padding({{ 0, 0, input_padding_x, input_padding_y }, 0 })));
+    topology.add(eltwise("eltwise", {"reorder1", "input2"}, mode, false, 0.f, { { 0, 0, output_padding_x, output_padding_y }, 0 }));
+
+    network network(engine, topology);
+    network.set_input_data("input1", input1);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output_memory = outputs.at("eltwise").get_memory();
+    auto output_layout = output_memory.get_layout();
+    auto output_ptr = output_memory.pointer<int8_t>();
+
+    VVVVF<int8_t> output_cpu = eltwise_bool_reference<T>(input1_rnd, input2_rnd, mode, input_padding_y, input_padding_x, output_padding_y, output_padding_x);
+    EXPECT_EQ(output_layout.format.value, test_input_fmt.value);
+    tensor output_tensor = output_layout.get_buffer_size();
+    int y_size = output_tensor.spatial[1];
+    int x_size = output_tensor.spatial[0];
+    int f_size = output_tensor.feature[0];
+    int b_size = output_tensor.batch[0];
+    EXPECT_EQ(y_size, (int)output_cpu[0][0].size());
+    EXPECT_EQ(x_size, (int)output_cpu[0][0][0].size());
+    EXPECT_EQ(f_size, (int)output_cpu[0].size());
+    EXPECT_EQ(b_size, (int)output_cpu.size());
+
+    bool test_is_correct = true;
+    VF<int8_t> output_cpu_vec = flatten_4d<int8_t>(test_input_fmt, output_cpu);
+    for (size_t i = 0; i < output_cpu_vec.size(); ++i) {
+        if (output_cpu_vec[i] != output_ptr[i]) {
+            test_is_correct = false;
+            break;
+        }
+    }
+    EXPECT_EQ(test_is_correct, true) << std::endl
+        << "failing test parameters:" << std::endl
+        << "input_b = " << input_b << std::endl
+        << "input_f = " << input_f << std::endl
+        << "input_y = " << input_y << std::endl
+        << "input_x = " << input_x << std::endl
+        << "eltwise_mode = " << (int)mode << std::endl
+        << "input_padding_y = " << input_padding_y << std::endl
+        << "input_padding_x = " << input_padding_x << std::endl
+        << "output_padding_y = " << output_padding_y << std::endl
+        << "output_padding_x = " << output_padding_x << std::endl
+        << "type = " << (sizeof(T) == 1 ? "int8" : "int32") << std::endl;
+}
+
+void run_eltwise_bool_generic_test(cldnn::eltwise_mode mode)
+{
+    cldnn::format test_inputs_fmt = cldnn::format::bfyx;
+    std::pair<int, int> input_size = { 227, 227 };
+
+    generic_eltwise_bool_test<int32_t>(test_inputs_fmt, 1, 1, input_size.first, input_size.second, mode, 0, 0, 0, 0);
+    generic_eltwise_bool_test<int8_t>(test_inputs_fmt, 1, 1, input_size.first, input_size.second, mode, 0, 0, 0, 0);
+}
+
+TEST(eltwise_gpu_bool, eltwise_eq) {
+    run_eltwise_bool_generic_test(cldnn::eltwise_mode::eq);
+}
+
+TEST(eltwise_gpu_bool, eltwise_ne) {
+    run_eltwise_bool_generic_test(cldnn::eltwise_mode::ne);
+}
+
+TEST(eltwise_gpu_bool, eltwise_lt) {
+    run_eltwise_bool_generic_test(cldnn::eltwise_mode::lt);
+}
+
+TEST(eltwise_gpu_bool, eltwise_le) {
+    run_eltwise_bool_generic_test(cldnn::eltwise_mode::le);
+}
+
+TEST(eltwise_gpu_bool, eltwise_gt) {
+    run_eltwise_bool_generic_test(cldnn::eltwise_mode::gt);
+}
+
+TEST(eltwise_gpu_bool, eltwise_ge) {
+    run_eltwise_bool_generic_test(cldnn::eltwise_mode::ge);
+}
+
+TEST(eltwise_gpu_bool, eltwise_and) {
+    run_eltwise_bool_generic_test(cldnn::eltwise_mode::logic_and);
+}
+
+TEST(eltwise_gpu_bool, eltwise_or) {
+    run_eltwise_bool_generic_test(cldnn::eltwise_mode::logic_or);
+}
+
+
 void run_eltwise_generic_test(cldnn::eltwise_mode mode)
 {
     cldnn::format test_inputs_fmt = cldnn::format::bfyx;
     std::pair<int, int> input_size = { 227, 227 };
 
-    engine engine;
+    const auto& engine = get_test_engine();
     bool f16_supported = !!engine.get_info().supports_fp16;
     if (!f16_supported) {
         std::cout << "[ SKIPPED  ] float16 combinations are skipped (cl_khr_fp16 is not supported)." << std::endl;
@@ -898,6 +2637,295 @@ TEST(eltwise_gpu, eltwise_mod) {
     run_eltwise_generic_test(cldnn::eltwise_mode::mod);
 }
 
+
+TEST(eltwise_gpu, b_fs_yx_fsv4_w_callib) {
+    int B_array[] = {   1,   4,   16,   32, 0 };  // Batch
+    int F_array[] = { 256, 512, 1024, 2048, 0 };  // Features
+    int I_array[] = {  56,  28,   14,   14, 0 };  // Input MxM data sizes
+
+    for (int j = 0; F_array[j]; j++) {
+        const auto& engine = get_test_engine();
+
+        int in_B = B_array[j];
+        int in_F = F_array[j];
+
+        int in_X = I_array[j],
+            in_Y = in_X;
+
+        // Input data init
+        std::vector<char> Data(in_B * in_F * in_X * in_Y);
+        for (size_t i = 0; i < Data.size(); i++)
+            Data[i] = static_cast<char>(i);
+        std::vector<char> DataGold(Data);
+
+        // Expected "gold" output and IMAD output.
+        std::vector<char>  vGoldOutput;
+        std::vector<char>  vTestOutput;
+
+        // Mem initialization
+        // This is user data, no kernels here
+        auto input1 = memory::allocate(engine,
+                                      { data_types::i8,
+                                          format::bfyx,
+                                          { in_B, in_F, in_X, in_Y } });
+        std::vector<char> data_i1(DataGold);
+        set_values(input1, std::move(data_i1));
+        auto input2 = memory::allocate(engine,
+                                      { data_types::i8,
+                                          format::bfyx,
+                                          { in_B, in_F, in_X, in_Y } });
+        std::vector<char> data_i2(DataGold);
+        set_values(input2, std::move(data_i2));
+
+        auto callib = memory::allocate(engine,
+                                       { data_types::f32,
+                                         format::bfyx,
+                                         { 1, in_F, 1, 1 } });
+        std::vector<float> data_c(in_F);
+        float sign = 1;
+        for (size_t i = 0; i < data_c.size(); i++) {
+            data_c[i] = ((i + 1) % 7) ? sign : -sign;
+            sign *= (float)1.0123;
+        }
+        set_values(callib, std::move(data_c));
+
+        // "Golden" Eltwise
+        {
+            topology topology;
+
+            auto eltw = eltwise("eltw_GOLD",
+                                "input1", "input2",
+                                "callib",
+                                eltwise_mode::sum, true);
+
+            // Create a topology
+            topology.add(input_layout("input1", input1.get_layout()),
+                         input_layout("input2", input2.get_layout()),
+                         eltw);
+
+            topology.add(data("callib", callib));
+
+            // Network processing
+            network network(engine, topology);
+            network.set_input_data("input1", input1);
+            network.set_input_data("input2", input2);
+            auto outputs = network.execute();
+
+            // Validation
+            auto searchC = outputs.find("eltw_GOLD");
+            EXPECT_NE(searchC, outputs.end());
+            auto output = outputs.begin()->second.get_memory();
+            auto output_ptr = output.pointer<char>();
+            vGoldOutput.reserve(output_ptr.size());
+            for (size_t i = 0; i < output_ptr.size(); i++)
+                vGoldOutput.push_back(output_ptr[i]);
+        }
+
+        // "IMAD" Eltwise
+        {
+            topology topology;
+
+            // Reorder (a-ka swizzelling) input to MMAD/IMAD Pooling format
+            topology.add(reorder("reorder1_Swizzelled",
+                                 "input1",
+                                 layout(data_types::i8,
+                                        format::b_fs_yx_fsv4,
+                                        { in_B, in_F, in_X, in_Y })),
+                         reorder("reorder2_Swizzelled",
+                                  "input2",
+                                  layout(data_types::i8,
+                                         format::b_fs_yx_fsv4,
+                                         { in_B, in_F, in_X, in_Y })));
+
+            auto eltw = eltwise("eltw_IMAD",
+                                "reorder1_Swizzelled", "reorder2_Swizzelled",
+                                "callib",
+                                eltwise_mode::sum, true);
+
+            topology.add(input_layout("input1", input1.get_layout()),
+                         input_layout("input2", input2.get_layout()),
+                         eltw);
+
+            topology.add(data("callib", callib));
+
+            // Back reordering (a-ka unswizzelling) output from MMAD/IMAD pooling
+            topology.add(reorder("reorder_UnSwizzelled",
+                                 "eltw_IMAD",
+                                 layout(data_types::i8,
+                                        format::bfyx,
+                                        { in_B, in_F, in_X, in_Y })));
+
+            // Network processing
+            network network(engine, topology);
+            network.set_input_data("input1", input1);
+            network.set_input_data("input2", input2);
+            auto outputs = network.execute();
+
+            // Validation
+            auto searchC = outputs.find("reorder_UnSwizzelled");
+            EXPECT_NE(searchC, outputs.end());
+            auto output = outputs.begin()->second.get_memory();
+            auto output_ptr = output.pointer<char>();
+            vTestOutput.reserve(output_ptr.size());
+            for (size_t i = 0; i < output_ptr.size(); i++)
+                vTestOutput.push_back(output_ptr[i]);
+        }
+
+        // Result validation
+        ASSERT_TRUE(vGoldOutput.size() == vTestOutput.size());
+        for (size_t i = 0; i < vGoldOutput.size(); i++)
+            ASSERT_TRUE(vTestOutput[i] == vGoldOutput[i]);
+    } // for (int j = 0; F_array[j]; j++)
+}
+
+TEST(eltwise_gpu, b_fs_yx_fsv4_wo_callib) {
+    //
+    // Input data
+    const int BATCH = 1;
+    const int in_B = BATCH;
+
+    const auto& engine = get_test_engine();
+
+    int in_F = 256;
+
+    int in_X = 56,
+        in_Y = in_X;
+
+    // Input data init
+    std::vector<char> Data(in_B * in_F * in_X * in_Y);
+    for (size_t i = 0; i < Data.size(); i++)
+        Data[i] = static_cast<char>(i);
+    std::vector<char> DataGold(Data);
+
+    // Mem initialization
+    // This is user data, no kernels here
+    auto input1 = memory::allocate(engine,
+                                   { data_types::i8,
+                                       format::bfyx,
+                                       { in_B, in_F, in_X, in_Y } });
+    std::vector<char> data_i1(DataGold);
+    for (size_t i = 0; i < data_i1.size(); i++) data_i1[i] = data_i1[i] + 1;
+    set_values(input1, std::move(data_i1));
+
+    auto input2 = memory::allocate(engine,
+                                  { data_types::i8,
+                                      format::bfyx,
+                                      { in_B, in_F, in_X, in_Y } });
+    std::vector<char> data_i2(DataGold);
+    for (size_t i = 0; i < data_i2.size(); i++) data_i2[i] = data_i2[i] + 2;
+    set_values(input2, std::move(data_i2));
+
+    auto input3 = memory::allocate(engine,
+                                  { data_types::i8,
+                                      format::bfyx,
+                                      { in_B, in_F, in_X, in_Y } });
+    std::vector<char> data_i3(DataGold);
+    for (size_t i = 0; i < data_i3.size(); i++) data_i3[i] = data_i3[i] + 3;
+    set_values(input3, std::move(data_i3));
+
+    cldnn::eltwise_mode mode[] = { cldnn::eltwise_mode::min,
+                                   cldnn::eltwise_mode::max,
+                                   cldnn::eltwise_mode::sum };
+
+    for (int i = 0; i < 3; i++) {
+        // Expected "gold" output and IMAD output.
+        std::vector<char>  vGoldOutput;
+        std::vector<char>  vTestOutput;
+
+        // "Golden" Eltwise
+        {
+            topology topology;
+
+            auto eltw = eltwise("eltw_GOLD",
+                                { "input1", "input2", "input3" },
+                                mode[i], true);
+
+            // Create a topology
+            topology.add(input_layout("input1", input1.get_layout()),
+                         input_layout("input2", input2.get_layout()),
+                         input_layout("input3", input3.get_layout()),
+                         eltw);
+
+            // Network processing
+            network network(engine, topology);
+            network.set_input_data("input1", input1);
+            network.set_input_data("input2", input2);
+            network.set_input_data("input3", input3);
+            auto outputs = network.execute();
+
+            // Validation
+            auto searchC = outputs.find("eltw_GOLD");
+            EXPECT_NE(searchC, outputs.end());
+            auto output = outputs.begin()->second.get_memory();
+            auto output_ptr = output.pointer<char>();
+            vGoldOutput.reserve(output_ptr.size());
+            for (size_t i = 0; i < output_ptr.size(); i++)
+                vGoldOutput.push_back(output_ptr[i]);
+        }
+
+        // "IMAD" Eltwise
+        {
+            topology topology;
+
+            // Reorder (a-ka swizzelling) input to MMAD/IMAD Pooling format
+            topology.add(reorder("reorder1_Swizzelled",
+                                 "input1",
+                                 layout(data_types::i8,
+                                        format::b_fs_yx_fsv4,
+                                        { in_B, in_F, in_X, in_Y })),
+                         reorder("reorder2_Swizzelled",
+                                 "input2",
+                                 layout(data_types::i8,
+                                        format::b_fs_yx_fsv4,
+                                        { in_B, in_F, in_X, in_Y })),
+                         reorder("reorder3_Swizzelled",
+                                 "input3",
+                                 layout(data_types::i8,
+                                        format::b_fs_yx_fsv4,
+                                        { in_B, in_F, in_X, in_Y })));
+
+            auto eltw = eltwise("eltw_IMAD",
+                                { "reorder1_Swizzelled",
+                                  "reorder2_Swizzelled",
+                                  "reorder3_Swizzelled" },
+                                mode[i], true);
+
+            topology.add(input_layout("input1", input1.get_layout()),
+                         input_layout("input2", input2.get_layout()),
+                         input_layout("input3", input3.get_layout()),
+                         eltw);
+
+            // Back reordering (a-ka unswizzelling) output from MMAD/IMAD pooling
+            topology.add(reorder("reorder_UnSwizzelled",
+                                 "eltw_IMAD",
+                                 layout(data_types::i8,
+                                     format::bfyx,
+                                     { in_B, in_F, in_X, in_Y })));
+
+            // Network processing
+            network network(engine, topology);
+            network.set_input_data("input1", input1);
+            network.set_input_data("input2", input2);
+            network.set_input_data("input3", input3);
+            auto outputs = network.execute();
+
+            // Validation
+            auto searchC = outputs.find("reorder_UnSwizzelled");
+            EXPECT_NE(searchC, outputs.end());
+            auto output = outputs.begin()->second.get_memory();
+            auto output_ptr = output.pointer<char>();
+            vTestOutput.reserve(output_ptr.size());
+            for (size_t i = 0; i < output_ptr.size(); i++)
+                vTestOutput.push_back(output_ptr[i]);
+        }
+
+        // Result validation
+        ASSERT_TRUE(vGoldOutput.size() == vTestOutput.size());
+        for (size_t i = 0; i < vGoldOutput.size(); i++)
+            ASSERT_TRUE(vTestOutput[i] == vGoldOutput[i]);
+    }
+}
+
 TEST(DISABLED_eltwise_gpu, generic_random) {
     VF<cldnn::format> test_inputs_fmts = { cldnn::format::bfyx, cldnn::format::yxfb };
     VF<cldnn::eltwise_mode> modes = { cldnn::eltwise_mode::sum, cldnn::eltwise_mode::sub, cldnn::eltwise_mode::max, cldnn::eltwise_mode::prod };
@@ -905,7 +2933,7 @@ TEST(DISABLED_eltwise_gpu, generic_random) {
     VF<float> slopes = { 0.0f, -0.0f, -17.19f, 1028.8f, std::numeric_limits<float>::infinity(), -std::numeric_limits<float>::infinity() };
     std::vector<std::pair<int, int>> input_sizes = { { 100, 100 },{ 227, 227 },{ 400, 600 } };
 
-    engine engine;
+    const auto& engine = get_test_engine();
     bool f16_supported = !!engine.get_info().supports_fp16;
     if (!f16_supported) {
         std::cout << "[ SKIPPED  ] float16 combinations are skipped (cl_khr_fp16 is not supported)." << std::endl;