Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / src / gpu / eltwise_gpu.cpp
index 4833983..5219fe9 100644 (file)
@@ -1,5 +1,9 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+<<<<<<< HEAD
+// Copyright (c) 2019 Intel Corporation
+=======
+// Copyright (c) 2016-2019 Intel Corporation
+>>>>>>> 0473785... Eltwise operation added: equal, not_equal, less, less_equal, greater, greater_equal, logical_and, logical_or, logical_xor, that produce int output with 0 and 1 values
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -26,7 +30,7 @@ namespace cldnn { namespace gpu {
 
 namespace
 {
-    inline kernel_selector::eltwise_mode convect_to_eltwise_mode(eltwise_mode mode)
+    inline kernel_selector::eltwise_mode convert_to_eltwise_mode(eltwise_mode mode)
     {
         switch (mode)
         {
@@ -38,6 +42,16 @@ namespace
         case eltwise_mode::min: return kernel_selector::eltwise_mode::MIN;
         case eltwise_mode::pow: return kernel_selector::eltwise_mode::POW;
         case eltwise_mode::mod: return kernel_selector::eltwise_mode::MODULU;
+        case eltwise_mode::eq: return kernel_selector::eltwise_mode::EQ;
+        case eltwise_mode::ne: return kernel_selector::eltwise_mode::NE;
+        case eltwise_mode::lt: return kernel_selector::eltwise_mode::LT;
+        case eltwise_mode::le: return kernel_selector::eltwise_mode::LE;
+        case eltwise_mode::gt: return kernel_selector::eltwise_mode::GT;
+        case eltwise_mode::ge: return kernel_selector::eltwise_mode::GE;
+        case eltwise_mode::logic_and: return kernel_selector::eltwise_mode::LOGIC_AND;
+        case eltwise_mode::logic_or: return kernel_selector::eltwise_mode::LOGIC_OR;
+        case eltwise_mode::logic_xor: return kernel_selector::eltwise_mode::LOGIC_XOR;
+        case eltwise_mode::squared_diff: return kernel_selector::eltwise_mode::SQUARED_DIFF;
         default:
             return kernel_selector::eltwise_mode::ADD;
         }
@@ -58,8 +72,8 @@ protected:
     }
 
 public:
-    static primitive_impl* create(const eltwise_node& arg) 
-    { 
+    static primitive_impl* create(const eltwise_node& arg)
+    {
         auto ew_params = get_default_params<kernel_selector::eltwise_params>(arg);
         auto ew_optional_params = get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());
 
@@ -70,17 +84,17 @@ public:
 
         const auto& primitive = arg.get_primitive();
         if(primitive->with_activation)
-            convert_activation_func_params(primitive, ew_params);
+            convert_activation_func_params(primitive, ew_params.activation);
 
-        ew_params.operations.push_back({ 
+        ew_params.operations.push_back({
             { kernel_selector::eltwise_params::InputType::Buffer(0), kernel_selector::eltwise_params::InputType::Buffer(1) },
-            convect_to_eltwise_mode(primitive->mode) });
+            convert_to_eltwise_mode(primitive->mode) });
 
         for (uint32_t i = 2; i < static_cast<uint32_t>(arg.inputs_count()); i++)
         {
             ew_params.operations.push_back({{ kernel_selector::eltwise_params::InputType::Intermediate(i-2),
                                                             kernel_selector::eltwise_params::InputType::Buffer(i) },
-                                                            convect_to_eltwise_mode(primitive->mode) });
+                                                            convert_to_eltwise_mode(primitive->mode) });
         }
 
         if (primitive->mode == eltwise_mode::sum)
@@ -91,7 +105,53 @@ public:
         for (size_t i = 0; i < ew_params.inputs.size(); i++)
         {
             if (!ew_params.inputs[i].SameDims(ew_params.output))
-                ew_params.layoutBased = true;
+            {
+                std::vector<int32_t> input_size  = arg.input(i).get_output_layout().size.raw.vector();
+                std::vector<int32_t> output_size = arg.get_output_layout().size.raw.vector();
+                bool broadcast = false;
+                for (size_t d = 0; d < output_size.size(); d++)
+                {
+                    if (output_size[d] != 1 || input_size[d] == 1)
+                        broadcast = true;
+                }
+                if (broadcast)
+                {
+                    ew_params.broadcast = true;
+                    break;
+                }
+                else
+                {
+                    ew_params.layoutBased = true;
+                    break;
+                }
+
+            }
+        }
+
+        // Copy each stride's spatial[0] / spatial[1] from the primitive into ew_params.stride.
+        if (!primitive->stride.empty())
+        {
+            const auto& stride = primitive->stride;
+            ew_params.stride.resize(stride.size());
+            for (size_t i = 0; i < primitive->stride.size(); i++)
+            {
+                ew_params.stride[i] = { (uint32_t)stride[i].spatial[0], (uint32_t)stride[i].spatial[1] };
+            }
+        }
+
+        // Compare every stride against the first one; any mismatch sets ew_params.layoutBased.
+        if(!ew_params.stride.empty())
+        {
+            const auto& stride = ew_params.stride[0];
+            for (size_t i = 1; i < ew_params.stride.size(); i++)
+            {
+                if (stride.x != ew_params.stride[i].x || stride.y != ew_params.stride[i].y)
+                    ew_params.layoutBased = true;
+            }
+        }
+        else if (!ew_params.inputs[0].SameDimsSizes(ew_params.inputs[1]))
+        {
+            ew_params.broadcast = true;
         }
 
         if (primitive->output_calibration_factors.size() > 0 || primitive->output_quantization_factor != 1.0f)
@@ -139,7 +199,9 @@ namespace {
                 { std::make_tuple(engine_types::ocl, data_types::i64, format::byxf), eltwise_gpu::create },
                 // MMAD
                 { std::make_tuple(engine_types::ocl, data_types::i8, format::byxf_af32), eltwise_gpu::create },
-                { std::make_tuple(engine_types::ocl, data_types::i8, format::fs_bs_yx_bsv4_fsv32), eltwise_gpu::create }
+                { std::make_tuple(engine_types::ocl, data_types::i8, format::fs_bs_yx_bsv4_fsv32), eltwise_gpu::create },
+                { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), eltwise_gpu::create },
+                { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), eltwise_gpu::create }
             });
         }
         ~attach() {}