[IE CLDNN] Prod mode support in eltwise fusings (#1491)
author Vladimir Paramuzov <vladimir.paramuzov@intel.com>
Thu, 30 Jul 2020 15:16:37 +0000 (18:16 +0300)
committer GitHub <noreply@github.com>
Thu, 30 Jul 2020 15:16:37 +0000 (18:16 +0300)
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h
inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp
inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp
inference-engine/thirdparty/clDNN/src/include/eltwise_inst.h
inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp

index 0e59efa68b1d632697642db2cacf14ebed052144..4524435e2700af4f30e019efca4a2fcb304746dd 100644 (file)
@@ -100,7 +100,9 @@ struct eltwise_optional_params : optional_params {
 // fuse_params
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 struct eltwise_fuse_params : fuse_params {
-    eltwise_fuse_params() : fuse_params(KernelType::ELTWISE) {}
+    EltwiseMode mode;
+
+    eltwise_fuse_params(EltwiseMode mode) : fuse_params(KernelType::ELTWISE), mode(mode) {}
 };
 
 struct scale_fuse_params : fuse_params {
index 734e02a449a4c23c139506346b96423f562cd7a2..a9910c12aa49b978296b5586ab9a2c4057ddb185 100644 (file)
@@ -1142,8 +1142,25 @@ JitConstants FusedOpsCodeGenerator::MakeOpJitConstants(const FusedOpsConfigurati
             break;
         }
         case KernelType::ELTWISE: {
+            auto p = desc.GetOpParams<eltwise_fuse_params>();
+            if (!p)
+                throw std::runtime_error("[clDNN] Eltwise fuse params can't be nullptr");
+
+            std::string op = "";
+            switch (p->mode)
+            {
+            case kernel_selector::EltwiseMode::ADD:
+                op = "+";
+                break;
+            case kernel_selector::EltwiseMode::MUL:
+                op = "*";
+                break;
+            default:
+                throw std::runtime_error("[clDNN] Eltwise mode is not supported in fused ops codegen");
+            }
+
             op_decls += "\\\n\t" + GetOutputType(vec_size) + " " + out_var + " = " + in_vars_converted[0] +
-                        " + " + ConvertToOutputType(in_var, vec_size) + ";";
+                        op + ConvertToOutputType(in_var, vec_size) + ";";
             break;
         }
         case KernelType::QUANTIZE: {
index f1fe37ccdde5db8967833f8989eb724176e40976..7f0a4535b5ed2fd4957640890c166d3ad1474750 100644 (file)
 namespace cldnn {
 namespace gpu {
 
-namespace {
-inline kernel_selector::eltwise_mode convert_to_eltwise_mode(eltwise_mode mode) {
-    switch (mode) {
-        case eltwise_mode::sum:
-            return kernel_selector::eltwise_mode::ADD;
-        case eltwise_mode::sub:
-            return kernel_selector::eltwise_mode::SUB;
-        case eltwise_mode::max:
-            return kernel_selector::eltwise_mode::MAX;
-        case eltwise_mode::prod:
-            return kernel_selector::eltwise_mode::MUL;
-        case eltwise_mode::div:
-            return kernel_selector::eltwise_mode::DIV;
-        case eltwise_mode::min:
-            return kernel_selector::eltwise_mode::MIN;
-        case eltwise_mode::pow:
-            return kernel_selector::eltwise_mode::POW;
-        case eltwise_mode::mod:
-            return kernel_selector::eltwise_mode::MODULU;
-        case eltwise_mode::eq:
-            return kernel_selector::eltwise_mode::EQ;
-        case eltwise_mode::ne:
-            return kernel_selector::eltwise_mode::NE;
-        case eltwise_mode::lt:
-            return kernel_selector::eltwise_mode::LT;
-        case eltwise_mode::le:
-            return kernel_selector::eltwise_mode::LE;
-        case eltwise_mode::gt:
-            return kernel_selector::eltwise_mode::GT;
-        case eltwise_mode::ge:
-            return kernel_selector::eltwise_mode::GE;
-        case eltwise_mode::logic_and:
-            return kernel_selector::eltwise_mode::LOGIC_AND;
-        case eltwise_mode::logic_or:
-            return kernel_selector::eltwise_mode::LOGIC_OR;
-        case eltwise_mode::logic_xor:
-            return kernel_selector::eltwise_mode::LOGIC_XOR;
-        case eltwise_mode::squared_diff:
-            return kernel_selector::eltwise_mode::SQUARED_DIFF;
-        case eltwise_mode::floor_mod:
-            return kernel_selector::eltwise_mode::FLOOR_MOD;
-        default:
-            return kernel_selector::eltwise_mode::ADD;
-    }
-}
-}  // namespace
-
 struct eltwise_gpu : typed_primitive_gpu_impl<eltwise> {
     using parent = typed_primitive_gpu_impl<eltwise>;
     using parent::parent;
index 3edfa7692a8c410b230cb311af8d5e699b641a2e..bb90cace260026c2407368f277d293fac1fc8aea 100644 (file)
@@ -518,8 +518,14 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) {
 
         auto fuse_eltwise_f = [&](eltwise_node& node) {
             std::shared_ptr<const cldnn::eltwise> prim = node.get_primitive();
+            const std::vector<eltwise_mode> supported_modes = {
+                eltwise_mode::sum,
+                eltwise_mode::prod
+            };
+
             if (node.is_output() || node.inputs_count() != 2 ||
-                prim->mode != eltwise_mode::sum || !prim->stride.empty())
+                std::find(supported_modes.begin(), supported_modes.end(), prim->mode) == supported_modes.end() ||
+                !prim->stride.empty())
                 return;
 
             std::vector<cldnn::program_node*> parents = node.get_dependencies();
index e1cb4dc3d4441032a55f6b526bb3b07e56c0dbed..a723ecffb5c032b12e58ca6bd08777b49f89694c 100644 (file)
 #include <string>
 
 namespace cldnn {
+
+inline kernel_selector::eltwise_mode convert_to_eltwise_mode(eltwise_mode mode) {
+    switch (mode) {
+        case eltwise_mode::sum:
+            return kernel_selector::eltwise_mode::ADD;
+        case eltwise_mode::sub:
+            return kernel_selector::eltwise_mode::SUB;
+        case eltwise_mode::max:
+            return kernel_selector::eltwise_mode::MAX;
+        case eltwise_mode::prod:
+            return kernel_selector::eltwise_mode::MUL;
+        case eltwise_mode::div:
+            return kernel_selector::eltwise_mode::DIV;
+        case eltwise_mode::min:
+            return kernel_selector::eltwise_mode::MIN;
+        case eltwise_mode::pow:
+            return kernel_selector::eltwise_mode::POW;
+        case eltwise_mode::mod:
+            return kernel_selector::eltwise_mode::MODULU;
+        case eltwise_mode::eq:
+            return kernel_selector::eltwise_mode::EQ;
+        case eltwise_mode::ne:
+            return kernel_selector::eltwise_mode::NE;
+        case eltwise_mode::lt:
+            return kernel_selector::eltwise_mode::LT;
+        case eltwise_mode::le:
+            return kernel_selector::eltwise_mode::LE;
+        case eltwise_mode::gt:
+            return kernel_selector::eltwise_mode::GT;
+        case eltwise_mode::ge:
+            return kernel_selector::eltwise_mode::GE;
+        case eltwise_mode::logic_and:
+            return kernel_selector::eltwise_mode::LOGIC_AND;
+        case eltwise_mode::logic_or:
+            return kernel_selector::eltwise_mode::LOGIC_OR;
+        case eltwise_mode::logic_xor:
+            return kernel_selector::eltwise_mode::LOGIC_XOR;
+        case eltwise_mode::squared_diff:
+            return kernel_selector::eltwise_mode::SQUARED_DIFF;
+        case eltwise_mode::floor_mod:
+            return kernel_selector::eltwise_mode::FLOOR_MOD;
+        default:
+            return kernel_selector::eltwise_mode::ADD;
+    }
+}
+
 template <>
 struct typed_program_node<eltwise> : public typed_program_node_base<eltwise> {
     using parent = typed_program_node_base<eltwise>;
@@ -38,7 +84,8 @@ public:
     size_t inputs_count() const { return get_primitive()->input.size(); }
 
     std::shared_ptr<kernel_selector::fuse_params> get_fuse_params() const override {
-        return std::make_shared<kernel_selector::eltwise_fuse_params>();
+        kernel_selector::eltwise_mode mode = convert_to_eltwise_mode(get_primitive()->mode);
+        return std::make_shared<kernel_selector::eltwise_fuse_params>(mode);
     }
 };
 
index f1a041b832fa866af0ead550bd7a737611999e86..a65832a1c8d8d50bc02d4ca18ccc079ee778bbd7 100644 (file)
@@ -588,7 +588,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_scale,
                                              }), );
 
 class conv_fp32_prelu_eltwise : public ConvFusingTest {};
-TEST_P(conv_fp32_prelu_eltwise, basic) {
+TEST_P(conv_fp32_prelu_eltwise, basic_sum) {
     auto p = GetParam();
     create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
@@ -605,7 +605,24 @@ TEST_P(conv_fp32_prelu_eltwise, basic) {
     execute(p);
 }
 
-TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast) {
+TEST_P(conv_fp32_prelu_eltwise, basic_prod) {
+    auto p = GetParam();
+    create_topologies(input_layout("input", get_input_layout(p)),
+                 data("weights", get_mem(get_weights_layout(p))),
+                 data("bias", get_mem(get_bias_layout(p))),
+                 data("slope_data", get_mem(get_per_channel_layout(p))),
+                 data("eltwise_data", get_mem(get_output_layout(p))),
+                 convolution("conv_prim", "input", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation),
+                 activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
+                 eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::prod),
+                 reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
+    );
+
+    tolerance = 1e-5f;
+    execute(p);
+}
+
+TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_sum) {
     auto p = GetParam();
     tensor eltw_shape = p.default_format.spatial_num() == 2 ? tensor{1, 1, 1, 1} : tensor{1, 1, 1, 1, 1};
     create_topologies(input_layout("input", get_input_layout(p)),
@@ -623,6 +640,24 @@ TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast) {
     execute(p);
 }
 
+TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_prod) {
+    auto p = GetParam();
+    tensor eltw_shape = p.default_format.spatial_num() == 2 ? tensor{1, 1, 1, 1} : tensor{1, 1, 1, 1, 1};
+    create_topologies(input_layout("input", get_input_layout(p)),
+                 data("weights", get_mem(get_weights_layout(p))),
+                 data("bias", get_mem(get_bias_layout(p))),
+                 data("slope_data", get_mem(get_per_channel_layout(p))),
+                 data("eltwise_data", get_mem(layout{ p.data_type, p.input_format, eltw_shape })),
+                 convolution("conv_prim", "input", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation),
+                 activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
+                 eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::prod),
+                 reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
+    );
+
+    tolerance = 1e-5f;
+    execute(p);
+}
+
 TEST_P(conv_fp32_prelu_eltwise, vector_ops) {
     auto p = GetParam();
     create_topologies(input_layout("input", get_input_layout(p)),