From 8f966887d765171780fb846f768c1ef734e9ea68 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Thu, 30 Jul 2020 18:16:37 +0300 Subject: [PATCH] [IE CLDNN] Prod mode support in eltwise fusings (#1491) --- .../eltwise/eltwise_kernel_base.h | 4 +- .../kernel_selector/core/common/jitter.cpp | 19 ++++++- .../thirdparty/clDNN/src/gpu/eltwise_gpu.cpp | 47 ------------------ .../prepare_primitive_fusing.cpp | 8 ++- .../clDNN/src/include/eltwise_inst.h | 49 ++++++++++++++++++- .../tests/test_cases/fusings_gpu_test.cpp | 39 ++++++++++++++- 6 files changed, 113 insertions(+), 53 deletions(-) diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h index 0e59efa68..4524435e2 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h @@ -100,7 +100,9 @@ struct eltwise_optional_params : optional_params { // fuse_params //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// struct eltwise_fuse_params : fuse_params { - eltwise_fuse_params() : fuse_params(KernelType::ELTWISE) {} + EltwiseMode mode; + + eltwise_fuse_params(EltwiseMode mode) : fuse_params(KernelType::ELTWISE), mode(mode) {} }; struct scale_fuse_params : fuse_params { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp index 734e02a44..a9910c12a 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp @@ -1142,8 +1142,25 @@ JitConstants FusedOpsCodeGenerator::MakeOpJitConstants(const FusedOpsConfigurati break; } case KernelType::ELTWISE: { + auto p = desc.GetOpParams(); + if (!p) + throw std::runtime_error("[clDNN] Eltwise fuse params can't be nullptr"); + + std::string op = ""; + switch (p->mode) + { + case kernel_selector::EltwiseMode::ADD: + op = "+"; + break; + case kernel_selector::EltwiseMode::MUL: + op = "*"; + break; + default: + throw std::runtime_error("[clDNN] Eltwise mode is not supported in fused ops codegen"); + } + op_decls += "\\\n\t" + GetOutputType(vec_size) + " " + out_var + " = " + in_vars_converted[0] + - " + " + ConvertToOutputType(in_var, vec_size) + ";"; + op + ConvertToOutputType(in_var, vec_size) + ";"; break; } case KernelType::QUANTIZE: { diff --git a/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp index f1fe37ccd..7f0a4535b 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp @@ -26,53 +26,6 @@ namespace cldnn { namespace gpu { -namespace { -inline kernel_selector::eltwise_mode convert_to_eltwise_mode(eltwise_mode mode) { - switch (mode) { - case eltwise_mode::sum: - return kernel_selector::eltwise_mode::ADD; - case eltwise_mode::sub: - return kernel_selector::eltwise_mode::SUB; - case eltwise_mode::max: - return kernel_selector::eltwise_mode::MAX; - case eltwise_mode::prod: - return kernel_selector::eltwise_mode::MUL; - case eltwise_mode::div: - return kernel_selector::eltwise_mode::DIV; - case eltwise_mode::min: - return kernel_selector::eltwise_mode::MIN; - case eltwise_mode::pow: - return kernel_selector::eltwise_mode::POW; - case eltwise_mode::mod: - return kernel_selector::eltwise_mode::MODULU; - case eltwise_mode::eq: - return kernel_selector::eltwise_mode::EQ; - case eltwise_mode::ne: - return kernel_selector::eltwise_mode::NE; - case eltwise_mode::lt: - return kernel_selector::eltwise_mode::LT; - case eltwise_mode::le: - return kernel_selector::eltwise_mode::LE; - case eltwise_mode::gt: - return kernel_selector::eltwise_mode::GT; - case eltwise_mode::ge: - return kernel_selector::eltwise_mode::GE; - case eltwise_mode::logic_and: - return kernel_selector::eltwise_mode::LOGIC_AND; - case eltwise_mode::logic_or: - return kernel_selector::eltwise_mode::LOGIC_OR; - case eltwise_mode::logic_xor: - return kernel_selector::eltwise_mode::LOGIC_XOR; - case eltwise_mode::squared_diff: - return kernel_selector::eltwise_mode::SQUARED_DIFF; - case eltwise_mode::floor_mod: - return kernel_selector::eltwise_mode::FLOOR_MOD; - default: - return kernel_selector::eltwise_mode::ADD; - } -} -} // namespace - struct eltwise_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp index 3edfa7692..bb90cace2 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp @@ -518,8 +518,14 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) { auto fuse_eltwise_f = [&](eltwise_node& node) { std::shared_ptr prim = node.get_primitive(); + const std::vector supported_modes = { + eltwise_mode::sum, + eltwise_mode::prod + }; + if (node.is_output() || node.inputs_count() != 2 || - prim->mode != eltwise_mode::sum || !prim->stride.empty()) + std::find(supported_modes.begin(), supported_modes.end(), prim->mode) == supported_modes.end() || + !prim->stride.empty()) return; std::vector parents = node.get_dependencies(); diff --git a/inference-engine/thirdparty/clDNN/src/include/eltwise_inst.h b/inference-engine/thirdparty/clDNN/src/include/eltwise_inst.h index e1cb4dc3d..a723ecffb 100644 --- a/inference-engine/thirdparty/clDNN/src/include/eltwise_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/eltwise_inst.h @@ -24,6 +24,52 @@ #include namespace cldnn { + +inline kernel_selector::eltwise_mode convert_to_eltwise_mode(eltwise_mode mode) { + switch (mode) { + case eltwise_mode::sum: + return kernel_selector::eltwise_mode::ADD; + case eltwise_mode::sub: + return kernel_selector::eltwise_mode::SUB; + case eltwise_mode::max: + return kernel_selector::eltwise_mode::MAX; + case eltwise_mode::prod: + return kernel_selector::eltwise_mode::MUL; + case eltwise_mode::div: + return kernel_selector::eltwise_mode::DIV; + case eltwise_mode::min: + return kernel_selector::eltwise_mode::MIN; + case eltwise_mode::pow: + return kernel_selector::eltwise_mode::POW; + case eltwise_mode::mod: + return kernel_selector::eltwise_mode::MODULU; + case eltwise_mode::eq: + return kernel_selector::eltwise_mode::EQ; + case eltwise_mode::ne: + return kernel_selector::eltwise_mode::NE; + case eltwise_mode::lt: + return kernel_selector::eltwise_mode::LT; + case eltwise_mode::le: + return kernel_selector::eltwise_mode::LE; + case eltwise_mode::gt: + return kernel_selector::eltwise_mode::GT; + case eltwise_mode::ge: + return kernel_selector::eltwise_mode::GE; + case eltwise_mode::logic_and: + return kernel_selector::eltwise_mode::LOGIC_AND; + case eltwise_mode::logic_or: + return kernel_selector::eltwise_mode::LOGIC_OR; + case eltwise_mode::logic_xor: + return kernel_selector::eltwise_mode::LOGIC_XOR; + case eltwise_mode::squared_diff: + return kernel_selector::eltwise_mode::SQUARED_DIFF; + case eltwise_mode::floor_mod: + return kernel_selector::eltwise_mode::FLOOR_MOD; + default: + return kernel_selector::eltwise_mode::ADD; + } +} + template <> struct typed_program_node : public typed_program_node_base { using parent = typed_program_node_base; @@ -38,7 +84,8 @@ public: size_t inputs_count() const { return get_primitive()->input.size(); } std::shared_ptr get_fuse_params() const override { - return std::make_shared(); + kernel_selector::eltwise_mode mode = convert_to_eltwise_mode(get_primitive()->mode); + return std::make_shared(mode); } }; diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp index f1a041b83..a65832a1c 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp @@ -588,7 +588,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_scale, }), ); class conv_fp32_prelu_eltwise : public ConvFusingTest {}; -TEST_P(conv_fp32_prelu_eltwise, basic) { +TEST_P(conv_fp32_prelu_eltwise, basic_sum) { auto p = GetParam(); create_topologies(input_layout("input", get_input_layout(p)), data("weights", get_mem(get_weights_layout(p))), @@ -605,7 +605,24 @@ TEST_P(conv_fp32_prelu_eltwise, basic) { execute(p); } -TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast) { +TEST_P(conv_fp32_prelu_eltwise, basic_prod) { + auto p = GetParam(); + create_topologies(input_layout("input", get_input_layout(p)), + data("weights", get_mem(get_weights_layout(p))), + data("bias", get_mem(get_bias_layout(p))), + data("slope_data", get_mem(get_per_channel_layout(p))), + data("eltwise_data", get_mem(get_output_layout(p))), + convolution("conv_prim", "input", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation), + activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope), + eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::prod), + reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32) + ); + + tolerance = 1e-5f; + execute(p); +} + +TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_sum) { auto p = GetParam(); tensor eltw_shape = p.default_format.spatial_num() == 2 ? tensor{1, 1, 1, 1} : tensor{1, 1, 1, 1, 1}; create_topologies(input_layout("input", get_input_layout(p)), @@ -623,6 +640,24 @@ TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast) { execute(p); } +TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_prod) { + auto p = GetParam(); + tensor eltw_shape = p.default_format.spatial_num() == 2 ? tensor{1, 1, 1, 1} : tensor{1, 1, 1, 1, 1}; + create_topologies(input_layout("input", get_input_layout(p)), + data("weights", get_mem(get_weights_layout(p))), + data("bias", get_mem(get_bias_layout(p))), + data("slope_data", get_mem(get_per_channel_layout(p))), + data("eltwise_data", get_mem(layout{ p.data_type, p.input_format, eltw_shape })), + convolution("conv_prim", "input", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation), + activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope), + eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::prod), + reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32) + ); + + tolerance = 1e-5f; + execute(p); +} + TEST_P(conv_fp32_prelu_eltwise, vector_ops) { auto p = GetParam(); create_topologies(input_layout("input", get_input_layout(p)), -- 2.34.1