From c722f6a05a0b54f47b45f658df6a366c99f585b4 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 26 Oct 2020 09:03:00 +0300 Subject: [PATCH] [IE CLDNN] Fixed fusing config for fsv16 eltw kernel (#2807) --- .../eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp | 2 +- .../clDNN/tests/test_cases/fusings_gpu_test.cpp | 58 +++++++++++++++++++--- 2 files changed, 51 insertions(+), 9 deletions(-) diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp index 7f12c6b..ae0c07b 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp @@ -143,7 +143,7 @@ JitConstants EltwiseKernel_b_fs_yx_fsv16::GetJitConstants(const eltwise_params& if (!params.fused_ops.empty()) { kernel_selector::Datatype input_dt = GetAccumulatorType(params); - FusedOpsConfiguration conf = {"", {"b", "f_block", "y", "x"}, "res", input_dt, blockSize}; + FusedOpsConfiguration conf = {"", {"b", "f_block*16", "y", "x"}, "res", input_dt, blockSize}; conf.load_type = FusedOpsConfiguration::LoadType::LT_ALIGNED_READ; conf.vec_axis = Tensor::DataChannelName::X; diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp index 358e49c..da8847f 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp @@ -6226,11 +6226,11 @@ struct eltwise_test_params { #define CASE_ELTWISE_FP32_1 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum #define CASE_ELTWISE_FP32_2 {2, 16, 4, 4}, data_types::f32, 
data_types::f32, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum -#define CASE_ELTWISE_FP32_3 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum +#define CASE_ELTWISE_FP32_3 {2, 32, 4, 8}, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum #define CASE_ELTWISE_FP32_4 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::bfwzyx, data_types::f32, format::bfwzyx, eltwise_mode::sum #define CASE_ELTWISE_FP16_1 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum #define CASE_ELTWISE_FP16_2 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum -#define CASE_ELTWISE_FP16_3 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum +#define CASE_ELTWISE_FP16_3 {2, 32, 4, 8}, data_types::f16, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum #define CASE_ELTWISE_I8_1 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum #define CASE_ELTWISE_I8_2 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum #define CASE_ELTWISE_I8_3 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum @@ -6239,22 +6239,22 @@ struct eltwise_test_params { #define CASE_ELTWISE_U8_3 {2, 16, 4, 4}, data_types::u8, data_types::u8, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum #define CASE_ELTWISE_FP32_FP16_1 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum #define CASE_ELTWISE_FP32_FP16_2 {2, 16, 4, 4}, data_types::f32, 
data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum -#define CASE_ELTWISE_FP32_FP16_3 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum +#define CASE_ELTWISE_FP32_FP16_3 {2, 32, 4, 4}, data_types::f32, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum #define CASE_ELTWISE_FP16_FP32_1 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum #define CASE_ELTWISE_FP16_FP32_2 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum -#define CASE_ELTWISE_FP16_FP32_3 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum +#define CASE_ELTWISE_FP16_FP32_3 {2, 32, 4, 4}, data_types::f16, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum #define CASE_ELTWISE_I8_FP16_1 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum #define CASE_ELTWISE_I8_FP16_2 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum -#define CASE_ELTWISE_I8_FP16_3 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum +#define CASE_ELTWISE_I8_FP16_3 {2, 32, 4, 4}, data_types::i8, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum #define CASE_ELTWISE_I8_FP32_1 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum #define CASE_ELTWISE_I8_FP32_2 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum -#define CASE_ELTWISE_I8_FP32_3 {2, 16, 4, 4}, data_types::i8, 
data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum +#define CASE_ELTWISE_I8_FP32_3 {2, 32, 4, 4}, data_types::i8, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum #define CASE_ELTWISE_U8_FP16_1 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum #define CASE_ELTWISE_U8_FP16_2 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum -#define CASE_ELTWISE_U8_FP16_3 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum +#define CASE_ELTWISE_U8_FP16_3 {2, 32, 4, 4}, data_types::u8, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum #define CASE_ELTWISE_U8_FP32_1 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum #define CASE_ELTWISE_U8_FP32_2 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum -#define CASE_ELTWISE_U8_FP32_3 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum +#define CASE_ELTWISE_U8_FP32_3 {2, 32, 4, 4}, data_types::u8, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum class EltwiseFusingTest : public ::BaseFusingTest<eltwise_test_params> { @@ -6344,6 +6344,48 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, eltwise_test_params{CASE_ELTWISE_U8_FP16_3, 3, 4}, }), ); +class eltwise_fp32_fsv16 : public EltwiseFusingTest {}; +TEST_P(eltwise_fp32_fsv16, add) { + auto p = GetParam(); + create_topologies(input_layout("input", get_input_layout(p)), + input_layout("input2", get_input_layout2(p)), + data("add_data", get_mem(get_per_channel_layout(p), -10, 10)), + eltwise("eltwise", {"input", "input2"}, p.mode, 
p.default_type), + eltwise("add", {"eltwise", "add_data"}, eltwise_mode::sum), + activation("activation", "add", activation_func::negative), + reorder("out", "activation", p.default_format, data_types::f32)); + + implementation_desc eltw_impl = { format::b_fs_yx_fsv16, "eltwise_b_fs_yx_fsv16" }; + bo_fused.set_option(build_option::force_implementations({ {"eltwise", eltw_impl} })); + + tolerance = 1e-5f; + execute(p); +} + +TEST_P(eltwise_fp32_fsv16, add_per_element) { + auto p = GetParam(); + create_topologies(input_layout("input", get_input_layout(p)), + input_layout("input2", get_input_layout2(p)), + data("add_data", get_mem(get_input_layout(p), -10, 10)), + eltwise("eltwise", {"input", "input2"}, p.mode, p.default_type), + eltwise("add", {"eltwise", "add_data"}, eltwise_mode::sum), + activation("activation", "add", activation_func::negative), + reorder("out", "activation", p.default_format, data_types::f32)); + + implementation_desc eltw_impl = { format::b_fs_yx_fsv16, "eltwise_b_fs_yx_fsv16" }; + bo_fused.set_option(build_option::force_implementations({ {"eltwise", eltw_impl} })); + + tolerance = 1e-5f; + execute(p); +} + +INSTANTIATE_TEST_CASE_P(fusings_gpu, + eltwise_fp32_fsv16, + ::testing::ValuesIn(std::vector<eltwise_test_params>{ + eltwise_test_params{CASE_ELTWISE_FP16_3, 3, 5}, + eltwise_test_params{CASE_ELTWISE_FP32_3, 3, 5}, + }), ); + class eltwise_fp32_fused_prims : public EltwiseFusingTest {}; TEST_P(eltwise_fp32_fused_prims, scale_activation) { auto p = GetParam(); -- 2.7.4