From a09de25dd27b67e0798eada572c3c6436658a138 Mon Sep 17 00:00:00 2001 From: Jedrzej Hajduczenia Date: Fri, 27 Nov 2020 11:41:13 +0100 Subject: [PATCH] [IE CLDNN] Fix handling of fsv4 input padding in mmad bfyx to fsv32 convolution kernel (#3253) --- .../core/cl_kernels/convolution_gpu_mmad_bfyx_to_b_fs_yx_fsv32.cl | 8 ++++++-- .../thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp | 8 ++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_bfyx_to_b_fs_yx_fsv32.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_bfyx_to_b_fs_yx_fsv32.cl index 1495439..59c3040 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_bfyx_to_b_fs_yx_fsv32.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_bfyx_to_b_fs_yx_fsv32.cl @@ -185,7 +185,9 @@ KERNEL(convolution_mmad_bfyx_to_b_fs_yx_fsv32)( } slm_block[c + lid] = AS_PACKED_IN_TYPE(src); #elif INPUT0_LAYOUT_B_FS_YX_FSV4 - const __global uint* ptr = input + input_offset + kh * DILATION_SIZE_Y * input_y_pitch + (x_chunk + lid) * input_x_pitch; + const __global uint* ptr = input + input_offset + + (kh * DILATION_SIZE_Y + INPUT0_PAD_BEFORE_SIZE_Y) * input_y_pitch + + (x_chunk + lid + INPUT0_PAD_BEFORE_SIZE_X) * input_x_pitch; PACKED_IN_TYPE src = AS_PACKED_IN_TYPE(ptr[0]); slm_block[c + lid] = src; #endif @@ -214,7 +216,9 @@ KERNEL(convolution_mmad_bfyx_to_b_fs_yx_fsv32)( } slm_block_tail[lid] = AS_PACKED_IN_TYPE(src); #elif INPUT0_LAYOUT_B_FS_YX_FSV4 - const __global uint* ptr = input + input_offset + kh * DILATION_SIZE_Y * input_y_pitch + (x_chunk + lid) * input_x_pitch; + const __global uint* ptr = input + input_offset + + (kh * DILATION_SIZE_Y + INPUT0_PAD_BEFORE_SIZE_Y) * input_y_pitch + + (x_chunk + lid + INPUT0_PAD_BEFORE_SIZE_X) * input_x_pitch; PACKED_IN_TYPE src = 
AS_PACKED_IN_TYPE(ptr[0]); slm_block_tail[lid] = src; #endif diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp index d237bbd..01c0040 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp @@ -7661,7 +7661,7 @@ template <typename InputT, typename WeightsT, typename OutputT> class convolution_test_base { public: virtual topology build_topology(const cldnn::engine& engine) { - auto input_lay = layout(input_type(), format::bfyx, input_size()); + auto input_lay = layout(input_type(), format::bfyx, input_size(), padding_size()); auto wei_lay = layout(weights_type(), format::bfyx, weights_size()); auto wei_mem = memory::allocate(engine, wei_lay); @@ -7739,7 +7739,7 @@ public: auto net = network(prog, 0); - auto input_lay = layout(input_type(), format::bfyx, input_size()); + auto input_lay = layout(input_type(), format::bfyx, input_size(), padding_size()); auto input_mem = memory::allocate(engine, input_lay); std::vector<InputT> input_flat(input_lay.get_linear_size(), static_cast<InputT>(0)); for (size_t bi = 0; bi < batch_num(); ++bi) @@ -8026,7 +8026,7 @@ class convolution_random_test_fsv4_input : public convolution_random_test_base<InputT, WeightsT, OutputT> { public: using parent = convolution_random_test_base<InputT, WeightsT, OutputT>; topology build_topology(const cldnn::engine& engine) override { - auto input_lay = layout(this->input_type(), format::b_fs_yx_fsv4, this->input_size()); + auto input_lay = layout(this->input_type(), format::b_fs_yx_fsv4, this->input_size(), this->padding_size()); auto wei_lay = layout(this->weights_type(), format::bfyx, this->weights_size()); auto wei_mem = memory::allocate(engine, wei_lay); @@ -8099,7 +8099,7 @@ public: auto net = network(prog, 0); - auto input_lay = layout(this->input_type(), format::b_fs_yx_fsv4, this->input_size()); + auto input_lay = layout(this->input_type(), format::b_fs_yx_fsv4, this->input_size(), this->padding_size()); auto input_mem = 
memory::allocate(engine, input_lay); std::vector<InputT> input_flat(input_lay.get_linear_size(), static_cast<InputT>(0)); for (size_t bi = 0; bi < this->batch_num(); ++bi) -- 2.7.4