[IE CLDNN] Fix handling of fsv4 input padding in mmad bfyx to fsv32 convolution kernel

author Jedrzej Hajduczenia <jedrzej.hajduczenia@intel.com>

Fri, 27 Nov 2020 10:41:13 +0000 (11:41 +0100)

committer GitHub <noreply@github.com>

Fri, 27 Nov 2020 10:41:13 +0000 (13:41 +0300)
author Jedrzej Hajduczenia <jedrzej.hajduczenia@intel.com>
Fri, 27 Nov 2020 10:41:13 +0000 (11:41 +0100)
committer GitHub <noreply@github.com>
Fri, 27 Nov 2020 10:41:13 +0000 (13:41 +0300)
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_bfyx_to_b_fs_yx_fsv32.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_bfyx_to_b_fs_yx_fsv32.cl

index 1495439..59c3040 100644 (file)
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_bfyx_to_b_fs_yx_fsv32.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_bfyx_to_b_fs_yx_fsv32.cl
@@ -185,7 +185,9 @@ KERNEL(convolution_mmad_bfyx_to_b_fs_yx_fsv32)(
                          }
                          slm_block[c + lid] = AS_PACKED_IN_TYPE(src);
                      #elif INPUT0_LAYOUT_B_FS_YX_FSV4
-                        const __global uint* ptr = input + input_offset + kh * DILATION_SIZE_Y * input_y_pitch + (x_chunk + lid) * input_x_pitch;
+                        const __global uint* ptr = input + input_offset
+                                                   + (kh * DILATION_SIZE_Y + INPUT0_PAD_BEFORE_SIZE_Y) * input_y_pitch
+                                                   + (x_chunk + lid + INPUT0_PAD_BEFORE_SIZE_X) * input_x_pitch;
                          PACKED_IN_TYPE src = AS_PACKED_IN_TYPE(ptr[0]);
                          slm_block[c + lid] = src;
                      #endif
@@ -214,7 +216,9 @@ KERNEL(convolution_mmad_bfyx_to_b_fs_yx_fsv32)(
                          }
                          slm_block_tail[lid] = AS_PACKED_IN_TYPE(src);
                      #elif INPUT0_LAYOUT_B_FS_YX_FSV4
-                        const __global uint* ptr = input + input_offset + kh * DILATION_SIZE_Y * input_y_pitch + (x_chunk + lid) * input_x_pitch;
+                        const __global uint* ptr = input + input_offset
+                                                   + (kh * DILATION_SIZE_Y + INPUT0_PAD_BEFORE_SIZE_Y) * input_y_pitch
+                                                   + (x_chunk + lid + INPUT0_PAD_BEFORE_SIZE_X) * input_x_pitch;
                          PACKED_IN_TYPE src = AS_PACKED_IN_TYPE(ptr[0]);
                          slm_block_tail[lid] = src;
                      #endif
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp

index d237bbd..01c0040 100644 (file)
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp
@@ -7661,7 +7661,7 @@ template <typename InputT, typename WeightsT, typename OutputT>
  class convolution_test_base {
  public:
      virtual topology build_topology(const cldnn::engine& engine) {
-        auto input_lay = layout(input_type(), format::bfyx, input_size());
+        auto input_lay = layout(input_type(), format::bfyx, input_size(), padding_size());
          auto wei_lay = layout(weights_type(), format::bfyx, weights_size());
  
          auto wei_mem = memory::allocate(engine, wei_lay);
@@ -7739,7 +7739,7 @@ public:
  
          auto net = network(prog, 0);
  
-        auto input_lay = layout(input_type(), format::bfyx, input_size());
+        auto input_lay = layout(input_type(), format::bfyx, input_size(), padding_size());
          auto input_mem = memory::allocate(engine, input_lay);
          std::vector<InputT> input_flat(input_lay.get_linear_size(), static_cast<InputT>(0));
          for (size_t bi = 0; bi < batch_num(); ++bi)
@@ -8026,7 +8026,7 @@ class convolution_random_test_fsv4_input : public convolution_random_test_base<I
  public:
      using parent = convolution_random_test_base<InputT, WeightsT, OutputT>;
      topology build_topology(const cldnn::engine& engine) override {
-        auto input_lay = layout(this->input_type(), format::b_fs_yx_fsv4, this->input_size());
+        auto input_lay = layout(this->input_type(), format::b_fs_yx_fsv4, this->input_size(), this->padding_size());
          auto wei_lay = layout(this->weights_type(), format::bfyx, this->weights_size());
  
          auto wei_mem = memory::allocate(engine, wei_lay);
@@ -8099,7 +8099,7 @@ public:
  
          auto net = network(prog, 0);
  
-        auto input_lay = layout(this->input_type(), format::b_fs_yx_fsv4,  this->input_size());
+        auto input_lay = layout(this->input_type(), format::b_fs_yx_fsv4,  this->input_size(), this->padding_size());
          auto input_mem = memory::allocate(engine, input_lay);
          std::vector<InputT> input_flat(input_lay.get_linear_size(), static_cast<InputT>(0));
          for (size_t bi = 0; bi < this->batch_num(); ++bi)
author	Jedrzej Hajduczenia <jedrzej.hajduczenia@intel.com>
	Fri, 27 Nov 2020 10:41:13 +0000 (11:41 +0100)
committer	GitHub <noreply@github.com>
	Fri, 27 Nov 2020 10:41:13 +0000 (13:41 +0300)
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_bfyx_to_b_fs_yx_fsv32.cl		patch | blob | history
inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp		patch | blob | history