[IE CLDNN] Fix fused ops in 1x1 conv fsv16 kernel (#1948)

author Jedrzej Hajduczenia <jedrzej.hajduczenia@intel.com>

Wed, 26 Aug 2020 17:58:51 +0000 (19:58 +0200)

committer GitHub <noreply@github.com>

Wed, 26 Aug 2020 17:58:51 +0000 (20:58 +0300)
author Jedrzej Hajduczenia <jedrzej.hajduczenia@intel.com>
Wed, 26 Aug 2020 17:58:51 +0000 (19:58 +0200)
committer GitHub <noreply@github.com>
Wed, 26 Aug 2020 17:58:51 +0000 (20:58 +0300)
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp

index bcb6a1d45b513709ade6e513f6510ba30c76c446..c3b10842ad2e7e7e16128392e087e74c17df73a5 100644 (file)
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp
@@ -165,6 +165,7 @@ JitConstants ConvolutionKernel_b_fs_yx_fsv16_1x1::GetJitConstants(const convolut
      jit.AddConstant(MakeJitConstant("PADDED_INPUT", params.inputs[0].X().pad.Total() != 0));
  
      bool padded_output = params.output.X().pad.Total() != 0;
+    bool non_unit_fused_op_spatial = false;
  
      // Set padded_output to true when fused inputs have paddings to have correct blocked loads
      for (auto& fused_op : params.fused_ops) {
@@ -172,10 +173,17 @@ JitConstants ConvolutionKernel_b_fs_yx_fsv16_1x1::GetJitConstants(const convolut
              if (t.PitchesDifferFromLogicalDims()) {
                  padded_output = true;
              }
+            if ((t.X().v > 1) ||
+                (t.Y().v > 1) ||
+                (t.Z().v > 1) ||
+                (t.W().v > 1)) {
+                non_unit_fused_op_spatial = true;
+            }
          }
      }
  
      jit.AddConstant(MakeJitConstant("PADDED_OUTPUT", padded_output));
+    jit.AddConstant(MakeJitConstant("NON_UNIT_FUSED_OP_SPATIAL", non_unit_fused_op_spatial));
  
      jit.AddConstant(MakeJitConstant("X_BLOCK_SIZE", blockWidth));
      jit.AddConstant(MakeJitConstant("X_BLOCKS", CeilDiv(params.output.X().v, blockWidth)));
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_f16_1x1.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_f16_1x1.cl

index 155ed590e73113723d0fc81f212d59a8bc805548..7e103cf2f02477a26e947130c416f3f255c46589 100644 (file)
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_f16_1x1.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_f16_1x1.cl
@@ -207,7 +207,7 @@ KERNEL(convolution_b_fs_yx_fsv16_1x1)(
      else
  #endif
      {
-#if !PADDED_OUTPUT
+#if !PADDED_OUTPUT && !NON_UNIT_FUSED_OP_SPATIAL
          if (xy * X_BLOCK_SIZE + X_BLOCK_SIZE <= OUTPUT_SIZE_X * OUTPUT_SIZE_Y || (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % X_BLOCK_SIZE == 0) {
  #else
          if (x + X_BLOCK_SIZE <= OUTPUT_SIZE_X || OUTPUT_SIZE_X % X_BLOCK_SIZE == 0) {
author	Jedrzej Hajduczenia <jedrzej.hajduczenia@intel.com>
	Wed, 26 Aug 2020 17:58:51 +0000 (19:58 +0200)
committer	GitHub <noreply@github.com>
	Wed, 26 Aug 2020 17:58:51 +0000 (20:58 +0300)
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp		patch | blob | history
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_f16_1x1.cl		patch | blob | history