Publishing 2019 R1 content

[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / kernel_selector / core / cl_kernels / deconvolution_gpu_ref.cl
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/deconvolution_gpu_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/deconvolution_gpu_ref.cl

index d2a369b..4e8fa0d 100644 (file)
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/deconvolution_gpu_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/deconvolution_gpu_ref.cl
@@ -55,6 +55,11 @@ KERNEL(deconvolution_gpu_yxfb_ref)(
      const uint in_split_offset = split_idx * INPUT0_FEATURE_PITCH * FILTER_IFM_NUM;
  #endif
      const uint input_offset = INPUT0_OFFSET + batch_offset*INPUT0_BATCH_PITCH + in_split_offset;
+#if GROUPED && !DEPTHWISE_SEPARABLE_OPT
+    const uint filter_offset = split_idx * FILTER_LENGTH;
+#else
+    const uint filter_offset = 0;
+#endif
  
      for (uint i = 0; i < FILTER_SIZE_Y; i++)
      {
@@ -74,7 +79,7 @@ KERNEL(deconvolution_gpu_yxfb_ref)(
                      uint fixed_input_offset_y = (uint)input_offset_y / STRIDE_SIZE_Y;
                      uint input_idx = input_offset + (uint)fixed_input_offset_x*INPUT0_X_PITCH + (uint)fixed_input_offset_y*INPUT0_Y_PITCH;
  #if GRADIENT
-                    uint filter_idx = ofm_offset*FILTER_IFM_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
+                    uint filter_idx = filter_offset + ofm_offset*FILTER_IFM_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
                      for (uint h = 0; h < FILTER_OFM_NUM; h++)
                      {
                          result = fma(input[input_idx], filter[filter_idx], result);
@@ -82,7 +87,7 @@ KERNEL(deconvolution_gpu_yxfb_ref)(
                          input_idx += INPUT0_FEATURE_PITCH;
                      }
  #else
-                    uint filter_idx = ofm_offset*FILTER_OFM_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
+                    uint filter_idx = filter_offset + ofm_offset*FILTER_OFM_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
                      for (uint h = 0; h < FILTER_IFM_NUM; h++)
                      {
                          result = fma(input[input_idx], filter[filter_idx], result);
@@ -95,7 +100,12 @@ KERNEL(deconvolution_gpu_yxfb_ref)(
          }
      }
  #if BIAS_TERM
-    result += bias[ofm_offset];
+#if GROUPED && !DEPTHWISE_SEPARABLE_OPT
+    const uint bias_offset = split_idx * BIAS_LENGTH;
+#else
+    const uint bias_offset = 0;
+#endif
+    result += bias[ofm_offset + bias_offset];
  #endif
      const uint out_split_offset = split_idx * OUTPUT_FEATURE_PITCH * FILTER_OFM_NUM;
      const uint dst_index = OUTPUT_OFFSET + out_split_offset + batch_offset*OUTPUT_BATCH_PITCH + ofm_offset*OUTPUT_FEATURE_PITCH + out_y*OUTPUT_Y_PITCH + out_x*OUTPUT_X_PITCH;