Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / kernel_selector / core / cl_kernels / deconvolution_gpu_ref.cl
index d2a369b..4e8fa0d 100644 (file)
@@ -55,6 +55,11 @@ KERNEL(deconvolution_gpu_yxfb_ref)(
     const uint in_split_offset = split_idx * INPUT0_FEATURE_PITCH * FILTER_IFM_NUM;
 #endif
     const uint input_offset = INPUT0_OFFSET + batch_offset*INPUT0_BATCH_PITCH + in_split_offset;
+#if GROUPED && !DEPTHWISE_SEPARABLE_OPT
+    const uint filter_offset = split_idx * FILTER_LENGTH;
+#else
+    const uint filter_offset = 0;
+#endif
 
     for (uint i = 0; i < FILTER_SIZE_Y; i++)
     {
@@ -74,7 +79,7 @@ KERNEL(deconvolution_gpu_yxfb_ref)(
                     uint fixed_input_offset_y = (uint)input_offset_y / STRIDE_SIZE_Y;
                     uint input_idx = input_offset + (uint)fixed_input_offset_x*INPUT0_X_PITCH + (uint)fixed_input_offset_y*INPUT0_Y_PITCH;
 #if GRADIENT
-                    uint filter_idx = ofm_offset*FILTER_IFM_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
+                    uint filter_idx = filter_offset + ofm_offset*FILTER_IFM_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
                     for (uint h = 0; h < FILTER_OFM_NUM; h++)
                     {
                         result = fma(input[input_idx], filter[filter_idx], result);
@@ -82,7 +87,7 @@ KERNEL(deconvolution_gpu_yxfb_ref)(
                         input_idx += INPUT0_FEATURE_PITCH;
                     }
 #else
-                    uint filter_idx = ofm_offset*FILTER_OFM_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
+                    uint filter_idx = filter_offset + ofm_offset*FILTER_OFM_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
                     for (uint h = 0; h < FILTER_IFM_NUM; h++)
                     {
                         result = fma(input[input_idx], filter[filter_idx], result);
@@ -95,7 +100,12 @@ KERNEL(deconvolution_gpu_yxfb_ref)(
         }
     }
 #if BIAS_TERM
-    result += bias[ofm_offset];
+#if GROUPED && !DEPTHWISE_SEPARABLE_OPT
+    const uint bias_offset = split_idx * BIAS_LENGTH;
+#else
+    const uint bias_offset = 0;
+#endif
+    result += bias[ofm_offset + bias_offset];
 #endif
     const uint out_split_offset = split_idx * OUTPUT_FEATURE_PITCH * FILTER_OFM_NUM;
     const uint dst_index = OUTPUT_OFFSET + out_split_offset + batch_offset*OUTPUT_BATCH_PITCH + ofm_offset*OUTPUT_FEATURE_PITCH + out_y*OUTPUT_Y_PITCH + out_x*OUTPUT_X_PITCH;