const uint in_split_offset = split_idx * INPUT0_FEATURE_PITCH * FILTER_IFM_NUM;
#endif
const uint input_offset = INPUT0_OFFSET + batch_offset*INPUT0_BATCH_PITCH + in_split_offset;
+#if GROUPED && !DEPTHWISE_SEPARABLE_OPT
+ const uint filter_offset = split_idx * FILTER_LENGTH;
+#else
+ const uint filter_offset = 0;
+#endif
for (uint i = 0; i < FILTER_SIZE_Y; i++)
{
if(!zero)
{
uint input_idx = input_offset + (uint)input_offset_x*INPUT0_X_PITCH + (uint)input_offset_y*INPUT0_Y_PITCH;
- uint filter_idx = ofm_offset*FILTER_OFM_PITCH + i*FILTER_Y_PITCH + j*FILTER_X_PITCH;
+ uint filter_idx = filter_offset + ofm_offset*FILTER_OFM_PITCH + i*FILTER_Y_PITCH + j*FILTER_X_PITCH;
for (uint h = 0; h < FILTER_IFM_NUM; h++)
{
}
}
#if BIAS_TERM
- result += bias[ofm_offset];
+#if GROUPED && !DEPTHWISE_SEPARABLE_OPT
+ const uint bias_offset = split_idx * BIAS_LENGTH;
+#else
+ const uint bias_offset = 0;
+#endif
+ result += bias[ofm_offset + bias_offset];
#endif
const uint out_split_offset = split_idx * OUTPUT_FEATURE_PITCH * FILTER_OFM_NUM;
const uint dst_index = batch_offset*OUTPUT_BATCH_PITCH + ofm_offset*OUTPUT_FEATURE_PITCH + out_y*OUTPUT_Y_PITCH + out_x*OUTPUT_X_PITCH + OUTPUT_OFFSET + out_split_offset;