os_is_zyx_osv16_isv16, ///< format for weights for IMAD convolutions
os_is_yx_osv32_isv4_swizzled_by_2, ///< format for weights for IMAD convolutions
os_is_yx_osv32_isv4, ///< format for weights for IMAD convolutions
+ os_is_zyx_osv32_isv4, ///< format for weights for IMAD convolutions
os_is_yx_osv32_isv32p, ///< format for weights for binary convolutions
lstm_weights_dio, ///< dynamic_lstm, direction,
///< rather than IO (I - input size, O - 4 * hidden_size)
{ os_is_zyx_osv16_isv16, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{0, 16}, {1, 16}}}},
{ os_is_yx_osv32_isv4_swizzled_by_2, { 1, 1, 2, 0, 0, "bfxy", "bfxy?", {{0, 32}, {1, 4}}}},
{ os_is_yx_osv32_isv4, { 1, 1, 2, 0, 0, "bfxy", "bfxy?", {{0, 32}, {1, 4}}}},
+ { os_is_zyx_osv32_isv4, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{0, 32}, {1, 4}}}},
{ os_is_yx_osv32_isv32p, { 1, 1, 1, 0, 0, "bfxy", "bfxy?", {}}},
{ os_is_zyx_isv16_osv16, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{0, 16}, {1, 16}}}},
{ is_os_zyx_isv16_osv16, { 1, 1, 3, 0, 0, "fbzyx", "bfxyz", {{1, 16}, {0, 16}}}},
{ WeightsLayout::os_is_yx_osv16_isv4, { 0, 1, -1, 2, 3, -1, -1, -1 } },
{ WeightsLayout::os_is_yx_osv32_isv4_swizzled_by_2, { 0, 1, -1, 2, 3, -1, -1, -1 } },
{ WeightsLayout::os_is_yx_osv32_isv4, { 0, 1, -1, 2, 3, -1, -1, -1 } },
+ { WeightsLayout::os_is_zyx_osv32_isv4, { 0, 1, 2, 3, 4, -1, -1, -1 } },
{ WeightsLayout::oizyx, { 0, 1, 2, 3, 4, -1, -1, -1 } },
{ WeightsLayout::os_is_yx_osv32_isv32p, { 0, 1, -1, 2, 3, -1, -1, -1 } },
{ WeightsLayout::os_is_zyx_isv16_osv16, { 0, 1, 2, 3, 4, -1, -1, -1 } },
newDims[2] = RoundUp(newDims[2], 4);
newDims[3] = RoundUp(newDims[3], 32);
break;
+ case os_is_zyx_osv32_isv4:
+ assert(newDims.size() == 5);
+ newDims[3] = RoundUp(newDims[3], 4);
+ newDims[4] = RoundUp(newDims[4], 32);
+ break;
case os_is_yx_osv32_isv32p:
assert(newDims.size() == 4);
newDims[2] = RoundUp(newDims[2], 32); // ic
os_is_yx_osv16_isv4, // swizzled weights for convolution using IMAD
os_is_yx_osv32_isv4_swizzled_by_2, // weights for bfyx -> b_fs_yx_fsv32 convolution using IMAD with swizzeled ofm (0, 2, 4..), (1, 3, 5...)
os_is_yx_osv32_isv4, // weights for bfyx -> b_fs_yx_fsv{32,16} convolution using IMAD
+ os_is_zyx_osv32_isv4, // weights for bfzyx -> b_fs_zyx_fsv16 convolution using IMAD
oizyx,
os_is_yx_osv32_isv32p, // 2 blocks: 32 packed binary in channels and 32 output channels
os_is_osv32_isv32_swizzled_by_4, // for weights for 1x1 IMAD convolution
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputLayout(DataLayout::bfyx);
+ k.EnableInputLayout(DataLayout::bfzyx);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv32);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
+ k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv16);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableDilation();
auto params = dynamic_cast<const convolution_params&>(p);
+ if (params.inputs[0].Dimentions() != params.output.Dimentions())
+ return false;
+
if (params.inputs[0].Feature().v != 3 && params.inputs[0].Feature().v != 4)
return false;
// Computes the shared-local-memory (SLM) footprint in bytes for one work-group:
// the cached input-line length in X (stride-expanded output block plus the
// dilated filter X extent) multiplied by the filter's spatial extent, in
// 32-bit packed (4x int8) elements.
// Diff markers below: the '-' line is the old 2D-only revision (Y only); the
// '+' line additionally multiplies by the filter Z extent to size the cache
// for 3D (bfzyx) convolutions. NOTE(review): assumes weights.Z().v == 1 for
// 2D cases so the '+' formula stays backward-compatible — confirm upstream.
static size_t get_slm_byte_size(const convolution_params &cp, size_t lws, size_t block_size) {
return (cp.stride.x * (lws * block_size - 1) + (cp.weights.X().v - 1) * cp.dilation.x + 1)*
- cp.weights.Y().v * sizeof(int32_t);
+ cp.weights.Y().v * cp.weights.Z().v * sizeof(int32_t);
}
static size_t get_lws(const convolution_params &cp, size_t blocks_count, size_t block_size, size_t max_lws) {
const size_t max_lws = std::max((size_t)1, cp.engineInfo.maxWorkGroupSize / sub_group_size);
runInfo.gws0 = Align(cp.output.Feature().v, 32) / 2;
runInfo.gws1 = CeilDiv(cp.output.X().v, runInfo.cldnnStyle.blockWidth);
- runInfo.gws2 = cp.output.Batch().v * cp.output.Y().v;
+ runInfo.gws2 = cp.output.Batch().v * cp.output.Y().v * cp.output.Z().v;
runInfo.lws0 = sub_group_size;
runInfo.lws1 = get_lws(cp, runInfo.gws1, tuneOptions.blockWidth, max_lws);
size_t slm_tail = slm_line_size % runInfo.lws1;
size_t slm_line_aligned = slm_chunk_size*runInfo.lws1 + Align(slm_tail, sub_group_size);
- size_t input_line_size = std::min(params.stride.x * (blockWidth - 1) + (params.weights.X().v - 1) * params.dilation.x + 1,
- input.X().v + input.X().pad.Total());
+ size_t input_line_size = params.stride.x * (blockWidth - 1) + (params.weights.X().v - 1) * params.dilation.x + 1;
jit.AddConstant(MakeJitConstant("INPUT_LINE_SIZE", input_line_size));
jit.AddConstant(MakeJitConstant("OUTPUT_X_BLOCK_SIZE", blockWidth));
if (!params.fused_ops.empty()) {
auto input_dt = GetActivationType(params);
- if (GetPreferredWeightsLayout(params) == WeightsLayout::os_is_yx_osv32_isv4) {
- FusedOpsConfiguration conf0 = {"_0", {"b", "(fg*32 + lid)", "y", "(x+i)"}, "res0", input_dt, 1};
- FusedOpsConfiguration conf1 = {"_1", {"b", "(fg*32 + lid+16)", "y", "(x+i)"}, "res1", input_dt, 1};
+ if (WeightsTensor::ChannelsCount(GetPreferredWeightsLayout(params)) == 5) {
+ FusedOpsConfiguration conf0 = {"_0", {"b", "(fg*32 + lid)", "z", "y", "(x+i)"}, "res0", input_dt, 1};
+ FusedOpsConfiguration conf1 = {"_1", {"b", "(fg*32 + lid+16)", "z", "y", "(x+i)"}, "res1", input_dt, 1};
jit.Merge(MakeFusedOpsJitConstants(params, {conf0, conf1}));
} else {
- FusedOpsConfiguration conf0 = {"_0", {"b", "(fg*32 + 2*lid + 0)", "y", "(x+i)"}, "res0", input_dt, 1};
- FusedOpsConfiguration conf1 = {"_1", {"b", "(fg*32 + 2*lid + 1)", "y", "(x+i)"}, "res1", input_dt, 1};
- jit.Merge(MakeFusedOpsJitConstants(params, {conf0, conf1}));
+ if (GetPreferredWeightsLayout(params) == WeightsLayout::os_is_yx_osv32_isv4) {
+ FusedOpsConfiguration conf0 = {"_0", {"b", "(fg*32 + lid)", "y", "(x+i)"}, "res0", input_dt, 1};
+ FusedOpsConfiguration conf1 = {"_1", {"b", "(fg*32 + lid+16)", "y", "(x+i)"}, "res1", input_dt, 1};
+ jit.Merge(MakeFusedOpsJitConstants(params, {conf0, conf1}));
+ } else {
+ FusedOpsConfiguration conf0 = {"_0", {"b", "(fg*32 + 2*lid + 0)", "y", "(x+i)"}, "res0", input_dt, 1};
+ FusedOpsConfiguration conf1 = {"_1", {"b", "(fg*32 + 2*lid + 1)", "y", "(x+i)"}, "res1", input_dt, 1};
+ jit.Merge(MakeFusedOpsJitConstants(params, {conf0, conf1}));
+ }
}
}
DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
WeightsLayout GetPreferredWeightsLayout(const convolution_params &p) const override {
if (p.output.GetDType() == Datatype::F16 || p.output.GetDType() == Datatype::F32 ||
- p.output.GetLayout() == DataLayout::b_fs_yx_fsv16) {
- return WeightsLayout::os_is_yx_osv32_isv4;
+ p.output.GetLayout() == DataLayout::b_fs_yx_fsv16 || p.output.GetLayout() == DataLayout::b_fs_zyx_fsv16) {
+ if (p.output.Dimentions() == 5) {
+ return WeightsLayout::os_is_zyx_osv32_isv4;
+ } else {
+ return WeightsLayout::os_is_yx_osv32_isv4;
+ }
} else {
return WeightsLayout::os_is_yx_osv32_isv4_swizzled_by_2;
}
{
const int fg = get_group_id(0);
const int x = (int)get_global_id(1) * OUTPUT_X_BLOCK_SIZE;
+
+#if OUTPUT_DIMS == 4
const int b = (int)get_global_id(2) / OUTPUT_SIZE_Y;
+ const int z = 0;
+ const int y = (int)get_global_id(2) % OUTPUT_SIZE_Y;
+#elif OUTPUT_DIMS == 5
+ const int b = (int)get_global_id(2) / OUTPUT_SIZE_Y / OUTPUT_SIZE_Z;
+ const int z = (int)get_global_id(2) / OUTPUT_SIZE_Y % OUTPUT_SIZE_Z;
const int y = (int)get_global_id(2) % OUTPUT_SIZE_Y;
+#endif // OUTPUT_DIMS == 4
const int lid = get_sub_group_local_id();
const int group_id = get_group_id(1);
const int x_wg_start = (group_id * GROUP_SIZE) * STRIDE_SIZE_X - PADDING_SIZE_X;
const int input_y = y * STRIDE_SIZE_Y - PADDING_SIZE_Y;
+ const int input_z = z * STRIDE_SIZE_Z - PADDING_SIZE_Z;
ACCUMULATOR_TYPE_VEC acc[2] = { 0 }; // 2*16 packed channels * OUTPUT_X_BLOCK_SIZE
#if ASYMMETRIC_WEIGHTS_QUANTIZATION
ACCUMULATOR_TYPE_VEC acc_assym_weights = 0;
#endif
-#if INPUT0_LAYOUT_BFYX
- const int input_offset = b*INPUT0_BATCH_PITCH + INPUT0_OFFSET + input_y * INPUT0_Y_PITCH;
+#if INPUT0_LAYOUT_BFYX || INPUT0_LAYOUT_BFZYX
+ const int input_offset = b*INPUT0_BATCH_PITCH + INPUT0_OFFSET + input_y * INPUT0_Y_PITCH + input_z * INPUT0_Z_PITCH;
#elif INPUT0_LAYOUT_B_FS_YX_FSV4
const int fsv = 4;
const int input_x_pitch = fsv;
const int input_offset = b * input_b_pitch + input_y * input_y_pitch;
#endif
- int filter_idx = fg * FILTER_SIZE_X * FILTER_SIZE_Y * ISV * OSV;
+ int filter_idx = fg * FILTER_SIZE_X * FILTER_SIZE_Y * FILTER_SIZE_Z * ISV * OSV;
#if ASYMMETRIC_WEIGHTS_QUANTIZATION
char4 multiplier;
for (int i = 0; i < INPUT0_FEATURE_NUM; i++)
#endif // INPUT0_FEATURE_NUM == 3
#endif // ASYMMETRIC_DATA_QUANTIZATION
- __local PACKED_IN_TYPE slm[SLM_LINE_SIZE*FILTER_SIZE_Y];
+ __local PACKED_IN_TYPE slm[SLM_LINE_SIZE*FILTER_SIZE_Y*FILTER_SIZE_Z];
- for (int kh = 0; kh < FILTER_SIZE_Y ; ++kh) {
- __local PACKED_IN_TYPE* slm_block = slm + kh*SLM_LINE_SIZE + sg*SLM_CHUNK_SIZE;
- bool y_cross_fm = input_y + kh*DILATION_SIZE_Y < 0 || input_y + kh*DILATION_SIZE_Y >= INPUT0_SIZE_Y;
- if (y_cross_fm) {
+ for (int kd = 0; kd < FILTER_SIZE_Z ; ++kd) {
+ bool z_cross_fm = input_z + kd*DILATION_SIZE_Z < 0 || input_z + kd*DILATION_SIZE_Z >= INPUT0_SIZE_Z;
+ for (int kh = 0; kh < FILTER_SIZE_Y ; ++kh) {
+ __local PACKED_IN_TYPE* slm_block = slm + kh*SLM_LINE_SIZE + kd*SLM_LINE_SIZE*FILTER_SIZE_Y + sg*SLM_CHUNK_SIZE;
+ bool y_cross_fm = input_y + kh*DILATION_SIZE_Y < 0 || input_y + kh*DILATION_SIZE_Y >= INPUT0_SIZE_Y;
+ if (y_cross_fm || z_cross_fm) {
#if ASYMMETRIC_DATA_QUANTIZATION
- for (int c = 0; c < SLM_CHUNK_SIZE; c += SUB_GROUP_SIZE) {
- if (sg*SLM_CHUNK_SIZE + c + lid < SLM_LINE_SIZE)
- slm_block[c + lid] = AS_PACKED_IN_TYPE(zp);
- }
+ for (int c = 0; c < SLM_CHUNK_SIZE; c += SUB_GROUP_SIZE) {
+ if (sg*SLM_CHUNK_SIZE + c + lid < SLM_LINE_SIZE)
+ slm_block[c + lid] = AS_PACKED_IN_TYPE(zp);
+ }
#if SLM_TAIL > 0
- if (sg == LWS1 - 1) {
- __local PACKED_IN_TYPE* slm_block_tail = slm + kh*SLM_LINE_SIZE + LWS1*SLM_CHUNK_SIZE;
- slm_block_tail[lid] = AS_PACKED_IN_TYPE(zp);
- }
+ if (sg == LWS1 - 1) {
+ __local PACKED_IN_TYPE* slm_block_tail = slm + kh*SLM_LINE_SIZE + kd*SLM_LINE_SIZE*FILTER_SIZE_Y + LWS1*SLM_CHUNK_SIZE;
+ slm_block_tail[lid] = AS_PACKED_IN_TYPE(zp);
+ }
#endif // SLM_TAIL > 0
#endif // ASYMMETRIC_DATA_QUANTIZATION
- continue;
- }
+ continue;
+ }
- {
- for (int c = 0; c < SLM_CHUNK_SIZE; c += SUB_GROUP_SIZE) {
- const int x_chunk = x_wg_start + sg*SLM_CHUNK_SIZE + c;
- bool x_cross_fm = x_chunk + lid < 0 || x_chunk + lid >= INPUT0_SIZE_X;
-
- if (!x_cross_fm) {
- #if INPUT0_LAYOUT_BFYX
- MAKE_VECTOR_TYPE(INPUT0_TYPE, ISV) src = 0;
- __attribute__((opencl_unroll_hint(INPUT0_FEATURE_NUM)))
- for (int i = 0; i < INPUT0_FEATURE_NUM; i++) {
- src[i] = input[input_offset + i * INPUT0_FEATURE_PITCH
- + kh * DILATION_SIZE_Y * INPUT0_Y_PITCH
- + (x_chunk + lid)* INPUT0_X_PITCH];
- }
- slm_block[c + lid] = AS_PACKED_IN_TYPE(src);
- #elif INPUT0_LAYOUT_B_FS_YX_FSV4
- const __global uint* ptr = input + input_offset + kh * DILATION_SIZE_Y * input_y_pitch + (x_chunk + lid) * input_x_pitch;
- PACKED_IN_TYPE src = AS_PACKED_IN_TYPE(ptr[0]);
- slm_block[c + lid] = src;
- #endif
- } else {
+ {
+ for (int c = 0; c < SLM_CHUNK_SIZE; c += SUB_GROUP_SIZE) {
+ const int x_chunk = x_wg_start + sg*SLM_CHUNK_SIZE + c;
+ bool x_cross_fm = x_chunk + lid < 0 || x_chunk + lid >= INPUT0_SIZE_X;
+
+ if (!x_cross_fm) {
+ #if INPUT0_LAYOUT_BFYX || INPUT0_LAYOUT_BFZYX
+ MAKE_VECTOR_TYPE(INPUT0_TYPE, ISV) src = 0;
+ __attribute__((opencl_unroll_hint(INPUT0_FEATURE_NUM)))
+ for (int i = 0; i < INPUT0_FEATURE_NUM; i++) {
+ src[i] = input[input_offset + i * INPUT0_FEATURE_PITCH
+ + kd * DILATION_SIZE_Z * INPUT0_Z_PITCH
+ + kh * DILATION_SIZE_Y * INPUT0_Y_PITCH
+ + (x_chunk + lid)* INPUT0_X_PITCH];
+ }
+ slm_block[c + lid] = AS_PACKED_IN_TYPE(src);
+ #elif INPUT0_LAYOUT_B_FS_YX_FSV4
+ const __global uint* ptr = input + input_offset + kh * DILATION_SIZE_Y * input_y_pitch + (x_chunk + lid) * input_x_pitch;
+ PACKED_IN_TYPE src = AS_PACKED_IN_TYPE(ptr[0]);
+ slm_block[c + lid] = src;
+ #endif
+ } else {
#if ASYMMETRIC_DATA_QUANTIZATION
- slm_block[c + lid] = AS_PACKED_IN_TYPE(zp);
+ slm_block[c + lid] = AS_PACKED_IN_TYPE(zp);
#else // ASYMMETRIC_DATA_QUANTIZATION
- slm_block[c + lid] = 0;
+ slm_block[c + lid] = 0;
#endif // ASYMMETRIC_DATA_QUANTIZATION
- }
- }
+ }
+ }
#if SLM_TAIL > 0
- if (sg == LWS1 - 1) {
- __local PACKED_IN_TYPE* slm_block_tail = slm + kh*SLM_LINE_SIZE + LWS1*SLM_CHUNK_SIZE;
- const int x_chunk = x_wg_start + LWS1*SLM_CHUNK_SIZE;
- bool x_cross_fm = x_chunk + lid >= INPUT0_SIZE_X;
- if (!x_cross_fm) {
- #if INPUT0_LAYOUT_BFYX
- MAKE_VECTOR_TYPE(INPUT0_TYPE, ISV) src = 0;
- __attribute__((opencl_unroll_hint(INPUT0_FEATURE_NUM)))
- for (int i = 0; i < INPUT0_FEATURE_NUM; i++) {
- src[i] = input[input_offset + i * INPUT0_FEATURE_PITCH
- + kh * DILATION_SIZE_Y * INPUT0_Y_PITCH
- + (x_chunk + lid)* INPUT0_X_PITCH];
- }
- slm_block_tail[lid] = AS_PACKED_IN_TYPE(src);
- #elif INPUT0_LAYOUT_B_FS_YX_FSV4
- const __global uint* ptr = input + input_offset + kh * DILATION_SIZE_Y * input_y_pitch + (x_chunk + lid) * input_x_pitch;
- PACKED_IN_TYPE src = AS_PACKED_IN_TYPE(ptr[0]);
- slm_block_tail[lid] = src;
- #endif
- } else {
+ if (sg == LWS1 - 1) {
+ __local PACKED_IN_TYPE* slm_block_tail = slm + kh*SLM_LINE_SIZE + kd*SLM_LINE_SIZE*FILTER_SIZE_Y + LWS1*SLM_CHUNK_SIZE;
+ const int x_chunk = x_wg_start + LWS1*SLM_CHUNK_SIZE;
+ bool x_cross_fm = x_chunk + lid >= INPUT0_SIZE_X;
+ if (!x_cross_fm) {
+ #if INPUT0_LAYOUT_BFYX || INPUT0_LAYOUT_BFZYX
+ MAKE_VECTOR_TYPE(INPUT0_TYPE, ISV) src = 0;
+ __attribute__((opencl_unroll_hint(INPUT0_FEATURE_NUM)))
+ for (int i = 0; i < INPUT0_FEATURE_NUM; i++) {
+ src[i] = input[input_offset + i * INPUT0_FEATURE_PITCH
+ + kd * DILATION_SIZE_Z * INPUT0_Z_PITCH
+ + kh * DILATION_SIZE_Y * INPUT0_Y_PITCH
+ + (x_chunk + lid)* INPUT0_X_PITCH];
+ }
+ slm_block_tail[lid] = AS_PACKED_IN_TYPE(src);
+ #elif INPUT0_LAYOUT_B_FS_YX_FSV4
+ const __global uint* ptr = input + input_offset + kh * DILATION_SIZE_Y * input_y_pitch + (x_chunk + lid) * input_x_pitch;
+ PACKED_IN_TYPE src = AS_PACKED_IN_TYPE(ptr[0]);
+ slm_block_tail[lid] = src;
+ #endif
+ } else {
#if ASYMMETRIC_DATA_QUANTIZATION
slm_block_tail[lid] = AS_PACKED_IN_TYPE(zp);
#else // ASYMMETRIC_DATA_QUANTIZATION
slm_block_tail[lid] = 0;
#endif // ASYMMETRIC_DATA_QUANTIZATION
+ }
}
- }
#endif
+ }
}
}
barrier(CLK_LOCAL_MEM_FENCE);
- __attribute__((opencl_unroll_hint(FILTER_SIZE_Y)))
- for (int kh = 0; kh < FILTER_SIZE_Y ; ++kh) {
- bool y_cross_fm = input_y + kh*DILATION_SIZE_Y < 0 || input_y + kh*DILATION_SIZE_Y >= INPUT0_SIZE_Y;
+ for (int kd = 0; kd < FILTER_SIZE_Z; ++kd) {
+ bool z_cross_fm = input_z + kd*DILATION_SIZE_Z < 0 || input_z + kd*DILATION_SIZE_Z >= INPUT0_SIZE_Z;
#if !ASYMMETRIC_DATA_QUANTIZATION
- if (y_cross_fm)
+ if (z_cross_fm)
continue;
#endif
- PACKED_IN_TYPE line_cache[INPUT_LINE_SIZE];
- for (int xb = 0; xb < INPUT_LINE_SIZE; xb++) {
- line_cache[xb] = slm[kh*SLM_LINE_SIZE + sg*OUTPUT_X_BLOCK_SIZE*STRIDE_SIZE_X + xb];
- }
+ __attribute__((opencl_unroll_hint(FILTER_SIZE_Y)))
+ for (int kh = 0; kh < FILTER_SIZE_Y ; ++kh) {
+ bool y_cross_fm = input_y + kh*DILATION_SIZE_Y < 0 || input_y + kh*DILATION_SIZE_Y >= INPUT0_SIZE_Y;
+#if !ASYMMETRIC_DATA_QUANTIZATION
+ if (y_cross_fm)
+ continue;
+#endif
+ PACKED_IN_TYPE line_cache[INPUT_LINE_SIZE];
+ for (int xb = 0; xb < INPUT_LINE_SIZE; xb++) {
+ line_cache[xb] = slm[kd*SLM_LINE_SIZE*FILTER_SIZE_Y + kh*SLM_LINE_SIZE + sg*OUTPUT_X_BLOCK_SIZE*STRIDE_SIZE_X + xb];
+ }
- __attribute__((opencl_unroll_hint(FILTER_SIZE_X)))
- for (uint kw = 0; kw < FILTER_SIZE_X ; ++kw) {
- const uint f_off = filter_idx
- + kh * OSV * ISV * FILTER_SIZE_X
- + kw * OSV * ISV;
+ __attribute__((opencl_unroll_hint(FILTER_SIZE_X)))
+ for (uint kw = 0; kw < FILTER_SIZE_X ; ++kw) {
+ const uint f_off = filter_idx
+ + kd * OSV * ISV * FILTER_SIZE_X * FILTER_SIZE_Y
+ + kh * OSV * ISV * FILTER_SIZE_X
+ + kw * OSV * ISV;
- int weights_data0 = as_int(intel_sub_group_block_read((const __global uint*)(weights + f_off)));
- int weights_data1 = as_int(intel_sub_group_block_read((const __global uint*)(weights + f_off + SUB_GROUP_SIZE*ISV)));
+ int weights_data0 = as_int(intel_sub_group_block_read((const __global uint*)(weights + f_off)));
+ int weights_data1 = as_int(intel_sub_group_block_read((const __global uint*)(weights + f_off + SUB_GROUP_SIZE*ISV)));
- PACKED_TYPE_VEC src;
+ PACKED_TYPE_VEC src;
- __attribute__((opencl_unroll_hint(OUTPUT_X_BLOCK_SIZE)))
- for (int i = 0; i < OUTPUT_X_BLOCK_SIZE; i++) {
- // src[i] = slm[kh*SLM_LINE_SIZE + (sg*OUTPUT_X_BLOCK_SIZE + i)*STRIDE_SIZE_X + kw*DILATION_SIZE_X];
- src[i] = line_cache[kw*DILATION_SIZE_X + STRIDE_SIZE_X*i];
- acc[0][i] = IMAD(acc[0][i], AS_INPUT0_TYPE_4(src[i]), as_char4(weights_data0));
- acc[1][i] = IMAD(acc[1][i], AS_INPUT0_TYPE_4(src[i]), as_char4(weights_data1));
+ __attribute__((opencl_unroll_hint(OUTPUT_X_BLOCK_SIZE)))
+ for (int i = 0; i < OUTPUT_X_BLOCK_SIZE; i++) {
+ // src[i] = slm[kh*SLM_LINE_SIZE + (sg*OUTPUT_X_BLOCK_SIZE + i)*STRIDE_SIZE_X + kw*DILATION_SIZE_X];
+ src[i] = line_cache[kw*DILATION_SIZE_X + STRIDE_SIZE_X*i];
+ acc[0][i] = IMAD(acc[0][i], AS_INPUT0_TYPE_4(src[i]), as_char4(weights_data0));
+ acc[1][i] = IMAD(acc[1][i], AS_INPUT0_TYPE_4(src[i]), as_char4(weights_data1));
#if ASYMMETRIC_WEIGHTS_QUANTIZATION
- acc_assym_weights[i] = IMAD(acc_assym_weights[i], AS_INPUT0_TYPE_4(src[i]), multiplier);
+ acc_assym_weights[i] = IMAD(acc_assym_weights[i], AS_INPUT0_TYPE_4(src[i]), multiplier);
#endif
+ }
}
}
}
for (int i = 0; i < OUTPUT_X_BLOCK_SIZE; i++) {
#if OUTPUT_FEATURE_NUM > 16
for (int ofm = 0; ofm < 2; ofm++) {
+#if OUTPUT_DIMS == 4
const uint dst_index = OUTPUT_GET_INDEX(b, fg*OSV + SUB_GROUP_SIZE*ofm + lid, y, x+i);
+#elif OUTPUT_DIMS == 5
+ const uint dst_index = OUTPUT_GET_INDEX(b, fg*OSV + SUB_GROUP_SIZE*ofm + lid, z, y, x+i);
+#endif
if (x + i < OUTPUT_SIZE_X && fg*OSV + SUB_GROUP_SIZE*ofm + lid < OUTPUT_FEATURE_NUM) {
output[dst_index] = dst[ofm][i];
}
}
#else // OUTPUT_FEATURE_NUM > 16
+#if OUTPUT_DIMS == 4
const uint dst_index = OUTPUT_GET_INDEX(b, fg*OSV + lid, y, x+i);
+#elif OUTPUT_DIMS == 5
+ const uint dst_index = OUTPUT_GET_INDEX(b, fg*OSV + lid, z, y, x+i);
+#endif
if (x + i < OUTPUT_SIZE_X && fg*OSV + lid < OUTPUT_FEATURE_NUM) {
output[dst_index] = dst[0][i];
}
}
#else // OUTPUT_LAYOUT_B_FS_YX_FSV32
if (full_x && full_f) {
+#if OUTPUT_DIMS == 4
const uint dst_index0 = OUTPUT_GET_INDEX(b, fg*OSV, y, x);
const uint dst_index1 = OUTPUT_GET_INDEX(b, fg*OSV+16, y, x);
+#elif OUTPUT_DIMS == 5
+ const uint dst_index0 = OUTPUT_GET_INDEX(b, fg*OSV, z, y, x);
+ const uint dst_index1 = OUTPUT_GET_INDEX(b, fg*OSV+16, z, y, x);
+#endif
BLOCK_WRITE(output + dst_index0, dst[0]);
BLOCK_WRITE(output + dst_index1, dst[1]);
} else {
const bool full_it_x = OUTPUT_SIZE_X % OUTPUT_X_BLOCK_SIZE == 0 || x + i < OUTPUT_SIZE_X;
const bool full_sgl_f = OUTPUT_FEATURE_NUM % OSV == 0 || 16*ofm + lid < OUTPUT_FEATURE_NUM % OSV;
if (full_it_x && full_sgl_f) {
+#if OUTPUT_DIMS == 4
const uint dst_index = OUTPUT_GET_INDEX(b, fg*OSV + 16*ofm + lid, y, x+i);
+#elif OUTPUT_DIMS == 5
+ const uint dst_index = OUTPUT_GET_INDEX(b, fg*OSV + 16*ofm + lid, z, y, x+i);
+#endif
output[dst_index] = dst[ofm][i];
}
}
}
// Maps a 4D weight coordinate (o, i, y, x) into the OS_IS_YX_OSV16_ISV4
// blocked layout (16 output channels per tile, 4 input channels packed).
// Diff markers: the '-' lines called the 2D-only helper get_os_is_yx_osv_isv4;
// the '+' lines forward to the generalized 3-spatial helper
// get_os_is_zyx_osv_isv4 with z fixed to 0 and _SIZE_Y passed explicitly,
// so the 2D index computation is unchanged while sharing one helper.
// (No '//' comments may appear on the continuation lines below — they would
// break the backslash line-splicing of the macro.)
#define GET_FILTER_OS_IS_YX_OSV16_ISV4_INDEX(prefix, o, i, y, x) \
- FUNC_CALL(get_os_is_yx_osv_isv4)( \
- o, i, y, x, \
+ FUNC_CALL(get_os_is_zyx_osv_isv4)( \
+ o, i, 0, y, x, \
CAT(prefix, _IFM_PITCH), \
CAT(prefix, _OFM_PITCH), \
- CAT(prefix, _SIZE_X), 16)
+ CAT(prefix, _SIZE_X), \
+ CAT(prefix, _SIZE_Y), \
+ 16)
// Maps a 4D weight coordinate (o, i, y, x) into the OS_IS_YX_OSV32_ISV4
// blocked layout (32 output channels per tile, 4 input channels packed).
// Diff markers: '-' lines are the old 2D-only helper call; '+' lines reroute
// through the shared 3-spatial helper get_os_is_zyx_osv_isv4 with z = 0 and
// an explicit _SIZE_Y argument, keeping the 2D result identical.
#define GET_FILTER_OS_IS_YX_OSV32_ISV4_INDEX(prefix, o, i, y, x) \
- FUNC_CALL(get_os_is_yx_osv_isv4)( \
- o, i, y, x, \
+ FUNC_CALL(get_os_is_zyx_osv_isv4)( \
+ o, i, 0, y, x, \
CAT(prefix, _IFM_PITCH), \
CAT(prefix, _OFM_PITCH), \
- CAT(prefix, _SIZE_X), 32)
+ CAT(prefix, _SIZE_X), \
+ CAT(prefix, _SIZE_Y), \
+ 32)
+
// New ('+' hunk) 5D variant: maps a weight coordinate (o, i, z, y, x) into
// the OS_IS_ZYX_OSV32_ISV4 blocked layout (32 output channels per tile,
// 4 input channels packed) by delegating to the shared helper
// get_os_is_zyx_osv_isv4 with real z and explicit _SIZE_X/_SIZE_Y pitches.
+#define GET_FILTER_OS_IS_ZYX_OSV32_ISV4_INDEX(prefix, o, i, z, y, x) \
+ FUNC_CALL(get_os_is_zyx_osv_isv4)( \
+ o, i, z, y, x, \
+ CAT(prefix, _IFM_PITCH), \
+ CAT(prefix, _OFM_PITCH), \
+ CAT(prefix, _SIZE_X), \
+ CAT(prefix, _SIZE_Y), \
+ 32)
-inline uint FUNC(get_os_is_yx_osv_isv4)(uint o, uint i, uint y, uint x,
- uint i_size,
- uint o_size,
- uint x_size,
- uint otd)
+inline uint FUNC(get_os_is_zyx_osv_isv4)(uint o, uint i, uint z, uint y, uint x,
+ uint i_size,
+ uint o_size,
+ uint x_size,
+ uint y_size,
+ uint otd)
{
uint out_depth_tile = o / otd;
uint od = o - out_depth_tile * otd;
uint idx = out_depth_tile * (o_size / tile) * otd * tile
+ id_tile * i_size * otd * tile
+ + z * y_size * x_size * otd * tile
+ y * x_size * otd * tile
+ x * otd * tile
+ od * tile
return GET_FILTER_OS_IS_YX_OSV32_ISV4_SWIZZLED_BY_2_INDEX(OUTPUT, o, i, y, x);
#elif defined OUTPUT_LAYOUT_OS_IS_YX_OSV32_ISV4
return GET_FILTER_OS_IS_YX_OSV32_ISV4_INDEX(OUTPUT, o, i, y, x);
+#elif defined OUTPUT_LAYOUT_OS_IS_ZYX_OSV32_ISV4
+ return GET_FILTER_OS_IS_ZYX_OSV32_ISV4_INDEX(OUTPUT, o, i, z, y, x);
#elif defined OUTPUT_LAYOUT_OS_IS_YX_ISA8_OSV8_ISV4_SWIZZLED_BY_4
return GET_FILTER_OS_IS_YX_ISA8_OSV8_ISV4_SWIZZLED_BY_4_INDEX(OUTPUT, o, i, y, x);
#elif defined OUTPUT_LAYOUT_OS_IS_YX_OSA4_ISA8_OSV8_ISV4_SWIZZLED_BY_4
case WeightsLayout::os_is_yx_osv16_isv4: return "OS_IS_YX_OSV16_ISV4";
case WeightsLayout::os_is_yx_osv32_isv4_swizzled_by_2: return "OS_IS_YX_OSV32_ISV4_SWIZZLED_BY_2";
case WeightsLayout::os_is_yx_osv32_isv4: return "OS_IS_YX_OSV32_ISV4";
+ case WeightsLayout::os_is_zyx_osv32_isv4: return "OS_IS_ZYX_OSV32_ISV4";
case WeightsLayout::os_is_y_x8_osv8_isv4_swizzled_by_4: return "OS_IS_Y_X8_OSV8_ISV4_SWIZZLED_BY_4";
case WeightsLayout::os_is_yx_osv32_isv32p: return "OS_IS_YX_OSV32_ISV32P";
case WeightsLayout::oizyx: return "OIZYX";
continue;
auto output_padded = static_cast<bool>(output_layout.data_padding);
- auto can_omit_padding = (output_layout.format == format::b_fs_yx_fsv16 || output_layout.format == format::b_fs_yx_fsv32) &&
- (input.get_output_layout().format == format::bfyx || input.get_output_layout().format == format::b_fs_yx_fsv4);
+ auto can_omit_padding = ((output_layout.format == format::b_fs_yx_fsv16 || output_layout.format == format::b_fs_yx_fsv32) &&
+ (input.get_output_layout().format == format::bfyx || input.get_output_layout().format == format::b_fs_yx_fsv4)) ||
+ (output_layout.format == format::b_fs_zyx_fsv16 && input.get_output_layout().format == format::bfzyx);
if (output_padded && !can_omit_padding) {
if (input.get_users().size() != 1)
return "os_is_yx_osv32_isv4_swizzled_by_2";
case format::os_is_yx_osv32_isv4:
return "os_is_yx_osv32_isv4";
+ case format::os_is_zyx_osv32_isv4:
+ return "os_is_zyx_osv32_isv4";
case format::os_is_y_x8_osv8_isv4:
return "os_is_y_x8_osv8_isv4";
case format::os_is_yx_osv32_isv32p:
return kernel_selector::weights_layout::os_is_yx_osv32_isv4_swizzled_by_2;
case format::os_is_yx_osv32_isv4:
return kernel_selector::weights_layout::os_is_yx_osv32_isv4;
+ case format::os_is_zyx_osv32_isv4:
+ return kernel_selector::weights_layout::os_is_zyx_osv32_isv4;
case format::os_is_yx_osv32_isv32p:
return kernel_selector::weights_layout::os_is_yx_osv32_isv32p;
case format::os_is_yx_isv16_osv16:
return format::os_is_yx_osv32_isv4_swizzled_by_2;
case kernel_selector::weights_layout::os_is_yx_osv32_isv4:
return format::os_is_yx_osv32_isv4;
+ case kernel_selector::weights_layout::os_is_zyx_osv32_isv4:
+ return format::os_is_zyx_osv32_isv4;
case kernel_selector::weights_layout::os_is_y_x8_osv8_isv4_swizzled_by_4:
return cldnn::format::os_is_y_x8_osv8_isv4_swizzled_by_4;
case kernel_selector::weights_layout::os_is_yx_osv32_isv32p:
TestParamType_grouped_convolution_gpu(3, 1, 5, 196, 252, 3, 1, 3, 4, 1, 1, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(4, 1, 6, 256, 256, 2, 1, 2, 4, 1, 1, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(4, 1, 6, 256, 512, 2, 1, 3, 16, 1, 1, format::b_fs_zyx_fsv16, ""),
- TestParamType_grouped_convolution_gpu(1, 3, 1, 18, 2, 1, 3, 1, 2, 1, 1, format::b_fs_zyx_fsv16, "")
+ TestParamType_grouped_convolution_gpu(1, 3, 1, 18, 2, 1, 3, 1, 2, 1, 1, format::b_fs_zyx_fsv16, ""),
+ TestParamType_grouped_convolution_gpu(2, 3, 4, 3, 18, 3, 3, 3, 1, 1, 1, format::b_fs_zyx_fsv16, "convolution_gpu_mmad_bfyx_to_b_fs_yx_fsv32"),
+ TestParamType_grouped_convolution_gpu(79, 224, 224, 3, 64, 3, 3, 3, 1, 2, 1, format::b_fs_zyx_fsv16, "convolution_gpu_mmad_bfyx_to_b_fs_yx_fsv32")
),
convolution_grouped_gpu::PrintToStringParamName);
}
// Builds the per-output-feature bias layout for a fusing-test case:
// a 1 x out_features x 1 x 1 tensor of the case's default data type.
// Diff markers: the '-' line used the test case's default_format; the '+'
// line pins the format to bfyx — presumably so the bias stays a rank-4
// tensor even when default_format is 5D (e.g. bfzyx in the CASE_CONV3D_*
// cases above). TODO(review): confirm rank-4 bias is what the 3D fusing
// path expects.
layout get_bias_layout(T& p) {
- return layout{ p.default_type, p.default_format, tensor{1, p.out_shape.feature[0], 1, 1} };
+ return layout{ p.default_type, format::bfyx, tensor{1, p.out_shape.feature[0], 1, 1} };
}
layout get_weights_zp_layout(T& p) {
#define CASE_CONV3D_U8S8_1 {1, 15, 5, 4, 5}, {1, 30, 3, 2, 3}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfzyx, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx
#define CASE_CONV3D_U8S8_2 {1, 15, 5, 5, 5}, {1, 30, 3, 3, 3}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfzyx, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx
#define CASE_CONV3D_U8S8_3 {1, 16, 5, 4, 5}, {1, 32, 5, 4, 5}, {1, 1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfzyx, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx
-#define CASE_CONV3D_U8S8_4 {1, 17, 5, 4, 5}, {1, 17, 5, 4, 5}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, -1}, tensor{1}, 17, data_types::u8, format::bfzyx, data_types::i8, format::goizyx, data_types::f32, format::bfzyx
+#define CASE_CONV3D_U8S8_4 {1, 17, 5, 4, 5}, {1, 17, 5, 4, 5}, {1, 1, 3, 3, 3}, tensor{1}, tensor{{0, 0, -1, -1, -1}, 0}, tensor{1}, 17, data_types::u8, format::bfzyx, data_types::i8, format::goizyx, data_types::f32, format::bfzyx
+#define CASE_CONV3D_U8S8_5 {1, 3, 5, 4, 5}, {1, 32, 5, 4, 5}, {1, 1, 3, 3, 3}, tensor{1}, tensor{{0, 0, -1, -1, -1}, 0}, tensor{1}, 1, data_types::u8, format::bfzyx, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx
#define CASE_CONV3D_S8S8_1 {1, 15, 5, 4, 5}, {1, 30, 3, 2, 3}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::bfzyx, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx
#define CASE_CONV3D_S8S8_2 {1, 15, 5, 5, 5}, {1, 30, 3, 3, 3}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::bfzyx, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx
#define CASE_CONV3D_S8S8_3 {1, 16, 5, 4, 5}, {1, 32, 5, 4, 5}, {1, 1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::bfzyx, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx
#define CASE_CONV3D_S8S8_4 {1, 17, 5, 4, 5}, {1, 17, 5, 4, 5}, {1, 1, 3, 3, 3}, tensor{1}, tensor{{0, 0, -1, -1, -1}, 0}, tensor{1}, 17, data_types::i8, format::bfzyx, data_types::i8, format::goizyx, data_types::f32, format::bfzyx
+#define CASE_CONV3D_S8S8_5 {1, 3, 5, 4, 5}, {1, 18, 5, 4, 5}, {1, 1, 3, 3, 3}, tensor{1}, tensor{{0, 0, -1, -1, -1}, 0}, tensor{1}, 1, data_types::i8, format::bfzyx, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx
// in_shape; out_shape; eltw_shape; kernel; stride; pad; dilation; groups; data_type; input_format; weights_type; weights_format; default_type; default_format;
#define CASE_CONV_ELTW_FP32_1 {1, 16, 4, 5}, {1, 32, 2, 3}, {1, 32, 1, 1}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::oiyx, data_types::f32, format::bfyx
bc_test_params{CASE_CONV3D_U8S8_2, 2, 3},
bc_test_params{CASE_CONV3D_U8S8_3, 2, 3},
bc_test_params{CASE_CONV3D_U8S8_4, 2, 3},
+ bc_test_params{CASE_CONV3D_U8S8_5, 2, 3},
bc_test_params{CASE_CONV3D_S8S8_1, 2, 3},
bc_test_params{CASE_CONV3D_S8S8_2, 2, 3},
bc_test_params{CASE_CONV3D_S8S8_3, 2, 3},
bc_test_params{CASE_CONV3D_S8S8_4, 2, 3},
+ bc_test_params{CASE_CONV3D_S8S8_5, 2, 3},
}), );
class conv_int8_scale_shift_swish : public ConvFusingTest {};
bc_test_params{CASE_CONV3D_U8S8_2, 2, 6},
bc_test_params{CASE_CONV3D_U8S8_3, 2, 6},
bc_test_params{CASE_CONV3D_U8S8_4, 2, 6},
+ bc_test_params{CASE_CONV3D_U8S8_5, 2, 6},
bc_test_params{CASE_CONV3D_S8S8_1, 2, 6},
bc_test_params{CASE_CONV3D_S8S8_2, 2, 6},
bc_test_params{CASE_CONV3D_S8S8_3, 2, 6},
bc_test_params{CASE_CONV3D_S8S8_4, 2, 6},
+ bc_test_params{CASE_CONV3D_S8S8_5, 2, 6},
}), );
class conv_int8_prelu_eltwise : public ConvFusingTest {};
bc_test_params{CASE_CONV3D_U8S8_2, 2, 4},
bc_test_params{CASE_CONV3D_U8S8_3, 2, 4},
bc_test_params{CASE_CONV3D_U8S8_4, 2, 4},
+ bc_test_params{CASE_CONV3D_U8S8_5, 2, 4},
bc_test_params{CASE_CONV3D_S8S8_1, 2, 4},
bc_test_params{CASE_CONV3D_S8S8_2, 2, 4},
bc_test_params{CASE_CONV3D_S8S8_3, 2, 4},
bc_test_params{CASE_CONV3D_S8S8_4, 2, 4},
+ bc_test_params{CASE_CONV3D_S8S8_5, 2, 4},
}), );
class conv_int8_activation_eltwise_quantize : public ConvFusingTest {};
bc_test_params{CASE_CONV3D_U8S8_2, 2, 3},
bc_test_params{CASE_CONV3D_U8S8_3, 2, 3},
bc_test_params{CASE_CONV3D_U8S8_4, 2, 3},
+ bc_test_params{CASE_CONV3D_U8S8_5, 2, 3},
bc_test_params{CASE_CONV3D_S8S8_1, 2, 3},
bc_test_params{CASE_CONV3D_S8S8_2, 2, 3},
bc_test_params{CASE_CONV3D_S8S8_3, 2, 3},
bc_test_params{CASE_CONV3D_S8S8_4, 2, 3},
+ bc_test_params{CASE_CONV3D_S8S8_5, 2, 3},
}), );
class conv_int8_scale_quantize_i8 : public ConvFusingTest {};
bc_test_params{CASE_CONV3D_U8S8_2, 2, 4},
bc_test_params{CASE_CONV3D_U8S8_3, 2, 4},
bc_test_params{CASE_CONV3D_U8S8_4, 2, 4},
+ bc_test_params{CASE_CONV3D_U8S8_5, 2, 4},
bc_test_params{CASE_CONV3D_S8S8_1, 2, 4},
bc_test_params{CASE_CONV3D_S8S8_2, 2, 4},
bc_test_params{CASE_CONV3D_S8S8_3, 2, 4},
bc_test_params{CASE_CONV3D_S8S8_4, 2, 4},
+ bc_test_params{CASE_CONV3D_S8S8_5, 2, 4},
}), );
class conv_int8_scale_quantize_i8_conv_b_fs_yx_fsv4_int8 : public ConvFusingTest {};
bc_test_params{CASE_CONV3D_U8S8_2, 2, 4},
bc_test_params{CASE_CONV3D_U8S8_3, 2, 4},
bc_test_params{CASE_CONV3D_U8S8_4, 2, 4},
+ bc_test_params{CASE_CONV3D_U8S8_5, 2, 4},
bc_test_params{CASE_CONV3D_S8S8_1, 2, 4},
bc_test_params{CASE_CONV3D_S8S8_2, 2, 4},
bc_test_params{CASE_CONV3D_S8S8_3, 2, 4},
bc_test_params{CASE_CONV3D_S8S8_4, 2, 4},
+ bc_test_params{CASE_CONV3D_S8S8_5, 2, 4},
}), );
class conv_int8_scale_activation_quantize_i8 : public ConvFusingTest {};
bc_test_params{CASE_CONV3D_U8S8_2, 2, 5},
bc_test_params{CASE_CONV3D_U8S8_3, 2, 5},
bc_test_params{CASE_CONV3D_U8S8_4, 2, 5},
+ bc_test_params{CASE_CONV3D_U8S8_5, 2, 5},
bc_test_params{CASE_CONV3D_S8S8_1, 2, 5},
bc_test_params{CASE_CONV3D_S8S8_2, 2, 5},
bc_test_params{CASE_CONV3D_S8S8_3, 2, 5},
bc_test_params{CASE_CONV3D_S8S8_4, 2, 5},
+ bc_test_params{CASE_CONV3D_S8S8_5, 2, 5},
}), );
class conv_int8_scale_activation_quantize_i8_eltwise_fp32 : public ConvFusingTest {};
bc_test_params{CASE_CONV3D_U8S8_2, 2, 6},
bc_test_params{CASE_CONV3D_U8S8_3, 2, 6},
bc_test_params{CASE_CONV3D_U8S8_4, 2, 6},
+ bc_test_params{CASE_CONV3D_U8S8_5, 2, 6},
bc_test_params{CASE_CONV3D_S8S8_1, 2, 6},
bc_test_params{CASE_CONV3D_S8S8_2, 2, 6},
bc_test_params{CASE_CONV3D_S8S8_3, 2, 6},
bc_test_params{CASE_CONV3D_S8S8_4, 2, 6},
+ bc_test_params{CASE_CONV3D_S8S8_5, 2, 6},
}), );
class conv_int8_scale_activation_quantize_i8_activation : public ConvFusingTest {};
bc_test_params{CASE_CONV3D_U8S8_2, 2, 6},
bc_test_params{CASE_CONV3D_U8S8_3, 2, 6},
bc_test_params{CASE_CONV3D_U8S8_4, 2, 6},
+ bc_test_params{CASE_CONV3D_U8S8_5, 2, 6},
bc_test_params{CASE_CONV3D_S8S8_1, 2, 6},
bc_test_params{CASE_CONV3D_S8S8_2, 2, 6},
bc_test_params{CASE_CONV3D_S8S8_3, 2, 6},
bc_test_params{CASE_CONV3D_S8S8_4, 2, 6},
+ bc_test_params{CASE_CONV3D_S8S8_5, 2, 6},
}), );
bc_test_params{CASE_CONV3D_U8S8_2, 2, 7},
bc_test_params{CASE_CONV3D_U8S8_3, 2, 7},
bc_test_params{CASE_CONV3D_U8S8_4, 2, 7},
+ bc_test_params{CASE_CONV3D_U8S8_5, 2, 7},
bc_test_params{CASE_CONV3D_S8S8_1, 2, 7},
bc_test_params{CASE_CONV3D_S8S8_2, 2, 7},
bc_test_params{CASE_CONV3D_S8S8_3, 2, 7},
bc_test_params{CASE_CONV3D_S8S8_4, 2, 7},
+ bc_test_params{CASE_CONV3D_S8S8_5, 2, 7},
}), );
class conv_int8_scale_prelu_quantize_i8_eltwise_fp32_quantize_i8_vec : public ConvFusingTest {};
bc_test_params{CASE_CONV3D_U8S8_2, 2, 3},
bc_test_params{CASE_CONV3D_U8S8_3, 2, 3},
bc_test_params{CASE_CONV3D_U8S8_4, 2, 3},
+ bc_test_params{CASE_CONV3D_U8S8_5, 2, 3},
bc_test_params{CASE_CONV3D_S8S8_1, 2, 3},
bc_test_params{CASE_CONV3D_S8S8_2, 2, 3},
bc_test_params{CASE_CONV3D_S8S8_3, 2, 3},
bc_test_params{CASE_CONV3D_S8S8_4, 2, 3},
+ bc_test_params{CASE_CONV3D_S8S8_5, 2, 3},
}), );
class conv_int8_asymmetric_data_and_weights : public ConvFusingTest {};
bc_test_params{CASE_CONV3D_U8S8_2, 2, 3},
bc_test_params{CASE_CONV3D_U8S8_3, 2, 3},
bc_test_params{CASE_CONV3D_U8S8_4, 2, 3},
+ bc_test_params{CASE_CONV3D_U8S8_5, 2, 3},
bc_test_params{CASE_CONV3D_S8S8_1, 2, 3},
bc_test_params{CASE_CONV3D_S8S8_2, 2, 3},
bc_test_params{CASE_CONV3D_S8S8_3, 2, 3},
bc_test_params{CASE_CONV3D_S8S8_4, 2, 3},
+ bc_test_params{CASE_CONV3D_S8S8_5, 2, 3},
}), );