#if ASYMMETRIC_WEIGHTS_QUANTIZATION
ACCUMULATOR_TYPE_VEC acc_assym_weights = 0;
#endif
+
+#if INPUT0_LAYOUT_BFYX
const int input_offset = b*INPUT0_BATCH_PITCH + INPUT0_OFFSET + input_y * INPUT0_Y_PITCH; // start of row input_y in batch b, using the pitches/offset provided by the INPUT0_* macros
+#elif INPUT0_LAYOUT_B_FS_YX_FSV4
+ const int fsv = 4; // presumably the feature-slice width implied by the "FSV4" layout name - TODO confirm
+ const int input_x_pitch = fsv; // one step in x advances by fsv elements
+ const int input_y_pitch = input_x_pitch * (INPUT0_PAD_BEFORE_SIZE_X + INPUT0_SIZE_X + INPUT0_PAD_AFTER_SIZE_X); // row pitch covers the X extent including both paddings
+ const int input_fs_pitch = input_y_pitch * (INPUT0_PAD_BEFORE_SIZE_Y + INPUT0_SIZE_Y + INPUT0_PAD_AFTER_SIZE_Y); // feature-slice pitch covers the Y extent including both paddings
+ const int input_total_f_size = INPUT0_PAD_BEFORE_FEATURE_NUM + INPUT0_FEATURE_NUM + INPUT0_PAD_AFTER_FEATURE_NUM; // feature count including feature padding
+ const int input_b_pitch = input_fs_pitch * ((input_total_f_size + fsv - 1) / fsv); // batch pitch = slice pitch * ceil(total features / fsv)
+ const int input_offset = b * input_b_pitch + input_y * input_y_pitch; // NOTE(review): unlike the BFYX branch there is no INPUT0_OFFSET-style term here - confirm padded inputs are addressed correctly
+#endif
+
int filter_idx = fg * FILTER_SIZE_X * FILTER_SIZE_Y * ISV * OSV;
#if ASYMMETRIC_WEIGHTS_QUANTIZATION
char4 multiplier;
bool x_cross_fm = x_chunk + lid < 0 || x_chunk + lid >= INPUT0_SIZE_X;
if (!x_cross_fm) {
+ #if INPUT0_LAYOUT_BFYX
MAKE_VECTOR_TYPE(INPUT0_TYPE, ISV) src = 0;
__attribute__((opencl_unroll_hint(INPUT0_FEATURE_NUM)))
for (int i = 0; i < INPUT0_FEATURE_NUM; i++) {
+ (x_chunk + lid)* INPUT0_X_PITCH];
}
slm_block[c + lid] = AS_PACKED_IN_TYPE(src);
+ #elif INPUT0_LAYOUT_B_FS_YX_FSV4
+ const __global uint* ptr = input + input_offset + kh * DILATION_SIZE_Y * input_y_pitch + (x_chunk + lid) * input_x_pitch;
+ PACKED_IN_TYPE src = AS_PACKED_IN_TYPE(ptr[0]);
+ slm_block[c + lid] = src;
+ #endif
} else {
#if ASYMMETRIC_DATA_QUANTIZATION
slm_block[c + lid] = AS_PACKED_IN_TYPE(zp);
const int x_chunk = x_wg_start + LWS1*SLM_CHUNK_SIZE;
bool x_cross_fm = x_chunk + lid >= INPUT0_SIZE_X;
if (!x_cross_fm) {
+ #if INPUT0_LAYOUT_BFYX
MAKE_VECTOR_TYPE(INPUT0_TYPE, ISV) src = 0;
__attribute__((opencl_unroll_hint(INPUT0_FEATURE_NUM)))
for (int i = 0; i < INPUT0_FEATURE_NUM; i++) {
+ (x_chunk + lid)* INPUT0_X_PITCH];
}
slm_block_tail[lid] = AS_PACKED_IN_TYPE(src);
+ #elif INPUT0_LAYOUT_B_FS_YX_FSV4
+ const __global uint* ptr = input + input_offset + kh * DILATION_SIZE_Y * input_y_pitch + (x_chunk + lid) * input_x_pitch;
+ PACKED_IN_TYPE src = AS_PACKED_IN_TYPE(ptr[0]);
+ slm_block_tail[lid] = src;
+ #endif
} else {
#if ASYMMETRIC_DATA_QUANTIZATION
slm_block_tail[lid] = AS_PACKED_IN_TYPE(zp);
return "conv";
}
- void run_expect(const VVVVF<OutputT>& expected) {
+ virtual void run_expect(const VVVVF<OutputT>& expected) {
auto engine = get_test_engine();
auto topo = build_topology(engine);
}
+/// Random convolution test whose network input is declared in b_fs_yx_fsv4 format.
+///
+/// build_topology() routes "input" (b_fs_yx_fsv4) through "input_reorder" into the
+/// layout described by input_format() / padding_size() before the convolution, and
+/// run_expect() uploads the data using b_fs_yx_fsv4 offsets and checks every output
+/// element against the reference values.
template <typename InputT, typename WeightsT, typename OutputT>
+class convolution_random_test_fsv4_input : public convolution_random_test_base<InputT, WeightsT, OutputT> {
+public:
+    using parent = convolution_random_test_base<InputT, WeightsT, OutputT>;
+
+    /// Builds input -> reorder -> [input zero-point sub] -> convolution, with optional
+    /// weights zero-point subtraction and optional bias.
+    ///
+    /// @param engine Engine used to allocate the constant data memories.
+    /// @return Topology whose convolution primitive is named "conv".
+    topology build_topology(const cldnn::engine& engine) override {
+        auto input_lay = layout(this->input_type(), format::b_fs_yx_fsv4, this->input_size());
+        auto wei_lay = layout(this->weights_type(), format::bfyx, this->weights_size());
+
+        auto wei_mem = memory::allocate(engine, wei_lay);
+        auto wei_flat = flatten_4d(format::bfyx, this->_weights);
+        set_values(wei_mem, wei_flat);
+
+        // The reorder converts the fsv4 input into the format (and padding) under test.
+        layout reordered_layout = layout{this->input_type(), this->input_format(), this->input_size(), this->padding_size()};
+        auto topo = topology();
+        topo.add(input_layout("input", input_lay));
+        topo.add(reorder("input_reorder", "input", reordered_layout));
+
+        std::string input_id = "input_reorder";
+        if (this->has_input_zp()) {
+            auto input_zp_lay = layout(this->input_type(), format::bfyx, tensor(feature(this->input_features())));
+            auto input_zp_mem = memory::allocate(engine, input_zp_lay);
+            set_values(input_zp_mem, this->_input_zp);
+            topo.add(data("input_zp", input_zp_mem));
+            topo.add(eltwise("input_asymm", { "input_reorder", "input_zp" }, eltwise_mode::sub));
+            input_id = "input_asymm";
+        }
+
+        topo.add(data("weights", wei_mem));
+        std::string weights_id = "weights";
+        if (this->has_weights_zp()) {
+            auto weights_zp_lay = layout(this->weights_type(), format::bfyx, tensor(batch(this->output_features())));
+            auto weights_zp_mem = memory::allocate(engine, weights_zp_lay);
+            set_values(weights_zp_mem, this->_weights_zp);
+            topo.add(data("weights_zp", weights_zp_mem));
+            topo.add(eltwise("weights_asymm", { "weights", "weights_zp" }, eltwise_mode::sub));
+            weights_id = "weights_asymm";
+        }
+
+        // Shared by both convolution variants below.
+        const auto stride = tensor(batch(0), feature(0), spatial(this->_stride_x, this->_stride_y));
+        const auto input_offset = tensor(batch(0), feature(0), spatial(this->_offset_x, this->_offset_y));
+        const auto dilation = tensor(batch(0), feature(0), spatial(this->_dilation_x, this->_dilation_y));
+
+        if (!this->has_bias()) {
+            auto conv_prim = convolution(
+                "conv",
+                input_id,
+                { weights_id },
+                static_cast<uint32_t>(this->groups()),
+                stride,
+                input_offset,
+                dilation);
+            conv_prim.output_data_type = this->output_type();
+            topo.add(conv_prim);
+        } else {
+            auto bias_lay = layout(this->output_type(), format::bfyx, tensor(feature(this->output_features())));
+            auto bias_mem = memory::allocate(engine, bias_lay);
+            set_values(bias_mem, this->_bias);
+            topo.add(data("bias", bias_mem));
+            auto conv_prim = convolution(
+                "conv",
+                input_id,
+                { weights_id },
+                { "bias" },
+                static_cast<uint32_t>(this->groups()),
+                stride,
+                input_offset,
+                dilation);
+            conv_prim.output_data_type = this->output_type();
+            topo.add(conv_prim);
+        }
+
+        return topo;
+    }
+
+    /// Executes the topology on this->_input and asserts that the output layout and
+    /// every output element match `expected`.
+    ///
+    /// @param expected Reference output indexed as [batch][feature][y][x].
+    void run_expect(const VVVVF<OutputT>& expected) override {
+        auto engine = get_test_engine();
+
+        auto topo = this->build_topology(engine);
+
+        // Request input_format() as the forced format for "conv"; the kernel name is left empty.
+        auto build_opts = build_options(
+            build_option::optimize_data(true),
+            build_option::force_implementations({ {"conv", { this->input_format(), ""}} })
+        );
+        auto prog = program(engine, topo, build_opts);
+
+        auto net = network(prog, 0);
+
+        // Scatter the reference input into a zero-initialised buffer using the
+        // b_fs_yx_fsv4 linear offsets; positions never written stay zero.
+        auto input_lay = layout(this->input_type(), format::b_fs_yx_fsv4, this->input_size());
+        auto input_mem = memory::allocate(engine, input_lay);
+        std::vector<InputT> input_flat(input_lay.get_linear_size(), static_cast<InputT>(0));
+        for (size_t bi = 0; bi < this->batch_num(); ++bi)
+            for (size_t fi = 0; fi < this->input_features(); ++fi)
+                for (size_t yi = 0; yi < this->input_y(); ++yi)
+                    for (size_t xi = 0; xi < this->input_x(); ++xi) {
+                        tensor coords = tensor(batch(bi), feature(fi), spatial(xi, yi, 0, 0));
+                        size_t offset = input_lay.get_linear_offset(coords);
+                        input_flat[offset] = this->_input[bi][fi][yi][xi];
+                    }
+        set_values(input_mem, input_flat);
+
+        net.set_input_data("input", input_mem);
+        auto result = net.execute();
+        auto out_mem = result.at(this->output_primitive_id()).get_memory();
+        auto out_lay = out_mem.get_layout();
+        auto out_ptr = out_mem.cldnn::memory::template pointer<OutputT>();
+
+        // Collected once and appended to every failure message; nothing is printed
+        // when the test passes.
+        std::stringstream description;
+        for (const auto& info : net.get_primitives_info()) {
+            if (info.original_id == "conv") {
+                description << " kernel: " << info.kernel_id << '\n';
+            }
+        }
+        description << " executed: ";
+        for (const auto& executed_id : net.get_executed_primitive_ids()) {
+            description << executed_id << ", ";
+        }
+
+        ASSERT_EQ(out_lay.data_type, this->output_type());
+        ASSERT_EQ(out_lay.size.batch[0], expected.size());
+        ASSERT_EQ(out_lay.size.feature[0], expected[0].size());
+        ASSERT_EQ(out_lay.size.spatial[1], expected[0][0].size());
+        ASSERT_EQ(out_lay.size.spatial[0], expected[0][0][0].size());
+
+        for (size_t bi = 0; bi < this->batch_num(); ++bi)
+            for (size_t fi = 0; fi < this->output_features(); ++fi)
+                for (size_t yi = 0; yi < expected[0][0].size(); ++yi)
+                    for (size_t xi = 0; xi < expected[0][0][0].size(); ++xi) {
+                        tensor coords = tensor(batch(bi), feature(fi), spatial(xi, yi, 0, 0));
+                        size_t offset = out_lay.get_linear_offset(coords);
+
+                        ASSERT_EQ(out_ptr[offset], expected[bi][fi][yi][xi])
+                            << "at b= " << bi << ", f= " << fi << ", y= " << yi << ", x= " << xi << '\n'
+                            << description.str();
+                    }
+    }
+};
+
+template <typename InputT, typename WeightsT, typename OutputT>
class convolution_scale_random_test : public convolution_random_test_base<InputT, WeightsT, OutputT> {
public:
using parent = convolution_random_test_base<InputT, WeightsT, OutputT>;
using convolution_random_test_s8s8f32 = convolution_random_test_base<int8_t, int8_t, float>;  // InputT=int8_t, WeightsT=int8_t, OutputT=float
using convolution_random_test_u8s8f32 = convolution_random_test_base<uint8_t, int8_t, float>;  // InputT=uint8_t, WeightsT=int8_t, OutputT=float
+using convolution_random_test_fsv4_input_s8s8f32 = convolution_random_test_fsv4_input<int8_t, int8_t, float>;  // fsv4-input variant, InputT=int8_t
+using convolution_random_test_fsv4_input_u8s8f32 = convolution_random_test_fsv4_input<uint8_t, int8_t, float>;  // fsv4-input variant, InputT=uint8_t
+
using convolution_scale_random_test_s8s8f32 = convolution_scale_random_test<int8_t, int8_t, float>;  // scale variant, InputT=int8_t
using convolution_scale_random_test_u8s8f32 = convolution_scale_random_test<uint8_t, int8_t, float>;  // scale variant, InputT=uint8_t
ASSERT_NO_FATAL_FAILURE(test.run_random(GetParam()));
}
+TEST_P(convolution_random_smoke_test, s8s8f32_fsv4_input) {
+    // int8 input / int8 weights / float output, using the fsv4-input test class.
+    convolution_random_test_fsv4_input_s8s8f32 fsv4_case;
+    const auto& params = GetParam();
+    ASSERT_NO_FATAL_FAILURE(fsv4_case.run_random(params));
+}
+
+TEST_P(convolution_random_smoke_test, u8s8f32_fsv4_input) {
+    // uint8 input / int8 weights / float output, using the fsv4-input test class.
+    convolution_random_test_fsv4_input_u8s8f32 fsv4_case;
+    const auto& params = GetParam();
+    ASSERT_NO_FATAL_FAILURE(fsv4_case.run_random(params));
+}
+
INSTANTIATE_TEST_CASE_P(
basic,
convolution_random_smoke_test,