conv_eltw_test_params{CASE_CONV_ELTW_FP32_4, 2, 6},
conv_eltw_test_params{CASE_CONV_ELTW_FP32_5, 3, 6},
conv_eltw_test_params{CASE_CONV_ELTW_FP32_6, 3, 6},
- conv_eltw_test_params{CASE_CONV_ELTW_FP32_7, 4, 6},
- conv_eltw_test_params{CASE_CONV_ELTW_FP32_8, 4, 6},
+ conv_eltw_test_params{CASE_CONV_ELTW_FP32_7, 3, 6},
+ conv_eltw_test_params{CASE_CONV_ELTW_FP32_8, 3, 6},
}), );
mvn_test_params{ CASE_MVN_I8_4, 2, 6 },
mvn_test_params{ CASE_MVN_I8_5, 2, 6 },
mvn_test_params{ CASE_MVN_I8_6, 2, 6 },
- mvn_test_params{ CASE_MVN_I8_7, 4, 6 },
+ mvn_test_params{ CASE_MVN_I8_7, 3, 6 },
mvn_test_params{ CASE_MVN_3D_I8_1, 2, 6 },
mvn_test_params{ CASE_MVN_3D_I8_2, 2, 6 },
mvn_test_params{ CASE_MVN_3D_I8_3, 2, 6 },
mvn_test_params{ CASE_MVN_3D_I8_4, 2, 6 },
- mvn_test_params{ CASE_MVN_3D_I8_5, 4, 6 },
+ mvn_test_params{ CASE_MVN_3D_I8_5, 3, 6 },
mvn_test_params{ CASE_MVN_U8_1, 2, 6 },
mvn_test_params{ CASE_MVN_U8_2, 2, 6 },
mvn_test_params{ CASE_MVN_U8_3, 2, 6 },
mvn_test_params{ CASE_MVN_U8_4, 2, 6 },
mvn_test_params{ CASE_MVN_U8_5, 2, 6 },
mvn_test_params{ CASE_MVN_U8_6, 2, 6 },
- mvn_test_params{ CASE_MVN_U8_7, 4, 6 },
+ mvn_test_params{ CASE_MVN_U8_7, 3, 6 },
mvn_test_params{ CASE_MVN_3D_U8_1, 2, 6 },
mvn_test_params{ CASE_MVN_3D_U8_2, 2, 6 },
mvn_test_params{ CASE_MVN_3D_U8_3, 2, 6 },
mvn_test_params{ CASE_MVN_3D_U8_4, 2, 6 },
- mvn_test_params{ CASE_MVN_3D_U8_5, 4, 6 },
+ mvn_test_params{ CASE_MVN_3D_U8_5, 3, 6 },
}), );
class mvn_eltwise : public MVNFusingTest {};
INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_activation_quantize_i8_eltwise_quantize_u8,
::testing::ValuesIn(std::vector<conv_eltw_test_params>{
- conv_eltw_test_params{CASE_DECONV_ELTW_FP32_1, 5, 7},
- conv_eltw_test_params{CASE_DECONV_ELTW_FP32_2, 5, 7},
- conv_eltw_test_params{CASE_DECONV_ELTW_FP32_3, 5, 7},
- conv_eltw_test_params{CASE_DECONV_ELTW_FP32_4, 5, 7},
- conv_eltw_test_params{CASE_DECONV_ELTW_FP32_5, 5, 7},
- conv_eltw_test_params{CASE_DECONV_ELTW_FP32_6, 5, 7},
- conv_eltw_test_params{CASE_DECONV_ELTW_FP32_7, 5, 7},
- conv_eltw_test_params{CASE_DECONV_ELTW_FP32_8, 5, 7},
+ conv_eltw_test_params{CASE_DECONV_ELTW_FP32_1, 4, 7},
+ conv_eltw_test_params{CASE_DECONV_ELTW_FP32_2, 4, 7},
+ conv_eltw_test_params{CASE_DECONV_ELTW_FP32_3, 4, 7},
+ conv_eltw_test_params{CASE_DECONV_ELTW_FP32_4, 4, 7},
+ conv_eltw_test_params{CASE_DECONV_ELTW_FP32_5, 4, 7},
+ conv_eltw_test_params{CASE_DECONV_ELTW_FP32_6, 4, 7},
+ conv_eltw_test_params{CASE_DECONV_ELTW_FP32_7, 4, 7},
+ conv_eltw_test_params{CASE_DECONV_ELTW_FP32_8, 4, 7},
conv_eltw_test_params{CASE_DECONV_ELTW_i8_1, 2, 7},
conv_eltw_test_params{CASE_DECONV_ELTW_i8_2, 2, 7},
space_to_batch_test_params{CASE_SPACE_TO_BATCH_I8_1, 2, 5},
space_to_batch_test_params{CASE_SPACE_TO_BATCH_I8_2, 2, 5},
}), );
+
+/* ----------------------------------------------------------------------------------------------------- */
+/* ---------------------------------------- Eltwise cases -------------------------------------------------- */
+/* ----------------------------------------------------------------------------------------------------- */
+struct eltwise_test_params { // one parameter set for the eltwise fusing tests; filled positionally by the CASE_ELTWISE_* macros
+ tensor input_size; // size used for both "input" and "input2" layouts
+ data_types input_type; // element type of the "input" layout
+ data_types input_type2; // element type of the "input2" layout
+ format input_format; // format shared by both input layouts
+ data_types default_type; // passed to eltwise(...) and used as the type of the per-channel constant layout
+ format default_format; // format of the per-channel constant layout and of the final "out" reorder
+ eltwise_mode mode; // operation passed to the eltwise primitive
+ size_t expected_fused_primitives; // NOTE(review): not read in this section; presumably checked by compare() - confirm in BaseFusingTest
+ size_t expected_not_fused_primitives; // NOTE(review): not read in this section; presumably checked by compare() - confirm in BaseFusingTest
+};
+// CASE_ELTWISE_<types>_<n>: positional initializers for eltwise_test_params (input_size, input_type, input_type2, input_format, default_type, default_format, mode, expected fused, expected not fused); <types> names the input type(s), <n> = 1/2/3 selects bfyx / bfzyx / b_fs_yx_fsv16.
+#define CASE_ELTWISE_FP32_1 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_FP32_2 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_FP32_3 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_FP16_1 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_FP16_2 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_FP16_3 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_I8_1 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_I8_2 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_I8_3 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_U8_1 {2, 16, 4, 4}, data_types::u8, data_types::u8, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_U8_2 {2, 16, 4, 4}, data_types::u8, data_types::u8, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_U8_3 {2, 16, 4, 4}, data_types::u8, data_types::u8, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_FP32_FP16_1 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_FP32_FP16_2 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_FP32_FP16_3 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_FP16_FP32_1 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_FP16_FP32_2 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_FP16_FP32_3 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_I8_FP16_1 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_I8_FP16_2 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_I8_FP16_3 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_I8_FP32_1 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_I8_FP32_2 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_I8_FP32_3 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_U8_FP16_1 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_U8_FP16_2 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_U8_FP16_3 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_U8_FP32_1 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_U8_FP32_2 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum, 3, 4
+#define CASE_ELTWISE_U8_FP32_3 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
+
+// Fixture for two-input eltwise fusing tests: builds a fused and a non-fused network, feeds both the same "input"/"input2" data and hands them to compare().
+class EltwiseFusingTest : public ::BaseFusingTest<eltwise_test_params> {
+public:
+ void execute(eltwise_test_params& p) {
+ auto input_prim = get_mem(get_input_layout(p));
+ auto input_prim2 = get_mem(get_input_layout2(p));
+ // One network per topology; the topologies themselves are expected to be set up before execute() is called.
+ network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
+ network network_fused(this->engine, this->topology_fused, bo_fused);
+ // Both networks receive the very same memory objects for "input" and "input2".
+ network_fused.set_input_data("input", input_prim);
+ network_fused.set_input_data("input2", input_prim2);
+ network_not_fused.set_input_data("input", input_prim);
+ network_not_fused.set_input_data("input2", input_prim2);
+ // NOTE(review): compare() is not defined in this class; presumably inherited from BaseFusingTest - confirm.
+ compare(network_not_fused, network_fused, p);
+ }
+ // Layouts of the two inputs: same format and size, element type taken from input_type / input_type2.
+ layout get_input_layout(eltwise_test_params& p) { return layout{p.input_type, p.input_format, p.input_size}; }
+ layout get_input_layout2(eltwise_test_params& p) { return layout{p.input_type2, p.input_format, p.input_size}; }
+ // Layout for per-channel constants: default type/format, feature count copied from input_size, every other dimension 1.
+ layout get_per_channel_layout(eltwise_test_params& p) {
+ return layout{p.default_type, p.default_format, tensor{1, p.input_size.feature[0], 1, 1}};
+ }
+};
+// eltwise -> quantize(u8) fusing test: all four quantize ranges use single-element layouts; "out" reorders the result to f32 in p.default_format.
+class eltwise_quantize : public EltwiseFusingTest {};
+TEST_P(eltwise_quantize, u8) {
+ auto p = GetParam();
+ create_topologies(input_layout("input", get_input_layout(p)),
+ input_layout("input2", get_input_layout2(p)),
+ eltwise("eltwise", {"input", "input2"}, p.mode, p.default_type),
+ data("in_lo", get_mem(get_single_element_layout(p), min_random, 0)),
+ data("in_hi", get_mem(get_single_element_layout(p), 1, max_random)),
+ data("out_lo", get_mem(get_single_element_layout(p), 0)),
+ data("out_hi", get_mem(get_single_element_layout(p), 255)),
+ quantize("quantize", "eltwise", "in_lo", "in_hi", "out_lo", "out_hi", 256, data_types::u8), // NOTE(review): 256 is presumably the number of quantization levels - confirm
+ reorder("out", "quantize", p.default_format, data_types::f32));
+ // NOTE(review): tolerance is not read in this block; presumably the allowed fused/non-fused difference used by compare() - confirm.
+ tolerance = 1.f;
+ execute(p);
+}
+// eltwise -> quantize(i8) fusing test: in_lo/in_hi use the per-channel layout, out_lo/out_hi are single-element constants (-128 / 127).
+TEST_P(eltwise_quantize, i8_per_channel) {
+ auto p = GetParam();
+ create_topologies(input_layout("input", get_input_layout(p)),
+ input_layout("input2", get_input_layout2(p)),
+ eltwise("eltwise", {"input", "input2"}, p.mode, p.default_type),
+ data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
+ data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)),
+ data("out_lo", get_mem(get_single_element_layout(p), -128)),
+ data("out_hi", get_mem(get_single_element_layout(p), 127)),
+ quantize("quantize", "eltwise", "in_lo", "in_hi", "out_lo", "out_hi", 256, data_types::i8), // NOTE(review): 256 is presumably the number of quantization levels - confirm
+ reorder("out", "quantize", p.default_format, data_types::f32));
+ // NOTE(review): tolerance is not read in this block; presumably the allowed fused/non-fused difference used by compare() - confirm.
+ tolerance = 1.f;
+ execute(p);
+}
+// Instantiates every eltwise_quantize test for the FP16/FP32 same-type cases and all mixed-type cases; the same-type CASE_ELTWISE_I8_* / CASE_ELTWISE_U8_* cases are not in this list.
+INSTANTIATE_TEST_CASE_P(fusings_gpu,
+ eltwise_quantize,
+ ::testing::ValuesIn(std::vector<eltwise_test_params>{
+ eltwise_test_params{CASE_ELTWISE_FP16_1},
+ eltwise_test_params{CASE_ELTWISE_FP16_2},
+ eltwise_test_params{CASE_ELTWISE_FP16_3},
+ eltwise_test_params{CASE_ELTWISE_FP32_1},
+ eltwise_test_params{CASE_ELTWISE_FP32_2},
+ eltwise_test_params{CASE_ELTWISE_FP32_3},
+ eltwise_test_params{CASE_ELTWISE_FP32_FP16_1},
+ eltwise_test_params{CASE_ELTWISE_FP32_FP16_2},
+ eltwise_test_params{CASE_ELTWISE_FP32_FP16_3},
+ eltwise_test_params{CASE_ELTWISE_FP16_FP32_1},
+ eltwise_test_params{CASE_ELTWISE_FP16_FP32_2},
+ eltwise_test_params{CASE_ELTWISE_FP16_FP32_3},
+ eltwise_test_params{CASE_ELTWISE_I8_FP32_1},
+ eltwise_test_params{CASE_ELTWISE_I8_FP32_2},
+ eltwise_test_params{CASE_ELTWISE_I8_FP32_3},
+ eltwise_test_params{CASE_ELTWISE_U8_FP32_1},
+ eltwise_test_params{CASE_ELTWISE_U8_FP32_2},
+ eltwise_test_params{CASE_ELTWISE_U8_FP32_3},
+ eltwise_test_params{CASE_ELTWISE_I8_FP16_1},
+ eltwise_test_params{CASE_ELTWISE_I8_FP16_2},
+ eltwise_test_params{CASE_ELTWISE_I8_FP16_3},
+ eltwise_test_params{CASE_ELTWISE_U8_FP16_1},
+ eltwise_test_params{CASE_ELTWISE_U8_FP16_2},
+ eltwise_test_params{CASE_ELTWISE_U8_FP16_3},
+ }), );