// fuse_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct eltwise_fuse_params : fuse_params {
- eltwise_fuse_params() : fuse_params(KernelType::ELTWISE) {}
+    // Element-wise operation carried by this fused-op descriptor.
+    EltwiseMode mode;
+
+    // explicit: a bare EltwiseMode must never convert implicitly into a fuse-params object.
+    explicit eltwise_fuse_params(EltwiseMode mode) : fuse_params(KernelType::ELTWISE), mode(mode) {}
};
struct scale_fuse_params : fuse_params {
break;
}
case KernelType::ELTWISE: {
+    // The eltwise-specific parameters carry the mode that selects the emitted operator.
+    auto p = desc.GetOpParams<eltwise_fuse_params>();
+    if (!p)
+        throw std::runtime_error("[clDNN] Eltwise fuse params can't be nullptr");
+
+    // Infix operator, padded with spaces so that ADD produces exactly the same
+    // generated text (" + ") as the code did before the mode became configurable.
+    std::string op;
+    switch (p->mode) {
+        case kernel_selector::EltwiseMode::ADD:
+            op = " + ";
+            break;
+        case kernel_selector::EltwiseMode::MUL:
+            op = " * ";
+            break;
+        default:
+            // Only modes with an operator mapping above can be emitted here.
+            throw std::runtime_error("[clDNN] Eltwise mode is not supported in fused ops codegen");
+    }
+
op_decls += "\\\n\t" + GetOutputType(vec_size) + " " + out_var + " = " + in_vars_converted[0] +
- " + " + ConvertToOutputType(in_var, vec_size) + ";";
+    op + ConvertToOutputType(in_var, vec_size) + ";";
break;
}
case KernelType::QUANTIZE: {
namespace cldnn {
namespace gpu {
-namespace {
-inline kernel_selector::eltwise_mode convert_to_eltwise_mode(eltwise_mode mode) {
- switch (mode) {
- case eltwise_mode::sum:
- return kernel_selector::eltwise_mode::ADD;
- case eltwise_mode::sub:
- return kernel_selector::eltwise_mode::SUB;
- case eltwise_mode::max:
- return kernel_selector::eltwise_mode::MAX;
- case eltwise_mode::prod:
- return kernel_selector::eltwise_mode::MUL;
- case eltwise_mode::div:
- return kernel_selector::eltwise_mode::DIV;
- case eltwise_mode::min:
- return kernel_selector::eltwise_mode::MIN;
- case eltwise_mode::pow:
- return kernel_selector::eltwise_mode::POW;
- case eltwise_mode::mod:
- return kernel_selector::eltwise_mode::MODULU;
- case eltwise_mode::eq:
- return kernel_selector::eltwise_mode::EQ;
- case eltwise_mode::ne:
- return kernel_selector::eltwise_mode::NE;
- case eltwise_mode::lt:
- return kernel_selector::eltwise_mode::LT;
- case eltwise_mode::le:
- return kernel_selector::eltwise_mode::LE;
- case eltwise_mode::gt:
- return kernel_selector::eltwise_mode::GT;
- case eltwise_mode::ge:
- return kernel_selector::eltwise_mode::GE;
- case eltwise_mode::logic_and:
- return kernel_selector::eltwise_mode::LOGIC_AND;
- case eltwise_mode::logic_or:
- return kernel_selector::eltwise_mode::LOGIC_OR;
- case eltwise_mode::logic_xor:
- return kernel_selector::eltwise_mode::LOGIC_XOR;
- case eltwise_mode::squared_diff:
- return kernel_selector::eltwise_mode::SQUARED_DIFF;
- case eltwise_mode::floor_mod:
- return kernel_selector::eltwise_mode::FLOOR_MOD;
- default:
- return kernel_selector::eltwise_mode::ADD;
- }
-}
-} // namespace
-
struct eltwise_gpu : typed_primitive_gpu_impl<eltwise> {
using parent = typed_primitive_gpu_impl<eltwise>;
using parent::parent;
auto fuse_eltwise_f = [&](eltwise_node& node) {
std::shared_ptr<const cldnn::eltwise> prim = node.get_primitive();
+ const std::vector<eltwise_mode> supported_modes = {
+ eltwise_mode::sum,
+ eltwise_mode::prod
+ };
+
if (node.is_output() || node.inputs_count() != 2 ||
- prim->mode != eltwise_mode::sum || !prim->stride.empty())
+ std::find(supported_modes.begin(), supported_modes.end(), prim->mode) == supported_modes.end() ||
+ !prim->stride.empty())
return;
std::vector<cldnn::program_node*> parents = node.get_dependencies();
#include <string>
namespace cldnn {
+
+// Translates a cldnn eltwise_mode into the corresponding kernel_selector eltwise mode.
+// Any value without a dedicated case below falls back to ADD.
+inline kernel_selector::eltwise_mode convert_to_eltwise_mode(eltwise_mode mode) {
+    using ks_mode = kernel_selector::eltwise_mode;
+    switch (mode) {
+        // Arithmetic
+        case eltwise_mode::sum:          return ks_mode::ADD;
+        case eltwise_mode::sub:          return ks_mode::SUB;
+        case eltwise_mode::max:          return ks_mode::MAX;
+        case eltwise_mode::prod:         return ks_mode::MUL;
+        case eltwise_mode::div:          return ks_mode::DIV;
+        case eltwise_mode::min:          return ks_mode::MIN;
+        case eltwise_mode::pow:          return ks_mode::POW;
+        case eltwise_mode::mod:          return ks_mode::MODULU;
+        // Comparisons
+        case eltwise_mode::eq:           return ks_mode::EQ;
+        case eltwise_mode::ne:           return ks_mode::NE;
+        case eltwise_mode::lt:           return ks_mode::LT;
+        case eltwise_mode::le:           return ks_mode::LE;
+        case eltwise_mode::gt:           return ks_mode::GT;
+        case eltwise_mode::ge:           return ks_mode::GE;
+        // Logical
+        case eltwise_mode::logic_and:    return ks_mode::LOGIC_AND;
+        case eltwise_mode::logic_or:     return ks_mode::LOGIC_OR;
+        case eltwise_mode::logic_xor:    return ks_mode::LOGIC_XOR;
+        // Remaining modes
+        case eltwise_mode::squared_diff: return ks_mode::SQUARED_DIFF;
+        case eltwise_mode::floor_mod:    return ks_mode::FLOOR_MOD;
+        default:                         return ks_mode::ADD;
+    }
+}
+
template <>
struct typed_program_node<eltwise> : public typed_program_node_base<eltwise> {
using parent = typed_program_node_base<eltwise>;
size_t inputs_count() const { return get_primitive()->input.size(); }
std::shared_ptr<kernel_selector::fuse_params> get_fuse_params() const override {
- return std::make_shared<kernel_selector::eltwise_fuse_params>();
+    // Hand the primitive's mode, translated to the kernel_selector enum, to the fuse params.
+    const auto ks_mode = convert_to_eltwise_mode(get_primitive()->mode);
+    return std::make_shared<kernel_selector::eltwise_fuse_params>(ks_mode);
}
};
}), );
class conv_fp32_prelu_eltwise : public ConvFusingTest {};
-TEST_P(conv_fp32_prelu_eltwise, basic) {
+TEST_P(conv_fp32_prelu_eltwise, basic_sum) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
execute(p);
}
-TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast) {
+// Topology under test: convolution with bias -> relu_negative_slope activation ->
+// eltwise in prod mode against constant data using the output layout ->
+// reorder to the default format in f32.
+TEST_P(conv_fp32_prelu_eltwise, basic_prod) {
+    auto params = GetParam();
+
+    create_topologies(
+        input_layout("input", get_input_layout(params)),
+        data("weights", get_mem(get_weights_layout(params))),
+        data("bias", get_mem(get_bias_layout(params))),
+        data("slope_data", get_mem(get_per_channel_layout(params))),
+        data("eltwise_data", get_mem(get_output_layout(params))),
+        convolution("conv_prim", "input", {"weights"}, {"bias"},
+                    params.groups, params.stride, params.pad, params.dilation),
+        activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
+        eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::prod),
+        reorder("reorder_bfyx", "eltwise", params.default_format, data_types::f32)
+    );
+
+    tolerance = 1e-5f;
+    execute(params);
+}
+
+TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_sum) {
auto p = GetParam();
tensor eltw_shape = p.default_format.spatial_num() == 2 ? tensor{1, 1, 1, 1} : tensor{1, 1, 1, 1, 1};
create_topologies(input_layout("input", get_input_layout(p)),
execute(p);
}
+// Same chain as the non-broadcast prod case, but the eltwise constant is a tensor of
+// all-ones extents (four or five of them, depending on the spatial dimension count).
+TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_prod) {
+    auto params = GetParam();
+
+    const bool two_spatial_dims = params.default_format.spatial_num() == 2;
+    tensor eltw_shape = two_spatial_dims ? tensor{1, 1, 1, 1} : tensor{1, 1, 1, 1, 1};
+
+    create_topologies(
+        input_layout("input", get_input_layout(params)),
+        data("weights", get_mem(get_weights_layout(params))),
+        data("bias", get_mem(get_bias_layout(params))),
+        data("slope_data", get_mem(get_per_channel_layout(params))),
+        data("eltwise_data", get_mem(layout{ params.data_type, params.input_format, eltw_shape })),
+        convolution("conv_prim", "input", {"weights"}, {"bias"},
+                    params.groups, params.stride, params.pad, params.dilation),
+        activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
+        eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::prod),
+        reorder("reorder_bfyx", "eltwise", params.default_format, data_types::f32)
+    );
+
+    tolerance = 1e-5f;
+    execute(params);
+}
+
TEST_P(conv_fp32_prelu_eltwise, vector_ops) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),