} else if (l == WeightsLayout::g_os_zyx_is_osv16_isv16 || l == WeightsLayout::g_os_zyx_is_osv16_isv32 ||
l == WeightsLayout::g_os_zyx_is_osv32_isv16 || l == WeightsLayout::g_os_zyx_is_osv32_isv32) {
args macroNameArgs = {"prefix", "g", "o", "i", "z", "y", "x"};
- args funcArgs = {"g", "o", "i", "z", "y", "x", "g_size", "o_size", "i_size", "z_size", "y_size", "x_size", "osv", "isv"};
+ args funcArgs = {"g", "o", "i", "z", "y", "x", "g_size", "o_size", "i_size", "z_size", "y_size", "x_size", "osv", "isv"};
const auto name = toString(l);
const auto body = R"V0G0N( \
uint is_size = (i_size + isv - 1) / isv; \
this->macroName = MacroName(name, macroNameArgs);
this->calcFunction = FuncBody(name, funcArgs, body);
std::string osv = "16", isv = "16";
- if (l == WeightsLayout::g_os_zyx_is_osv16_isv16) {
+ if (l == WeightsLayout::g_os_zyx_is_osv16_isv16) {
osv = "16"; isv = "16";
} else if (l == WeightsLayout::g_os_zyx_is_osv16_isv32) {
osv = "16"; isv = "32";
if (is_grouped_4d_layout) {
index_macro_name = _name + "_GET_INDEX(g, o, i, y, x)";
auto layout_str = toString(layout);
- if (layout == WeightsLayout::goiyx)
+ if (layout == WeightsLayout::goiyx)
index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", g, o, i, 0, y, x)";
else if (layout == WeightsLayout::g_os_is_yx_isv16_osv16)
index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", g, o, i, 0, y, x, 16)";
if (is_grouped_5d_layout) {
index_macro_name = _name + "_GET_INDEX(g, o, i, z, y, x)";
auto layout_str = toString(layout);
- if (layout == WeightsLayout::goizyx)
+ if (layout == WeightsLayout::goizyx)
index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", g, o, i, z, y, x)";
else if (layout == WeightsLayout::g_os_is_zyx_isv16_osv16)
index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", g, o, i, z, y, x, 16)";
if (is_common_4d_layout) {
index_macro_name = _name + "_GET_INDEX(o, i, y, x)";
auto layout_str = toString(layout);
- if (layout == WeightsLayout::oiyx)
+ if (layout == WeightsLayout::oiyx)
index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", 0, o, i, 0, y, x)";
else if (layout == WeightsLayout::os_is_yx_isv16_osv16)
index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", 0, o, i, 0, y, x, 16)";
if (is_common_5d_layout) {
index_macro_name = _name + "_GET_INDEX(o, i, z, y, x)";
auto layout_str = toString(layout);
- if (layout == WeightsLayout::oizyx)
+ if (layout == WeightsLayout::oizyx)
index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", 0, o, i, z, y, x)";
else if (layout == WeightsLayout::os_is_zyx_isv16_osv16)
index_func_val = "GET_WEIGHTS_" + layout_str + "_INDEX(" + _name + ", 0, o, i, z, y, x, 16)";
max_func(zero, min_func(one, (JitTerm)((alpha * input + beta).str()))).str()));
break;
}
+ case ActivationFunction::HSIGMOID: {  // Hard-sigmoid: registers min(max(zero, input + 3), 6) / 6 as the activation macro body.
+ std::string type_suffix = out_dt == Datatype::F32 ? "f" : "h";  // Literal suffix: "f" when out_dt is F32, "h" for any other output type.
+ const JitTerm three("3." + type_suffix);  // Term text is "3.f" or "3.h".
+ const JitTerm six("6." + type_suffix);  // Term text is "6.f" or "6.h"; used as both the upper clamp bound and the divisor.
+ jitConstants.AddConstant(MakeJitConstant(
+ macro_def,
+ (min_func(max_func(zero, input + three), six) / six).str()));  // `zero` and `input` are defined outside this hunk; the composed term is stored as a string under macro_def.
+ break;
+ }
case ActivationFunction::SIGN:
jitConstants.AddConstant(MakeJitConstant(
macro_def,
}
}
+TEST(activation_f16_fw_gpu, basic_yxfb_hsigmoid) {  // Runs the hsigmoid activation on an f16 yxfb input and checks every output element against a host-side reference.
+ const auto& engine = get_test_engine();
+
+ auto input = memory::allocate(engine, { data_types::f16, format::yxfb, { 1, 2, 5, 2 } });  // The checks below expect batch 1, feature 2, x 5, y 2 for this shape.
+ set_values(input,  // 20 values; per the reference formula below, inputs <= -3 map to 0 and inputs >= 3 map to 1.
+ { FLOAT16(0.0f), FLOAT16(-2.0f), FLOAT16(-3.0f), FLOAT16(4.0f), FLOAT16(5.0f),
+ FLOAT16(2.0f), FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.0f), FLOAT16(-6.0f),
+ FLOAT16(3.0f), FLOAT16(-3.0f), FLOAT16(3.0f), FLOAT16(5.0f), FLOAT16(1.0f),
+ FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(-1.0f), FLOAT16(1.0f) });
+
+ topology topology(  // Two-node graph: the input layout feeding a single activation primitive named "hsigmoid".
+ input_layout("input", input.get_layout()),
+ activation("hsigmoid", "input", activation_func::hsigmoid));
+ network network(engine, topology);
+ network.set_input_data("input", input);
+ auto outputs = network.execute();
+ EXPECT_EQ(outputs.size(), size_t(1));  // Exactly one output is expected...
+ EXPECT_EQ(outputs.begin()->first, "hsigmoid");  // ...and it must be keyed by the activation primitive's id.
+
+ auto output_memory = outputs.at("hsigmoid").get_memory();
+ auto output_layout = output_memory.get_layout();
+ auto output_ptr = output_memory.pointer<FLOAT16>();  // Typed accessors used for the element-wise comparison below.
+ auto input_ptr = input.pointer<FLOAT16>();
+
+ int y_size = output_layout.size.spatial[1];
+ int x_size = output_layout.size.spatial[0];
+ int f_size = output_layout.size.feature[0];
+ int b_size = output_layout.size.batch[0];
+ EXPECT_EQ(output_layout.format, format::yxfb);  // The output must have the same format and dimensions as the input.
+ EXPECT_EQ(y_size, 2);
+ EXPECT_EQ(x_size, 5);
+ EXPECT_EQ(f_size, 2);
+ EXPECT_EQ(b_size, 1);
+
+ for (size_t i = 0; i < output_layout.get_linear_size(); ++i) {  // Compare element i of the output with the reference computed from element i of the input.
+ EXPECT_NEAR((FLOAT16)(std::fmin(std::fmax(0.f, (float)input_ptr[i] + 3.f), 6.f) / 6.f),
+ output_ptr[i], 1e-3f);  // Reference min(max(0, x + 3), 6) / 6 is evaluated in float, cast to FLOAT16, and compared with tolerance 1e-3.
+ }
+}
+
TEST(activation_f32_fw_gpu, basic_yxfb_all_functions)
{
// Input:
activation_func::swish,
activation_func::hswish,
activation_func::mish,
- activation_func::gelu
+ activation_func::gelu,
+ activation_func::hsigmoid
};
activation_additional_params params = { 0.5f, 2.5f };
EXPECT_NEAR(0.5f * (float)input_ptr[i] * (1.f + std::erf((float)(input_ptr[i]) / std::sqrt(2.0f))),
output_ptr[i], 1e-5f);
break;
+ case activation_func::hsigmoid:
+ EXPECT_FLOAT_EQ(std::fmin(std::fmax(0.f, (float)input_ptr[i] + 3.f), 6.f) / 6.f, output_ptr[i]);
+ break;
default:
break;
}