Added the swish activation function.
Computing the swish derivative requires both the input and the output of the
activation, so run_prime_fn is overloaded to take the input and the output
when calculating the derivative (see the sketch below).
- add swish activation
- add test case for the swish activation function
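
For reference, a minimal scalar sketch of the relation (standalone helpers for
illustration only, not the nntrainer Tensor API):

```cpp
#include <cmath>

static float sigmoid(float x) { return 1.0f / (1.0f + std::exp(-x)); }

// swish(x) = x * sigmoid(x)
static float swish(float x) { return x * sigmoid(x); }

// The derivative can be written in terms of both the input x and the
// output y = swish(x):  swish'(x) = sigmoid(x) * (1 - y) + y
static float swish_prime(float x, float y) {
  return sigmoid(x) * (1.0f - y) + y;
}
```
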
**Self evaluation:**
1. Build test: [x]Passed []Failed []Skipped
2. Run test: [x]Passed []Failed []Skipped
Signed-off-by: Seungbaek Hong <sb92.hong@samsung.com>
return Activation("Activation=relu", properties);
}
+/**
+ * @brief Helper function to create swish activation layer
+ */
+inline std::unique_ptr<Layer>
+Swish(const std::vector<std::string> &properties = {}) {
+ return Activation("Activation=swish", properties);
+}
+
/**
* @brief Helper function to create Tanh layer
*/
std::function<Tensor &(Tensor &, Tensor &, Tensor const &)> const
&activation_prime_fn) {
_act_fn = activation_fn;
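+  // wrap the 3-argument prime fn in the new 4-argument signature;
+  // this overload does not use the input tensor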
+ _act_prime_fn =
+ [activation_prime_fn](Tensor const &t_in, Tensor &t_out,
+ Tensor &outgoing_derivative,
+ Tensor const &incoming_derivative) -> Tensor & {
+ return activation_prime_fn(t_out, outgoing_derivative, incoming_derivative);
+ };
+
+ return ML_ERROR_NONE;
+}
+
+int ActiFunc::setActivation(
+ std::function<Tensor &(Tensor const &, Tensor &)> const &activation_fn,
+ std::function<Tensor &(Tensor const &, Tensor const &, Tensor &,
+ Tensor const &)> const &activation_prime_fn) {
+ if (in_place)
+ return ML_ERROR_INVALID_PARAMETER;
+
+ _act_fn = activation_fn;
_act_prime_fn = activation_prime_fn;
return ML_ERROR_NONE;
_act_fn = activation_fn;
if (!in_place) {
_act_prime_fn =
- [activation_prime_fn](Tensor &x, Tensor &ret_derivative,
- Tensor const &derivative) -> Tensor & {
+ [activation_prime_fn](Tensor const &t_in, Tensor &t_out,
+ Tensor &outgoing_derivative,
+ Tensor const &incoming_derivative) -> Tensor & {
/** @todo update this based on supportInPlace */
- activation_prime_fn(x, ret_derivative);
- ret_derivative.multiply_i_strided(derivative);
+ activation_prime_fn(t_out, outgoing_derivative);
+ outgoing_derivative.multiply_i_strided(incoming_derivative);
- return ret_derivative;
+ return outgoing_derivative;
};
} else {
_act_prime_fn =
- [activation_prime_fn](Tensor &x, Tensor &ret_derivative,
- Tensor const &derivative) -> Tensor & {
- activation_prime_fn(x, x);
- derivative.multiply_strided(x, ret_derivative);
+ [activation_prime_fn](Tensor const &t_in, Tensor &t_out,
+ Tensor &outgoing_derivative,
+ Tensor const &incoming_derivative) -> Tensor & {
+ activation_prime_fn(t_out, t_out);
+ incoming_derivative.multiply_strided(t_out, outgoing_derivative);
- return ret_derivative;
+ return outgoing_derivative;
};
}
};
if (!in_place) {
_act_prime_fn =
- [activation_prime_fn](Tensor &x, Tensor &ret_derivative,
- Tensor const &derivative) -> Tensor & {
+ [activation_prime_fn](Tensor const &t_in, Tensor &t_out,
+ Tensor &outgoing_derivative,
+ Tensor const &incoming_derivative) -> Tensor & {
/** @todo update this based on supportInPlace */
- x.apply(activation_prime_fn, ret_derivative);
- ret_derivative.multiply_i_strided(derivative);
+ t_out.apply(activation_prime_fn, outgoing_derivative);
+ outgoing_derivative.multiply_i_strided(incoming_derivative);
- return ret_derivative;
+ return outgoing_derivative;
};
} else {
_act_prime_fn =
- [activation_prime_fn](Tensor &x, Tensor &ret_derivative,
- Tensor const &derivative) -> Tensor & {
- x.apply(activation_prime_fn, x);
- derivative.multiply_strided(x, ret_derivative);
+ [activation_prime_fn](Tensor const &t_in, Tensor &t_out,
+ Tensor &outgoing_derivative,
+ Tensor const &incoming_derivative) -> Tensor & {
+ t_out.apply(activation_prime_fn, t_out);
+ incoming_derivative.multiply_strided(t_out, outgoing_derivative);
- return ret_derivative;
+ return outgoing_derivative;
};
}
case ActivationType::ACT_LEAKY_RELU:
this->setActivation(leakyRelu, leakyReluPrime);
break;
+ case ActivationType::ACT_SWISH:
+ in_place = false;
+ this->setActivation(swish, swishPrime);
+ break;
case ActivationType::ACT_NONE:
this->setActivation(no_op, no_op_prime);
break;
_act_fn(input, output);
}
+Tensor &ActiFunc::run_prime_fn(Tensor &input, Tensor &output,
+ Tensor &outgoing_derivative,
+ Tensor const &incoming_derivative) {
+ return _act_prime_fn(input, output, outgoing_derivative, incoming_derivative);
+}
+
Tensor &ActiFunc::run_prime_fn(Tensor &output, Tensor &outgoing_derivative,
Tensor const &incoming_derivative) {
- return _act_prime_fn(output, outgoing_derivative, incoming_derivative);
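+  // legacy overload: no input tensor is available here, so pass an empty
+  // Tensor for the input slot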
+ return _act_prime_fn(Tensor(), output, outgoing_derivative,
+ incoming_derivative);
}
bool ActiFunc::supportInPlace() const { return in_place; }
return x >= 0.0f ? 1.0f : NEGATIVE_SLOPE;
}
+Tensor &ActiFunc::swish(Tensor const &t_in, Tensor &t_out) {
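+  // swish(x) = x * sigmoid(x): write sigmoid(x) into t_out, then multiply
+  // in place by the input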
+ t_in.apply([&](float x) { return sigmoid(x); }, t_out);
+ t_out.multiply_i(t_in);
+
+ return t_out;
+}
+
+Tensor &ActiFunc::swishPrime(Tensor const &t_in, Tensor const &t_out,
+ Tensor &outgoing_derivative,
+ Tensor const &incoming_derivative) {
+ if (outgoing_derivative.empty())
+ outgoing_derivative = Tensor(t_out.getDim());
+
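+  // swish'(x) = sigmoid(x) * (1 - swish(x)) + swish(x), evaluated elementwise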
+ Tensor tmp = Tensor(t_out.getDim());
+ t_in.apply([&](float x) { return sigmoid(x); }, outgoing_derivative);
+ t_out.apply([&](float x) { return 1 - x; }, tmp);
+ outgoing_derivative.multiply_i(tmp);
+ outgoing_derivative.add_i(t_out);
+
+ outgoing_derivative.multiply_i_strided(incoming_derivative);
+
+ return outgoing_derivative;
+}
+
void ActiFunc::executeInPlace(bool val) {
if (val && !supportInPlace())
throw std::runtime_error("Error setting activation layer to work in-place");
*/
void run_fn(Tensor const &input, Tensor &output);
+ /**
+ * @brief run prime function
+ *
+   * @param[in] input input of the activation
+   * @param[in] output output of the activation
+   * @param[out] outgoing_derivative outgoing derivative
+   * @param[in] incoming_derivative incoming derivative
+   * @retval Tensor
+ */
+ Tensor &run_prime_fn(Tensor &input, Tensor &output,
+ Tensor &outgoing_derivative,
+ Tensor const &incoming_derivative);
+
/**
* @brief run prime function
*
*/
static float leakyReluPrime(float x);
+ /**
+ * @brief swish activation function
+ * @param[in] t_in input tensor
+   * @param[out] t_out output tensor
+ */
+ static Tensor &swish(Tensor const &t_in, Tensor &t_out);
+
+ /**
+   * @brief derivative of the swish function
+   * @param[in] t_in input tensor
+   * @param[in] t_out output tensor
+   * @param[out] outgoing_derivative outgoing derivative
+   * @param[in] incoming_derivative incoming derivative
+ */
+ static Tensor &swishPrime(Tensor const &t_in, Tensor const &t_out,
+ Tensor &outgoing_derivative,
+ Tensor const &incoming_derivative = Tensor());
+
/**
* @brief setActivation by custom activation function
* @note apply derivative as this activation_prime_fn does not utilize
std::function<Tensor &(Tensor &, Tensor &, Tensor const &)> const
&activation_prime_fn);
+ /**
+ * @brief setActivation by custom activation function
+ * @note derivative not applied here as this activation_prime_fn applies
+ * derivative itself
+ * @param[in] activation_fn activation function to be used
+   * @param[in] activation_prime_fn activation prime function to be used
+ * @retval #ML_ERROR_NONE when successful
+ */
+ int setActivation(
+ std::function<Tensor &(Tensor const &, Tensor &)> const &activation_fn,
+ std::function<Tensor &(Tensor const &, Tensor const &, Tensor &,
+ Tensor const &)> const &activation_prime_fn);
+
/**
* @brief setActivation by custom activation function
* @note apply derivative as this activation_prime_fn does not utilize
std::function<float(float const)> const &activation_fn,
std::function<float(float const)> const &activation_prime_fn);
+ /**
+ * @brief setActivation by custom activation function
+ * @note apply derivative as this activation_prime_fn does not utilize
+ * derivative
+   * @param[in] std::function<float(float const)> activation_fn activation
+   * function to be used
+   * @param[in] std::function<float(float const, float const)>
+   * activation_prime_fn activation prime function to be used
+ * @retval #ML_ERROR_NONE when successful
+ */
+ int setActivation(
+ std::function<float(float const)> const &activation_fn,
+ std::function<float(float const, float const)> const &activation_prime_fn);
+
/**
* @brief Notify that this layer will execute in-place
*
private:
std::function<Tensor &(Tensor const &, Tensor &)> _act_fn;
- std::function<Tensor &(Tensor &, Tensor &, Tensor const &)> _act_prime_fn;
+ std::function<Tensor &(Tensor const &, Tensor &, Tensor &, Tensor const &)>
+    _act_prime_fn; /**< prime function with input and output */
ActivationType
activation_type; /**< type of the activation represented by this */
void ActivationLayer::calcDerivative(RunLayerContext &context) {
const Tensor &deriv = context.getIncomingDerivative(SINGLE_INOUT_IDX);
Tensor &ret = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
+ Tensor &in = context.getInput(SINGLE_INOUT_IDX);
Tensor &out = context.getOutput(SINGLE_INOUT_IDX);
- acti_func.run_prime_fn(out, ret, deriv);
+ acti_func.run_prime_fn(in, out, ret, deriv);
}
void ActivationLayer::exportTo(Exporter &exporter,
ACT_TANH, /**< tanh */
ACT_SIGMOID, /**< sigmoid */
ACT_RELU, /**< ReLU */
+ ACT_SWISH, /**< Swish */
ACT_SOFTMAX, /**< softmax */
ACT_LEAKY_RELU, /**< Leaky ReLU */
ACT_NONE, /**< no op */
struct ActivationTypeInfo {
using Enum = nntrainer::ActivationType;
static constexpr std::initializer_list<Enum> EnumList = {
- Enum::ACT_TANH, Enum::ACT_SIGMOID, Enum::ACT_RELU,
- Enum::ACT_SOFTMAX, Enum::ACT_LEAKY_RELU, Enum::ACT_NONE,
- Enum::ACT_UNKNOWN};
+ Enum::ACT_TANH, Enum::ACT_SIGMOID, Enum::ACT_RELU, Enum::ACT_SOFTMAX,
+ Enum::ACT_LEAKY_RELU, Enum::ACT_SWISH, Enum::ACT_NONE, Enum::ACT_UNKNOWN};
- static constexpr const char *EnumStr[] = {
- "tanh", "sigmoid", "relu", "softmax", "leaky_relu", "none", "unknown"};
+ static constexpr const char *EnumStr[] = {"tanh", "sigmoid", "relu",
+ "softmax", "leaky_relu", "swish",
+ "none", "unknown"};
};
/**
nntrainer::ActivationLayer::type, {"activation=relu"},
LayerCreateSetPropertyOptions::AVAILABLE_FROM_APP_CONTEXT, false, 1);
+auto semantic_activation_swish = LayerSemanticsParamType(
+ nntrainer::createLayer<nntrainer::ActivationLayer>,
+ nntrainer::ActivationLayer::type, {"activation=swish"},
+ LayerCreateSetPropertyOptions::AVAILABLE_FROM_APP_CONTEXT, false, 1);
+
auto semantic_activation_sigmoid = LayerSemanticsParamType(
nntrainer::createLayer<nntrainer::ActivationLayer>,
nntrainer::ActivationLayer::type, {"activation=sigmoid"},
nntrainer::ActivationLayer::type, {"activation=none"},
LayerCreateSetPropertyOptions::AVAILABLE_FROM_APP_CONTEXT, false, 1);
-GTEST_PARAMETER_TEST(Activation, LayerSemantics,
- ::testing::Values(semantic_activation_relu,
- semantic_activation_sigmoid,
- semantic_activation_softmax,
- semantic_activation_tanh,
- semantic_activation_none));
+GTEST_PARAMETER_TEST(
+ Activation, LayerSemantics,
+ ::testing::Values(semantic_activation_relu, semantic_activation_swish,
+ semantic_activation_sigmoid, semantic_activation_softmax,
+ semantic_activation_tanh, semantic_activation_none));
}
}
+TEST(nntrainer_activation, swish_01_p) {
+ int batch = 3;
+ int channel = 1;
+ int height = 1;
+ int width = 10;
+ float answer[30] = {
+ -0.16052495, -0.12766725, -0.0900332, -0.04750208, 0,
+ 0.05249792, 0.10996679, 0.17233276, 0.23947506, 0.31122968,
+ -0.24802041, -0.21260624, -0.16052495, -0.0900332, 0,
+ 0.10996679, 0.23947506, 0.3873938, 0.5519796, 0.73105854,
+ -0.27777028, -0.26014543, -0.21260624, -0.12766725, 0,
+ 0.17233276, 0.3873938, 0.6398545, 0.9222298, 1.2263616};
+
+ nntrainer::Tensor input(batch, channel, height, width);
+ GEN_TEST_INPUT(input, (l - 4) * 0.1 * (i + 1));
+
+ nntrainer::Tensor results(batch, channel, height, width);
+  nntrainer::ActiFunc::swish(input, results);
+
+ float *data = results.getData();
+ ASSERT_NE(nullptr, data);
+ float *indata = input.getData();
+ ASSERT_NE(nullptr, indata);
+
+ for (int i = 0; i < batch * height * width; ++i) {
+ EXPECT_NEAR(data[i], answer[i], tolerance);
+ }
+}
+
+TEST(nntrainer_activation, swishPrime_01_p) {
+
+ int batch = 3;
+ int channel = 1;
+ int height = 1;
+ int width = 10;
+ float answer[30] = {
+ 0.30520803, 0.35221997, 0.40066269, 0.45008320, 0.50000000, 0.54991674,
+ 0.59933728, 0.64778000, 0.69479191, 0.73996115, 0.13889773, 0.21707317,
+ 0.30520803, 0.40066269, 0.50000000, 0.59933728, 0.69479191, 0.78292680,
+ 0.86110222, 0.92767054, 0.01800188, 0.10410020, 0.21707317, 0.35221997,
+ 0.50000000, 0.64778000, 0.78292680, 0.89589977, 0.98199815, 1.04129410};
+
+ nntrainer::Tensor input(batch, channel, height, width);
+ GEN_TEST_INPUT(input, (l - 4) * 0.1 * (i + 1));
+
+ nntrainer::Tensor results(batch, channel, height, width);
+ nntrainer::ActiFunc::swish(input, results);
+
+ nntrainer::Tensor prime_results(batch, channel, height, width);
+ nntrainer::ActiFunc::swishPrime(input, results, prime_results);
+
+ float *data = prime_results.getData();
+ ASSERT_NE(nullptr, data);
+
+ for (int i = 0; i < batch * height * width; ++i) {
+ EXPECT_NEAR(data[i], answer[i], tolerance);
+ }
+}
+
/**
* @brief Main gtest
*/