[activation] add swish activation function
author Seungbaek Hong <sb92.hong@samsung.com>
Mon, 9 Jan 2023 07:03:13 +0000 (16:03 +0900)
committer jijoong.moon <jijoong.moon@samsung.com>
Tue, 4 Apr 2023 01:39:05 +0000 (10:39 +0900)
Added the swish activation function.

Computing its derivative requires both the input and the output of
the activation, so run_prime_fn (and setActivation) is overloaded
with a variant that takes the activation input as well when
calculating the derivative.
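
For reference, a minimal scalar sketch (helper names are hypothetical, not
part of this patch) of the math the new functions implement; it shows why
the backward pass uses both the stored input and the forward output:

#include <cmath>

// sigmoid(x) = 1 / (1 + exp(-x))
static float sigmoid_ref(float x) { return 1.0f / (1.0f + std::exp(-x)); }

// swish(x) = x * sigmoid(x)
static float swish_ref(float x) { return x * sigmoid_ref(x); }

// d/dx swish(x) = sigmoid(x) + x * sigmoid(x) * (1 - sigmoid(x))
//               = sigmoid(x) * (1 - swish(x)) + swish(x)
// i.e. it can be evaluated from sigmoid(input) and the forward output,
// which is what swishPrime(t_in, t_out, ...) relies on.
static float swish_prime_ref(float x) {
  const float s = sigmoid_ref(x);
  const float y = swish_ref(x); // forward output
  return s * (1.0f - y) + y;
}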

- add swish activation (a short usage sketch of the new ccapi helper follows below)
- add test cases for the swish activation function
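
A brief usage sketch of the new ccapi helper (a sketch only: it assumes the
helper sits next to the existing ReLU()/Tanh() helpers in the
ml::train::layer namespace, and the include path and "name" property are
illustrative):

#include <layer.h>
#include <memory>

std::unique_ptr<ml::train::Layer> make_swish_activation() {
  // Forwards to Activation("Activation=swish", properties), like the other
  // activation helpers; extra properties are passed through unchanged.
  return ml::train::layer::Swish({"name=swish0"});
}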

**Self evaluation:**
1. Build test: [x]Passed []Failed []Skipped
2. Run test: [x]Passed []Failed []Skipped

Signed-off-by: Seungbaek Hong <sb92.hong@samsung.com>
api/ccapi/include/layer.h
nntrainer/layers/acti_func.cpp
nntrainer/layers/acti_func.h
nntrainer/layers/activation_layer.cpp
nntrainer/layers/common_properties.h
test/unittest/layers/unittest_layers_activation.cpp
test/unittest/unittest_nntrainer_activations.cpp

diff --git a/api/ccapi/include/layer.h b/api/ccapi/include/layer.h
index fecab71757603826f02e0560b193feabe6b3d2d5..cdf2a13da5e8ae2feffb907abc3e1f52c9d3a82b 100644
@@ -521,6 +521,14 @@ ReLU(const std::vector<std::string> &properties = {}) {
   return Activation("Activation=relu", properties);
 }
 
+/**
+ * @brief Helper function to create swish activation layer
+ */
+inline std::unique_ptr<Layer>
+Swish(const std::vector<std::string> &properties = {}) {
+  return Activation("Activation=swish", properties);
+}
+
 /**
  * @brief Helper function to create Tanh layer
  */
diff --git a/nntrainer/layers/acti_func.cpp b/nntrainer/layers/acti_func.cpp
index c47e15fb13b39911c6a3280be25d6af4d723d80f..fe52e00cd9f7eec125675875553ecd08ed3d418f 100644
@@ -39,6 +39,24 @@ int ActiFunc::setActivation(
   std::function<Tensor &(Tensor &, Tensor &, Tensor const &)> const
     &activation_prime_fn) {
   _act_fn = activation_fn;
+  _act_prime_fn =
+    [activation_prime_fn](Tensor const &t_in, Tensor &t_out,
+                          Tensor &outgoing_derivative,
+                          Tensor const &incoming_derivative) -> Tensor & {
+    return activation_prime_fn(t_out, outgoing_derivative, incoming_derivative);
+  };
+
+  return ML_ERROR_NONE;
+}
+
+int ActiFunc::setActivation(
+  std::function<Tensor &(Tensor const &, Tensor &)> const &activation_fn,
+  std::function<Tensor &(Tensor const &, Tensor const &, Tensor &,
+                         Tensor const &)> const &activation_prime_fn) {
+  if (in_place)
+    return ML_ERROR_INVALID_PARAMETER;
+
+  _act_fn = activation_fn;
   _act_prime_fn = activation_prime_fn;
 
   return ML_ERROR_NONE;
@@ -50,22 +68,24 @@ int ActiFunc::setActivation(
   _act_fn = activation_fn;
   if (!in_place) {
     _act_prime_fn =
-      [activation_prime_fn](Tensor &x, Tensor &ret_derivative,
-                            Tensor const &derivative) -> Tensor & {
+      [activation_prime_fn](Tensor const &t_in, Tensor &t_out,
+                            Tensor &outgoing_derivative,
+                            Tensor const &incoming_derivative) -> Tensor & {
       /** @todo update this based on supportInPlace */
-      activation_prime_fn(x, ret_derivative);
-      ret_derivative.multiply_i_strided(derivative);
+      activation_prime_fn(t_out, outgoing_derivative);
+      outgoing_derivative.multiply_i_strided(incoming_derivative);
 
-      return ret_derivative;
+      return outgoing_derivative;
     };
   } else {
     _act_prime_fn =
-      [activation_prime_fn](Tensor &x, Tensor &ret_derivative,
-                            Tensor const &derivative) -> Tensor & {
-      activation_prime_fn(x, x);
-      derivative.multiply_strided(x, ret_derivative);
+      [activation_prime_fn](Tensor const &t_in, Tensor &t_out,
+                            Tensor &outgoing_derivative,
+                            Tensor const &incoming_derivative) -> Tensor & {
+      activation_prime_fn(t_out, t_out);
+      incoming_derivative.multiply_strided(t_out, outgoing_derivative);
 
-      return ret_derivative;
+      return outgoing_derivative;
     };
   }
 
@@ -80,22 +100,24 @@ int ActiFunc::setActivation(
   };
   if (!in_place) {
     _act_prime_fn =
-      [activation_prime_fn](Tensor &x, Tensor &ret_derivative,
-                            Tensor const &derivative) -> Tensor & {
+      [activation_prime_fn](Tensor const &t_in, Tensor &t_out,
+                            Tensor &outgoing_derivative,
+                            Tensor const &incoming_derivative) -> Tensor & {
       /** @todo update this based on supportInPlace */
-      x.apply(activation_prime_fn, ret_derivative);
-      ret_derivative.multiply_i_strided(derivative);
+      t_out.apply(activation_prime_fn, outgoing_derivative);
+      outgoing_derivative.multiply_i_strided(incoming_derivative);
 
-      return ret_derivative;
+      return outgoing_derivative;
     };
   } else {
     _act_prime_fn =
-      [activation_prime_fn](Tensor &x, Tensor &ret_derivative,
-                            Tensor const &derivative) -> Tensor & {
-      x.apply(activation_prime_fn, x);
-      derivative.multiply_strided(x, ret_derivative);
+      [activation_prime_fn](Tensor const &t_in, Tensor &t_out,
+                            Tensor &outgoing_derivative,
+                            Tensor const &incoming_derivative) -> Tensor & {
+      t_out.apply(activation_prime_fn, t_out);
+      incoming_derivative.multiply_strided(t_out, outgoing_derivative);
 
-      return ret_derivative;
+      return outgoing_derivative;
     };
   }
 
@@ -126,6 +148,10 @@ void ActiFunc::setActiFunc(ActivationType acti_type) {
   case ActivationType::ACT_LEAKY_RELU:
     this->setActivation(leakyRelu, leakyReluPrime);
     break;
+  case ActivationType::ACT_SWISH:
+    in_place = false;
+    this->setActivation(swish, swishPrime);
+    break;
   case ActivationType::ACT_NONE:
     this->setActivation(no_op, no_op_prime);
     break;
@@ -139,9 +165,16 @@ void ActiFunc::run_fn(Tensor const &input, Tensor &output) {
   _act_fn(input, output);
 }
 
+Tensor &ActiFunc::run_prime_fn(Tensor &input, Tensor &output,
+                               Tensor &outgoing_derivative,
+                               Tensor const &incoming_derivative) {
+  return _act_prime_fn(input, output, outgoing_derivative, incoming_derivative);
+}
+
 Tensor &ActiFunc::run_prime_fn(Tensor &output, Tensor &outgoing_derivative,
                                Tensor const &incoming_derivative) {
-  return _act_prime_fn(output, outgoing_derivative, incoming_derivative);
+  return _act_prime_fn(Tensor(), output, outgoing_derivative,
+                       incoming_derivative);
 }
 
 bool ActiFunc::supportInPlace() const { return in_place; }
@@ -299,6 +332,30 @@ float ActiFunc::leakyReluPrime(float x) {
   return x >= 0.0f ? 1.0f : NEGATIVE_SLOPE;
 }
 
+Tensor &ActiFunc::swish(Tensor const &t_in, Tensor &t_out) {
+  t_in.apply([&](float x) { return sigmoid(x); }, t_out);
+  t_out.multiply_i(t_in);
+
+  return t_out;
+}
+
+Tensor &ActiFunc::swishPrime(Tensor const &t_in, Tensor const &t_out,
+                             Tensor &outgoing_derivative,
+                             Tensor const &incoming_derivative) {
+  if (outgoing_derivative.empty())
+    outgoing_derivative = Tensor(t_out.getDim());
+
+  Tensor tmp = Tensor(t_out.getDim());
+  t_in.apply([&](float x) { return sigmoid(x); }, outgoing_derivative);
+  t_out.apply([&](float x) { return 1 - x; }, tmp);
+  outgoing_derivative.multiply_i(tmp);
+  outgoing_derivative.add_i(t_out);
+
+  outgoing_derivative.multiply_i_strided(incoming_derivative);
+
+  return outgoing_derivative;
+}
+
 void ActiFunc::executeInPlace(bool val) {
   if (val && !supportInPlace())
     throw std::runtime_error("Error setting activation layer to work in-place");
diff --git a/nntrainer/layers/acti_func.h b/nntrainer/layers/acti_func.h
index 10c5b8301d77013ab4322889df746094da61e17b..b6d14c19e615367f1fe2f5ec4ae6720022aa2a9b 100644
@@ -54,6 +54,19 @@ public:
    */
   void run_fn(Tensor const &input, Tensor &output);
 
+  /**
+   * @brief run prime function
+   *
+   * @param[in] input input of the activation
+   * @param[in] output output of the activation
+   * @param[out] outgoing_derivative outgoing derivative
+   * @param[in] incoming_derivative incoming derivative
+   * @retval    Tensor
+   */
+  Tensor &run_prime_fn(Tensor &input, Tensor &output,
+                       Tensor &outgoing_derivative,
+                       Tensor const &incoming_derivative);
+
   /**
    * @brief run prime function
    *
@@ -158,6 +171,24 @@ public:
    */
   static float leakyReluPrime(float x);
 
+  /**
+   * @brief     swish activation function
+   * @param[in] t_in input tensor
+   * @param[out] t_out output tensor
+   */
+  static Tensor &swish(Tensor const &t_in, Tensor &t_out);
+
+  /**
+   * @brief     derivative of the swish function
+   * @param[in] t_in input tensor
+   * @param[in] t_out output tensor
+   * @param[out] outgoing_derivative outgoing derivative
+   * @param[in] incoming_derivative incoming derivative
+   */
+  static Tensor &swishPrime(Tensor const &t_in, Tensor const &t_out,
+                            Tensor &outgoing_derivative,
+                            Tensor const &incoming_derivative = Tensor());
+
   /**
    * @brief setActivation by custom activation function
    * @note  apply derivative as this activation_prime_fn does not utilize
@@ -187,6 +218,19 @@ public:
     std::function<Tensor &(Tensor &, Tensor &, Tensor const &)> const
       &activation_prime_fn);
 
+  /**
+   * @brief setActivation by custom activation function
+   * @note  the derivative is not applied here, as this activation_prime_fn
+   * applies the derivative itself
+   * @param[in] activation_fn activation function to be used
+   * @param[in] activation_prime_fn activation prime function to be used
+   * @retval #ML_ERROR_NONE when successful
+   */
+  int setActivation(
+    std::function<Tensor &(Tensor const &, Tensor &)> const &activation_fn,
+    std::function<Tensor &(Tensor const &, Tensor const &, Tensor &,
+                           Tensor const &)> const &activation_prime_fn);
+
   /**
    * @brief setActivation by custom activation function
    * @note  apply derivative as this activation_prime_fn does not utilize
@@ -201,6 +245,20 @@ public:
     std::function<float(float const)> const &activation_fn,
     std::function<float(float const)> const &activation_prime_fn);
 
+  /**
+   * @brief setActivation by custom activation function
+   * @note  apply derivative as this activation_prime_fn does not utilize
+   * derivative
+   * @param[in] std::function<float(float const)> activation_fn activation
+   * function to be used
+   * @param[in] std::function<float(float const, float const)>
+   * activation_prime_fn activation prime function to be used
+   * @retval #ML_ERROR_NONE when successful
+   */
+  int setActivation(
+    std::function<float(float const)> const &activation_fn,
+    std::function<float(float const, float const)> const &activation_prime_fn);
+
   /**
    * @brief   Notify that this layer will execute in-place
    *
@@ -210,7 +268,8 @@ public:
 
 private:
   std::function<Tensor &(Tensor const &, Tensor &)> _act_fn;
-  std::function<Tensor &(Tensor &, Tensor &, Tensor const &)> _act_prime_fn;
+  std::function<Tensor &(Tensor const &, Tensor &, Tensor &, Tensor const &)>
+    _act_prime_fn; /**< prime function with input and output */
 
   ActivationType
     activation_type; /**< type of the activation represented by this */
diff --git a/nntrainer/layers/activation_layer.cpp b/nntrainer/layers/activation_layer.cpp
index 6f4a0388536247e824503b30d958563b2ad1a88b..484b3feb4b70069ec113ebf17e4fc068135072b9 100644
@@ -69,9 +69,10 @@ void ActivationLayer::forwarding(RunLayerContext &context, bool training) {
 void ActivationLayer::calcDerivative(RunLayerContext &context) {
   const Tensor &deriv = context.getIncomingDerivative(SINGLE_INOUT_IDX);
   Tensor &ret = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
+  Tensor &in = context.getInput(SINGLE_INOUT_IDX);
   Tensor &out = context.getOutput(SINGLE_INOUT_IDX);
 
-  acti_func.run_prime_fn(out, ret, deriv);
+  acti_func.run_prime_fn(in, out, ret, deriv);
 }
 
 void ActivationLayer::exportTo(Exporter &exporter,
diff --git a/nntrainer/layers/common_properties.h b/nntrainer/layers/common_properties.h
index bf42d0fc4bf23603b5b68e58d80d57031ea6a4d2..143dce6790dd331ca9895b3bd901c1d732b80bf1 100644
@@ -33,6 +33,7 @@ enum class ActivationType {
   ACT_TANH,       /**< tanh */
   ACT_SIGMOID,    /**< sigmoid */
   ACT_RELU,       /**< ReLU */
+  ACT_SWISH,      /**< Swish */
   ACT_SOFTMAX,    /**< softmax */
   ACT_LEAKY_RELU, /**< Leaky ReLU */
   ACT_NONE,       /**< no op */
@@ -839,12 +840,12 @@ public:
 struct ActivationTypeInfo {
   using Enum = nntrainer::ActivationType;
   static constexpr std::initializer_list<Enum> EnumList = {
-    Enum::ACT_TANH,    Enum::ACT_SIGMOID,    Enum::ACT_RELU,
-    Enum::ACT_SOFTMAX, Enum::ACT_LEAKY_RELU, Enum::ACT_NONE,
-    Enum::ACT_UNKNOWN};
+    Enum::ACT_TANH,       Enum::ACT_SIGMOID, Enum::ACT_RELU, Enum::ACT_SOFTMAX,
+    Enum::ACT_LEAKY_RELU, Enum::ACT_SWISH,   Enum::ACT_NONE, Enum::ACT_UNKNOWN};
 
-  static constexpr const char *EnumStr[] = {
-    "tanh", "sigmoid", "relu", "softmax", "leaky_relu", "none", "unknown"};
+  static constexpr const char *EnumStr[] = {"tanh",    "sigmoid",    "relu",
+                                            "softmax", "leaky_relu", "swish",
+                                            "none",    "unknown"};
 };
 
 /**
diff --git a/test/unittest/layers/unittest_layers_activation.cpp b/test/unittest/layers/unittest_layers_activation.cpp
index 1fe62884312daad2d233b21c823e70ca7da4dfc9..a43f8fc94e2bfff1848fa38f87c584f617fb5fd8 100644
@@ -21,6 +21,11 @@ auto semantic_activation_relu = LayerSemanticsParamType(
   nntrainer::ActivationLayer::type, {"activation=relu"},
   LayerCreateSetPropertyOptions::AVAILABLE_FROM_APP_CONTEXT, false, 1);
 
+auto semantic_activation_swish = LayerSemanticsParamType(
+  nntrainer::createLayer<nntrainer::ActivationLayer>,
+  nntrainer::ActivationLayer::type, {"activation=swish"},
+  LayerCreateSetPropertyOptions::AVAILABLE_FROM_APP_CONTEXT, false, 1);
+
 auto semantic_activation_sigmoid = LayerSemanticsParamType(
   nntrainer::createLayer<nntrainer::ActivationLayer>,
   nntrainer::ActivationLayer::type, {"activation=sigmoid"},
@@ -41,9 +46,8 @@ auto semantic_activation_none = LayerSemanticsParamType(
   nntrainer::ActivationLayer::type, {"activation=none"},
   LayerCreateSetPropertyOptions::AVAILABLE_FROM_APP_CONTEXT, false, 1);
 
-GTEST_PARAMETER_TEST(Activation, LayerSemantics,
-                     ::testing::Values(semantic_activation_relu,
-                                       semantic_activation_sigmoid,
-                                       semantic_activation_softmax,
-                                       semantic_activation_tanh,
-                                       semantic_activation_none));
+GTEST_PARAMETER_TEST(
+  Activation, LayerSemantics,
+  ::testing::Values(semantic_activation_relu, semantic_activation_swish,
+                    semantic_activation_sigmoid, semantic_activation_softmax,
+                    semantic_activation_tanh, semantic_activation_none));
diff --git a/test/unittest/unittest_nntrainer_activations.cpp b/test/unittest/unittest_nntrainer_activations.cpp
index 7e9aa3b908e76dc25a08b8a6b32634e272fc12d6..25c1bfbae2f321cc0ef6e4ed85cdc49f970e5104 100644
@@ -289,6 +289,65 @@ TEST(nntrainer_activation, reluPrime_01_p) {
   }
 }
 
+TEST(nntrainer_activation, swish_01_p) {
+  int batch = 3;
+  int channel = 1;
+  int height = 1;
+  int width = 10;
+  float answer[30] = {
+    -0.16052495, -0.12766725, -0.0900332,  -0.04750208, 0,
+    0.05249792,  0.10996679,  0.17233276,  0.23947506,  0.31122968,
+    -0.24802041, -0.21260624, -0.16052495, -0.0900332,  0,
+    0.10996679,  0.23947506,  0.3873938,   0.5519796,   0.73105854,
+    -0.27777028, -0.26014543, -0.21260624, -0.12766725, 0,
+    0.17233276,  0.3873938,   0.6398545,   0.9222298,   1.2263616};
+
+  nntrainer::Tensor input(batch, channel, height, width);
+  GEN_TEST_INPUT(input, (l - 4) * 0.1 * (i + 1));
+
+  nntrainer::Tensor results(batch, channel, height, width);
+  nntrainer::ActiFunc::swish(input, results);
+
+  float *data = results.getData();
+  ASSERT_NE(nullptr, data);
+  float *indata = input.getData();
+  ASSERT_NE(nullptr, indata);
+
+  for (int i = 0; i < batch * height * width; ++i) {
+    EXPECT_NEAR(data[i], answer[i], tolerance);
+  }
+}
+
+TEST(nntrainer_activation, swishPrime_01_p) {
+
+  int batch = 3;
+  int channel = 1;
+  int height = 1;
+  int width = 10;
+  float answer[30] = {
+    0.30520803, 0.35221997, 0.40066269, 0.45008320, 0.50000000, 0.54991674,
+    0.59933728, 0.64778000, 0.69479191, 0.73996115, 0.13889773, 0.21707317,
+    0.30520803, 0.40066269, 0.50000000, 0.59933728, 0.69479191, 0.78292680,
+    0.86110222, 0.92767054, 0.01800188, 0.10410020, 0.21707317, 0.35221997,
+    0.50000000, 0.64778000, 0.78292680, 0.89589977, 0.98199815, 1.04129410};
+
+  nntrainer::Tensor input(batch, channel, height, width);
+  GEN_TEST_INPUT(input, (l - 4) * 0.1 * (i + 1));
+
+  nntrainer::Tensor results(batch, channel, height, width);
+  nntrainer::ActiFunc::swish(input, results);
+
+  nntrainer::Tensor prime_results(batch, channel, height, width);
+  nntrainer::ActiFunc::swishPrime(input, results, prime_results);
+
+  float *data = prime_results.getData();
+  ASSERT_NE(nullptr, data);
+
+  for (int i = 0; i < batch * height * width; ++i) {
+    EXPECT_NEAR(data[i], answer[i], tolerance);
+  }
+}
+
 /**
  * @brief Main gtest
  */