[Layer] Introduce `upsample2d` layer
author    heka1024 <heka1024@gmail.com>
Sun, 9 Jun 2024 10:55:33 +0000 (19:55 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Tue, 2 Jul 2024 07:53:58 +0000 (16:53 +0900)
Add the `upsample2d` layer to nntrainer. It supports nearest and bilinear interpolation and can be used in YOLO and other models that need spatial upsampling.

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Co-authored-by: Boseong Seo <suzy13549@snu.ac.kr>
Co-authored-by: kimhan0515 <kimhan0515@gmail.com>
Signed-off-by: heka1024 <heka1024@gmail.com>
api/ccapi/include/layer.h
nntrainer/app_context.cpp
nntrainer/layers/common_properties.h
nntrainer/layers/meson.build
nntrainer/layers/upsample2d_layer.cpp [new file with mode: 0644]
nntrainer/layers/upsample2d_layer.h [new file with mode: 0644]
packaging/unittest_layers.tar.gz
test/input_gen/gen_layer_tests.py
test/unittest/layers/meson.build
test/unittest/layers/unittest_layers_upsample2d.cpp [new file with mode: 0644]

index 81afe86ee2797405f208f68b0f62bdc3896f0225..9414f0168000d231541115f1e197b39bc0080f68 100644 (file)
@@ -100,6 +100,7 @@ enum LayerType {
   LAYER_REDUCE_MEAN,              /**< Reduce mean Layer type */
   LAYER_LOSS_CONSTANT_DERIVATIVE, /**< Synthetic loss layer to feed constant
                                      derivative */
+  LAYER_UPSAMPLE2D,               /**< Upsample 2D Layer type */
   LAYER_UNKNOWN = ML_TRAIN_LAYER_TYPE_UNKNOWN /**< Unknown */
 };
 
@@ -547,6 +548,14 @@ Identity(const std::vector<std::string> &properties = {}) {
   return createLayer(LayerType::LAYER_IDENTITY, properties);
 }
 
+/**
+ * @brief Helper function to create Upsample2d layer
+ */
+inline std::unique_ptr<Layer>
+Upsample2D(const std::vector<std::string> &properties = {}) {
+  return createLayer(LayerType::LAYER_UPSAMPLE2D, properties);
+}
+
 /**
  * @brief Helper function to create activation layer
  */
index 7c14fd8ae2339ddd9dfd3be191e19a42d949cbdf..fad75f9ae0525968d279ac8bba0ab19236b8b586 100644 (file)
@@ -73,6 +73,7 @@
 #include <rnncell.h>
 #include <split_layer.h>
 #include <time_dist.h>
+#include <upsample2d_layer.h>
 #include <zoneout_lstmcell.h>
 
 #ifdef ENABLE_TFLITE_BACKBONE
@@ -306,6 +307,8 @@ static void add_default_object(AppContext &ac) {
                      LayerType::LAYER_POSITIONAL_ENCODING);
   ac.registerFactory(nntrainer::createLayer<IdentityLayer>, IdentityLayer::type,
                      LayerType::LAYER_IDENTITY);
+  ac.registerFactory(nntrainer::createLayer<Upsample2dLayer>,
+                     Upsample2dLayer::type, LayerType::LAYER_UPSAMPLE2D);
 
 #ifdef ENABLE_NNSTREAMER_BACKBONE
   ac.registerFactory(nntrainer::createLayer<NNStreamerLayer>,
index 3c5d73300df9ce27a9e5781a3898c5589458b92e..33802beacfd7358cee3343c40f3000282d468625 100644 (file)
@@ -869,10 +869,9 @@ struct ActivationTypeInfo {
     Enum::ACT_GELU,    Enum::ACT_QUICK_GELU, Enum::ACT_NONE,
     Enum::ACT_UNKNOWN};
 
-  static constexpr const char *EnumStr[] = {"tanh",    "sigmoid",    "relu",
-                                            "softmax", "leaky_relu", "swish",
-                                            "gelu",    "quick_gelu", "none",
-                                            "unknown"};
+  static constexpr const char *EnumStr[] = {
+    "tanh",  "sigmoid", "relu",       "softmax", "leaky_relu",
+    "swish", "gelu",    "quick_gelu", "none",    "unknown"};
 };
 
 /**
@@ -1068,6 +1067,34 @@ public:
   static constexpr const char *key = "weight_regularizer";
 };
 
+/**
+ * @brief     Enumeration of upsample type
+ * @todo Support torch and keras supported modes like bicubic
+ */
+struct UpsampleModeInfo {
+  /**
+   * @brief   Upsampling operation type class
+   */
+  enum class Interpolation { nearest, bilinear };
+
+  using Enum = Interpolation;
+
+  static constexpr std::initializer_list<Interpolation> EnumList = {
+    Interpolation::nearest, Interpolation::bilinear};
+
+  static constexpr const char *EnumStr[] = {"nearest", "bilinear"};
+};
+
+/**
+ * @brief Upsample Type Enumeration Information
+ *
+ */
+class UpsampleMode final : public EnumProperty<UpsampleModeInfo> {
+public:
+  using prop_tag = enum_class_prop_tag;
+  static constexpr const char *key = "upsample";
+};
+
 /**
  * @brief     Enumeration of pooling type
  */
index 9d42a3e34bdbe3c3cd236480342ff69d13ffaf92..75a59605ab902cad91a4829199b471702e147742 100644 (file)
@@ -42,7 +42,8 @@ layer_sources = [
   'reshape_layer.cpp',
   'reduce_mean_layer.cpp',
   'positional_encoding_layer.cpp',
-  'identity_layer.cpp'
+  'identity_layer.cpp',
+  'upsample2d_layer.cpp'
 ]
 
 layer_headers = [
diff --git a/nntrainer/layers/upsample2d_layer.cpp b/nntrainer/layers/upsample2d_layer.cpp
new file mode 100644 (file)
index 0000000..e4a7fb2
--- /dev/null
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2024 heka1024 <heka1024@gmail.com>
+ *
+ * @file   upsample2d_layer.cpp
+ * @date   15 June 2024
+ * @brief  Implementation of the upsample2d layer for a given kernel size and
+ * interpolation method
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author heka1024 <heka1024@gmail.com>
+ * @bug    No known bugs except for NYI items
+ */
+
+#include <layer_context.h>
+#include <node_exporter.h>
+#include <upsample2d_layer.h>
+
+namespace nntrainer {
+static constexpr size_t SINGLE_INOUT_IDX = 0;
+
+Upsample2dLayer::Upsample2dLayer() :
+  Layer(),
+  upsample2d_props(props::UpsampleMode(),
+                   std::array<props::KernelSize, UPSAMPLE2D_DIM>()) {}
+
+void Upsample2dLayer::finalize(nntrainer::InitLayerContext &context) {
+  std::vector<nntrainer::TensorDim> dim = context.getInputDimensions();
+
+  const auto &kernel_size =
+    std::get<std::array<props::KernelSize, UPSAMPLE2D_DIM>>(upsample2d_props);
+
+  for (unsigned int i = 0; i < dim.size(); ++i) {
+    if (dim[i].getDataLen() == 0) {
+      throw std::invalid_argument("Input dimension is not set");
+    } else {
+      dim[i].channel(dim[i].channel()); // channel count is unchanged
+      dim[i].height(dim[i].height() * kernel_size[0]);
+      dim[i].width(dim[i].width() * kernel_size[1]);
+    }
+  }
+
+  context.setOutputDimensions(dim);
+}
+
+void Upsample2dLayer::forwarding(nntrainer::RunLayerContext &context,
+                                 bool training) {
+  nntrainer::Tensor &in = context.getInput(SINGLE_INOUT_IDX);
+  nntrainer::Tensor &out = context.getOutput(SINGLE_INOUT_IDX);
+
+  const auto &upsampling_type =
+    std::get<props::UpsampleMode>(upsample2d_props).get();
+  const auto &kernel_size =
+    std::get<std::array<props::KernelSize, UPSAMPLE2D_DIM>>(upsample2d_props);
+
+  switch (upsampling_type) {
+  case props::UpsampleModeInfo::Interpolation::nearest:
+    for (int b = 0; b < (int)out.batch(); b++) {
+      for (int c = 0; c < (int)out.channel(); c++) {
+        for (int h = 0; h < (int)out.height(); h++) {
+          for (int w = 0; w < (int)out.width(); w++) {
+            out.setValue(
+              b, c, h, w,
+              in.getValue(b, c, h / kernel_size[0], w / kernel_size[1]));
+          }
+        }
+      }
+    }
+    break;
+  case props::UpsampleModeInfo::Interpolation::bilinear: {
+    float scale_h = kernel_size[0];
+    float scale_w = kernel_size[1];
+
+    for (int b = 0; b < (int)out.batch(); b++) {
+      for (int c = 0; c < (int)out.channel(); c++) {
+        for (int h = 0; h < (int)out.height(); h++) {
+          for (int w = 0; w < (int)out.width(); w++) {
+            float x_in = (w + 0.5f) / scale_w - 0.5f;
+            float y_in = (h + 0.5f) / scale_h - 0.5f;
+
+            if (x_in < 0) {
+              x_in = 0.0f;
+            }
+            if (y_in < 0) {
+              y_in = 0.0f;
+            }
+
+            int x0 = static_cast<int>(floor(x_in));
+            int y0 = static_cast<int>(floor(y_in));
+            int x1 = std::min(x0 + 1, (int)in.width() - 1);
+            int y1 = std::min(y0 + 1, (int)in.height() - 1);
+
+            float dx = x_in - x0;
+            float dy = y_in - y0;
+
+            float top = (1.0f - dx) * in.getValue(b, c, y0, x0) +
+                        dx * in.getValue(b, c, y0, x1);
+            float bottom = (1.0f - dx) * in.getValue(b, c, y1, x0) +
+                           dx * in.getValue(b, c, y1, x1);
+            float v = (1.0f - dy) * top + dy * bottom;
+            out.setValue(b, c, h, w, v);
+          }
+        }
+      }
+    }
+  } break;
+  default:
+    throw std::runtime_error("Error: Unknown Upsample Mode Type");
+  }
+}
+
+void Upsample2dLayer::calcDerivative(nntrainer::RunLayerContext &context) {
+  const nntrainer::Tensor &derivative_ =
+    context.getIncomingDerivative(SINGLE_INOUT_IDX);
+
+  nntrainer::Tensor &dx = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
+
+  const auto &kernel_size =
+    std::get<std::array<props::KernelSize, UPSAMPLE2D_DIM>>(upsample2d_props);
+  const auto &upsampling_type =
+    std::get<props::UpsampleMode>(upsample2d_props).get();
+
+  switch (upsampling_type) {
+  case props::UpsampleModeInfo::Interpolation::nearest: {
+    float val = 0;
+    for (int b = 0; b < (int)derivative_.batch(); b++) {
+      for (int c = 0; c < (int)derivative_.channel(); c++) {
+        for (int h = 0; h < (int)derivative_.height(); h++) {
+          for (int w = 0; w < (int)derivative_.width(); w++) {
+            if (h % kernel_size[0] == 0 && w % kernel_size[1] == 0) {
+              dx.setValue(b, c, h / kernel_size[0], w / kernel_size[1], 0);
+            }
+
+            val = dx.getValue(b, c, h / kernel_size[0], w / kernel_size[1]) +
+                  derivative_.getValue(b, c, h, w);
+            dx.setValue(b, c, h / kernel_size[0], w / kernel_size[1], val);
+          }
+        }
+      }
+    }
+  } break;
+  case props::UpsampleModeInfo::Interpolation::bilinear: {
+    dx.setZero();
+
+    int input_height = dx.height();
+    int input_width = dx.width();
+
+    for (int b = 0; b < (int)derivative_.batch(); b++) {
+      for (int c = 0; c < (int)derivative_.channel(); c++) {
+        for (int h = 0; h < (int)derivative_.height(); h++) {
+          for (int w = 0; w < (int)derivative_.width(); w++) {
+            float in_h = (h + 0.5f) / kernel_size[0] - 0.5f;
+            float in_w = (w + 0.5f) / kernel_size[1] - 0.5f;
+
+            if (in_h < 0) {
+              in_h = 0.0f;
+            }
+            if (in_w < 0) {
+              in_w = 0.0f;
+            }
+
+            int y0 = static_cast<int>(floor(in_h));
+            int x0 = static_cast<int>(floor(in_w));
+            int y1 = std::min(y0 + 1, input_height - 1);
+            int x1 = std::min(x0 + 1, input_width - 1);
+
+            float dx_ = (in_w - x0); // Due to name conflict with dx
+            float dy_ = (in_h - y0);
+
+            float top_left_weight = (1.0 - dy_) * (1.0 - dx_);
+            float top_right_weight = (1.0 - dy_) * dx_;
+            float bottom_left_weight = dy_ * (1.0 - dx_);
+            float bottom_right_weight = dy_ * dx_;
+
+            float grad = derivative_.getValue(b, c, h, w);
+
+            dx.addValue(b, c, y0, x0, top_left_weight * grad, 1.0f);
+            dx.addValue(b, c, y0, x1, top_right_weight * grad, 1.0f);
+            dx.addValue(b, c, y1, x0, bottom_left_weight * grad, 1.0f);
+            dx.addValue(b, c, y1, x1, bottom_right_weight * grad, 1.0f);
+          }
+        }
+      }
+    }
+  } break;
+  default:
+    throw std::runtime_error("Error: Unknown Upsample Mode Type");
+  }
+}
+
+void Upsample2dLayer::setProperty(const std::vector<std::string> &values) {
+  auto remain_props = loadProperties(values, upsample2d_props);
+
+  if (!remain_props.empty()) {
+    std::string msg = "[Upsample2dLayer] Unknown properties set with count " +
+                      std::to_string(values.size());
+    throw exception::not_supported(msg);
+  }
+}
+} // namespace nntrainer
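
To make the interpolation above easier to check by hand, here is a standalone sketch (not part of the patch) that applies the same nearest and bilinear index arithmetic as `forwarding()` to a 2x2 input upsampled by a factor of 2:

```cpp
// Standalone illustration of the index arithmetic used in forwarding(),
// applied to a 2x2 input with kernel_size = 2,2.
#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  const int in_h = 2, in_w = 2, scale = 2;
  const float in[2][2] = {{1.f, 2.f}, {3.f, 4.f}};

  // Nearest: out(h, w) = in(h / scale, w / scale), so the 2x2 input becomes
  //   1 1 2 2
  //   1 1 2 2
  //   3 3 4 4
  //   3 3 4 4

  // Bilinear: map each output pixel back to fractional input coordinates,
  // clamp at the borders, and blend the four surrounding input values.
  for (int h = 0; h < in_h * scale; ++h) {
    for (int w = 0; w < in_w * scale; ++w) {
      float x_in = std::max((w + 0.5f) / scale - 0.5f, 0.0f);
      float y_in = std::max((h + 0.5f) / scale - 0.5f, 0.0f);
      int x0 = static_cast<int>(std::floor(x_in));
      int y0 = static_cast<int>(std::floor(y_in));
      int x1 = std::min(x0 + 1, in_w - 1);
      int y1 = std::min(y0 + 1, in_h - 1);
      float dx = x_in - x0, dy = y_in - y0;
      float v = (1.f - dy) * ((1.f - dx) * in[y0][x0] + dx * in[y0][x1]) +
                dy * ((1.f - dx) * in[y1][x0] + dx * in[y1][x1]);
      std::printf("%5.2f%c", v, w + 1 == in_w * scale ? '\n' : ' ');
    }
  }
  return 0;
}
```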
diff --git a/nntrainer/layers/upsample2d_layer.h b/nntrainer/layers/upsample2d_layer.h
new file mode 100644 (file)
index 0000000..d0628a1
--- /dev/null
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2024 heka1024 <heka1024@gmail.com>
+ *
+ * @file   upsample2d_layer.h
+ * @date   15 June 2024
+ * @brief  This is Upsample2d Layer Class of Neural Network
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author heka1024 <heka1024@gmail.com>
+ * @bug    No known bugs except for NYI items
+ *
+ */
+
+#ifndef __UPSAMPLE2D_LAYER_H__
+#define __UPSAMPLE2D_LAYER_H__
+#ifdef __cplusplus
+
+#include <common_properties.h>
+#include <layer_impl.h>
+
+#include <node_exporter.h>
+
+namespace nntrainer {
+
+constexpr const unsigned int UPSAMPLE2D_DIM = 2;
+
+/**
+ * @class   Upsample2dLayer
+ * @brief   Upsample 2D layer
+ */
+class Upsample2dLayer : public Layer {
+public:
+  /**
+   * @brief Construct a new Upsample layer object
+   *
+   */
+  Upsample2dLayer();
+
+  /**
+   * @brief Destroy the Upsample layer object
+   *
+   */
+  ~Upsample2dLayer() {}
+
+  /**
+   * @copydoc Layer::finalize(InitLayerContext &context)
+   */
+  void finalize(nntrainer::InitLayerContext &context) override;
+
+  /**
+   * @copydoc Layer::forwarding(RunLayerContext &context, bool training)
+   */
+  void forwarding(nntrainer::RunLayerContext &context, bool training) override;
+
+  /**
+   * @copydoc Layer::calcDerivative(RunLayerContext &context)
+   */
+  void calcDerivative(nntrainer::RunLayerContext &context) override;
+
+  /**
+   * @copydoc bool supportBackwarding() const
+   */
+  bool supportBackwarding() const override { return true; };
+
+  /**
+   * @copydoc Layer::exportTo(Exporter &exporter, ExportMethods method)
+   */
+  void exportTo(nntrainer::Exporter &exporter,
+                const ml::train::ExportMethods &method) const override{};
+
+  /**
+   * @copydoc Layer::getType()
+   */
+  const std::string getType() const override { return Upsample2dLayer::type; };
+
+  /**
+   * @copydoc Layer::setProperty(const std::vector<std::string> &values)
+   */
+  void setProperty(const std::vector<std::string> &values) override;
+
+  inline static const std::string type = "upsample2d";
+
+private:
+  std::tuple<props::UpsampleMode, std::array<props::KernelSize, UPSAMPLE2D_DIM>>
+    upsample2d_props; /* mode, size of kernel */
+};
+} // namespace nntrainer
+
+#endif /* __cplusplus */
+#endif /* __UPSAMPLE2D_LAYER_H__ */
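
For reference, a small sketch of constructing the internal layer directly and feeding it the new properties, much as the unit tests below do through `nntrainer::createLayer<Upsample2dLayer>`; this is illustrative only, and the include path is assumed to match nntrainer's internal layout:

```cpp
// Illustrative sketch (not part of the patch): create the layer and set the
// properties it accepts.
#include <upsample2d_layer.h>

#include <memory>
#include <string>
#include <vector>

std::unique_ptr<nntrainer::Layer> makeUpsample() {
  auto layer = std::make_unique<nntrainer::Upsample2dLayer>();
  // Unknown keys would make setProperty throw exception::not_supported.
  layer->setProperty({"upsample=nearest", "kernel_size=2,2"});
  return layer;
}
```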
index 7a435aadf450e91772b7af0e61af527a3b404952..12f3b86d7f84aa437fe7140d349e74ef8cd4bf19 100644 (file)
Binary files a/packaging/unittest_layers.tar.gz and b/packaging/unittest_layers.tar.gz differ
index 99017d071fcfb8b8ee41bbf209622e24b2f28d8a..ccb8f223c620dbdf096e07cba89e9a28114b2732 100644 (file)
@@ -172,6 +172,21 @@ if __name__ == "__main__":
     record_single(conv, (1, 3, 11, 11), "conv2d_sb_same_dilation")
     record_single(conv, (3, 3, 11, 11), "conv2d_mb_same_dilation")
 
+    upsample = K.layers.UpSampling2D(size=(2, 2), interpolation="nearest", input_shape=(2, 2, 1))
+    record_single(upsample, (1, 2, 2, 1), "upsample2d_2x2_nearest")  # input_shape: n h w c
+
+    upsample = K.layers.UpSampling2D(size=(3, 3), interpolation="nearest", input_shape=(3, 3, 1))
+    record_single(upsample, (1, 3, 3, 1), "upsample2d_3x3_nearest")
+
+    upsample = K.layers.UpSampling2D(size=(2, 2), interpolation="bilinear", input_shape=(2, 2, 1))
+    record_single(upsample, (1, 2, 2, 1), "upsample2d_2x2_bilinear")  # input_shape: n h w c
+
+    upsample = K.layers.UpSampling2D(size=(3, 3), interpolation="bilinear", input_shape=(3, 3, 1))
+    record_single(upsample, (1, 3, 3, 1), "upsample2d_3x3_bilinear")
+
+    upsample = K.layers.UpSampling2D(size=(4, 4), interpolation="bilinear", input_shape=(10, 10, 1))
+    record_single(upsample, (1, 10, 10, 1), "upsample2d_big_bilinear")
+
     # use float data to generate input here
     attention = K.layers.Attention()
     record_single(
index 5aae748adfae852cdedabf5c0a0524c05cde7d84..59992b17f607811bbb30bc8494e505b0525f4a59 100644 (file)
@@ -64,6 +64,7 @@ test_target = [
   # 'unittest_layers_mol_attention.cpp',
   'unittest_layers_multi_head_attention.cpp',
   'unittest_layers_positional_encoding.cpp',
+  'unittest_layers_upsample2d.cpp'
 ]
 
 if get_option('enable-tflite-backbone')
diff --git a/test/unittest/layers/unittest_layers_upsample2d.cpp b/test/unittest/layers/unittest_layers_upsample2d.cpp
new file mode 100644 (file)
index 0000000..0435c01
--- /dev/null
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2024 heka1024 <heka1024@gmail.com>
+ *
+ * @file unittest_layers_upsample2d.cpp
+ * @date 15 June 2024
+ * @brief Unit test for upsample2d layer
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author heka1024 <heka1024@gmail.com>
+ * @bug No known bugs except for NYI items
+ */
+#include <tuple>
+
+#include <gtest/gtest.h>
+
+#include <layers_common_tests.h>
+#include <upsample2d_layer.h>
+
+auto semantic_upsample2d = LayerSemanticsParamType(
+  nntrainer::createLayer<nntrainer::Upsample2dLayer>,
+  nntrainer::Upsample2dLayer::type, {"upsample=nearest", "kernel_size=2,2"},
+  LayerCreateSetPropertyOptions::AVAILABLE_FROM_APP_CONTEXT, false, 1);
+
+GTEST_PARAMETER_TEST(Upsample2D, LayerSemantics,
+                     ::testing::Values(semantic_upsample2d));
+
+auto upsampling_2x2_nearest = LayerGoldenTestParamType(
+  nntrainer::createLayer<nntrainer::Upsample2dLayer>,
+  {"upsample=nearest", "kernel_size=2,2"}, "1:1:2:2",
+  "upsample2d_2x2_nearest.nnlayergolden", LayerGoldenTestParamOptions::DEFAULT,
+  "nchw", "fp32", "fp32");
+
+auto upsampling_3x3_nearest = LayerGoldenTestParamType(
+  nntrainer::createLayer<nntrainer::Upsample2dLayer>,
+  {"upsample=nearest", "kernel_size=3,3"}, "1:1:3:3",
+  "upsample2d_3x3_nearest.nnlayergolden", LayerGoldenTestParamOptions::DEFAULT,
+  "nchw", "fp32", "fp32");
+
+auto upsampling_2x2_bilinear = LayerGoldenTestParamType(
+  nntrainer::createLayer<nntrainer::Upsample2dLayer>,
+  {"upsample=bilinear", "kernel_size=2,2"}, "1:1:2:2",
+  "upsample2d_2x2_bilinear.nnlayergolden", LayerGoldenTestParamOptions::DEFAULT,
+  "nchw", "fp32", "fp32");
+
+auto upsampling_3x3_bilinear = LayerGoldenTestParamType(
+  nntrainer::createLayer<nntrainer::Upsample2dLayer>,
+  {"upsample=bilinear", "kernel_size=3,3"}, "1:1:3:3",
+  "upsample2d_3x3_bilinear.nnlayergolden", LayerGoldenTestParamOptions::DEFAULT,
+  "nchw", "fp32", "fp32");
+
+auto upsampling_big_bilinear = LayerGoldenTestParamType(
+  nntrainer::createLayer<nntrainer::Upsample2dLayer>,
+  {"upsample=bilinear", "kernel_size=4,4"}, "1:1:10:10",
+  "upsample2d_big_bilinear.nnlayergolden", LayerGoldenTestParamOptions::DEFAULT,
+  "nchw", "fp32", "fp32");
+
+GTEST_PARAMETER_TEST(Upsample2D, LayerGoldenTest,
+                     ::testing::Values(upsampling_2x2_nearest,
+                                       upsampling_3x3_nearest,
+                                       upsampling_2x2_bilinear,
+                                       upsampling_3x3_bilinear,
+                                       upsampling_big_bilinear));