[Layer] Introduce `upsample2d` layer
author    heka1024 <heka1024@gmail.com>
Sun, 9 Jun 2024 10:55:33 +0000 (19:55 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Tue, 2 Jul 2024 07:53:58 +0000 (16:53 +0900)
Add the `upsample2d` layer to nntrainer. It supports nearest and bilinear interpolation and can be used in YOLO and other models that need spatial upsampling.

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Co-authored-by: Boseong Seo <suzy13549@snu.ac.kr>
Co-authored-by: kimhan0515 <kimhan0515@gmail.com>
Signed-off-by: heka1024 <heka1024@gmail.com>
api/ccapi/include/layer.h
nntrainer/app_context.cpp
nntrainer/layers/common_properties.h
nntrainer/layers/meson.build
nntrainer/layers/upsample2d_layer.cpp [new file with mode: 0644]
nntrainer/layers/upsample2d_layer.h [new file with mode: 0644]
packaging/unittest_layers.tar.gz
test/input_gen/gen_layer_tests.py
test/unittest/layers/meson.build
test/unittest/layers/unittest_layers_upsample2d.cpp [new file with mode: 0644]

index 81afe86ee2797405f208f68b0f62bdc3896f0225..9414f0168000d231541115f1e197b39bc0080f68 100644 (file)
@@ -100,6 +100,7 @@ enum LayerType {
   LAYER_REDUCE_MEAN,              /**< Reduce mean Layer type */
   LAYER_LOSS_CONSTANT_DERIVATIVE, /**< Synthetic loss layer to feed constant
                                      derivative */
+  LAYER_UPSAMPLE2D,               /**< Upsample 2D Layer type */
   LAYER_UNKNOWN = ML_TRAIN_LAYER_TYPE_UNKNOWN /**< Unknown */
 };
 
@@ -547,6 +548,14 @@ Identity(const std::vector<std::string> &properties = {}) {
   return createLayer(LayerType::LAYER_IDENTITY, properties);
 }
 
+/**
+ * @brief Helper function to create Upsample2d layer
+ */
+inline std::unique_ptr<Layer>
+Upsample2D(const std::vector<std::string> &properties = {}) {
+  return createLayer(LayerType::LAYER_UPSAMPLE2D, properties);
+}
+
 /**
  * @brief Helper function to create activation layer
  */
index 7c14fd8ae2339ddd9dfd3be191e19a42d949cbdf..fad75f9ae0525968d279ac8bba0ab19236b8b586 100644 (file)
@@ -73,6 +73,7 @@
 #include <rnncell.h>
 #include <split_layer.h>
 #include <time_dist.h>
+#include <upsample2d_layer.h>
 #include <zoneout_lstmcell.h>
 
 #ifdef ENABLE_TFLITE_BACKBONE
@@ -306,6 +307,8 @@ static void add_default_object(AppContext &ac) {
                      LayerType::LAYER_POSITIONAL_ENCODING);
   ac.registerFactory(nntrainer::createLayer<IdentityLayer>, IdentityLayer::type,
                      LayerType::LAYER_IDENTITY);
+  ac.registerFactory(nntrainer::createLayer<Upsample2dLayer>,
+                     Upsample2dLayer::type, LayerType::LAYER_UPSAMPLE2D);
 
 #ifdef ENABLE_NNSTREAMER_BACKBONE
   ac.registerFactory(nntrainer::createLayer<NNStreamerLayer>,
index 3c5d73300df9ce27a9e5781a3898c5589458b92e..33802beacfd7358cee3343c40f3000282d468625 100644 (file)
@@ -869,10 +869,9 @@ struct ActivationTypeInfo {
     Enum::ACT_GELU,    Enum::ACT_QUICK_GELU, Enum::ACT_NONE,
     Enum::ACT_UNKNOWN};
 
-  static constexpr const char *EnumStr[] = {"tanh",    "sigmoid",    "relu",
-                                            "softmax", "leaky_relu", "swish",
-                                            "gelu",    "quick_gelu", "none",
-                                            "unknown"};
+  static constexpr const char *EnumStr[] = {
+    "tanh",  "sigmoid", "relu",       "softmax", "leaky_relu",
+    "swish", "gelu",    "quick_gelu", "none",    "unknown"};
 };
 
 /**
@@ -1068,6 +1067,34 @@ public:
   static constexpr const char *key = "weight_regularizer";
 };
 
+/**
+ * @brief     Enumeration of upsample type
+ * @todo Support torch and keras supported modes like bicubic
+ */
+struct UpsampleModeInfo {
+  /**
+   * @brief   Upsampling operation type class
+   */
+  enum class Interpolation { nearest, bilinear };
+
+  using Enum = Interpolation;
+
+  static constexpr std::initializer_list<Interpolation> EnumList = {
+    Interpolation::nearest, Interpolation::bilinear};
+
+  static constexpr const char *EnumStr[] = {"nearest", "bilinear"};
+};
+
+/**
+ * @brief Upsample Type Enumeration Information
+ *
+ */
+class UpsampleMode final : public EnumProperty<UpsampleModeInfo> {
+public:
+  using prop_tag = enum_class_prop_tag;
+  static constexpr const char *key = "upsample";
+};
+
 /**
  * @brief     Enumeration of pooling type
  */
index 9d42a3e34bdbe3c3cd236480342ff69d13ffaf92..75a59605ab902cad91a4829199b471702e147742 100644 (file)
@@ -42,7 +42,8 @@ layer_sources = [
   'reshape_layer.cpp',
   'reduce_mean_layer.cpp',
   'positional_encoding_layer.cpp',
-  'identity_layer.cpp'
+  'identity_layer.cpp',
+  'upsample2d_layer.cpp'
 ]
 
 layer_headers = [
diff --git a/nntrainer/layers/upsample2d_layer.cpp b/nntrainer/layers/upsample2d_layer.cpp
new file mode 100644 (file)
index 0000000..e4a7fb2
--- /dev/null
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2024 heka1024 <heka1024@gmail.com>
+ *
+ * @file   upsample2d_layer.cpp
+ * @date   15 June 2024
+ * @brief  Implementation of the upsample2d layer for a given kernel size and
+ * interpolation method
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author heka1024 <heka1024@gmail.com>
+ * @bug    No known bugs except for NYI items
+ */
+
+#include <layer_context.h>
+#include <node_exporter.h>
+#include <upsample2d_layer.h>
+
+namespace nntrainer {
+static constexpr size_t SINGLE_INOUT_IDX = 0;
+
+Upsample2dLayer::Upsample2dLayer() :
+  Layer(),
+  upsample2d_props(props::UpsampleMode(),
+                   std::array<props::KernelSize, UPSAMPLE2D_DIM>()) {}
+
+void Upsample2dLayer::finalize(nntrainer::InitLayerContext &context) {
+  std::vector<nntrainer::TensorDim> dim = context.getInputDimensions();
+
+  const auto &kernel_size =
+    std::get<std::array<props::KernelSize, UPSAMPLE2D_DIM>>(upsample2d_props);
+
+  for (unsigned int i = 0; i < dim.size(); ++i) {
+    if (dim[i].getDataLen() == 0) {
+      throw std::invalid_argument("Input dimension is not set");
+    } else {
+      dim[i].channel(dim[i].channel()); // channel count is unchanged
+      dim[i].height(dim[i].height() * kernel_size[0]);
+      dim[i].width(dim[i].width() * kernel_size[1]);
+    }
+  }
+
+  context.setOutputDimensions(dim);
+}
+
+void Upsample2dLayer::forwarding(nntrainer::RunLayerContext &context,
+                                 bool training) {
+  nntrainer::Tensor &in = context.getInput(SINGLE_INOUT_IDX);
+  nntrainer::Tensor &out = context.getOutput(SINGLE_INOUT_IDX);
+
+  const auto &upsampling_type =
+    std::get<props::UpsampleMode>(upsample2d_props).get();
+  const auto &kernel_size =
+    std::get<std::array<props::KernelSize, UPSAMPLE2D_DIM>>(upsample2d_props);
+
+  switch (upsampling_type) {
+  case props::UpsampleModeInfo::Interpolation::nearest:
+    for (int b = 0; b < (int)out.batch(); b++) {
+      for (int c = 0; c < (int)out.channel(); c++) {
+        for (int h = 0; h < (int)out.height(); h++) {
+          for (int w = 0; w < (int)out.width(); w++) {
+            out.setValue(
+              b, c, h, w,
+              in.getValue(b, c, h / kernel_size[0], w / kernel_size[1]));
+          }
+        }
+      }
+    }
+    break;
+  case props::UpsampleModeInfo::Interpolation::bilinear: {
+    float scale_h = kernel_size[0];
+    float scale_w = kernel_size[1];
+
+    for (int b = 0; b < (int)out.batch(); b++) {
+      for (int c = 0; c < (int)out.channel(); c++) {
+        for (int h = 0; h < (int)out.height(); h++) {
+          for (int w = 0; w < (int)out.width(); w++) {
+            float x_in = (w + 0.5f) / scale_w - 0.5f;
+            float y_in = (h + 0.5f) / scale_h - 0.5f;
+
+            if (x_in < 0) {
+              x_in = 0.0f;
+            }
+            if (y_in < 0) {
+              y_in = 0.0f;
+            }
+
+            int x0 = static_cast<int>(floor(x_in));
+            int y0 = static_cast<int>(floor(y_in));
+            int x1 = std::min(x0 + 1, (int)in.width() - 1);
+            int y1 = std::min(y0 + 1, (int)in.height() - 1);
+
+            float dx = x_in - x0;
+            float dy = y_in - y0;
+
+            float top = (1.0f - dx) * in.getValue(b, c, y0, x0) +
+                        dx * in.getValue(b, c, y0, x1);
+            float bottom = (1.0f - dx) * in.getValue(b, c, y1, x0) +
+                           dx * in.getValue(b, c, y1, x1);
+            float v = (1.0f - dy) * top + dy * bottom;
+            out.setValue(b, c, h, w, v);
+          }
+        }
+      }
+    }
+  } break;
+  default:
+    throw std::runtime_error("Error: Unknown Upsample Mode Type");
+  }
+}
+
+void Upsample2dLayer::calcDerivative(nntrainer::RunLayerContext &context) {
+  const nntrainer::Tensor &derivative_ =
+    context.getIncomingDerivative(SINGLE_INOUT_IDX);
+
+  nntrainer::Tensor &dx = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
+
+  const auto &kernel_size =
+    std::get<std::array<props::KernelSize, UPSAMPLE2D_DIM>>(upsample2d_props);
+  const auto &upsampling_type =
+    std::get<props::UpsampleMode>(upsample2d_props).get();
+
+  switch (upsampling_type) {
+  case props::UpsampleModeInfo::Interpolation::nearest: {
+    float val = 0;
+    for (int b = 0; b < (int)derivative_.batch(); b++) {
+      for (int c = 0; c < (int)derivative_.channel(); c++) {
+        for (int h = 0; h < (int)derivative_.height(); h++) {
+          for (int w = 0; w < (int)derivative_.width(); w++) {
+            if (h % kernel_size[0] == 0 && w % kernel_size[1] == 0) {
+              dx.setValue(b, c, h / kernel_size[0], w / kernel_size[1], 0);
+            }
+
+            val = dx.getValue(b, c, h / kernel_size[0], w / kernel_size[1]) +
+                  derivative_.getValue(b, c, h, w);
+            dx.setValue(b, c, h / kernel_size[0], w / kernel_size[1], val);
+          }
+        }
+      }
+    }
+  } break;
+  case props::UpsampleModeInfo::Interpolation::bilinear: {
+    dx.setZero();
+
+    int input_height = dx.height();
+    int input_width = dx.width();
+
+    for (int b = 0; b < (int)derivative_.batch(); b++) {
+      for (int c = 0; c < (int)derivative_.channel(); c++) {
+        for (int h = 0; h < (int)derivative_.height(); h++) {
+          for (int w = 0; w < (int)derivative_.width(); w++) {
+            float in_h = (h + 0.5f) / kernel_size[0] - 0.5f;
+            float in_w = (w + 0.5f) / kernel_size[1] - 0.5f;
+
+            if (in_h < 0) {
+              in_h = 0.0f;
+            }
+            if (in_w < 0) {
+              in_w = 0.0f;
+            }
+
+            int y0 = static_cast<int>(floor(in_h));
+            int x0 = static_cast<int>(floor(in_w));
+            int y1 = std::min(y0 + 1, input_height - 1);
+            int x1 = std::min(x0 + 1, input_width - 1);
+
+            float dx_ = (in_w - x0); // Due to name conflict with dx
+            float dy_ = (in_h - y0);
+
+            float top_left_weight = (1.0 - dy_) * (1.0 - dx_);
+            float top_right_weight = (1.0 - dy_) * dx_;
+            float bottom_left_weight = dy_ * (1.0 - dx_);
+            float bottom_right_weight = dy_ * dx_;
+
+            float grad = derivative_.getValue(b, c, h, w);
+
+            dx.addValue(b, c, y0, x0, top_left_weight * grad, 1.0f);
+            dx.addValue(b, c, y0, x1, top_right_weight * grad, 1.0f);
+            dx.addValue(b, c, y1, x0, bottom_left_weight * grad, 1.0f);
+            dx.addValue(b, c, y1, x1, bottom_right_weight * grad, 1.0f);
+          }
+        }
+      }
+    }
+  } break;
+  default:
+    throw std::runtime_error("Error: Unknown Upsample Mode Type");
+  }
+}
+
+void Upsample2dLayer::setProperty(const std::vector<std::string> &values) {
+  auto remain_props = loadProperties(values, upsample2d_props);
+
+  if (!remain_props.empty()) {
+    std::string msg = "[Upsample2dLayer] Unknown properties set with count " +
+                      std::to_string(values.size());
+    throw exception::not_supported(msg);
+  }
+}
+} // namespace nntrainer
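
To make the interpolation above easier to check by hand, here is a standalone sketch (not part of the patch) that applies the same nearest and bilinear index arithmetic as `forwarding()` to a 2x2 input upsampled by a factor of 2:

```cpp
// Standalone illustration of the index arithmetic used in forwarding(),
// applied to a 2x2 input with kernel_size = 2,2.
#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  const int in_h = 2, in_w = 2, scale = 2;
  const float in[2][2] = {{1.f, 2.f}, {3.f, 4.f}};

  // Nearest: out(h, w) = in(h / scale, w / scale), so the 2x2 input becomes
  //   1 1 2 2
  //   1 1 2 2
  //   3 3 4 4
  //   3 3 4 4

  // Bilinear: map each output pixel back to fractional input coordinates,
  // clamp at the borders, and blend the four surrounding input values.
  for (int h = 0; h < in_h * scale; ++h) {
    for (int w = 0; w < in_w * scale; ++w) {
      float x_in = std::max((w + 0.5f) / scale - 0.5f, 0.0f);
      float y_in = std::max((h + 0.5f) / scale - 0.5f, 0.0f);
      int x0 = static_cast<int>(std::floor(x_in));
      int y0 = static_cast<int>(std::floor(y_in));
      int x1 = std::min(x0 + 1, in_w - 1);
      int y1 = std::min(y0 + 1, in_h - 1);
      float dx = x_in - x0, dy = y_in - y0;
      float v = (1.f - dy) * ((1.f - dx) * in[y0][x0] + dx * in[y0][x1]) +
                dy * ((1.f - dx) * in[y1][x0] + dx * in[y1][x1]);
      std::printf("%5.2f%c", v, w + 1 == in_w * scale ? '\n' : ' ');
    }
  }
  return 0;
}
```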
diff --git a/nntrainer/layers/upsample2d_layer.h b/nntrainer/layers/upsample2d_layer.h
new file mode 100644 (file)
index 0000000..d0628a1
--- /dev/null
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2024 heka1024 <heka1024@gmail.com>
+ *
+ * @file   upsample2d_layer.h
+ * @date   15 June 2024
+ * @brief  This is Upsample2d Layer Class of Neural Network
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author heka1024 <heka1024@gmail.com>
+ * @bug    No known bugs except for NYI items
+ *
+ */
+
+#ifndef __UPSAMPLE2D_LAYER_H__
+#define __UPSAMPLE2D_LAYER_H__
+#ifdef __cplusplus
+
+#include <common_properties.h>
+#include <layer_impl.h>
+
+#include <node_exporter.h>
+
+namespace nntrainer {
+
+constexpr const unsigned int UPSAMPLE2D_DIM = 2;
+
+/**
+ * @class   Upsample2dLayer
+ * @brief   Upsample 2D layer
+ */
+class Upsample2dLayer : public Layer {
+public:
+  /**
+   * @brief Construct a new Upsample layer object
+   *
+   */
+  Upsample2dLayer();
+
+  /**
+   * @brief Destroy the Upsample layer object
+   *
+   */
+  ~Upsample2dLayer() {}
+
+  /**
+   * @copydoc Layer::finalize(InitLayerContext &context)
+   */
+  void finalize(nntrainer::InitLayerContext &context) override;
+
+  /**
+   * @copydoc Layer::forwarding(RunLayerContext &context, bool training)
+   */
+  void forwarding(nntrainer::RunLayerContext &context, bool training) override;
+
+  /**
+   * @copydoc Layer::calcDerivative(RunLayerContext &context)
+   */
+  void calcDerivative(nntrainer::RunLayerContext &context) override;
+
+  /**
+   * @copydoc bool supportBackwarding() const
+   */
+  bool supportBackwarding() const override { return true; };
+
+  /**
+   * @copydoc Layer::exportTo(Exporter &exporter, ExportMethods method)
+   */
+  void exportTo(nntrainer::Exporter &exporter,
+                const ml::train::ExportMethods &method) const override{};
+
+  /**
+   * @copydoc Layer::getType()
+   */
+  const std::string getType() const override { return Upsample2dLayer::type; };
+
+  /**
+   * @copydoc Layer::setProperty(const std::vector<std::string> &values)
+   */
+  void setProperty(const std::vector<std::string> &values) override;
+
+  inline static const std::string type = "upsample2d";
+
+private:
+  std::tuple<props::UpsampleMode, std::array<props::KernelSize, UPSAMPLE2D_DIM>>
+    upsample2d_props; /* mode, size of kernel */
+};
+} // namespace nntrainer
+
+#endif /* __cplusplus */
+#endif /* __UPSAMPLE2D_LAYER_H__ */
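
For reference, a small sketch of constructing the internal layer directly and feeding it the new properties, much as the unit tests below do through `nntrainer::createLayer<Upsample2dLayer>`; this is illustrative only, and the include path is assumed to match nntrainer's internal layout:

```cpp
// Illustrative sketch (not part of the patch): create the layer and set the
// properties it accepts.
#include <upsample2d_layer.h>

#include <memory>
#include <string>
#include <vector>

std::unique_ptr<nntrainer::Layer> makeUpsample() {
  auto layer = std::make_unique<nntrainer::Upsample2dLayer>();
  // Unknown keys would make setProperty throw exception::not_supported.
  layer->setProperty({"upsample=nearest", "kernel_size=2,2"});
  return layer;
}
```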
index 7a435aadf450e91772b7af0e61af527a3b404952..12f3b86d7f84aa437fe7140d349e74ef8cd4bf19 100644 (file)
Binary files a/packaging/unittest_layers.tar.gz and b/packaging/unittest_layers.tar.gz differ
index 99017d071fcfb8b8ee41bbf209622e24b2f28d8a..ccb8f223c620dbdf096e07cba89e9a28114b2732 100644 (file)
@@ -172,6 +172,21 @@ if __name__ == "__main__":
     record_single(conv, (1, 3, 11, 11), "conv2d_sb_same_dilation")
     record_single(conv, (3, 3, 11, 11), "conv2d_mb_same_dilation")
 
+    upsample = K.layers.UpSampling2D(size=(2, 2), interpolation="nearest", input_shape=(2, 2, 1))
+    record_single(upsample, (1, 2, 2, 1), "upsample2d_2x2_nearest")  # input_shape: n h w c
+
+    upsample = K.layers.UpSampling2D(size=(3, 3), interpolation="nearest", input_shape=(3, 3, 1))
+    record_single(upsample, (1, 3, 3, 1), "upsample2d_3x3_nearest")
+
+    upsample = K.layers.UpSampling2D(size=(2, 2), interpolation="bilinear", input_shape=(2, 2, 1))
+    record_single(upsample, (1, 2, 2, 1), "upsample2d_2x2_bilinear")  # input_shape: n h w c
+
+    upsample = K.layers.UpSampling2D(size=(3, 3), interpolation="bilinear", input_shape=(3, 3, 1))
+    record_single(upsample, (1, 3, 3, 1), "upsample2d_3x3_bilinear")
+
+    upsample = K.layers.UpSampling2D(size=(4, 4), interpolation="bilinear", input_shape=(10, 10, 1))
+    record_single(upsample, (1, 10, 10, 1), "upsample2d_big_bilinear")
+
     # use float data to generate input here
     attention = K.layers.Attention()
     record_single(
index 5aae748adfae852cdedabf5c0a0524c05cde7d84..59992b17f607811bbb30bc8494e505b0525f4a59 100644 (file)
@@ -64,6 +64,7 @@ test_target = [
   # 'unittest_layers_mol_attention.cpp',
   'unittest_layers_multi_head_attention.cpp',
   'unittest_layers_positional_encoding.cpp',
+  'unittest_layers_upsample2d.cpp'
 ]
 
 if get_option('enable-tflite-backbone')
diff --git a/test/unittest/layers/unittest_layers_upsample2d.cpp b/test/unittest/layers/unittest_layers_upsample2d.cpp
new file mode 100644 (file)
index 0000000..0435c01
--- /dev/null
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2024 heka1024 <heka1024@gmail.com>
+ *
+ * @file unittest_layers_upsample2d.cpp
+ * @date 15 June 2024
+ * @brief Unit test for upsample2d layer
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author heka1024 <heka1024@gmail.com>
+ * @bug No known bugs except for NYI items
+ */
+#include <tuple>
+
+#include <gtest/gtest.h>
+
+#include <layers_common_tests.h>
+#include <upsample2d_layer.h>
+
+auto semantic_upsample2d = LayerSemanticsParamType(
+  nntrainer::createLayer<nntrainer::Upsample2dLayer>,
+  nntrainer::Upsample2dLayer::type, {"upsample=nearest", "kernel_size=2,2"},
+  LayerCreateSetPropertyOptions::AVAILABLE_FROM_APP_CONTEXT, false, 1);
+
+GTEST_PARAMETER_TEST(Upsample2D, LayerSemantics,
+                     ::testing::Values(semantic_upsample2d));
+
+auto upsampling_2x2_nearest = LayerGoldenTestParamType(
+  nntrainer::createLayer<nntrainer::Upsample2dLayer>,
+  {"upsample=nearest", "kernel_size=2,2"}, "1:1:2:2",
+  "upsample2d_2x2_nearest.nnlayergolden", LayerGoldenTestParamOptions::DEFAULT,
+  "nchw", "fp32", "fp32");
+
+auto upsampling_3x3_nearest = LayerGoldenTestParamType(
+  nntrainer::createLayer<nntrainer::Upsample2dLayer>,
+  {"upsample=nearest", "kernel_size=3,3"}, "1:1:3:3",
+  "upsample2d_3x3_nearest.nnlayergolden", LayerGoldenTestParamOptions::DEFAULT,
+  "nchw", "fp32", "fp32");
+
+auto upsampling_2x2_bilinear = LayerGoldenTestParamType(
+  nntrainer::createLayer<nntrainer::Upsample2dLayer>,
+  {"upsample=bilinear", "kernel_size=2,2"}, "1:1:2:2",
+  "upsample2d_2x2_bilinear.nnlayergolden", LayerGoldenTestParamOptions::DEFAULT,
+  "nchw", "fp32", "fp32");
+
+auto upsampling_3x3_bilinear = LayerGoldenTestParamType(
+  nntrainer::createLayer<nntrainer::Upsample2dLayer>,
+  {"upsample=bilinear", "kernel_size=3,3"}, "1:1:3:3",
+  "upsample2d_3x3_bilinear.nnlayergolden", LayerGoldenTestParamOptions::DEFAULT,
+  "nchw", "fp32", "fp32");
+
+auto upsampling_big_bilinear = LayerGoldenTestParamType(
+  nntrainer::createLayer<nntrainer::Upsample2dLayer>,
+  {"upsample=bilinear", "kernel_size=4,4"}, "1:1:10:10",
+  "upsample2d_big_bilinear.nnlayergolden", LayerGoldenTestParamOptions::DEFAULT,
+  "nchw", "fp32", "fp32");
+
+GTEST_PARAMETER_TEST(Upsample2D, LayerGoldenTest,
+                     ::testing::Values(upsampling_2x2_nearest,
+                                       upsampling_3x3_nearest,
+                                       upsampling_2x2_bilinear,
+                                       upsampling_3x3_bilinear,
+                                       upsampling_big_bilinear));