[Layer] add Weight Layer
author Seungbaek Hong <sb92.hong@samsung.com>
Tue, 30 Jul 2024 06:17:17 +0000 (15:17 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Fri, 9 Aug 2024 00:36:49 +0000 (09:36 +0900)
- This layer contains only weights; it is used to build a tensor-level graph.

**Self-evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test:   [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Seungbaek Hong <sb92.hong@samsung.com>
api/ccapi/include/layer.h
api/nntrainer-api-common.h
nntrainer/app_context.cpp
nntrainer/layers/common_properties.h
nntrainer/layers/meson.build
nntrainer/layers/weight_layer.cpp [new file with mode: 0644]
nntrainer/layers/weight_layer.h [new file with mode: 0644]
nntrainer/utils/base_properties.h
test/ccapi/unittest_ccapi.cpp
test/unittest/layers/meson.build
test/unittest/layers/unittest_layers_weight.cpp [new file with mode: 0644]

index e384231e6fa3deab758d47d6e6bb3296156a9232..19266ae5a760f15ee8a77941f046876b29771571 100644 (file)
@@ -36,6 +36,7 @@ namespace train {
  */
 enum LayerType {
   LAYER_IN = ML_TRAIN_LAYER_TYPE_INPUT,      /**< Input Layer type */
+  LAYER_WEIGHT = ML_TRAIN_LAYER_TYPE_WEIGHT, /**< Weight Layer type */
   LAYER_FC = ML_TRAIN_LAYER_TYPE_FC,         /**< Fully Connected Layer type */
   LAYER_SWIGLU = ML_TRAIN_LAYER_TYPE_SWIGLU, /**< Swiglu Layer type */
   LAYER_BN = ML_TRAIN_LAYER_TYPE_BN, /**< Batch Normalization Layer type */
@@ -102,7 +103,7 @@ enum LayerType {
                                      derivative */
   LAYER_UPSAMPLE2D,               /**< Upsample 2D Layer type */
   LAYER_RMSNORM = ML_TRAIN_LAYER_TYPE_RMSNORM, /**< RMS NORM Layer */
-  LAYER_UNKNOWN = ML_TRAIN_LAYER_TYPE_UNKNOWN /**< Unknown */
+  LAYER_UNKNOWN = ML_TRAIN_LAYER_TYPE_UNKNOWN  /**< Unknown */
 };
 
 /**
@@ -290,6 +291,14 @@ Input(const std::vector<std::string> &properties = {}) {
   return createLayer(LayerType::LAYER_IN, properties);
 }
 
+/**
+ * @brief Helper function to create weight layer
+ */
+inline std::unique_ptr<Layer>
+WeightLayer(const std::vector<std::string> &properties = {}) {
+  return createLayer(LayerType::LAYER_WEIGHT, properties);
+}
+
 /**
  * @brief Helper function to create fully connected layer
  */
@@ -311,9 +320,9 @@ Swiglu(const std::vector<std::string> &properties = {},
 /**
  * @brief Helper function to create RMS normalization layer for GPU
  */
-inline std::unique_ptr<Layer> RMSNormCl(
-  const std::vector<std::string> &properties = {},
-  const LayerComputeEngine &compute_engine = LayerComputeEngine::GPU) {
+inline std::unique_ptr<Layer>
+RMSNormCl(const std::vector<std::string> &properties = {},
+          const LayerComputeEngine &compute_engine = LayerComputeEngine::GPU) {
   return createLayer(LayerType::LAYER_RMSNORM, properties, compute_engine);
 }
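
For reference, creating the new layer through this helper is a one-liner; a
minimal sketch (the dim value is illustrative and uses the TensorDimension
property introduced in this commit):

  #include <layer.h> // the ccapi header patched above

  int main() {
    auto weight = ml::train::layer::WeightLayer({"dim=1:1:4"});
    return weight->getType() == "weight" ? 0 : 1;
  }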
 
index 76d9976f3b50597f1a89edf788e5bb0d6f4e18ac..97a5a71fad62b6ec6abc1328f1f79b3cf6b04db4 100644 (file)
@@ -64,6 +64,7 @@ typedef enum {
     28, /**< Positional Encoding Layer type (Since 7.0) */
   ML_TRAIN_LAYER_TYPE_IDENTITY = 29, /**< Identity Layer type (Since 8.0) */
   ML_TRAIN_LAYER_TYPE_SWIGLU = 30,   /**< Swiglu Layer type */
+  ML_TRAIN_LAYER_TYPE_WEIGHT = 31,   /**< Weight Layer type (Since 9.0) */
   ML_TRAIN_LAYER_TYPE_PREPROCESS_FLIP =
     300, /**< Preprocess flip Layer (Since 6.5) */
   ML_TRAIN_LAYER_TYPE_PREPROCESS_TRANSLATE =
@@ -77,7 +78,7 @@ typedef enum {
   ML_TRAIN_LAYER_TYPE_LOSS_CROSS_ENTROPY_SOFTMAX = 502, /**< Cross Entropy with
                                        Softmax Loss Layer type (Since 6.5) */
   ML_TRAIN_LAYER_TYPE_RMSNORM = 503, /**< RMS Norm Layer type */
-  ML_TRAIN_LAYER_TYPE_UNKNOWN = 999                     /**< Unknown Layer */
+  ML_TRAIN_LAYER_TYPE_UNKNOWN = 999  /**< Unknown Layer */
 } ml_train_layer_type_e;
 
 /**
index fad75f9ae0525968d279ac8bba0ab19236b8b586..09b6fd10f4bbf1c9714a97edc00ba57d0d61a01e 100644 (file)
@@ -74,6 +74,7 @@
 #include <split_layer.h>
 #include <time_dist.h>
 #include <upsample2d_layer.h>
+#include <weight_layer.h>
 #include <zoneout_lstmcell.h>
 
 #ifdef ENABLE_TFLITE_BACKBONE
@@ -245,6 +246,8 @@ static void add_default_object(AppContext &ac) {
   using LayerType = ml::train::LayerType;
   ac.registerFactory(nntrainer::createLayer<InputLayer>, InputLayer::type,
                      LayerType::LAYER_IN);
+  ac.registerFactory(nntrainer::createLayer<WeightLayer>, WeightLayer::type,
+                     LayerType::LAYER_WEIGHT);
   ac.registerFactory(nntrainer::createLayer<FullyConnectedLayer>,
                      FullyConnectedLayer::type, LayerType::LAYER_FC);
   ac.registerFactory(nntrainer::createLayer<BatchNormalizationLayer>,
index 2591ab454b3f0eadf8565152f23954a71e45b45f..64818be4fc2698e0643948b2e6b9a087a56967a9 100644 (file)
@@ -114,6 +114,16 @@ public:
   using prop_tag = bool_prop_tag;
 };
 
+/**
+ * @brief Tensor Dimension property
+ *
+ */
+class TensorDimension : public TensorDimProperty {
+public:
+  static constexpr const char *key = "dim"; /**< unique key to access */
+  using prop_tag = dimension_prop_tag;      /**< property type */
+};
+
 /**
  * @brief trainable property, use this to set and check whether a certain
  * layer is trainable
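
The new property plugs into the existing string conversion machinery; a hedged
sketch of how a "dim" value would be parsed (assuming the generic from_string
helper and the dimension_prop_tag converter already provided by
base_properties.h):

  #include <common_properties.h>

  nntrainer::props::TensorDimension dim;
  nntrainer::from_string("1:1:4", dim); // colon-separated dims, as in the tests
  const nntrainer::TensorDim &d = dim.get();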
index 75a59605ab902cad91a4829199b471702e147742..c612d8c177ef4165da4a44f4153dab5976255fad 100644 (file)
@@ -4,6 +4,7 @@ nntrainer_inc_abs += meson.current_source_dir() / 'loss'
 
 layer_sources = [
   'activation_layer.cpp',
+  'weight_layer.cpp',
   'addition_layer.cpp',
   'attention_layer.cpp',
   'mol_attention_layer.cpp',
diff --git a/nntrainer/layers/weight_layer.cpp b/nntrainer/layers/weight_layer.cpp
new file mode 100644 (file)
index 0000000..8404348
--- /dev/null
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2024 SeungBaek Hong <sb92.hong@samsung.com>
+ *
+ * @file   weight_layer.cpp
+ * @date   2 August 2024
+ * @brief  This is a layer that simply stores a weight tensor without any
+ * operation.
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author SeungBaek Hong <sb92.hong@samsung.com>
+ * @bug    No known bugs except for NYI items
+ *
+ */
+
+#include <common_properties.h>
+#include <layer_context.h>
+#include <lazy_tensor.h>
+#include <nntrainer_error.h>
+#include <nntrainer_log.h>
+#include <node_exporter.h>
+#include <util_func.h>
+#include <weight_layer.h>
+
+#include <iostream>
+
+namespace nntrainer {
+
+static constexpr size_t SINGLE_INOUT_IDX = 0;
+
+WeightLayer::WeightLayer() : LayerImpl() {
+  weight_idx.fill(std::numeric_limits<unsigned>::max());
+}
+
+void WeightLayer::finalize(InitLayerContext &context) {
+  auto &weight_regularizer =
+    std::get<props::WeightRegularizer>(*layer_impl_props);
+  auto &weight_regularizer_constant =
+    std::get<props::WeightRegularizerConstant>(*layer_impl_props);
+  auto &weight_initializer =
+    std::get<props::WeightInitializer>(*layer_impl_props);
+  auto &weight_decay = std::get<props::WeightDecay>(*layer_impl_props);
+
+  const auto &weight_dim = std::get<props::TensorDimension>(weight_props).get();
+
+  std::vector<TensorDim> output_dims(1);
+
+  output_dims[0] = weight_dim;
+
+  output_dims[0].setTensorType(
+    {context.getFormat(), context.getActivationDataType()});
+
+  context.setOutputDimensions(output_dims);
+
+  weight_idx[0] = context.requestWeight(
+    weight_dim, weight_initializer, weight_regularizer,
+    weight_regularizer_constant, weight_decay, "weight", true);
+}
+
+void WeightLayer::exportTo(Exporter &exporter,
+                           const ml::train::ExportMethods &method) const {
+  LayerImpl::exportTo(exporter, method);
+  exporter.saveResult(weight_props, method, this);
+}
+
+void WeightLayer::setProperty(const std::vector<std::string> &values) {
+  auto remain_props = loadProperties(values, weight_props);
+  LayerImpl::setProperty(remain_props);
+}
+
+void WeightLayer::forwarding(RunLayerContext &context, bool training) {
+  Tensor &weight = context.getWeight(weight_idx[0]);
+  Tensor &output = context.getOutput(SINGLE_INOUT_IDX);
+  output.copy(weight);
+}
+
+void WeightLayer::calcDerivative(RunLayerContext &context) {
+  throw exception::not_supported(
+    "calcDerivative for weight layer is not supported");
+}
+
+void WeightLayer::calcGradient(RunLayerContext &context) {
+  Tensor &djdw = context.getWeightGrad(weight_idx[0]);
+  const Tensor &derivative_ = context.getIncomingDerivative(SINGLE_INOUT_IDX);
+  djdw.copy(derivative_);
+}
+
+} /* namespace nntrainer */
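
In short, forwarding publishes the stored weight as the layer's output, and
calcGradient copies the derivative arriving at that output straight into the
weight's gradient; calcDerivative throws because the layer has no input to
propagate into. A standalone toy model of this contract (plain C++ with flat
buffers standing in for Tensor, not nntrainer code):

  #include <cassert>
  #include <vector>

  struct ToyWeightLayer {
    std::vector<float> weight;      // the stored trainable tensor
    std::vector<float> weight_grad; // dJ/dW, same shape as weight

    explicit ToyWeightLayer(std::vector<float> w)
      : weight(std::move(w)), weight_grad(weight.size(), 0.0f) {}

    // forwarding(): the output is a copy of the weight.
    std::vector<float> forward() const { return weight; }

    // calcGradient(): the incoming derivative is copied into dJ/dW.
    void backward(const std::vector<float> &incoming_derivative) {
      weight_grad = incoming_derivative;
    }
  };

  int main() {
    ToyWeightLayer layer({1.0f, 2.0f, 3.0f});
    assert(layer.forward() == (std::vector<float>{1.0f, 2.0f, 3.0f}));
    layer.backward({0.1f, 0.2f, 0.3f});
    assert(layer.weight_grad[2] == 0.3f);
    return 0;
  }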
diff --git a/nntrainer/layers/weight_layer.h b/nntrainer/layers/weight_layer.h
new file mode 100644 (file)
index 0000000..6c3a42f
--- /dev/null
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2024 SeungBaek Hong <sb92.hong@samsung.com>
+ *
+ * @file   weight_layer.h
+ * @date   2 August 2024
+ * @brief  This is a layer that simply stores a weight tensor without any
+ * operation.
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author SeungBaek Hong <sb92.hong@samsung.com>
+ * @bug    No known bugs except for NYI items
+ *
+ */
+
+#ifndef __WEIGHT_LAYER_H__
+#define __WEIGHT_LAYER_H__
+#ifdef __cplusplus
+
+#include <common_properties.h>
+#include <layer_impl.h>
+
+namespace nntrainer {
+
+/**
+ * @class   Weight Layer
+ * @brief   A layer that simply stores a weight tensor
+ */
+class WeightLayer : public LayerImpl {
+public:
+  /**
+   * @brief     Constructor of Weight Layer
+   */
+  WeightLayer();
+
+  /**
+   * @brief     Destructor of Weight Layer
+   */
+  ~WeightLayer() = default;
+
+  /**
+   *  @brief  Move constructor.
+   *  @param[in] WeightLayer &&
+   */
+  WeightLayer(WeightLayer &&rhs) noexcept = default;
+
+  /**
+   * @brief  Move assignment operator.
+   * @param[in] rhs WeightLayer to be moved.
+   */
+  WeightLayer &operator=(WeightLayer &&rhs) = default;
+
+  /**
+   * @copydoc Layer::finalize(InitLayerContext &context)
+   */
+  void finalize(InitLayerContext &context) override;
+
+  /**
+   * @copydoc Layer::forwarding(RunLayerContext &context, bool training)
+   */
+  void forwarding(RunLayerContext &context, bool training) override;
+
+  /**
+   * @copydoc Layer::calcDerivative(RunLayerContext &context)
+   */
+  void calcDerivative(RunLayerContext &context) override;
+
+  /**
+   * @copydoc Layer::calcGradient(RunLayerContext &context)
+   */
+  void calcGradient(RunLayerContext &context) override;
+
+  /**
+   * @copydoc Layer::exportTo(Exporter &exporter, ml::train::ExportMethods
+   * method)
+   */
+  void exportTo(Exporter &exporter,
+                const ml::train::ExportMethods &method) const override;
+
+  /**
+   * @copydoc Layer::getType()
+   */
+  const std::string getType() const override { return WeightLayer::type; };
+
+  /**
+   * @copydoc Layer::supportBackwarding()
+   */
+  bool supportBackwarding() const override { return true; }
+
+  /**
+   * @copydoc Layer::setProperty(const PropertyType type, const std::string
+   * &value)
+   */
+  void setProperty(const std::vector<std::string> &values) override;
+
+  inline static const std::string type = "weight";
+
+private:
+  std::tuple<props::TensorDimension> weight_props;
+  std::array<unsigned int, 1> weight_idx; /**< indices of the weights */
+};
+} // namespace nntrainer
+
+#endif /* __cplusplus */
+#endif /* __WEIGHT_LAYER_H__ */
index 259637a6d97d0f59db5e29296275a69d022b1826..c24948a33bddd1240f60addefe81910cb7df3c37 100644 (file)
@@ -277,6 +277,19 @@ public:
   static EnumInfo enum_info_;
 };
 
+/**
+ * @brief abstract class for tensor dimension
+ *
+ */
+class TensorDimProperty : public Property<TensorDim> {
+public:
+  /**
+   * @brief Destroy the TensorDim Property object
+   *
+   */
+  virtual ~TensorDimProperty() = default;
+};
+
 /**
  * @brief abstract class for positive integer
  *
index 98c006430d30154137917707581620b41d8fb99d..34c99f4f5ba9e915d4ac20dbacbd6ecb0fa6a90e 100644 (file)
@@ -61,6 +61,9 @@ TEST(ccapi_layer, construct_02_p) {
   EXPECT_NO_THROW(layer = ml::train::layer::Input());
   EXPECT_EQ(layer->getType(), "input");
 
+  EXPECT_NO_THROW(layer = ml::train::layer::WeightLayer());
+  EXPECT_EQ(layer->getType(), "weight");
+
   EXPECT_NO_THROW(layer = ml::train::layer::FullyConnected());
   EXPECT_EQ(layer->getType(), "fully_connected");
 
index 59992b17f607811bbb30bc8494e505b0525f4a59..c65609e8816256a2ba0d4852b71f574cfea733b5 100644 (file)
@@ -36,6 +36,7 @@ test_target = [
   'unittest_layers.cpp',
   'unittest_layers_impl.cpp',
   'unittest_layers_input.cpp',
+  'unittest_layers_weight.cpp',
   'unittest_layers_loss.cpp',
   'unittest_layers_fully_connected.cpp',
   'unittest_layers_batch_normalization.cpp',
diff --git a/test/unittest/layers/unittest_layers_weight.cpp b/test/unittest/layers/unittest_layers_weight.cpp
new file mode 100644 (file)
index 0000000..3bc6c8c
--- /dev/null
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2024 SeungBaek Hong <sb92.hong@samsung.com>
+ *
+ * @file unittest_layers_weight.cpp
+ * @date 30 July 2024
+ * @brief Weight Layer Test
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author SeungBaek Hong <sb92.hong@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+#include <tuple>
+
+#include <gtest/gtest.h>
+
+#include <layers_common_tests.h>
+#include <weight_layer.h>
+
+auto semantic_weight = LayerSemanticsParamType(
+  nntrainer::createLayer<nntrainer::WeightLayer>, nntrainer::WeightLayer::type,
+  {"dim=1:1:1"}, LayerCreateSetPropertyOptions::AVAILABLE_FROM_APP_CONTEXT,
+  false, 1);
+
+auto semantic_weight_multi = LayerSemanticsParamType(
+  nntrainer::createLayer<nntrainer::WeightLayer>, nntrainer::WeightLayer::type,
+  {"dim=1:1:1"}, LayerCreateSetPropertyOptions::AVAILABLE_FROM_APP_CONTEXT,
+  false, 2);
+
+GTEST_PARAMETER_TEST(Weight, LayerSemantics,
+                     ::testing::Values(semantic_weight, semantic_weight_multi));