[Layer] add Weight Layer
author Seungbaek Hong <sb92.hong@samsung.com>
Tue, 30 Jul 2024 06:17:17 +0000 (15:17 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Fri, 9 Aug 2024 00:36:49 +0000 (09:36 +0900)
- This layer contains only weights; it is used to build a tensor-level graph.

**Self-evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test:   [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Seungbaek Hong <sb92.hong@samsung.com>
api/ccapi/include/layer.h
api/nntrainer-api-common.h
nntrainer/app_context.cpp
nntrainer/layers/common_properties.h
nntrainer/layers/meson.build
nntrainer/layers/weight_layer.cpp [new file with mode: 0644]
nntrainer/layers/weight_layer.h [new file with mode: 0644]
nntrainer/utils/base_properties.h
test/ccapi/unittest_ccapi.cpp
test/unittest/layers/meson.build
test/unittest/layers/unittest_layers_weight.cpp [new file with mode: 0644]

index e384231e6fa3deab758d47d6e6bb3296156a9232..19266ae5a760f15ee8a77941f046876b29771571 100644 (file)
@@ -36,6 +36,7 @@ namespace train {
  */
 enum LayerType {
   LAYER_IN = ML_TRAIN_LAYER_TYPE_INPUT,      /**< Input Layer type */
+  LAYER_WEIGHT = ML_TRAIN_LAYER_TYPE_WEIGHT, /**< Weight Layer type */
   LAYER_FC = ML_TRAIN_LAYER_TYPE_FC,         /**< Fully Connected Layer type */
   LAYER_SWIGLU = ML_TRAIN_LAYER_TYPE_SWIGLU, /**< Swiglu Layer type */
   LAYER_BN = ML_TRAIN_LAYER_TYPE_BN, /**< Batch Normalization Layer type */
@@ -102,7 +103,7 @@ enum LayerType {
                                      derivative */
   LAYER_UPSAMPLE2D,               /**< Upsample 2D Layer type */
   LAYER_RMSNORM = ML_TRAIN_LAYER_TYPE_RMSNORM, /**< RMS NORM Layer */
-  LAYER_UNKNOWN = ML_TRAIN_LAYER_TYPE_UNKNOWN /**< Unknown */
+  LAYER_UNKNOWN = ML_TRAIN_LAYER_TYPE_UNKNOWN  /**< Unknown */
 };
 
 /**
@@ -290,6 +291,14 @@ Input(const std::vector<std::string> &properties = {}) {
   return createLayer(LayerType::LAYER_IN, properties);
 }
 
+/**
+ * @brief Helper function to create weight layer
+ */
+inline std::unique_ptr<Layer>
+WeightLayer(const std::vector<std::string> &properties = {}) {
+  return createLayer(LayerType::LAYER_WEIGHT, properties);
+}
+
 /**
  * @brief Helper function to create fully connected layer
  */
@@ -311,9 +320,9 @@ Swiglu(const std::vector<std::string> &properties = {},
 /**
  * @brief Helper function to create RMS normalization layer for GPU
  */
-inline std::unique_ptr<Layer> RMSNormCl(
-  const std::vector<std::string> &properties = {},
-  const LayerComputeEngine &compute_engine = LayerComputeEngine::GPU) {
+inline std::unique_ptr<Layer>
+RMSNormCl(const std::vector<std::string> &properties = {},
+          const LayerComputeEngine &compute_engine = LayerComputeEngine::GPU) {
   return createLayer(LayerType::LAYER_RMSNORM, properties, compute_engine);
 }
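
For reference, creating the new layer through this helper is a one-liner; a
minimal sketch (the dim value is illustrative and uses the TensorDimension
property introduced in this commit):

  #include <layer.h> // the ccapi header patched above

  int main() {
    auto weight = ml::train::layer::WeightLayer({"dim=1:1:4"});
    return weight->getType() == "weight" ? 0 : 1;
  }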
 
index 76d9976f3b50597f1a89edf788e5bb0d6f4e18ac..97a5a71fad62b6ec6abc1328f1f79b3cf6b04db4 100644 (file)
@@ -64,6 +64,7 @@ typedef enum {
     28, /**< Positional Encoding Layer type (Since 7.0) */
   ML_TRAIN_LAYER_TYPE_IDENTITY = 29, /**< Identity Layer type (Since 8.0) */
   ML_TRAIN_LAYER_TYPE_SWIGLU = 30,   /**< Swiglu Layer type */
+  ML_TRAIN_LAYER_TYPE_WEIGHT = 31,   /**< Weight Layer type (Since 9.0) */
   ML_TRAIN_LAYER_TYPE_PREPROCESS_FLIP =
     300, /**< Preprocess flip Layer (Since 6.5) */
   ML_TRAIN_LAYER_TYPE_PREPROCESS_TRANSLATE =
@@ -77,7 +78,7 @@ typedef enum {
   ML_TRAIN_LAYER_TYPE_LOSS_CROSS_ENTROPY_SOFTMAX = 502, /**< Cross Entropy with
                                        Softmax Loss Layer type (Since 6.5) */
   ML_TRAIN_LAYER_TYPE_RMSNORM = 503, /**< RMS Norm Layer type */
-  ML_TRAIN_LAYER_TYPE_UNKNOWN = 999                     /**< Unknown Layer */
+  ML_TRAIN_LAYER_TYPE_UNKNOWN = 999  /**< Unknown Layer */
 } ml_train_layer_type_e;
 
 /**
index fad75f9ae0525968d279ac8bba0ab19236b8b586..09b6fd10f4bbf1c9714a97edc00ba57d0d61a01e 100644 (file)
@@ -74,6 +74,7 @@
 #include <split_layer.h>
 #include <time_dist.h>
 #include <upsample2d_layer.h>
+#include <weight_layer.h>
 #include <zoneout_lstmcell.h>
 
 #ifdef ENABLE_TFLITE_BACKBONE
@@ -245,6 +246,8 @@ static void add_default_object(AppContext &ac) {
   using LayerType = ml::train::LayerType;
   ac.registerFactory(nntrainer::createLayer<InputLayer>, InputLayer::type,
                      LayerType::LAYER_IN);
+  ac.registerFactory(nntrainer::createLayer<WeightLayer>, WeightLayer::type,
+                     LayerType::LAYER_WEIGHT);
   ac.registerFactory(nntrainer::createLayer<FullyConnectedLayer>,
                      FullyConnectedLayer::type, LayerType::LAYER_FC);
   ac.registerFactory(nntrainer::createLayer<BatchNormalizationLayer>,
index 2591ab454b3f0eadf8565152f23954a71e45b45f..64818be4fc2698e0643948b2e6b9a087a56967a9 100644 (file)
@@ -114,6 +114,16 @@ public:
   using prop_tag = bool_prop_tag;
 };
 
+/**
+ * @brief Tensor Dimension property
+ *
+ */
+class TensorDimension : public TensorDimProperty {
+public:
+  static constexpr const char *key = "dim"; /**< unique key to access */
+  using prop_tag = dimension_prop_tag;      /**< property type */
+};
+
 /**
  * @brief trainable property, use this to set and check whether a certain
  * layer is trainable
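
The new property plugs into the existing string conversion machinery; a hedged
sketch of how a "dim" value would be parsed (assuming the generic from_string
helper and the dimension_prop_tag converter already provided by
base_properties.h):

  #include <common_properties.h>

  nntrainer::props::TensorDimension dim;
  nntrainer::from_string("1:1:4", dim); // colon-separated dims, as in the tests
  const nntrainer::TensorDim &d = dim.get();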
index 75a59605ab902cad91a4829199b471702e147742..c612d8c177ef4165da4a44f4153dab5976255fad 100644 (file)
@@ -4,6 +4,7 @@ nntrainer_inc_abs += meson.current_source_dir() / 'loss'
 
 layer_sources = [
   'activation_layer.cpp',
+  'weight_layer.cpp',
   'addition_layer.cpp',
   'attention_layer.cpp',
   'mol_attention_layer.cpp',
diff --git a/nntrainer/layers/weight_layer.cpp b/nntrainer/layers/weight_layer.cpp
new file mode 100644 (file)
index 0000000..8404348
--- /dev/null
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2024 SeungBaek Hong <sb92.hong@samsung.com>
+ *
+ * @file   weight_layer.cpp
+ * @date   2 August 2024
+ * @brief  This is a layer that simply stores a weight tensor without any
+ * operation.
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author SeungBaek Hong <sb92.hong@samsung.com>
+ * @bug    No known bugs except for NYI items
+ *
+ */
+
+#include <common_properties.h>
+#include <layer_context.h>
+#include <lazy_tensor.h>
+#include <nntrainer_error.h>
+#include <nntrainer_log.h>
+#include <node_exporter.h>
+#include <util_func.h>
+#include <weight_layer.h>
+
+#include <iostream>
+
+namespace nntrainer {
+
+static constexpr size_t SINGLE_INOUT_IDX = 0;
+
+WeightLayer::WeightLayer() : LayerImpl() {
+  weight_idx.fill(std::numeric_limits<unsigned>::max());
+}
+
+void WeightLayer::finalize(InitLayerContext &context) {
+  auto &weight_regularizer =
+    std::get<props::WeightRegularizer>(*layer_impl_props);
+  auto &weight_regularizer_constant =
+    std::get<props::WeightRegularizerConstant>(*layer_impl_props);
+  auto &weight_initializer =
+    std::get<props::WeightInitializer>(*layer_impl_props);
+  auto &weight_decay = std::get<props::WeightDecay>(*layer_impl_props);
+
+  const auto &weight_dim = std::get<props::TensorDimension>(weight_props).get();
+
+  std::vector<TensorDim> output_dims(1);
+
+  output_dims[0] = weight_dim;
+
+  output_dims[0].setTensorType(
+    {context.getFormat(), context.getActivationDataType()});
+
+  context.setOutputDimensions(output_dims);
+
+  weight_idx[0] = context.requestWeight(
+    weight_dim, weight_initializer, weight_regularizer,
+    weight_regularizer_constant, weight_decay, "weight", true);
+}
+
+void WeightLayer::exportTo(Exporter &exporter,
+                           const ml::train::ExportMethods &method) const {
+  LayerImpl::exportTo(exporter, method);
+  exporter.saveResult(weight_props, method, this);
+}
+
+void WeightLayer::setProperty(const std::vector<std::string> &values) {
+  auto remain_props = loadProperties(values, weight_props);
+  LayerImpl::setProperty(remain_props);
+}
+
+void WeightLayer::forwarding(RunLayerContext &context, bool training) {
+  Tensor &weight = context.getWeight(weight_idx[0]);
+  Tensor &output = context.getOutput(SINGLE_INOUT_IDX);
+  output.copy(weight);
+}
+
+void WeightLayer::calcDerivative(RunLayerContext &context) {
+  throw exception::not_supported(
+    "calcDerivative for weight layer is not supported");
+}
+
+void WeightLayer::calcGradient(RunLayerContext &context) {
+  Tensor &djdw = context.getWeightGrad(weight_idx[0]);
+  const Tensor &derivative_ = context.getIncomingDerivative(SINGLE_INOUT_IDX);
+  djdw.copy(derivative_);
+}
+
+} /* namespace nntrainer */
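
In short, forwarding publishes the stored weight as the layer's output, and
calcGradient copies the derivative arriving at that output straight into the
weight's gradient; calcDerivative throws because the layer has no input to
propagate into. A standalone toy model of this contract (plain C++ with flat
buffers standing in for Tensor, not nntrainer code):

  #include <cassert>
  #include <vector>

  struct ToyWeightLayer {
    std::vector<float> weight;      // the stored trainable tensor
    std::vector<float> weight_grad; // dJ/dW, same shape as weight

    explicit ToyWeightLayer(std::vector<float> w)
      : weight(std::move(w)), weight_grad(weight.size(), 0.0f) {}

    // forwarding(): the output is a copy of the weight.
    std::vector<float> forward() const { return weight; }

    // calcGradient(): the incoming derivative is copied into dJ/dW.
    void backward(const std::vector<float> &incoming_derivative) {
      weight_grad = incoming_derivative;
    }
  };

  int main() {
    ToyWeightLayer layer({1.0f, 2.0f, 3.0f});
    assert(layer.forward() == (std::vector<float>{1.0f, 2.0f, 3.0f}));
    layer.backward({0.1f, 0.2f, 0.3f});
    assert(layer.weight_grad[2] == 0.3f);
    return 0;
  }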
diff --git a/nntrainer/layers/weight_layer.h b/nntrainer/layers/weight_layer.h
new file mode 100644 (file)
index 0000000..6c3a42f
--- /dev/null
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2024 SeungBaek Hong <sb92.hong@samsung.com>
+ *
+ * @file   weight_layer.h
+ * @date   2 August 2024
+ * @brief  This is a layer that simply stores a weight tensor without any
+ * operation.
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author SeungBaek Hong <sb92.hong@samsung.com>
+ * @bug    No known bugs except for NYI items
+ *
+ */
+
+#ifndef __WEIGHT_LAYER_H__
+#define __WEIGHT_LAYER_H__
+#ifdef __cplusplus
+
+#include <common_properties.h>
+#include <layer_impl.h>
+
+namespace nntrainer {
+
+/**
+ * @class   Weight Layer
+ * @brief   A layer that simply stores a weight tensor
+ */
+class WeightLayer : public LayerImpl {
+public:
+  /**
+   * @brief     Constructor of Weight Layer
+   */
+  WeightLayer();
+
+  /**
+   * @brief     Destructor of Weight Layer
+   */
+  ~WeightLayer() = default;
+
+  /**
+   *  @brief  Move constructor.
+   *  @param[in] WeightLayer &&
+   */
+  WeightLayer(WeightLayer &&rhs) noexcept = default;
+
+  /**
+   * @brief  Move assignment operator.
+   * @param[in] rhs WeightLayer to be moved.
+   */
+  WeightLayer &operator=(WeightLayer &&rhs) = default;
+
+  /**
+   * @copydoc Layer::finalize(InitLayerContext &context)
+   */
+  void finalize(InitLayerContext &context) override;
+
+  /**
+   * @copydoc Layer::forwarding(RunLayerContext &context, bool training)
+   */
+  void forwarding(RunLayerContext &context, bool training) override;
+
+  /**
+   * @copydoc Layer::calcDerivative(RunLayerContext &context)
+   */
+  void calcDerivative(RunLayerContext &context) override;
+
+  /**
+   * @copydoc Layer::calcGradient(RunLayerContext &context)
+   */
+  void calcGradient(RunLayerContext &context) override;
+
+  /**
+   * @copydoc Layer::exportTo(Exporter &exporter, ml::train::ExportMethods
+   * method)
+   */
+  void exportTo(Exporter &exporter,
+                const ml::train::ExportMethods &method) const override;
+
+  /**
+   * @copydoc Layer::getType()
+   */
+  const std::string getType() const override { return WeightLayer::type; };
+
+  /**
+   * @copydoc Layer::supportBackwarding()
+   */
+  bool supportBackwarding() const override { return true; }
+
+  /**
+   * @copydoc Layer::setProperty(const PropertyType type, const std::string
+   * &value)
+   */
+  void setProperty(const std::vector<std::string> &values) override;
+
+  inline static const std::string type = "weight";
+
+private:
+  std::tuple<props::TensorDimension> weight_props;
+  std::array<unsigned int, 1> weight_idx; /**< indices of the weights */
+};
+} // namespace nntrainer
+
+#endif /* __cplusplus */
+#endif /* __WEIGHT_LAYER_H__ */
index 259637a6d97d0f59db5e29296275a69d022b1826..c24948a33bddd1240f60addefe81910cb7df3c37 100644 (file)
@@ -277,6 +277,19 @@ public:
   static EnumInfo enum_info_;
 };
 
+/**
+ * @brief abstract class for tensor dimension
+ *
+ */
+class TensorDimProperty : public Property<TensorDim> {
+public:
+  /**
+   * @brief Destroy the TensorDim Property object
+   *
+   */
+  virtual ~TensorDimProperty() = default;
+};
+
 /**
  * @brief abstract class for positive integer
  *
index 98c006430d30154137917707581620b41d8fb99d..34c99f4f5ba9e915d4ac20dbacbd6ecb0fa6a90e 100644 (file)
@@ -61,6 +61,9 @@ TEST(ccapi_layer, construct_02_p) {
   EXPECT_NO_THROW(layer = ml::train::layer::Input());
   EXPECT_EQ(layer->getType(), "input");
 
+  EXPECT_NO_THROW(layer = ml::train::layer::WeightLayer());
+  EXPECT_EQ(layer->getType(), "weight");
+
   EXPECT_NO_THROW(layer = ml::train::layer::FullyConnected());
   EXPECT_EQ(layer->getType(), "fully_connected");
 
index 59992b17f607811bbb30bc8494e505b0525f4a59..c65609e8816256a2ba0d4852b71f574cfea733b5 100644 (file)
@@ -36,6 +36,7 @@ test_target = [
   'unittest_layers.cpp',
   'unittest_layers_impl.cpp',
   'unittest_layers_input.cpp',
+  'unittest_layers_weight.cpp',
   'unittest_layers_loss.cpp',
   'unittest_layers_fully_connected.cpp',
   'unittest_layers_batch_normalization.cpp',
diff --git a/test/unittest/layers/unittest_layers_weight.cpp b/test/unittest/layers/unittest_layers_weight.cpp
new file mode 100644 (file)
index 0000000..3bc6c8c
--- /dev/null
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2024 SeungBaek Hong <sb92.hong@samsung.com>
+ *
+ * @file unittest_layers_weight.cpp
+ * @date 30 July 2024
+ * @brief Weight Layer Test
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author SeungBaek Hong <sb92.hong@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+#include <tuple>
+
+#include <gtest/gtest.h>
+
+#include <layers_common_tests.h>
+#include <weight_layer.h>
+
+auto semantic_weight = LayerSemanticsParamType(
+  nntrainer::createLayer<nntrainer::WeightLayer>, nntrainer::WeightLayer::type,
+  {"dim=1:1:1"}, LayerCreateSetPropertyOptions::AVAILABLE_FROM_APP_CONTEXT,
+  false, 1);
+
+auto semantic_weight_multi = LayerSemanticsParamType(
+  nntrainer::createLayer<nntrainer::WeightLayer>, nntrainer::WeightLayer::type,
+  {"dim=1:1:1"}, LayerCreateSetPropertyOptions::AVAILABLE_FROM_APP_CONTEXT,
+  false, 2);
+
+GTEST_PARAMETER_TEST(Weight, LayerSemantics,
+                     ::testing::Values(semantic_weight, semantic_weight_multi));