*/
enum LayerType {
LAYER_IN = ML_TRAIN_LAYER_TYPE_INPUT, /**< Input Layer type */
+ LAYER_WEIGHT = ML_TRAIN_LAYER_TYPE_WEIGHT, /**< Weight Layer type */
LAYER_FC = ML_TRAIN_LAYER_TYPE_FC, /**< Fully Connected Layer type */
LAYER_SWIGLU = ML_TRAIN_LAYER_TYPE_SWIGLU, /**< Swiglu Layer type */
LAYER_BN = ML_TRAIN_LAYER_TYPE_BN, /**< Batch Normalization Layer type */
derivative */
LAYER_UPSAMPLE2D, /**< Upsample 2D Layer type */
LAYER_RMSNORM = ML_TRAIN_LAYER_TYPE_RMSNORM, /**<RMS NORM Layer */
- LAYER_UNKNOWN = ML_TRAIN_LAYER_TYPE_UNKNOWN /**< Unknown */
+ LAYER_UNKNOWN = ML_TRAIN_LAYER_TYPE_UNKNOWN /**< Unknown */
};
/**
return createLayer(LayerType::LAYER_IN, properties);
}
+/**
+ * @brief Helper function to create weight layer
+ */
+inline std::unique_ptr<Layer>
+WeightLayer(const std::vector<std::string> &properties = {}) {
+ return createLayer(LayerType::LAYER_WEIGHT, properties);
+}
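
A minimal usage sketch (editorial note, not part of the patch): the new helper behaves like the existing Input()/FullyConnected() factories, and the stored weight's shape is passed through the "dim" property handled by WeightLayer::setProperty below. The "1:1:4" value here is an assumed example mirroring the "dim=1:1:1" string used in the accompanying unit tests.

  auto w = ml::train::layer::WeightLayer({"dim=1:1:4"});
  // w->getType() == "weight"
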
+
/**
* @brief Helper function to create fully connected layer
*/
/**
* @brief Helper function to create RMS normalization layer for GPU
*/
-inline std::unique_ptr<Layer> RMSNormCl(
- const std::vector<std::string> &properties = {},
- const LayerComputeEngine &compute_engine = LayerComputeEngine::GPU) {
+inline std::unique_ptr<Layer>
+RMSNormCl(const std::vector<std::string> &properties = {},
+ const LayerComputeEngine &compute_engine = LayerComputeEngine::GPU) {
return createLayer(LayerType::LAYER_RMSNORM, properties, compute_engine);
}
28, /**< Positional Encoding Layer type (Since 7.0) */
ML_TRAIN_LAYER_TYPE_IDENTITY = 29, /**< Identity Layer type (Since 8.0) */
ML_TRAIN_LAYER_TYPE_SWIGLU = 30, /**< Swiglu Layer type */
+  ML_TRAIN_LAYER_TYPE_WEIGHT = 31, /**< Weight Layer type (Since 9.0) */
ML_TRAIN_LAYER_TYPE_PREPROCESS_FLIP =
300, /**< Preprocess flip Layer (Since 6.5) */
ML_TRAIN_LAYER_TYPE_PREPROCESS_TRANSLATE =
ML_TRAIN_LAYER_TYPE_LOSS_CROSS_ENTROPY_SOFTMAX = 502, /**< Cross Entropy with
Softmax Loss Layer type (Since 6.5) */
ML_TRAIN_LAYER_TYPE_RMSNORM = 503, /**< RMS Normalization Layer type */
- ML_TRAIN_LAYER_TYPE_UNKNOWN = 999 /**< Unknown Layer */
+ ML_TRAIN_LAYER_TYPE_UNKNOWN = 999 /**< Unknown Layer */
} ml_train_layer_type_e;
/**
#include <split_layer.h>
#include <time_dist.h>
#include <upsample2d_layer.h>
+#include <weight_layer.h>
#include <zoneout_lstmcell.h>
#ifdef ENABLE_TFLITE_BACKBONE
using LayerType = ml::train::LayerType;
ac.registerFactory(nntrainer::createLayer<InputLayer>, InputLayer::type,
LayerType::LAYER_IN);
+ ac.registerFactory(nntrainer::createLayer<WeightLayer>, WeightLayer::type,
+ LayerType::LAYER_WEIGHT);
ac.registerFactory(nntrainer::createLayer<FullyConnectedLayer>,
FullyConnectedLayer::type, LayerType::LAYER_FC);
ac.registerFactory(nntrainer::createLayer<BatchNormalizationLayer>,
using prop_tag = bool_prop_tag;
};
+/**
+ * @brief Tensor Dimension property
+ *
+ */
+class TensorDimension : public TensorDimProperty {
+public:
+ static constexpr const char *key = "dim"; /**< unique key to access */
+ using prop_tag = dimension_prop_tag; /**< property type */
+};
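
A hedged illustration (editorial, assuming nntrainer's usual property-string mechanism): the new "dim" key lets callers size the weight after construction.

  layer->setProperty({"dim=1:1:1"}); // same property string the unit test uses
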
+
/**
* @brief trainable property, use this to set and check if a certain layer is
* trainable
layer_sources = [
'activation_layer.cpp',
+ 'weight_layer.cpp',
'addition_layer.cpp',
'attention_layer.cpp',
'mol_attention_layer.cpp',
--- /dev/null
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2024 SeungBaek Hong <sb92.hong@samsung.com>
+ *
+ * @file weight_layer.cpp
+ * @date 2 August 2024
+ * @brief This is a layer that simply stores a weight tensor without any
+ * operation.
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author SeungBaek Hong <sb92.hong@samsung.com>
+ * @bug No known bugs except for NYI items
+ *
+ */
+
+#include <common_properties.h>
+#include <layer_context.h>
+#include <lazy_tensor.h>
+#include <nntrainer_error.h>
+#include <nntrainer_log.h>
+#include <node_exporter.h>
+#include <util_func.h>
+#include <weight_layer.h>
+
+#include <iostream>
+#include <limits>
+
+namespace nntrainer {
+
+static constexpr size_t SINGLE_INOUT_IDX = 0;
+
+WeightLayer::WeightLayer() : LayerImpl() {
+ weight_idx.fill(std::numeric_limits<unsigned>::max());
+}
+
+void WeightLayer::finalize(InitLayerContext &context) {
+ auto &weight_regularizer =
+ std::get<props::WeightRegularizer>(*layer_impl_props);
+ auto &weight_regularizer_constant =
+ std::get<props::WeightRegularizerConstant>(*layer_impl_props);
+ auto &weight_initializer =
+ std::get<props::WeightInitializer>(*layer_impl_props);
+ auto &weight_decay = std::get<props::WeightDecay>(*layer_impl_props);
+
+ const auto &weight_dim = std::get<props::TensorDimension>(weight_props).get();
+
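+  // the layer has no input; its single output shares the dimension of the
+  // stored weight (tensor type is taken from the context)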
+ std::vector<TensorDim> output_dims(1);
+
+ output_dims[0] = weight_dim;
+
+ output_dims[0].setTensorType(
+ {context.getFormat(), context.getActivationDataType()});
+
+ context.setOutputDimensions(output_dims);
+
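+  // request one trainable weight of the given dimension; the returned index
+  // is reused in forwarding()/calcGradient()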
+ weight_idx[0] = context.requestWeight(
+ weight_dim, weight_initializer, weight_regularizer,
+ weight_regularizer_constant, weight_decay, "weight", true);
+}
+
+void WeightLayer::exportTo(Exporter &exporter,
+ const ml::train::ExportMethods &method) const {
+ LayerImpl::exportTo(exporter, method);
+ exporter.saveResult(weight_props, method, this);
+}
+
+void WeightLayer::setProperty(const std::vector<std::string> &values) {
+ auto remain_props = loadProperties(values, weight_props);
+ LayerImpl::setProperty(remain_props);
+}
+
+void WeightLayer::forwarding(RunLayerContext &context, bool training) {
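+  // forward pass: the output tensor is just a copy of the stored weight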
+ Tensor &weight = context.getWeight(weight_idx[0]);
+ Tensor &output = context.getOutput(SINGLE_INOUT_IDX);
+ output.copy(weight);
+}
+
+void WeightLayer::calcDerivative(RunLayerContext &context) {
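+  // the layer has no input tensor, so there is no derivative to propagate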
+ throw exception::not_supported(
+ "calcDerivative for weight layer is not supported");
+}
+
+void WeightLayer::calcGradient(RunLayerContext &context) {
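+  // output == weight, so the weight gradient is exactly the incoming derivative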
+ Tensor &djdw = context.getWeightGrad(weight_idx[0]);
+ const Tensor &derivative_ = context.getIncomingDerivative(SINGLE_INOUT_IDX);
+ djdw.copy(derivative_);
+}
+
+} /* namespace nntrainer */
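
Editorial summary (not part of the patch): the layer is effectively an identity map from its weight to its output, so gradients pass straight through.

  // forward:   y = W           (forwarding() copies the weight into the output)
  // backward:  dL/dW = dL/dy   (calcGradient() copies the incoming derivative)
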
--- /dev/null
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2024 SeungBaek Hong <sb92.hong@samsung.com>
+ *
+ * @file weight_layer.h
+ * @date 2 August 2024
+ * @brief This is a layer that simply stores a weight tensor without any
+ * operation.
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author SeungBaek Hong <sb92.hong@samsung.com>
+ * @bug No known bugs except for NYI items
+ *
+ */
+
+#ifndef __WEIGHT_LAYER_H__
+#define __WEIGHT_LAYER_H__
+#ifdef __cplusplus
+
+#include <common_properties.h>
+#include <layer_impl.h>
+
+namespace nntrainer {
+
+/**
+ * @class Weight Layer
+ * @brief A layer that simply stores a weight tensor
+ */
+class WeightLayer : public LayerImpl {
+public:
+ /**
+ * @brief Constructor of Weight Layer
+ */
+ WeightLayer();
+
+ /**
+ * @brief Destructor of Weight Layer
+ */
+ ~WeightLayer() = default;
+
+ /**
+ * @brief Move constructor.
+ * @param[in] WeightLayer &&
+ */
+ WeightLayer(WeightLayer &&rhs) noexcept = default;
+
+ /**
+ * @brief Move assignment operator.
+   * @param[in] rhs WeightLayer to be moved.
+ */
+ WeightLayer &operator=(WeightLayer &&rhs) = default;
+
+ /**
+ * @copydoc Layer::finalize(InitLayerContext &context)
+ */
+ void finalize(InitLayerContext &context) override;
+
+ /**
+ * @copydoc Layer::forwarding(RunLayerContext &context, bool training)
+ */
+ void forwarding(RunLayerContext &context, bool training) override;
+
+ /**
+ * @copydoc Layer::calcDerivative(RunLayerContext &context)
+ */
+ void calcDerivative(RunLayerContext &context) override;
+
+ /**
+ * @copydoc Layer::calcGradient(RunLayerContext &context)
+ */
+ void calcGradient(RunLayerContext &context) override;
+
+ /**
+ * @copydoc Layer::exportTo(Exporter &exporter, ml::train::ExportMethods
+ * method)
+ */
+ void exportTo(Exporter &exporter,
+ const ml::train::ExportMethods &method) const override;
+
+ /**
+ * @copydoc Layer::getType()
+ */
+ const std::string getType() const override { return WeightLayer::type; };
+
+ /**
+ * @copydoc Layer::supportBackwarding()
+ */
+ bool supportBackwarding() const override { return true; }
+
+ /**
+ * @copydoc Layer::setProperty(const PropertyType type, const std::string
+ * &value)
+ */
+ void setProperty(const std::vector<std::string> &values) override;
+
+ inline static const std::string type = "weight";
+
+private:
+  std::tuple<props::TensorDimension> weight_props; /**< weight layer properties */
+ std::array<unsigned int, 1> weight_idx; /**< indices of the weights */
+};
+} // namespace nntrainer
+
+#endif /* __cplusplus */
+#endif /* __WEIGHT_LAYER_H__ */
static EnumInfo enum_info_;
};
+/**
+ * @brief abstract class for tensor dimension
+ *
+ */
+class TensorDimProperty : public Property<TensorDim> {
+public:
+ /**
+ * @brief Destroy the TensorDim Property object
+ *
+ */
+ virtual ~TensorDimProperty() = default;
+};
+
/**
* @brief abstract class for positive integer
*
EXPECT_NO_THROW(layer = ml::train::layer::Input());
EXPECT_EQ(layer->getType(), "input");
+ EXPECT_NO_THROW(layer = ml::train::layer::WeightLayer());
+ EXPECT_EQ(layer->getType(), "weight");
+
EXPECT_NO_THROW(layer = ml::train::layer::FullyConnected());
EXPECT_EQ(layer->getType(), "fully_connected");
'unittest_layers.cpp',
'unittest_layers_impl.cpp',
'unittest_layers_input.cpp',
+ 'unittest_layers_weight.cpp',
'unittest_layers_loss.cpp',
'unittest_layers_fully_connected.cpp',
'unittest_layers_batch_normalization.cpp',
--- /dev/null
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2024 SeungBaek Hong <sb92.hong@samsung.com>
+ *
+ * @file unittest_layers_weight.cpp
+ * @date 30 July 2024
+ * @brief Weight Layer Test
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author SeungBaek Hong <sb92.hong@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+#include <tuple>
+
+#include <gtest/gtest.h>
+
+#include <layers_common_tests.h>
+#include <weight_layer.h>
+
+auto semantic_weight = LayerSemanticsParamType(
+ nntrainer::createLayer<nntrainer::WeightLayer>, nntrainer::WeightLayer::type,
+ {"dim=1:1:1"}, LayerCreateSetPropertyOptions::AVAILABLE_FROM_APP_CONTEXT,
+ false, 1);
+
+auto semantic_weight_multi = LayerSemanticsParamType(
+ nntrainer::createLayer<nntrainer::WeightLayer>, nntrainer::WeightLayer::type,
+ {"dim=1:1:1"}, LayerCreateSetPropertyOptions::AVAILABLE_FROM_APP_CONTEXT,
+ false, 2);
+
+GTEST_PARAMETER_TEST(Weight, LayerSemantics,
+ ::testing::Values(semantic_weight, semantic_weight_multi));