LAYER_MULTI_HEAD_ATTENTION =
ML_TRAIN_LAYER_TYPE_MULTI_HEAD_ATTENTION, /**< Multi Head Attention Layer
type */
+ LAYER_POSITIONAL_ENCODING =
+ ML_TRAIN_LAYER_TYPE_POSITIONAL_ENCODING, /**< Positional Encoding Layer type
+ */
LAYER_PREPROCESS_FLIP =
ML_TRAIN_LAYER_TYPE_PREPROCESS_FLIP, /**< Preprocess flip Layer type */
LAYER_PREPROCESS_TRANSLATE =
return createLayer(LayerType::LAYER_MULTI_HEAD_ATTENTION, properties);
}
+/**
+ * @brief Helper function to create Positional Encoding Layer
+ */
+inline std::unique_ptr<Layer>
+PositionalEncoding(const std::vector<std::string> &properties = {}) {
+ return createLayer(LayerType::LAYER_POSITIONAL_ENCODING, properties);
+}
+
/**
* @brief Helper function to create Permute Layer
*/
#include <plugged_layer.h>
#include <plugged_optimizer.h>
#include <pooling2d_layer.h>
+#include <positional_encoding_layer.h>
#include <preprocess_flip_layer.h>
#include <preprocess_l2norm_layer.h>
#include <preprocess_translate_layer.h>
LayerType::LAYER_MULTI_HEAD_ATTENTION);
ac.registerFactory(nntrainer::createLayer<ReduceMeanLayer>,
ReduceMeanLayer::type, LayerType::LAYER_REDUCE_MEAN);
+ ac.registerFactory(nntrainer::createLayer<PositionalEncodingLayer>,
+ PositionalEncodingLayer::type,
+ LayerType::LAYER_POSITIONAL_ENCODING);
ac.registerFactory(nntrainer::createLayer<IdentityLayer>, IdentityLayer::type,
LayerType::LAYER_IDENTITY);
'layer_context.cpp',
'reshape_layer.cpp',
'reduce_mean_layer.cpp',
+ 'positional_encoding_layer.cpp',
'identity_layer.cpp'
]
--- /dev/null
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2022 Hyeonseok Lee <hs89.lee@samsung.com>
+ *
+ * @file positional_encoding_layer.cpp
+ * @date 16 August 2022
+ * @brief This file contains the positional encoding layer for the transformer
+ * @see https://github.com/nnstreamer/nntrainer
+ *      https://arxiv.org/abs/1706.03762
+ * @author Hyeonseok Lee <hs89.lee@samsung.com>
+ * @bug No known bugs except for NYI items
+ *
+ */
+
+#include <limits>
+#include <math.h>
+
+#include <positional_encoding_layer.h>
+#include <tensor_dim.h>
+
+namespace nntrainer {
+
+static constexpr size_t SINGLE_INOUT_IDX = 0;
+
+enum PositionalEncodingParams {
+ positional_encoding,
+};
+
+PositionalEncodingLayer::PositionalEncodingLayer() :
+  positional_encoding_props(props::MaxTimestep()),
+  isPEcalculated(false) {
+  weight_idx.fill(std::numeric_limits<unsigned>::max());
+}
+
+PositionalEncodingLayer::~PositionalEncodingLayer() {}
+
+void PositionalEncodingLayer::finalize(InitLayerContext &context) {
+ unsigned int max_token_size =
+ std::get<props::MaxTimestep>(positional_encoding_props);
+
+ std::vector<ml::train::TensorDim> input_dims = context.getInputDimensions();
+ context.setOutputDimensions(input_dims);
+
+ unsigned int model_dim = input_dims[SINGLE_INOUT_IDX].width();
+
+ ml::train::TensorDim pe_dim({max_token_size, model_dim});
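+  // Request a persistent (MAX_LIFESPAN), non-trainable tensor for the
+  // encoding table; it is filled lazily on the first forward pass.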
+ weight_idx[PositionalEncodingParams::positional_encoding] =
+ context.requestTensor(pe_dim, "positional_encoding",
+ nntrainer::Tensor::Initializer::NONE, false,
+ nntrainer::TensorLifespan::MAX_LIFESPAN);
+}
+
+void PositionalEncodingLayer::forwarding(RunLayerContext &context,
+ bool training) {
+ const nntrainer::Tensor &input = context.getInput(SINGLE_INOUT_IDX);
+ nntrainer::Tensor &output = context.getOutput(SINGLE_INOUT_IDX);
+
+ nntrainer::Tensor &pe = context.getTensor(
+ weight_idx[PositionalEncodingParams::positional_encoding]);
+
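+  // Use only the first (height x width) rows of the precomputed
+  // (max_timestep x model_dim) table, so the added encoding matches the
+  // current sequence length.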
+ TensorDim input_dim = input.getDim();
+ TensorDim pe_partial_dim({input_dim.height(), input_dim.width()});
+ nntrainer::Tensor pe_partial = pe.getSharedDataTensor(pe_partial_dim, 0);
+
+ if (!isPEcalculated) {
+ calculatePositionalEncoding(context);
+ }
+
+ input.add(pe_partial, output);
+}
+
+void PositionalEncodingLayer::calcDerivative(RunLayerContext &context) {
+ const nntrainer::Tensor &incoming_derivative =
+ context.getIncomingDerivative(SINGLE_INOUT_IDX);
+ nntrainer::Tensor &outgoing_derivative =
+ context.getOutgoingDerivative(SINGLE_INOUT_IDX);
+
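+  // The positional encoding is a constant added element-wise, so the
+  // gradient flows through unchanged.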
+ outgoing_derivative.copyData(incoming_derivative);
+}
+
+void PositionalEncodingLayer::calculatePositionalEncoding(
+ nntrainer::RunLayerContext &context) {
+ unsigned int max_token_size =
+ std::get<props::MaxTimestep>(positional_encoding_props);
+
+ unsigned int model_dim = context.getInput(SINGLE_INOUT_IDX).getDim().width();
+
+ nntrainer::Tensor &pe = context.getTensor(
+ weight_idx[PositionalEncodingParams::positional_encoding]);
+
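+  // Sinusoidal encoding from "Attention Is All You Need":
+  //   PE(pos, 2i)   = sin(pos / 10000^(2i / model_dim))
+  //   PE(pos, 2i+1) = cos(pos / 10000^(2i / model_dim))
+  // jj = 2 * floor(j / 2) gives the shared frequency index of each
+  // sin/cos pair.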
+ float value;
+ for (unsigned int i = 0; i < max_token_size; ++i) {
+ for (unsigned int j = 0; j < model_dim; ++j) {
+ unsigned int jj = (j >> 1) << 1;
+ value = i / powf(10000.0f, jj / (float)model_dim);
+ if (j & 1) {
+ value = cosf(value);
+ } else {
+ value = sinf(value);
+ }
+ pe.setValue(0, 0, i, j, value);
+ }
+ }
+
+ isPEcalculated = true;
+}
+
+void PositionalEncodingLayer::setProperty(
+ const std::vector<std::string> &values) {
+ auto remain_props = loadProperties(values, positional_encoding_props);
+  NNTR_THROW_IF(!remain_props.empty(), std::invalid_argument)
+    << "[positional encoding layer] Unknown layer properties, count: " +
+         std::to_string(remain_props.size());
+}
+
+void PositionalEncodingLayer::exportTo(
+ Exporter &exporter, const ml::train::ExportMethods &method) const {
+ exporter.saveResult(positional_encoding_props, method, this);
+}
+
+} /* namespace nntrainer */
--- /dev/null
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2022 Hyeonseok Lee <hs89.lee@samsung.com>
+ *
+ * @file positional_encoding_layer.h
+ * @date 16 August 2022
+ * @brief This file contains the positional encoding layer for the transformer
+ * @see https://github.com/nnstreamer/nntrainer
+ *      https://arxiv.org/abs/1706.03762
+ * @author Hyeonseok Lee <hs89.lee@samsung.com>
+ * @bug No known bugs except for NYI items
+ *
+ */
+
+#ifndef __POSITIONAL_ENCODING_LAYER_H__
+#define __POSITIONAL_ENCODING_LAYER_H__
+#ifdef __cplusplus
+
+#include <base_properties.h>
+#include <layer_context.h>
+#include <layer_devel.h>
+#include <node_exporter.h>
+
+namespace nntrainer {
+
+/**
+ * @class PositionalEncodingLayer
+ * @brief Implementation of positional encoding layer which is described in
+ * paper "Attention is all you need"
+ */
+class PositionalEncodingLayer : public Layer {
+public:
+ /**
+ * @brief Constructor of PositionalEncodingLayer
+ */
+ PositionalEncodingLayer();
+
+ /**
+ * @brief Destructor of PositionalEncodingLayer
+ */
+ ~PositionalEncodingLayer();
+
+ /**
+ * @brief Move constructor of PositionalEncodingLayer.
+ * @param[in] PositionalEncodingLayer &&
+ */
+ PositionalEncodingLayer(PositionalEncodingLayer &&rhs) noexcept = default;
+
+ /**
+ * @brief Move assignment operator.
+ * @param[in] rhs PositionalEncodingLayer to be moved.
+ */
+ PositionalEncodingLayer &operator=(PositionalEncodingLayer &&rhs) = default;
+
+ /**
+ * @copydoc Layer::finalize(InitLayerContext &context)
+ */
+ void finalize(InitLayerContext &context) override;
+
+ /**
+ * @copydoc Layer::forwarding(RunLayerContext &context, bool training)
+ */
+ void forwarding(RunLayerContext &context, bool training) override;
+
+ /**
+ * @copydoc Layer::calcDerivative(RunLayerContext &context)
+ */
+ void calcDerivative(RunLayerContext &context) override;
+
+ /**
+ * @copydoc bool supportBackwarding() const
+ */
+ bool supportBackwarding() const override { return true; };
+
+ /**
+ * @copydoc Layer::exportTo(Exporter &exporter, ml::train::ExportMethods
+ * method)
+ */
+ void exportTo(Exporter &exporter,
+ const ml::train::ExportMethods &method) const override;
+
+ /**
+ * @copydoc Layer::setProperty(const std::vector<std::string> &values)
+ */
+ void setProperty(const std::vector<std::string> &values) override;
+
+ /**
+ * @copydoc Layer::getType()
+ */
+ const std::string getType() const override {
+ return PositionalEncodingLayer::type;
+ };
+
+ inline static const std::string type = "positional_encoding";
+
+private:
+ std::tuple<props::MaxTimestep> positional_encoding_props;
+ std::array<unsigned int, 1> weight_idx;
+  bool isPEcalculated; // flag indicating whether the positional encoding
+                       // table has already been calculated
+
+ /**
+ * @brief calculate positional encoding
+ * @param context Context of the layer
+ */
+ void calculatePositionalEncoding(RunLayerContext &context);
+};
+
+} // namespace nntrainer
+
+#endif /* __cplusplus */
+#endif /* __POSITIONAL_ENCODING_LAYER_H__ */