[positional encoding] implement positional encoding layer
author hyeonseok lee <hs89.lee@samsung.com>
Thu, 25 Aug 2022 14:03:54 +0000 (23:03 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Wed, 7 Sep 2022 13:23:09 +0000 (22:23 +0900)
 - The positional encoding needs to be calculated only once,
   so request its tensor with the maximum lifespan

Signed-off-by: hyeonseok lee <hs89.lee@samsung.com>
api/ccapi/include/layer.h
nntrainer/app_context.cpp
nntrainer/layers/meson.build
nntrainer/layers/positional_encoding_layer.cpp [new file with mode: 0644]
nntrainer/layers/positional_encoding_layer.h [new file with mode: 0644]

index 332e566693dc635ef46869ed22967dd23c23aa3f..2f41e99de345b11e8a26356629ed988da96b06fa 100644 (file)
@@ -66,6 +66,9 @@ enum LayerType {
   LAYER_MULTI_HEAD_ATTENTION =
     ML_TRAIN_LAYER_TYPE_MULTI_HEAD_ATTENTION, /**< Multi Head Attention Layer
                                                  type */
+  LAYER_POSITIONAL_ENCODING =
+    ML_TRAIN_LAYER_TYPE_POSITIONAL_ENCODING, /**< Positional Encoding Layer type
+                                              */
   LAYER_PREPROCESS_FLIP =
     ML_TRAIN_LAYER_TYPE_PREPROCESS_FLIP, /**< Preprocess flip Layer type */
   LAYER_PREPROCESS_TRANSLATE =
@@ -436,6 +439,14 @@ MultiHeadAttention(const std::vector<std::string> &properties = {}) {
   return createLayer(LayerType::LAYER_MULTI_HEAD_ATTENTION, properties);
 }
 
+/**
+ * @brief Helper function to create Positional Encoding Layer
+ */
+inline std::unique_ptr<Layer>
+PositionalEncoding(const std::vector<std::string> &properties = {}) {
+  return createLayer(LayerType::LAYER_POSITIONAL_ENCODING, properties);
+}
+
 /**
  * @brief Helper function to create Permute Layer
  */
index 1c3fb8d46c138915ac8d71ef68dfd1c96e4d2669..02cf3db970a66b09d9f28aa8250c0f561914de4b 100644 (file)
@@ -63,6 +63,7 @@
 #include <plugged_layer.h>
 #include <plugged_optimizer.h>
 #include <pooling2d_layer.h>
+#include <positional_encoding_layer.h>
 #include <preprocess_flip_layer.h>
 #include <preprocess_l2norm_layer.h>
 #include <preprocess_translate_layer.h>
@@ -290,6 +291,9 @@ static void add_default_object(AppContext &ac) {
                      LayerType::LAYER_MULTI_HEAD_ATTENTION);
   ac.registerFactory(nntrainer::createLayer<ReduceMeanLayer>,
                      ReduceMeanLayer::type, LayerType::LAYER_REDUCE_MEAN);
+  ac.registerFactory(nntrainer::createLayer<PositionalEncodingLayer>,
+                     PositionalEncodingLayer::type,
+                     LayerType::LAYER_POSITIONAL_ENCODING);
   ac.registerFactory(nntrainer::createLayer<IdentityLayer>, IdentityLayer::type,
                      LayerType::LAYER_IDENTITY);
 
index f98fc15af2a10129cf4d489f2b25a0d19210098c..400a631acecf3ac8e105130e79562963a7a0cdd9 100644 (file)
@@ -41,6 +41,7 @@ layer_sources = [
   'layer_context.cpp',
   'reshape_layer.cpp',
   'reduce_mean_layer.cpp',
+  'positional_encoding_layer.cpp',
   'identity_layer.cpp'
 ]
 
diff --git a/nntrainer/layers/positional_encoding_layer.cpp b/nntrainer/layers/positional_encoding_layer.cpp
new file mode 100644 (file)
index 0000000..724e983
--- /dev/null
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2022 Hyeonseok Lee <hs89.lee@samsung.com>
+ *
+ * @file   positional_encoding_layer.cpp
+ * @date   16 August 2022
+ * @brief  This file contains the positional encoding layer used in transformers
+ * @see    https://github.com/nnstreamer/nntrainer
+ *         https://arxiv.org/abs/1706.03762
+ * @author Hyeonseok Lee <hs89.lee@samsung.com>
+ * @bug    No known bugs except for NYI items
+ *
+ */
+
+#include <math.h>
+#include <regex>
+
+#include <positional_encoding_layer.h>
+#include <tensor_dim.h>
+
+namespace nntrainer {
+
+static constexpr size_t SINGLE_INOUT_IDX = 0;
+
+enum PositionalEncodingParams {
+  positional_encoding,
+};
+
+PositionalEncodingLayer::PositionalEncodingLayer() :
+  positional_encoding_props(props::MaxTimestep()), isPEcalculated(false) {
+  weight_idx.fill(std::numeric_limits<unsigned>::max());
+}
+
+PositionalEncodingLayer::~PositionalEncodingLayer() {}
+
+void PositionalEncodingLayer::finalize(InitLayerContext &context) {
+  unsigned int max_token_size =
+    std::get<props::MaxTimestep>(positional_encoding_props);
+
+  std::vector<ml::train::TensorDim> input_dims = context.getInputDimensions();
+  context.setOutputDimensions(input_dims);
+
+  unsigned int model_dim = input_dims[SINGLE_INOUT_IDX].width();
+
+  ml::train::TensorDim pe_dim({max_token_size, model_dim});
+  weight_idx[PositionalEncodingParams::positional_encoding] =
+    context.requestTensor(pe_dim, "positional_encoding",
+                          nntrainer::Tensor::Initializer::NONE, false,
+                          nntrainer::TensorLifespan::MAX_LIFESPAN);
+}
+
+void PositionalEncodingLayer::forwarding(RunLayerContext &context,
+                                         bool training) {
+  const nntrainer::Tensor &input = context.getInput(SINGLE_INOUT_IDX);
+  nntrainer::Tensor &output = context.getOutput(SINGLE_INOUT_IDX);
+
+  nntrainer::Tensor &pe = context.getTensor(
+    weight_idx[PositionalEncodingParams::positional_encoding]);
+
+  if (!isPEcalculated) {
+    calculatePositionalEncoding(context);
+  }
+
+  TensorDim input_dim = input.getDim();
+  TensorDim pe_partial_dim({input_dim.height(), input_dim.width()});
+  nntrainer::Tensor pe_partial = pe.getSharedDataTensor(pe_partial_dim, 0);
+
+  input.add(pe_partial, output);
+}
+
+void PositionalEncodingLayer::calcDerivative(RunLayerContext &context) {
+  const nntrainer::Tensor &incoming_derivative =
+    context.getIncomingDerivative(SINGLE_INOUT_IDX);
+  nntrainer::Tensor &outgoing_derivative =
+    context.getOutgoingDerivative(SINGLE_INOUT_IDX);
+
+  outgoing_derivative.copyData(incoming_derivative);
+}
+
+void PositionalEncodingLayer::calculatePositionalEncoding(
+  nntrainer::RunLayerContext &context) {
+  unsigned int max_token_size =
+    std::get<props::MaxTimestep>(positional_encoding_props);
+
+  unsigned int model_dim = context.getInput(SINGLE_INOUT_IDX).getDim().width();
+
+  nntrainer::Tensor &pe = context.getTensor(
+    weight_idx[PositionalEncodingParams::positional_encoding]);
+
+  for (unsigned int i = 0; i < max_token_size; ++i) {
+    for (unsigned int j = 0; j < model_dim; ++j) {
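+      // jj = 2 * (j / 2): the sin/cos pair at dims (2i, 2i + 1) shares one frequency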
+      unsigned int jj = (j >> 1) << 1;
+      float value = i / powf(10000.0f, jj / (float)model_dim);
+      if (j & 1) {
+        value = cosf(value);
+      } else {
+        value = sinf(value);
+      }
+      pe.setValue(0, 0, i, j, value);
+    }
+  }
+
+  isPEcalculated = true;
+}
+
+void PositionalEncodingLayer::setProperty(
+  const std::vector<std::string> &values) {
+  auto remain_props = loadProperties(values, positional_encoding_props);
+  NNTR_THROW_IF(!remain_props.empty(), std::invalid_argument)
+    << "[positional encoding layer] Unknown Layer Properties count " +
+         std::to_string(values.size());
+}
+
+void PositionalEncodingLayer::exportTo(
+  Exporter &exporter, const ml::train::ExportMethods &method) const {
+  exporter.saveResult(positional_encoding_props, method, this);
+}
+
+} /* namespace nntrainer */
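
For reference, calculatePositionalEncoding fills the table with the sinusoidal encoding from "Attention Is All You Need" (arXiv:1706.03762); with pos the token position, i the frequency-pair index, and d_model the input width:

    PE_{(pos, 2i)}   = \sin(pos / 10000^{2i/d_{model}})
    PE_{(pos, 2i+1)} = \cos(pos / 10000^{2i/d_{model}})

In the loop, jj = (j >> 1) << 1 equals 2i for both members of each sine/cosine pair, and because the table depends only on position and width, the MAX_LIFESPAN tensor lets it be computed once and reused across every forwarding.
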
diff --git a/nntrainer/layers/positional_encoding_layer.h b/nntrainer/layers/positional_encoding_layer.h
new file mode 100644 (file)
index 0000000..1c58c83
--- /dev/null
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2022 Hyeonseok Lee <hs89.lee@samsung.com>
+ *
+ * @file   positional_encoding_layer.h
+ * @date   16 August 2022
+ * @brief  This file contains the positional encoding layer used in transformers
+ * @see    https://github.com/nnstreamer/nntrainer
+ *         https://arxiv.org/abs/1706.03762
+ * @author Hyeonseok Lee <hs89.lee@samsung.com>
+ * @bug    No known bugs except for NYI items
+ *
+ */
+
+#ifndef __POSITIONAL_ENCODING_LAYER_H__
+#define __POSITIONAL_ENCODING_LAYER_H__
+#ifdef __cplusplus
+
+#include <base_properties.h>
+#include <layer_context.h>
+#include <layer_devel.h>
+#include <node_exporter.h>
+
+namespace nntrainer {
+
+/**
+ * @class   PositionalEncodingLayer
+ * @brief   Implementation of the positional encoding layer described in the
+ * paper "Attention Is All You Need"
+ */
+class PositionalEncodingLayer : public Layer {
+public:
+  /**
+   * @brief     Constructor of PositionalEncodingLayer
+   */
+  PositionalEncodingLayer();
+
+  /**
+   * @brief     Destructor of PositionalEncodingLayer
+   */
+  ~PositionalEncodingLayer();
+
+  /**
+   *  @brief  Move constructor of PositionalEncodingLayer.
+   *  @param[in] PositionalEncodingLayer &&
+   */
+  PositionalEncodingLayer(PositionalEncodingLayer &&rhs) noexcept = default;
+
+  /**
+   * @brief  Move assignment operator.
+   * @param[in] rhs PositionalEncodingLayer to be moved.
+   */
+  PositionalEncodingLayer &operator=(PositionalEncodingLayer &&rhs) = default;
+
+  /**
+   * @copydoc Layer::finalize(InitLayerContext &context)
+   */
+  void finalize(InitLayerContext &context) override;
+
+  /**
+   * @copydoc Layer::forwarding(RunLayerContext &context, bool training)
+   */
+  void forwarding(RunLayerContext &context, bool training) override;
+
+  /**
+   * @copydoc Layer::calcDerivative(RunLayerContext &context)
+   */
+  void calcDerivative(RunLayerContext &context) override;
+
+  /**
+   * @copydoc bool supportBackwarding() const
+   */
+  bool supportBackwarding() const override { return true; };
+
+  /**
+   * @copydoc Layer::exportTo(Exporter &exporter, ml::train::ExportMethods
+   * method)
+   */
+  void exportTo(Exporter &exporter,
+                const ml::train::ExportMethods &method) const override;
+
+  /**
+   * @copydoc Layer::setProperty(const std::vector<std::string> &values)
+   */
+  void setProperty(const std::vector<std::string> &values) override;
+
+  /**
+   * @copydoc Layer::getType()
+   */
+  const std::string getType() const override {
+    return PositionalEncodingLayer::type;
+  };
+
+  inline static const std::string type = "positional_encoding";
+
+private:
+  std::tuple<props::MaxTimestep> positional_encoding_props;
+  std::array<unsigned int, 1> weight_idx;
+  bool isPEcalculated; // true if the positional encoding table has already
+                       // been calculated
+
+  /**
+   * @brief calculate positional encoding
+   * @param context Context of the layer
+   */
+  void calculatePositionalEncoding(RunLayerContext &context);
+};
+
+} // namespace nntrainer
+
+#endif /* __cplusplus */
+#endif /* __POSITIONAL_ENCODING_LAYER_H__ */
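
As a sanity check, the table can be reproduced outside nntrainer. A self-contained sketch mirroring calculatePositionalEncoding (the sizes are arbitrary stand-ins for props::MaxTimestep and the input width):

    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main() {
      const unsigned int max_token_size = 4; // stand-in for props::MaxTimestep
      const unsigned int model_dim = 8;      // stand-in for the input width
      std::vector<float> pe(max_token_size * model_dim);

      for (unsigned int i = 0; i < max_token_size; ++i) {
        for (unsigned int j = 0; j < model_dim; ++j) {
          unsigned int jj = (j >> 1) << 1; // 2 * (j / 2)
          float angle = i / std::pow(10000.0f, jj / (float)model_dim);
          pe[i * model_dim + j] = (j & 1) ? std::cos(angle) : std::sin(angle);
        }
      }

      // Row 0 alternates sin(0) = 0 and cos(0) = 1: 0.000 1.000 0.000 1.000 ...
      for (unsigned int j = 0; j < model_dim; ++j)
        std::printf("%.3f ", pe[j]);
      std::printf("\n");
      return 0;
    }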