[layer] fix for mol attention layer
authorParichay Kapoor <pk.kapoor@samsung.com>
Fri, 3 Dec 2021 12:09:51 +0000 (21:09 +0900)
committerJijoong Moon <jijoong.moon@samsung.com>
Fri, 3 Dec 2021 12:51:57 +0000 (21:51 +0900)
Bug fix for the MoL attention layer: getOutgoingDerivative() is not available
in calcGradient(), so its usage there is replaced with a temporary tensor
shared with calcDerivative().

Signed-off-by: Parichay Kapoor <pk.kapoor@samsung.com>
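
For context, below is a minimal standalone sketch of the pattern this commit adopts:
request a layer-owned temporary tensor during finalize(), fill it while computing the
state derivative, and read it back in calcGradient() instead of calling
getOutgoingDerivative(). The mock context, class, and function names here are
illustrative assumptions and not the nntrainer API; the derivative math is a placeholder.

    // Minimal sketch (assumed names, not the nntrainer API) of caching a
    // derivative in a layer-owned temporary so calcGradient() can reuse it.
    #include <cassert>
    #include <cstddef>
    #include <string>
    #include <unordered_map>
    #include <vector>

    using Tensor = std::vector<float>; // stand-in for nntrainer::Tensor

    struct MockRunContext {
      // stand-in for tensors requested from the layer context
      std::unordered_map<std::string, Tensor> tensors;
      Tensor &getTensor(const std::string &name) { return tensors[name]; }
    };

    struct MoLAttentionSketch {
      // finalize(): request the extra "dstate" temporary; in the real layer it
      // uses TensorLifespan::BACKWARD_FUNC_LIFESPAN so it spans both backward calls.
      void finalize(MockRunContext &ctx, std::size_t state_size) {
        ctx.tensors["dstate"] = Tensor(state_size, 0.0f);
      }

      // calcDerivative(): compute the state derivative and keep a copy in the
      // temporary so calcGradient() does not need getOutgoingDerivative().
      void calcDerivative(MockRunContext &ctx, const Tensor &incoming) {
        Tensor &dstate_local = ctx.getTensor("dstate");
        for (std::size_t i = 0; i < incoming.size(); ++i)
          dstate_local[i] = incoming[i] * 0.5f; // placeholder derivative math
      }

      // calcGradient(): read the cached derivative instead of the (unavailable)
      // outgoing derivative of the state input.
      void calcGradient(MockRunContext &ctx, Tensor &dfc_w) {
        const Tensor &dstate = ctx.getTensor("dstate");
        for (std::size_t i = 0; i < dstate.size() && i < dfc_w.size(); ++i)
          dfc_w[i] += dstate[i]; // placeholder gradient accumulation
      }
    };

    int main() {
      MockRunContext ctx;
      MoLAttentionSketch layer;
      layer.finalize(ctx, 4);
      layer.calcDerivative(ctx, Tensor{1, 2, 3, 4});
      Tensor dfc_w(4, 0.0f);
      layer.calcGradient(ctx, dfc_w);
      assert(dfc_w[3] == 2.0f); // 4 * 0.5 accumulated from the cached dstate
      return 0;
    }
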
nntrainer/layers/mol_attention_layer.cpp
nntrainer/layers/mol_attention_layer.h

index a857a38..f90fe7d 100644 (file)
@@ -43,7 +43,8 @@ enum AttentionParams {
   prob_left,
   prob_right,
   u_neg_div,
-  u_pos_div
+  u_pos_div,
+  dstate
 };
 
 void MoLAttentionLayer::finalize(InitLayerContext &context) {
@@ -137,6 +138,9 @@ void MoLAttentionLayer::finalize(InitLayerContext &context) {
   wt_idx[AttentionParams::u_pos_div] =
     context.requestTensor(prob_dim, "u_pos_div", Tensor::Initializer::NONE,
                           false, TensorLifespan::ITERATION_LIFESPAN);
+  wt_idx[AttentionParams::dstate] =
+    context.requestTensor(state_dim, "dstate", Tensor::Initializer::NONE, false,
+                          TensorLifespan::BACKWARD_FUNC_LIFESPAN);
 
   context.setOutputDimensions({query_dim, state_dim});
 }
@@ -331,6 +335,7 @@ void MoLAttentionLayer::calcDerivative(RunLayerContext &context) {
     context.getOutgoingDerivative(wt_idx[AttentionParams::value]);
   Tensor &dstate =
     context.getOutgoingDerivative(wt_idx[AttentionParams::state]);
+  Tensor &dstate_local = context.getTensor(wt_idx[AttentionParams::dstate]);
 
   Tensor &derivative = context.getIncomingDerivative(SINGLE_INOUT_IDX);
 
@@ -346,6 +351,8 @@ void MoLAttentionLayer::calcDerivative(RunLayerContext &context) {
 
   if (!helper_exec)
     calcDerivativeHelper(context, dstate);
+  else
+    dstate.copyData(dstate_local);
 
   Tensor dfc_tanh = Tensor(fc_out.getDim());
   dfc_tanh.dot_deriv_wrt_1(fc_proj_w, dfc_proj_out);
@@ -357,8 +364,7 @@ void MoLAttentionLayer::calcDerivative(RunLayerContext &context) {
 
 void MoLAttentionLayer::calcGradient(RunLayerContext &context) {
   Tensor &query = context.getInput(wt_idx[AttentionParams::query]);
-  Tensor &dstate =
-    context.getOutgoingDerivative(wt_idx[AttentionParams::state]);
+  Tensor &dstate = context.getTensor(wt_idx[AttentionParams::dstate]);
 
   Tensor &fc_proj_w = context.getWeight(wt_idx[AttentionParams::fc_proj_w]);
   Tensor &dfc_w = context.getWeightGrad(wt_idx[AttentionParams::fc_w]);
index e41e5f4..141c7c7 100644 (file)
@@ -106,7 +106,7 @@ private:
   ActiFunc softmax; /** softmax activation operation */
   ActiFunc tanh;    /** tanh activation operation */
   ActiFunc sigmoid; /** sigmoid activation operation */
-  std::array<unsigned int, 15>
+  std::array<unsigned int, 16>
     wt_idx; /**< indices of the weights and tensors */
 
   /**