namespace nntrainer {
-AttentionLayer::AttentionLayer() : sm(ActivationType::ACT_SOFTMAX) {
+AttentionLayer::AttentionLayer() {
+
wt_idx.fill(std::numeric_limits<unsigned>::max());
}
false, TensorLifespan::ITERATION_LIFESPAN);
context.setOutputDimensions({query_dim});
+
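+ /** bind the softmax implementation to the activation data type resolved
+  * at finalize time (FP32 or FP16) via the templated setActiFunc call */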
+ auto data_type = context.getActivationDataType();
+ if (data_type == ml::train::TensorDim::DataType::FP32) {
+   sm.setActiFunc<float>(ActivationType::ACT_SOFTMAX);
+ } else if (data_type == ml::train::TensorDim::DataType::FP16) {
+   sm.setActiFunc<_FP16>(ActivationType::ACT_SOFTMAX);
+ }
}
void AttentionLayer::forwarding(RunLayerContext &context, bool training) {
Tensor &weights = context.getTensor(wt_idx[AttentionParams::weights]);
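+ /** dweight is constructed with the same TensorType (format and data type)
+  * as the forward weights, so fp16 runs keep this intermediate in fp16 */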
Tensor dweight = Tensor(
- TensorDim({derivative.batch(), 1, derivative.height(), value.height()}));
+ TensorDim({derivative.batch(), 1, derivative.height(), value.height()},
+ weights.getTensorType()));
/** derivative for dot 2 */
dweight.dot_batched_deriv_wrt_1(value, derivative);
(d1 != 0 && float_eq(d2, 0)),
1);
}
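+ /** fp16 comparison: cosine similarity and mean squared error between the
+  * produced tensor and the answer must fall within epsilon */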
- const float epsilon = 1e-4;
+ const float epsilon = 1e-3;
auto tensor = t1.clone();
auto answer = t2.clone();
auto mean_squared_error = mse<_FP16>(
  tensor.getData<_FP16>(), answer.getData<_FP16>(), tensor.size());
- EXPECT_IN_RANGE(cos_sim, 0.99, 1);
+ EXPECT_IN_RANGE(cos_sim, 1 - epsilon, 1 + epsilon);
EXPECT_IN_RANGE(mean_squared_error, 0, epsilon);
return (weak_match == total);
::testing::Values(attention_shared_kv,
attention_shared_kv_batched,
attention_batched));
+
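+/** fp16 variants of the attention golden tests registered above, run with
+ * fp16 tensor types against their own golden files */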
+auto attention_shared_kv_fp16fp16 = LayerGoldenTestParamType(
+ nntrainer::createLayer<nntrainer::AttentionLayer>, {}, "1:1:5:7,1:1:3:7",
+ "attention_shared_kv_fp16fp16.nnlayergolden",
+ LayerGoldenTestParamOptions::DEFAULT, "nchw", "fp16", "fp16");
+
+auto attention_shared_kv_batched_fp16fp16 = LayerGoldenTestParamType(
+ nntrainer::createLayer<nntrainer::AttentionLayer>, {}, "2:1:5:7,2:1:3:7",
+ "attention_shared_kv_batched_fp16fp16.nnlayergolden",
+ LayerGoldenTestParamOptions::DEFAULT, "nchw", "fp16", "fp16");
+
+auto attention_batched_fp16fp16 = LayerGoldenTestParamType(
+ nntrainer::createLayer<nntrainer::AttentionLayer>, {},
+ "2:1:5:7,2:1:3:7,2:1:3:7", "attention_batched_fp16fp16.nnlayergolden",
+ LayerGoldenTestParamOptions::DEFAULT, "nchw", "fp16", "fp16");
+
+GTEST_PARAMETER_TEST(Attention16, LayerGoldenTest,
+                     ::testing::Values(attention_shared_kv_fp16fp16,
+                                       attention_shared_kv_batched_fp16fp16,
+                                       attention_batched_fp16fp16));