EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
}
+TEST(nntrainer_Tensor, dot_sgemm) {
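+ // fp16 dot (GEMM with A transposed) validated against an fp32 reference.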
+ int batch = 1;
+ int channel = 1;
+ int height = 8;
+ int width = 16;
+
+ int height_t = 8;
+ int width_t = 16;
+
+ bool transA = true;
+ bool transB = false;
+
+ nntrainer::TensorDim::TensorType t_type_nchw_fp16 = {
+ nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16};
+
+ nntrainer::TensorDim::TensorType t_type_nchw_fp32 = {
+ nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP32};
+
+ nntrainer::Tensor input(batch, channel, height, width, t_type_nchw_fp16);
+ nntrainer::Tensor m(batch, channel, height_t, width_t, t_type_nchw_fp16);
+
+ nntrainer::Tensor input_fp32(batch, channel, height, width, t_type_nchw_fp32);
+ nntrainer::Tensor m_fp32(batch, channel, height_t, width_t, t_type_nchw_fp32);
+
+ const float alpha = 1e-5;
+
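+ // Fill the fp16 tensors and their fp32 counterparts with the same small-valued pattern.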
+ GEN_TEST_INPUT(input, i * (batch * height * channel) * alpha +
+ j * (batch * height) * alpha + k * (width)*alpha + l +
+ 1);
+ GEN_TEST_INPUT(m, i * (batch * height_t * channel) * alpha +
+ j * (batch * height_t) * alpha + k * (width_t)*alpha + l +
+ 1);
+
+ GEN_TEST_INPUT(input_fp32, i * (batch * height * channel) * alpha +
+ j * (batch * height) * alpha +
+ k * (width)*alpha + l + 1);
+ GEN_TEST_INPUT(m_fp32, i * (batch * height_t * channel) * alpha +
+ j * (batch * height_t) * alpha +
+ k * (width_t)*alpha + l + 1);
+
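+ // Compute input^T * m in fp16 and in fp32 (reference), per transA / transB above.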
+ nntrainer::Tensor result0 = input.dot(m, transA, transB);
+ nntrainer::Tensor result0_fp32 = input_fp32.dot(m_fp32, transA, transB);
+
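+ // Quantify the fp16-vs-fp32 deviation with MSE and cosine similarity.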
+ float mseErrorNeon = mse<__fp16>(
+ result0.getData<__fp16>(), result0_fp32.getData<float>(), result0.size());
+
+ double cosSimNeon = cosine_similarity<__fp16>(
+ result0.getData<__fp16>(), result0_fp32.getData<float>(), result0.size());
+
+ const float epsilon = 1e-2;
+
+ EXPECT_IN_RANGE(mseErrorNeon, 0, epsilon);
+ EXPECT_IN_RANGE((float)cosSimNeon, 0.99, 1);
+}
+
GTEST_API_ int main(int argc, char **argv) {
int result = -1;