[ hgemm ] Remove unnecessary K1 GEMM functions

author skykongkong8 <ss.kong@samsung.com>

Wed, 10 Jul 2024 08:43:39 +0000 (17:43 +0900)

committer Jijoong Moon <jijoong.moon@samsung.com>

Tue, 30 Jul 2024 22:45:30 +0000 (07:45 +0900)
author skykongkong8 <ss.kong@samsung.com>
Wed, 10 Jul 2024 08:43:39 +0000 (17:43 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Tue, 30 Jul 2024 22:45:30 +0000 (07:45 +0900)
diff --git a/nntrainer/tensor/hgemm/hgemm.cpp b/nntrainer/tensor/hgemm/hgemm.cpp

index cadb28bbe9ddbb09f56c143a0739698293cd7905..48db72a0a5a7d03bde2b18d751918f0c50da081a 100644 (file)
--- a/nntrainer/tensor/hgemm/hgemm.cpp
+++ b/nntrainer/tensor/hgemm/hgemm.cpp
@@ -20,6 +20,7 @@
  #include <hgemm_padding.h>
  #include <hgemm_transA.h>
  #include <hgemm_transAB.h>
+#include <limits>
  #include <hgemm_transB.h>
  #include <hgemm_util.h>
  
@@ -156,16 +157,26 @@ void hgemm_K1(const __fp16 *A, const __fp16 *B, __fp16 *C, unsigned int M,
                bool TransA, bool TransB) {
    unsigned int lda = (TransA) ? M : K;
    unsigned int ldb = (TransB) ? K : N;
-
-  return hgemm_K1_noTrans(M, N, K, A, lda, B, ldb, C, N, alpha, beta);
-
-  if (!TransA && TransB) {
-    hgemm_K1_transB(M, N, K, A, lda, B, ldb, C, N, alpha, beta);
-  } else if (TransA && !TransB) {
-    hgemm_K1_transA(M, N, K, A, lda, B, ldb, C, N, alpha, beta);
-  } else if (!TransA && !TransB) {
-    hgemm_K1_noTrans(M, N, K, A, lda, B, ldb, C, N, alpha, beta);
-  } else { // TransA && TransB
-    hgemm_K1_transAB(M, N, K, A, lda, B, ldb, C, N, alpha, beta);
+  unsigned int ldc = N;
+
+  const float eps = std::numeric_limits<float>::epsilon();
+  float16x8_t a_vec;
+  unsigned int N8 = (N >> 3) << 3;
+  for (unsigned int m = 0; m < M; ++m) {
+    a_vec = vmovq_n_f16(alpha * A[m]);
+    if (std::fpclassify(beta) != FP_ZERO) {
+      for (unsigned int n = 0; n < N8; n += 8) {
+        vst1q_f16(&C[m * ldc + n],
+                  vaddq_f16(vmulq_f16(a_vec, vld1q_f16(&B[n])),
+                            vmulq_n_f16(vld1q_f16(&C[m * ldc + n]), beta)));
+      }
+    } else {
+      for (unsigned int n = 0; n < N8; n += 8) {
+        vst1q_f16(&C[m * ldc + n], vmulq_f16(a_vec, vld1q_f16(&B[n])));
+      }
+    }
+    for (unsigned int n = N8; n < N; ++n) {
+      C[m * ldc + n] = alpha * A[m] * B[n] + beta * C[m * ldc + n];
+    }
    }
  }
diff --git a/nntrainer/tensor/hgemm/hgemm_noTrans.cpp b/nntrainer/tensor/hgemm/hgemm_noTrans.cpp

index bff0c308a282daed3b3dd6781686a18e196fe094..b1497329dde98aa47c3cbacef2299a02f28c38db 100644 (file)
--- a/nntrainer/tensor/hgemm/hgemm_noTrans.cpp
+++ b/nntrainer/tensor/hgemm/hgemm_noTrans.cpp
@@ -1183,29 +1183,3 @@ void hgemm_noTrans_fallback(unsigned int M, unsigned int N, unsigned int K,
      }
    }
  }
-
-void hgemm_K1_noTrans(unsigned int M, unsigned int N, unsigned int K,
-                      const __fp16 *A, unsigned int lda, const __fp16 *B,
-                      unsigned int ldb, __fp16 *C, unsigned int ldc,
-                      float alpha, float beta) {
-  const float eps = std::numeric_limits<float>::epsilon();
-  float16x8_t a_vec;
-  unsigned int N8 = (N >> 3) << 3;
-  for (unsigned int m = 0; m < M; ++m) {
-    a_vec = vmovq_n_f16(alpha * A[m]);
-    if (std::fpclassify(beta) != FP_ZERO) {
-      for (unsigned int n = 0; n < N8; n += 8) {
-        vst1q_f16(&C[m * ldc + n],
-                  vaddq_f16(vmulq_f16(a_vec, vld1q_f16(&B[n])),
-                            vmulq_n_f16(vld1q_f16(&C[m * ldc + n]), beta)));
-      }
-    } else {
-      for (unsigned int n = 0; n < N8; n += 8) {
-        vst1q_f16(&C[m * ldc + n], vmulq_f16(a_vec, vld1q_f16(&B[n])));
-      }
-    }
-    for (unsigned int n = N8; n < N; ++n) {
-      C[m * ldc + n] = alpha * A[m] * B[n] + beta * C[m * ldc + n];
-    }
-  }
-}
diff --git a/nntrainer/tensor/hgemm/hgemm_noTrans.h b/nntrainer/tensor/hgemm/hgemm_noTrans.h

index 1270f372438dba5dd3cb0dcc58d1432f5a03f2c9..06b1f1023d9c9f90b1001e548f354d8ed3911d4a 100644 (file)
--- a/nntrainer/tensor/hgemm/hgemm_noTrans.h
+++ b/nntrainer/tensor/hgemm/hgemm_noTrans.h
@@ -251,24 +251,6 @@ void hgemm_noTrans_8x16(unsigned int M, unsigned int N, unsigned int K,
                          unsigned int ldb, float *C, unsigned int ldc,
                          float alpha = 1.F, float beta = 0.F);
  
-/**
- * @brief     hgemm fallback with neon : Y = alpha*op(A)*op(B) + beta*C,
- * @param M length of the row of matrix A
- * @param N length of the col of matrix B
- * @param K length of the col of matrix A
- * @param A input matrix A
- * @param lda length of the col of matrix A
- * @param B input matrix B
- * @param ldb length of the col of matrix B
- * @param C output matrix C
- * @param ldc length of the col of matrix C
- * @param[in] alpha float number
- * @param[in] beta float number
- */
-void hgemm_K1_noTrans(unsigned int M, unsigned int N, unsigned int K,
-                      const __fp16 *A, unsigned int lda, const __fp16 *B,
-                      unsigned int ldb, __fp16 *C, unsigned int ldc,
-                      float alpha = 1.F, float beta = 0.F);
  /**
   * @brief     hgemm fallback with neon : Y = alpha*op(A)*op(B) + beta*C,
   * @param M length of the row of matrix A
diff --git a/nntrainer/tensor/hgemm/hgemm_transA.cpp b/nntrainer/tensor/hgemm/hgemm_transA.cpp

index b510a3496518d25186c52177acb7e7a06803edb5..725847f8ebc3f535709c252e899e21a6135eb719 100644 (file)
--- a/nntrainer/tensor/hgemm/hgemm_transA.cpp
+++ b/nntrainer/tensor/hgemm/hgemm_transA.cpp
@@ -26,16 +26,3 @@ void hgemm_transA(const __fp16 *A, const __fp16 *B, float *C, unsigned int M,
  
    free(A_T);
  }
-
-void hgemm_K1_transA(unsigned int M, unsigned int N, unsigned int K,
-                     const __fp16 *A, unsigned int lda, const __fp16 *B,
-                     unsigned int ldb, __fp16 *C, unsigned int ldc, float alpha,
-                     float beta) {
-  __fp16 *A_T = alignedMalloc(M * K);
-
-  transpose_neon<__fp16>(K, M, A, M, A_T, K);
-
-  hgemm_K1_noTrans(M, N, K, A_T, lda, B, ldb, C, ldc, alpha, beta);
-
-  free(A_T);
-}
diff --git a/nntrainer/tensor/hgemm/hgemm_transA.h b/nntrainer/tensor/hgemm/hgemm_transA.h

index 68272bd240fd7b74ff70a675fcbc2a84eff5117f..8de44c844ec00af07cfc360fd9c6e1fe7df15dff 100644 (file)
--- a/nntrainer/tensor/hgemm/hgemm_transA.h
+++ b/nntrainer/tensor/hgemm/hgemm_transA.h
@@ -25,21 +25,4 @@
   */
  void hgemm_transA(const __fp16 *A, const __fp16 *B, float *C, unsigned int M,
                    unsigned int N, unsigned int K, float alpha, float beta);
-/**
- * @brief     hgemm fallback with neon : Y = alpha*op(A)*op(B) + beta*C,
- * @param M length of the row of matrix A
- * @param N length of the col of matrix B
- * @param K length of the col of matrix A
- * @param A input matrix A
- * @param lda length of the col of matrix A
- * @param B input matrix B
- * @param ldb length of the col of matrix B
- * @param C output matrix C
- * @param ldc length of the col of matrix C
- * @param[in] alpha float number
- * @param[in] beta float number
- */
-void hgemm_K1_transA(unsigned int M, unsigned int N, unsigned int K,
-                     const __fp16 *A, unsigned int lda, const __fp16 *B,
-                     unsigned int ldb, __fp16 *C, unsigned int ldc,
-                     float alpha = 1.F, float beta = 0.F);
+
diff --git a/nntrainer/tensor/hgemm/hgemm_transAB.cpp b/nntrainer/tensor/hgemm/hgemm_transAB.cpp

index 0ab9708b017837e79dc66d58791c8b9af32f87f1..e4f3d32cbf45c1d7c2fadb8a454af84ad39ea3b3 100644 (file)
--- a/nntrainer/tensor/hgemm/hgemm_transAB.cpp
+++ b/nntrainer/tensor/hgemm/hgemm_transAB.cpp
@@ -29,19 +29,3 @@ void hgemm_transAB(const __fp16 *A, const __fp16 *B, float *C, unsigned int M,
    free(A_T);
    free(B_T);
  }
-
-void hgemm_K1_transAB(unsigned int M, unsigned int N, unsigned int K,
-                      const __fp16 *A, unsigned int lda, const __fp16 *B,
-                      unsigned int ldb, __fp16 *C, unsigned int ldc,
-                      float alpha, float beta) {
-  __fp16 *A_T = alignedMalloc(M * K);
-  __fp16 *B_T = alignedMalloc(K * N);
-
-  transpose_neon<__fp16>(K, M, A, M, A_T, K);
-  transpose_neon<__fp16>(N, K, B, K, B_T, N);
-
-  hgemm_K1_noTrans(M, N, K, A_T, lda, B_T, ldb, C, ldc, alpha, beta);
-
-  free(A_T);
-  free(B_T);
-}
diff --git a/nntrainer/tensor/hgemm/hgemm_transAB.h b/nntrainer/tensor/hgemm/hgemm_transAB.h

index 08e131d5e5f4023bf316b0c5fb2400d1d2c198bc..2c228031c898880d5b944e6dfff8d6f72900707f 100644 (file)
--- a/nntrainer/tensor/hgemm/hgemm_transAB.h
+++ b/nntrainer/tensor/hgemm/hgemm_transAB.h
@@ -25,21 +25,3 @@
   */
  void hgemm_transAB(const __fp16 *A, const __fp16 *B, float *C, unsigned int M,
                     unsigned int N, unsigned int K, float alpha, float beta);
-/**
- * @brief     hgemm fallback with neon : Y = alpha*op(A)*op(B) + beta*C,
- * @param M length of the row of matrix A
- * @param N length of the col of matrix B
- * @param K length of the col of matrix A
- * @param A input matrix A
- * @param lda length of the col of matrix A
- * @param B input matrix B
- * @param ldb length of the col of matrix B
- * @param C output matrix C
- * @param ldc length of the col of matrix C
- * @param[in] alpha float number
- * @param[in] beta float number
- */
-void hgemm_K1_transAB(unsigned int M, unsigned int N, unsigned int K,
-                      const __fp16 *A, unsigned int lda, const __fp16 *B,
-                      unsigned int ldb, __fp16 *C, unsigned int ldc,
-                      float alpha = 1.F, float beta = 0.F);
-\ No newline at end of file
diff --git a/nntrainer/tensor/hgemm/hgemm_transB.cpp b/nntrainer/tensor/hgemm/hgemm_transB.cpp

index f224e5f7b4cc55278e41af44c906b4b3535537c4..10d9871ce795e59042926f95955aaeec977ba07b 100644 (file)
--- a/nntrainer/tensor/hgemm/hgemm_transB.cpp
+++ b/nntrainer/tensor/hgemm/hgemm_transB.cpp
@@ -113,16 +113,3 @@ void hgemm_transB_fallback(const __fp16 *A, const __fp16 *B, float *C,
  
    free(B_T);
  }
-
-void hgemm_K1_transB(unsigned int M, unsigned int N, unsigned int K,
-                     const __fp16 *A, unsigned int lda, const __fp16 *B,
-                     unsigned int ldb, __fp16 *C, unsigned int ldc, float alpha,
-                     float beta) {
-  __fp16 *B_T = alignedMalloc(K * N);
-
-  transpose_neon<__fp16>(N, K, B, K, B_T, N);
-
-  hgemm_K1_noTrans(M, N, K, A, lda, B_T, ldb, C, ldc, alpha, beta);
-
-  free(B_T);
-}
diff --git a/nntrainer/tensor/hgemm/hgemm_transB.h b/nntrainer/tensor/hgemm/hgemm_transB.h

index cda6422edeb662b9ee57e60aad40f77e71748fad..0c47d23d7454befa3ed1f4b4e462f4e0aaacc088 100644 (file)
--- a/nntrainer/tensor/hgemm/hgemm_transB.h
+++ b/nntrainer/tensor/hgemm/hgemm_transB.h
@@ -46,21 +46,3 @@ void hgemm_transB_8x16(unsigned int M, unsigned int N, unsigned int K,
                         const __fp16 *A, unsigned int lda, const __fp16 *B,
                         unsigned int ldb, float *C, unsigned int ldc,
                         float alpha = 1.F, float beta = 0.F);
-/**
- * @brief     hgemm fallback with neon : Y = alpha*op(A)*op(B) + beta*C,
- * @param M length of the row of matrix A
- * @param N length of the col of matrix B
- * @param K length of the col of matrix A
- * @param A input matrix A
- * @param lda length of the col of matrix A
- * @param B input matrix B
- * @param ldb length of the col of matrix B
- * @param C output matrix C
- * @param ldc length of the col of matrix C
- * @param[in] alpha float number
- * @param[in] beta float number
- */
-void hgemm_K1_transB(unsigned int M, unsigned int N, unsigned int K,
-                     const __fp16 *A, unsigned int lda, const __fp16 *B,
-                     unsigned int ldb, __fp16 *C, unsigned int ldc,
-                     float alpha = 1.F, float beta = 0.F);
author	skykongkong8 <ss.kong@samsung.com>
	Wed, 10 Jul 2024 08:43:39 +0000 (17:43 +0900)
committer	Jijoong Moon <jijoong.moon@samsung.com>
	Tue, 30 Jul 2024 22:45:30 +0000 (07:45 +0900)
nntrainer/tensor/hgemm/hgemm.cpp		patch \| blob \| history
nntrainer/tensor/hgemm/hgemm_noTrans.cpp		patch \| blob \| history
nntrainer/tensor/hgemm/hgemm_noTrans.h		patch \| blob \| history
nntrainer/tensor/hgemm/hgemm_transA.cpp		patch \| blob \| history
nntrainer/tensor/hgemm/hgemm_transA.h		patch \| blob \| history
nntrainer/tensor/hgemm/hgemm_transAB.cpp		patch \| blob \| history
nntrainer/tensor/hgemm/hgemm_transAB.h		patch \| blob \| history
nntrainer/tensor/hgemm/hgemm_transB.cpp		patch \| blob \| history
nntrainer/tensor/hgemm/hgemm_transB.h		patch \| blob \| history