*
*/
+/**
+ * @brief Padding function for matrix A in HGEMM
+ *
+ * @param A src matrix to pad
+ * @param Ap dst matrix after padding
+ * @param M row length of matrix A
+ * @param K col length of matrix A
+ * @param M8 Least multiple of 8 that is bigger than or equal to M
+ * @param K8 Least multiple of 8 that is bigger than or equal to K
+ * @param transA Whether the matrix A is transposed or not
+ */
void hgemm_padding_A(const __fp16 *A, __fp16 *Ap, unsigned int M,
unsigned int K, unsigned int M8, unsigned int K8,
bool transA);
+
+/**
+ * @brief Padding function for non-transposed matrix A in HGEMM
+ *
+ * @param A src matrix to pad
+ * @param Ap dst matrix after padding
+ * @param M row length of matrix A
+ * @param K col length of matrix A
+ * @param M8 Least multiple of 8 that is bigger than or equal to M
+ * @param K8 Least multiple of 8 that is bigger than or equal to K
+ */
void hgemm_padding_A_noTrans(const __fp16 *A, __fp16 *Ap, unsigned int M,
unsigned int K, unsigned int M8, unsigned int K8);
-void hgemm_padding_A_noTrans_wrt_M(const __fp16 *A, __fp16 *Ap,
- unsigned int M, unsigned int K,
- unsigned int M8, unsigned int K8);
-void hgemm_padding_A_noTrans_wrt_K(const __fp16 *A, __fp16 *Ap,
- unsigned int M, unsigned int K,
- unsigned int M8, unsigned int K8);
-void hgemm_padding_A_noTrans_wrt_MK(const __fp16 *A, __fp16 *Ap,
- unsigned int M, unsigned int K,
- unsigned int M8, unsigned int K8);
+
+/**
+ * @brief Padding function for non-transposed matrix A in HGEMM w.r.t. M
+ * direction
+ *
+ * @param A src matrix to pad
+ * @param Ap dst matrix after padding
+ * @param M row length of matrix A
+ * @param K col length of matrix A
+ * @param M8 Least multiple of 8 that is bigger than or equal to M
+ * @param K8 Least multiple of 8 that is bigger than or equal to K
+ */
+void hgemm_padding_A_noTrans_wrt_M(const __fp16 *A, __fp16 *Ap, unsigned int M,
+ unsigned int K, unsigned int M8,
+ unsigned int K8);
+/**
+ * @brief Padding function for non-transposed matrix A in HGEMM w.r.t. K
+ * direction
+ *
+ * @param A src matrix to pad
+ * @param Ap dst matrix after padding
+ * @param M row length of matrix A
+ * @param K col length of matrix A
+ * @param M8 Least multiple of 8 that is bigger than or equal to M
+ * @param K8 Least multiple of 8 that is bigger than or equal to K
+ */
+void hgemm_padding_A_noTrans_wrt_K(const __fp16 *A, __fp16 *Ap, unsigned int M,
+ unsigned int K, unsigned int M8,
+ unsigned int K8);
+
+/**
+ * @brief Padding function for non-transposed matrix A in HGEMM w.r.t. M and K
+ * directions
+ *
+ * @param A src matrix to pad
+ * @param Ap dst matrix after padding
+ * @param M row length of matrix A
+ * @param K col length of matrix A
+ * @param M8 Least multiple of 8 that is bigger than or equal to M
+ * @param K8 Least multiple of 8 that is bigger than or equal to K
+ */
+void hgemm_padding_A_noTrans_wrt_MK(const __fp16 *A, __fp16 *Ap, unsigned int M,
+ unsigned int K, unsigned int M8,
+ unsigned int K8);
+/**
+ * @brief Padding function for transposed matrix A in HGEMM
+ *
+ * @param A src matrix to pad
+ * @param Ap dst matrix after padding
+ * @param M row length of matrix A
+ * @param K col length of matrix A
+ * @param M8 Least multiple of 8 that is bigger than or equal to M
+ * @param K8 Least multiple of 8 that is bigger than or equal to K
+ */
void hgemm_padding_A_Trans(const __fp16 *A, __fp16 *Ap, unsigned int M,
unsigned int K, unsigned int M8, unsigned int K8);
-void hgemm_padding_A_Trans_wrt_M(const __fp16 *A, __fp16 *Ap,
- unsigned int M, unsigned int K,
- unsigned int M8, unsigned int K8);
-void hgemm_padding_A_Trans_wrt_K(const __fp16 *A, __fp16 *Ap,
- unsigned int M, unsigned int K,
- unsigned int M8, unsigned int K8);
-void hgemm_padding_A_Trans_wrt_MK(const __fp16 *A, __fp16 *Ap,
- unsigned int M, unsigned int K,
- unsigned int M8, unsigned int K8);
+/**
+ * @brief Padding function for transposed matrix A in HGEMM w.r.t. M direction
+ *
+ * @param A src matrix to pad
+ * @param Ap dst matrix after padding
+ * @param M row length of matrix A
+ * @param K col length of matrix A
+ * @param M8 Least multiple of 8 that is bigger than or equal to M
+ * @param K8 Least multiple of 8 that is bigger than or equal to K
+ */
+void hgemm_padding_A_Trans_wrt_M(const __fp16 *A, __fp16 *Ap, unsigned int M,
+ unsigned int K, unsigned int M8,
+ unsigned int K8);
+/**
+ * @brief Padding function for transposed matrix A in HGEMM w.r.t. K direction
+ *
+ * @param A src matrix to pad
+ * @param Ap dst matrix after padding
+ * @param M row length of matrix A
+ * @param K col length of matrix A
+ * @param M8 Least multiple of 8 that is bigger than or equal to M
+ * @param K8 Least multiple of 8 that is bigger than or equal to K
+ */
+void hgemm_padding_A_Trans_wrt_K(const __fp16 *A, __fp16 *Ap, unsigned int M,
+ unsigned int K, unsigned int M8,
+ unsigned int K8);
+/**
+ * @brief Padding function for transposed matrix A in HGEMM w.r.t. M and K
+ * directions
+ *
+ * @param A src matrix to pad
+ * @param Ap dst matrix after padding
+ * @param M row length of matrix A
+ * @param K col length of matrix A
+ * @param M8 Least multiple of 8 that is bigger than or equal to M
+ * @param K8 Least multiple of 8 that is bigger than or equal to K
+ */
+void hgemm_padding_A_Trans_wrt_MK(const __fp16 *A, __fp16 *Ap, unsigned int M,
+ unsigned int K, unsigned int M8,
+ unsigned int K8);
void hgemm_padding_B_noTrans_wrt_N(const __fp16 *B, __fp16 *Bp, unsigned int K,
unsigned int N, unsigned int K8,
unsigned int N16) {
  // Stub: as shown here, the body only reports on std::cerr that this padding
  // path is not yet implemented (NYI). None of the arguments are used, so Bp is
  // left unwritten and no error is signalled to the caller.
- std::cerr << "NYI : hgemm_padding_B_noTrans_wrt_N\n";
+ std::cerr << "Error : hgemm_padding_B_noTrans_wrt_N NYI!\n";
}
void hgemm_padding_B_noTrans_wrt_K(const __fp16 *B, __fp16 *Bp, unsigned int K,
void hgemm_padding_B_noTrans_wrt_KN(const __fp16 *B, __fp16 *Bp, unsigned int K,
unsigned int N, unsigned int K8,
unsigned int N16) {
  // Stub: as shown here, the body only reports on std::cerr that this padding
  // path is not yet implemented (NYI). None of the arguments are used, so Bp is
  // left unwritten and no error is signalled to the caller.
- std::cerr << "NYI : hgemm_padding_B_noTrans_wrt_KN\n";
+ std::cerr << "Error : hgemm_padding_B_noTrans_wrt_KN NYI!\n";
}
+
void hgemm_padding_B_Trans_wrt_N(const __fp16 *B, __fp16 *Bp, unsigned int K,
unsigned int N, unsigned int K8,
unsigned int N16) {
  // Stub: as shown here, the body only reports on std::cerr that this padding
  // path is not yet implemented (NYI). None of the arguments are used, so Bp is
  // left unwritten and no error is signalled to the caller.
- std::cerr << "NYI : hgemm_padding_B_Trans_wrt_N\n";
+ std::cerr << "Error : hgemm_padding_B_Trans_wrt_N NYI!\n";
}
void hgemm_padding_B_Trans_wrt_K(const __fp16 *B, __fp16 *Bp, unsigned int K,
void hgemm_padding_B_Trans_wrt_KN(const __fp16 *B, __fp16 *Bp, unsigned int K,
unsigned int N, unsigned int K8,
unsigned int N16) {
  // Stub: as shown here, the body only reports on std::cerr that this padding
  // path is not yet implemented (NYI). None of the arguments are used, so Bp is
  // left unwritten and no error is signalled to the caller.
- std::cerr << "NYI : hgemm_padding_B_Trans_wrt_KN\n";
+ std::cerr << "Error : hgemm_padding_B_Trans_wrt_KN NYI!\n";
}
*
*/
+/**
+ * @brief Padding function for matrix B in HGEMM
+ *
+ * @param B src matrix to pad
+ * @param Bp dst matrix after padding
+ * @param K row length of matrix B
+ * @param N col length of matrix B
+ * @param K8 Least multiple of 8 that is bigger than or equal to K
+ * @param N16 Least multiple of 16 that is bigger than or equal to N
+ * @param transB Whether the matrix B is transposed or not
+ */
void hgemm_padding_B(const __fp16 *B, __fp16 *Bp, unsigned int K,
unsigned int N, unsigned int K8, unsigned int N16,
bool transB);
+/**
+ * @brief Padding function for non-transposed matrix B in HGEMM
+ *
+ * @param B src matrix to pad
+ * @param Bp dst matrix after padding
+ * @param K row length of matrix B
+ * @param N col length of matrix B
+ * @param K8 Least multiple of 8 that is bigger than or equal to K
+ * @param N16 Least multiple of 16 that is bigger than or equal to N
+ */
void hgemm_padding_B_noTrans(const __fp16 *B, __fp16 *Bp, unsigned int K,
unsigned int N, unsigned int K8, unsigned int N16);
-
-void hgemm_padding_B_noTrans_wrt_N(const __fp16 *B, __fp16 *Bp,
- unsigned int K, unsigned int N,
- unsigned int K8, unsigned int N16);
-
-void hgemm_padding_B_noTrans_wrt_K(const __fp16 *B, __fp16 *Bp,
- unsigned int K, unsigned int N,
- unsigned int K8, unsigned int N16);
-
-void hgemm_padding_B_noTrans_wrt_KN(const __fp16 *B, __fp16 *Bp,
- unsigned int K, unsigned int N,
- unsigned int K8, unsigned int N16);
-
+/**
+ * @brief Padding function for non-transposed matrix B in HGEMM w.r.t. N
+ * direction
+ *
+ * @param B src matrix to pad
+ * @param Bp dst matrix after padding
+ * @param K row length of matrix B
+ * @param N col length of matrix B
+ * @param K8 Least multiple of 8 that is bigger than or equal to K
+ * @param N16 Least multiple of 16 that is bigger than or equal to N
+ */
+void hgemm_padding_B_noTrans_wrt_N(const __fp16 *B, __fp16 *Bp, unsigned int K,
+ unsigned int N, unsigned int K8,
+ unsigned int N16);
+/**
+ * @brief Padding function for non-transposed matrix B in HGEMM w.r.t. K
+ * direction
+ *
+ * @param B src matrix to pad
+ * @param Bp dst matrix after padding
+ * @param K row length of matrix B
+ * @param N col length of matrix B
+ * @param K8 Least multiple of 8 that is bigger than or equal to K
+ * @param N16 Least multiple of 16 that is bigger than or equal to N
+ */
+void hgemm_padding_B_noTrans_wrt_K(const __fp16 *B, __fp16 *Bp, unsigned int K,
+ unsigned int N, unsigned int K8,
+ unsigned int N16);
+/**
+ * @brief Padding function for non-transposed matrix B in HGEMM w.r.t. K and N
+ * directions
+ *
+ * @param B src matrix to pad
+ * @param Bp dst matrix after padding
+ * @param K row length of matrix B
+ * @param N col length of matrix B
+ * @param K8 Least multiple of 8 that is bigger than or equal to K
+ * @param N16 Least multiple of 16 that is bigger than or equal to N
+ */
+void hgemm_padding_B_noTrans_wrt_KN(const __fp16 *B, __fp16 *Bp, unsigned int K,
+ unsigned int N, unsigned int K8,
+ unsigned int N16);
+/**
+ * @brief Padding function for transposed matrix B in HGEMM
+ *
+ * @param B src matrix to pad
+ * @param Bp dst matrix after padding
+ * @param K row length of matrix B
+ * @param N col length of matrix B
+ * @param K8 Least multiple of 8 that is bigger than or equal to K
+ * @param N16 Least multiple of 16 that is bigger than or equal to N
+ */
void hgemm_padding_B_Trans(const __fp16 *B, __fp16 *Bp, unsigned int K,
unsigned int N, unsigned int K8, unsigned int N16);
+/**
+ * @brief Padding function for transposed matrix B in HGEMM w.r.t. N direction
+ *
+ * @param B src matrix to pad
+ * @param Bp dst matrix after padding
+ * @param K row length of matrix B
+ * @param N col length of matrix B
+ * @param K8 Least multiple of 8 that is bigger than or equal to K
+ * @param N16 Least multiple of 16 that is bigger than or equal to N
+ */
+void hgemm_padding_B_Trans_wrt_N(const __fp16 *B, __fp16 *Bp, unsigned int K,
+ unsigned int N, unsigned int K8,
+ unsigned int N16);
+/**
+ * @brief Padding function for transposed matrix B in HGEMM w.r.t. K direction
+ *
+ * @param B src matrix to pad
+ * @param Bp dst matrix after padding
+ * @param K row length of matrix B
+ * @param N col length of matrix B
+ * @param K8 Least multiple of 8 that is bigger than or equal to K
+ * @param N16 Least multiple of 16 that is bigger than or equal to N
+ */
+void hgemm_padding_B_Trans_wrt_K(const __fp16 *B, __fp16 *Bp, unsigned int K,
+ unsigned int N, unsigned int K8,
+ unsigned int N16);
-void hgemm_padding_B_Trans_wrt_N(const __fp16 *B, __fp16 *Bp,
- unsigned int K, unsigned int N,
- unsigned int K8, unsigned int N16);
-
-void hgemm_padding_B_Trans_wrt_K(const __fp16 *B, __fp16 *Bp,
- unsigned int K, unsigned int N,
- unsigned int K8, unsigned int N16);
-
-void hgemm_padding_B_Trans_wrt_KN(const __fp16 *B, __fp16 *Bp,
- unsigned int K, unsigned int N,
- unsigned int K8, unsigned int N16);
+/**
+ * @brief Padding function for transposed matrix B in HGEMM w.r.t. K and N
+ * directions
+ *
+ * @param B src matrix to pad
+ * @param Bp dst matrix after padding
+ * @param K row length of matrix B
+ * @param N col length of matrix B
+ * @param K8 Least multiple of 8 that is bigger than or equal to K
+ * @param N16 Least multiple of 16 that is bigger than or equal to N
+ */
+void hgemm_padding_B_Trans_wrt_KN(const __fp16 *B, __fp16 *Bp, unsigned int K,
+ unsigned int N, unsigned int K8,
+ unsigned int N16);