free(C32);
}
-void hgemm_K1(const __fp16 *A, const __fp16 *B, __fp16 *C, uint32_t M,
- uint32_t N, uint32_t K, float alpha, float beta, bool TransA,
- bool TransB) {
- unsigned int lda = (TransA) ? M : K;
- unsigned int ldb = (TransB) ? K : N;
- if (!TransA && TransB) {
- hgemm_K1_transB(M, N, K, A, lda, B, ldb, C, N, alpha, beta);
- } else if (TransA && !TransB) {
- hgemm_K1_transA(M, N, K, A, lda, B, ldb, C, N, alpha, beta);
- } else if (!TransA && !TransB) {
- hgemm_K1_noTrans(M, N, K, A, lda, B, ldb, C, N, alpha, beta);
- } else { // TransA && TransB
- hgemm_K1_transAB(M, N, K, A, lda, B, ldb, C, N, alpha, beta);
- }
-}
void ele_mul(const unsigned int N, const __fp16 *X, const __fp16 *Y, __fp16 *Z,
float alpha, float beta) {
void hgemm(const __fp16 *A, const __fp16 *B, __fp16 *C, uint32_t M, uint32_t N,
uint32_t K, float alpha, float beta, bool TransA, bool TransB);
-/**
- * @brief hgemm computation with neon : Y = alpha*op(A)*op(B) + beta*C,
- * where op(X) is one of X or X**T
- * @param[in] A __fp16 * for Matrix A
- * @param[in] B __fp16 * for Matrix B
- * @param[in] C __fp16 * for Matrix C
- * @param[in] M number of op(A)'s and C's row
- * @param[in] N number of op(B)'s and C's columns
- * @param[in] K number of op(A)'s and columns and op(B)'s rows
- * @param[in] alpha float number
- * @param[in] beta float number
- */
-void hgemm_K1(const __fp16 *A, const __fp16 *B, __fp16 *C, uint32_t M,
- uint32_t N, uint32_t K, float alpha, float beta, bool TransA,
- bool TransB);
/**
* @brief squared root transformation with neon : X = sqrt(X)
free(A_T);
free(B_T);
}
+
+/**
+ * @brief Entry point that forwards an fp16 GEMM request to one of the four
+ *        hgemm_K1_* kernels, chosen by the (TransA, TransB) combination.
+ *
+ * The leading dimension handed to the kernel for A is M when TransA is set
+ * and K otherwise; for B it is K when TransB is set and N otherwise. N is
+ * always forwarded as the argument that follows C (presumably ldc - confirm
+ * against the kernel signatures).
+ */
+void hgemm_K1(const __fp16 *A, const __fp16 *B, __fp16 *C, uint32_t M,
+              uint32_t N, uint32_t K, float alpha, float beta, bool TransA,
+              bool TransB) {
+  // Start from the non-transposed leading dimensions and override as needed.
+  unsigned int lda = K;
+  unsigned int ldb = N;
+  if (TransA) {
+    lda = M;
+  }
+  if (TransB) {
+    ldb = K;
+  }
+
+  if (TransA) {
+    if (TransB) {
+      hgemm_K1_transAB(M, N, K, A, lda, B, ldb, C, N, alpha, beta);
+    } else {
+      hgemm_K1_transA(M, N, K, A, lda, B, ldb, C, N, alpha, beta);
+    }
+  } else {
+    if (TransB) {
+      hgemm_K1_transB(M, N, K, A, lda, B, ldb, C, N, alpha, beta);
+    } else {
+      hgemm_K1_noTrans(M, N, K, A, lda, B, ldb, C, N, alpha, beta);
+    }
+  }
+}
*/
void hgemm_transAB(const __fp16 *A, const __fp16 *B, float *C, unsigned int M,
unsigned int N, unsigned int K, float alpha, float beta);
+/**
+ * @brief     hgemm computation with neon : C = alpha*op(A)*op(B) + beta*C,
+ * where op(X) is one of X or X**T
+ * @param[in] A __fp16 * for Matrix A
+ * @param[in] B __fp16 * for Matrix B
+ * @param[in,out] C __fp16 * for Matrix C (the only non-const buffer; it is
+ * assumed to receive the result - confirm against the kernels)
+ * @param[in] M number of op(A)'s and C's rows
+ * @param[in] N number of op(B)'s and C's columns
+ * @param[in] K number of op(A)'s columns and op(B)'s rows
+ * @param[in] alpha float number
+ * @param[in] beta float number
+ * @param[in] TransA bool, true if op(A) is A**T, false if op(A) is A
+ * @param[in] TransB bool, true if op(B) is B**T, false if op(B) is B
+ */
+void hgemm_K1(const __fp16 *A, const __fp16 *B, __fp16 *C, uint32_t M,
+ uint32_t N, uint32_t K, float alpha, float beta, bool TransA,
+ bool TransB);