From aa50185647ba6966dcdb731372af2ecd5ae3b1d4 Mon Sep 17 00:00:00 2001 From: Wangyang Guo Date: Thu, 5 Aug 2021 02:45:53 +0000 Subject: [PATCH] Small Matrix: better handle with GEMM3M macro --- interface/gemm.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/interface/gemm.c b/interface/gemm.c index f4b9f15..775f654 100644 --- a/interface/gemm.c +++ b/interface/gemm.c @@ -105,6 +105,7 @@ static int (*gemm[])(blas_arg_t *, BLASLONG *, BLASLONG *, IFLOAT *, IFLOAT *, B #endif }; +#ifndef GEMM3M #ifdef SMALL_MATRIX_OPT #ifndef DYNAMIC_ARCH #define SMALL_KERNEL_ADDR(table, idx) ((void *)(table[idx])) @@ -115,18 +116,14 @@ static int (*gemm[])(blas_arg_t *, BLASLONG *, BLASLONG *, IFLOAT *, IFLOAT *, B #ifndef COMPLEX static size_t gemm_small_kernel[] = { -#ifndef GEMM3M GEMM_SMALL_KERNEL_NN, GEMM_SMALL_KERNEL_TN, 0, 0, GEMM_SMALL_KERNEL_NT, GEMM_SMALL_KERNEL_TT, 0, 0, -#endif }; static size_t gemm_small_kernel_b0[] = { -#ifndef GEMM3M GEMM_SMALL_KERNEL_B0_NN, GEMM_SMALL_KERNEL_B0_TN, 0, 0, GEMM_SMALL_KERNEL_B0_NT, GEMM_SMALL_KERNEL_B0_TT, 0, 0, -#endif }; #define GEMM_SMALL_KERNEL_B0(idx) (int (*)(BLASLONG, BLASLONG, BLASLONG, FLOAT *, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG)) SMALL_KERNEL_ADDR(gemm_small_kernel_b0, (idx)) @@ -134,27 +131,24 @@ static size_t gemm_small_kernel_b0[] = { #else static size_t zgemm_small_kernel[] = { -#ifndef GEMM3M GEMM_SMALL_KERNEL_NN, GEMM_SMALL_KERNEL_TN, GEMM_SMALL_KERNEL_RN, GEMM_SMALL_KERNEL_CN, GEMM_SMALL_KERNEL_NT, GEMM_SMALL_KERNEL_TT, GEMM_SMALL_KERNEL_RT, GEMM_SMALL_KERNEL_CT, GEMM_SMALL_KERNEL_NR, GEMM_SMALL_KERNEL_TR, GEMM_SMALL_KERNEL_RR, GEMM_SMALL_KERNEL_CR, GEMM_SMALL_KERNEL_NC, GEMM_SMALL_KERNEL_TC, GEMM_SMALL_KERNEL_RC, GEMM_SMALL_KERNEL_CC, -#endif }; static size_t zgemm_small_kernel_b0[] = { -#ifndef GEMM3M GEMM_SMALL_KERNEL_B0_NN, GEMM_SMALL_KERNEL_B0_TN, GEMM_SMALL_KERNEL_B0_RN, GEMM_SMALL_KERNEL_B0_CN, GEMM_SMALL_KERNEL_B0_NT, GEMM_SMALL_KERNEL_B0_TT, 
GEMM_SMALL_KERNEL_B0_RT, GEMM_SMALL_KERNEL_B0_CT, GEMM_SMALL_KERNEL_B0_NR, GEMM_SMALL_KERNEL_B0_TR, GEMM_SMALL_KERNEL_B0_RR, GEMM_SMALL_KERNEL_B0_CR, GEMM_SMALL_KERNEL_B0_NC, GEMM_SMALL_KERNEL_B0_TC, GEMM_SMALL_KERNEL_B0_RC, GEMM_SMALL_KERNEL_B0_CC, -#endif }; #define ZGEMM_SMALL_KERNEL(idx) (int (*)(BLASLONG, BLASLONG, BLASLONG, FLOAT *, BLASLONG, FLOAT , FLOAT, FLOAT *, BLASLONG, FLOAT , FLOAT, FLOAT *, BLASLONG)) SMALL_KERNEL_ADDR(zgemm_small_kernel, (idx)) #define ZGEMM_SMALL_KERNEL_B0(idx) (int (*)(BLASLONG, BLASLONG, BLASLONG, FLOAT *, BLASLONG, FLOAT , FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG)) SMALL_KERNEL_ADDR(zgemm_small_kernel_b0, (idx)) #endif #endif +#endif #ifndef CBLAS @@ -468,6 +462,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS FUNCTION_PROFILE_START(); +#ifndef GEMM3M #ifdef SMALL_MATRIX_OPT #if !defined(COMPLEX) if(GEMM_SMALL_MATRIX_PERMIT(transa, transb, args.m, args.n, args.k, *(FLOAT *)(args.alpha), *(FLOAT *)(args.beta))){ @@ -489,6 +484,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS } #endif #endif +#endif buffer = (XFLOAT *)blas_memory_alloc(0); -- 2.7.4