int dtb_entries;
int offsetA, offsetB, align;
-#ifdef BUILD_HALF
- int shgemm_p, shgemm_q, shgemm_r;
- int shgemm_unroll_m, shgemm_unroll_n, shgemm_unroll_mn;
+#ifdef BUILD_BFLOAT16
+ int sbgemm_p, sbgemm_q, sbgemm_r;
+ int sbgemm_unroll_m, sbgemm_unroll_n, sbgemm_unroll_mn;
void (*shstobf16_k) (BLASLONG, float *, BLASLONG, bfloat16 *, BLASLONG);
void (*shdtobf16_k) (BLASLONG, double *, BLASLONG, bfloat16 *, BLASLONG);
float (*shasum_k) (BLASLONG, float *, BLASLONG);
float (*shsum_k) (BLASLONG, float *, BLASLONG);
int (*shcopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
- float (*shdot_k) (BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG);
- double (*dshdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
+ float (*sbdot_k) (BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG);
+ double (*dsbdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*shrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
int (*shscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*shswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
- int (*shgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
- int (*shgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
+ int (*sbgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
+ int (*sbgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*shger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*shsymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*shsymv_U) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
- int (*shgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, bfloat16 *, float *, BLASLONG);
- int (*shgemm_beta )(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float *, BLASLONG);
+ int (*sbgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, bfloat16 *, float *, BLASLONG);
+ int (*sbgemm_beta )(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float *, BLASLONG);
- int (*shgemm_incopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
- int (*shgemm_itcopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
- int (*shgemm_oncopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
- int (*shgemm_otcopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
+ int (*sbgemm_incopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
+ int (*sbgemm_itcopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
+ int (*sbgemm_oncopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
+ int (*sbgemm_otcopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
int (*shtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
int (*shtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
#endif
-#if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX) || (BUILD_COMPLEX16)
+#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
int sgemm_p, sgemm_q, sgemm_r;
int sgemm_unroll_m, sgemm_unroll_n, sgemm_unroll_mn;
#endif
int exclusive_cache;
-#if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX)
+#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
float (*samax_k) (BLASLONG, float *, BLASLONG);
float (*samin_k) (BLASLONG, float *, BLASLONG);
float (*smax_k) (BLASLONG, float *, BLASLONG);
float (*snrm2_k) (BLASLONG, float *, BLASLONG);
float (*sasum_k) (BLASLONG, float *, BLASLONG);
#endif
-
-#if BUILD_SINGLE
+#ifdef BUILD_SINGLE
float (*ssum_k) (BLASLONG, float *, BLASLONG);
#endif
-#if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX)
+#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
int (*scopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
float (*sdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
//double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*srot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
int (*saxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
-#endif
-#if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX) || (BUILD_COMPLEX16)
int (*sscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
-#endif
-#if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX)
int (*sswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*sgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*sgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
#endif
-
-#if BUILD_SINGLE
+#ifdef BUILD_SINGLE
int (*sger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*ssymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*ssymv_U) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
#endif
-
-#if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX)
+#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
#ifdef ARCH_X86_64
void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG);
int (*sgemm_direct_performant) (BLASLONG M, BLASLONG N, BLASLONG K);
int (*sgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
int (*sgemm_otcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
#endif
-
-#if (BUILD_SINGLE) || (BUILD_DOUBLE)
+#ifdef BUILD_SINGLE
int (*strsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
int (*strsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
int (*strsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
int (*strsm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
int (*strsm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
int (*strsm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
-#endif
-#if BUILD_SINGLE
+
int (*strmm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
int (*strmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
int (*strmm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
int (*ssymm_iltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
int (*ssymm_outcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
int (*ssymm_oltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
-#endif
-#if (BUILD_SINGLE) || (BUILD_DOUBLE)
+
int (*sneg_tcopy) (BLASLONG, BLASLONG, float *, BLASLONG, float *);
int (*slaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
#endif
-#if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
+#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
int dgemm_p, dgemm_q, dgemm_r;
int dgemm_unroll_m, dgemm_unroll_n, dgemm_unroll_mn;
#endif
-#if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
+#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
double (*damax_k) (BLASLONG, double *, BLASLONG);
double (*damin_k) (BLASLONG, double *, BLASLONG);
double (*dmax_k) (BLASLONG, double *, BLASLONG);
BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
#endif
-#if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
+#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
double (*dnrm2_k) (BLASLONG, double *, BLASLONG);
double (*dasum_k) (BLASLONG, double *, BLASLONG);
#endif
-#if BUILD_DOUBLE
+#ifdef BUILD_DOUBLE
double (*dsum_k) (BLASLONG, double *, BLASLONG);
#endif
-#if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
+#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
int (*dcopy_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
double (*ddot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
#endif
-#if (BUILD_SINGLE) || (BUILD_DOUBLE)
+#if defined (BUILD_SINGLE) || defined(BUILD_DOUBLE)
double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
#endif
-#if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
+#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
int (*drot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);
int (*daxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int (*dscal_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int (*dgemv_n) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
int (*dgemv_t) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
#endif
-
-#if BUILD_DOUBLE
+#ifdef BUILD_DOUBLE
int (*dger_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
int (*dsymv_L) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
int (*dsymv_U) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
#endif
-
-#if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
+#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
int (*dgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG);
int (*dgemm_beta )(BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int (*dgemm_oncopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
int (*dgemm_otcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
#endif
-
-#if BUILD_DOUBLE
+#ifdef BUILD_DOUBLE
int (*dtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
int (*dtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
int (*dtrsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
#endif
-
-#if (BUILD_COMPLEX) || (BUILD_COMPLEX16)
+#ifdef BUILD_COMPLEX
int cgemm_p, cgemm_q, cgemm_r;
int cgemm_unroll_m, cgemm_unroll_n, cgemm_unroll_mn;
+
float (*camax_k) (BLASLONG, float *, BLASLONG);
float (*camin_k) (BLASLONG, float *, BLASLONG);
BLASLONG (*icamax_k)(BLASLONG, float *, BLASLONG);
BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
-#endif
-#if BUILD_COMPLEX
float (*cnrm2_k) (BLASLONG, float *, BLASLONG);
float (*casum_k) (BLASLONG, float *, BLASLONG);
float (*csum_k) (BLASLONG, float *, BLASLONG);
-#endif
-#if (BUILD_COMPLEX)|| (BUILD_COMPLEX16)
int (*ccopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
openblas_complex_float (*cdotu_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
openblas_complex_float (*cdotc_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
-#endif
-#if BUILD_COMPLEX
int (*csrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
-#endif
-#if (BUILD_COMPLEX)|| (BUILD_COMPLEX16)
+
int (*caxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*caxpyc_k)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*cscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*cgemv_u) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*cgemv_s) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*cgemv_d) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
-#endif
-#if (BUILD_COMPLEX)
int (*cgeru_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*cgerc_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*cgerv_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*chemv_U) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*chemv_M) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*chemv_V) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
-#endif
-#if (BUILD_COMPLEX) || (BUILD_COMPLEX16)
int (*cgemm_kernel_n )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
int (*cgemm_kernel_l )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
int (*cgemm_kernel_r )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
int (*cgemm_kernel_b )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
int (*cgemm_beta )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
+
int (*cgemm_incopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
int (*cgemm_itcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
int (*cgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
int (*ctrsm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
int (*ctrsm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
int (*ctrsm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
-#endif
-#if (BUILD_COMPLEX)
int (*ctrmm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
int (*ctrmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
int (*chemm3m_olcopyr)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
int (*chemm3m_oucopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
int (*chemm3m_olcopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
-#endif
-#if (BUILD_COMPLEX) || (BUILD_COMPLEX16)
+
int (*cneg_tcopy) (BLASLONG, BLASLONG, float *, BLASLONG, float *);
int (*claswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
#endif
-
-#if BUILD_COMPLEX16
+#ifdef BUILD_COMPLEX16
int zgemm_p, zgemm_q, zgemm_r;
int zgemm_unroll_m, zgemm_unroll_n, zgemm_unroll_mn;
void (*init)(void);
int snum_opt, dnum_opt, qnum_opt;
-
-#if BUILD_SINGLE
+#ifdef BUILD_SINGLE
int (*saxpby_k) (BLASLONG, float, float*, BLASLONG,float, float*, BLASLONG);
#endif
-#if BUILD_DOUBLE
+#ifdef BUILD_DOUBLE
int (*daxpby_k) (BLASLONG, double, double*, BLASLONG,double, double*, BLASLONG);
#endif
-#if BUILD_COMPLEX
+#ifdef BUILD_COMPLEX
int (*caxpby_k) (BLASLONG, float, float, float*, BLASLONG,float,float, float*, BLASLONG);
#endif
-#if BUILD_COMPLEX16
+#ifdef BUILD_COMPLEX16
int (*zaxpby_k) (BLASLONG, double, double, double*, BLASLONG,double,double, double*, BLASLONG);
#endif
-#if BUILD_SINGLE
+#ifdef BUILD_SINGLE
int (*somatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
int (*somatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
#endif
-#if BUILD_DOUBLE
+#ifdef BUILD_DOUBLE
int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
#endif
-#if BUILD_COMPLEX
+#ifdef BUILD_COMPLEX
int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
#endif
-#if BUILD_COMPLEX16
+#ifdef BUILD_COMPLEX16
int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
#endif
-#if BUILD_SINGLE
+#ifdef BUILD_SINGLE
int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG);
#endif
-#if BUILD_DOUBLE
+#ifdef BUILD_DOUBLE
int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG);
#endif
-#if BUILD_COMPLEX
+#ifdef BUILD_COMPLEX
int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
#endif
-#if BUILD_COMPLEX16
+#ifdef BUILD_COMPLEX16
int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
#endif
-#if BUILD_SINGLE
+#ifdef BUILD_SINGLE
int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
#endif
-#if BUILD_DOUBLE
+#ifdef BUILD_DOUBLE
int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
#endif
-#if BUILD_COMPLEX
+#ifdef BUILD_COMPLEX
int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
#endif
-#if BUILD_COMPLEX16
+#ifdef BUILD_COMPLEX16
int (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG);
#endif
} gotoblas_t;
/* In this branch the parameter macros expand to field reads through the
 * `gotoblas` pointer; HAVE_EX_L2 reads its exclusive_cache field. */
#define HAVE_EX_L2 gotoblas -> exclusive_cache
/* Rename in this hunk: the SHGEMM_P/Q/R and SHGEMM_UNROLL_M/N/MN macros
 * (guarded by BUILD_HALF) become SBGEMM_P/Q/R and SBGEMM_UNROLL_M/N/MN
 * (guarded by BUILD_BFLOAT16). Each new macro reads the sbgemm_ field whose
 * name is the lower-case form of the macro name. */
-#ifdef BUILD_HALF
-#define SHGEMM_P gotoblas -> shgemm_p
-#define SHGEMM_Q gotoblas -> shgemm_q
-#define SHGEMM_R gotoblas -> shgemm_r
-#define SHGEMM_UNROLL_M gotoblas -> shgemm_unroll_m
-#define SHGEMM_UNROLL_N gotoblas -> shgemm_unroll_n
-#define SHGEMM_UNROLL_MN gotoblas -> shgemm_unroll_mn
+#ifdef BUILD_BFLOAT16
+#define SBGEMM_P gotoblas -> sbgemm_p
+#define SBGEMM_Q gotoblas -> sbgemm_q
+#define SBGEMM_R gotoblas -> sbgemm_r
+#define SBGEMM_UNROLL_M gotoblas -> sbgemm_unroll_m
+#define SBGEMM_UNROLL_N gotoblas -> sbgemm_unroll_n
+#define SBGEMM_UNROLL_MN gotoblas -> sbgemm_unroll_mn
#endif
/* SGEMM_P/Q/R and SGEMM_UNROLL_MN read the matching sgemm_ fields through
 * `gotoblas`. The guard changes from a value test, `#if (BUILD_SINGLE)`, to a
 * definedness test, `#if defined (BUILD_SINGLE)`.
 * NOTE(review): the two forms differ when BUILD_SINGLE is defined as 0 (the
 * old form skips the group, the new form keeps it); confirm how the build
 * system defines this flag. */
-#if (BUILD_SINGLE)
+#if defined (BUILD_SINGLE)
#define SGEMM_P gotoblas -> sgemm_p
#define SGEMM_Q gotoblas -> sgemm_q
#define SGEMM_R gotoblas -> sgemm_r
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn
#endif
/* DGEMM_P/Q/R and DGEMM_UNROLL_M/N/MN read the matching dgemm_ fields through
 * `gotoblas`. The guard changes from `#if (BUILD_DOUBLE)` to
 * `#if defined (BUILD_DOUBLE)`, i.e. from a value test to a definedness test. */
-#if (BUILD_DOUBLE)
+#if defined (BUILD_DOUBLE)
#define DGEMM_P gotoblas -> dgemm_p
#define DGEMM_Q gotoblas -> dgemm_q
#define DGEMM_R gotoblas -> dgemm_r
#define DGEMM_UNROLL_M gotoblas -> dgemm_unroll_m
#define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n
#define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn
/* Removed by this hunk: the nested fallback that defined the six SGEMM_
 * macros inside the BUILD_DOUBLE group when BUILD_SINGLE was not set.
 * NOTE(review): confirm that nothing compiled with only BUILD_DOUBLE still
 * refers to SGEMM_P, SGEMM_Q, SGEMM_R or the SGEMM_UNROLL_ macros. */
-#if ! (BUILD_SINGLE)
-#define SGEMM_P gotoblas -> sgemm_p
-#define SGEMM_Q gotoblas -> sgemm_q
-#define SGEMM_R gotoblas -> sgemm_r
-#define SGEMM_UNROLL_M gotoblas -> sgemm_unroll_m
-#define SGEMM_UNROLL_N gotoblas -> sgemm_unroll_n
-#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn
-#endif
#endif
#define QGEMM_P gotoblas -> qgemm_p
#define QGEMM_UNROLL_N gotoblas -> qgemm_unroll_n
#define QGEMM_UNROLL_MN gotoblas -> qgemm_unroll_mn
/* CGEMM_P/Q/R read the cgemm_p/q/r fields through `gotoblas`. The guard
 * changes from `#if BUILD_COMPLEX` (value test) to `#ifdef BUILD_COMPLEX`
 * (definedness test). */
-#if BUILD_COMPLEX
+#ifdef BUILD_COMPLEX
#define CGEMM_P gotoblas -> cgemm_p
#define CGEMM_Q gotoblas -> cgemm_q
#define CGEMM_R gotoblas -> cgemm_r
#endif
#endif
/* ZGEMM_P/Q/R read the zgemm_p/q/r fields through `gotoblas`. The guard
 * changes from `#if BUILD_COMPLEX16` to `#ifdef BUILD_COMPLEX16`.
 * NOTE(review): lines between hunks appear to be missing from this excerpt
 * (the DGEMM_UNROLL_ lines and the extra #endif below belong to a nested
 * conditional whose opening is not shown), so the #if/#endif nesting cannot
 * be checked here; verify it against the full file. */
-#if BUILD_COMPLEX16
+#ifdef BUILD_COMPLEX16
#define ZGEMM_P gotoblas -> zgemm_p
#define ZGEMM_Q gotoblas -> zgemm_q
#define ZGEMM_R gotoblas -> zgemm_r
#define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n
#define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn
#endif
/* Removed by this hunk: the fallback that defined CGEMM_P/Q/R and the
 * CGEMM_UNROLL_ macros when BUILD_COMPLEX was not defined.
 * NOTE(review): confirm no remaining user of those macros in builds without
 * BUILD_COMPLEX. */
-#ifndef BUILD_COMPLEX
-#define CGEMM_P gotoblas -> cgemm_p
-#define CGEMM_Q gotoblas -> cgemm_q
-#define CGEMM_R gotoblas -> cgemm_r
-#define CGEMM_UNROLL_M gotoblas -> cgemm_unroll_m
-#define CGEMM_UNROLL_N gotoblas -> cgemm_unroll_n
-#define CGEMM_UNROLL_MN gotoblas -> cgemm_unroll_mn
-#endif
#endif
#define XGEMM_P gotoblas -> xgemm_p
#define HAVE_EX_L2 0
#endif
/* Constant-valued counterparts of the parameter macros: here each macro
 * expands to the corresponding _DEFAULT_ constant instead of a `gotoblas`
 * field. The hunk renames the SHGEMM_ family (guard BUILD_HALF) to the
 * SBGEMM_ family (guard BUILD_BFLOAT16). The UNROLL_MN macro uses
 * SBGEMM_DEFAULT_UNROLL_MN when that is defined and otherwise
 * MAX(SBGEMM_UNROLL_M, SBGEMM_UNROLL_N). */
-#ifdef BUILD_HALF
-#define SHGEMM_P SHGEMM_DEFAULT_P
-#define SHGEMM_Q SHGEMM_DEFAULT_Q
-#define SHGEMM_R SHGEMM_DEFAULT_R
-#define SHGEMM_UNROLL_M SHGEMM_DEFAULT_UNROLL_M
-#define SHGEMM_UNROLL_N SHGEMM_DEFAULT_UNROLL_N
-#ifdef SHGEMM_DEFAULT_UNROLL_MN
-#define SHGEMM_UNROLL_MN SHGEMM_DEFAULT_UNROLL_MN
+#ifdef BUILD_BFLOAT16
+#define SBGEMM_P SBGEMM_DEFAULT_P
+#define SBGEMM_Q SBGEMM_DEFAULT_Q
+#define SBGEMM_R SBGEMM_DEFAULT_R
+#define SBGEMM_UNROLL_M SBGEMM_DEFAULT_UNROLL_M
+#define SBGEMM_UNROLL_N SBGEMM_DEFAULT_UNROLL_N
+#ifdef SBGEMM_DEFAULT_UNROLL_MN
+#define SBGEMM_UNROLL_MN SBGEMM_DEFAULT_UNROLL_MN
#else
-#define SHGEMM_UNROLL_MN MAX((SHGEMM_UNROLL_M), (SHGEMM_UNROLL_N))
+#define SBGEMM_UNROLL_MN MAX((SBGEMM_UNROLL_M), (SBGEMM_UNROLL_N))
#endif
#endif
#endif
/* Selects which precision-specific family the generic GEMM_ macros alias.
 * When COMPLEX is not defined the chain tests XDOUBLE first, then BFLOAT16
 * (this hunk replaces the former `#elif (HALF)` arm, which aliased SHGEMM_,
 * with `#elif defined(BFLOAT16)` aliasing SBGEMM_), then falls to the #else
 * arm. When COMPLEX is defined, XDOUBLE selects the XGEMM_ names. Both
 * XDOUBLE tests change from `#if (XDOUBLE)` to `#if defined(XDOUBLE)`.
 * NOTE(review): this excerpt omits lines between hunks (other arms of the
 * chain and the closing #endif of the COMPLEX test are not shown); read it
 * together with the full file. */
#ifndef COMPLEX
-#if (XDOUBLE)
+#if defined(XDOUBLE)
#define GEMM_P QGEMM_P
#define GEMM_Q QGEMM_Q
#define GEMM_R QGEMM_R
#define GEMM_DEFAULT_R DGEMM_DEFAULT_R
#define GEMM_DEFAULT_UNROLL_M DGEMM_DEFAULT_UNROLL_M
#define GEMM_DEFAULT_UNROLL_N DGEMM_DEFAULT_UNROLL_N
-#elif (HALF)
-#define GEMM_P SHGEMM_P
-#define GEMM_Q SHGEMM_Q
-#define GEMM_R SHGEMM_R
-#define GEMM_UNROLL_M SHGEMM_UNROLL_M
-#define GEMM_UNROLL_N SHGEMM_UNROLL_N
-#define GEMM_UNROLL_MN SHGEMM_UNROLL_MN
-#define GEMM_DEFAULT_P SHGEMM_DEFAULT_P
-#define GEMM_DEFAULT_Q SHGEMM_DEFAULT_Q
-#define GEMM_DEFAULT_R SHGEMM_DEFAULT_R
-#define GEMM_DEFAULT_UNROLL_M SHGEMM_DEFAULT_UNROLL_M
-#define GEMM_DEFAULT_UNROLL_N SHGEMM_DEFAULT_UNROLL_N
+#elif defined(BFLOAT16)
+#define GEMM_P SBGEMM_P
+#define GEMM_Q SBGEMM_Q
+#define GEMM_R SBGEMM_R
+#define GEMM_UNROLL_M SBGEMM_UNROLL_M
+#define GEMM_UNROLL_N SBGEMM_UNROLL_N
+#define GEMM_UNROLL_MN SBGEMM_UNROLL_MN
+#define GEMM_DEFAULT_P SBGEMM_DEFAULT_P
+#define GEMM_DEFAULT_Q SBGEMM_DEFAULT_Q
+#define GEMM_DEFAULT_R SBGEMM_DEFAULT_R
+#define GEMM_DEFAULT_UNROLL_M SBGEMM_DEFAULT_UNROLL_M
+#define GEMM_DEFAULT_UNROLL_N SBGEMM_DEFAULT_UNROLL_N
#else
#define GEMM_P SGEMM_P
#define GEMM_Q SGEMM_Q
#define GEMM_DEFAULT_UNROLL_N SGEMM_DEFAULT_UNROLL_N
#endif
#else
-#if (XDOUBLE)
+#if defined(XDOUBLE)
#define GEMM_P XGEMM_P
#define GEMM_Q XGEMM_Q
#define GEMM_R XGEMM_R
#define GEMM_THREAD gemm_thread_n
#endif
/* Fallback used only when SBGEMM_DEFAULT_R (formerly SHGEMM_DEFAULT_R) has
 * not been defined already. As written, the expression takes BUFFER_SIZE,
 * subtracts (P * Q * 4 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) masked
 * with ~GEMM_DEFAULT_ALIGN, divides the remainder by (Q * 4), subtracts 15
 * and clears the low four bits of the result.
 * NOTE(review): the literal factor 4 is carried over unchanged by the
 * rename; whether it is intended as a per-element byte size for this data
 * type cannot be determined from this excerpt; confirm. */
-#ifndef SHGEMM_DEFAULT_R
-#define SHGEMM_DEFAULT_R (((BUFFER_SIZE - ((SHGEMM_DEFAULT_P * SHGEMM_DEFAULT_Q * 4 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (SHGEMM_DEFAULT_Q * 4) - 15) & ~15UL)
+#ifndef SBGEMM_DEFAULT_R
+#define SBGEMM_DEFAULT_R (((BUFFER_SIZE - ((SBGEMM_DEFAULT_P * SBGEMM_DEFAULT_Q * 4 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (SBGEMM_DEFAULT_Q * 4) - 15) & ~15UL)
#endif
#ifndef SGEMM_DEFAULT_R
#ifndef GEMM3M_P
#ifdef XDOUBLE
#define GEMM3M_P XGEMM3M_P
-#elif defined (DOUBLE)
+#elif defined(DOUBLE)
#define GEMM3M_P ZGEMM3M_P
#else
#define GEMM3M_P CGEMM3M_P
#ifndef GEMM3M_Q
#ifdef XDOUBLE
#define GEMM3M_Q XGEMM3M_Q
-#elif defined (DOUBLE)
+#elif defined(DOUBLE)
#define GEMM3M_Q ZGEMM3M_Q
#else
#define GEMM3M_Q CGEMM3M_Q
#ifndef GEMM3M_R
#ifdef XDOUBLE
#define GEMM3M_R XGEMM3M_R
-#elif defined (DOUBLE)
+#elif defined(DOUBLE)
#define GEMM3M_R ZGEMM3M_R
#else
#define GEMM3M_R CGEMM3M_R