extern "C" {
#endif
-int sgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
+int sgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
-int dgemm_beta(BLASLONG, BLASLONG, BLASLONG, double,
+int dgemm_beta(BLASLONG, BLASLONG, BLASLONG, double,
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int cgemm_beta(BLASLONG, BLASLONG, BLASLONG, float, float,
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
#ifdef EXPRECISION
-int qgemm_beta(BLASLONG, BLASLONG, BLASLONG, xdouble,
+int qgemm_beta(BLASLONG, BLASLONG, BLASLONG, xdouble,
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
int xgemm_beta(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
#else
-int qgemm_beta(BLASLONG, BLASLONG, BLASLONG, xdouble *,
+int qgemm_beta(BLASLONG, BLASLONG, BLASLONG, xdouble *,
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
int xgemm_beta(BLASLONG, BLASLONG, BLASLONG, xdouble *,
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
int somatcopy_k_rn(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
int somatcopy_k_ct(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
int somatcopy_k_rt(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
+int simatcopy_k_cn(BLASLONG, BLASLONG, float, float *, BLASLONG);
+int simatcopy_k_rn(BLASLONG, BLASLONG, float, float *, BLASLONG);
+int simatcopy_k_ct(BLASLONG, BLASLONG, float, float *, BLASLONG);
+int simatcopy_k_rt(BLASLONG, BLASLONG, float, float *, BLASLONG);
int domatcopy_k_cn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
int domatcopy_k_rn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
int domatcopy_k_ct(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
int domatcopy_k_rt(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
+int dimatcopy_k_cn(BLASLONG, BLASLONG, double, double *, BLASLONG);
+int dimatcopy_k_rn(BLASLONG, BLASLONG, double, double *, BLASLONG);
+int dimatcopy_k_ct(BLASLONG, BLASLONG, double, double *, BLASLONG);
+int dimatcopy_k_rt(BLASLONG, BLASLONG, double, double *, BLASLONG);
+
+int comatcopy_k_cn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
+int comatcopy_k_rn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
+int comatcopy_k_ct(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
+int comatcopy_k_rt(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
+int cimatcopy_k_cn(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
+int cimatcopy_k_rn(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
+int cimatcopy_k_ct(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
+int cimatcopy_k_rt(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
+
+int comatcopy_k_cnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
+int comatcopy_k_rnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
+int comatcopy_k_ctc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
+int comatcopy_k_rtc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
+int cimatcopy_k_cnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
+int cimatcopy_k_rnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
+int cimatcopy_k_ctc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
+int cimatcopy_k_rtc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
+
+int zomatcopy_k_cn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
+int zomatcopy_k_rn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
+int zomatcopy_k_ct(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
+int zomatcopy_k_rt(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
+int zimatcopy_k_cn(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
+int zimatcopy_k_rn(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
+int zimatcopy_k_ct(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
+int zimatcopy_k_rt(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
+
+int zomatcopy_k_cnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
+int zomatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
+int zomatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
+int zomatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
+int zimatcopy_k_cnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
+int zimatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
+int zimatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
+int zimatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
+
+int sgeadd_k(BLASLONG, BLASLONG, float, float*, BLASLONG, float, float *, BLASLONG);
+int dgeadd_k(BLASLONG, BLASLONG, double, double*, BLASLONG, double, double *, BLASLONG);
+int cgeadd_k(BLASLONG, BLASLONG, float, float, float*, BLASLONG, float, float, float *, BLASLONG);
+int zgeadd_k(BLASLONG, BLASLONG, double,double, double*, BLASLONG, double, double, double *, BLASLONG);
+
#ifdef __CUDACC__
}