From 39cc6b21d3e282e8b91ffe4255e02e586885db2c Mon Sep 17 00:00:00 2001 From: Martin Koehler Date: Mon, 16 Feb 2015 13:46:20 +0100 Subject: [PATCH] Add ATLAS-style ?geadd function --- cblas.h | 10 ++++ cblas_noconst.h | 10 ++++ common_c.h | 2 + common_d.h | 3 ++ common_interface.h | 6 +++ common_level3.h | 5 ++ common_macro.h | 6 ++- common_param.h | 4 ++ common_s.h | 2 + common_z.h | 3 ++ exports/gensymbol | 4 +- interface/Makefile | 49 ++++++++++++++--- interface/geadd.c | 148 ++++++++++++++++++++++++++++++++++++++++++++++++++++ interface/zgeadd.c | 146 +++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/Makefile.L3 | 40 ++++++++++++-- kernel/arm/geadd.c | 64 +++++++++++++++++++++++ kernel/arm/zgeadd.c | 65 +++++++++++++++++++++++ 17 files changed, 553 insertions(+), 14 deletions(-) create mode 100644 interface/geadd.c create mode 100644 interface/zgeadd.c create mode 100644 kernel/arm/geadd.c create mode 100644 kernel/arm/zgeadd.c diff --git a/cblas.h b/cblas.h index a21863d..d6949e1 100644 --- a/cblas.h +++ b/cblas.h @@ -347,6 +347,16 @@ void cblas_cimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum void cblas_zimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double* calpha, double* a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb); +void cblas_sgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float cbeta, + float *c, OPENBLAS_CONST blasint cldc); +void cblas_dgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double cbeta, + double *c, OPENBLAS_CONST blasint cldc); +void cblas_cgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float *calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float *cbeta, + float *c, OPENBLAS_CONST blasint cldc); +void cblas_zgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double *calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double *cbeta, + double *c, OPENBLAS_CONST blasint cldc); + + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/cblas_noconst.h b/cblas_noconst.h index f6a6baf..4451c30 100644 --- a/cblas_noconst.h +++ b/cblas_noconst.h @@ -333,6 +333,16 @@ void cblas_cimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, bl blasint clda, blasint cldb); void cblas_zimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double* calpha, double* a, blasint clda, blasint cldb); + +void cblas_sgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float calpha, float *a, blasint clda, float cbeta, + float *c, blasint cldc); +void cblas_dgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double calpha, double *a, blasint clda, double cbeta, + double *c, blasint cldc); +void cblas_cgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float *calpha, float *a, blasint clda, float *cbeta, + float *c, blasint cldc); +void cblas_zgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double *calpha, double *a, blasint clda, double *cbeta, + double *c, blasint cldc); + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/common_c.h b/common_c.h index 724d1e2..741d7d0 100644 --- a/common_c.h +++ b/common_c.h @@ -220,6 +220,7 @@ #define COMATCOPY_K_CTC comatcopy_k_ctc #define COMATCOPY_K_RTC comatcopy_k_rtc +#define CGEADD_K cgeadd_k #else @@ -402,6 +403,7 @@ #define COMATCOPY_K_RNC gotoblas -> comatcopy_k_rnc #define COMATCOPY_K_CTC gotoblas -> comatcopy_k_ctc #define COMATCOPY_K_RTC gotoblas -> comatcopy_k_rtc +#define CGEADD_K gotoblas -> cgeadd_k #endif diff --git a/common_d.h b/common_d.h index c34e1f2..d6dfd7f 100644 --- a/common_d.h +++ b/common_d.h @@ -149,6 +149,7 @@ #define DOMATCOPY_K_RN domatcopy_k_rn #define DOMATCOPY_K_CT domatcopy_k_ct #define DOMATCOPY_K_RT domatcopy_k_rt +#define DGEADD_K dgeadd_k #else @@ -267,6 +268,8 @@ #define DOMATCOPY_K_CT gotoblas -> domatcopy_k_ct #define DOMATCOPY_K_RT gotoblas -> domatcopy_k_rt +#define DGEADD_K gotoblas -> dgeadd_k + #endif #define DGEMM_NN dgemm_nn diff --git a/common_interface.h b/common_interface.h index ddd2cf6..15f69e0 100644 --- a/common_interface.h +++ b/common_interface.h @@ -754,6 +754,12 @@ void BLASFUNC(dimatcopy) (char *, char *, blasint *, blasint *, double *, do void BLASFUNC(cimatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, blasint *); void BLASFUNC(zimatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, blasint *); +void BLASFUNC(sgeadd) (blasint *, blasint *, float *, float *, blasint *, float *, float *, blasint*); +void BLASFUNC(dgeadd) (blasint *, blasint *, double *, double *, blasint *, double *, double *, blasint*); +void BLASFUNC(cgeadd) (blasint *, blasint *, float *, float *, blasint *, float *, float *, blasint*); +void BLASFUNC(zgeadd) (blasint *, blasint *, double *, double *, blasint *, double *, double *, blasint*); + + #ifdef __cplusplus } diff --git a/common_level3.h b/common_level3.h index 0babd45..e0ecbc4 100644 --- a/common_level3.h +++ b/common_level3.h @@ -1762,6 +1762,11 @@ int zomatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, dou int zomatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); int zomatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); +int sgeadd_k(BLASLONG, BLASLONG, float, float*, BLASLONG, float, float *, BLASLONG); +int dgeadd_k(BLASLONG, BLASLONG, double, double*, BLASLONG, double, double *, BLASLONG); +int cgeadd_k(BLASLONG, BLASLONG, float, float, float*, BLASLONG, float, float, float *, BLASLONG); +int zgeadd_k(BLASLONG, BLASLONG, double,double, double*, BLASLONG, double, double, double *, BLASLONG); + #ifdef __CUDACC__ } diff --git a/common_macro.h b/common_macro.h index f9de377..8555baa 100644 --- a/common_macro.h +++ b/common_macro.h @@ -634,7 +634,7 @@ #define OMATCOPY_K_RN DOMATCOPY_K_RN #define OMATCOPY_K_CT DOMATCOPY_K_CT #define OMATCOPY_K_RT DOMATCOPY_K_RT - +#define GEADD_K DGEADD_K #else #define AMAX_K SAMAX_K @@ -932,6 +932,7 @@ #define OMATCOPY_K_CT SOMATCOPY_K_CT #define OMATCOPY_K_RT SOMATCOPY_K_RT +#define GEADD_K SGEADD_K #endif #else #ifdef XDOUBLE @@ -1746,6 +1747,7 @@ #define OMATCOPY_K_RNC ZOMATCOPY_K_RNC #define OMATCOPY_K_CTC ZOMATCOPY_K_CTC #define OMATCOPY_K_RTC ZOMATCOPY_K_RTC +#define GEADD_K ZGEADD_K #else @@ -2159,6 +2161,8 @@ #define OMATCOPY_K_CTC COMATCOPY_K_CTC #define OMATCOPY_K_RTC COMATCOPY_K_RTC +#define GEADD_K CGEADD_K + #endif #endif diff --git a/common_param.h b/common_param.h index 49c1bf7..1b56e85 100644 --- a/common_param.h +++ b/common_param.h @@ -855,6 +855,10 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG); int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); + int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG); + int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG); + int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG); + int (*zgeadd_k) (BLASLONG, BLASLONG, float, double, double *, BLASLONG, double, double, double *, BLASLONG); } gotoblas_t; diff --git a/common_s.h b/common_s.h index 4e9b6db..a4d8679 100644 --- a/common_s.h +++ b/common_s.h @@ -153,6 +153,7 @@ #define SOMATCOPY_K_CT somatcopy_k_ct #define SOMATCOPY_K_RT somatcopy_k_rt +#define SGEADD_K sgeadd_k #else @@ -274,6 +275,7 @@ #define SOMATCOPY_K_CT gotoblas -> somatcopy_k_ct #define SOMATCOPY_K_RT gotoblas -> somatcopy_k_rt +#define SGEADD_K gotoblas -> sgeadd_k #endif diff --git a/common_z.h b/common_z.h index 133dea8..85f577a 100644 --- a/common_z.h +++ b/common_z.h @@ -220,6 +220,7 @@ #define ZOMATCOPY_K_CTC zomatcopy_k_ctc #define ZOMATCOPY_K_RTC zomatcopy_k_rtc +#define ZGEADD_K zgeadd_k #else @@ -403,6 +404,8 @@ #define ZOMATCOPY_K_CTC gotoblas -> zomatcopy_k_ctc #define ZOMATCOPY_K_RTC gotoblas -> zomatcopy_k_rtc +#define ZGEADD_K zgeadd_k + #endif #define ZGEMM_NN zgemm_nn diff --git a/exports/gensymbol b/exports/gensymbol index 2155f80..12ca737 100644 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -23,7 +23,8 @@ zhpr,zrotg,zscal,zswap,zsymm,zsyr2k,zsyrk,ztbmv, ztbsv,ztpmv,ztpsv,ztrmm,ztrmv,ztrsm,ztrsv, zsymv, xerbla, - saxpby,daxpby,caxpby,zaxpby + saxpby,daxpby,caxpby,zaxpby, + sgeadd,dgeadd,cgeadd,zgeadd, ); @cblasobjs = ( @@ -55,6 +56,7 @@ cblas_saxpby,cblas_daxpby,cblas_caxpby,cblas_zaxpby, cblas_somatcopy, cblas_domatcopy, cblas_comatcopy, cblas_zomatcopy, cblas_simatcopy, cblas_dimatcopy, cblas_cimatcopy, cblas_zimatcopy, + cblas_sgeadd, cblas_dgeadd,cblas_cgeadd, cblas_zgeadd ); @exblasobjs = ( diff --git a/interface/Makefile b/interface/Makefile index 54699b7..1666d91 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -43,7 +43,8 @@ SBLAS2OBJS = \ SBLAS3OBJS = \ sgemm.$(SUFFIX) ssymm.$(SUFFIX) strmm.$(SUFFIX) \ strsm.$(SUFFIX) ssyrk.$(SUFFIX) ssyr2k.$(SUFFIX) \ - somatcopy.$(SUFFIX) simatcopy.$(SUFFIX) + somatcopy.$(SUFFIX) simatcopy.$(SUFFIX)\ + sgeadd.$(SUFFIX) DBLAS1OBJS = \ @@ -68,7 +69,8 @@ DBLAS2OBJS = \ DBLAS3OBJS = \ dgemm.$(SUFFIX) dsymm.$(SUFFIX) dtrmm.$(SUFFIX) \ dtrsm.$(SUFFIX) dsyrk.$(SUFFIX) dsyr2k.$(SUFFIX) \ - domatcopy.$(SUFFIX) dimatcopy.$(SUFFIX) + domatcopy.$(SUFFIX) dimatcopy.$(SUFFIX)\ + dgeadd.$(SUFFIX) CBLAS1OBJS = \ caxpy.$(SUFFIX) caxpyc.$(SUFFIX) cswap.$(SUFFIX) \ @@ -96,7 +98,8 @@ CBLAS3OBJS = \ cgemm.$(SUFFIX) csymm.$(SUFFIX) ctrmm.$(SUFFIX) \ ctrsm.$(SUFFIX) csyrk.$(SUFFIX) csyr2k.$(SUFFIX) \ chemm.$(SUFFIX) cherk.$(SUFFIX) cher2k.$(SUFFIX) \ - comatcopy.$(SUFFIX) cimatcopy.$(SUFFIX) + comatcopy.$(SUFFIX) cimatcopy.$(SUFFIX)\ + cgeadd.$(SUFFIX) ZBLAS1OBJS = \ zaxpy.$(SUFFIX) zaxpyc.$(SUFFIX) zswap.$(SUFFIX) \ @@ -124,7 +127,8 @@ ZBLAS3OBJS = \ zgemm.$(SUFFIX) zsymm.$(SUFFIX) ztrmm.$(SUFFIX) \ ztrsm.$(SUFFIX) zsyrk.$(SUFFIX) zsyr2k.$(SUFFIX) \ zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \ - zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX) + zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX)\ + zgeadd.$(SUFFIX) ifeq ($(SUPPORT_GEMM3M), 1) @@ -269,7 +273,8 @@ CSBLAS2OBJS = \ CSBLAS3OBJS = \ cblas_sgemm.$(SUFFIX) cblas_ssymm.$(SUFFIX) cblas_strmm.$(SUFFIX) cblas_strsm.$(SUFFIX) \ - cblas_ssyrk.$(SUFFIX) cblas_ssyr2k.$(SUFFIX) cblas_somatcopy.$(SUFFIX) cblas_simatcopy.$(SUFFIX) + cblas_ssyrk.$(SUFFIX) cblas_ssyr2k.$(SUFFIX) cblas_somatcopy.$(SUFFIX) cblas_simatcopy.$(SUFFIX)\ + cblas_sgeadd.$(SUFFIX) CDBLAS1OBJS = \ cblas_idamax.$(SUFFIX) cblas_dasum.$(SUFFIX) cblas_daxpy.$(SUFFIX) \ @@ -285,7 +290,8 @@ CDBLAS2OBJS = \ CDBLAS3OBJS += \ cblas_dgemm.$(SUFFIX) cblas_dsymm.$(SUFFIX) cblas_dtrmm.$(SUFFIX) cblas_dtrsm.$(SUFFIX) \ - cblas_dsyrk.$(SUFFIX) cblas_dsyr2k.$(SUFFIX) cblas_domatcopy.$(SUFFIX) cblas_dimatcopy.$(SUFFIX) + cblas_dsyrk.$(SUFFIX) cblas_dsyr2k.$(SUFFIX) cblas_domatcopy.$(SUFFIX) cblas_dimatcopy.$(SUFFIX) \ + cblas_dgeadd.$(SUFFIX) CCBLAS1OBJS = \ cblas_icamax.$(SUFFIX) cblas_scasum.$(SUFFIX) cblas_caxpy.$(SUFFIX) \ @@ -308,7 +314,9 @@ CCBLAS3OBJS = \ cblas_cgemm.$(SUFFIX) cblas_csymm.$(SUFFIX) cblas_ctrmm.$(SUFFIX) cblas_ctrsm.$(SUFFIX) \ cblas_csyrk.$(SUFFIX) cblas_csyr2k.$(SUFFIX) \ cblas_chemm.$(SUFFIX) cblas_cherk.$(SUFFIX) cblas_cher2k.$(SUFFIX) \ - cblas_comatcopy.$(SUFFIX) cblas_cimatcopy.$(SUFFIX) + cblas_comatcopy.$(SUFFIX) cblas_cimatcopy.$(SUFFIX)\ + cblas_cgeadd.$(SUFFIX) + CZBLAS1OBJS = \ @@ -332,7 +340,9 @@ CZBLAS3OBJS = \ cblas_zgemm.$(SUFFIX) cblas_zsymm.$(SUFFIX) cblas_ztrmm.$(SUFFIX) cblas_ztrsm.$(SUFFIX) \ cblas_zsyrk.$(SUFFIX) cblas_zsyr2k.$(SUFFIX) \ cblas_zhemm.$(SUFFIX) cblas_zherk.$(SUFFIX) cblas_zher2k.$(SUFFIX)\ - cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX) + cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX) \ + cblas_zgeadd.$(SUFFIX) + ifeq ($(SUPPORT_GEMM3M), 1) @@ -2103,4 +2113,27 @@ zimatcopy.$(SUFFIX) zimatcopy.$(PSUFFIX) : zimatcopy.c cblas_zimatcopy.$(SUFFIX) cblas_zimatcopy.$(PSUFFIX) : zimatcopy.c $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) +sgeadd.$(SUFFIX) sgeadd.$(PSUFFIX) : geadd.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dgeadd.$(SUFFIX) dgeadd.$(PSUFFIX) : geadd.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cgeadd.$(SUFFIX) cgeadd.$(PSUFFIX) : zgeadd.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zgeadd.$(SUFFIX) zgeadd.$(PSUFFIX) : zgeadd.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cblas_sgeadd.$(SUFFIX) cblas_sgeadd.$(PSUFFIX) : geadd.c + $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) + +cblas_dgeadd.$(SUFFIX) cblas_dgeadd.$(PSUFFIX) : geadd.c + $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) + +cblas_cgeadd.$(SUFFIX) cblas_cgeadd.$(PSUFFIX) : zgeadd.c + $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) + +cblas_zgeadd.$(SUFFIX) cblas_zgeadd.$(PSUFFIX) : zgeadd.c + $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) diff --git a/interface/geadd.c b/interface/geadd.c new file mode 100644 index 0000000..f0befa1 --- /dev/null +++ b/interface/geadd.c @@ -0,0 +1,148 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#if defined(DOUBLE) +#define ERROR_NAME "DGEADD " +#else +#define ERROR_NAME "SGEADD " +#endif + +#ifndef CBLAS + +void NAME(blasint *M, blasint *N, FLOAT *ALPHA, FLOAT *a, blasint *LDA, + FLOAT *BETA, FLOAT *c, blasint *LDC) +{ + + blasint m = *M; + blasint n = *N; + blasint lda = *LDA; + blasint ldc = *LDC; + FLOAT alpha = *ALPHA; + FLOAT beta = *BETA; + + blasint info; + + PRINT_DEBUG_NAME; + + info = 0; + + + if (lda < MAX(1, m)) info = 6; + if (ldc < MAX(1, m)) info = 8; + + if (n < 0) info = 2; + if (m < 0) info = 1; + + if (info != 0){ + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#else +void CNAME( enum CBLAS_ORDER order, blasint m, blasint n, FLOAT alpha, FLOAT *a, blasint lda, FLOAT beta, + FLOAT *c, blasint ldc) +{ +/* +void CNAME(enum CBLAS_ORDER order, + blasint m, blasint n, + FLOAT alpha, + FLOAT *a, blasint lda, + FLOAT beta, + FLOAT *c, blasint ldc){ */ + + blasint info, t; + + PRINT_DEBUG_CNAME; + + info = 0; + + if (order == CblasColMajor) { + + info = -1; + + if (ldc < MAX(1, m)) info = 8; + if (lda < MAX(1, m)) info = 5; + if (n < 0) info = 2; + if (m < 0) info = 1; + + } + + if (order == CblasRowMajor) { + info = -1; + + t = n; + n = m; + m = t; + + if (ldc < MAX(1, m)) info = 8; + if (lda < MAX(1, m)) info = 5; + if (n < 0) info = 2; + if (m < 0) info = 1; + } + + if (info >= 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#endif + + if ((m==0) || (n==0)) return; + + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + + GEADD_K(m,n,alpha, a, lda, beta, c, ldc); + + + FUNCTION_PROFILE_END(1, 2* m * n , 2 * m * n); + + IDEBUG_END; + + return; + +} diff --git a/interface/zgeadd.c b/interface/zgeadd.c new file mode 100644 index 0000000..7124cf2 --- /dev/null +++ b/interface/zgeadd.c @@ -0,0 +1,146 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#if defined(DOUBLE) +#define ERROR_NAME "ZGEADD " +#else +#define ERROR_NAME "CGEADD " +#endif + +#ifndef CBLAS + +void NAME(blasint *M, blasint *N, FLOAT *ALPHA, FLOAT *a, blasint *LDA, + FLOAT *BETA, FLOAT *c, blasint *LDC) +{ + + blasint m = *M; + blasint n = *N; + blasint lda = *LDA; + blasint ldc = *LDC; + + blasint info; + + PRINT_DEBUG_NAME; + + info = 0; + + + if (lda < MAX(1, m)) info = 6; + if (ldc < MAX(1, m)) info = 8; + + if (n < 0) info = 2; + if (m < 0) info = 1; + + if (info != 0){ + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#else +void CNAME( enum CBLAS_ORDER order, blasint m, blasint n, FLOAT *ALPHA, FLOAT *a, blasint lda, FLOAT *BETA, + FLOAT *c, blasint ldc) +{ +/* +void CNAME(enum CBLAS_ORDER order, + blasint m, blasint n, + FLOAT alpha, + FLOAT *a, blasint lda, + FLOAT beta, + FLOAT *c, blasint ldc){ */ + + blasint info, t; + + PRINT_DEBUG_CNAME; + + info = 0; + + if (order == CblasColMajor) { + + info = -1; + + if (ldc < MAX(1, m)) info = 8; + if (lda < MAX(1, m)) info = 5; + if (n < 0) info = 2; + if (m < 0) info = 1; + + } + + if (order == CblasRowMajor) { + info = -1; + + t = n; + n = m; + m = t; + + if (ldc < MAX(1, m)) info = 8; + if (lda < MAX(1, m)) info = 5; + if (n < 0) info = 2; + if (m < 0) info = 1; + } + + if (info >= 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#endif + + if ((m==0) || (n==0)) return; + + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + + GEADD_K(m,n,ALPHA[0],ALPHA[1], a, lda, BETA[0], BETA[1], c, ldc); + + + FUNCTION_PROFILE_END(1, 2* m * n , 2 * m * n); + + IDEBUG_END; + + return; + +} diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index 5702b7a..a3ccc19 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -329,23 +329,27 @@ endif ###### BLAS extensions ##### SBLASOBJS += \ somatcopy_k_cn$(TSUFFIX).$(SUFFIX) somatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ - somatcopy_k_ct$(TSUFFIX).$(SUFFIX) somatcopy_k_rt$(TSUFFIX).$(SUFFIX) + somatcopy_k_ct$(TSUFFIX).$(SUFFIX) somatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ + sgeadd_k$(TSUFFIX).$(SUFFIX) DBLASOBJS += \ domatcopy_k_cn$(TSUFFIX).$(SUFFIX) domatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ - domatcopy_k_ct$(TSUFFIX).$(SUFFIX) domatcopy_k_rt$(TSUFFIX).$(SUFFIX) + domatcopy_k_ct$(TSUFFIX).$(SUFFIX) domatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ + dgeadd_k$(TSUFFIX).$(SUFFIX) CBLASOBJS += \ comatcopy_k_cn$(TSUFFIX).$(SUFFIX) comatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ comatcopy_k_ct$(TSUFFIX).$(SUFFIX) comatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ comatcopy_k_cnc$(TSUFFIX).$(SUFFIX) comatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \ - comatcopy_k_ctc$(TSUFFIX).$(SUFFIX) comatcopy_k_rtc$(TSUFFIX).$(SUFFIX) + comatcopy_k_ctc$(TSUFFIX).$(SUFFIX) comatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \ + cgeadd_k$(TSUFFIX).$(SUFFIX) ZBLASOBJS += \ zomatcopy_k_cn$(TSUFFIX).$(SUFFIX) zomatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ zomatcopy_k_ct$(TSUFFIX).$(SUFFIX) zomatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ zomatcopy_k_cnc$(TSUFFIX).$(SUFFIX) zomatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \ - zomatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zomatcopy_k_rtc$(TSUFFIX).$(SUFFIX) + zomatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zomatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \ + zgeadd_k$(TSUFFIX).$(SUFFIX) SGEMMINCOPYOBJ_P = $(SGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) @@ -3440,3 +3444,31 @@ $(KDIR)zomatcopy_k_rtc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_RTC) $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@ +ifndef SGEADD_K +SGEADD_K = ../arm/geadd.c +endif + +$(KDIR)sgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEADD_K) + $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UROWM $< -o $@ + +ifndef DGEADD_K +DGEADD_K = ../arm/geadd.c +endif + +$(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEADD_K) + $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@ + +ifndef CGEADD_K +CGEADD_K = ../arm/zgeadd.c +endif + +$(KDIR)cgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEADD_K) + $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM $< -o $@ + +ifndef ZGEADD_K +ZGEADD_K = ../arm/zgeadd.c +endif + +$(KDIR)zgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEADD_K) + $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM $< -o $@ + diff --git a/kernel/arm/geadd.c b/kernel/arm/geadd.c new file mode 100644 index 0000000..062918b --- /dev/null +++ b/kernel/arm/geadd.c @@ -0,0 +1,64 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" + + +int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT beta, FLOAT *b, BLASLONG ldb) +{ + BLASLONG i; + FLOAT *aptr,*bptr; + + if ( rows <= 0 ) return(0); + if ( cols <= 0 ) return(0); + + + aptr = a; + bptr = b; + + if ( alpha == 0.0 ) + { + for ( i=0; i