From: wernsaar Date: Sat, 28 Sep 2013 17:02:25 +0000 (+0200) Subject: initial checkin of kernel/arm X-Git-Tag: v0.2.9.rc1~7^2~6^2~83 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=70411af888f220c205d457ab7e653a10a0e18108;p=platform%2Fupstream%2Fopenblas.git initial checkin of kernel/arm --- diff --git a/kernel/arm/KERNEL b/kernel/arm/KERNEL new file mode 100644 index 0000000..aeccfbf --- /dev/null +++ b/kernel/arm/KERNEL @@ -0,0 +1,46 @@ +ifndef SNRM2KERNEL +SNRM2KERNEL = nrm2.c +endif + +ifndef DNRM2KERNEL +DNRM2KERNEL = nrm2.c +endif + +ifndef CNRM2KERNEL +CNRM2KERNEL = znrm2.c +endif + +ifndef ZNRM2KERNEL +ZNRM2KERNEL = znrm2.c +endif + +ifndef SCABS_KERNEL +SCABS_KERNEL = ../generic/cabs.c +endif + +ifndef DCABS_KERNEL +DCABS_KERNEL = ../generic/cabs.c +endif + +ifndef QCABS_KERNEL +QCABS_KERNEL = ../generic/cabs.c +endif + +ifndef LSAME_KERNEL +LSAME_KERNEL = ../generic/lsame.c +endif + +ifndef SGEMM_BETA +SGEMM_BETA = ../generic/gemm_beta.c +endif +ifndef DGEMM_BETA +DGEMM_BETA = ../generic/gemm_beta.c +endif +ifndef CGEMM_BETA +CGEMM_BETA = ../generic/zgemm_beta.c +endif +ifndef ZGEMM_BETA +ZGEMM_BETA = ../generic/zgemm_beta.c +endif + + diff --git a/kernel/arm/KERNEL.ARMV7 b/kernel/arm/KERNEL.ARMV7 new file mode 100644 index 0000000..a607010 --- /dev/null +++ b/kernel/arm/KERNEL.ARMV7 @@ -0,0 +1,143 @@ +SAMAXKERNEL = ../arm/amax.c +DAMAXKERNEL = ../arm/amax.c +CAMAXKERNEL = ../arm/zamax.c +ZAMAXKERNEL = ../arm/zamax.c + +SAMINKERNEL = ../arm/amin.c +DAMINKERNEL = ../arm/amin.c +CAMINKERNEL = ../arm/zamin.c +ZAMINKERNEL = ../arm/zamin.c + +SMAXKERNEL = ../arm/max.c +DMAXKERNEL = ../arm/max.c + +SMINKERNEL = ../arm/min.c +DMINKERNEL = ../arm/min.c + +ISAMAXKERNEL = ../arm/iamax.c +IDAMAXKERNEL = ../arm/iamax.c +ICAMAXKERNEL = ../arm/izamax.c +IZAMAXKERNEL = ../arm/izamax.c + +ISAMINKERNEL = ../arm/iamin.c +IDAMINKERNEL = ../arm/iamin.c +ICAMINKERNEL = ../arm/izamin.c +IZAMINKERNEL = ../arm/izamin.c + +ISMAXKERNEL = ../arm/imax.c +IDMAXKERNEL = 
../arm/imax.c + +ISMINKERNEL = ../arm/imin.c +IDMINKERNEL = ../arm/imin.c + +SSWAPKERNEL = ../arm/swap.c +DSWAPKERNEL = ../arm/swap.c +CSWAPKERNEL = ../arm/zswap.c +ZSWAPKERNEL = ../arm/zswap.c + +SASUMKERNEL = ../arm/asum.c +DASUMKERNEL = ../arm/asum.c +CASUMKERNEL = ../arm/zasum.c +ZASUMKERNEL = ../arm/zasum.c + +SAXPYKERNEL = ../arm/axpy.c +DAXPYKERNEL = ../arm/axpy.c +CAXPYKERNEL = ../arm/zaxpy.c +ZAXPYKERNEL = ../arm/zaxpy.c + +SCOPYKERNEL = ../arm/copy.c +DCOPYKERNEL = ../arm/copy.c +CCOPYKERNEL = ../arm/zcopy.c +ZCOPYKERNEL = ../arm/zcopy.c + +SDOTKERNEL = ../arm/dot.c +DDOTKERNEL = ../arm/dot.c +CDOTKERNEL = ../arm/zdot.c +ZDOTKERNEL = ../arm/zdot.c + +SNRM2KERNEL = ../arm/nrm2.c +DNRM2KERNEL = ../arm/nrm2.c +CNRM2KERNEL = ../arm/znrm2.c +ZNRM2KERNEL = ../arm/znrm2.c + +SROTKERNEL = ../arm/rot.c +DROTKERNEL = ../arm/rot.c +CROTKERNEL = ../arm/zrot.c +ZROTKERNEL = ../arm/zrot.c + +SSCALKERNEL = ../arm/scal.c +DSCALKERNEL = ../arm/scal.c +CSCALKERNEL = ../arm/zscal.c +ZSCALKERNEL = ../arm/zscal.c + +SGEMVNKERNEL = gemv_n.c +DGEMVNKERNEL = gemv_n.c +CGEMVNKERNEL = zgemv_n.c +ZGEMVNKERNEL = zgemv_n.c + +SGEMVTKERNEL = gemv_t.c +DGEMVTKERNEL = gemv_t.c +CGEMVTKERNEL = zgemv_t.c +ZGEMVTKERNEL = zgemv_t.c + +STRMMKERNEL = ../generic/trmmkernel_2x2.c +DTRMMKERNEL = dtrmm_kernel_8x2_vfpv3.S +CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c +ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c + +SGEMMKERNEL = ../generic/gemmkernel_2x2.c +SGEMMONCOPY = ../generic/gemm_ncopy_2.c +SGEMMOTCOPY = ../generic/gemm_tcopy_2.c +SGEMMONCOPYOBJ = sgemm_oncopy.o +SGEMMOTCOPYOBJ = sgemm_otcopy.o + +#DGEMMKERNEL = ../generic/gemmkernel_2x2.c +#DGEMMKERNEL = dgemm_kernel_4x2_vfpv2.S +DGEMMKERNEL = dgemm_kernel_8x2_vfpv3.S +DGEMMINCOPY = ../generic/gemm_ncopy_8.c +DGEMMITCOPY = ../generic/gemm_tcopy_8.c +DGEMMONCOPY = ../generic/gemm_ncopy_2.c +DGEMMOTCOPY = ../generic/gemm_tcopy_2.c +DGEMMINCOPYOBJ = dgemm_incopy.o +DGEMMITCOPYOBJ = dgemm_itcopy.o +DGEMMONCOPYOBJ = dgemm_oncopy.o +DGEMMOTCOPYOBJ = 
dgemm_otcopy.o + +CGEMMKERNEL = ../generic/zgemmkernel_2x2.c +CGEMMONCOPY = ../generic/zgemm_ncopy_2.c +CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +CGEMMONCOPYOBJ = cgemm_oncopy.o +CGEMMOTCOPYOBJ = cgemm_otcopy.o + +ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c +ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c +ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +ZGEMMONCOPYOBJ = zgemm_oncopy.o +ZGEMMOTCOPYOBJ = zgemm_otcopy.o + +STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +CGEMM3MKERNEL = zgemm3m_kernel_8x4_core2.S +ZGEMM3MKERNEL = zgemm3m_kernel_4x4_core2.S + + + + diff --git a/kernel/arm/amax.c b/kernel/arm/amax.c new file mode 100644 index 0000000..55107ca --- /dev/null +++ b/kernel/arm/amax.c @@ -0,0 +1,73 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf=0.0; + + if (n < 0 || inc_x < 1 ) return(maxf); + + maxf=ABS(x[0]); + + while(i < n) + { + if( ABS(x[ix]) > ABS(maxf) ) + { + maxf = ABS(x[ix]); + } + ix += inc_x; + i++; + } + return(maxf); +} + + diff --git a/kernel/arm/amin.c b/kernel/arm/amin.c new file mode 100644 index 0000000..3f7e97b --- /dev/null +++ b/kernel/arm/amin.c @@ -0,0 +1,73 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf=0.0; + + if (n < 0 || inc_x < 1 ) return(minf); + + minf=ABS(x[0]); + + while(i < n) + { + if( ABS(x[ix]) < ABS(minf) ) + { + minf = ABS(x[ix]); + } + ix += inc_x; + i++; + } + return(minf); +} + + diff --git a/kernel/arm/asum.c b/kernel/arm/asum.c new file mode 100644 index 0000000..5ac6936 --- /dev/null +++ b/kernel/arm/asum.c @@ -0,0 +1,67 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + FLOAT sumf = 0.0; + if (n < 0 || inc_x < 1 ) return(sumf); + + n *= inc_x; + while(i < n) + { + sumf += ABS(x[i]); + i += inc_x; + } + return(sumf); +} + + diff --git a/kernel/arm/axpy.c b/kernel/arm/axpy.c new file mode 100644 index 0000000..dceddf7 --- /dev/null +++ b/kernel/arm/axpy.c @@ -0,0 +1,64 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + + +#include "common.h" + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +{ + BLASLONG i=0; + BLASLONG ix,iy; + + if ( n < 0 ) return(0); + if ( da == 0.0 ) return(0); + + ix = 0; + iy = 0; + + while(i < n) + { + + y[iy] += da * x[ix] ; + ix += inc_x ; + iy += inc_y ; + i++ ; + + } + return(0); + +} + + diff --git a/kernel/arm/copy.c b/kernel/arm/copy.c new file mode 100644 index 0000000..f742a4a --- /dev/null +++ b/kernel/arm/copy.c @@ -0,0 +1,59 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" + +int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + + if ( n < 0 ) return(0); + + while(i < n) + { + + y[iy] = x[ix] ; + ix += inc_x ; + iy += inc_y ; + i++ ; + + } + return(0); + +} + + diff --git a/kernel/arm/dgemm_kernel_8x2_vfpv3.S b/kernel/arm/dgemm_kernel_8x2_vfpv3.S new file mode 100644 index 0000000..3c474a1 --- /dev/null +++ b/kernel/arm/dgemm_kernel_8x2_vfpv3.S @@ -0,0 +1,1223 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/28 Saar +* BLASTEST : OK +* CTEST : OK +* TEST : OK +* +* 2013/09/22 Saar +* UNROLL_N 2 +* UNROLL_M 8 +* DGEMM_P 64 +* DGEMM_Q 64 +* DGEMM_R 512 +* A_PRE 128 +* B_PRE 128 +* +* Performance on Odroid U2: +* +* 1 Core: 0.92 GFLOPS ATLAS: 0.81 GFLOPS +* 2 Cores: 1.83 GFLOPS ATLAS: 1.51 GFLOPS +* 3 Cores: 2.67 GFLOPS ATLAS: 1.51 GFLOPS +* 4 Cores: 3.52 GFLOPS ATLAS: 1.51 GFLOPS +* +* 2013/09/28 Saar +* UNROLL_N 2 +* UNROLL_M 8 +* DGEMM_P 128 +* DGEMM_Q 128 +* DGEMM_R 2048 +* A_PRE 128 +* B_PRE 128 +* C_PRE 32 +* +* Performance on Odroid U2: +* +* 1 Core: 0.99 GFLOPS ATLAS: 0.82 GFLOPS +* 2 Cores: 1.97 GFLOPS ATLAS: 1.59 GFLOPS +* 3 Cores: 2.86 GFLOPS ATLAS: 1.59 GFLOPS +* 4 Cores: 3.79 GFLOPS ATLAS: 1.59 GFLOPS +**************************************************************************************/ + +#define ASSEMBLER +#include "common.h" + +#define STACKSIZE 252 + +#define OLD_M r0 +#define OLD_N r1 +#define OLD_K r2 +#define OLD_A r3 +#define OLD_ALPHA d0 + +/****************************************************** +* [fp, #-128] - [fp, #-64] is reserved +* for store and restore of floating point +* registers +*******************************************************/ + +#define C [fp, #-248 ] +#define LDC [fp, #-252 ] +#define M [fp, #-256 ] +#define N [fp, #-260 ] +#define K 
[fp, #-264 ] +#define A [fp, #-268 ] + +#define ALPHA [fp, #-276 ] + +#define B [fp, #4 ] +#define OLD_C [fp, #8 ] +#define OLD_LDC [fp, #12 ] + +#define I r0 +#define J r1 +#define L r2 + +#define AO r5 +#define BO r6 + +#define CO1 r8 +#define CO2 r9 + +#define K1 r7 +#define BC r12 + +#define A_PRE 128 +#define A_PRE1 160 +#define B_PRE 128 +#define C_PRE 32 + +/************************************************************************************** +* Macro definitions +**************************************************************************************/ + +.macro INIT8x2 + + vsub.f64 d8 , d8 , d8 + vmov.f64 d9 , d8 + vmov.f64 d10, d8 + vmov.f64 d11, d8 + vmov.f64 d12, d8 + vmov.f64 d13, d8 + vmov.f64 d14, d8 + vmov.f64 d15, d8 + vmov.f64 d16, d8 + vmov.f64 d17, d8 + vmov.f64 d18, d8 + vmov.f64 d19, d8 + vmov.f64 d20, d8 + vmov.f64 d21, d8 + vmov.f64 d22, d8 + vmov.f64 d23, d8 + +.endm + +.macro KERNEL8x2_START + + vldm BO!, { d24 , d25} + vldm AO!, { d0, d1 , d2, d3, d4 , d5 , d6 , d7 } + + vmul.f64 d26 , d0 , d24 + vmul.f64 d29 , d0 , d25 + vmul.f64 d27 , d1 , d24 + vmul.f64 d30 , d1 , d25 + vmul.f64 d28 , d2 , d24 + vmul.f64 d31 , d2 , d25 + + pld [AO , #A_PRE] + + vadd.f64 d8 , d8 , d26 + vadd.f64 d16 , d16, d29 + vadd.f64 d9 , d9 , d27 + vadd.f64 d17 , d17, d30 + vadd.f64 d10 , d10, d28 + vadd.f64 d18 , d18, d31 + + vmul.f64 d26 , d3 , d24 + vmul.f64 d27 , d4 , d24 + vmul.f64 d28 , d5 , d24 + vmul.f64 d29 , d3 , d25 + vmul.f64 d30 , d4 , d25 + vmul.f64 d31 , d5 , d25 + + pld [AO , #A_PRE1] + + vadd.f64 d11 , d11, d26 + vadd.f64 d12 , d12, d27 + vadd.f64 d13 , d13, d28 + vadd.f64 d19 , d19, d29 + vadd.f64 d20 , d20, d30 + vadd.f64 d21 , d21, d31 + + vmul.f64 d26 , d6 , d24 + vmul.f64 d27 , d7 , d24 + vmul.f64 d29 , d6 , d25 + vmul.f64 d30 , d7 , d25 + + vldm BO!, { d24 , d25} + vldm AO!, { d0, d1 , d2, d3, d4 , d5 , d6 , d7 } + + vadd.f64 d14 , d14, d26 + vadd.f64 d15 , d15, d27 + vadd.f64 d22 , d22, d29 + vadd.f64 d23 , d23, d30 + +.endm + + +.macro 
KERNEL8x2_M + + vmul.f64 d26 , d0 , d24 + vmul.f64 d29 , d0 , d25 + vmul.f64 d27 , d1 , d24 + vmul.f64 d30 , d1 , d25 + vmul.f64 d28 , d2 , d24 + vmul.f64 d31 , d2 , d25 + + pld [AO , #A_PRE] + + vadd.f64 d8 , d8 , d26 + vadd.f64 d16 , d16, d29 + vadd.f64 d9 , d9 , d27 + vadd.f64 d17 , d17, d30 + vadd.f64 d10 , d10, d28 + vadd.f64 d18 , d18, d31 + + vmul.f64 d26 , d3 , d24 + vmul.f64 d27 , d4 , d24 + vmul.f64 d28 , d5 , d24 + vmul.f64 d29 , d3 , d25 + vmul.f64 d30 , d4 , d25 + vmul.f64 d31 , d5 , d25 + + pld [AO , #A_PRE1] + + vadd.f64 d11 , d11, d26 + vadd.f64 d12 , d12, d27 + vadd.f64 d13 , d13, d28 + vadd.f64 d19 , d19, d29 + vadd.f64 d20 , d20, d30 + vadd.f64 d21 , d21, d31 + + vmul.f64 d26 , d6 , d24 + vmul.f64 d27 , d7 , d24 + vmul.f64 d29 , d6 , d25 + vmul.f64 d30 , d7 , d25 + + vldm BO!, { d24 , d25} + vldm AO!, { d0, d1 , d2, d3, d4 , d5 , d6 , d7 } + + vadd.f64 d14 , d14, d26 + vadd.f64 d15 , d15, d27 + vadd.f64 d22 , d22, d29 + vadd.f64 d23 , d23, d30 + +.endm + + +.macro KERNEL8x2_END + + vmul.f64 d26 , d0 , d24 + vmul.f64 d29 , d0 , d25 + vmul.f64 d27 , d1 , d24 + vmul.f64 d30 , d1 , d25 + vmul.f64 d28 , d2 , d24 + vmul.f64 d31 , d2 , d25 + + pld [AO , #A_PRE] + + vadd.f64 d8 , d8 , d26 + vadd.f64 d16 , d16, d29 + vadd.f64 d9 , d9 , d27 + vadd.f64 d17 , d17, d30 + vadd.f64 d10 , d10, d28 + vadd.f64 d18 , d18, d31 + + vmul.f64 d26 , d3 , d24 + vmul.f64 d27 , d4 , d24 + vmul.f64 d28 , d5 , d24 + vmul.f64 d29 , d3 , d25 + vmul.f64 d30 , d4 , d25 + vmul.f64 d31 , d5 , d25 + + pld [AO , #A_PRE1] + + vadd.f64 d11 , d11, d26 + vadd.f64 d12 , d12, d27 + vadd.f64 d13 , d13, d28 + vadd.f64 d19 , d19, d29 + vadd.f64 d20 , d20, d30 + vadd.f64 d21 , d21, d31 + + vmul.f64 d26 , d6 , d24 + vmul.f64 d27 , d7 , d24 + vmul.f64 d29 , d6 , d25 + vmul.f64 d30 , d7 , d25 + + vadd.f64 d14 , d14, d26 + vadd.f64 d15 , d15, d27 + vadd.f64 d22 , d22, d29 + vadd.f64 d23 , d23, d30 + +.endm + + + + +.macro KERNEL8x2 + + vldm BO!, { d24 , d25} + vldm AO!, { d0, d1 , d2, d3, d4 , d5 
, d6 , d7 } + + vmul.f64 d26 , d0 , d24 + vmul.f64 d29 , d0 , d25 + vmul.f64 d27 , d1 , d24 + vmul.f64 d30 , d1 , d25 + vmul.f64 d28 , d2 , d24 + vmul.f64 d31 , d2 , d25 + + pld [AO , #A_PRE] + + vadd.f64 d8 , d8 , d26 + vadd.f64 d16 , d16, d29 + vadd.f64 d9 , d9 , d27 + vadd.f64 d17 , d17, d30 + vadd.f64 d10 , d10, d28 + vadd.f64 d18 , d18, d31 + + vmul.f64 d26 , d3 , d24 + vmul.f64 d27 , d4 , d24 + vmul.f64 d28 , d5 , d24 + vmul.f64 d29 , d3 , d25 + vmul.f64 d30 , d4 , d25 + vmul.f64 d31 , d5 , d25 + + pld [AO , #A_PRE1] + + vadd.f64 d11 , d11, d26 + vadd.f64 d12 , d12, d27 + vadd.f64 d13 , d13, d28 + vadd.f64 d19 , d19, d29 + vadd.f64 d20 , d20, d30 + vadd.f64 d21 , d21, d31 + + vmul.f64 d26 , d6 , d24 + vmul.f64 d27 , d7 , d24 + vmul.f64 d29 , d6 , d25 + vmul.f64 d30 , d7 , d25 + + vadd.f64 d14 , d14, d26 + vadd.f64 d15 , d15, d27 + vadd.f64 d22 , d22, d29 + vadd.f64 d23 , d23, d30 + +.endm + +.macro SAVE8x2 + + vldr d0, ALPHA + vldm CO2, { d24, d25, d26 , d27 , d28 , d29 , d30 , d31 } + + vmul.f64 d8 , d0 , d8 + vmul.f64 d9 , d0 , d9 + vmul.f64 d10, d0 , d10 + vmul.f64 d11, d0 , d11 + vmul.f64 d12, d0 , d12 + vmul.f64 d13, d0 , d13 + vmul.f64 d14, d0 , d14 + vmul.f64 d15, d0 , d15 + + vmul.f64 d16, d0 , d16 + vmul.f64 d17, d0 , d17 + vmul.f64 d18, d0 , d18 + vmul.f64 d19, d0 , d19 + vmul.f64 d20, d0 , d20 + vmul.f64 d21, d0 , d21 + vmul.f64 d22, d0 , d22 + vmul.f64 d23, d0 , d23 + + vldm CO1, { d0 , d1 , d2 , d3 , d4 , d5 , d6 , d7 } + + vadd.f64 d16, d16, d24 + vadd.f64 d17, d17, d25 + vadd.f64 d18, d18, d26 + vadd.f64 d19, d19, d27 + + vadd.f64 d20, d20, d28 + vadd.f64 d21, d21, d29 + vadd.f64 d22, d22, d30 + vadd.f64 d23, d23, d31 + + vadd.f64 d8 , d8 , d0 + vadd.f64 d9 , d9 , d1 + vadd.f64 d10, d10, d2 + vadd.f64 d11, d11, d3 + + vadd.f64 d12, d12, d4 + vadd.f64 d13, d13, d5 + vadd.f64 d14, d14, d6 + vadd.f64 d15, d15, d7 + + vstm CO2!, { d16, d17, d18 , d19 , d20 , d21 , d22 , d23 } + vstm CO1!, { d8 , d9 , d10 , d11 , d12, d13 , d14 , d15 } + +.endm + + 
+/*************************************************************************************/ + + +.macro INIT4x2 + + vsub.f64 d8 , d8 , d8 + vsub.f64 d9 , d9 , d9 + vsub.f64 d10, d10, d10 + vsub.f64 d11, d11, d11 + vsub.f64 d12, d12, d12 + vsub.f64 d13, d13, d13 + vsub.f64 d14, d14, d14 + vsub.f64 d15, d15, d15 + +.endm + +.macro KERNEL4x2 + + vldm AO!, { d0, d1 , d2, d3 } + vldm BO!, { d4, d5 } + + vmul.f64 d6 , d0 , d4 + vmul.f64 d7 , d1 , d4 + vadd.f64 d8 , d8 , d6 + vadd.f64 d9 , d9 , d7 + + vmul.f64 d6 , d2 , d4 + vmul.f64 d7 , d3 , d4 + vadd.f64 d10, d10, d6 + vadd.f64 d11, d11, d7 + + vmul.f64 d6 , d0 , d5 + vmul.f64 d7 , d1 , d5 + vadd.f64 d12, d12, d6 + vadd.f64 d13, d13, d7 + + vmul.f64 d6 , d2 , d5 + vmul.f64 d7 , d3 , d5 + vadd.f64 d14, d14, d6 + vadd.f64 d15, d15, d7 + +.endm + +.macro SAVE4x2 + + vldr d0, ALPHA + + vmul.f64 d8 , d0 , d8 + vmul.f64 d9 , d0 , d9 + vmul.f64 d10, d0 , d10 + vmul.f64 d11, d0 , d11 + vmul.f64 d12, d0 , d12 + vmul.f64 d13, d0 , d13 + vmul.f64 d14, d0 , d14 + vmul.f64 d15, d0 , d15 + + vldm CO1, { d0, d1 , d2 , d3 } + vldm CO2, { d4, d5 , d6 , d7 } + + vadd.f64 d8 , d8 , d0 + vadd.f64 d9 , d9 , d1 + vadd.f64 d10, d10, d2 + vadd.f64 d11, d11, d3 + + vadd.f64 d12, d12, d4 + vadd.f64 d13, d13, d5 + vadd.f64 d14, d14, d6 + vadd.f64 d15, d15, d7 + + vstm CO1!, { d8 , d9 , d10 , d11 } + vstm CO2!, { d12, d13 ,d14 , d15 } + +.endm + + + +/*************************************************************************************/ + +.macro INIT2x2 + + vsub.f64 d8 , d8 , d8 + vsub.f64 d9 , d9 , d9 + vsub.f64 d12, d12, d12 + vsub.f64 d13, d13, d13 + +.endm + +.macro KERNEL2x2 + + vldm AO!, { d0, d1 } + vldm BO!, { d4, d5 } + + vmul.f64 d6 , d0 , d4 + vmul.f64 d7 , d1 , d4 + vadd.f64 d8 , d8 , d6 + vadd.f64 d9 , d9 , d7 + + vmul.f64 d6 , d0 , d5 + vmul.f64 d7 , d1 , d5 + vadd.f64 d12, d12, d6 + vadd.f64 d13, d13, d7 + +.endm + +.macro SAVE2x2 + + vldr d0, ALPHA + + vmul.f64 d8 , d0 , d8 + vmul.f64 d9 , d0 , d9 + vmul.f64 d12, d0 , d12 + vmul.f64 
d13, d0 , d13 + + vldm CO1, { d0, d1 } + vldm CO2, { d4, d5 } + + vadd.f64 d8 , d8 , d0 + vadd.f64 d9 , d9 , d1 + + vadd.f64 d12, d12, d4 + vadd.f64 d13, d13, d5 + + vstm CO1!, { d8 , d9 } + vstm CO2!, { d12, d13 } + +.endm + +/*************************************************************************************/ + +.macro INIT1x2 + + vsub.f64 d8 , d8 , d8 + vsub.f64 d12, d12, d12 + +.endm + +.macro KERNEL1x2 + + vldm AO!, { d0 } + vldm BO!, { d4, d5 } + + vmul.f64 d6 , d0 , d4 + vadd.f64 d8 , d8 , d6 + + vmul.f64 d6 , d0 , d5 + vadd.f64 d12, d12, d6 + +.endm + +.macro SAVE1x2 + + vldr d0, ALPHA + + vmul.f64 d8 , d0 , d8 + vmul.f64 d12, d0 , d12 + + vldm CO1, { d0 } + vldm CO2, { d4 } + + vadd.f64 d8 , d8 , d0 + vadd.f64 d12, d12, d4 + + vstm CO1!, { d8 } + vstm CO2!, { d12} + +.endm + +/*************************************************************************************/ + +.macro INIT8x1 + + vsub.f64 d8 , d8 , d8 + vsub.f64 d9 , d9 , d9 + vsub.f64 d10, d10, d10 + vsub.f64 d11, d11, d11 + vsub.f64 d12, d12, d12 + vsub.f64 d13, d13, d13 + vsub.f64 d14, d14, d14 + vsub.f64 d15, d15, d15 + +.endm + +.macro KERNEL8x1 + + vldm AO!, { d0, d1 , d2, d3, d4 , d5 , d6 , d7 } + vldm BO!, { d24 } + + vmul.f64 d26 , d0 , d24 + vmul.f64 d27 , d1 , d24 + vadd.f64 d8 , d8 , d26 + vadd.f64 d9 , d9 , d27 + + vmul.f64 d28 , d2 , d24 + vmul.f64 d29 , d3 , d24 + vadd.f64 d10 , d10, d28 + vadd.f64 d11 , d11, d29 + + vmul.f64 d26 , d4 , d24 + vmul.f64 d27 , d5 , d24 + vadd.f64 d12 , d12, d26 + vadd.f64 d13 , d13, d27 + + vmul.f64 d28 , d6 , d24 + vmul.f64 d29 , d7 , d24 + vadd.f64 d14 , d14, d28 + vadd.f64 d15 , d15, d29 + + +.endm + +.macro SAVE8x1 + + vldr d0, ALPHA + + vmul.f64 d8 , d0 , d8 + vmul.f64 d9 , d0 , d9 + vmul.f64 d10, d0 , d10 + vmul.f64 d11, d0 , d11 + vmul.f64 d12, d0 , d12 + vmul.f64 d13, d0 , d13 + vmul.f64 d14, d0 , d14 + vmul.f64 d15, d0 , d15 + + vldm CO1, { d0, d1 , d2 , d3 , d4 , d5 , d6 , d7 } + + vadd.f64 d8 , d8 , d0 + vadd.f64 d9 , d9 , d1 + vadd.f64 d10, 
d10, d2 + vadd.f64 d11, d11, d3 + + vadd.f64 d12, d12, d4 + vadd.f64 d13, d13, d5 + vadd.f64 d14, d14, d6 + vadd.f64 d15, d15, d7 + + vstm CO1!, { d8 , d9 , d10 , d11 , d12, d13 ,d14 , d15 } + +.endm + + +/*************************************************************************************/ + +.macro INIT4x1 + + vsub.f64 d8 , d8 , d8 + vsub.f64 d9 , d9 , d9 + vsub.f64 d10, d10, d10 + vsub.f64 d11, d11, d11 + +.endm + +.macro KERNEL4x1 + + vldm AO!, { d0, d1 , d2, d3 } + vldm BO!, { d4 } + + vmul.f64 d6 , d0 , d4 + vmul.f64 d7 , d1 , d4 + vadd.f64 d8 , d8 , d6 + vadd.f64 d9 , d9 , d7 + + vmul.f64 d6 , d2 , d4 + vmul.f64 d7 , d3 , d4 + vadd.f64 d10, d10, d6 + vadd.f64 d11, d11, d7 + +.endm + +.macro SAVE4x1 + + vldr d0, ALPHA + + vmul.f64 d8 , d0 , d8 + vmul.f64 d9 , d0 , d9 + vmul.f64 d10, d0 , d10 + vmul.f64 d11, d0 , d11 + + vldm CO1, { d0, d1 , d2 , d3 } + + vadd.f64 d8 , d8 , d0 + vadd.f64 d9 , d9 , d1 + vadd.f64 d10, d10, d2 + vadd.f64 d11, d11, d3 + + vstm CO1!, { d8 , d9 , d10 , d11 } + +.endm + +/*************************************************************************************/ + +.macro INIT2x1 + + vsub.f64 d8 , d8 , d8 + vsub.f64 d9 , d9 , d9 + +.endm + +.macro KERNEL2x1 + + vldm AO!, { d0, d1 } + vldm BO!, { d4 } + + vmul.f64 d6 , d0 , d4 + vmul.f64 d7 , d1 , d4 + vadd.f64 d8 , d8 , d6 + vadd.f64 d9 , d9 , d7 + +.endm + +.macro SAVE2x1 + + vldr d0, ALPHA + + vmul.f64 d8 , d0 , d8 + vmul.f64 d9 , d0 , d9 + + vldm CO1, { d0, d1 } + + vadd.f64 d8 , d8 , d0 + vadd.f64 d9 , d9 , d1 + + vstm CO1!, { d8 , d9 } + +.endm + +/*************************************************************************************/ + +.macro INIT1x1 + + vsub.f64 d8 , d8 , d8 + +.endm + +.macro KERNEL1x1 + + vldm AO!, { d0 } + vldm BO!, { d4 } + + vmul.f64 d6 , d0 , d4 + vadd.f64 d8 , d8 , d6 + +.endm + +.macro SAVE1x1 + + vldr d0, ALPHA + + vmul.f64 d8 , d0 , d8 + + vldm CO1, { d0 } + + vadd.f64 d8 , d8 , d0 + + vstm CO1!, { d8 } + +.endm + + + 
+/**************************************************************************************
+* End of macro definitions
+**************************************************************************************/
+
+/**************************************************************************************
+* Kernel driver
+*
+* Frame setup: saves r4-r9/fp and d8-d15, copies OLD_M, OLD_N, OLD_K, OLD_A and
+* OLD_ALPHA into the frame slots M, N, K, A and ALPHA, stores OLD_LDC * 8 as LDC
+* (byte distance between two columns of C) and OLD_C as C.
+*
+* Loop nest:
+*   J : N / 2 passes over two columns of C (CO1, CO2), followed by one
+*       single-column pass when bit 0 of N is set. BC is advanced by
+*       K * 2 * 8 bytes after every two-column pass.
+*   I : every pass restarts AO at A and walks M in tiles of 8 rows; what is
+*       left is covered by at most one 4-row, one 2-row and one 1-row tile,
+*       selected by bits 2, 1 and 0 of M.
+*   L : every tile restarts BO at BC, runs the KERNELmxn macro K times
+*       (unrolled by 8 or 4, plus a remainder loop) and ends with SAVEmxn.
+*
+* Returns 0 in r0.
+*
+* Flags: ble/bgt test V, but asrs, ands and tst do not write it, and the
+* condition flags are not defined on entry to the routine. J is therefore
+* compared against zero explicitly before the first branch. After that cmp the
+* only instructions here that write V are the subs decrements of positive
+* counters, which leave it clear (this assumes the tile macros do not touch the
+* integer flags).
+**************************************************************************************/
+
+	PROLOGUE
+
+	.align 5
+
+	push	{r4 - r9, fp}
+	add	fp, sp, #24
+	sub	sp, sp, #STACKSIZE	// reserve stack
+
+	str	OLD_M, M
+	str	OLD_N, N
+	str	OLD_K, K
+	str	OLD_A, A
+	vstr	OLD_ALPHA, ALPHA
+
+	sub	r3, fp, #128
+	vstm	r3, { d8 - d15}	// store floating point registers
+
+	ldr	r3, OLD_LDC
+	lsl	r3, r3, #3	// ldc = ldc * 8
+	str	r3, LDC
+
+	ldr	r3, OLD_C
+	str	r3, C
+
+	ldr	K1, K
+	ldr	BC, B
+
+	ldr	J, N
+	asr	J, J, #1	// J = N / 2
+	cmp	J, #0	// defines N, Z, C and V for the ble below
+	ble	_L1_BEGIN
+
+_L2_BEGIN:
+
+	ldr	CO1, C	// CO1 = C
+	ldr	r4 , LDC
+	add	CO2, CO1, r4	// CO2 = C + LDC
+	add	r3 , CO2, r4	// C = CO2 + LDC
+	str	r3 , C	// store C
+
+	ldr	AO, A	// AO = A
+
+_L2_M8_BEGIN:
+
+	ldr	I, M
+	asrs	I, I, #3	// I = M / 8
+	ble	_L2_M4_BEGIN
+
+_L2_M8_20:
+
+	pld	[CO1, #C_PRE]
+	pld	[CO2, #C_PRE]
+	INIT8x2
+
+	mov	BO, BC
+	asrs	L , K1, #3	// L = K / 8
+	ble	_L2_M8_40
+	.align 5
+
+_L2_M8_22:
+
+	pld	[BO , #B_PRE]
+	KERNEL8x2_START
+	KERNEL8x2_M
+	pld	[BO , #B_PRE]
+	KERNEL8x2_M
+	KERNEL8x2_M
+
+	pld	[BO , #B_PRE]
+	KERNEL8x2_M
+	KERNEL8x2_M
+	pld	[BO , #B_PRE]
+	KERNEL8x2_M
+	KERNEL8x2_END
+
+	subs	L, L, #1
+	bgt	_L2_M8_22
+
+
+_L2_M8_40:
+
+	ands	L , K1, #7	// L = K % 8
+	ble	_L2_M8_100
+
+_L2_M8_42:
+
+	KERNEL8x2
+
+	subs	L, L, #1
+	bgt	_L2_M8_42
+
+_L2_M8_100:
+
+	SAVE8x2
+
+_L2_M8_END:
+
+	subs	I, I, #1
+	bgt	_L2_M8_20
+
+
+_L2_M4_BEGIN:
+
+	ldr	I, M
+	tst	I , #7	// M % 8 == 0: no remainder rows
+	ble	_L2_END
+
+	tst	I , #4	// bit 2 of M: one 4-row tile
+	ble	_L2_M2_BEGIN
+
+_L2_M4_20:
+
+	INIT4x2
+
+	mov	BO, BC
+	asrs	L , K1, #3	// L = K / 8
+	ble	_L2_M4_40
+	.align 5
+
+_L2_M4_22:
+
+	KERNEL4x2
+	KERNEL4x2
+	KERNEL4x2
+	KERNEL4x2
+
+	KERNEL4x2
+	KERNEL4x2
+	KERNEL4x2
+	KERNEL4x2
+
+	subs	L, L, #1
+	bgt	_L2_M4_22
+
+
+_L2_M4_40:
+
+	ands	L , K1, #7	// L = K % 8
+	ble	_L2_M4_100
+
+_L2_M4_42:
+
+	KERNEL4x2
+
+	subs	L, L, #1
+	bgt	_L2_M4_42
+
+_L2_M4_100:
+
+	SAVE4x2
+
+_L2_M4_END:
+
+
+
+_L2_M2_BEGIN:
+
+	tst	I, #2	// bit 1 of M: one 2-row tile
+	ble	_L2_M1_BEGIN
+
+_L2_M2_20:
+
+	INIT2x2
+
+	mov	BO, BC
+	asrs	L , K1, #2	// L = K / 4
+	ble	_L2_M2_40
+
+_L2_M2_22:
+
+	KERNEL2x2
+	KERNEL2x2
+	KERNEL2x2
+	KERNEL2x2
+
+	subs	L, L, #1
+	bgt	_L2_M2_22
+
+
+_L2_M2_40:
+
+	ands	L , K1, #3	// L = K % 4
+	ble	_L2_M2_100
+
+_L2_M2_42:
+
+	KERNEL2x2
+
+	subs	L, L, #1
+	bgt	_L2_M2_42
+
+_L2_M2_100:
+
+	SAVE2x2
+
+_L2_M2_END:
+
+
+_L2_M1_BEGIN:
+
+	tst	I, #1	// bit 0 of M: one 1-row tile
+	ble	_L2_END
+
+_L2_M1_20:
+
+	INIT1x2
+
+	mov	BO, BC
+	asrs	L , K1, #2	// L = K / 4
+	ble	_L2_M1_40
+
+_L2_M1_22:
+
+	KERNEL1x2
+	KERNEL1x2
+	KERNEL1x2
+	KERNEL1x2
+
+	subs	L, L, #1
+	bgt	_L2_M1_22
+
+
+_L2_M1_40:
+
+	ands	L , K1, #3	// L = K % 4
+	ble	_L2_M1_100
+
+_L2_M1_42:
+
+	KERNEL1x2
+
+	subs	L, L, #1
+	bgt	_L2_M1_42
+
+_L2_M1_100:
+
+	SAVE1x2
+
+
+_L2_END:
+
+	mov	r3, BC
+	mov	r4, K1
+	lsl	r4, r4, #4	// k * 2 * 8
+	add	r3, r3, r4	// B = B + K * 2 * 8
+	mov	BC, r3
+
+	subs	J , #1	// j--
+	bgt	_L2_BEGIN
+
+
+_L1_BEGIN:
+
+	ldr	J, N
+	tst	J , #1	// bit 0 of N: one single-column pass
+	ble	_L999
+
+	ldr	CO1, C	// CO1 = C
+	ldr	r4 , LDC
+	add	r3 , CO1, r4	// C = CO1 + LDC
+	str	r3 , C	// store C
+
+	ldr	AO, A	// AO = A
+
+
+
+_L1_M8_BEGIN:
+
+	ldr	I, M
+	asrs	I, I, #3	// I = M / 8
+	ble	_L1_M4_BEGIN
+
+_L1_M8_20:
+
+	INIT8x1
+
+	mov	BO, BC
+	asrs	L , K1, #3	// L = K / 8
+	ble	_L1_M8_40
+
+_L1_M8_22:
+
+	KERNEL8x1
+	KERNEL8x1
+	KERNEL8x1
+	KERNEL8x1
+	KERNEL8x1
+	KERNEL8x1
+	KERNEL8x1
+	KERNEL8x1
+
+	subs	L, L, #1
+	bgt	_L1_M8_22
+
+
+_L1_M8_40:
+
+	ands	L , K1, #7	// L = K % 8
+	ble	_L1_M8_100
+
+_L1_M8_42:
+
+	KERNEL8x1
+
+	subs	L, L, #1
+	bgt	_L1_M8_42
+
+_L1_M8_100:
+
+	SAVE8x1
+
+_L1_M8_END:
+
+	subs	I, I, #1
+	bgt	_L1_M8_20
+
+
+
+
+_L1_M4_BEGIN:
+
+	ldr	I, M
+	tst	I, #7	// M % 8 == 0: no remainder rows
+	ble	_L1_END
+
+	tst	I, #4	// bit 2 of M: one 4-row tile
+	ble	_L1_M2_BEGIN
+
+_L1_M4_20:
+
+	INIT4x1
+
+	mov	BO, BC
+	asrs	L , K1, #2	// L = K / 4
+	ble	_L1_M4_40
+
+_L1_M4_22:
+
+	KERNEL4x1
+	KERNEL4x1
+	KERNEL4x1
+	KERNEL4x1
+
+	subs	L, L, #1
+	bgt	_L1_M4_22
+
+
+_L1_M4_40:
+
+	ands	L , K1, #3	// L = K % 4
+	ble	_L1_M4_100
+
+_L1_M4_42:
+
+	KERNEL4x1
+
+	subs	L, L, #1
+	bgt	_L1_M4_42
+
+_L1_M4_100:
+
+	SAVE4x1
+
+_L1_M4_END:
+
+
+
+
+_L1_M2_BEGIN:
+
+	tst	I, #2	// bit 1 of M: one 2-row tile
+	ble	_L1_M1_BEGIN
+
+_L1_M2_20:
+
+	INIT2x1
+
+	mov	BO, BC
+	asrs	L , K1, #2	// L = K / 4
+	ble	_L1_M2_40
+
+_L1_M2_22:
+
+	KERNEL2x1
+	KERNEL2x1
+	KERNEL2x1
+	KERNEL2x1
+
+	subs	L, L, #1
+	bgt	_L1_M2_22
+
+
+_L1_M2_40:
+
+	ands	L , K1, #3	// L = K % 4
+	ble	_L1_M2_100
+
+_L1_M2_42:
+
+	KERNEL2x1
+
+	subs	L, L, #1
+	bgt	_L1_M2_42
+
+_L1_M2_100:
+
+	SAVE2x1
+
+_L1_M2_END:
+
+
+
+_L1_M1_BEGIN:
+
+	tst	I, #1	// bit 0 of M: one 1-row tile
+	ble	_L1_END
+
+_L1_M1_20:
+
+	INIT1x1
+
+	mov	BO, BC
+	asrs	L , K1, #2	// L = K / 4
+	ble	_L1_M1_40
+
+_L1_M1_22:
+
+	KERNEL1x1
+	KERNEL1x1
+	KERNEL1x1
+	KERNEL1x1
+
+	subs	L, L, #1
+	bgt	_L1_M1_22
+
+
+_L1_M1_40:
+
+	ands	L , K1, #3	// L = K % 4
+	ble	_L1_M1_100
+
+_L1_M1_42:
+
+	KERNEL1x1
+
+	subs	L, L, #1
+	bgt	_L1_M1_42
+
+_L1_M1_100:
+
+	SAVE1x1
+
+
+_L1_END:
+
+
+
+_L999:
+
+	sub	r3, fp, #128
+	vldm	r3, { d8 - d15}	// restore floating point registers
+
+	movs	r0, #0	// set return value
+	sub	sp, fp, #24
+	pop	{r4 - r9, fp}
+	bx	lr
+
+	EPILOGUE
+
diff --git a/kernel/arm/dot.c b/kernel/arm/dot.c
new file mode 100644
index 0000000..30490e2
--- /dev/null
+++ b/kernel/arm/dot.c
@@ -0,0 +1,64 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" + +#if defined(DSDOT) +double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +#else +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +#endif +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + double dot = 0.0 ; + + if ( n < 0 ) return(dot); + + while(i < n) + { + + dot += y[iy] * x[ix] ; + ix += inc_x ; + iy += inc_y ; + i++ ; + + } + return(dot); + +} + + diff --git a/kernel/arm/dtrmm_kernel_8x2_vfpv3.S b/kernel/arm/dtrmm_kernel_8x2_vfpv3.S new file mode 100644 index 0000000..9306166 --- /dev/null +++ b/kernel/arm/dtrmm_kernel_8x2_vfpv3.S @@ -0,0 +1,1521 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/28 Saar +* BLASTEST : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#define ASSEMBLER +#include "common.h" + +#define STACKSIZE 252 + +#define OLD_M r0 +#define OLD_N r1 +#define OLD_K r2 +#define OLD_A r3 +#define OLD_ALPHA d0 + +/****************************************************** +* [fp, #-128] - [fp, #-64] is reserved +* for store and restore of floating point +* registers +*******************************************************/ + + +#define KK [fp, #-240 ] +#define KKK [fp, #-244] +#define C [fp, #-248 ] +#define LDC [fp, #-252 ] +#define M [fp, #-256 ] +#define N [fp, #-260 ] +#define K [fp, #-264 ] +#define A [fp, #-268 ] + +#define ALPHA [fp, #-276 ] + +#define B [fp, #4 ] +#define OLD_C [fp, #8 ] +#define OLD_LDC [fp, #12 ] +#define OFFSET [fp, #16 ] + +#define I r0 +#define J r1 +#define L r2 + +#define AO r5 +#define BO r6 + +#define CO1 r8 +#define CO2 r9 + +#define K1 r7 +#define BC r12 + +#define A_PRE 128 +#define A_PRE1 160 +#define B_PRE 
128 +#define C_PRE 32 + +/************************************************************************************** +* Macro definitions +**************************************************************************************/ + +.macro INIT8x2 + + vsub.f64 d8 , d8 , d8 + vmov.f64 d9 , d8 + vmov.f64 d10, d8 + vmov.f64 d11, d8 + vmov.f64 d12, d8 + vmov.f64 d13, d8 + vmov.f64 d14, d8 + vmov.f64 d15, d8 + vmov.f64 d16, d8 + vmov.f64 d17, d8 + vmov.f64 d18, d8 + vmov.f64 d19, d8 + vmov.f64 d20, d8 + vmov.f64 d21, d8 + vmov.f64 d22, d8 + vmov.f64 d23, d8 + +.endm + +.macro KERNEL8x2_START + + vldm BO!, { d24 , d25} + vldm AO!, { d0, d1 , d2, d3, d4 , d5 , d6 , d7 } + + vmul.f64 d26 , d0 , d24 + vmul.f64 d29 , d0 , d25 + vmul.f64 d27 , d1 , d24 + vmul.f64 d30 , d1 , d25 + vmul.f64 d28 , d2 , d24 + vmul.f64 d31 , d2 , d25 + + pld [AO , #A_PRE] + + vadd.f64 d8 , d8 , d26 + vadd.f64 d16 , d16, d29 + vadd.f64 d9 , d9 , d27 + vadd.f64 d17 , d17, d30 + vadd.f64 d10 , d10, d28 + vadd.f64 d18 , d18, d31 + + vmul.f64 d26 , d3 , d24 + vmul.f64 d27 , d4 , d24 + vmul.f64 d28 , d5 , d24 + vmul.f64 d29 , d3 , d25 + vmul.f64 d30 , d4 , d25 + vmul.f64 d31 , d5 , d25 + + pld [AO , #A_PRE1] + + vadd.f64 d11 , d11, d26 + vadd.f64 d12 , d12, d27 + vadd.f64 d13 , d13, d28 + vadd.f64 d19 , d19, d29 + vadd.f64 d20 , d20, d30 + vadd.f64 d21 , d21, d31 + + vmul.f64 d26 , d6 , d24 + vmul.f64 d27 , d7 , d24 + vmul.f64 d29 , d6 , d25 + vmul.f64 d30 , d7 , d25 + + vldm BO!, { d24 , d25} + vldm AO!, { d0, d1 , d2, d3, d4 , d5 , d6 , d7 } + + vadd.f64 d14 , d14, d26 + vadd.f64 d15 , d15, d27 + vadd.f64 d22 , d22, d29 + vadd.f64 d23 , d23, d30 + +.endm + + +.macro KERNEL8x2_M + + vmul.f64 d26 , d0 , d24 + vmul.f64 d29 , d0 , d25 + vmul.f64 d27 , d1 , d24 + vmul.f64 d30 , d1 , d25 + vmul.f64 d28 , d2 , d24 + vmul.f64 d31 , d2 , d25 + + pld [AO , #A_PRE] + + vadd.f64 d8 , d8 , d26 + vadd.f64 d16 , d16, d29 + vadd.f64 d9 , d9 , d27 + vadd.f64 d17 , d17, d30 + vadd.f64 d10 , d10, d28 + vadd.f64 d18 , d18, d31 
+ + vmul.f64 d26 , d3 , d24 + vmul.f64 d27 , d4 , d24 + vmul.f64 d28 , d5 , d24 + vmul.f64 d29 , d3 , d25 + vmul.f64 d30 , d4 , d25 + vmul.f64 d31 , d5 , d25 + + pld [AO , #A_PRE1] + + vadd.f64 d11 , d11, d26 + vadd.f64 d12 , d12, d27 + vadd.f64 d13 , d13, d28 + vadd.f64 d19 , d19, d29 + vadd.f64 d20 , d20, d30 + vadd.f64 d21 , d21, d31 + + vmul.f64 d26 , d6 , d24 + vmul.f64 d27 , d7 , d24 + vmul.f64 d29 , d6 , d25 + vmul.f64 d30 , d7 , d25 + + vldm BO!, { d24 , d25} + vldm AO!, { d0, d1 , d2, d3, d4 , d5 , d6 , d7 } + + vadd.f64 d14 , d14, d26 + vadd.f64 d15 , d15, d27 + vadd.f64 d22 , d22, d29 + vadd.f64 d23 , d23, d30 + +.endm + + +.macro KERNEL8x2_END + + vmul.f64 d26 , d0 , d24 + vmul.f64 d29 , d0 , d25 + vmul.f64 d27 , d1 , d24 + vmul.f64 d30 , d1 , d25 + vmul.f64 d28 , d2 , d24 + vmul.f64 d31 , d2 , d25 + + pld [AO , #A_PRE] + + vadd.f64 d8 , d8 , d26 + vadd.f64 d16 , d16, d29 + vadd.f64 d9 , d9 , d27 + vadd.f64 d17 , d17, d30 + vadd.f64 d10 , d10, d28 + vadd.f64 d18 , d18, d31 + + vmul.f64 d26 , d3 , d24 + vmul.f64 d27 , d4 , d24 + vmul.f64 d28 , d5 , d24 + vmul.f64 d29 , d3 , d25 + vmul.f64 d30 , d4 , d25 + vmul.f64 d31 , d5 , d25 + + pld [AO , #A_PRE1] + + vadd.f64 d11 , d11, d26 + vadd.f64 d12 , d12, d27 + vadd.f64 d13 , d13, d28 + vadd.f64 d19 , d19, d29 + vadd.f64 d20 , d20, d30 + vadd.f64 d21 , d21, d31 + + vmul.f64 d26 , d6 , d24 + vmul.f64 d27 , d7 , d24 + vmul.f64 d29 , d6 , d25 + vmul.f64 d30 , d7 , d25 + + vadd.f64 d14 , d14, d26 + vadd.f64 d15 , d15, d27 + vadd.f64 d22 , d22, d29 + vadd.f64 d23 , d23, d30 + +.endm + + + + +.macro KERNEL8x2 + + vldm BO!, { d24 , d25} + vldm AO!, { d0, d1 , d2, d3, d4 , d5 , d6 , d7 } + + vmul.f64 d26 , d0 , d24 + vmul.f64 d29 , d0 , d25 + vmul.f64 d27 , d1 , d24 + vmul.f64 d30 , d1 , d25 + vmul.f64 d28 , d2 , d24 + vmul.f64 d31 , d2 , d25 + + pld [AO , #A_PRE] + + vadd.f64 d8 , d8 , d26 + vadd.f64 d16 , d16, d29 + vadd.f64 d9 , d9 , d27 + vadd.f64 d17 , d17, d30 + vadd.f64 d10 , d10, d28 + vadd.f64 d18 , d18, d31 
+ + vmul.f64 d26 , d3 , d24 + vmul.f64 d27 , d4 , d24 + vmul.f64 d28 , d5 , d24 + vmul.f64 d29 , d3 , d25 + vmul.f64 d30 , d4 , d25 + vmul.f64 d31 , d5 , d25 + + pld [AO , #A_PRE1] + + vadd.f64 d11 , d11, d26 + vadd.f64 d12 , d12, d27 + vadd.f64 d13 , d13, d28 + vadd.f64 d19 , d19, d29 + vadd.f64 d20 , d20, d30 + vadd.f64 d21 , d21, d31 + + vmul.f64 d26 , d6 , d24 + vmul.f64 d27 , d7 , d24 + vmul.f64 d29 , d6 , d25 + vmul.f64 d30 , d7 , d25 + + vadd.f64 d14 , d14, d26 + vadd.f64 d15 , d15, d27 + vadd.f64 d22 , d22, d29 + vadd.f64 d23 , d23, d30 + +.endm + +.macro SAVE8x2 + + vldr d0, ALPHA + + vmul.f64 d8 , d0 , d8 + vmul.f64 d9 , d0 , d9 + vmul.f64 d10, d0 , d10 + vmul.f64 d11, d0 , d11 + vmul.f64 d12, d0 , d12 + vmul.f64 d13, d0 , d13 + vmul.f64 d14, d0 , d14 + vmul.f64 d15, d0 , d15 + + vmul.f64 d16, d0 , d16 + vmul.f64 d17, d0 , d17 + vmul.f64 d18, d0 , d18 + vmul.f64 d19, d0 , d19 + vmul.f64 d20, d0 , d20 + vmul.f64 d21, d0 , d21 + vmul.f64 d22, d0 , d22 + vmul.f64 d23, d0 , d23 + + vstm CO2!, { d16, d17, d18 , d19 , d20 , d21 , d22 , d23 } + vstm CO1!, { d8 , d9 , d10 , d11 , d12, d13 , d14 , d15 } + +.endm + + +/*************************************************************************************/ + + +.macro INIT4x2 + + vsub.f64 d8 , d8 , d8 + vsub.f64 d9 , d9 , d9 + vsub.f64 d10, d10, d10 + vsub.f64 d11, d11, d11 + vsub.f64 d12, d12, d12 + vsub.f64 d13, d13, d13 + vsub.f64 d14, d14, d14 + vsub.f64 d15, d15, d15 + +.endm + +.macro KERNEL4x2 + + vldm AO!, { d0, d1 , d2, d3 } + vldm BO!, { d4, d5 } + + vmul.f64 d6 , d0 , d4 + vmul.f64 d7 , d1 , d4 + vadd.f64 d8 , d8 , d6 + vadd.f64 d9 , d9 , d7 + + vmul.f64 d6 , d2 , d4 + vmul.f64 d7 , d3 , d4 + vadd.f64 d10, d10, d6 + vadd.f64 d11, d11, d7 + + vmul.f64 d6 , d0 , d5 + vmul.f64 d7 , d1 , d5 + vadd.f64 d12, d12, d6 + vadd.f64 d13, d13, d7 + + vmul.f64 d6 , d2 , d5 + vmul.f64 d7 , d3 , d5 + vadd.f64 d14, d14, d6 + vadd.f64 d15, d15, d7 + +.endm + +.macro SAVE4x2 + + vldr d0, ALPHA + + vmul.f64 d8 , d0 , d8 + 
vmul.f64 d9 , d0 , d9 + vmul.f64 d10, d0 , d10 + vmul.f64 d11, d0 , d11 + vmul.f64 d12, d0 , d12 + vmul.f64 d13, d0 , d13 + vmul.f64 d14, d0 , d14 + vmul.f64 d15, d0 , d15 + + vstm CO1!, { d8 , d9 , d10 , d11 } + vstm CO2!, { d12, d13 ,d14 , d15 } + +.endm + + + +/*************************************************************************************/ + +.macro INIT2x2 + + vsub.f64 d8 , d8 , d8 + vsub.f64 d9 , d9 , d9 + vsub.f64 d12, d12, d12 + vsub.f64 d13, d13, d13 + +.endm + +.macro KERNEL2x2 + + vldm AO!, { d0, d1 } + vldm BO!, { d4, d5 } + + vmul.f64 d6 , d0 , d4 + vmul.f64 d7 , d1 , d4 + vadd.f64 d8 , d8 , d6 + vadd.f64 d9 , d9 , d7 + + vmul.f64 d6 , d0 , d5 + vmul.f64 d7 , d1 , d5 + vadd.f64 d12, d12, d6 + vadd.f64 d13, d13, d7 + +.endm + +.macro SAVE2x2 + + vldr d0, ALPHA + + vmul.f64 d8 , d0 , d8 + vmul.f64 d9 , d0 , d9 + vmul.f64 d12, d0 , d12 + vmul.f64 d13, d0 , d13 + + vstm CO1!, { d8 , d9 } + vstm CO2!, { d12, d13 } + +.endm + +/*************************************************************************************/ + +.macro INIT1x2 + + vsub.f64 d8 , d8 , d8 + vsub.f64 d12, d12, d12 + +.endm + +.macro KERNEL1x2 + + vldm AO!, { d0 } + vldm BO!, { d4, d5 } + + vmul.f64 d6 , d0 , d4 + vadd.f64 d8 , d8 , d6 + + vmul.f64 d6 , d0 , d5 + vadd.f64 d12, d12, d6 + +.endm + +.macro SAVE1x2 + + vldr d0, ALPHA + + vmul.f64 d8 , d0 , d8 + vmul.f64 d12, d0 , d12 + + vstm CO1!, { d8 } + vstm CO2!, { d12} + +.endm + +/*************************************************************************************/ + +.macro INIT8x1 + + vsub.f64 d8 , d8 , d8 + vsub.f64 d9 , d9 , d9 + vsub.f64 d10, d10, d10 + vsub.f64 d11, d11, d11 + vsub.f64 d12, d12, d12 + vsub.f64 d13, d13, d13 + vsub.f64 d14, d14, d14 + vsub.f64 d15, d15, d15 + +.endm + +.macro KERNEL8x1 + + vldm AO!, { d0, d1 , d2, d3, d4 , d5 , d6 , d7 } + vldm BO!, { d24 } + + vmul.f64 d26 , d0 , d24 + vmul.f64 d27 , d1 , d24 + vadd.f64 d8 , d8 , d26 + vadd.f64 d9 , d9 , d27 + + vmul.f64 d28 , d2 , d24 + vmul.f64 d29 , d3 , 
d24 + vadd.f64 d10 , d10, d28 + vadd.f64 d11 , d11, d29 + + vmul.f64 d26 , d4 , d24 + vmul.f64 d27 , d5 , d24 + vadd.f64 d12 , d12, d26 + vadd.f64 d13 , d13, d27 + + vmul.f64 d28 , d6 , d24 + vmul.f64 d29 , d7 , d24 + vadd.f64 d14 , d14, d28 + vadd.f64 d15 , d15, d29 + + +.endm + +.macro SAVE8x1 + + vldr d0, ALPHA + + vmul.f64 d8 , d0 , d8 + vmul.f64 d9 , d0 , d9 + vmul.f64 d10, d0 , d10 + vmul.f64 d11, d0 , d11 + vmul.f64 d12, d0 , d12 + vmul.f64 d13, d0 , d13 + vmul.f64 d14, d0 , d14 + vmul.f64 d15, d0 , d15 + + vstm CO1!, { d8 , d9 , d10 , d11 , d12, d13 ,d14 , d15 } + +.endm + + +/*************************************************************************************/ + +.macro INIT4x1 + + vsub.f64 d8 , d8 , d8 + vsub.f64 d9 , d9 , d9 + vsub.f64 d10, d10, d10 + vsub.f64 d11, d11, d11 + +.endm + +.macro KERNEL4x1 + + vldm AO!, { d0, d1 , d2, d3 } + vldm BO!, { d4 } + + vmul.f64 d6 , d0 , d4 + vmul.f64 d7 , d1 , d4 + vadd.f64 d8 , d8 , d6 + vadd.f64 d9 , d9 , d7 + + vmul.f64 d6 , d2 , d4 + vmul.f64 d7 , d3 , d4 + vadd.f64 d10, d10, d6 + vadd.f64 d11, d11, d7 + +.endm + +.macro SAVE4x1 + + vldr d0, ALPHA + + vmul.f64 d8 , d0 , d8 + vmul.f64 d9 , d0 , d9 + vmul.f64 d10, d0 , d10 + vmul.f64 d11, d0 , d11 + + vstm CO1!, { d8 , d9 , d10 , d11 } + +.endm + +/*************************************************************************************/ + +.macro INIT2x1 + + vsub.f64 d8 , d8 , d8 + vsub.f64 d9 , d9 , d9 + +.endm + +.macro KERNEL2x1 + + vldm AO!, { d0, d1 } + vldm BO!, { d4 } + + vmul.f64 d6 , d0 , d4 + vmul.f64 d7 , d1 , d4 + vadd.f64 d8 , d8 , d6 + vadd.f64 d9 , d9 , d7 + +.endm + +.macro SAVE2x1 + + vldr d0, ALPHA + + vmul.f64 d8 , d0 , d8 + vmul.f64 d9 , d0 , d9 + + vstm CO1!, { d8 , d9 } + +.endm + +/*************************************************************************************/ + +.macro INIT1x1 + + vsub.f64 d8 , d8 , d8 + +.endm + +.macro KERNEL1x1 + + vldm AO!, { d0 } + vldm BO!, { d4 } + + vmul.f64 d6 , d0 , d4 + vadd.f64 d8 , d8 , d6 + +.endm + 
+.macro SAVE1x1
+
+	vldr	d0, ALPHA
+
+	vmul.f64	d8 , d0 , d8
+
+	vstm	CO1!, { d8 }
+
+.endm
+
+
+
+/**************************************************************************************
+* End of macro definitions
+**************************************************************************************/
+
+/**************************************************************************************
+* Kernel driver
+*
+* Frame setup: saves r4-r9/fp and d8-d15, copies OLD_M, OLD_N, OLD_K, OLD_A and
+* OLD_ALPHA into the frame slots M, N, K, A and ALPHA, stores OLD_LDC * 8 as LDC
+* (byte distance between two columns of C) and OLD_C as C.
+*
+* Loop nest:
+*   J : N / 2 passes over two columns of C (CO1, CO2), followed by one
+*       single-column pass when bit 0 of N is set. BC is advanced by
+*       K * 2 * 8 bytes after every two-column pass.
+*   I : every pass restarts AO at A and walks M in tiles of 8 rows; what is
+*       left is covered by at most one 4-row, one 2-row and one 1-row tile,
+*       selected by bits 2, 1 and 0 of M.
+*   K1: per tile, L is the number of K steps to run; K1 counts the unrolled
+*       iterations (L / 8 or L / 4) and then the remainder (L % 8 or L % 4).
+*
+* KK is loaded from OFFSET (negated when LEFT is not defined). With LEFT it is
+* reset to OFFSET at the start of every column pass and increased by the row
+* count of each finished tile; without LEFT it is increased by 2 after every
+* two-column pass.
+*
+* Per tile, depending on LEFT/TRANSA:
+*   (LEFT && TRANSA) || (!LEFT && !TRANSA):
+*       BO = BC, AO is left where it is, L = KK + tile rows (LEFT) or
+*       KK + tile columns (!LEFT). After SAVE, AO and BO are advanced past
+*       the K - L steps that were not run.
+*   otherwise:
+*       BO = BC + KK * columns * 8, AO += KK * rows * 8, L = K - KK.
+*   L is written to KKK in both cases.
+*
+* NOTE(review): when TRMMKERNEL is not defined L is set to K and KKK is never
+* written, although the fix-up after SAVE reads it - presumably this file is
+* only built with TRMMKERNEL defined; confirm.
+*
+* Returns 0 in r0.
+*
+* Flags: ble/bgt test V, but asrs, ands, tst and lsls do not write it, and the
+* condition flags are not defined on entry to the routine. J is therefore
+* compared against zero explicitly before the first branch. After that cmp the
+* only instructions that write V are the subs decrements of positive counters,
+* which leave it clear.
+**************************************************************************************/
+
+	PROLOGUE
+
+	.align 5
+
+	push	{r4 - r9, fp}
+	add	fp, sp, #24
+	sub	sp, sp, #STACKSIZE	// reserve stack
+
+	str	OLD_M, M
+	str	OLD_N, N
+	str	OLD_K, K
+	str	OLD_A, A
+	vstr	OLD_ALPHA, ALPHA
+
+	sub	r3, fp, #128
+	vstm	r3, { d8 - d15}	// store floating point registers
+
+	ldr	r3, OLD_LDC
+	lsl	r3, r3, #3	// ldc = ldc * 8
+	str	r3, LDC
+
+	ldr	r3, OLD_C
+	str	r3, C
+
+	ldr	BC, B
+
+	ldr	r3, OFFSET
+#ifndef LEFT
+	neg	r3 , r3
+#endif
+	str	r3 , KK
+
+	ldr	J, N
+	asr	J, J, #1	// J = N / 2
+	cmp	J, #0	// defines N, Z, C and V for the ble below
+	ble	_L1_BEGIN
+
+_L2_BEGIN:
+
+	ldr	CO1, C	// CO1 = C
+	ldr	r4 , LDC
+	add	CO2, CO1, r4	// CO2 = C + LDC
+	add	r3 , CO2, r4	// C = CO2 + LDC
+	str	r3 , C	// store C
+
+#if defined(LEFT)
+	ldr	r3 , OFFSET
+	str	r3 , KK
+#endif
+
+	ldr	AO, A	// AO = A
+
+_L2_M8_BEGIN:
+
+	ldr	I, M
+	asrs	I, I, #3	// I = M / 8
+	ble	_L2_M4_BEGIN
+
+_L2_M8_20:
+
+	pld	[CO1, #C_PRE]
+	pld	[CO2, #C_PRE]
+
+	INIT8x2
+
+#if (defined(LEFT) && defined(TRANSA)) || \
+	(!defined(LEFT) && !defined(TRANSA))
+
+	mov	BO, BC
+#else
+	mov	BO, BC
+	ldr	r3 , KK
+	lsls	r4 , r3 , #4	// 2 double values
+	add	BO , BO , r4
+	lsls	r4 , r3 , #6	// 8 double values
+	add	AO , AO , r4
+
+#endif
+
+#ifndef TRMMKERNEL
+	ldr	L , K
+#elif (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
+	ldr	L , K
+	ldr	r3, KK
+	sub	L , L, r3
+	str	L , KKK
+#else
+	ldr	L , KK
+#ifdef LEFT
+	add	L , L , #8	// number of values in AO
+#else
+	add	L , L , #2	// number of values in BO
+#endif
+	str	L , KKK
+#endif
+
+	asrs	K1 , L, #3	// K1 = L / 8
+	ble	_L2_M8_40
+	.align 5
+
+_L2_M8_22:
+
+	pld	[BO , #B_PRE]
+	KERNEL8x2_START
+	KERNEL8x2_M
+	pld	[BO , #B_PRE]
+	KERNEL8x2_M
+	KERNEL8x2_M
+
+	pld	[BO , #B_PRE]
+	KERNEL8x2_M
+	KERNEL8x2_M
+	pld	[BO , #B_PRE]
+	KERNEL8x2_M
+	KERNEL8x2_END
+
+	subs	K1, K1, #1
+	bgt	_L2_M8_22
+
+
+_L2_M8_40:
+
+	ands	K1 , L, #7	// K1 = L % 8
+	ble	_L2_M8_100
+
+_L2_M8_42:
+
+	KERNEL8x2
+
+	subs	K1, K1, #1
+	bgt	_L2_M8_42
+
+_L2_M8_100:
+
+	SAVE8x2
+
+#if (defined(LEFT) && defined(TRANSA)) || \
+	(!defined(LEFT) && !defined(TRANSA))
+	ldr	r3 , K
+	ldr	r4 , KKK
+	sub	r3 , r3 , r4	// K steps that were not run
+	lsls	r4 , r3 , #4	// 2 double values
+	add	BO , BO , r4
+	lsls	r4 , r3 , #6	// 8 double values
+	add	AO , AO , r4
+#endif
+
+#if defined(LEFT)
+	ldr	r3 , KK
+	add	r3 , r3 , #8	// number of values in AO
+	str	r3 , KK
+#endif
+
+
+_L2_M8_END:
+
+	subs	I, I, #1
+	bgt	_L2_M8_20
+
+
+_L2_M4_BEGIN:
+
+	ldr	I, M
+	tst	I , #7	// M % 8 == 0: no remainder rows
+	ble	_L2_END
+
+	tst	I , #4	// bit 2 of M: one 4-row tile
+	ble	_L2_M2_BEGIN
+
+_L2_M4_20:
+
+	INIT4x2
+
+#if (defined(LEFT) && defined(TRANSA)) || \
+	(!defined(LEFT) && !defined(TRANSA))
+
+	mov	BO, BC
+#else
+	mov	BO, BC
+	ldr	r3 , KK
+	lsls	r4 , r3 , #4	// 2 double values
+	add	BO , BO , r4
+	lsls	r4 , r3 , #5	// 4 double values
+	add	AO , AO , r4
+
+#endif
+
+#ifndef TRMMKERNEL
+	ldr	L , K
+#elif (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
+	ldr	L , K
+	ldr	r3, KK
+	sub	L , L, r3
+	str	L , KKK
+#else
+	ldr	L , KK
+#ifdef LEFT
+	add	L , L , #4	// number of values in AO
+#else
+	add	L , L , #2	// number of values in BO
+#endif
+	str	L , KKK
+#endif
+
+	asrs	K1, L, #3	// K1 = L / 8
+	ble	_L2_M4_40
+	.align 5
+
+_L2_M4_22:
+
+	KERNEL4x2
+	KERNEL4x2
+	KERNEL4x2
+	KERNEL4x2
+
+	KERNEL4x2
+	KERNEL4x2
+	KERNEL4x2
+	KERNEL4x2
+
+	subs	K1, K1, #1
+	bgt	_L2_M4_22
+
+
+_L2_M4_40:
+
+	ands	K1, L, #7	// K1 = L % 8
+	ble	_L2_M4_100
+
+_L2_M4_42:
+
+	KERNEL4x2
+
+	subs	K1, K1, #1
+	bgt	_L2_M4_42
+
+_L2_M4_100:
+
+	SAVE4x2
+
+#if (defined(LEFT) && defined(TRANSA)) || \
+	(!defined(LEFT) && !defined(TRANSA))
+	ldr	r3 , K
+	ldr	r4 , KKK
+	sub	r3 , r3 , r4	// K steps that were not run
+	lsls	r4 , r3 , #4	// 2 double values
+	add	BO , BO , r4
+	lsls	r4 , r3 , #5	// 4 double values
+	add	AO , AO , r4
+#endif
+
+#if defined(LEFT)
+	ldr	r3 , KK
+	add	r3 , r3 , #4	// number of values in AO
+	str	r3 , KK
+#endif
+
+_L2_M4_END:
+
+
+
+_L2_M2_BEGIN:
+
+	tst	I, #2	// bit 1 of M: one 2-row tile
+	ble	_L2_M1_BEGIN
+
+_L2_M2_20:
+
+	INIT2x2
+
+#if (defined(LEFT) && defined(TRANSA)) || \
+	(!defined(LEFT) && !defined(TRANSA))
+
+	mov	BO, BC
+#else
+	mov	BO, BC
+	ldr	r3 , KK
+	lsls	r4 , r3 , #4	// 2 double values
+	add	BO , BO , r4
+	lsls	r4 , r3 , #4	// 2 double values
+	add	AO , AO , r4
+
+#endif
+
+#ifndef TRMMKERNEL
+	ldr	L , K
+#elif (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
+	ldr	L , K
+	ldr	r3, KK
+	sub	L , L, r3
+	str	L , KKK
+#else
+	ldr	L , KK
+#ifdef LEFT
+	add	L , L , #2	// number of values in AO
+#else
+	add	L , L , #2	// number of values in BO
+#endif
+	str	L , KKK
+#endif
+
+	asrs	K1, L, #2	// K1 = L / 4
+	ble	_L2_M2_40
+
+_L2_M2_22:
+
+	KERNEL2x2
+	KERNEL2x2
+	KERNEL2x2
+	KERNEL2x2
+
+	subs	K1, K1, #1
+	bgt	_L2_M2_22
+
+
+_L2_M2_40:
+
+	ands	K1, L, #3	// K1 = L % 4
+	ble	_L2_M2_100
+
+_L2_M2_42:
+
+	KERNEL2x2
+
+	subs	K1, K1, #1
+	bgt	_L2_M2_42
+
+_L2_M2_100:
+
+	SAVE2x2
+
+#if (defined(LEFT) && defined(TRANSA)) || \
+	(!defined(LEFT) && !defined(TRANSA))
+	ldr	r3 , K
+	ldr	r4 , KKK
+	sub	r3 , r3 , r4	// K steps that were not run
+	lsls	r4 , r3 , #4	// 2 double values
+	add	BO , BO , r4
+	lsls	r4 , r3 , #4	// 2 double values
+	add	AO , AO , r4
+#endif
+
+#if defined(LEFT)
+	ldr	r3 , KK
+	add	r3 , r3 , #2	// number of values in AO
+	str	r3 , KK
+#endif
+
+
+_L2_M2_END:
+
+
+_L2_M1_BEGIN:
+
+	tst	I, #1	// bit 0 of M: one 1-row tile
+	ble	_L2_END
+
+_L2_M1_20:
+
+	INIT1x2
+
+#if (defined(LEFT) && defined(TRANSA)) || \
+	(!defined(LEFT) && !defined(TRANSA))
+
+	mov	BO, BC
+#else
+	mov	BO, BC
+	ldr	r3 , KK
+	lsls	r4 , r3 , #4	// 2 double values
+	add	BO , BO , r4
+	lsls	r4 , r3 , #3	// 1 double value
+	add	AO , AO , r4
+
+#endif
+
+#ifndef TRMMKERNEL
+	ldr	L , K
+#elif (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
+	ldr	L , K
+	ldr	r3, KK
+	sub	L , L, r3
+	str	L , KKK
+#else
+	ldr	L , KK
+#ifdef LEFT
+	add	L , L , #1	// number of values in AO
+#else
+	add	L , L , #2	// number of values in BO
+#endif
+	str	L , KKK
+#endif
+
+	asrs	K1, L, #2	// K1 = L / 4
+	ble	_L2_M1_40
+
+_L2_M1_22:
+
+	KERNEL1x2
+	KERNEL1x2
+	KERNEL1x2
+	KERNEL1x2
+
+	subs	K1, K1, #1
+	bgt	_L2_M1_22
+
+
+_L2_M1_40:
+
+	ands	K1, L, #3	// K1 = L % 4
+	ble	_L2_M1_100
+
+_L2_M1_42:
+
+	KERNEL1x2
+
+	subs	K1, K1, #1
+	bgt	_L2_M1_42
+
+_L2_M1_100:
+
+	SAVE1x2
+
+#if (defined(LEFT) && defined(TRANSA)) || \
+	(!defined(LEFT) && !defined(TRANSA))
+	ldr	r3 , K
+	ldr	r4 , KKK
+	sub	r3 , r3 , r4	// K steps that were not run
+	lsls	r4 , r3 , #4	// 2 double values
+	add	BO , BO , r4
+	lsls	r4 , r3 , #3	// 1 double value
+	add	AO , AO , r4
+#endif
+
+#if defined(LEFT)
+	ldr	r3 , KK
+	add	r3 , r3 , #1	// number of values in AO
+	str	r3 , KK
+#endif
+
+
+
+_L2_END:
+
+	mov	r3, BC
+	ldr	r4, K
+	lsl	r4, r4, #4	// k * 2 * 8
+	add	r3, r3, r4	// B = B + K * 2 * 8
+	mov	BC, r3
+
+#if !defined(LEFT)
+	ldr	r3 , KK
+	add	r3 , r3 , #2	// number of values in BO
+	str	r3 , KK
+#endif
+
+	subs	J , #1	// j--
+	bgt	_L2_BEGIN
+
+
+_L1_BEGIN:
+
+	ldr	J, N
+	tst	J , #1	// bit 0 of N: one single-column pass
+	ble	_L999
+
+	ldr	CO1, C	// CO1 = C
+	ldr	r4 , LDC
+	add	r3 , CO1, r4	// C = CO1 + LDC
+	str	r3 , C	// store C
+
+#if defined(LEFT)
+	ldr	r3 , OFFSET
+	str	r3 , KK
+#endif
+
+	ldr	AO, A	// AO = A
+
+
+
+_L1_M8_BEGIN:
+
+	ldr	I, M
+	asrs	I, I, #3	// I = M / 8
+	ble	_L1_M4_BEGIN
+
+_L1_M8_20:
+
+	INIT8x1
+
+#if (defined(LEFT) && defined(TRANSA)) || \
+	(!defined(LEFT) && !defined(TRANSA))
+
+	mov	BO, BC
+#else
+	mov	BO, BC
+	ldr	r3 , KK
+	lsls	r4 , r3 , #3	// 1 double value
+	add	BO , BO , r4
+	lsls	r4 , r3 , #6	// 8 double values
+	add	AO , AO , r4
+
+#endif
+
+#ifndef TRMMKERNEL
+	ldr	L , K
+#elif (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
+	ldr	L , K
+	ldr	r3, KK
+	sub	L , L, r3
+	str	L , KKK
+#else
+	ldr	L , KK
+#ifdef LEFT
+	add	L , L , #8	// number of values in AO
+#else
+	add	L , L , #1	// number of values in BO
+#endif
+	str	L , KKK
+#endif
+
+	asrs	K1, L, #3	// K1 = L / 8
+	ble	_L1_M8_40
+
+_L1_M8_22:
+
+	KERNEL8x1
+	KERNEL8x1
+	KERNEL8x1
+	KERNEL8x1
+	KERNEL8x1
+	KERNEL8x1
+	KERNEL8x1
+	KERNEL8x1
+
+	subs	K1, K1, #1
+	bgt	_L1_M8_22
+
+
+_L1_M8_40:
+
+	ands	K1, L, #7	// K1 = L % 8
+	ble	_L1_M8_100
+
+_L1_M8_42:
+
+	KERNEL8x1
+
+	subs	K1, K1, #1
+	bgt	_L1_M8_42
+
+_L1_M8_100:
+
+	SAVE8x1
+
+#if (defined(LEFT) && defined(TRANSA)) || \
+	(!defined(LEFT) && !defined(TRANSA))
+	ldr	r3 , K
+	ldr	r4 , KKK
+	sub	r3 , r3 , r4	// K steps that were not run
+	lsls	r4 , r3 , #3	// 1 double value
+	add	BO , BO , r4
+	lsls	r4 , r3 , #6	// 8 double values
+	add	AO , AO , r4
+#endif
+
+#if defined(LEFT)
+	ldr	r3 , KK
+	add	r3 , r3 , #8	// number of values in AO
+	str	r3 , KK
+#endif
+
+
+_L1_M8_END:
+
+	subs	I, I, #1
+	bgt	_L1_M8_20
+
+
+
+
+_L1_M4_BEGIN:
+
+	ldr	I, M
+	tst	I, #7	// M % 8 == 0: no remainder rows
+	ble	_L1_END
+
+	tst	I, #4	// bit 2 of M: one 4-row tile
+	ble	_L1_M2_BEGIN
+
+_L1_M4_20:
+
+	INIT4x1
+
+#if (defined(LEFT) && defined(TRANSA)) || \
+	(!defined(LEFT) && !defined(TRANSA))
+
+	mov	BO, BC
+#else
+	mov	BO, BC
+	ldr	r3 , KK
+	lsls	r4 , r3 , #3	// 1 double value
+	add	BO , BO , r4
+	lsls	r4 , r3 , #5	// 4 double values
+	add	AO , AO , r4
+
+#endif
+
+#ifndef TRMMKERNEL
+	ldr	L , K
+#elif (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
+	ldr	L , K
+	ldr	r3, KK
+	sub	L , L, r3
+	str	L , KKK
+#else
+	ldr	L , KK
+#ifdef LEFT
+	add	L , L , #4	// number of values in AO
+#else
+	add	L , L , #1	// number of values in BO
+#endif
+	str	L , KKK
+#endif
+
+	asrs	K1, L, #2	// K1 = L / 4
+	ble	_L1_M4_40
+
+_L1_M4_22:
+
+	KERNEL4x1
+	KERNEL4x1
+	KERNEL4x1
+	KERNEL4x1
+
+	subs	K1, K1, #1
+	bgt	_L1_M4_22
+
+
+_L1_M4_40:
+
+	ands	K1, L, #3	// K1 = L % 4
+	ble	_L1_M4_100
+
+_L1_M4_42:
+
+	KERNEL4x1
+
+	subs	K1, K1, #1
+	bgt	_L1_M4_42
+
+_L1_M4_100:
+
+	SAVE4x1
+
+#if (defined(LEFT) && defined(TRANSA)) || \
+	(!defined(LEFT) && !defined(TRANSA))
+	ldr	r3 , K
+	ldr	r4 , KKK
+	sub	r3 , r3 , r4	// K steps that were not run
+	lsls	r4 , r3 , #3	// 1 double value
+	add	BO , BO , r4
+	lsls	r4 , r3 , #5	// 4 double values
+	add	AO , AO , r4
+#endif
+
+#if defined(LEFT)
+	ldr	r3 , KK
+	add	r3 , r3 , #4	// number of values in AO
+	str	r3 , KK
+#endif
+
+
+_L1_M4_END:
+
+
+
+
+_L1_M2_BEGIN:
+
+	tst	I, #2	// bit 1 of M: one 2-row tile
+	ble	_L1_M1_BEGIN
+
+_L1_M2_20:
+
+	INIT2x1
+
+#if (defined(LEFT) && defined(TRANSA)) || \
+	(!defined(LEFT) && !defined(TRANSA))
+
+	mov	BO, BC
+#else
+	mov	BO, BC
+	ldr	r3 , KK
+	lsls	r4 , r3 , #3	// 1 double value
+	add	BO , BO , r4
+	lsls	r4 , r3 , #4	// 2 double values
+	add	AO , AO , r4
+
+#endif
+
+#ifndef TRMMKERNEL
+	ldr	L , K
+#elif (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
+	ldr	L , K
+	ldr	r3, KK
+	sub	L , L, r3
+	str	L , KKK
+#else
+	ldr	L , KK
+#ifdef LEFT
+	add	L , L , #2	// number of values in AO
+#else
+	add	L , L , #1	// number of values in BO
+#endif
+	str	L , KKK
+#endif
+
+	asrs	K1, L, #2	// K1 = L / 4
+	ble	_L1_M2_40
+
+_L1_M2_22:
+
+	KERNEL2x1
+	KERNEL2x1
+	KERNEL2x1
+	KERNEL2x1
+
+	subs	K1, K1, #1
+	bgt	_L1_M2_22
+
+
+_L1_M2_40:
+
+	ands	K1 , L, #3	// K1 = L % 4
+	ble	_L1_M2_100
+
+_L1_M2_42:
+
+	KERNEL2x1
+
+	subs	K1, K1, #1
+	bgt	_L1_M2_42
+
+_L1_M2_100:
+
+	SAVE2x1
+
+#if (defined(LEFT) && defined(TRANSA)) || \
+	(!defined(LEFT) && !defined(TRANSA))
+	ldr	r3 , K
+	ldr	r4 , KKK
+	sub	r3 , r3 , r4	// K steps that were not run
+	lsls	r4 , r3 , #3	// 1 double value
+	add	BO , BO , r4
+	lsls	r4 , r3 , #4	// 2 double values
+	add	AO , AO , r4
+#endif
+
+#if defined(LEFT)
+	ldr	r3 , KK
+	add	r3 , r3 , #2	// number of values in AO
+	str	r3 , KK
+#endif
+
+
+_L1_M2_END:
+
+
+
+_L1_M1_BEGIN:
+
+	tst	I, #1	// bit 0 of M: one 1-row tile
+	ble	_L1_END
+
+_L1_M1_20:
+
+	INIT1x1
+
+#if (defined(LEFT) && defined(TRANSA)) || \
+	(!defined(LEFT) && !defined(TRANSA))
+
+	mov	BO, BC
+#else
+	mov	BO, BC
+	ldr	r3 , KK
+	lsls	r4 , r3 , #3	// 1 double value
+	add	BO , BO , r4
+	lsls	r4 , r3 , #3	// 1 double value
+	add	AO , AO , r4
+
+#endif
+
+#ifndef TRMMKERNEL
+	ldr	L , K
+#elif (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
+	ldr	L , K
+	ldr	r3, KK
+	sub	L , L, r3
+	str	L , KKK
+#else
+	ldr	L , KK
+#ifdef LEFT
+	add	L , L , #1	// number of values in AO
+#else
+	add	L , L , #1	// number of values in BO
+#endif
+	str	L , KKK
+#endif
+
+	asrs	K1, L, #2	// K1 = L / 4
+	ble	_L1_M1_40
+
+_L1_M1_22:
+
+	KERNEL1x1
+	KERNEL1x1
+	KERNEL1x1
+	KERNEL1x1
+
+	subs	K1, K1, #1
+	bgt	_L1_M1_22
+
+
+_L1_M1_40:
+
+	ands	K1 , L, #3	// K1 = L % 4
+	ble	_L1_M1_100
+
+_L1_M1_42:
+
+	KERNEL1x1
+
+	subs	K1, K1, #1
+	bgt	_L1_M1_42
+
+_L1_M1_100:
+
+	SAVE1x1
+
+#if (defined(LEFT) && defined(TRANSA)) || \
+	(!defined(LEFT) && !defined(TRANSA))
+	ldr	r3 , K
+	ldr	r4 , KKK
+	sub	r3 , r3 , r4	// K steps that were not run
+	lsls	r4 , r3 , #3	// 1 double value
+	add	BO , BO , r4
+	lsls	r4 , r3 , #3	// 1 double value
+	add	AO , AO , r4
+#endif
+
+#if defined(LEFT)
+	ldr	r3 , KK
+	add	r3 , r3 , #1	// number of values in AO
+	str	r3 , KK
+#endif
+
+
+
+_L1_END:
+
+
+
+_L999:
+
+	sub	r3, fp, #128
+	vldm	r3, { d8 - d15}	// restore floating point registers
+
+	movs	r0, #0	// set return value
+	sub	sp, fp, #24
+	pop	{r4 - r9, fp}
+	bx	lr
+
+	EPILOGUE
+
diff --git a/kernel/arm/gemv_n.c b/kernel/arm/gemv_n.c
new file mode 100644
index 0000000..aedcca9
--- /dev/null
+++ b/kernel/arm/gemv_n.c
@@ -0,0 +1,67 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. 
Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+
+/**************************************************************************************
+ * * 2013/09/14 Saar
+ * * BLASTEST float : OK
+ * * BLASTEST double : OK
+ * CTEST : OK
+ * TEST : OK
+ * *
+ * **************************************************************************************/
+
+
+#include "common.h"
+
+int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
+{
+	BLASLONG i;
+	BLASLONG ix,iy;
+	BLASLONG j;
+	FLOAT *a_ptr;
+	FLOAT temp;
+
+	ix = 0;
+	a_ptr = a;
+
+	for (j=0; j
[... extraction gap: the rest of gemv_n.c and the beginning of the following file(s) are missing here ...]
+
+#if defined(DOUBLE)
+
+#define ABS fabs
+
+#else
+
+#define ABS fabsf
+
+#endif
+
+
+/*
+ * Index-of-maximum-absolute-value kernel.
+ *
+ * n      number of elements to examine
+ * x      input vector
+ * inc_x  stride between consecutive elements of x, must be >= 1
+ *
+ * Returns the 1-based position of the first element whose absolute value
+ * is largest, or 0 when n < 1 or inc_x < 1.
+ */
+BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+	BLASLONG i=0;
+	BLASLONG ix=0;
+	FLOAT maxf=0.0;
+	BLASLONG max=0;
+
+	/* n == 0 has to bail out as well: x[0] may not exist and the answer is 0 */
+	if (n <= 0 || inc_x < 1 ) return(max);
+
+	maxf=ABS(x[0]);
+
+	while(i < n)
+	{
+		/* maxf is already an absolute value; strict '>' keeps the first hit on ties */
+		if( ABS(x[ix]) > maxf )
+		{
+			max = i;
+			maxf = ABS(x[ix]);
+		}
+		ix += inc_x;
+		i++;
+	}
+	return(max+1);
+}
+
+
diff --git a/kernel/arm/iamin.c b/kernel/arm/iamin.c
new file mode 100644
index 0000000..fdb5d7a
--- /dev/null
+++ b/kernel/arm/iamin.c
@@ -0,0 +1,75 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+*	 BLASTEST float		: NoTest
+*	 BLASTEST double	: NoTest
+*	 CTEST			: NoTest
+*	 TEST			: NoTest
+*
+**************************************************************************************/
+
+#include "common.h"
+#include <math.h>
+
+#if defined(DOUBLE)
+
+#define ABS fabs
+
+#else
+
+#define ABS fabsf
+
+#endif
+
+
+/*
+ * Index-of-minimum-absolute-value kernel.
+ *
+ * n      number of elements to examine
+ * x      input vector
+ * inc_x  stride between consecutive elements of x, must be >= 1
+ *
+ * Returns the 1-based position of the first element whose absolute value
+ * is smallest, or 0 when n < 1 or inc_x < 1.
+ */
+BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+	BLASLONG i=0;
+	BLASLONG ix=0;
+	FLOAT minf=0.0;
+	BLASLONG min=0;
+
+	/* n == 0 has to bail out as well: x[0] may not exist and the answer is 0 */
+	if (n <= 0 || inc_x < 1 ) return(min);
+
+	minf=ABS(x[0]);
+
+	while(i < n)
+	{
+		/* minf is already an absolute value; strict '<' keeps the first hit on ties */
+		if( ABS(x[ix]) < minf )
+		{
+			min = i;
+			minf = ABS(x[ix]);
+		}
+		ix += inc_x;
+		i++;
+	}
+	return(min+1);
+}
+
+
diff --git a/kernel/arm/imax.c b/kernel/arm/imax.c
new file mode 100644
index 0000000..e3e4b9a
--- /dev/null
+++ b/kernel/arm/imax.c
@@ -0,0 +1,67 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+
+/**************************************************************************************
+* 2013/09/14 Saar
+*	 BLASTEST float		: NoTest
+*	 BLASTEST double	: NoTest
+*	 CTEST			: NoTest
+*	 TEST			: NoTest
+*
+**************************************************************************************/
+
+#include "common.h"
+#include <math.h>
+
+
+/*
+ * Index-of-maximum kernel (signed comparison, no absolute value).
+ *
+ * n      number of elements to examine
+ * x      input vector
+ * inc_x  stride between consecutive elements of x, must be >= 1
+ *
+ * Returns the 1-based position of the first largest element,
+ * or 0 when n < 1 or inc_x < 1.
+ */
+BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+	BLASLONG i=0;
+	BLASLONG ix=0;
+	FLOAT maxf=0.0;
+	BLASLONG max=0;
+
+	/* n == 0 has to bail out as well: x[0] may not exist and the answer is 0 */
+	if (n <= 0 || inc_x < 1 ) return(max);
+
+	maxf=x[0];
+
+	while(i < n)
+	{
+		/* strict '>' keeps the first hit on ties */
+		if( x[ix] > maxf )
+		{
+			max = i;
+			maxf = x[ix];
+		}
+		ix += inc_x;
+		i++;
+	}
+	return(max+1);
+}
+
+
diff --git a/kernel/arm/imin.c b/kernel/arm/imin.c
new file mode 100644
index 0000000..fbcadc2
--- /dev/null
+++ b/kernel/arm/imin.c
@@ -0,0 +1,65 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1.
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/
+
+
+/**************************************************************************************
+* 2013/08/19 Saar
+*	 BLASTEST float
+*	 BLASTEST double
+*
+**************************************************************************************/
+
+#include "common.h"
+#include <math.h>
+
+
+/*
+ * Index-of-minimum kernel (signed comparison, no absolute value).
+ *
+ * n      number of elements to examine
+ * x      input vector
+ * inc_x  stride between consecutive elements of x, must be >= 1
+ *
+ * Returns the 1-based position of the first smallest element,
+ * or 0 when n < 1 or inc_x < 1.
+ */
+BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+	BLASLONG i=0;
+	BLASLONG ix=0;
+	FLOAT minf=0.0;
+	BLASLONG min=0;
+
+	/* n == 0 has to bail out as well: x[0] may not exist and the answer is 0 */
+	if (n <= 0 || inc_x < 1 ) return(min);
+
+	minf=x[0];
+
+	while(i < n)
+	{
+		/* '<' (not '>'): a smaller element replaces the running minimum;
+		   the strict form keeps the first hit on ties */
+		if( x[ix] < minf )
+		{
+			min = i;
+			minf = x[ix];
+		}
+		ix += inc_x;
+		i++;
+	}
+	return(min+1);
+}
+
+
diff --git a/kernel/arm/izamax.c b/kernel/arm/izamax.c
new file mode 100644
index 0000000..a6ba863
--- /dev/null
+++ b/kernel/arm/izamax.c
@@ -0,0 +1,81 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+*	 BLASTEST float		: NoTest
+*	 BLASTEST double	: NoTest
+*	 CTEST			: OK
+*	 TEST			: OK
+*
+**************************************************************************************/
+
+#include "common.h"
+#include <math.h>
+
+#if defined(DOUBLE)
+
+#define ABS fabs
+
+#else
+
+#define ABS fabsf
+
+#endif
+
+/* |re| + |im| of the pair stored at x[i], x[i+1]; fully parenthesized so it
+   can be used inside any larger expression */
+#define CABS1(x,i) (ABS((x)[(i)])+ABS((x)[(i)+1]))
+
+/*
+ * Complex index-of-maximum kernel, measured by |re| + |im|.
+ *
+ * n      number of complex elements to examine
+ * x      input vector, read as consecutive (re, im) pairs
+ * inc_x  stride in complex elements, must be >= 1
+ *
+ * Returns the 1-based position of the first element with the largest
+ * |re| + |im|, or 0 when n < 1 or inc_x < 1.
+ */
+BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+	BLASLONG i=0;
+	BLASLONG ix=0;
+	FLOAT maxf;
+	BLASLONG max=0;
+	BLASLONG inc_x2;
+
+	/* n == 0 has to bail out as well: x[0], x[1] may not exist and the answer is 0 */
+	if (n <= 0 || inc_x < 1 ) return(max);
+
+	/* each complex element occupies two FLOATs */
+	inc_x2 = 2 * inc_x;
+
+	maxf = CABS1(x,0);
+
+	while(i < n)
+	{
+		/* strict '>' keeps the first hit on ties */
+		if( CABS1(x,ix) > maxf )
+		{
+			max = i;
+			maxf = CABS1(x,ix);
+		}
+		ix += inc_x2;
+		i++;
+	}
+	return(max+1);
+}
+
+
diff --git a/kernel/arm/izamin.c b/kernel/arm/izamin.c
new file mode 100644
index 0000000..45c2a7c
--- /dev/null
+++ b/kernel/arm/izamin.c
@@ -0,0 +1,81 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1.
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+*	 BLASTEST float		: NoTest
+*	 BLASTEST double	: NoTest
+*	 CTEST			: NoTest
+*	 TEST			: NoTest
+*
+**************************************************************************************/
+
+#include "common.h"
+#include <math.h>
+
+#if defined(DOUBLE)
+
+#define ABS fabs
+
+#else
+
+#define ABS fabsf
+
+#endif
+
+/* |re| + |im| of the pair stored at x[i], x[i+1]; fully parenthesized so it
+   can be used inside any larger expression */
+#define CABS1(x,i) (ABS((x)[(i)])+ABS((x)[(i)+1]))
+
+/*
+ * Complex index-of-minimum kernel, measured by |re| + |im|.
+ *
+ * n      number of complex elements to examine
+ * x      input vector, read as consecutive (re, im) pairs
+ * inc_x  stride in complex elements, must be >= 1
+ *
+ * Returns the 1-based position of the first element with the smallest
+ * |re| + |im|, or 0 when n < 1 or inc_x < 1.
+ */
+BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+	BLASLONG i=0;
+	BLASLONG ix=0;
+	FLOAT minf;
+	BLASLONG min=0;
+	BLASLONG inc_x2;
+
+	/* n == 0 has to bail out as well: x[0], x[1] may not exist and the answer is 0 */
+	if (n <= 0 || inc_x < 1 ) return(min);
+
+	/* each complex element occupies two FLOATs */
+	inc_x2 = 2 * inc_x;
+
+	minf = CABS1(x,0);
+
+	while(i < n)
+	{
+		/* strict '<' keeps the first hit on ties */
+		if( CABS1(x,ix) < minf )
+		{
+			min = i;
+			minf = CABS1(x,ix);
+		}
+		ix += inc_x2;
+		i++;
+	}
+	return(min+1);
+}
+
+
diff --git a/kernel/arm/max.c b/kernel/arm/max.c
new file mode 100644
index 0000000..3239e34
--- /dev/null
+++ b/kernel/arm/max.c
@@ -0,0 +1,63 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+*	 BLASTEST float		: NoTest
+*	 BLASTEST double	: NoTest
+*	 CTEST			: NoTest
+*	 TEST			: NoTest
+*
+**************************************************************************************/
+
+#include "common.h"
+#include <math.h>
+
+
+/*
+ * Maximum-value kernel (signed comparison, no absolute value).
+ *
+ * n      number of elements to examine
+ * x      input vector
+ * inc_x  stride between consecutive elements of x, must be >= 1
+ *
+ * Returns the largest element, or 0.0 when n < 1 or inc_x < 1.
+ */
+FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+	BLASLONG i=0;
+	BLASLONG ix=0;
+	FLOAT maxf=0.0;
+
+	/* n == 0 has to bail out as well: x[0] may not exist */
+	if (n <= 0 || inc_x < 1 ) return(maxf);
+
+	maxf=x[0];
+
+	while(i < n)
+	{
+		if( x[ix] > maxf )
+		{
+			maxf = x[ix];
+		}
+		ix += inc_x;
+		i++;
+	}
+	return(maxf);
+}
+
+
diff --git a/kernel/arm/min.c b/kernel/arm/min.c
new file mode 100644
index 0000000..de4c471
--- /dev/null
+++ b/kernel/arm/min.c
@@ -0,0 +1,63 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1.
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+*	 BLASTEST float		: NoTest
+*	 BLASTEST double	: NoTest
+*	 CTEST			: NoTest
+*	 TEST			: NoTest
+*
+**************************************************************************************/
+
+#include "common.h"
+#include <math.h>
+
+
+/*
+ * Minimum-value kernel (signed comparison, no absolute value).
+ *
+ * n      number of elements to examine
+ * x      input vector
+ * inc_x  stride between consecutive elements of x, must be >= 1
+ *
+ * Returns the smallest element, or 0.0 when n < 1 or inc_x < 1.
+ */
+FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+	BLASLONG i=0;
+	BLASLONG ix=0;
+	FLOAT minf=0.0;
+
+	/* n == 0 has to bail out as well: x[0] may not exist */
+	if (n <= 0 || inc_x < 1 ) return(minf);
+
+	minf=x[0];
+
+	while(i < n)
+	{
+		if( x[ix] < minf )
+		{
+			minf = x[ix];
+		}
+		ix += inc_x;
+		i++;
+	}
+	return(minf);
+}
+
+
diff --git a/kernel/arm/nrm2.c b/kernel/arm/nrm2.c
new file mode 100644
index 0000000..d65c5a4
--- /dev/null
+++ b/kernel/arm/nrm2.c
@@ -0,0 +1,88 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/13 Saar
+*	 BLASTEST float		: OK
+*	 BLASTEST double	: OK
+*	 CTEST			: OK
+*	 TEST			: OK
+*
+**************************************************************************************/
+
+#include "common.h"
+#include <math.h>
+
+#if defined(DOUBLE)
+
+#define ABS fabs
+
+#else
+
+#define ABS fabsf
+
+#endif
+
+
+/*
+ * Euclidean-norm kernel using a running scale and a scaled sum of squares.
+ *
+ * n      number of elements
+ * x      input vector
+ * inc_x  stride between consecutive elements of x, must be >= 1
+ *
+ * Returns 0.0 when n < 0 or inc_x < 1, |x[0]| when n == 1, and otherwise
+ * largest * sqrt(sumsq), where largest is the biggest |x[i]| met so far and
+ * sumsq accumulates the squares of the elements divided by largest.
+ */
+FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
+{
+	BLASLONG pos;
+	BLASLONG limit;
+	FLOAT largest = 0.0;
+	FLOAT sumsq   = 1.0;
+	FLOAT mag     = 0.0;
+
+	if (n < 0 || inc_x < 1 ) return(0.0);
+	if ( n == 1 ) return( ABS(x[0]) );
+
+	/* one past the last offset that is visited */
+	limit = n * inc_x;
+
+	for (pos = 0; pos < limit; pos += inc_x)
+	{
+		/* zeros add nothing to the sum */
+		if ( x[pos] == 0.0 ) continue;
+
+		mag = ABS( x[pos] );
+		if ( largest < mag )
+		{
+			/* new largest magnitude: rescale what has been summed so far */
+			sumsq = 1 + sumsq * ( largest / mag ) * ( largest / mag );
+			largest = mag;
+		}
+		else
+		{
+			sumsq += ( mag/largest ) * ( mag/largest );
+		}
+	}
+
+	largest = largest * sqrt( sumsq );
+	return(largest);
+}
+
+
diff --git a/kernel/arm/rot.c b/kernel/arm/rot.c
new file mode 100644
index 0000000..aa60b44
--- /dev/null
+++ b/kernel/arm/rot.c
@@ -0,0 +1,62 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1.
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+*	 BLASTEST float		: OK
+*	 BLASTEST double	: OK
+*	 CTEST			: OK
+*	 TEST			: OK
+*
+**************************************************************************************/
+
+#include "common.h"
+
+/*
+ * Plane-rotation kernel: for each of the n element pairs,
+ *     x <- c*x + s*y
+ *     y <- c*y - s*x   (using the x value from before the update)
+ *
+ * n             number of element pairs
+ * x, inc_x      first vector and its stride
+ * y, inc_y      second vector and its stride
+ * c, s          rotation coefficients
+ *
+ * Always returns 0; does nothing when n <= 0.
+ */
+int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s)
+{
+	BLASLONG k;
+	BLASLONG px = 0;
+	BLASLONG py = 0;
+	FLOAT rotated_x;
+
+	if ( n <= 0 ) return(0);
+
+	for (k = 0; k < n; k++)
+	{
+		/* compute the new x first, but store it last so that y still sees the old x */
+		rotated_x = c*x[px] + s*y[py] ;
+		y[py]     = c*y[py] - s*x[px] ;
+		x[px]     = rotated_x ;
+
+		px += inc_x ;
+		py += inc_y ;
+	}
+
+	return(0);
+}
+
+
diff --git a/kernel/arm/scal.c b/kernel/arm/scal.c
new file mode 100644
index 0000000..d385c46
--- /dev/null
+++ b/kernel/arm/scal.c
@@ -0,0 +1,58 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/09/14 Saar
+*	 BLASTEST float		: OK
+*	 BLASTEST double	: OK
+*	 CTEST			: OK
+*	 TEST			: OK
+*
+**************************************************************************************/
+
+#include "common.h"
+
+/*
+ * Scaling kernel: multiplies n elements of x, taken inc_x apart, by da in place.
+ *
+ * n       number of elements to scale
+ * da      scale factor; da == 1.0 leaves x untouched
+ * x       vector that is updated in place
+ * inc_x   stride between consecutive elements of x, must be >= 1
+ *
+ * dummy0, dummy1, y, inc_y, dummy and dummy2 are not read by this function.
+ * Always returns 0.
+ */
+int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
+{
+	BLASLONG pos;
+	BLASLONG limit;
+
+	if ( n < 0 || inc_x < 1 ) return(0);
+	if ( da == 1.0 ) return(0);
+
+	/* one past the last offset that is visited */
+	limit = n * inc_x;
+
+	for (pos = 0; pos < limit; pos += inc_x)
+	{
+		x[pos] = da * x[pos] ;
+	}
+
+	return(0);
+}
+
+
diff --git a/kernel/arm/swap.c b/kernel/arm/swap.c
new file mode 100644
index 0000000..1ca9e76
--- /dev/null
+++ b/kernel/arm/swap.c
@@ -0,0 +1,62 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2.
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/
+
+/**************************************************************************************
+* 2013/08/20 Saar
+*	 BLASTEST float		OK
+*	 BLASTEST double	OK
+*
+**************************************************************************************/
+
+#include "common.h"
+#include <stdio.h>
+
+/*
+ * Swap kernel: exchanges n elements of x (stride inc_x) with n elements of
+ * y (stride inc_y).
+ *
+ * dummy0, dummy1, dummy3, dummy and dummy2 are not read by this function.
+ * Always returns 0; does nothing when n <= 0.
+ */
+int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
+{
+	BLASLONG k;
+	BLASLONG px;
+	BLASLONG py;
+	FLOAT held;
+
+	if ( n < 0 ) return(0);
+
+	for (k = 0, px = 0, py = 0; k < n; k++, px += inc_x, py += inc_y)
+	{
+		held  = x[px] ;
+		x[px] = y[py] ;
+		y[py] = held ;
+	}
+
+	return(0);
+}
+
+
diff --git a/kernel/arm/zamax.c b/kernel/arm/zamax.c
new file mode 100644
index 0000000..8c2a5c3
--- /dev/null
+++ b/kernel/arm/zamax.c
@@ -0,0 +1,81 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + +#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT maxf[2]; + BLASLONG max=0; + BLASLONG inc_x2; + + if (n < 0 || inc_x < 1 ) return(0.0); + + inc_x2 = 2 * inc_x; + + maxf[0] = ABS(x[ix]); + maxf[1] = ABS(x[ix+1]); + + while(i < n) + { + if( CABS1(x,ix) > CABS1(maxf,0) ) + { + max = i; + maxf[0] = ABS(x[ix]); + maxf[1] = ABS(x[ix+1]); + } + ix += inc_x2; + i++; + } + return(CABS1(maxf,0)); +} + + diff --git a/kernel/arm/zamin.c b/kernel/arm/zamin.c new file mode 100644 index 0000000..6956ced --- /dev/null +++ b/kernel/arm/zamin.c @@ -0,0 +1,81 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : NoTest +* TEST : NoTest +* +**************************************************************************************/ + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + +#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + BLASLONG ix=0; + FLOAT minf[2]; + BLASLONG min=0; + BLASLONG inc_x2; + + if (n < 0 || inc_x < 1 ) return(0.0); + + inc_x2 = 2 * inc_x; + + minf[0] = ABS(x[ix]); + minf[1] = ABS(x[ix+1]); + + while(i < n) + { + if( CABS1(x,ix) < CABS1(minf,0) ) + { + min = i; + minf[0] = ABS(x[ix]); + minf[1] = ABS(x[ix+1]); + } + ix += inc_x2; + i++; + } + return(CABS1(minf,0)); +} + + diff --git a/kernel/arm/zasum.c b/kernel/arm/zasum.c new file mode 100644 index 0000000..13acfc0 --- /dev/null +++ b/kernel/arm/zasum.c @@ -0,0 +1,71 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + + +#include "common.h" +#include + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + +#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + BLASLONG i=0; + FLOAT sumf = 0.0; + BLASLONG inc_x2; + if (n < 0 || inc_x < 1 ) return(sumf); + + inc_x2 = 2 * inc_x; + + n *= inc_x2; + while(i < n) + { + sumf += CABS1(x,i); + i += inc_x2; + } + return(sumf); +} + + diff --git a/kernel/arm/zaxpy.c b/kernel/arm/zaxpy.c new file mode 100644 index 0000000..28a4380 --- /dev/null +++ b/kernel/arm/zaxpy.c @@ -0,0 +1,72 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/15 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + + +#include "common.h" + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +{ + BLASLONG i=0; + BLASLONG ix,iy; + + if ( n < 0 ) return(0); + if ( da_r == 0.0 && da_i == 0.0 ) return(0); + + ix = 0; + iy = 0; + + BLASLONG inc_x2 = 2 * inc_x; + BLASLONG inc_y2 = 2 * inc_y; + + while(i < n) + { +#if !defined(CONJ) + y[iy] += ( da_r * x[ix] - da_i * x[ix+1] ) ; + y[iy+1] += ( da_r * x[ix+1] + da_i * x[ix] ) ; +#else + y[iy] += ( da_r * x[ix] + da_i * x[ix+1] ) ; + y[iy+1] -= ( da_r * x[ix+1] - da_i * x[ix] ) ; +#endif + ix += inc_x2 ; + iy += inc_y2 ; + i++ ; + + } + return(0); + +} + + diff --git a/kernel/arm/zcopy.c b/kernel/arm/zcopy.c new file mode 100644 index 0000000..6547112 --- /dev/null +++ b/kernel/arm/zcopy.c @@ -0,0 +1,63 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. 
Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" + +int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + + if ( n < 0 ) return(0); + + BLASLONG inc_x2 = 2 * inc_x; + BLASLONG inc_y2 = 2 * inc_y; + + while(i < n) + { + + y[iy] = x[ix] ; + y[iy+1] = x[ix+1] ; + ix += inc_x2; + iy += inc_y2; + i++ ; + + } + return(0); + +} + + diff --git a/kernel/arm/zdot.c b/kernel/arm/zdot.c new file mode 100644 index 0000000..096ced9 --- /dev/null +++ b/kernel/arm/zdot.c @@ -0,0 +1,78 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : FAIL +* BLASTEST double : FAIL +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" +#include + +FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + FLOAT dot[2]; + FLOAT _Complex result; + + dot[0]=0.0; + dot[1]=0.0; + + __real__ result = 0.0 ; + __imag__ result = 0.0 ; + + if ( n < 1 ) return(result); + + BLASLONG inc_x2 = 2 * inc_x ; + BLASLONG inc_y2 = 2 * inc_y ; + + while(i < n) + { +#if !defined(CONJ) + dot[0] += ( x[ix] * y[iy] - x[ix+1] * y[iy+1] ) ; + dot[1] += ( x[ix+1] * y[iy] + x[ix] * y[iy+1] ) ; +#else + dot[0] += ( x[ix] * y[iy] + x[ix+1] * y[iy+1] ) ; + dot[1] -= ( x[ix+1] * y[iy] - x[ix] * y[iy+1] ) ; +#endif + ix += inc_x2 ; + iy += inc_y2 ; + i++ ; + + } + __real__ result = dot[0]; + __imag__ result = dot[1]; + return(result); + +} + + diff --git a/kernel/arm/zgemv_n.c b/kernel/arm/zgemv_n.c new file mode 100644 index 0000000..5f00c34 --- /dev/null +++ b/kernel/arm/zgemv_n.c @@ -0,0 +1,125 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. 
Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** + * * 2013/09/15 Saar + * * BLASTEST float : OK + * * BLASTEST double : OK + * CTEST : OK + * TEST : OK + * * + * **************************************************************************************/ + + +#include "common.h" + +int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) +{ + BLASLONG i; + BLASLONG ix,iy; + BLASLONG j; + FLOAT *a_ptr; + FLOAT temp_r,temp_i; + BLASLONG inc_x2,inc_y2; + BLASLONG lda2; + BLASLONG i2; + + if( alpha_r == 0.0 && alpha_i == 0.0 ) return(0); + + lda2 = 2*lda; + + inc_x2 = 2 * inc_x; + inc_y2 = 2 * inc_y; + + ix = 0; + a_ptr = a; + +#if !defined(CONJ) + for (j=0; j + +#if defined(DOUBLE) + +#define ABS fabs + +#else + +#define ABS fabsf + +#endif + + + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ + 
BLASLONG i=0; + FLOAT scale = 0.0; + FLOAT ssq = 1.0; + BLASLONG inc_x2; + FLOAT temp; + + if (n < 0 || inc_x < 1 ) return(0.0); + + inc_x2 = 2 * inc_x; + + n *= inc_x2; + while(i < n) + { + + if ( x[i] != 0.0 ) + { + temp = ABS( x[i] ); + if ( scale < temp ) + { + ssq = 1 + ssq * ( scale / temp ) * ( scale / temp ); + scale = temp ; + } + else + { + ssq += ( temp / scale ) * ( temp / scale ); + } + + } + + if ( x[i+1] != 0.0 ) + { + temp = ABS( x[i+1] ); + if ( scale < temp ) + { + ssq = 1 + ssq * ( scale / temp ) * ( scale / temp ); + scale = temp ; + } + else + { + ssq += ( temp / scale ) * ( temp / scale ); + } + + } + + + i += inc_x2; + } + scale = scale * sqrt( ssq ); + return(scale); + +} + + diff --git a/kernel/arm/zrot.c b/kernel/arm/zrot.c new file mode 100644 index 0000000..4a2f37f --- /dev/null +++ b/kernel/arm/zrot.c @@ -0,0 +1,68 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" + +int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s) +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + FLOAT temp[2]; + + if ( n <= 0 ) return(0); + + BLASLONG inc_x2 = 2 * inc_x ; + BLASLONG inc_y2 = 2 * inc_y ; + + while(i < n) + { + temp[0] = c*x[ix] + s*y[iy] ; + temp[1] = c*x[ix+1] + s*y[iy+1] ; + y[iy] = c*y[iy] - s*x[ix] ; + y[iy+1] = c*y[iy+1] - s*x[ix+1] ; + x[ix] = temp[0] ; + x[ix+1] = temp[1] ; + + ix += inc_x2 ; + iy += inc_y2 ; + i++ ; + + } + return(0); + +} + + diff --git a/kernel/arm/zscal.c b/kernel/arm/zscal.c new file mode 100644 index 0000000..833dc8c --- /dev/null +++ b/kernel/arm/zscal.c @@ -0,0 +1,64 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/************************************************************************************** +* 2013/09/14 Saar +* BLASTEST float : OK +* BLASTEST double : OK +* CTEST : OK +* TEST : OK +* +**************************************************************************************/ + +#include "common.h" + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +{ + BLASLONG i=0; + BLASLONG inc_x2; + BLASLONG ip = 0; + FLOAT temp; + + if ( n < 0 || inc_x < 1 ) return(0); + + inc_x2 = 2 * inc_x; + for ( i=0; i + +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dummy4, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + FLOAT temp[2]; + + if ( n < 0 ) return(0); + + BLASLONG inc_x2 = 2 * inc_x; + BLASLONG inc_y2 = 2 * inc_y; + + while(i < n) + { + + temp[0] = x[ix] ; + temp[1] = x[ix+1] ; + x[ix] = y[iy] ; + x[ix+1] = y[iy+1] ; + y[iy] = temp[0] ; + y[iy+1] = temp[1] ; + + ix += inc_x2 ; + iy += inc_y2 ; + i++ ; + + } + return(0); + +} + +