arm: add softfp support in vfp gemv kernels
author Ashwin Sekhar T K <ashwin.sekhar@cavium.com>
Sat, 1 Jul 2017 19:08:44 +0000 (00:38 +0530)
committer Ashwin Sekhar T K <ashwin.sekhar@cavium.com>
Sat, 1 Jul 2017 19:33:31 +0000 (01:03 +0530)
kernel/arm/KERNEL.ARMV6
kernel/arm/KERNEL.ARMV7
kernel/arm/cgemv_n_vfp.S
kernel/arm/cgemv_t_vfp.S
kernel/arm/gemv_n_vfp.S
kernel/arm/gemv_n_vfpv3.S
kernel/arm/gemv_t_vfp.S
kernel/arm/gemv_t_vfpv3.S
kernel/arm/zgemv_n_vfp.S
kernel/arm/zgemv_t_vfp.S

index 86d3dab..022a931 100644 (file)
@@ -62,6 +62,16 @@ DSWAPKERNEL  = swap_vfp.S
 CSWAPKERNEL  = swap_vfp.S
 ZSWAPKERNEL  = swap_vfp.S
 
+SGEMVNKERNEL = gemv_n_vfp.S
+DGEMVNKERNEL = gemv_n_vfp.S
+CGEMVNKERNEL = cgemv_n_vfp.S
+ZGEMVNKERNEL = zgemv_n_vfp.S
+
+SGEMVTKERNEL = gemv_t_vfp.S
+DGEMVTKERNEL = gemv_t_vfp.S
+CGEMVTKERNEL = cgemv_t_vfp.S
+ZGEMVTKERNEL = zgemv_t_vfp.S
+
 SGEMMKERNEL    = ../generic/gemmkernel_4x2.c
 ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
 SGEMMINCOPY    = sgemm_ncopy_4_vfp.S
@@ -101,16 +111,6 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy.o
 
 ifeq ($(ARM_ABI),hard)
 
-SGEMVNKERNEL = gemv_n_vfp.S
-DGEMVNKERNEL = gemv_n_vfp.S
-CGEMVNKERNEL = cgemv_n_vfp.S
-ZGEMVNKERNEL = zgemv_n_vfp.S
-
-SGEMVTKERNEL = gemv_t_vfp.S
-DGEMVTKERNEL = gemv_t_vfp.S
-CGEMVTKERNEL = cgemv_t_vfp.S
-ZGEMVTKERNEL = zgemv_t_vfp.S
-
 STRMMKERNEL    = strmm_kernel_4x2_vfp.S
 DTRMMKERNEL    = dtrmm_kernel_4x2_vfp.S
 CTRMMKERNEL    = ctrmm_kernel_2x2_vfp.S
index f4823b7..0872cb8 100644 (file)
@@ -5,6 +5,9 @@ DNRM2KERNEL  = nrm2_vfpv3.S
 CNRM2KERNEL  = nrm2_vfpv3.S
 ZNRM2KERNEL  = nrm2_vfpv3.S
 
+SGEMVNKERNEL = gemv_n_vfpv3.S
+DGEMVNKERNEL = gemv_n_vfpv3.S
+
 STRMMKERNEL    = ../generic/trmmkernel_4x4.c
 DTRMMKERNEL    = ../generic/trmmkernel_4x4.c
 
@@ -22,9 +25,6 @@ DGEMMOTCOPYOBJ = dgemm_otcopy.o
 
 ifeq ($(ARM_ABI),hard)
 
-SGEMVNKERNEL = gemv_n_vfpv3.S
-DGEMVNKERNEL = gemv_n_vfpv3.S
-
 STRMMKERNEL  =  strmm_kernel_4x4_vfpv3.S
 DTRMMKERNEL  =  dtrmm_kernel_4x4_vfpv3.S
 CTRMMKERNEL  =  ctrmm_kernel_2x2_vfpv3.S
index 5d27486..4a1cd2d 100644 (file)
@@ -38,11 +38,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #define STACKSIZE 256
 
-#define        OLD_LDA         [fp, #0 ]
-#define        X               [fp, #4 ]
-#define        OLD_INC_X       [fp, #8 ]
-#define        Y               [fp, #12 ]
-#define        OLD_INC_Y       [fp, #16 ]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR     r3
+#define OLD_ALPHAI     [fp, #0 ]
+#define OLD_A_SOFTFP   [fp, #4 ]
+#define OLD_LDA                [fp, #8 ]
+#define X              [fp, #12 ]
+#define OLD_INC_X      [fp, #16 ]
+#define Y              [fp, #20 ]
+#define OLD_INC_Y      [fp, #24 ]
+#else
+#define OLD_LDA                [fp, #0 ]
+#define X              [fp, #4 ]
+#define OLD_INC_X      [fp, #8 ]
+#define Y              [fp, #12 ]
+#define OLD_INC_Y      [fp, #16 ]
+#endif
+
 #define OLD_A          r3
 #define        OLD_M           r0
 
@@ -462,6 +474,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        cmp     N, #0
        ble     cgemvn_kernel_L999
 
+#if !defined(__ARM_PCS_VFP)
+       vmov    s0, OLD_ALPHAR
+       vldr    s1, OLD_ALPHAI
+       ldr     OLD_A, OLD_A_SOFTFP
+#endif
+
        str     OLD_A, A
        str     OLD_M, M
        vstr    s0 , ALPHA_R
index 76c8a8f..e1c750c 100644 (file)
@@ -38,11 +38,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #define STACKSIZE 256
 
-#define        OLD_LDA         [fp, #0 ]
-#define        X               [fp, #4 ]
-#define        OLD_INC_X       [fp, #8 ]
-#define        Y               [fp, #12 ]
-#define        OLD_INC_Y       [fp, #16 ]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR     r3
+#define OLD_ALPHAI     [fp, #0 ]
+#define OLD_A_SOFTFP   [fp, #4 ]
+#define OLD_LDA                [fp, #8 ]
+#define X              [fp, #12 ]
+#define OLD_INC_X      [fp, #16 ]
+#define Y              [fp, #20 ]
+#define OLD_INC_Y      [fp, #24 ]
+#else
+#define OLD_LDA                [fp, #0 ]
+#define X              [fp, #4 ]
+#define OLD_INC_X      [fp, #8 ]
+#define Y              [fp, #12 ]
+#define OLD_INC_Y      [fp, #16 ]
+#endif
+
 #define OLD_A          r3
 #define        OLD_N           r1
 
@@ -359,6 +371,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        cmp     OLD_N, #0
        ble     cgemvt_kernel_L999
 
+#if !defined(__ARM_PCS_VFP)
+       vmov    s0, OLD_ALPHAR
+       vldr    s1, OLD_ALPHAI
+       ldr     OLD_A, OLD_A_SOFTFP
+#endif
+
        str     OLD_A, A
        str     OLD_N, N
 
index 385370b..7c154d7 100644 (file)
@@ -38,11 +38,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #define STACKSIZE 256
 
-#define        OLD_LDA         [fp, #0 ]
-#define        X               [fp, #4 ]
-#define        OLD_INC_X       [fp, #8 ]
-#define        Y               [fp, #12 ]
-#define        OLD_INC_Y       [fp, #16 ]
+#if !defined(__ARM_PCS_VFP)
+
+#if !defined(DOUBLE)
+#define OLD_ALPHA      r3
+#define OLD_A_SOFTFP   [fp, #0 ]
+#define OLD_LDA                [fp, #4 ]
+#define X              [fp, #8 ]
+#define OLD_INC_X      [fp, #12 ]
+#define Y              [fp, #16 ]
+#define OLD_INC_Y      [fp, #20 ]
+#else
+#define OLD_ALPHA      [fp, #0 ]
+#define OLD_A_SOFTFP   [fp, #8 ]
+#define OLD_LDA                [fp, #12]
+#define X              [fp, #16]
+#define OLD_INC_X      [fp, #20]
+#define Y              [fp, #24]
+#define OLD_INC_Y      [fp, #28]
+#endif
+
+#else
+
+#define OLD_LDA                [fp, #0 ]
+#define X              [fp, #4 ]
+#define OLD_INC_X      [fp, #8 ]
+#define Y              [fp, #12 ]
+#define OLD_INC_Y      [fp, #16 ]
+
+#endif
+
 #define OLD_A          r3
 #define        OLD_M           r0
 
@@ -508,6 +533,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        cmp     N, #0
        ble     gemvn_kernel_L999
 
+#if !defined(__ARM_PCS_VFP)
+#if !defined(DOUBLE)
+       vmov    s0, OLD_ALPHA
+#else
+       vldr    d0, OLD_ALPHA
+#endif
+       ldr     OLD_A, OLD_A_SOFTFP
+#endif
+
        str     OLD_A, A
        str     OLD_M, M
 
index 93bf23e..54f958b 100644 (file)
@@ -38,25 +38,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #define STACKSIZE 256
 
-#ifndef ARM_SOFTFP_ABI
-//hard
-#define        OLD_LDA         [fp, #0 ]
-#define        X               [fp, #4 ]
-#define        OLD_INC_X       [fp, #8 ]
-#define        Y               [fp, #12 ]
-#define        OLD_INC_Y       [fp, #16 ]
-#define OLD_A          r3
-#else
-#define OLD_A_SOFTFP   [fp, #0 ]
-#define        OLD_LDA         [fp, #4 ]
-#define        X               [fp, #8 ]
-#define        OLD_INC_X       [fp, #12 ]
-#define        Y               [fp, #16 ]
-#define        OLD_INC_Y       [fp, #20 ]
+#if !defined(__ARM_PCS_VFP)
+
+#if !defined(DOUBLE)
 #define OLD_ALPHA      r3
-#define OLD_A          r3
+#define OLD_A_SOFTFP   [fp, #0 ]
+#define OLD_LDA                [fp, #4 ]
+#define X              [fp, #8 ]
+#define OLD_INC_X      [fp, #12 ]
+#define Y              [fp, #16 ]
+#define OLD_INC_Y      [fp, #20 ]
+#else
+#define OLD_ALPHA      [fp, #0 ]
+#define OLD_A_SOFTFP   [fp, #8 ]
+#define OLD_LDA                [fp, #12]
+#define X              [fp, #16]
+#define OLD_INC_X      [fp, #20]
+#define Y              [fp, #24]
+#define OLD_INC_Y      [fp, #28]
+#endif
+
+#else
+
+#define OLD_LDA                [fp, #0 ]
+#define X              [fp, #4 ]
+#define OLD_INC_X      [fp, #8 ]
+#define Y              [fp, #12 ]
+#define OLD_INC_Y      [fp, #16 ]
+
 #endif
 
+#define OLD_A          r3
 #define        OLD_M           r0
 
 #define AO1    r0
@@ -565,18 +577,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        cmp     N, #0
        ble     gemvn_kernel_L999
 
-#ifndef DOUBLE
-#ifdef ARM_SOFTFP_ABI
-
-       vmov    s0,     OLD_ALPHA
-       ldr     OLD_A,  OLD_A_SOFTFP
+#if !defined(__ARM_PCS_VFP)
+#if !defined(DOUBLE)
+       vmov    s0, OLD_ALPHA
+#else
+       vldr    d0, OLD_ALPHA
 #endif
+       ldr     OLD_A, OLD_A_SOFTFP
 #endif
 
        str     OLD_A, A
        str     OLD_M, M
-       
-       
+
        ldr    INC_X , OLD_INC_X
        ldr    INC_Y , OLD_INC_Y
 
index 816be54..9559d18 100644 (file)
@@ -38,25 +38,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #define STACKSIZE 256
 
-#ifndef ARM_SOFTFP_ABI
-//hard abi     
-#define        OLD_LDA         [fp, #0 ]
-#define        X               [fp, #4 ]
-#define        OLD_INC_X       [fp, #8 ]
-#define        Y               [fp, #12 ]
-#define        OLD_INC_Y       [fp, #16 ]
-#define OLD_A          r3
-#else
-#define OLD_A_SOFTFP   [fp, #0 ]
-#define        OLD_LDA         [fp, #4 ]
-#define        X               [fp, #8 ]
-#define        OLD_INC_X       [fp, #12 ]
-#define        Y               [fp, #16 ]
-#define        OLD_INC_Y       [fp, #20 ]
+#if !defined(__ARM_PCS_VFP)
+
+#if !defined(DOUBLE)
 #define OLD_ALPHA      r3
-#define OLD_A          r3
+#define OLD_A_SOFTFP   [fp, #0 ]
+#define OLD_LDA                [fp, #4 ]
+#define X              [fp, #8 ]
+#define OLD_INC_X      [fp, #12 ]
+#define Y              [fp, #16 ]
+#define OLD_INC_Y      [fp, #20 ]
+#else
+#define OLD_ALPHA      [fp, #0 ]
+#define OLD_A_SOFTFP   [fp, #8 ]
+#define OLD_LDA                [fp, #12]
+#define X              [fp, #16]
+#define OLD_INC_X      [fp, #20]
+#define Y              [fp, #24]
+#define OLD_INC_Y      [fp, #28]
 #endif
 
+#else
+
+#define OLD_LDA                [fp, #0 ]
+#define X              [fp, #4 ]
+#define OLD_INC_X      [fp, #8 ]
+#define Y              [fp, #12 ]
+#define OLD_INC_Y      [fp, #16 ]
+
+#endif
+
+#define OLD_A          r3
 #define        OLD_N           r1
 
 #define M      r0
@@ -518,11 +530,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        cmp     OLD_N, #0
        ble     gemvt_kernel_L999
 
-#ifndef DOUBLE
-#ifdef ARM_SOFTFP_ABI
-       vmov    s0,     OLD_ALPHA
-       ldr     OLD_A,  OLD_A_SOFTFP
+#if !defined(__ARM_PCS_VFP)
+#if !defined(DOUBLE)
+       vmov    s0, OLD_ALPHA
+#else
+       vldr    d0, OLD_ALPHA
 #endif
+       ldr     OLD_A, OLD_A_SOFTFP
 #endif
 
        str     OLD_A, A
index 7ae5799..b1d3dad 100644 (file)
@@ -38,11 +38,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #define STACKSIZE 256
 
-#define        OLD_LDA         [fp, #0 ]
-#define        X               [fp, #4 ]
-#define        OLD_INC_X       [fp, #8 ]
-#define        Y               [fp, #12 ]
-#define        OLD_INC_Y       [fp, #16 ]
+#if !defined(__ARM_PCS_VFP)
+
+#if !defined(DOUBLE)
+#define OLD_ALPHA      r3
+#define OLD_A_SOFTFP   [fp, #0 ]
+#define OLD_LDA                [fp, #4 ]
+#define X              [fp, #8 ]
+#define OLD_INC_X      [fp, #12 ]
+#define Y              [fp, #16 ]
+#define OLD_INC_Y      [fp, #20 ]
+#else
+#define OLD_ALPHA      [fp, #0 ]
+#define OLD_A_SOFTFP   [fp, #8 ]
+#define OLD_LDA                [fp, #12]
+#define X              [fp, #16]
+#define OLD_INC_X      [fp, #20]
+#define Y              [fp, #24]
+#define OLD_INC_Y      [fp, #28]
+#endif
+
+#else
+
+#define OLD_LDA                [fp, #0 ]
+#define X              [fp, #4 ]
+#define OLD_INC_X      [fp, #8 ]
+#define Y              [fp, #12 ]
+#define OLD_INC_Y      [fp, #16 ]
+
+#endif
+
 #define OLD_A          r3
 #define        OLD_N           r1
 
@@ -476,6 +501,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        cmp     OLD_N, #0
        ble     gemvt_kernel_L999
 
+#if !defined(__ARM_PCS_VFP)
+#if !defined(DOUBLE)
+       vmov    s0, OLD_ALPHA
+#else
+       vldr    d0, OLD_ALPHA
+#endif
+       ldr     OLD_A, OLD_A_SOFTFP
+#endif
+
        str     OLD_A, A
        str     OLD_N, N
 
index da9a910..7d55678 100644 (file)
@@ -38,11 +38,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #define STACKSIZE 256
 
-#define        OLD_LDA         [fp, #0 ]
-#define        X               [fp, #4 ]
-#define        OLD_INC_X       [fp, #8 ]
-#define        Y               [fp, #12 ]
-#define        OLD_INC_Y       [fp, #16 ]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR     [fp, #0 ]
+#define OLD_ALPHAI     [fp, #8 ]
+#define OLD_A_SOFTFP   [fp, #16]
+#define OLD_LDA                [fp, #20]
+#define X              [fp, #24]
+#define OLD_INC_X      [fp, #28]
+#define Y              [fp, #32]
+#define OLD_INC_Y      [fp, #36]
+#else
+#define OLD_LDA                [fp, #0 ]
+#define X              [fp, #4 ]
+#define OLD_INC_X      [fp, #8 ]
+#define Y              [fp, #12 ]
+#define OLD_INC_Y      [fp, #16 ]
+#endif
+
 #define OLD_A          r3
 #define        OLD_M           r0
 
@@ -465,6 +477,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        cmp     N, #0
        ble     zgemvn_kernel_L999
 
+#if !defined(__ARM_PCS_VFP)
+       vldr    d0, OLD_ALPHAR
+       vldr    d1, OLD_ALPHAI
+       ldr     OLD_A, OLD_A_SOFTFP
+#endif
+
        str     OLD_A, A
        str     OLD_M, M
        vstr    d0 , ALPHA_R
index 211fa07..4070261 100644 (file)
@@ -38,11 +38,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #define STACKSIZE 256
 
-#define        OLD_LDA         [fp, #0 ]
-#define        X               [fp, #4 ]
-#define        OLD_INC_X       [fp, #8 ]
-#define        Y               [fp, #12 ]
-#define        OLD_INC_Y       [fp, #16 ]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR     [fp, #0 ]
+#define OLD_ALPHAI     [fp, #8 ]
+#define OLD_A_SOFTFP   [fp, #16]
+#define OLD_LDA                [fp, #20]
+#define X              [fp, #24]
+#define OLD_INC_X      [fp, #28]
+#define Y              [fp, #32]
+#define OLD_INC_Y      [fp, #36]
+#else
+#define OLD_LDA                [fp, #0 ]
+#define X              [fp, #4 ]
+#define OLD_INC_X      [fp, #8 ]
+#define Y              [fp, #12 ]
+#define OLD_INC_Y      [fp, #16 ]
+#endif
+
 #define OLD_A          r3
 #define        OLD_N           r1
 
@@ -360,6 +372,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        cmp     OLD_N, #0
        ble     zgemvt_kernel_L999
 
+#if !defined(__ARM_PCS_VFP)
+       vldr    d0, OLD_ALPHAR
+       vldr    d1, OLD_ALPHAI
+       ldr     OLD_A, OLD_A_SOFTFP
+#endif
+
        str     OLD_A, A
        str     OLD_N, N