arm: add softfp support in zgemm/ztrmm vfp kernels
author Ashwin Sekhar T K <ashwin.sekhar@cavium.com>
Sat, 1 Jul 2017 21:24:32 +0000 (02:54 +0530)
committer Ashwin Sekhar T K <ashwin.sekhar@cavium.com>
Sat, 1 Jul 2017 21:24:32 +0000 (02:54 +0530)
kernel/arm/KERNEL.ARMV6
kernel/arm/KERNEL.ARMV7
kernel/arm/zgemm_kernel_2x2_vfp.S
kernel/arm/zgemm_kernel_2x2_vfpv3.S
kernel/arm/ztrmm_kernel_2x2_vfp.S
kernel/arm/ztrmm_kernel_2x2_vfpv3.S

index e8fc3df..960dae6 100644 (file)
@@ -96,25 +96,20 @@ DGEMMOTCOPY    = ../generic/gemm_tcopy_2.c
 DGEMMONCOPYOBJ = dgemm_oncopy.o
 DGEMMOTCOPYOBJ = dgemm_otcopy.o
 
-STRMMKERNEL    = strmm_kernel_4x2_vfp.S
-DTRMMKERNEL    = dtrmm_kernel_4x2_vfp.S
-CTRMMKERNEL    = ctrmm_kernel_2x2_vfp.S
-
 CGEMMKERNEL    = cgemm_kernel_2x2_vfp.S
 CGEMMONCOPY    = cgemm_ncopy_2_vfp.S
 CGEMMOTCOPY    = cgemm_tcopy_2_vfp.S
 CGEMMONCOPYOBJ = cgemm_oncopy.o
 CGEMMOTCOPYOBJ = cgemm_otcopy.o
 
+ZGEMMKERNEL    = zgemm_kernel_2x2_vfp.S
 ZGEMMONCOPY    = zgemm_ncopy_2_vfp.S
 ZGEMMOTCOPY    = zgemm_tcopy_2_vfp.S
 ZGEMMONCOPYOBJ = zgemm_oncopy.o
 ZGEMMOTCOPYOBJ = zgemm_otcopy.o
 
-ifeq ($(ARM_ABI),hard)
-
+STRMMKERNEL    = strmm_kernel_4x2_vfp.S
+DTRMMKERNEL    = dtrmm_kernel_4x2_vfp.S
+CTRMMKERNEL    = ctrmm_kernel_2x2_vfp.S
 ZTRMMKERNEL    = ztrmm_kernel_2x2_vfp.S
 
-ZGEMMKERNEL    = zgemm_kernel_2x2_vfp.S
-
-endif
index 4bfe18d..5e0b4cf 100644 (file)
@@ -8,10 +8,6 @@ ZNRM2KERNEL  = nrm2_vfpv3.S
 SGEMVNKERNEL = gemv_n_vfpv3.S
 DGEMVNKERNEL = gemv_n_vfpv3.S
 
-STRMMKERNEL  =  strmm_kernel_4x4_vfpv3.S
-DTRMMKERNEL  =  dtrmm_kernel_4x4_vfpv3.S
-CTRMMKERNEL  =  ctrmm_kernel_2x2_vfpv3.S
-
 SGEMMKERNEL    =  sgemm_kernel_4x4_vfpv3.S
 SGEMMONCOPY    =  sgemm_ncopy_4_vfp.S
 SGEMMOTCOPY    =  sgemm_tcopy_4_vfp.S
@@ -25,11 +21,10 @@ DGEMMONCOPYOBJ = dgemm_oncopy.o
 DGEMMOTCOPYOBJ = dgemm_otcopy.o
 
 CGEMMKERNEL    = cgemm_kernel_2x2_vfpv3.S
+ZGEMMKERNEL    = zgemm_kernel_2x2_vfpv3.S
 
-ifeq ($(ARM_ABI),hard)
-
+STRMMKERNEL  =  strmm_kernel_4x4_vfpv3.S
+DTRMMKERNEL  =  dtrmm_kernel_4x4_vfpv3.S
+CTRMMKERNEL  =  ctrmm_kernel_2x2_vfpv3.S
 ZTRMMKERNEL  =  ztrmm_kernel_2x2_vfpv3.S
 
-ZGEMMKERNEL    = zgemm_kernel_2x2_vfpv3.S
-
-endif
index 46507c4..618f097 100644 (file)
@@ -64,9 +64,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define ALPHA_I        [fp, #-272]
 #define ALPHA_R        [fp, #-280]
 
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR_SOFTFP      [fp, #4]
+#define OLD_ALPHAI_SOFTFP      [fp, #12]
+#define OLD_A_SOFTFP   [fp, #20 ]
+#define B      [fp, #24 ]
+#define C      [fp, #28 ]
+#define OLD_LDC        [fp, #32 ]
+#else
 #define B      [fp, #4 ]
 #define C      [fp, #8 ]
 #define OLD_LDC        [fp, #12 ]
+#endif
 
 #define I      r0
 #define J      r1
@@ -863,6 +872,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        add     fp, sp, #24
        sub     sp, sp, #STACKSIZE                              // reserve stack
 
+#if !defined(__ARM_PCS_VFP)
+       vldr    OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
+       vldr    OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
+       ldr     OLD_A, OLD_A_SOFTFP
+#endif
        str     OLD_M, M
        str     OLD_N, N
        str     OLD_K, K
index 5a99f79..0fe0c19 100644 (file)
@@ -80,9 +80,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define ALPHA_I        [fp, #-272]
 #define ALPHA_R        [fp, #-280]
 
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR_SOFTFP      [fp, #4]
+#define OLD_ALPHAI_SOFTFP      [fp, #12]
+#define OLD_A_SOFTFP   [fp, #20 ]
+#define B      [fp, #24 ]
+#define C      [fp, #28 ]
+#define OLD_LDC        [fp, #32 ]
+#else
 #define B      [fp, #4 ]
 #define C      [fp, #8 ]
 #define OLD_LDC        [fp, #12 ]
+#endif
 
 #define I      r0
 #define J      r1
@@ -909,6 +918,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        add     fp, sp, #24
        sub     sp, sp, #STACKSIZE                              // reserve stack
 
+#if !defined(__ARM_PCS_VFP)
+       vldr    OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
+       vldr    OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
+       ldr     OLD_A, OLD_A_SOFTFP
+#endif
        str     OLD_M, M
        str     OLD_N, N
        str     OLD_K, K
index dc80b17..78d09a9 100644 (file)
@@ -66,10 +66,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define ALPHA_I        [fp, #-272]
 #define ALPHA_R        [fp, #-280]
 
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR_SOFTFP      [fp, #4]
+#define OLD_ALPHAI_SOFTFP      [fp, #12]
+#define OLD_A_SOFTFP   [fp, #20 ]
+#define B      [fp, #24 ]
+#define C      [fp, #28 ]
+#define OLD_LDC        [fp, #32 ]
+#define OFFSET  [fp, #36 ]
+#else
 #define B      [fp, #4 ]
 #define C      [fp, #8 ]
 #define OLD_LDC        [fp, #12 ]
 #define OFFSET  [fp, #16 ]
+#endif
 
 #define I      r0
 #define J      r1
@@ -882,6 +892,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        add     fp, sp, #24
        sub     sp, sp, #STACKSIZE                              // reserve stack
 
+#if !defined(__ARM_PCS_VFP)
+       vldr    OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
+       vldr    OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
+       ldr     OLD_A, OLD_A_SOFTFP
+#endif
        str     OLD_M, M
        str     OLD_N, N
        str     OLD_K, K
index 5a808cc..bf72ce6 100644 (file)
@@ -66,10 +66,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define ALPHA_I        [fp, #-272]
 #define ALPHA_R        [fp, #-280]
 
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR_SOFTFP      [fp, #4]
+#define OLD_ALPHAI_SOFTFP      [fp, #12]
+#define OLD_A_SOFTFP   [fp, #20 ]
+#define B      [fp, #24 ]
+#define C      [fp, #28 ]
+#define OLD_LDC        [fp, #32 ]
+#define OFFSET  [fp, #36 ]
+#else
 #define B      [fp, #4 ]
 #define C      [fp, #8 ]
 #define OLD_LDC        [fp, #12 ]
 #define OFFSET  [fp, #16 ]
+#endif
 
 #define I      r0
 #define J      r1
@@ -883,6 +893,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        add     fp, sp, #24
        sub     sp, sp, #STACKSIZE                              // reserve stack
 
+#if !defined(__ARM_PCS_VFP)
+       vldr    OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
+       vldr    OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
+       ldr     OLD_A, OLD_A_SOFTFP
+#endif
        str     OLD_M, M
        str     OLD_N, N
        str     OLD_K, K