CSWAPKERNEL = swap_vfp.S
ZSWAPKERNEL = swap_vfp.S
+SGEMVNKERNEL = gemv_n_vfp.S
+DGEMVNKERNEL = gemv_n_vfp.S
+CGEMVNKERNEL = cgemv_n_vfp.S
+ZGEMVNKERNEL = zgemv_n_vfp.S
+
+SGEMVTKERNEL = gemv_t_vfp.S
+DGEMVTKERNEL = gemv_t_vfp.S
+CGEMVTKERNEL = cgemv_t_vfp.S
+ZGEMVTKERNEL = zgemv_t_vfp.S
+
SGEMMKERNEL = ../generic/gemmkernel_4x2.c
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
SGEMMINCOPY = sgemm_ncopy_4_vfp.S
ifeq ($(ARM_ABI),hard)
-SGEMVNKERNEL = gemv_n_vfp.S
-DGEMVNKERNEL = gemv_n_vfp.S
-CGEMVNKERNEL = cgemv_n_vfp.S
-ZGEMVNKERNEL = zgemv_n_vfp.S
-
-SGEMVTKERNEL = gemv_t_vfp.S
-DGEMVTKERNEL = gemv_t_vfp.S
-CGEMVTKERNEL = cgemv_t_vfp.S
-ZGEMVTKERNEL = zgemv_t_vfp.S
-
STRMMKERNEL = strmm_kernel_4x2_vfp.S
DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
CNRM2KERNEL = nrm2_vfpv3.S
ZNRM2KERNEL = nrm2_vfpv3.S
+SGEMVNKERNEL = gemv_n_vfpv3.S
+DGEMVNKERNEL = gemv_n_vfpv3.S
+
STRMMKERNEL = ../generic/trmmkernel_4x4.c
DTRMMKERNEL = ../generic/trmmkernel_4x4.c
ifeq ($(ARM_ABI),hard)
-SGEMVNKERNEL = gemv_n_vfpv3.S
-DGEMVNKERNEL = gemv_n_vfpv3.S
-
STRMMKERNEL = strmm_kernel_4x4_vfpv3.S
DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S
CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
#define STACKSIZE 256
-#define OLD_LDA [fp, #0 ]
-#define X [fp, #4 ]
-#define OLD_INC_X [fp, #8 ]
-#define Y [fp, #12 ]
-#define OLD_INC_Y [fp, #16 ]
+#if !defined(__ARM_PCS_VFP) /* not the VFP (hard-float) calling convention: alpha is read from r3 and [fp, #0] */
+#define OLD_ALPHAR r3 /* copied into s0 at kernel entry */
+#define OLD_ALPHAI [fp, #0 ] /* loaded into s1 at kernel entry */
+#define OLD_A_SOFTFP [fp, #4 ] /* reloaded into OLD_A (r3) once r3 has been consumed as alpha */
+#define OLD_LDA [fp, #8 ] /* LDA..INC_Y sit 8 bytes above their __ARM_PCS_VFP offsets */
+#define X [fp, #12 ]
+#define OLD_INC_X [fp, #16 ]
+#define Y [fp, #20 ]
+#define OLD_INC_Y [fp, #24 ]
+#else /* __ARM_PCS_VFP defined: LDA is the first fp-relative slot and OLD_A stays in r3 */
+#define OLD_LDA [fp, #0 ]
+#define X [fp, #4 ]
+#define OLD_INC_X [fp, #8 ]
+#define Y [fp, #12 ]
+#define OLD_INC_Y [fp, #16 ]
+#endif /* !defined(__ARM_PCS_VFP) */
+
#define OLD_A r3
#define OLD_M r0
cmp N, #0
ble cgemvn_kernel_L999
+#if !defined(__ARM_PCS_VFP) /* put alpha in s0/s1 and A in r3 before the shared stores below */
+ vmov s0, OLD_ALPHAR /* must run before the ldr below overwrites r3 */
+ vldr s1, OLD_ALPHAI
+ ldr OLD_A, OLD_A_SOFTFP /* r3 now holds the value stored to A below */
+#endif /* !defined(__ARM_PCS_VFP) */
+
str OLD_A, A
str OLD_M, M
vstr s0 , ALPHA_R
#define STACKSIZE 256
-#define OLD_LDA [fp, #0 ]
-#define X [fp, #4 ]
-#define OLD_INC_X [fp, #8 ]
-#define Y [fp, #12 ]
-#define OLD_INC_Y [fp, #16 ]
+#if !defined(__ARM_PCS_VFP) /* not the VFP (hard-float) calling convention: alpha is read from r3 and [fp, #0] */
+#define OLD_ALPHAR r3 /* copied into s0 at kernel entry */
+#define OLD_ALPHAI [fp, #0 ] /* loaded into s1 at kernel entry */
+#define OLD_A_SOFTFP [fp, #4 ] /* reloaded into OLD_A (r3) once r3 has been consumed as alpha */
+#define OLD_LDA [fp, #8 ] /* LDA..INC_Y sit 8 bytes above their __ARM_PCS_VFP offsets */
+#define X [fp, #12 ]
+#define OLD_INC_X [fp, #16 ]
+#define Y [fp, #20 ]
+#define OLD_INC_Y [fp, #24 ]
+#else /* __ARM_PCS_VFP defined: LDA is the first fp-relative slot and OLD_A stays in r3 */
+#define OLD_LDA [fp, #0 ]
+#define X [fp, #4 ]
+#define OLD_INC_X [fp, #8 ]
+#define Y [fp, #12 ]
+#define OLD_INC_Y [fp, #16 ]
+#endif /* !defined(__ARM_PCS_VFP) */
+
#define OLD_A r3
#define OLD_N r1
cmp OLD_N, #0
ble cgemvt_kernel_L999
+#if !defined(__ARM_PCS_VFP) /* put alpha in s0/s1 and A in r3 before OLD_A is stored below */
+ vmov s0, OLD_ALPHAR /* must run before the ldr below overwrites r3 */
+ vldr s1, OLD_ALPHAI
+ ldr OLD_A, OLD_A_SOFTFP /* r3 now holds the value stored to A below */
+#endif /* !defined(__ARM_PCS_VFP) */
+
str OLD_A, A
str OLD_N, N
#define STACKSIZE 256
-#define OLD_LDA [fp, #0 ]
-#define X [fp, #4 ]
-#define OLD_INC_X [fp, #8 ]
-#define Y [fp, #12 ]
-#define OLD_INC_Y [fp, #16 ]
+#if !defined(__ARM_PCS_VFP) /* not the VFP (hard-float) calling convention: alpha and A are fetched explicitly at entry */
+
+#if !defined(DOUBLE) /* single precision: alpha is taken from r3 */
+#define OLD_ALPHA r3 /* copied into s0 at kernel entry */
+#define OLD_A_SOFTFP [fp, #0 ] /* reloaded into OLD_A (r3) after alpha has been copied out */
+#define OLD_LDA [fp, #4 ]
+#define X [fp, #8 ]
+#define OLD_INC_X [fp, #12 ]
+#define Y [fp, #16 ]
+#define OLD_INC_Y [fp, #20 ]
+#else /* DOUBLE: alpha is read from [fp, #0] instead of r3 */
+#define OLD_ALPHA [fp, #0 ] /* loaded into d0 at kernel entry */
+#define OLD_A_SOFTFP [fp, #8 ] /* 8 bytes past OLD_ALPHA; loaded into OLD_A (r3) */
+#define OLD_LDA [fp, #12]
+#define X [fp, #16]
+#define OLD_INC_X [fp, #20]
+#define Y [fp, #24]
+#define OLD_INC_Y [fp, #28]
+#endif /* !defined(DOUBLE) */
+
+#else /* __ARM_PCS_VFP defined: LDA is the first fp-relative slot and OLD_A stays in r3 */
+
+#define OLD_LDA [fp, #0 ]
+#define X [fp, #4 ]
+#define OLD_INC_X [fp, #8 ]
+#define Y [fp, #12 ]
+#define OLD_INC_Y [fp, #16 ]
+
+#endif /* !defined(__ARM_PCS_VFP) */
+
#define OLD_A r3
#define OLD_M r0
cmp N, #0
ble gemvn_kernel_L999
+#if !defined(__ARM_PCS_VFP) /* put alpha in s0/d0 and A in r3 before the shared stores below */
+#if !defined(DOUBLE)
+ vmov s0, OLD_ALPHA /* r3 -> s0; must run before the ldr below overwrites r3 */
+#else /* DOUBLE */
+ vldr d0, OLD_ALPHA /* [fp, #0] -> d0 */
+#endif /* !defined(DOUBLE) */
+ ldr OLD_A, OLD_A_SOFTFP /* r3 now holds the value stored to A below */
+#endif /* !defined(__ARM_PCS_VFP) */
+
str OLD_A, A
str OLD_M, M
#define STACKSIZE 256
-#ifndef ARM_SOFTFP_ABI
-//hard
-#define OLD_LDA [fp, #0 ]
-#define X [fp, #4 ]
-#define OLD_INC_X [fp, #8 ]
-#define Y [fp, #12 ]
-#define OLD_INC_Y [fp, #16 ]
-#define OLD_A r3
-#else
-#define OLD_A_SOFTFP [fp, #0 ]
-#define OLD_LDA [fp, #4 ]
-#define X [fp, #8 ]
-#define OLD_INC_X [fp, #12 ]
-#define Y [fp, #16 ]
-#define OLD_INC_Y [fp, #20 ]
+#if !defined(__ARM_PCS_VFP) /* not the VFP (hard-float) calling convention: alpha and A are fetched explicitly at entry */
+
+#if !defined(DOUBLE) /* single precision: alpha is taken from r3 */
#define OLD_ALPHA r3
-#define OLD_A r3
+#define OLD_A_SOFTFP [fp, #0 ] /* reloaded into OLD_A (r3) after alpha has been copied out */
+#define OLD_LDA [fp, #4 ]
+#define X [fp, #8 ]
+#define OLD_INC_X [fp, #12 ]
+#define Y [fp, #16 ]
+#define OLD_INC_Y [fp, #20 ]
+#else /* DOUBLE: alpha is read from [fp, #0] instead of r3 */
+#define OLD_ALPHA [fp, #0 ] /* loaded into d0 at kernel entry */
+#define OLD_A_SOFTFP [fp, #8 ] /* 8 bytes past OLD_ALPHA; loaded into OLD_A (r3) */
+#define OLD_LDA [fp, #12]
+#define X [fp, #16]
+#define OLD_INC_X [fp, #20]
+#define Y [fp, #24]
+#define OLD_INC_Y [fp, #28]
+#endif /* !defined(DOUBLE) */
+
+#else /* __ARM_PCS_VFP defined: LDA is the first fp-relative slot and OLD_A stays in r3 */
+
+#define OLD_LDA [fp, #0 ]
+#define X [fp, #4 ]
+#define OLD_INC_X [fp, #8 ]
+#define Y [fp, #12 ]
+#define OLD_INC_Y [fp, #16 ]
+
#endif
+#define OLD_A r3 /* same register as the single-precision OLD_ALPHA, hence the reload at entry */
#define OLD_M r0
#define AO1 r0
cmp N, #0
ble gemvn_kernel_L999
-#ifndef DOUBLE
-#ifdef ARM_SOFTFP_ABI
-
- vmov s0, OLD_ALPHA
- ldr OLD_A, OLD_A_SOFTFP
+#if !defined(__ARM_PCS_VFP) /* put alpha in s0/d0 and A in r3 before the shared stores below */
+#if !defined(DOUBLE)
+ vmov s0, OLD_ALPHA /* r3 -> s0; must run before the ldr below overwrites r3 */
+#else /* DOUBLE */
+ vldr d0, OLD_ALPHA /* [fp, #0] -> d0 */
#endif
+ ldr OLD_A, OLD_A_SOFTFP /* r3 now holds the value stored to A below */
#endif
str OLD_A, A
str OLD_M, M
-
-
+
ldr INC_X , OLD_INC_X
ldr INC_Y , OLD_INC_Y
#define STACKSIZE 256
-#ifndef ARM_SOFTFP_ABI
-//hard abi
-#define OLD_LDA [fp, #0 ]
-#define X [fp, #4 ]
-#define OLD_INC_X [fp, #8 ]
-#define Y [fp, #12 ]
-#define OLD_INC_Y [fp, #16 ]
-#define OLD_A r3
-#else
-#define OLD_A_SOFTFP [fp, #0 ]
-#define OLD_LDA [fp, #4 ]
-#define X [fp, #8 ]
-#define OLD_INC_X [fp, #12 ]
-#define Y [fp, #16 ]
-#define OLD_INC_Y [fp, #20 ]
+#if !defined(__ARM_PCS_VFP) /* not the VFP (hard-float) calling convention: alpha and A are fetched explicitly at entry */
+
+#if !defined(DOUBLE) /* single precision: alpha is taken from r3 */
#define OLD_ALPHA r3
-#define OLD_A r3
+#define OLD_A_SOFTFP [fp, #0 ] /* reloaded into OLD_A (r3) after alpha has been copied out */
+#define OLD_LDA [fp, #4 ]
+#define X [fp, #8 ]
+#define OLD_INC_X [fp, #12 ]
+#define Y [fp, #16 ]
+#define OLD_INC_Y [fp, #20 ]
+#else /* DOUBLE: alpha is read from [fp, #0] instead of r3 */
+#define OLD_ALPHA [fp, #0 ] /* loaded into d0 at kernel entry */
+#define OLD_A_SOFTFP [fp, #8 ] /* 8 bytes past OLD_ALPHA; loaded into OLD_A (r3) */
+#define OLD_LDA [fp, #12]
+#define X [fp, #16]
+#define OLD_INC_X [fp, #20]
+#define Y [fp, #24]
+#define OLD_INC_Y [fp, #28]
#endif
+#else /* __ARM_PCS_VFP defined: LDA is the first fp-relative slot and OLD_A stays in r3 */
+
+#define OLD_LDA [fp, #0 ]
+#define X [fp, #4 ]
+#define OLD_INC_X [fp, #8 ]
+#define Y [fp, #12 ]
+#define OLD_INC_Y [fp, #16 ]
+
+#endif /* !defined(__ARM_PCS_VFP) */
+
+#define OLD_A r3 /* same register as the single-precision OLD_ALPHA, hence the reload at entry */
#define OLD_N r1
#define M r0
cmp OLD_N, #0
ble gemvt_kernel_L999
-#ifndef DOUBLE
-#ifdef ARM_SOFTFP_ABI
- vmov s0, OLD_ALPHA
- ldr OLD_A, OLD_A_SOFTFP
+#if !defined(__ARM_PCS_VFP) /* put alpha in s0/d0 and A in r3 before OLD_A is stored below */
+#if !defined(DOUBLE)
+ vmov s0, OLD_ALPHA /* r3 -> s0; must run before the ldr below overwrites r3 */
+#else /* DOUBLE */
+ vldr d0, OLD_ALPHA /* [fp, #0] -> d0 */
#endif
+ ldr OLD_A, OLD_A_SOFTFP /* r3 now holds the value stored to A below */
#endif
str OLD_A, A
#define STACKSIZE 256
-#define OLD_LDA [fp, #0 ]
-#define X [fp, #4 ]
-#define OLD_INC_X [fp, #8 ]
-#define Y [fp, #12 ]
-#define OLD_INC_Y [fp, #16 ]
+#if !defined(__ARM_PCS_VFP) /* not the VFP (hard-float) calling convention: alpha and A are fetched explicitly at entry */
+
+#if !defined(DOUBLE) /* single precision: alpha is taken from r3 */
+#define OLD_ALPHA r3 /* copied into s0 at kernel entry */
+#define OLD_A_SOFTFP [fp, #0 ] /* reloaded into OLD_A (r3) after alpha has been copied out */
+#define OLD_LDA [fp, #4 ]
+#define X [fp, #8 ]
+#define OLD_INC_X [fp, #12 ]
+#define Y [fp, #16 ]
+#define OLD_INC_Y [fp, #20 ]
+#else /* DOUBLE: alpha is read from [fp, #0] instead of r3 */
+#define OLD_ALPHA [fp, #0 ] /* loaded into d0 at kernel entry */
+#define OLD_A_SOFTFP [fp, #8 ] /* 8 bytes past OLD_ALPHA; loaded into OLD_A (r3) */
+#define OLD_LDA [fp, #12]
+#define X [fp, #16]
+#define OLD_INC_X [fp, #20]
+#define Y [fp, #24]
+#define OLD_INC_Y [fp, #28]
+#endif /* !defined(DOUBLE) */
+
+#else /* __ARM_PCS_VFP defined: LDA is the first fp-relative slot and OLD_A stays in r3 */
+
+#define OLD_LDA [fp, #0 ]
+#define X [fp, #4 ]
+#define OLD_INC_X [fp, #8 ]
+#define Y [fp, #12 ]
+#define OLD_INC_Y [fp, #16 ]
+
+#endif /* !defined(__ARM_PCS_VFP) */
+
#define OLD_A r3
#define OLD_N r1
cmp OLD_N, #0
ble gemvt_kernel_L999
+#if !defined(__ARM_PCS_VFP) /* put alpha in s0/d0 and A in r3 before the shared stores below */
+#if !defined(DOUBLE)
+ vmov s0, OLD_ALPHA /* r3 -> s0; must run before the ldr below overwrites r3 */
+#else /* DOUBLE */
+ vldr d0, OLD_ALPHA /* [fp, #0] -> d0 */
+#endif /* !defined(DOUBLE) */
+ ldr OLD_A, OLD_A_SOFTFP /* r3 now holds the value stored to A below */
+#endif /* !defined(__ARM_PCS_VFP) */
+
str OLD_A, A
str OLD_N, N
#define STACKSIZE 256
-#define OLD_LDA [fp, #0 ]
-#define X [fp, #4 ]
-#define OLD_INC_X [fp, #8 ]
-#define Y [fp, #12 ]
-#define OLD_INC_Y [fp, #16 ]
+#if !defined(__ARM_PCS_VFP) /* not the VFP (hard-float) calling convention: alpha and A are all read from fp-relative slots */
+#define OLD_ALPHAR [fp, #0 ] /* loaded into d0 at kernel entry */
+#define OLD_ALPHAI [fp, #8 ] /* loaded into d1 at kernel entry */
+#define OLD_A_SOFTFP [fp, #16] /* loaded into OLD_A (r3) at kernel entry */
+#define OLD_LDA [fp, #20] /* LDA..INC_Y sit 20 bytes above their __ARM_PCS_VFP offsets */
+#define X [fp, #24]
+#define OLD_INC_X [fp, #28]
+#define Y [fp, #32]
+#define OLD_INC_Y [fp, #36]
+#else /* __ARM_PCS_VFP defined: LDA is the first fp-relative slot and OLD_A stays in r3 */
+#define OLD_LDA [fp, #0 ]
+#define X [fp, #4 ]
+#define OLD_INC_X [fp, #8 ]
+#define Y [fp, #12 ]
+#define OLD_INC_Y [fp, #16 ]
+#endif /* !defined(__ARM_PCS_VFP) */
+
#define OLD_A r3
#define OLD_M r0
cmp N, #0
ble zgemvn_kernel_L999
+#if !defined(__ARM_PCS_VFP) /* put alpha in d0/d1 and A in r3 before the shared stores below */
+ vldr d0, OLD_ALPHAR
+ vldr d1, OLD_ALPHAI
+ ldr OLD_A, OLD_A_SOFTFP /* r3 now holds the value stored to A below */
+#endif /* !defined(__ARM_PCS_VFP) */
+
str OLD_A, A
str OLD_M, M
vstr d0 , ALPHA_R
#define STACKSIZE 256
-#define OLD_LDA [fp, #0 ]
-#define X [fp, #4 ]
-#define OLD_INC_X [fp, #8 ]
-#define Y [fp, #12 ]
-#define OLD_INC_Y [fp, #16 ]
+#if !defined(__ARM_PCS_VFP) /* not the VFP (hard-float) calling convention: alpha and A are all read from fp-relative slots */
+#define OLD_ALPHAR [fp, #0 ] /* loaded into d0 at kernel entry */
+#define OLD_ALPHAI [fp, #8 ] /* loaded into d1 at kernel entry */
+#define OLD_A_SOFTFP [fp, #16] /* loaded into OLD_A (r3) at kernel entry */
+#define OLD_LDA [fp, #20] /* LDA..INC_Y sit 20 bytes above their __ARM_PCS_VFP offsets */
+#define X [fp, #24]
+#define OLD_INC_X [fp, #28]
+#define Y [fp, #32]
+#define OLD_INC_Y [fp, #36]
+#else /* __ARM_PCS_VFP defined: LDA is the first fp-relative slot and OLD_A stays in r3 */
+#define OLD_LDA [fp, #0 ]
+#define X [fp, #4 ]
+#define OLD_INC_X [fp, #8 ]
+#define Y [fp, #12 ]
+#define OLD_INC_Y [fp, #16 ]
+#endif /* !defined(__ARM_PCS_VFP) */
+
#define OLD_A r3
#define OLD_N r1
cmp OLD_N, #0
ble zgemvt_kernel_L999
+#if !defined(__ARM_PCS_VFP) /* put alpha in d0/d1 and A in r3 before OLD_A is stored below */
+ vldr d0, OLD_ALPHAR
+ vldr d1, OLD_ALPHAI
+ ldr OLD_A, OLD_A_SOFTFP /* r3 now holds the value stored to A below */
+#endif /* !defined(__ARM_PCS_VFP) */
+
str OLD_A, A
str OLD_N, N