#define STACKSIZE 256
-#define OLD_LDA [fp, #0 ]
-#define X [fp, #4 ]
-#define OLD_INC_X [fp, #8 ]
-#define Y [fp, #12 ]
-#define OLD_INC_Y [fp, #16 ]
+#if !defined(__ARM_PCS_VFP) /* fp-relative argument slots; in this branch the entry code also loads d0, d1 and OLD_A from them */
+#define OLD_ALPHAR [fp, #0 ] /* 8-byte slot, loaded into d0 with vldr (presumably the real part of alpha - confirm against the caller) */
+#define OLD_ALPHAI [fp, #8 ] /* 8-byte slot, loaded into d1 with vldr (presumably the imaginary part of alpha - confirm) */
+#define OLD_A_SOFTFP [fp, #16] /* 4-byte slot, loaded into OLD_A (r3) with ldr */
+#define OLD_LDA [fp, #20] /* the remaining names follow in 4-byte steps */
+#define X [fp, #24]
+#define OLD_INC_X [fp, #28]
+#define Y [fp, #32]
+#define OLD_INC_Y [fp, #36]
+#else /* __ARM_PCS_VFP defined: no alpha/A slots are named, so the same five names start at [fp, #0] */
+#define OLD_LDA [fp, #0 ]
+#define X [fp, #4 ]
+#define OLD_INC_X [fp, #8 ]
+#define Y [fp, #12 ]
+#define OLD_INC_Y [fp, #16 ]
+#endif
+
#define OLD_A r3 /* stored to the A slot by the entry code; reloaded from OLD_A_SOFTFP first when __ARM_PCS_VFP is not defined */
#define OLD_M r0 /* compared against 0 and then stored to the M slot by the entry code */
#define I r12
+#define FP_ZERO [fp, #-228] /* 8-byte slot: the entry code writes a zero word to each half, and the INIT_* macros fldd it into d8 */
+#define FP_ZERO_0 [fp, #-228] /* same address as FP_ZERO, i.e. its first word */
+#define FP_ZERO_1 [fp, #-224] /* second word of the FP_ZERO slot */
+
+
#define ALPHA_I [fp, #-236] /* 8 bytes below FP_ZERO; its store is not visible in this excerpt */
#define ALPHA_R [fp, #-244] /* the entry code saves d0 here with vstr */
#if !defined(CONJ) && !defined(XCONJ) /* Choose, per CONJ/XCONJ combination, which of the six MAC names expands to fmacd and which to vmls.f64. Neither defined: KMAC_R and FMAC_R2 are vmls.f64. */
- #define KMAC_R fnmacd
+ #define KMAC_R vmls.f64 /* every fnmacd in this table is respelled as vmls.f64 by this change; fmacd entries are untouched */
#define KMAC_I fmacd
#define FMAC_R1 fmacd
- #define FMAC_R2 fnmacd
+ #define FMAC_R2 vmls.f64
#define FMAC_I1 fmacd
#define FMAC_I2 fmacd
#elif defined(CONJ) && !defined(XCONJ) /* CONJ only: KMAC_I and FMAC_R2 are vmls.f64 */
#define KMAC_R fmacd
- #define KMAC_I fnmacd
+ #define KMAC_I vmls.f64
#define FMAC_R1 fmacd
- #define FMAC_R2 fnmacd
+ #define FMAC_R2 vmls.f64
#define FMAC_I1 fmacd
#define FMAC_I2 fmacd
#elif !defined(CONJ) && defined(XCONJ) /* XCONJ only: KMAC_I and FMAC_I1 are vmls.f64 */
#define KMAC_R fmacd
- #define KMAC_I fnmacd
+ #define KMAC_I vmls.f64
#define FMAC_R1 fmacd
#define FMAC_R2 fmacd
- #define FMAC_I1 fnmacd
+ #define FMAC_I1 vmls.f64
#define FMAC_I2 fmacd
#else /* both CONJ and XCONJ defined: KMAC_R and FMAC_I1 are vmls.f64 */
- #define KMAC_R fnmacd
+ #define KMAC_R vmls.f64
#define KMAC_I fmacd
#define FMAC_R1 fmacd
#define FMAC_R2 fmacd
- #define FMAC_I1 fnmacd
+ #define FMAC_I1 vmls.f64
#define FMAC_I2 fmacd
#endif
.macro INIT_F4 // the visible lines set d8-d11 to the FP_ZERO value; the macro continues past this excerpt
pld [ YO, #Y_PRE ] // preload hint for the address YO + Y_PRE
- vsub.f64 d8 , d8 , d8
+ fldd d8, FP_ZERO // d8 = 8 bytes from the FP_ZERO slot (zero words written by the entry code); unlike the removed vsub, this does not read the old d8
vmov.f64 d9 , d8 // replicate d8 into the other registers
vmov.f64 d10, d8
vmov.f64 d11, d8
.macro INIT_F1 // set d8 and d9 to the FP_ZERO value
- vsub.f64 d8 , d8 , d8
+ fldd d8, FP_ZERO // d8 = 8 bytes from the FP_ZERO slot (zero words written by the entry code); unlike the removed vsub, this does not read the old d8
vmov.f64 d9 , d8 // d9 = d8
.endm
.macro INIT_S4 // the visible lines set d8-d11 to the FP_ZERO value; the macro continues past this excerpt
- vsub.f64 d8 , d8 , d8
+ fldd d8, FP_ZERO // d8 = 8 bytes from the FP_ZERO slot (zero words written by the entry code); unlike the removed vsub, this does not read the old d8
vmov.f64 d9 , d8 // replicate d8 into the other registers
vmov.f64 d10, d8
vmov.f64 d11, d8
.macro INIT_S1 // set d8 and d9 to the FP_ZERO value
- vsub.f64 d8 , d8 , d8
+ fldd d8, FP_ZERO // d8 = 8 bytes from the FP_ZERO slot (zero words written by the entry code); unlike the removed vsub, this does not read the old d8
vmov.f64 d9 , d8 // d9 = d8
.endm
vstm r12, { s8 - s15 } // store floating point registers
#endif
+ movs r12, #0 // r12 = 0; the flags it sets are overwritten by the cmp below before any branch
+ str r12, FP_ZERO // zero the first word of the FP_ZERO slot ([fp, #-228])
+ str r12, FP_ZERO_1 // zero the second word ([fp, #-224]); the slot now holds the 8 zero bytes the INIT_* macros load
+
cmp OLD_M, #0
ble zgemvn_kernel_L999 // signed test: leave via zgemvn_kernel_L999 when r0 <= 0
cmp N, #0
ble zgemvn_kernel_L999 // same exit when N <= 0
+#if !defined(__ARM_PCS_VFP) /* in this configuration d0, d1 and r3 are filled from fp-relative slots before use */
+ vldr d0, OLD_ALPHAR // d0 = 8 bytes at [fp, #0]
+ vldr d1, OLD_ALPHAI // d1 = 8 bytes at [fp, #8]
+ ldr OLD_A, OLD_A_SOFTFP // r3 = word at [fp, #16]
+#endif
+
str OLD_A, A // save r3 to the A slot (defined outside this excerpt)
str OLD_M, M // save r0 to the M slot (defined outside this excerpt)
vstr d0 , ALPHA_R // save d0 to [fp, #-244]