#define STACKSIZE 256
+#ifndef ARM_SOFTFP_ABI
+//hard-float ABI: the stack arguments start at INC_X (there is no X slot on the stack)
#define OLD_INC_X [fp, #0 ]
#define OLD_Y [fp, #4 ]
#define OLD_INC_Y [fp, #8 ]
-
-
+#else // ARM_SOFTFP_ABI: X is read from the stack as well, so every slot sits 4 bytes higher
+#define OLD_X [fp, #0 ]
+#define OLD_INC_X [fp, #4 ]
+#define OLD_Y [fp, #8 ]
+#define OLD_INC_Y [fp, #12 ]
+#endif // ARM_SOFTFP_ABI
+
#define N r0
#define Y r1
#define INC_X r2
add fp, sp, #8
sub sp, sp, #STACKSIZE // reserve stack
+#ifdef ARM_SOFTFP_ABI
+#ifndef DOUBLE // NOTE(review): only the non-DOUBLE build is handled here; confirm DOUBLE + ARM_SOFTFP_ABI is covered elsewhere
+ vmov s0, r3 //move alpha to s0
+ ldr X, OLD_X // load X from its stack slot; presumably X is r3 (its #define is outside this view), hence the vmov first
+#endif
+#endif
+
ldr INC_X , OLD_INC_X
ldr Y, OLD_Y
ldr INC_Y , OLD_INC_Y
#define STACKSIZE 256
+#ifndef ARM_SOFTFP_ABI
+//hard-float ABI: A is taken from r3 and the stack arguments start at LDA
#define OLD_LDA [fp, #0 ]
#define X [fp, #4 ]
#define OLD_INC_X [fp, #8 ]
#define Y [fp, #12 ]
#define OLD_INC_Y [fp, #16 ]
#define OLD_A r3
+#else // ARM_SOFTFP_ABI: r3 holds alpha on entry, so A is read from the first stack slot and the rest move up by 4 bytes
+#define OLD_A_SOFTFP [fp, #0 ]
+#define OLD_LDA [fp, #4 ]
+#define X [fp, #8 ]
+#define OLD_INC_X [fp, #12 ]
+#define Y [fp, #16 ]
+#define OLD_INC_Y [fp, #20 ]
+#define OLD_ALPHA r3
+#define OLD_A r3
+#endif // ARM_SOFTFP_ABI
+
#define OLD_M r0
#define AO1 r0
cmp N, #0
ble gemvn_kernel_L999 // N <= 0: branch to gemvn_kernel_L999
+#ifndef DOUBLE // NOTE(review): only the non-DOUBLE build is handled; confirm the softfp stack offsets above are never used with DOUBLE
+#ifdef ARM_SOFTFP_ABI
+
+ vmov s0, OLD_ALPHA // copy alpha out of r3 into s0 before r3 is overwritten
+ ldr OLD_A, OLD_A_SOFTFP // r3 is free now: load A into it from the stack
+#endif
+#endif
+
str OLD_A, A // store r3 (A) into the A slot; A's definition is outside this view
str OLD_M, M // store r0 (M) likewise
-
+
+
ldr INC_X , OLD_INC_X
ldr INC_Y , OLD_INC_Y
#define STACKSIZE 256
+#ifndef ARM_SOFTFP_ABI
+//hard-float ABI: A is taken from r3 and the stack arguments start at LDA
#define OLD_LDA [fp, #0 ]
#define X [fp, #4 ]
#define OLD_INC_X [fp, #8 ]
#define Y [fp, #12 ]
#define OLD_INC_Y [fp, #16 ]
#define OLD_A r3
+#else // ARM_SOFTFP_ABI: r3 holds alpha on entry, so A is read from the first stack slot and the rest move up by 4 bytes
+#define OLD_A_SOFTFP [fp, #0 ]
+#define OLD_LDA [fp, #4 ]
+#define X [fp, #8 ]
+#define OLD_INC_X [fp, #12 ]
+#define Y [fp, #16 ]
+#define OLD_INC_Y [fp, #20 ]
+#define OLD_ALPHA r3
+#define OLD_A r3
+#endif // ARM_SOFTFP_ABI
+
#define OLD_N r1
#define M r0
cmp OLD_N, #0
ble gemvt_kernel_L999 // N <= 0: branch to gemvt_kernel_L999
+#ifndef DOUBLE // NOTE(review): only the non-DOUBLE build is handled; confirm the softfp stack offsets above are never used with DOUBLE
+#ifdef ARM_SOFTFP_ABI
+ vmov s0, OLD_ALPHA // copy alpha out of r3 into s0 before r3 is overwritten
+ ldr OLD_A, OLD_A_SOFTFP // r3 is free now: load A into it from the stack
+#endif
+#endif
+
str OLD_A, A // store r3 (A) into the A slot; A's definition is outside this view
str OLD_N, N // store r1 (N) likewise
#else
vsqrt.f32 s1, s1 // s1 = sqrt(s1)
vmul.f32 s0, s0, s1 // s0 = s0 * s1
+#ifdef ARM_SOFTFP_ABI
+ vmov r0, s0 // softfp: copy s0 into r0 ahead of the return below (presumably the result register; confirm)
+#endif
#endif
bx lr
vadd.f64 d0 , d0, d1 // set return value
+#ifdef ARM_SOFTFP_ABI
+ vmov r0, r1, d0 // softfp: copy the 64-bit value in d0 into the r0/r1 pair
+#endif
+
#else
vadd.f32 s0 , s0, s1 // set return value
-
+#ifdef ARM_SOFTFP_ABI
+ vmov r0, s0 // softfp: copy the 32-bit value in s0 into r0
+#endif
#endif
sub sp, fp, #24 // set sp to fp - 24 ahead of the pop below
pop {r4 - r9, fp}