Modify push operations to keep the stack 8-byte aligned
author: yang <yang.zhang@arm.com>
Thu, 22 Nov 2012 02:41:45 +0000 (10:41 +0800)
committer: yang <yang.zhang@arm.com>
Thu, 22 Nov 2012 02:41:45 +0000 (10:41 +0800)
modules/dsp/NE10_cfft.neon.s
modules/dsp/NE10_fir.neon.s
modules/dsp/NE10_iir.neon.s

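diff --git a/modules/dsp/NE10_cfft.neon.s b/modules/dsp/NE10_cfft.neon.s
index 118705f..fbd3bcd 100644
--- a/modules/dsp/NE10_cfft.neon.s
+++ b/modules/dsp/NE10_cfft.neon.s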
@@ -55,9 +55,8 @@
 
 ne10_radix4_butterfly_float_neon:
 
-        PUSH    {r4-r11,lr}
+        PUSH    {r4-r12,lr}    @push r12: to keep stack 8 bytes aligned
         VPUSH   {d8-d15}
-        SUB     sp, sp, #4    @keep stack 8 bytes aligned
 
         qInp1   .qn Q0.F32
         qInp2   .qn Q1.F32
@@ -320,9 +319,8 @@ fftCopyLoop:
 
 fftEnd:
         @/* Retureq From Function*/
-        ADD     sp, sp, #4
         VPOP    {d8-d15}
-        POP     {r4-r11,pc}
+        POP     {r4-r12,pc}
 
         @/*
         @ * @brief  Core radix-4 IFFT of floating-point data.  Do not call this function directly.
@@ -341,9 +339,8 @@ fftEnd:
 
 ne10_radix4_butterfly_inverse_float_neon:
 
-        PUSH    {r4-r11,lr}
+        PUSH    {r4-r12,lr}    @push r12: to keep stack 8 bytes aligned
         VPUSH   {d8-d15}
-        SUB     sp, sp, #4    @keep stack 8 bytes aligned
 
         qInp1   .qn Q0.F32
         qInp2   .qn Q1.F32
@@ -664,6 +661,8 @@ ifftLastStageSetLoop:
         VDUP.S32      q8,grpCount
         VCVT.F32.S32  q8,  q8
         VRECPE.F32    q8,  q8
+        @LDR           grpCount,[sp,#0]          @revert the original value
+        @VDUP.f32      q8,grpCount
 
         VMUL    qInp1,qInp1,qRe1
         VMUL    qInp2,qInp2,qRe1
@@ -725,9 +724,8 @@ ifftCopyLoop:
 
 ifftEnd:
         @/* Retureq From Function*/
-        ADD     sp, sp, #4
         VPOP    {d8-d15}
-        POP     {r4-r11,pc}
+        POP     {r4-r12,pc}
 
 
         .end
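diff --git a/modules/dsp/NE10_fir.neon.s b/modules/dsp/NE10_fir.neon.s
index 9a721fb..9797bd7 100644
--- a/modules/dsp/NE10_fir.neon.s
+++ b/modules/dsp/NE10_fir.neon.s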
@@ -66,8 +66,7 @@
         .thumb_func
 
 ne10_fir_float_neon:
-                    PUSH    {r4-r11,lr}
-                    SUB     sp, sp, #4    @keep stack 8 bytes aligned
+                    PUSH    {r4-r12,lr}    @push r12: to keep stack 8 bytes aligned
 @/*ARM Registers*/
 pStateStruct     .req   R0
 pSrc             .req   R1
@@ -347,8 +346,7 @@ firEnd:
                     ADD         pStateCurnt,pStateCurnt,mask, LSL #2
 
                     @/*Return From Function*/
-                    ADD     sp, sp, #4
-                    POP     {r4-r11,pc}
+                    POP     {r4-r12,pc}
 @/*ARM Registers*/
 .unreq    pStateStruct
 .unreq    pSrc
@@ -439,9 +437,8 @@ firEnd:
 
 ne10_fir_decimate_float_neon:
 
-                            PUSH    {r4-r11,lr}
+                            PUSH    {r4-r12,lr}    @push r12: to keep stack 8 bytes aligned
                             VPUSH   {d8-d9}
-                            SUB     sp, sp, #4    @keep stack 8 bytes aligned
 
 @/*ARM Registers*/
 pStateStruct     .req   R0
@@ -762,9 +759,8 @@ firDecimateEnd:
                     ADD         pX,pX,mask, LSL #2
 
                     @// Return From Function
-                    ADD     sp, sp, #4
                     VPOP    {d8-d9}
-                    POP     {r4-r11,pc}
+                    POP     {r4-r12,pc}
 
 @/*ARM Registers*/
 .unreq    pStateStruct
@@ -874,8 +870,7 @@ firDecimateEnd:
         .thumb_func
 
 ne10_fir_interpolate_float_neon:
-                            PUSH    {r4-r11,lr}
-                            SUB     sp, sp, #4    @keep stack 8 bytes aligned
+                            PUSH    {r4-r12,lr}    @push r12: to keep stack 8 bytes aligned
 
 
 @/*ARM Registers*/
@@ -1165,8 +1160,7 @@ firInterpolateEnd:
                     ADD         pStateCurnt,pStateCurnt,mask, LSL #2
 
                     @/*Return From Function*/
-                    ADD     sp, sp, #4
-                    POP     {r4-r11,pc}
+                    POP     {r4-r12,pc}
 @/*ARM Registers*/
 .unreq    pStateStruct
 .unreq    pSrc
@@ -1265,8 +1259,7 @@ firInterpolateEnd:
 
 ne10_fir_lattice_float_neon:
 
-                        PUSH    {r4-r11,lr}
-                        SUB     sp, sp, #4    @keep stack 8 bytes aligned
+                        PUSH    {r4-r12,lr}    @push r12: to keep stack 8 bytes aligned
 
 @/*ARM Registers*/
 pStateStruct     .req   R0
@@ -1553,8 +1546,7 @@ firLatticeEndinnerLoop1:
 
 firLatticeEnd:
                             @/*Return From Function*/
-                            ADD     sp, sp, #4
-                            POP     {r4-r11,pc}
+                            POP     {r4-r12,pc}
 
 @/*ARM Registers*/
 .unreq    pStateStruct
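diff --git a/modules/dsp/NE10_iir.neon.s b/modules/dsp/NE10_iir.neon.s
index 689868e..c0358cc 100644
--- a/modules/dsp/NE10_iir.neon.s
+++ b/modules/dsp/NE10_iir.neon.s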
@@ -60,9 +60,8 @@
         .thumb_func
 
 ne10_iir_lattice_float_neon:
-                        PUSH    {r4-r11,lr}
+                        PUSH    {r4-r12,lr}    @push r12: to keep stack 8 bytes aligned
                         VPUSH   {d8-d9}
-                        SUB     sp, sp, #4    @keep stack 8 bytes aligned
 
 @/*ARM Registers*/
 pStateStruct     .req   R0
@@ -394,8 +393,7 @@ iirLatticeEnd:
 .unreq    qGnext
 .unreq    dGnext_0
 .unreq    dGnext_1
-                            ADD     sp, sp, #4
                             VPOP    {d8-d9}
-                            POP     {r4-r11,pc}
+                            POP     {r4-r12,pc}
 
         .end