Make the following modifications:
authoryang <yang.zhang@arm.com>
Wed, 10 Oct 2012 06:20:54 +0000 (14:20 +0800)
committeryang <yang.zhang@arm.com>
Wed, 10 Oct 2012 06:20:54 +0000 (14:20 +0800)
1. Removed the ARM inline assembly (CLZ) from the C file
2. Modified the interface of rfft (added a pTemp buffer parameter)

inc/NE10_dsp.h
modules/dsp/NE10_cfft.c
modules/dsp/NE10_init_dsp.c
modules/dsp/NE10_rfft.c
modules/dsp/NE10_rfft.neon.c

index c483272..1cc492e 100644 (file)
@@ -48,7 +48,8 @@ extern void (*ne10_radix4_butterfly_inverse_float)(ne10_float32_t *pDst,
 extern void (*ne10_rfft_float)(
                      const ne10_rfft_instance_f32_t * S,
                      ne10_float32_t * pSrc,
-                     ne10_float32_t * pDst);
+                     ne10_float32_t * pDst,
+                     ne10_float32_t * pTemp);
 /* init functions*/
 extern ne10_result_t ne10_cfft_radix4_init_float(ne10_cfft_radix4_instance_f32_t * S,
                      ne10_uint16_t fftLen,
@@ -72,7 +73,8 @@ extern void ne10_radix4_butterfly_inverse_float_c(ne10_float32_t *pDst,
 extern void ne10_rfft_float_c(
                      const ne10_rfft_instance_f32_t * S,
                      ne10_float32_t * pSrc,
-                     ne10_float32_t * pDst);
+                     ne10_float32_t * pDst,
+                     ne10_float32_t * pTemp);
 
 
 /* NEON version*/
@@ -89,7 +91,8 @@ extern void ne10_radix4_butterfly_inverse_float_neon(ne10_float32_t *pDst,
 extern void ne10_rfft_float_neon(
                      const ne10_rfft_instance_f32_t * S,
                      ne10_float32_t * pSrc,
-                     ne10_float32_t * pDst);
+                     ne10_float32_t * pDst,
+                     ne10_float32_t * pTemp);
 
 #ifdef __cplusplus
 }
index 4e50462..ba3d9dd 100644 (file)
@@ -225,8 +225,13 @@ void ne10_radix4_butterfly_float_c(
     }
 
     /* if the N is even power of 4, copy the output to dst buffer */
-    asm("CLZ %[result], %[input]" : [result] "=r"(SubFFTNum) : [input] "r"(N));
-    SubFFTNum = (32-SubFFTNum)>>1;
+    SubFFTNum = 0;
+    set = N;
+    while (set > 1)
+    {
+        set = set>>2;
+        SubFFTNum++;
+    }
 
     if((SubFFTNum&1) == 0)
     {
@@ -550,8 +555,13 @@ void ne10_radix4_butterfly_inverse_float_c(
 
 
     /* if the N is even power of 4, copy the output to dst buffer */
-    asm("CLZ %[result], %[input]" : [result] "=r"(SubFFTNum) : [input] "r"(N));
-    SubFFTNum = (32-SubFFTNum)>>1;
+    SubFFTNum = 0;
+    set = N;
+    while (set > 1)
+    {
+        set = set>>2;
+        SubFFTNum++;
+    }
 
     if((SubFFTNum&1) == 0)
     {
index 93de12c..fefcc1e 100644 (file)
@@ -49,4 +49,5 @@ void (*ne10_radix4_butterfly_inverse_float)(ne10_float32_t *pDst,
 void (*ne10_rfft_float)(
                      const ne10_rfft_instance_f32_t * S,
                      ne10_float32_t * pSrc,
-                     ne10_float32_t * pDst);
+                     ne10_float32_t * pDst,
+                     ne10_float32_t * pTemp);
index 5d7fb56..1c0c036 100644 (file)
@@ -168,7 +168,8 @@ static void ne10_split_rifft_float_c(
 void ne10_rfft_float_c(
                      const ne10_rfft_instance_f32_t * S,
                      ne10_float32_t * pSrc,
-                     ne10_float32_t * pDst)
+                     ne10_float32_t * pDst,
+                     ne10_float32_t * pTemp)
 {
     const ne10_cfft_radix4_instance_f32_t *S_CFFT = S->p_cfft;
 
@@ -177,16 +178,16 @@ void ne10_rfft_float_c(
     {
         /*  Real IFFT core process */
         ne10_split_rifft_float_c(pSrc, S->fft_len_real, S->p_twiddle_A_real,
-                S->p_twiddle_B_real, pDst);
+                S->p_twiddle_B_real, pTemp);
         /* Complex radix-4 IFFT process */
-        ne10_radix4_butterfly_inverse_float_c(pDst, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle);
+        ne10_radix4_butterfly_inverse_float_c(pDst, pTemp, S_CFFT->fft_len, S_CFFT->p_twiddle);
     }
     else
     {
         /* Complex radix-4 FFT process */
-        ne10_radix4_butterfly_float_c(pDst, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle);
+        ne10_radix4_butterfly_float_c(pTemp, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle);
         /*  Real FFT core process */
-        ne10_split_rfft_float_c(pSrc, S->fft_len_real, S->p_twiddle_A_real,
+        ne10_split_rfft_float_c(pTemp, S->fft_len_real, S->p_twiddle_A_real,
                 S->p_twiddle_B_real, pDst);
     }
 
index a7ad649..ec39b0d 100644 (file)
@@ -463,7 +463,8 @@ static void ne10_split_rifft_float_neon(
 void ne10_rfft_float_neon(
                      const ne10_rfft_instance_f32_t * S,
                      ne10_float32_t * pSrc,
-                     ne10_float32_t * pDst)
+                     ne10_float32_t * pDst,
+                     ne10_float32_t * pTemp)
 {
     const ne10_cfft_radix4_instance_f32_t *S_CFFT = S->p_cfft;
 
@@ -472,16 +473,16 @@ void ne10_rfft_float_neon(
     {
         /*  Real IFFT core process */
         ne10_split_rifft_float_neon(pSrc, S->fft_len_real, S->p_twiddle_A_real,
-                S->p_twiddle_B_real, pDst);
+                S->p_twiddle_B_real, pTemp);
         /* Complex radix-4 IFFT process */
-        ne10_radix4_butterfly_inverse_float_neon(pDst, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle);
+        ne10_radix4_butterfly_inverse_float_neon(pDst, pTemp, S_CFFT->fft_len, S_CFFT->p_twiddle);
     }
     else
     {
         /* Complex radix-4 FFT process */
-        ne10_radix4_butterfly_float_neon(pDst, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle);
+        ne10_radix4_butterfly_float_neon(pTemp, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle);
         /*  Real FFT core process */
-        ne10_split_rfft_float_neon(pSrc, S->fft_len_real, S->p_twiddle_A_real,
+        ne10_split_rfft_float_neon(pTemp, S->fft_len_real, S->p_twiddle_A_real,
                 S->p_twiddle_B_real, pDst);
     }