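Add a pTemp intermediate-buffer parameter to ne10_rfft_float (function pointer, C and NEON versions), and replace the inline-asm CLZ computation of SubFFTNum in the radix-4 butterfly C code with a plain C loop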

author yang <yang.zhang@arm.com>

Wed, 10 Oct 2012 06:20:54 +0000 (14:20 +0800)

committer yang <yang.zhang@arm.com>

Wed, 10 Oct 2012 06:20:54 +0000 (14:20 +0800)
author yang <yang.zhang@arm.com>
Wed, 10 Oct 2012 06:20:54 +0000 (14:20 +0800)
committer yang <yang.zhang@arm.com>
Wed, 10 Oct 2012 06:20:54 +0000 (14:20 +0800)
diff --git a/inc/NE10_dsp.h b/inc/NE10_dsp.h

index c4832723034a500feb649f9ec9376d32227429e6..1cc492e79958b38cd69392a5ea4e5804870d0573 100644 (file)
--- a/inc/NE10_dsp.h
+++ b/inc/NE10_dsp.h
@@ -48,7 +48,8 @@ extern void (*ne10_radix4_butterfly_inverse_float)(ne10_float32_t *pDst,
  extern void (*ne10_rfft_float)(
                       const ne10_rfft_instance_f32_t * S,
                       ne10_float32_t * pSrc,
-                     ne10_float32_t * pDst);
+                     ne10_float32_t * pDst,
+                     ne10_float32_t * pTemp);
  /* init functions*/
  extern ne10_result_t ne10_cfft_radix4_init_float(ne10_cfft_radix4_instance_f32_t * S,
                       ne10_uint16_t fftLen,
@@ -72,7 +73,8 @@ extern void ne10_radix4_butterfly_inverse_float_c(ne10_float32_t *pDst,
  extern void ne10_rfft_float_c(
                       const ne10_rfft_instance_f32_t * S,
                       ne10_float32_t * pSrc,
-                     ne10_float32_t * pDst);
+                     ne10_float32_t * pDst,
+                     ne10_float32_t * pTemp);
  
  
  /* NEON version*/
@@ -89,7 +91,8 @@ extern void ne10_radix4_butterfly_inverse_float_neon(ne10_float32_t *pDst,
  extern void ne10_rfft_float_neon(
                       const ne10_rfft_instance_f32_t * S,
                       ne10_float32_t * pSrc,
-                     ne10_float32_t * pDst);
+                     ne10_float32_t * pDst,
+                     ne10_float32_t * pTemp);
  
  #ifdef __cplusplus
  }
diff --git a/modules/dsp/NE10_cfft.c b/modules/dsp/NE10_cfft.c

index 4e504624f2b868605d359886bd9da1f2a3ae16e8..ba3d9ddc7268d81931047334c31fa4f571dd3e3e 100644 (file)
--- a/modules/dsp/NE10_cfft.c
+++ b/modules/dsp/NE10_cfft.c
@@ -225,8 +225,13 @@ void ne10_radix4_butterfly_float_c(
      }
  
      /* if the N is even power of 4, copy the output to dst buffer */
-    asm("CLZ %[result], %[input]" : [result] "=r"(SubFFTNum) : [input] "r"(N));
-    SubFFTNum = (32-SubFFTNum)>>1;
+    SubFFTNum = 0;
+    set = N;
+    while (set > 1)
+    {
+        set = set>>2;
+        SubFFTNum++;
+    }
  
      if((SubFFTNum&1) == 0)
      {
@@ -550,8 +555,13 @@ void ne10_radix4_butterfly_inverse_float_c(
  
  
      /* if the N is even power of 4, copy the output to dst buffer */
-    asm("CLZ %[result], %[input]" : [result] "=r"(SubFFTNum) : [input] "r"(N));
-    SubFFTNum = (32-SubFFTNum)>>1;
+    SubFFTNum = 0;
+    set = N;
+    while (set > 1)
+    {
+        set = set>>2;
+        SubFFTNum++;
+    }
  
      if((SubFFTNum&1) == 0)
      {
diff --git a/modules/dsp/NE10_init_dsp.c b/modules/dsp/NE10_init_dsp.c

index 93de12c6e7c8cb98cb7c9e9bb1cbf2585ce5cb82..fefcc1e31859f7595ce0ef29c13d1c164b9c0d7b 100644 (file)
--- a/modules/dsp/NE10_init_dsp.c
+++ b/modules/dsp/NE10_init_dsp.c
@@ -49,4 +49,5 @@ void (*ne10_radix4_butterfly_inverse_float)(ne10_float32_t *pDst,
  void (*ne10_rfft_float)(
                       const ne10_rfft_instance_f32_t * S,
                       ne10_float32_t * pSrc,
-                     ne10_float32_t * pDst);
+                     ne10_float32_t * pDst,
+                     ne10_float32_t * pTemp);
diff --git a/modules/dsp/NE10_rfft.c b/modules/dsp/NE10_rfft.c

index 5d7fb5689f60fdcd9cb3c493eb2400e891ac9c44..1c0c036b7b4e542411e0703baa48f5e5d0b7e40f 100644 (file)
--- a/modules/dsp/NE10_rfft.c
+++ b/modules/dsp/NE10_rfft.c
@@ -168,7 +168,8 @@ static void ne10_split_rifft_float_c(
  void ne10_rfft_float_c(
                       const ne10_rfft_instance_f32_t * S,
                       ne10_float32_t * pSrc,
-                     ne10_float32_t * pDst)
+                     ne10_float32_t * pDst,
+                     ne10_float32_t * pTemp)
  {
      const ne10_cfft_radix4_instance_f32_t *S_CFFT = S->p_cfft;
  
@@ -177,16 +178,16 @@ void ne10_rfft_float_c(
      {
          /*  Real IFFT core process */
          ne10_split_rifft_float_c(pSrc, S->fft_len_real, S->p_twiddle_A_real,
-                S->p_twiddle_B_real, pDst);
+                S->p_twiddle_B_real, pTemp);
          /* Complex radix-4 IFFT process */
-        ne10_radix4_butterfly_inverse_float_c(pDst, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle);
+        ne10_radix4_butterfly_inverse_float_c(pDst, pTemp, S_CFFT->fft_len, S_CFFT->p_twiddle);
      }
      else
      {
          /* Complex radix-4 FFT process */
-        ne10_radix4_butterfly_float_c(pDst, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle);
+        ne10_radix4_butterfly_float_c(pTemp, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle);
          /*  Real FFT core process */
-        ne10_split_rfft_float_c(pSrc, S->fft_len_real, S->p_twiddle_A_real,
+        ne10_split_rfft_float_c(pTemp, S->fft_len_real, S->p_twiddle_A_real,
                  S->p_twiddle_B_real, pDst);
      }
  
diff --git a/modules/dsp/NE10_rfft.neon.c b/modules/dsp/NE10_rfft.neon.c

index a7ad649a9a4dc9b3e645c56546864987603cae50..ec39b0d1557f5ca03c5167340642efb1399256fb 100644 (file)
--- a/modules/dsp/NE10_rfft.neon.c
+++ b/modules/dsp/NE10_rfft.neon.c
@@ -463,7 +463,8 @@ static void ne10_split_rifft_float_neon(
  void ne10_rfft_float_neon(
                       const ne10_rfft_instance_f32_t * S,
                       ne10_float32_t * pSrc,
-                     ne10_float32_t * pDst)
+                     ne10_float32_t * pDst,
+                     ne10_float32_t * pTemp)
  {
      const ne10_cfft_radix4_instance_f32_t *S_CFFT = S->p_cfft;
  
@@ -472,16 +473,16 @@ void ne10_rfft_float_neon(
      {
          /*  Real IFFT core process */
          ne10_split_rifft_float_neon(pSrc, S->fft_len_real, S->p_twiddle_A_real,
-                S->p_twiddle_B_real, pDst);
+                S->p_twiddle_B_real, pTemp);
          /* Complex radix-4 IFFT process */
-        ne10_radix4_butterfly_inverse_float_neon(pDst, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle);
+        ne10_radix4_butterfly_inverse_float_neon(pDst, pTemp, S_CFFT->fft_len, S_CFFT->p_twiddle);
      }
      else
      {
          /* Complex radix-4 FFT process */
-        ne10_radix4_butterfly_float_neon(pDst, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle);
+        ne10_radix4_butterfly_float_neon(pTemp, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle);
          /*  Real FFT core process */
-        ne10_split_rfft_float_neon(pSrc, S->fft_len_real, S->p_twiddle_A_real,
+        ne10_split_rfft_float_neon(pTemp, S->fft_len_real, S->p_twiddle_A_real,
                  S->p_twiddle_B_real, pDst);
      }
author	yang <yang.zhang@arm.com>
	Wed, 10 Oct 2012 06:20:54 +0000 (14:20 +0800)
committer	yang <yang.zhang@arm.com>
	Wed, 10 Oct 2012 06:20:54 +0000 (14:20 +0800)
inc/NE10_dsp.h		patch | blob | history
modules/dsp/NE10_cfft.c		patch | blob | history
modules/dsp/NE10_init_dsp.c		patch | blob | history
modules/dsp/NE10_rfft.c		patch | blob | history
modules/dsp/NE10_rfft.neon.c		patch | blob | history