From: yang Date: Wed, 10 Oct 2012 06:20:54 +0000 (+0800) Subject: make the following modification X-Git-Tag: v1.0.0~26^2~2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f8357c04b4a7416675d1b10d1d97edb778b1479c;p=platform%2Fupstream%2Fne10.git Make the following modifications: 1. Removed the ARM inline assembly (CLZ) from the C file NE10_cfft.c, replacing it with a portable C loop. 2. Modified the interface of rfft: the C and NEON rfft functions now take an additional pTemp buffer argument. --- diff --git a/inc/NE10_dsp.h b/inc/NE10_dsp.h index c483272..1cc492e 100644 --- a/inc/NE10_dsp.h +++ b/inc/NE10_dsp.h @@ -48,7 +48,8 @@ extern void (*ne10_radix4_butterfly_inverse_float)(ne10_float32_t *pDst, extern void (*ne10_rfft_float)( const ne10_rfft_instance_f32_t * S, ne10_float32_t * pSrc, - ne10_float32_t * pDst); + ne10_float32_t * pDst, + ne10_float32_t * pTemp); /* init functions*/ extern ne10_result_t ne10_cfft_radix4_init_float(ne10_cfft_radix4_instance_f32_t * S, ne10_uint16_t fftLen, @@ -72,7 +73,8 @@ extern void ne10_radix4_butterfly_inverse_float_c(ne10_float32_t *pDst, extern void ne10_rfft_float_c( const ne10_rfft_instance_f32_t * S, ne10_float32_t * pSrc, - ne10_float32_t * pDst); + ne10_float32_t * pDst, + ne10_float32_t * pTemp); /* NEON version*/ @@ -89,7 +91,8 @@ extern void ne10_radix4_butterfly_inverse_float_neon(ne10_float32_t *pDst, extern void ne10_rfft_float_neon( const ne10_rfft_instance_f32_t * S, ne10_float32_t * pSrc, - ne10_float32_t * pDst); + ne10_float32_t * pDst, + ne10_float32_t * pTemp); #ifdef __cplusplus } diff --git a/modules/dsp/NE10_cfft.c b/modules/dsp/NE10_cfft.c index 4e50462..ba3d9dd 100644 --- a/modules/dsp/NE10_cfft.c +++ b/modules/dsp/NE10_cfft.c @@ -225,8 +225,13 @@ void ne10_radix4_butterfly_float_c( } /* if the N is even power of 4, copy the output to dst buffer */ - asm("CLZ %[result], %[input]" : [result] "=r"(SubFFTNum) : [input] "r"(N)); - SubFFTNum = (32-SubFFTNum)>>1; + SubFFTNum = 0; + set = N; + while (set > 1) + { + set = set>>2; + SubFFTNum++; + } if((SubFFTNum&1) == 0) { @@ -550,8 +555,13 @@ void ne10_radix4_butterfly_inverse_float_c( /* if 
the N is even power of 4, copy the output to dst buffer */ - asm("CLZ %[result], %[input]" : [result] "=r"(SubFFTNum) : [input] "r"(N)); - SubFFTNum = (32-SubFFTNum)>>1; + SubFFTNum = 0; + set = N; + while (set > 1) + { + set = set>>2; + SubFFTNum++; + } if((SubFFTNum&1) == 0) { diff --git a/modules/dsp/NE10_init_dsp.c b/modules/dsp/NE10_init_dsp.c index 93de12c..fefcc1e 100644 --- a/modules/dsp/NE10_init_dsp.c +++ b/modules/dsp/NE10_init_dsp.c @@ -49,4 +49,5 @@ void (*ne10_radix4_butterfly_inverse_float)(ne10_float32_t *pDst, void (*ne10_rfft_float)( const ne10_rfft_instance_f32_t * S, ne10_float32_t * pSrc, - ne10_float32_t * pDst); + ne10_float32_t * pDst, + ne10_float32_t * pTemp); diff --git a/modules/dsp/NE10_rfft.c b/modules/dsp/NE10_rfft.c index 5d7fb56..1c0c036 100644 --- a/modules/dsp/NE10_rfft.c +++ b/modules/dsp/NE10_rfft.c @@ -168,7 +168,8 @@ static void ne10_split_rifft_float_c( void ne10_rfft_float_c( const ne10_rfft_instance_f32_t * S, ne10_float32_t * pSrc, - ne10_float32_t * pDst) + ne10_float32_t * pDst, + ne10_float32_t * pTemp) { const ne10_cfft_radix4_instance_f32_t *S_CFFT = S->p_cfft; @@ -177,16 +178,16 @@ void ne10_rfft_float_c( { /* Real IFFT core process */ ne10_split_rifft_float_c(pSrc, S->fft_len_real, S->p_twiddle_A_real, - S->p_twiddle_B_real, pDst); + S->p_twiddle_B_real, pTemp); /* Complex radix-4 IFFT process */ - ne10_radix4_butterfly_inverse_float_c(pDst, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle); + ne10_radix4_butterfly_inverse_float_c(pDst, pTemp, S_CFFT->fft_len, S_CFFT->p_twiddle); } else { /* Complex radix-4 FFT process */ - ne10_radix4_butterfly_float_c(pDst, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle); + ne10_radix4_butterfly_float_c(pTemp, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle); /* Real FFT core process */ - ne10_split_rfft_float_c(pSrc, S->fft_len_real, S->p_twiddle_A_real, + ne10_split_rfft_float_c(pTemp, S->fft_len_real, S->p_twiddle_A_real, S->p_twiddle_B_real, pDst); } diff --git a/modules/dsp/NE10_rfft.neon.c 
b/modules/dsp/NE10_rfft.neon.c index a7ad649..ec39b0d 100644 --- a/modules/dsp/NE10_rfft.neon.c +++ b/modules/dsp/NE10_rfft.neon.c @@ -463,7 +463,8 @@ static void ne10_split_rifft_float_neon( void ne10_rfft_float_neon( const ne10_rfft_instance_f32_t * S, ne10_float32_t * pSrc, - ne10_float32_t * pDst) + ne10_float32_t * pDst, + ne10_float32_t * pTemp) { const ne10_cfft_radix4_instance_f32_t *S_CFFT = S->p_cfft; @@ -472,16 +473,16 @@ void ne10_rfft_float_neon( { /* Real IFFT core process */ ne10_split_rifft_float_neon(pSrc, S->fft_len_real, S->p_twiddle_A_real, - S->p_twiddle_B_real, pDst); + S->p_twiddle_B_real, pTemp); /* Complex radix-4 IFFT process */ - ne10_radix4_butterfly_inverse_float_neon(pDst, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle); + ne10_radix4_butterfly_inverse_float_neon(pDst, pTemp, S_CFFT->fft_len, S_CFFT->p_twiddle); } else { /* Complex radix-4 FFT process */ - ne10_radix4_butterfly_float_neon(pDst, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle); + ne10_radix4_butterfly_float_neon(pTemp, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle); /* Real FFT core process */ - ne10_split_rfft_float_neon(pSrc, S->fft_len_real, S->p_twiddle_A_real, + ne10_split_rfft_float_neon(pTemp, S->fft_len_real, S->p_twiddle_A_real, S->p_twiddle_B_real, pDst); }