/* Extern declaration of the ne10_rfft_float function pointer.
 * This hunk changes the parameter list from (S, pSrc, pDst) to
 * (S, pSrc, pDst, pTemp), matching the ne10_rfft_float_c and
 * ne10_rfft_float_neon prototypes.
 * NOTE(review): presumably pointed at one of those two variants during
 * library initialisation; confirm against the init code. */
extern void (*ne10_rfft_float)(
const ne10_rfft_instance_f32_t * S,
ne10_float32_t * pSrc,
- ne10_float32_t * pDst);
+ ne10_float32_t * pDst,
+ ne10_float32_t * pTemp);
/* init functions*/
extern ne10_result_t ne10_cfft_radix4_init_float(ne10_cfft_radix4_instance_f32_t * S,
ne10_uint16_t fftLen,
/* Prototype of the _c variant of the real FFT/IFFT routine.
 * This hunk appends a fourth parameter, pTemp. In the definition, pTemp is
 * the buffer handed between the radix-4 butterfly call and the split
 * (real FFT core) call.
 * NOTE(review): presumably caller-provided scratch storage; the required
 * size is not visible here and should be confirmed. */
extern void ne10_rfft_float_c(
const ne10_rfft_instance_f32_t * S,
ne10_float32_t * pSrc,
- ne10_float32_t * pDst);
+ ne10_float32_t * pDst,
+ ne10_float32_t * pTemp);
/* NEON version*/
/* Prototype of the NEON variant of the real FFT/IFFT routine.
 * This hunk appends a fourth parameter, pTemp, keeping the signature in step
 * with ne10_rfft_float_c. In the definition, pTemp is the buffer handed
 * between the radix-4 butterfly call and the split (real FFT core) call.
 * NOTE(review): presumably caller-provided scratch storage; the required
 * size is not visible here and should be confirmed. */
extern void ne10_rfft_float_neon(
const ne10_rfft_instance_f32_t * S,
ne10_float32_t * pSrc,
- ne10_float32_t * pDst);
+ ne10_float32_t * pDst,
+ ne10_float32_t * pTemp);
#ifdef __cplusplus
}
}
/* if N is an even power of 4, copy the output to the dst buffer */
- asm("CLZ %[result], %[input]" : [result] "=r"(SubFFTNum) : [input] "r"(N));
- SubFFTNum = (32-SubFFTNum)>>1;
+ SubFFTNum = 0;
+ set = N;
+ while (set > 1)
+ {
+ set = set>>2;
+ SubFFTNum++;
+ }
if((SubFFTNum&1) == 0)
{
/* if N is an even power of 4, copy the output to the dst buffer */
- asm("CLZ %[result], %[input]" : [result] "=r"(SubFFTNum) : [input] "r"(N));
- SubFFTNum = (32-SubFFTNum)>>1;
+ SubFFTNum = 0;
+ set = N;
+ while (set > 1)
+ {
+ set = set>>2;
+ SubFFTNum++;
+ }
if((SubFFTNum&1) == 0)
{
/* Definition of the ne10_rfft_float function pointer.
 * This hunk changes the parameter list from (S, pSrc, pDst) to
 * (S, pSrc, pDst, pTemp), the same parameter list as ne10_rfft_float_c
 * and ne10_rfft_float_neon.
 * NOTE(review): the assignment to this pointer is not visible in this
 * fragment; confirm where it is set. */
void (*ne10_rfft_float)(
const ne10_rfft_instance_f32_t * S,
ne10_float32_t * pSrc,
- ne10_float32_t * pDst);
+ ne10_float32_t * pDst,
+ ne10_float32_t * pTemp);
void ne10_rfft_float_c(
const ne10_rfft_instance_f32_t * S,
ne10_float32_t * pSrc,
- ne10_float32_t * pDst)
+ ne10_float32_t * pDst,
+ ne10_float32_t * pTemp)
{
const ne10_cfft_radix4_instance_f32_t *S_CFFT = S->p_cfft;
{
/* Real IFFT core process */
ne10_split_rifft_float_c(pSrc, S->fft_len_real, S->p_twiddle_A_real,
- S->p_twiddle_B_real, pDst);
+ S->p_twiddle_B_real, pTemp);
/* Complex radix-4 IFFT process */
- ne10_radix4_butterfly_inverse_float_c(pDst, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle);
+ ne10_radix4_butterfly_inverse_float_c(pDst, pTemp, S_CFFT->fft_len, S_CFFT->p_twiddle);
}
else
{
/* Complex radix-4 FFT process */
- ne10_radix4_butterfly_float_c(pDst, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle);
+ ne10_radix4_butterfly_float_c(pTemp, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle);
/* Real FFT core process */
- ne10_split_rfft_float_c(pSrc, S->fft_len_real, S->p_twiddle_A_real,
+ ne10_split_rfft_float_c(pTemp, S->fft_len_real, S->p_twiddle_A_real,
S->p_twiddle_B_real, pDst);
}
void ne10_rfft_float_neon(
const ne10_rfft_instance_f32_t * S,
ne10_float32_t * pSrc,
- ne10_float32_t * pDst)
+ ne10_float32_t * pDst,
+ ne10_float32_t * pTemp)
{
const ne10_cfft_radix4_instance_f32_t *S_CFFT = S->p_cfft;
{
/* Real IFFT core process */
ne10_split_rifft_float_neon(pSrc, S->fft_len_real, S->p_twiddle_A_real,
- S->p_twiddle_B_real, pDst);
+ S->p_twiddle_B_real, pTemp);
/* Complex radix-4 IFFT process */
- ne10_radix4_butterfly_inverse_float_neon(pDst, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle);
+ ne10_radix4_butterfly_inverse_float_neon(pDst, pTemp, S_CFFT->fft_len, S_CFFT->p_twiddle);
}
else
{
/* Complex radix-4 FFT process */
- ne10_radix4_butterfly_float_neon(pDst, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle);
+ ne10_radix4_butterfly_float_neon(pTemp, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle);
/* Real FFT core process */
- ne10_split_rfft_float_neon(pSrc, S->fft_len_real, S->p_twiddle_A_real,
+ ne10_split_rfft_float_neon(pTemp, S->fft_len_real, S->p_twiddle_A_real,
S->p_twiddle_B_real, pDst);
}