From c3bbc6148cbbc7cb9c0f5e94456bfaac4c77976b Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Fri, 30 May 2014 19:36:23 +0800 Subject: [PATCH] optimize float complex FFT 1. To optimize FFT, the algorithm is changed. Bit reversal is removed and radix 8 is added. 2. After testing, the optimized FFT shows the best performance, so the old implementations are removed. The performance results are as follows: toolchain: gcc 4.8 at -O2 The OMX FFT execution time is the baseline. The lower the ratio, the better the performance. panda board A9: | |16 |32 |64 |128 |256 |512 |1024 |2048 |4096 | |Ne10 |84.27%|89.57%|85.63%|85.79%|87.89%|87.91%|83.51%|97.08%|92.68%| |omx |100% |100% |100% |100% |100% |100% |100% |100% |100% | nexus10 A15: | |16 |32 |64 |128 |256 |512 |1024 |2048 |4096 | |Ne10 |84.88%|98.43%|89.46%|101.0%|99.24%|103.2%|93.80%|105.1%|97.44%| |omx |100% |100% |100% |100% |100% |100% |100% |100% |100% | Change-Id: I363ee1602f08532e566d3a5a4f3d7a99972a1283 --- inc/NE10_dsp.h | 51 - modules/CMakeLists.txt | 6 - modules/dsp/NE10_cfft.c | 718 ------------- modules/dsp/NE10_cfft.neon.s | 738 ------------- modules/dsp/NE10_cfft_init.c | 387 ------- modules/dsp/NE10_fft.h | 41 +- modules/dsp/NE10_fft_float32.c | 940 ++++++++++++---- modules/dsp/NE10_fft_float32.neon.c | 479 +-------- modules/dsp/NE10_fft_float32.neon.s | 1657 +++++++++++------------------ modules/dsp/NE10_init_dsp.c | 24 - modules/dsp/NE10_rfft.c | 292 ----- modules/dsp/NE10_rfft.neon.c | 508 --------- modules/dsp/NE10_rfft_init.c | 1180 -------------------- modules/dsp/test/test_main.c | 16 +- modules/dsp/test/test_suite_cfft.c | 648 ----------- modules/dsp/test/test_suite_fft_float32.c | 20 +- modules/dsp/test/test_suite_fft_int16.c | 12 +- modules/dsp/test/test_suite_fft_int32.c | 24 +- modules/dsp/test/test_suite_rfft.c | 663 ------------ test/CMakeLists.txt | 2 - 20 files changed, 1390 insertions(+), 7016 deletions(-) delete mode 100644 modules/dsp/NE10_cfft.c delete mode 100644 modules/dsp/NE10_cfft.neon.s 
delete mode 100644 modules/dsp/NE10_cfft_init.c delete mode 100644 modules/dsp/NE10_rfft.c delete mode 100644 modules/dsp/NE10_rfft.neon.c delete mode 100644 modules/dsp/NE10_rfft_init.c delete mode 100644 modules/dsp/test/test_suite_cfft.c delete mode 100644 modules/dsp/test/test_suite_rfft.c diff --git a/inc/NE10_dsp.h b/inc/NE10_dsp.h index dbb2fa0..9360438 100644 --- a/inc/NE10_dsp.h +++ b/inc/NE10_dsp.h @@ -46,22 +46,6 @@ extern "C" { /* fft functions*/ /* function pointers*/ - extern void (*ne10_radix4_butterfly_float) (ne10_float32_t *pDst, - ne10_float32_t *pSrc, - ne10_uint16_t N, - ne10_float32_t *pCoef); - - extern void (*ne10_radix4_butterfly_inverse_float) (ne10_float32_t *pDst, - ne10_float32_t *pSrc, - ne10_uint16_t N, - ne10_float32_t *pCoef, - ne10_float32_t onebyN); - - extern void (*ne10_rfft_float) (const ne10_rfft_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_float32_t * pTemp); - extern void (*ne10_fft_c2c_1d_float32) (ne10_fft_cpx_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_cpx_float32_t *twiddles, @@ -150,22 +134,6 @@ extern "C" { extern ne10_fft_r2c_cfg_int16_t ne10_fft_alloc_r2c_int16 (ne10_int32_t nfft); /* C version*/ - extern void ne10_radix4_butterfly_float_c (ne10_float32_t *pDst, - ne10_float32_t *pSrc, - ne10_uint16_t N, - ne10_float32_t *pCoef); - - extern void ne10_radix4_butterfly_inverse_float_c (ne10_float32_t *pDst, - ne10_float32_t *pSrc, - ne10_uint16_t N, - ne10_float32_t *pCoef, - ne10_float32_t onebyN); - - extern void ne10_rfft_float_c (const ne10_rfft_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_float32_t * pTemp); - extern void ne10_fft_c2c_1d_float32_c (ne10_fft_cpx_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_cpx_float32_t *twiddles, @@ -237,25 +205,6 @@ extern "C" { /* NEON version*/ - /** - * @addtogroup CFFT_CIFFT - * @{ - */ - extern void ne10_radix4_butterfly_float_neon (ne10_float32_t *pDst, - ne10_float32_t *pSrc, - 
ne10_uint16_t N, - ne10_float32_t *pCoef) - asm ("ne10_radix4_butterfly_float_neon"); - - extern void ne10_radix4_butterfly_inverse_float_neon (ne10_float32_t *pDst, - ne10_float32_t *pSrc, - ne10_uint16_t N, - ne10_float32_t *pCoef, - ne10_float32_t onebyN) - asm ("ne10_radix4_butterfly_inverse_float_neon"); - /** @} */ //end of CFFT_CIFFT group - - extern void ne10_rfft_float_neon (const ne10_rfft_instance_f32_t * S, ne10_float32_t * pSrc, ne10_float32_t * pDst, diff --git a/modules/CMakeLists.txt b/modules/CMakeLists.txt index 972ac56..aecc7de 100644 --- a/modules/CMakeLists.txt +++ b/modules/CMakeLists.txt @@ -173,13 +173,9 @@ if(NE10_ENABLE_DSP) # Add dsp C files. set(NE10_DSP_C_SRCS ${PROJECT_SOURCE_DIR}/common/NE10_mask_table.c - ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_cfft.c - ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_cfft_init.c - ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_rfft.c ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_float32.c ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_int32.c ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_int16.c - ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_rfft_init.c ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fir.c ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fir_init.c ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_iir.c @@ -188,7 +184,6 @@ if(NE10_ENABLE_DSP) # Add dsp intrinsic NEON files. set(NE10_DSP_INTRINSIC_SRCS - ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_rfft.neon.c ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_float32.neon.c ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_int32.neon.c ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_int16.neon.c @@ -201,7 +196,6 @@ if(NE10_ENABLE_DSP) # Add dsp NEON files. 
set(NE10_DSP_NEON_SRCS - ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_cfft.neon.s ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_float32.neon.s ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_int32.neon.s ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_int16.neon.s diff --git a/modules/dsp/NE10_cfft.c b/modules/dsp/NE10_cfft.c deleted file mode 100644 index 50aecf4..0000000 --- a/modules/dsp/NE10_cfft.c +++ /dev/null @@ -1,718 +0,0 @@ -/* - * Copyright 2012-14 ARM Limited - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of ARM Limited nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -/* - * NE10 Library : dsp/NE10_cfft.c - */ - -#include "NE10_types.h" -/** - * @ingroup groupDSPs - */ - -/** - * @defgroup CFFT_CIFFT Complex FFT - * - * \par - * Complex Fast Fourier Transform(CFFT) and Complex Inverse Fast Fourier Transform(CIFFT) is an efficient algorithm to compute Discrete Fourier Transform(DFT) and Inverse Discrete Fourier Transform(IDFT). - * Computational complexity of CFFT reduces drastically when compared to DFT. - * \par - * This set of functions implements CFFT/CIFFT - * for floating-point data types. The functions operate on out-of-place buffer which use different buffer for input and output. - * Complex input is stored in input buffer in an interleaved fashion. - * - * \par - * The functions operate on blocks of input and output data and each call to the function processes - * 2*fftLen samples through the transform. pSrc points to input arrays containing 2*fftLen values. - * \par - * The pDst points to the array of output buffer of size 2*fftLen and inputs and outputs are stored in an interleaved fashion as shown below. - *
 {real[0], imag[0], real[1], imag[1],..} 
- * - * \par Lengths supported by the transform: - * \par - * Internally, the functions utilize a radix-4 decimation in frequency(DIF) algorithm - * and the size of the FFT supported are of the lengths [16, 64, 256, 1024]. - * - * - * \par Algorithm: - * - * Complex Fast Fourier Transform: - * \par - * Input real and imaginary data: - *
- * x(n) = xa + j * ya
- * x(n+N/4 ) = xb + j * yb
- * x(n+N/2 ) = xc + j * yc
- * x(n+3N 4) = xd + j * yd
- * 
- * where N is length of FFT - * \par - * Output real and imaginary data: - *
- * X(4r) = xa'+ j * ya'
- * X(4r+1) = xb'+ j * yb'
- * X(4r+2) = xc'+ j * yc'
- * X(4r+3) = xd'+ j * yd'
- * 
- * \par - * Twiddle factors for radix-4 FFT: - *
- * Wn = co1 + j * (- si1)
- * W2n = co2 + j * (- si2)
- * W3n = co3 + j * (- si3)
- * 
- * - * \par - * \image html CFFT.gif "Radix-4 Decimation-in Frequency Complex Fast Fourier Transform" - * - * \par - * Output from Radix-4 CFFT Results in Digit reversal order. Interchange middle two branches of every butterfly results in Bit reversed output. - * \par - * Butterfly CFFT equations: - *
- * xa' = xa + xb + xc + xd
- * ya' = ya + yb + yc + yd
- * xc' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
- * yc' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
- * xb' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
- * yb' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
- * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
- * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
- * 
- * - * - * Complex Inverse Fast Fourier Transform: - * \par - * CIFFT uses same twiddle factor table as CFFT with modifications in the design equation as shown below. - * - * \par - * Modified Butterfly CIFFT equations: - *
- * xa' = xa + xb + xc + xd
- * ya' = ya + yb + yc + yd
- * xc' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
- * yc' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
- * xb' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
- * yb' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
- * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
- * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
- * 
- * - * \par Instance Structure - * A separate instance structure must be defined for each Instance but the twiddle factors and bit reversal tables can be reused. - * There are separate instance structure declarations for each of the 3 supported data types. - * - * \par Initialization Functions - * There is also an associated initialization function for each data type. - * The initialization function performs the following operations: - * - Sets the values of the internal structure fields. - * - Initializes twiddle factor table and bit reversal table pointers - * \par - * Use of the initialization function is optional. - * However, if the initialization function is used, then the instance structure cannot be placed into a const data section. - * To place an instance structure into a const data section, the instance structure must be manually initialized. - * Manually initialize the instance structure as follows: - *
- *ne10_cfft_radix4_instance_f32_t = {fft_len, ifft_flag, bit_reverse_flag, p_twiddle, p_bit_rev_table, twid_coef_modifier, bit_rev_factor, one_by_fft_len};
- * 
- * \par - * where fftLen length of CFFT/CIFFT; ifft_flag Flag for selection of CFFT or CIFFT(Set ifft_flag to calculate CIFFT otherwise calculates CFFT); - * bit_reverse_flag Flag for selection of output order(Set bitReverseFlag to output in normal order otherwise output in bit reversed order); - * p_twiddlepoints to array of twiddle coefficients; pBitRevTable points to the array of bit reversal table. - * p_bit_rev_table modifier for bit reversal table which supports all FFT lengths with same table. - * twid_coef_modifier modifier for twiddle factor table which supports all FFT lengths with same table; - * one_by_fft_len value of 1/fftLen to calculate CIFFT; - * - */ - - -/** - * @addtogroup CFFT_CIFFT - * @{ - */ - -/** - * @brief Core radix-4 FFT of floating-point data. - * @param[out] *pDst point to the output buffer (out-of-place) - * @param[in] *pSrc point to the input buffer (out-of-place: the pSrc is used for intermedia buffer, so the input buffer is destroyed) - * @param[in] N length of FFT - * @param[in] *pCoef point to the twiddle factors - * @return none. 
- * The function implements a Radix-4 Complex FFT - * Can support FFT lengths of 16, 64, 256, 1024 - */ - -void ne10_radix4_butterfly_float_c( - ne10_float32_t *pDst, - ne10_float32_t *pSrc, - ne10_uint16_t N, - ne10_float32_t *pCoef) -{ - ne10_int32_t set,grp; - ne10_int32_t setCount,grpCount,grpStep,twidStep; - ne10_float32_t *pTw2,*pTw3,*pTw4,*pT1,*pTmp; - ne10_float32_t *pOut1,*pOut2,*pOut3,*pOut4; - ne10_int32_t SubFFTSize=4,SubFFTNum=N/4; /*Intial Number of Groups and Group Size*/ - ne10_float32_t InpRe1,InpIm1,InpRe2,InpIm2,InpRe3,InpIm3,InpRe4,InpIm4; - ne10_float32_t OutRe1,OutIm1,OutRe2,OutIm2,OutRe3,OutIm3,OutRe4,OutIm4; - ne10_float32_t ReTmp1,ImTmp1,ReTmp2,ImTmp2,ReTmp3,ImTmp3,ReTmp4,ImTmp4; - ne10_float32_t TwRe2,TwIm2,TwRe3,TwIm3,TwRe4,TwIm4; - ne10_float32_t ReTmpT2,ImTmpT2,ReTmpT3,ImTmpT3,ReTmpT4,ImTmpT4; - ne10_int32_t InpStep=(N>>2),OutStep=1; - - /*First stage*/ - grpCount = SubFFTNum; - pT1 = pSrc; - pOut1 = pDst; - - for(grp = 0; grp < grpCount; grp++) - { - InpRe1 = pT1[0]; - InpIm1 = pT1[1]; - InpRe2 = pT1[InpStep<<1]; - InpIm2 = pT1[(InpStep<<1)+1]; - InpRe3 = pT1[(InpStep<<2)]; - InpIm3 = pT1[(InpStep<<2)+1]; - InpRe4 = pT1[3*(InpStep<<1)]; - InpIm4 = pT1[3*(InpStep<<1)+1]; - - //Inp1 + Inp3 - ReTmp1 = InpRe1 + InpRe3; - ImTmp1 = InpIm1 + InpIm3; - //Inp1 - Inp3 - ReTmp2 = InpRe1 - InpRe3; - ImTmp2 = InpIm1 - InpIm3; - - //Inp2 + Inp4 - ReTmp3 = InpRe2 + InpRe4; - ImTmp3 = InpIm2 + InpIm4; - - //Inp2 - Inp4 - ReTmp4 = InpRe2 - InpRe4; - ImTmp4 = InpIm2 - InpIm4; - - /*Radix-4 Butterfly calculation*/ - /*Third Result*/ - OutRe3 = ReTmp1 - ReTmp3; - OutIm3 = ImTmp1 - ImTmp3; - /*First Result*/ - OutRe1 = ReTmp1 + ReTmp3; - OutIm1 = ImTmp1 + ImTmp3; - /*Second result*/ - OutRe2 = ReTmp2 + ImTmp4; - OutIm2 = ImTmp2 - ReTmp4; - /*Fourth Result*/ - OutRe4 = ReTmp2 - ImTmp4; - OutIm4 = ImTmp2 + ReTmp4; - - *pOut1++ = OutRe1; - *pOut1++ = OutIm1; - *pOut1++ = OutRe2; - *pOut1++ = OutIm2; - *pOut1++ = OutRe3; - *pOut1++ = OutIm3; - *pOut1++ = 
OutRe4; - *pOut1++ = OutIm4; - pT1+=2; - - } - /*Remaining FFT Stages: Second Stage to Last Stage*/ - /* Update the Grp count and size for the next stage */ - SubFFTSize = SubFFTSize<<2; - SubFFTNum = SubFFTNum>>2; - twidStep = 0; - /*Swap Input and Output*/ - pTmp = pDst; - pDst = pSrc; - pSrc= pTmp; - - while(SubFFTNum > 0) - { - grpCount = SubFFTNum; /*Number of Blocks*/ - setCount = SubFFTSize>>2; /* setCount is number of Butterflies */ - grpStep = 0; - OutStep = (OutStep<<2); - pT1 = pSrc; - for(grp = 0; grp < grpCount; grp++) - { - pOut1 = pDst + (grpStep<<1); - pOut2 = pOut1 + (OutStep<<1); - pOut3 = pOut2 + (OutStep<<1); - pOut4 = pOut3 + (OutStep<<1); - - pTw2 = pCoef + twidStep; - pTw3 = pTw2 + (setCount<<1); - pTw4 = pTw3 + (setCount<<1); - for(set = 0; set < setCount; set++) - { - InpRe1 = pT1[0]; - InpIm1 = pT1[1]; - InpRe2 = pT1[InpStep<<1]; - InpIm2 = pT1[(InpStep<<1)+1]; - InpRe3 = pT1[(InpStep<<2)]; - InpIm3 = pT1[(InpStep<<2)+1]; - InpRe4 = pT1[3*(InpStep<<1)]; - InpIm4 = pT1[3*(InpStep<<1)+1]; - - /*Load Twiddles*/ - TwRe2 = *pTw2++; - TwIm2 = *pTw2++; - TwRe3 = *pTw3++; - TwIm3 = *pTw3++; - TwRe4 = *pTw4++; - TwIm4 = *pTw4++; - - - /*Butterfly calculation*/ - //CPLX_MUL (pTmpT2, pTw2, Inp2); - ReTmpT2 = InpRe2*TwRe2 + InpIm2*TwIm2; - ImTmpT2 = InpIm2*TwRe2 - InpRe2*TwIm2; - - //CPLX_MUL (pTmpT3, pTw3, Inp3); - ReTmpT3 = InpRe3*TwRe3 + InpIm3*TwIm3; - ImTmpT3 = InpIm3*TwRe3 - InpRe3*TwIm3; - - - //CPLX_MUL (pTmpT4, pTw4, Inp4); - ReTmpT4 = InpRe4*TwRe4 + InpIm4*TwIm4; - ImTmpT4 = InpIm4*TwRe4 - InpRe4*TwIm4; - //CPLX_ADD (pTmp1, Inp1, pTmpT3); - ReTmp1 = InpRe1 + ReTmpT3; - ImTmp1 = InpIm1 + ImTmpT3; - - //CPLX_SUB (pTmp2, pT1, pTmpT3); - ReTmp2 = InpRe1 - ReTmpT3; - ImTmp2 = InpIm1 - ImTmpT3; - - //CPLX_ADD (pTmp3, pTmpT2, pTmpT4); - ReTmp3 = ReTmpT2 + ReTmpT4; - ImTmp3 = ImTmpT2 + ImTmpT4; - - //CPLX_SUB (pTmp4, pTmpT2, pTmpT4); - ReTmp4 = ReTmpT2 - ReTmpT4; - ImTmp4 = ImTmpT2 - ImTmpT4; - - - - /*Third Result*/ - //CPLX_SUB (pT3, pTmp1, 
pTmp3); - OutRe3 = ReTmp1 - ReTmp3; - OutIm3 = ImTmp1 - ImTmp3; - /*First Result*/ - //CPLX_ADD (pT1, pTmp1, pTmp3); - - OutRe1 = ReTmp1 + ReTmp3; - OutIm1 = ImTmp1 + ImTmp3; - /*Second result*/ - //CPLX_ADD_SUB_X (pT2, pTmp2, pTmp4); - OutRe2 = ReTmp2 + ImTmp4; - OutIm2 = ImTmp2 - ReTmp4; - /*Fourth Result*/ - //CPLX_SUB_ADD_X (pT4, pTmp2, pTmp4); - OutRe4 = ReTmp2 - ImTmp4; - OutIm4 = ImTmp2 + ReTmp4; - /*Store the Result*/ - *pOut1++ = OutRe1; - *pOut1++ = OutIm1; - *pOut2++ = OutRe2; - *pOut2++ = OutIm2; - *pOut3++ = OutRe3; - *pOut3++ = OutIm3; - *pOut4++ = OutRe4; - *pOut4++ = OutIm4; - - pT1+=2; - } - grpStep = grpStep + SubFFTSize; - } - /* Update the Grp count and size for the next stage */ - twidStep+= (3*(setCount<<1)); - SubFFTSize = SubFFTSize<<2; - SubFFTNum = SubFFTNum>>2; - /*Swap Input and Output*/ - pTmp = pDst; - pDst = pSrc; - pSrc= pTmp; - } - - /* if the N is even power of 4, copy the output to dst buffer */ - SubFFTNum = 0; - set = N; - while (set > 1) - { - set = set>>2; - SubFFTNum++; - } - - if((SubFFTNum&1) == 0) - { - pT1 = pSrc; - pOut1 = pDst; - for(grpCount = 0; grpCount < N; grpCount++) - { - *pOut1++ = *pT1++; - *pOut1++ = *pT1++; - }; - } -} - - -/** - * @brief Core radix-4 IFFT of floating-point data. - * @param[out] *pDst point to the output buffer (out-of-place) - * @param[in] *pSrc point to the input buffer (out-of-place: the pSrc is used for intermedia buffer, so the input buffer is destroyed) - * @param[in] N length of FFT - * @param[in] *pCoef point to the twiddle factors - * @return none. 
- * The function implements a Radix-4 Complex IFFT - */ - -void ne10_radix4_butterfly_inverse_float_c( - ne10_float32_t *pDst, - ne10_float32_t *pSrc, - ne10_uint16_t N, - ne10_float32_t *pCoef, - ne10_float32_t onebyN) -{ - ne10_int32_t set,grp; - ne10_int32_t setCount,grpCount,grpStep,twidStep; - ne10_float32_t *pTw2,*pTw3,*pTw4,*pT1,*pTmp; - ne10_float32_t *pOut1,*pOut2,*pOut3,*pOut4; - ne10_int32_t SubFFTSize=4,SubFFTNum=N/4; /*Intial Number of Groups and Group Size*/ - ne10_float32_t InpRe1,InpIm1,InpRe2,InpIm2,InpRe3,InpIm3,InpRe4,InpIm4; - ne10_float32_t OutRe1,OutIm1,OutRe2,OutIm2,OutRe3,OutIm3,OutRe4,OutIm4; - ne10_float32_t ReTmp1,ImTmp1,ReTmp2,ImTmp2,ReTmp3,ImTmp3,ReTmp4,ImTmp4; - ne10_float32_t TwRe2,TwIm2,TwRe3,TwIm3,TwRe4,TwIm4; - ne10_float32_t ReTmpT2,ImTmpT2,ReTmpT3,ImTmpT3,ReTmpT4,ImTmpT4; - ne10_int32_t InpStep=(N>>2),OutStep=1; - - /*First stage*/ - grpCount = SubFFTNum; - pT1 = pSrc; - pOut1 = pDst; - - for(grp = 0; grp < grpCount; grp++) - { - InpRe1 = pT1[0]; - InpIm1 = pT1[1]; - InpRe2 = pT1[InpStep<<1]; - InpIm2 = pT1[(InpStep<<1)+1]; - InpRe3 = pT1[(InpStep<<2)]; - InpIm3 = pT1[(InpStep<<2)+1]; - InpRe4 = pT1[3*(InpStep<<1)]; - InpIm4 = pT1[3*(InpStep<<1)+1]; - - //Inp1 + Inp3 - ReTmp1 = InpRe1 + InpRe3; - ImTmp1 = InpIm1 + InpIm3; - //Inp1 - Inp3 - ReTmp2 = InpRe1 - InpRe3; - ImTmp2 = InpIm1 - InpIm3; - - //Inp2 + Inp4 - ReTmp3 = InpRe2 + InpRe4; - ImTmp3 = InpIm2 + InpIm4; - - //Inp2 - Inp4 - ReTmp4 = InpRe2 - InpRe4; - ImTmp4 = InpIm2 - InpIm4; - - /*Radix-4 Butterfly calculation*/ - /*Third Result*/ - OutRe3 = ReTmp1 - ReTmp3; - OutIm3 = ImTmp1 - ImTmp3; - /*First Result*/ - OutRe1 = ReTmp1 + ReTmp3; - OutIm1 = ImTmp1 + ImTmp3; - /*Second result*/ - OutRe2 = ReTmp2 - ImTmp4; - OutIm2 = ImTmp2 + ReTmp4; - /*Fourth Result*/ - OutRe4 = ReTmp2 + ImTmp4; - OutIm4 = ImTmp2 - ReTmp4; - - *pOut1++ = OutRe1; - *pOut1++ = OutIm1; - *pOut1++ = OutRe2; - *pOut1++ = OutIm2; - *pOut1++ = OutRe3; - *pOut1++ = OutIm3; - *pOut1++ = OutRe4; - *pOut1++ = 
OutIm4; - pT1+=2; - - } - /*Intermediate FFT Stages: Second Stage to Last but one Stage*/ - /* Update the Grp count and size for the next stage */ - SubFFTSize = SubFFTSize<<2; - SubFFTNum = SubFFTNum>>2; - twidStep = 0; - /*Swap Input and Output*/ - pTmp = pDst; - pDst = pSrc; - pSrc= pTmp; - - while(SubFFTNum > 1) - { - grpCount = SubFFTNum; /*Number of Blocks*/ - setCount = SubFFTSize>>2; /* setCount is number of Butterflies */ - grpStep = 0; - OutStep = (OutStep<<2); - pT1 = pSrc; - for(grp = 0; grp < grpCount; grp++) - { - pOut1 = pDst + (grpStep<<1); - pOut2 = pOut1 + (OutStep<<1); - pOut3 = pOut2 + (OutStep<<1); - pOut4 = pOut3 + (OutStep<<1); - - pTw2 = pCoef + twidStep; - pTw3 = pTw2 + (setCount<<1); - pTw4 = pTw3 + (setCount<<1); - for(set = 0; set < setCount; set++) - { - InpRe1 = pT1[0]; - InpIm1 = pT1[1]; - InpRe2 = pT1[InpStep<<1]; - InpIm2 = pT1[(InpStep<<1)+1]; - InpRe3 = pT1[(InpStep<<2)]; - InpIm3 = pT1[(InpStep<<2)+1]; - InpRe4 = pT1[3*(InpStep<<1)]; - InpIm4 = pT1[3*(InpStep<<1)+1]; - - /*Load Twiddles*/ - TwRe2 = *pTw2++; - TwIm2 = *pTw2++; - TwRe3 = *pTw3++; - TwIm3 = *pTw3++; - TwRe4 = *pTw4++; - TwIm4 = *pTw4++; - - - /*Butterfly calculation*/ - //CPLX_MUL (pTmpT2, pTw2, Inp2); - ReTmpT2 = InpRe2*TwRe2 - InpIm2*TwIm2; - ImTmpT2 = InpIm2*TwRe2 + InpRe2*TwIm2; - - //CPLX_MUL (pTmpT3, pTw3, Inp3); - ReTmpT3 = InpRe3*TwRe3 - InpIm3*TwIm3; - ImTmpT3 = InpIm3*TwRe3 + InpRe3*TwIm3; - - //CPLX_MUL (pTmpT4, pTw4, Inp4); - ReTmpT4 = InpRe4*TwRe4 - InpIm4*TwIm4; - ImTmpT4 = InpIm4*TwRe4 + InpRe4*TwIm4; - - //CPLX_ADD (pTmp1, Inp1, pTmpT3); - ReTmp1 = InpRe1 + ReTmpT3; - ImTmp1 = InpIm1 + ImTmpT3; - - //CPLX_SUB (pTmp2, pT1, pTmpT3); - ReTmp2 = InpRe1 - ReTmpT3; - ImTmp2 = InpIm1 - ImTmpT3; - - //CPLX_ADD (pTmp3, pTmpT2, pTmpT4); - ReTmp3 = ReTmpT2 + ReTmpT4; - ImTmp3 = ImTmpT2 + ImTmpT4; - - //CPLX_SUB (pTmp4, pTmpT2, pTmpT4); - ReTmp4 = ReTmpT2 - ReTmpT4; - ImTmp4 = ImTmpT2 - ImTmpT4; - - - /*Third Result*/ - //CPLX_SUB (pT3, pTmp1, pTmp3); - OutRe3 = 
ReTmp1 - ReTmp3; - OutIm3 = ImTmp1 - ImTmp3; - - /*First Result*/ - //CPLX_ADD (pT1, pTmp1, pTmp3); - OutRe1 = ReTmp1 + ReTmp3; - OutIm1 = ImTmp1 + ImTmp3; - - /*Second result*/ - //CPLX_SUB_ADD_X (pT2, pTmp2, pTmp4); - OutRe2 = ReTmp2 - ImTmp4; - OutIm2 = ImTmp2 + ReTmp4; - - /*Fourth Result*/ - //CPLX_ADD_SUB_X (pT4, pTmp2, pTmp4); - OutRe4 = ReTmp2 + ImTmp4; - OutIm4 = ImTmp2 - ReTmp4; - - /*Store the Result*/ - *pOut1++ = OutRe1; - *pOut1++ = OutIm1; - *pOut2++ = OutRe2; - *pOut2++ = OutIm2; - *pOut3++ = OutRe3; - *pOut3++ = OutIm3; - *pOut4++ = OutRe4; - *pOut4++ = OutIm4; - - pT1+=2; - } - grpStep = grpStep + SubFFTSize; - } - /* Update the Grp count and size for the next stage */ - twidStep+= (3*(setCount<<1)); - SubFFTSize = SubFFTSize<<2; - SubFFTNum = SubFFTNum>>2; - /*Swap Input and Output*/ - pTmp = pDst; - pDst = pSrc; - pSrc= pTmp; - } - - /* last stage */ - setCount = SubFFTSize>>2; /* setCount is number of Butterflies */ - grpStep = 0; - OutStep = (OutStep<<2); - pT1 = pSrc; - - pOut1 = pDst + (grpStep<<1); - pOut2 = pOut1 + (OutStep<<1); - pOut3 = pOut2 + (OutStep<<1); - pOut4 = pOut3 + (OutStep<<1); - - pTw2 = pCoef + twidStep; - pTw3 = pTw2 + (setCount<<1); - pTw4 = pTw3 + (setCount<<1); - - for(set = 0; set < setCount; set++) - { - InpRe1 = pT1[0]; - InpIm1 = pT1[1]; - InpRe2 = pT1[InpStep<<1]; - InpIm2 = pT1[(InpStep<<1)+1]; - InpRe3 = pT1[(InpStep<<2)]; - InpIm3 = pT1[(InpStep<<2)+1]; - InpRe4 = pT1[3*(InpStep<<1)]; - InpIm4 = pT1[3*(InpStep<<1)+1]; - - /*Load Twiddles*/ - TwRe2 = *pTw2++; - TwIm2 = *pTw2++; - TwRe3 = *pTw3++; - TwIm3 = *pTw3++; - TwRe4 = *pTw4++; - TwIm4 = *pTw4++; - - - /*Butterfly calculation*/ - //CPLX_MUL (pTmpT2, pTw2, Inp2); - ReTmpT2 = InpRe2*TwRe2 - InpIm2*TwIm2; - ImTmpT2 = InpIm2*TwRe2 + InpRe2*TwIm2; - - //CPLX_MUL (pTmpT3, pTw3, Inp3); - ReTmpT3 = InpRe3*TwRe3 - InpIm3*TwIm3; - ImTmpT3 = InpIm3*TwRe3 + InpRe3*TwIm3; - - //CPLX_MUL (pTmpT4, pTw4, Inp4); - ReTmpT4 = InpRe4*TwRe4 - InpIm4*TwIm4; - ImTmpT4 = 
InpIm4*TwRe4 + InpRe4*TwIm4; - - //CPLX_ADD (pTmp1, Inp1, pTmpT3); - ReTmp1 = InpRe1 + ReTmpT3; - ImTmp1 = InpIm1 + ImTmpT3; - - //CPLX_SUB (pTmp2, pT1, pTmpT3); - ReTmp2 = InpRe1 - ReTmpT3; - ImTmp2 = InpIm1 - ImTmpT3; - - //CPLX_ADD (pTmp3, pTmpT2, pTmpT4); - ReTmp3 = ReTmpT2 + ReTmpT4; - ImTmp3 = ImTmpT2 + ImTmpT4; - - //CPLX_SUB (pTmp4, pTmpT2, pTmpT4); - ReTmp4 = ReTmpT2 - ReTmpT4; - ImTmp4 = ImTmpT2 - ImTmpT4; - - - /*Third Result*/ - //CPLX_SUB (pT3, pTmp1, pTmp3); - OutRe3 = ReTmp1 - ReTmp3; - OutIm3 = ImTmp1 - ImTmp3; - - /*First Result*/ - //CPLX_ADD (pT1, pTmp1, pTmp3); - OutRe1 = ReTmp1 + ReTmp3; - OutIm1 = ImTmp1 + ImTmp3; - - /*Second result*/ - //CPLX_SUB_ADD_X (pT2, pTmp2, pTmp4); - OutRe2 = ReTmp2 - ImTmp4; - OutIm2 = ImTmp2 + ReTmp4; - - /*Fourth Result*/ - //CPLX_ADD_SUB_X (pT4, pTmp2, pTmp4); - OutRe4 = ReTmp2 + ImTmp4; - OutIm4 = ImTmp2 - ReTmp4; - - /*Store the Result*/ - *pOut1++ = OutRe1 * onebyN; - *pOut1++ = OutIm1 * onebyN; - *pOut2++ = OutRe2 * onebyN; - *pOut2++ = OutIm2 * onebyN; - *pOut3++ = OutRe3 * onebyN; - *pOut3++ = OutIm3 * onebyN; - *pOut4++ = OutRe4 * onebyN; - *pOut4++ = OutIm4 * onebyN; - - pT1+=2; - } - - /*Swap Input and Output*/ - pTmp = pDst; - pDst = pSrc; - pSrc= pTmp; - - - /* if the N is even power of 4, copy the output to dst buffer */ - SubFFTNum = 0; - set = N; - while (set > 1) - { - set = set>>2; - SubFFTNum++; - } - - if((SubFFTNum&1) == 0) - { - pT1 = pSrc; - pOut1 = pDst; - for(grpCount = 0; grpCount < N; grpCount++) - { - *pOut1++ = *pT1++; - *pOut1++ = *pT1++; - }; - } -} - - -/** - * @} end of CFFT_CIFFT group - */ diff --git a/modules/dsp/NE10_cfft.neon.s b/modules/dsp/NE10_cfft.neon.s deleted file mode 100644 index b542107..0000000 --- a/modules/dsp/NE10_cfft.neon.s +++ /dev/null @@ -1,738 +0,0 @@ -@ -@ Copyright 2012-14 ARM Limited -@ All rights reserved. 
-@ -@ Redistribution and use in source and binary forms, with or without -@ modification, are permitted provided that the following conditions are met: -@ * Redistributions of source code must retain the above copyright -@ notice, this list of conditions and the following disclaimer. -@ * Redistributions in binary form must reproduce the above copyright -@ notice, this list of conditions and the following disclaimer in the -@ documentation and/or other materials provided with the distribution. -@ * Neither the name of ARM Limited nor the -@ names of its contributors may be used to endorse or promote products -@ derived from this software without specific prior written permission. -@ -@ THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND -@ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -@ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -@ DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY -@ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -@ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -@ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -@ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -@ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -@ - -@/* -@ * NE10 Library : dsp/NE10_cfft.neon.s -@ */ - -@/* -@ * Note: -@ * 1. Currently, this is for soft VFP EABI, not for hard vfpv3 ABI yet -@ * 2. In the assembly code, we use D0-D31 registers. So VFPv3-D32 is used. In VFPv3-D16, there will be failure -@ */ - - .text - .syntax unified - - @/* - @ * @brief Core radix-4 FFT of floating-point data. Do not call this function directly. 
- @ * @param[out] *pDst points to the output buffer - @ * @param[in] *pSrc points to the input buffer - @ * @param[in] N length of FFT - @ * @param[in] *pCoef points to the twiddle factors - @ * @retureq none. - @ * The function implements a Radix-4 Complex FFT - @ */ - - .align 4 - .global ne10_radix4_butterfly_float_neon - .thumb - .thumb_func - -ne10_radix4_butterfly_float_neon: - - PUSH {r4-r12,lr} @push r12: to keep stack 8 bytes aligned - VPUSH {d8-d15} - - qInp1 .qn Q0.F32 - qInp2 .qn Q1.F32 - qInp3 .qn Q2.F32 - qInp4 .qn Q3.F32 - qInp5 .qn Q4.F32 - qInp6 .qn Q5.F32 - qInp7 .qn Q6.F32 - qInp8 .qn Q7.F32 - - qTwd2Re .qn Q8.F32 - qTwd2Im .qn Q9.F32 - qTwd3Re .qn Q10.F32 - qTwd3Im .qn Q11.F32 - qTwd4Re .qn Q12.F32 - qTwd4Im .qn Q13.F32 - - qReTmpT2 .qn Q14.F32 - qImTmpT2 .qn Q15.F32 - - qReTmpT3 .qn Q2.F32 - qImTmpT3 .qn Q3.F32 - - qReTmpT4 .qn Q4.F32 - qImTmpT4 .qn Q5.F32 - - qRe1 .qn Q8.F32 - qIm1 .qn Q9.F32 - qRe2 .qn Q10.F32 - qIm2 .qn Q11.F32 - qRe3 .qn Q12.F32 - qIm3 .qn Q13.F32 - qRe4 .qn Q14.F32 - qIm4 .qn Q15.F32 - - pDst .req R0 - pSrc .req R1 - fftSize .req R2 - pCoef .req R3 - - - SubFFTSize .req R4 - SubFFTNum .req R5 - grpCount .req R6 - twidStep .req R8 - setCount .req R9 - grpStep .req R10 - - pT1 .req R7 - pOut1 .req R11 - pTw2 .req R12 - TwdStep .req R14 - pTmp .req R7 - - LSR SubFFTNum,fftSize,#2 - MOV SubFFTSize,#4 - MOV pT1,pSrc - LSR grpCount,SubFFTNum,#2 - MOV pOut1,pDst - LSL fftSize,#1 - -fftGrpLoop: - VLD2 {qInp1,qInp2},[pT1],fftSize @/*Load Input Values*/ - VLD2 {qInp3,qInp4},[pT1],fftSize - VLD2 {qInp5,qInp6},[pT1],fftSize - VLD2 {qInp7,qInp8},[pT1],fftSize - - @/*pSrc[0] + pSrc[2]*/ - VADD qRe1,qInp1,qInp5 - VADD qIm1,qInp2,qInp6 - @/*pSrc[0] - pSrc[2]*/ - VSUB qRe2,qInp1,qInp5 - VSUB qIm2,qInp2,qInp6 - @/*pSrc[1] + pSrc[3]*/ - VADD qRe3,qInp3,qInp7 - VADD qIm3,qInp4,qInp8 - @/*pSrc[1] - pSrc[3]*/ - VSUB qRe4,qInp3,qInp7 - VSUB qIm4,qInp4,qInp8 - - @/*Radix-4 Butterfly calculation*/ - @/*Third Result*/ - VSUB qInp5,qRe1,qRe3 - VSUB 
qInp6,qIm1,qIm3 - @/*First Result*/ - VADD qInp1,qRe1,qRe3 - VADD qInp2,qIm1,qIm3 - @/*Second result*/ - VADD qInp3,qRe2,qIm4 - VSUB qInp4,qIm2,qRe4 - @/*Fourth Result*/ - VSUB qInp7,qRe2,qIm4 - VADD qInp8,qIm2,qRe4 - - @/*Get Result in correct order for storing*/ - @/*4Re2,4Re0,3Re2,3Re0 2Re2,2Re0,1Re2,1Re0*/ - VZIP qInp1,qInp5 - @/*4Re3,4Re1,3Re3,3Re1 2Re3,2Re1,1Re3,1Re1*/ - VZIP qInp3,qInp7 - - @/*4Im2,4Im0,3Im2,3Im0 2Im2,2Im0,1Im2,1Im0*/ - VZIP qInp2,qInp6 - @/*4Im3,4Im1,3Im2,3Im1 2Im3,2Im1,1Im3,1Im1*/ - VZIP qInp4,qInp8 - - SUB pT1,pT1,fftSize, LSL #2 - - VST4.F32 {d0,d2,d4,d6},[pOut1]! - VST4.F32 {d1,d3,d5,d7},[pOut1]! - SUBS grpCount,#1 - ADD pT1,pT1,#32 - VST4.F32 {d8,d10,d12,d14},[pOut1]! - VST4.F32 {d9,d11,d13,d15},[pOut1]! - - BGT fftGrpLoop - - @/* Swap Input and Output*/ - MOV pTmp,pDst - MOV pDst,pSrc - MOV pSrc,pTmp - - @/*Remaining FFT Stages: Second Stage to Last Stage*/ - @/* Update the Grp count and size for the next stage */ - LSR SubFFTNum,#2 - LSL SubFFTSize,#2 - -fftStageLoop: - MOV grpCount,SubFFTNum - MOV grpStep,#0 - ADD pT1,pSrc,fftSize - LSL TwdStep,SubFFTSize,#1 - -fftGrpLoop1: - LSR setCount,SubFFTSize,#2 - ADD pOut1,pDst,grpStep,LSL #3 - MOV pTw2,pCoef - - LSL SubFFTSize,#1 - -fftSetLoop: - VLD2 {qTwd2Re,qTwd2Im},[pTw2],TwdStep - VLD2 {qInp3,qInp4},[pT1],fftSize - @/*CPLX_MUL (pTmpT2, pTw2, pT2);*/ - VMUL qReTmpT2,qTwd2Re,qInp3 - VMUL qImTmpT2,qTwd2Re,qInp4 - VLD2 {qTwd3Re,qTwd3Im},[pTw2],TwdStep - VLD2 {qInp5,qInp6},[pT1],fftSize - VMLA qReTmpT2,qTwd2Im,qInp4 - VMLS qImTmpT2,qTwd2Im,qInp3 - - - @/*CPLX_MUL (pTmpT3, pTw3, pT3);*/ - VMUL qReTmpT3,qTwd3Re,qInp5 - VMUL qImTmpT3,qTwd3Re,qInp6 - VLD2 {qTwd4Re,qTwd4Im},[pTw2] - VLD2 {qInp7,qInp8},[pT1],fftSize - VMLA qReTmpT3,qTwd3Im,qInp6 - VMLS qImTmpT3,qTwd3Im,qInp5 - - SUB pT1,pT1,fftSize, LSL #2 - - - @/*CPLX_MUL (pTmpT4, pTw4, pT4);*/ - VMUL qReTmpT4,qTwd4Re,qInp7 - VMUL qImTmpT4,qTwd4Re,qInp8 - VLD2 {qInp1,qInp2},[pT1],fftSize - VMLA qReTmpT4,qTwd4Im,qInp8 - VMLS 
qImTmpT4,qTwd4Im,qInp7 - - - @/*CPLX_ADD (pTmp1, pT1, pTmpT3);*/ - VADD qRe1,qInp1,qReTmpT3 - VADD qIm1,qInp2,qImTmpT3 - @/*CPLX_SUB (pTmp2, pT1, pTmpT3);*/ - VSUB qRe2,qInp1,qReTmpT3 - VSUB qIm2,qInp2,qImTmpT3 - @/*CPLX_ADD (pTmp3, pTmpT2, pTmpT4);*/ - VADD qRe3,qReTmpT2,qReTmpT4 - VADD qIm3,qImTmpT2,qImTmpT4 - @/*CPLX_SUB (pTmp4, pTmpT2, pTmpT4);*/ - VSUB qRe4,qReTmpT2,qReTmpT4 - VSUB qIm4,qImTmpT2,qImTmpT4 - - @/*CPLX_ADD (pT1, pTmp1, pTmp3);*/ - VADD qInp1,qRe1,qRe3 - VADD qInp2,qIm1,qIm3 - - @/*CPLX_ADD_SUB_X (pT2, pTmp2, pTmp4);*/ - VADD qInp3,qRe2,qIm4 - VSUB qInp4,qIm2,qRe4 - - @/*CPLX_SUB (pT3, pTmp1, pTmp3);*/ - VSUB qInp5,qRe1,qRe3 - VSUB qInp6,qIm1,qIm3 - @/*CPLX_SUB_ADD_X (pT4, pTmp2, pTmp4);*/ - VSUB qInp7,qRe2,qIm4 - VADD qInp8,qIm2,qRe4 - - SUBS setCount,#4 - @/* Store the Result*/ - - VST2 {qInp1,qInp2},[pOut1],SubFFTSize - VST2 {qInp3,qInp4},[pOut1],SubFFTSize - - VST2 {qInp5,qInp6},[pOut1],SubFFTSize - VST2 {qInp7,qInp8},[pOut1],SubFFTSize - - SUB pTw2,pTw2,TwdStep, LSL #1 - SUB pOut1,pOut1,SubFFTSize, LSL #2 - - ADD pT1,pT1,#32 - ADD pTw2,pTw2,#32 - ADD pOut1,pOut1,#32 - - BGT fftSetLoop - LSR SubFFTSize,#1 - SUBS grpCount,grpCount,#1 - ADD grpStep,grpStep,SubFFTSize - - BGT fftGrpLoop1 - @/* Update the Grp count and size for the next stage */ - ADD twidStep,SubFFTSize,SubFFTSize, LSL #1 - LSRS SubFFTNum,SubFFTNum,#2 - - @/* Swap Input and Output*/ - MOV pTmp,pDst - MOV pDst,pSrc - MOV pSrc,pTmp - - ADD pCoef,pCoef,twidStep,LSL #1 - - LSL SubFFTSize,SubFFTSize,#2 - - BGT fftStageLoop - - @/* if the N is even power of 4, copy the output to dst buffer */ - ASR fftSize,fftSize,#1 - CLZ SubFFTNum,fftSize - MOV setCount, #32 - SUB SubFFTNum, setCount, SubFFTNum - ASR SubFFTNum,SubFFTNum,#1 - ANDS SubFFTNum, SubFFTNum, #1 - - BNE fftEnd - - ASR grpCount, fftSize, #4 -fftCopyLoop: - VLD1.F32 {d0,d1,d2,d3},[pSrc]! - VLD1.F32 {d4,d5,d6,d7},[pSrc]! - VLD1.F32 {d8,d9,d10,d11},[pSrc]! - VLD1.F32 {d12,d13,d14,d15},[pSrc]! 
- - SUBS grpCount,#1 - VST1.F32 {d0,d1,d2,d3},[pDst]! - VST1.F32 {d4,d5,d6,d7},[pDst]! - VST1.F32 {d8,d9,d10,d11},[pDst]! - VST1.F32 {d12,d13,d14,d15},[pDst]! - - BGT fftCopyLoop - -fftEnd: - @/* Retureq From Function*/ - VPOP {d8-d15} - POP {r4-r12,pc} - - @/* - @ * @brief Core radix-4 IFFT of floating-point data. Do not call this function directly. - @ * @param[out] *pDst points to the output buffer - @ * @param[in] *pSrc points to the input buffer - @ * @param[in] N length of FFT - @ * @param[in] *pCoef points to the twiddle factors - @ * @param[in] onebyN reciprocal of FFT length - @ * @retureq none. - @ * The function implements a Radix-4 Complex FFT - @ */ - - .align 4 - .global ne10_radix4_butterfly_inverse_float_neon - .thumb - .thumb_func - -ne10_radix4_butterfly_inverse_float_neon: - - PUSH {r4-r12,lr} @push r12: to keep stack 8 bytes aligned - VPUSH {d8-d15} -#if defined (NE10_ENABLE_HF) - VPUSH {s0,s1} -#endif - qInp1 .qn Q0.F32 - qInp2 .qn Q1.F32 - qInp3 .qn Q2.F32 - qInp4 .qn Q3.F32 - qInp5 .qn Q4.F32 - qInp6 .qn Q5.F32 - qInp7 .qn Q6.F32 - qInp8 .qn Q7.F32 - - qTwd2Re .qn Q8.F32 - qTwd2Im .qn Q9.F32 - qTwd3Re .qn Q10.F32 - qTwd3Im .qn Q11.F32 - qTwd4Re .qn Q12.F32 - qTwd4Im .qn Q13.F32 - - qReTmpT2 .qn Q14.F32 - qImTmpT2 .qn Q15.F32 - - qReTmpT3 .qn Q2.F32 - qImTmpT3 .qn Q3.F32 - - qReTmpT4 .qn Q4.F32 - qImTmpT4 .qn Q5.F32 - - qRe1 .qn Q8.F32 - qIm1 .qn Q9.F32 - qRe2 .qn Q10.F32 - qIm2 .qn Q11.F32 - qRe3 .qn Q12.F32 - qIm3 .qn Q13.F32 - qRe4 .qn Q14.F32 - qIm4 .qn Q15.F32 - - pDst .req R0 - pSrc .req R1 - fftSize .req R2 - pCoef .req R3 - - - SubFFTSize .req R4 - SubFFTNum .req R5 - grpCount .req R6 - twidStep .req R8 - setCount .req R9 - grpStep .req R10 - - pT1 .req R7 - pOut1 .req R11 - pTw2 .req R12 - TwdStep .req R14 - pTmp .req R7 - - LSR SubFFTNum,fftSize,#2 - MOV SubFFTSize,#4 - MOV pT1,pSrc - LSR grpCount,SubFFTNum,#2 - MOV pOut1,pDst - LSL fftSize,#1 - -ifftGrpLoop: - VLD2 {qInp1,qInp2},[pT1],fftSize @/*Load Input Values*/ - VLD2 
{qInp3,qInp4},[pT1],fftSize - VLD2 {qInp5,qInp6},[pT1],fftSize - VLD2 {qInp7,qInp8},[pT1],fftSize - - @/*pSrc[0] + pSrc[2]*/ - VADD qRe1,qInp1,qInp5 - VADD qIm1,qInp2,qInp6 - @/*pSrc[0] - pSrc[2]*/ - VSUB qRe2,qInp1,qInp5 - VSUB qIm2,qInp2,qInp6 - @/*pSrc[1] + pSrc[3]*/ - VADD qRe3,qInp3,qInp7 - VADD qIm3,qInp4,qInp8 - @/*pSrc[1] - pSrc[3]*/ - VSUB qRe4,qInp3,qInp7 - VSUB qIm4,qInp4,qInp8 - - @/*Radix-4 Butterfly calculation*/ - @/*Third Result*/ - VSUB qInp5,qRe1,qRe3 - VSUB qInp6,qIm1,qIm3 - @/*First Result*/ - VADD qInp1,qRe1,qRe3 - VADD qInp2,qIm1,qIm3 - @/*Second result*/ - VSUB qInp3,qRe2,qIm4 - VADD qInp4,qIm2,qRe4 - @/*Fourth Result*/ - VADD qInp7,qRe2,qIm4 - VSUB qInp8,qIm2,qRe4 - - @/*Get Result in correct order for storing*/ - @/*4Re2,4Re0,3Re2,3Re0 2Re2,2Re0,1Re2,1Re0*/ - VZIP qInp1,qInp5 - @/*4Re3,4Re1,3Re3,3Re1 2Re3,2Re1,1Re3,1Re1*/ - VZIP qInp3,qInp7 - - @/*4Im2,4Im0,3Im2,3Im0 2Im2,2Im0,1Im2,1Im0*/ - VZIP qInp2,qInp6 - @/*4Im3,4Im1,3Im2,3Im1 2Im3,2Im1,1Im3,1Im1*/ - VZIP qInp4,qInp8 - - - SUB pT1,pT1,fftSize, LSL #2 - - - VST4.F32 {d0,d2,d4,d6},[pOut1]! - VST4.F32 {d1,d3,d5,d7},[pOut1]! - SUBS grpCount,#1 - ADD pT1,pT1,#32 - VST4.F32 {d8,d10,d12,d14},[pOut1]! - VST4.F32 {d9,d11,d13,d15},[pOut1]! 
- - - BGT ifftGrpLoop - - @/* Swap Input and Output*/ - MOV pTmp,pDst - MOV pDst,pSrc - MOV pSrc,pTmp - - @/*Intermediate FFT Stages: Second Stage to Last but one Stage*/ - @/* Update the Grp count and size for the next stage */ - - LSR SubFFTNum,#2 - LSL SubFFTSize,#2 - SUBS pTmp, SubFFTNum, #1 - BEQ ifftLastStageLoop - -ifftStageLoop: - MOV grpCount,SubFFTNum - MOV grpStep,#0 - ADD pT1,pSrc,fftSize - LSL TwdStep,SubFFTSize,#1 - -ifftGrpLoop1: - LSR setCount,SubFFTSize,#2 - ADD pOut1,pDst,grpStep,LSL #3 - MOV pTw2,pCoef - - LSL SubFFTSize,#1 - -ifftSetLoop: - VLD2 {qTwd2Re,qTwd2Im},[pTw2],TwdStep - VLD2 {qInp3,qInp4},[pT1],fftSize - @/*CPLX_MUL (pTmpT2, pTw2, pT2);*/ - VMUL qReTmpT2,qTwd2Re,qInp3 - VMUL qImTmpT2,qTwd2Re,qInp4 - VLD2 {qTwd3Re,qTwd3Im},[pTw2],TwdStep - VLD2 {qInp5,qInp6},[pT1],fftSize - VMLS qReTmpT2,qTwd2Im,qInp4 - VMLA qImTmpT2,qTwd2Im,qInp3 - - - @/*CPLX_MUL (pTmpT3, pTw3, pT3);*/ - VMUL qReTmpT3,qTwd3Re,qInp5 - VMUL qImTmpT3,qTwd3Re,qInp6 - VLD2 {qTwd4Re,qTwd4Im},[pTw2] - VLD2 {qInp7,qInp8},[pT1],fftSize - VMLS qReTmpT3,qTwd3Im,qInp6 - VMLA qImTmpT3,qTwd3Im,qInp5 - - SUB pT1,pT1,fftSize, LSL #2 - - - @/*CPLX_MUL (pTmpT4, pTw4, pT4);*/ - VMUL qReTmpT4,qTwd4Re,qInp7 - VMUL qImTmpT4,qTwd4Re,qInp8 - VLD2 {qInp1,qInp2},[pT1],fftSize - VMLS qReTmpT4,qTwd4Im,qInp8 - VMLA qImTmpT4,qTwd4Im,qInp7 - - - @/*CPLX_ADD (pTmp1, pT1, pTmpT3);*/ - VADD qRe1,qInp1,qReTmpT3 - VADD qIm1,qInp2,qImTmpT3 - @/*CPLX_SUB (pTmp2, pT1, pTmpT3);*/ - VSUB qRe2,qInp1,qReTmpT3 - VSUB qIm2,qInp2,qImTmpT3 - @/*CPLX_ADD (pTmp3, pTmpT2, pTmpT4);*/ - VADD qRe3,qReTmpT2,qReTmpT4 - VADD qIm3,qImTmpT2,qImTmpT4 - @/*CPLX_SUB (pTmp4, pTmpT2, pTmpT4);*/ - VSUB qRe4,qReTmpT2,qReTmpT4 - VSUB qIm4,qImTmpT2,qImTmpT4 - - @/*CPLX_ADD (pT1, pTmp1, pTmp3);*/ - VADD qInp1,qRe1,qRe3 - VADD qInp2,qIm1,qIm3 - - @/*CPLX_SUB_ADD_X (pT2, pTmp2, pTmp4);*/ - VSUB qInp3,qRe2,qIm4 - VADD qInp4,qIm2,qRe4 - - @/*CPLX_SUB (pT3, pTmp1, pTmp3);*/ - VSUB qInp5,qRe1,qRe3 - VSUB qInp6,qIm1,qIm3 - @/*CPLX_ADD_SUB_X 
(pT4, pTmp2, pTmp4);*/ - VADD qInp7,qRe2,qIm4 - VSUB qInp8,qIm2,qRe4 - - SUBS setCount,#4 - @/* Store the Result*/ - - VST2 {qInp1,qInp2},[pOut1],SubFFTSize - VST2 {qInp3,qInp4},[pOut1],SubFFTSize - - VST2 {qInp5,qInp6},[pOut1],SubFFTSize - VST2 {qInp7,qInp8},[pOut1],SubFFTSize - - SUB pTw2,pTw2,TwdStep, LSL #1 - SUB pOut1,pOut1,SubFFTSize, LSL #2 - - ADD pT1,pT1,#32 - ADD pTw2,pTw2,#32 - ADD pOut1,pOut1,#32 - - - BGT ifftSetLoop - LSR SubFFTSize,#1 - SUBS grpCount,grpCount,#1 - ADD grpStep,grpStep,SubFFTSize - - BGT ifftGrpLoop1 - @/* Update the Grp count and size for the next stage */ - ADD twidStep,SubFFTSize,SubFFTSize, LSL #1 - LSR SubFFTNum,SubFFTNum,#2 - SUBS pTmp, SubFFTNum, #1 - - @/* Swap Input and Output*/ - MOV pTmp,pDst - MOV pDst,pSrc - MOV pSrc,pTmp - - ADD pCoef,pCoef,twidStep,LSL #1 - - LSL SubFFTSize,SubFFTSize,#2 - - BGT ifftStageLoop - - @/* last stage */ -ifftLastStageLoop: - MOV grpStep,#0 - ADD pT1,pSrc,fftSize - LSL TwdStep,SubFFTSize,#1 - -@ifftLastStageGrpLoop1: - LSR setCount,SubFFTSize,#2 - ADD pOut1,pDst,grpStep,LSL #3 - MOV pTw2,pCoef - - LSL SubFFTSize,#1 - -ifftLastStageSetLoop: - VLD2 {qTwd2Re,qTwd2Im},[pTw2],TwdStep - VLD2 {qInp3,qInp4},[pT1],fftSize - @/*CPLX_MUL (pTmpT2, pTw2, pT2);*/ - VMUL qReTmpT2,qTwd2Re,qInp3 - VMUL qImTmpT2,qTwd2Re,qInp4 - VLD2 {qTwd3Re,qTwd3Im},[pTw2],TwdStep - VLD2 {qInp5,qInp6},[pT1],fftSize - VMLS qReTmpT2,qTwd2Im,qInp4 - VMLA qImTmpT2,qTwd2Im,qInp3 - - - @/*CPLX_MUL (pTmpT3, pTw3, pT3);*/ - VMUL qReTmpT3,qTwd3Re,qInp5 - VMUL qImTmpT3,qTwd3Re,qInp6 - VLD2 {qTwd4Re,qTwd4Im},[pTw2] - VLD2 {qInp7,qInp8},[pT1],fftSize - VMLS qReTmpT3,qTwd3Im,qInp6 - VMLA qImTmpT3,qTwd3Im,qInp5 - - SUB pT1,pT1,fftSize, LSL #2 - - - @/*CPLX_MUL (pTmpT4, pTw4, pT4);*/ - VMUL qReTmpT4,qTwd4Re,qInp7 - VMUL qImTmpT4,qTwd4Re,qInp8 - VLD2 {qInp1,qInp2},[pT1],fftSize - VMLS qReTmpT4,qTwd4Im,qInp8 - VMLA qImTmpT4,qTwd4Im,qInp7 - - - @/*CPLX_ADD (pTmp1, pT1, pTmpT3);*/ - VADD qRe1,qInp1,qReTmpT3 - VADD qIm1,qInp2,qImTmpT3 - 
@/*CPLX_SUB (pTmp2, pT1, pTmpT3);*/ - VSUB qRe2,qInp1,qReTmpT3 - VSUB qIm2,qInp2,qImTmpT3 - @/*CPLX_ADD (pTmp3, pTmpT2, pTmpT4);*/ - VADD qRe3,qReTmpT2,qReTmpT4 - VADD qIm3,qImTmpT2,qImTmpT4 - @/*CPLX_SUB (pTmp4, pTmpT2, pTmpT4);*/ - VSUB qRe4,qReTmpT2,qReTmpT4 - VSUB qIm4,qImTmpT2,qImTmpT4 - - @/*CPLX_ADD (pT1, pTmp1, pTmp3);*/ - VADD qInp1,qRe1,qRe3 - VADD qInp2,qIm1,qIm3 - - @/*CPLX_SUB_ADD_X (pT2, pTmp2, pTmp4);*/ - VSUB qInp3,qRe2,qIm4 - VADD qInp4,qIm2,qRe4 - - @/*CPLX_SUB (pT3, pTmp1, pTmp3);*/ - VSUB qInp5,qRe1,qRe3 - VSUB qInp6,qIm1,qIm3 - @/*CPLX_ADD_SUB_X (pT4, pTmp2, pTmp4);*/ - VADD qInp7,qRe2,qIm4 - VSUB qInp8,qIm2,qRe4 - - @/* multiply onebyN */ -#if defined (NE10_ENABLE_HF) - LDR grpCount,[sp,#0] @revert the original value -#else - LDR grpCount,[sp,#104] @revert the original value -#endif - VDUP.f32 q8,grpCount - - VMUL qInp1,qInp1,qRe1 - VMUL qInp2,qInp2,qRe1 - VMUL qInp3,qInp3,qRe1 - VMUL qInp4,qInp4,qRe1 - VMUL qInp5,qInp5,qRe1 - VMUL qInp6,qInp6,qRe1 - VMUL qInp7,qInp7,qRe1 - VMUL qInp8,qInp8,qRe1 - - SUBS setCount,#4 - @/* Store the Result*/ - - VST2 {qInp1,qInp2},[pOut1],SubFFTSize - VST2 {qInp3,qInp4},[pOut1],SubFFTSize - - VST2 {qInp5,qInp6},[pOut1],SubFFTSize - VST2 {qInp7,qInp8},[pOut1],SubFFTSize - - SUB pTw2,pTw2,TwdStep, LSL #1 - SUB pOut1,pOut1,SubFFTSize, LSL #2 - - ADD pT1,pT1,#32 - ADD pTw2,pTw2,#32 - ADD pOut1,pOut1,#32 - - BGT ifftLastStageSetLoop - - @/* Swap Input and Output*/ - MOV pTmp,pDst - MOV pDst,pSrc - MOV pSrc,pTmp - - @/* if the N is even power of 4, copy the output to dst buffer */ - ASR fftSize,fftSize,#1 - CLZ SubFFTNum,fftSize - MOV setCount, #32 - SUB SubFFTNum, setCount, SubFFTNum - ASR SubFFTNum,SubFFTNum,#1 - ANDS SubFFTNum, SubFFTNum, #1 - - BNE ifftEnd - - ASR grpCount, fftSize, #4 - -ifftCopyLoop: - VLD1.F32 {d0,d1,d2,d3},[pSrc]! - VLD1.F32 {d4,d5,d6,d7},[pSrc]! - VLD1.F32 {d8,d9,d10,d11},[pSrc]! - VLD1.F32 {d12,d13,d14,d15},[pSrc]! - - SUBS grpCount,#1 - VST1.F32 {d0,d1,d2,d3},[pDst]! 
- VST1.F32 {d4,d5,d6,d7},[pDst]! - VST1.F32 {d8,d9,d10,d11},[pDst]! - VST1.F32 {d12,d13,d14,d15},[pDst]! - - BGT ifftCopyLoop - -ifftEnd: - @/* Retureq From Function*/ -#if defined (NE10_ENABLE_HF) - VPOP {s0,s1} -#endif - VPOP {d8-d15} - POP {r4-r12,pc} - - - .end - diff --git a/modules/dsp/NE10_cfft_init.c b/modules/dsp/NE10_cfft_init.c deleted file mode 100644 index 350d791..0000000 --- a/modules/dsp/NE10_cfft_init.c +++ /dev/null @@ -1,387 +0,0 @@ -/* - * Copyright 2012-14 ARM Limited - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of ARM Limited nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "NE10_types.h" - -/* -* @brief Twiddle factors Table -*/ -/** Pseudo code for Twiddle factor Tables Generation: - -for i=1 to N - cfft_twiddle_coef(2*i) = cos((i-1) * 2*PI/(float)N)) - cfft_twiddle_coef(2*i + 1) = sin((i-1) * 2*PI/(float)N)) -end - -where N = 1024 and PI = 3.14159265358979 - -N is the maximum FFT Size supported and -Cos and Sin values are interleaved fashion -*/ - -/*Twiddles below are generated for each FFT-DIT stage seperately*/ - -static ne10_float32_t cfft_twiddle_coef[2040]={ -1.000000,0.000000,0.923880,0.382683,0.707107,0.707107,0.382683,0.923880, -1.000000,0.000000,0.707107,0.707107,0.000000,1.000000,-0.707107,0.707107, -1.000000,0.000000,0.382683,0.923880,-0.707107,0.707107,-0.923880,-0.382683, - -1.000000,0.000000,0.995185,0.098017,0.980785,0.195090,0.956940,0.290285,0.923880 -,0.382683,0.881921,0.471397,0.831470,0.555570,0.773010,0.634393,0.707107, -0.707107,0.634393,0.773010,0.555570,0.831470,0.471397,0.881921,0.382683,0.923880, -0.290285,0.956940,0.195090,0.980785,0.098017,0.995185, - -1.000000,0.000000,0.980785,0.195090,0.923880,0.382683,0.831470,0.555570,0.707107 -,0.707107,0.555570,0.831470,0.382683,0.923880,0.195090,0.980785,0.000000, -1.000000,-0.195090,0.980785,-0.382683,0.923880,-0.555570,0.831470,-0.707107,0.707107, --0.831470,0.555570,-0.923880,0.382683,-0.980785,0.195090, - -1.000000,0.000000,0.956940,0.290285,0.831470,0.555570,0.634393,0.773010,0.382683 -,0.923880,0.098017,0.995185,-0.195090,0.980785,-0.471397,0.881921,-0.707107, -0.707107,-0.881921,0.471397,-0.980785,0.195090,-0.995185,-0.098017,-0.923880, --0.382683,-0.773010,-0.634393,-0.555570,-0.831470,-0.290285,-0.956940, - -1.000000,0.000000,0.999699,0.024541,0.998795,0.049068,0.997290,0.073565,0.995185 -,0.098017,0.992480,0.122411,0.989177,0.146730,0.985278,0.170962,0.980785, -0.195090,0.975702,0.219101,0.970031,0.242980,0.963776,0.266713,0.956940,0.290285, -0.949528,0.313682,0.941544,0.336890,0.932993,0.359895,0.923880,0.382683,0.914210, 
-0.405241,0.903989,0.427555,0.893224,0.449611,0.881921,0.471397,0.870087,0.492898, -0.857729,0.514103,0.844854,0.534998,0.831470,0.555570,0.817585,0.575808,0.803208, -0.595699,0.788346,0.615232,0.773010,0.634393,0.757209,0.653173,0.740951,0.671559, -0.724247,0.689541,0.707107,0.707107,0.689541,0.724247,0.671559,0.740951,0.653173, -0.757209,0.634393,0.773010,0.615232,0.788346,0.595699,0.803208,0.575808,0.817585, -0.555570,0.831470,0.534998,0.844854,0.514103,0.857729,0.492898,0.870087,0.471397 -,0.881921,0.449611,0.893224,0.427555,0.903989,0.405241,0.914210,0.382683, -0.923880,0.359895,0.932993,0.336890,0.941544,0.313682,0.949528,0.290285,0.956940, -0.266713,0.963776,0.242980,0.970031,0.219101,0.975702,0.195090,0.980785,0.170962, -0.985278,0.146730,0.989177,0.122411,0.992480,0.098017,0.995185,0.073565,0.997290, -0.049068,0.998795,0.024541,0.999699, - -1.000000,0.000000,0.998795,0.049068,0.995185,0.098017,0.989177,0.146730,0.980785 -,0.195090,0.970031,0.242980,0.956940,0.290285,0.941544,0.336890,0.923880, -0.382683,0.903989,0.427555,0.881921,0.471397,0.857729,0.514103,0.831470,0.555570, -0.803208,0.595699,0.773010,0.634393,0.740951,0.671559,0.707107,0.707107,0.671559, -0.740951,0.634393,0.773010,0.595699,0.803208,0.555570,0.831470,0.514103,0.857729, -0.471397,0.881921,0.427555,0.903989,0.382683,0.923880,0.336890,0.941544,0.290285, -0.956940,0.242980,0.970031,0.195090,0.980785,0.146730,0.989177,0.098017,0.995185, -0.049068,0.998795,0.000000,1.000000,-0.049068,0.998795,-0.098017,0.995185, --0.146730,0.989177,-0.195090,0.980785,-0.242980,0.970031,-0.290285,0.956940,-0.336890, -0.941544,-0.382683,0.923880,-0.427555,0.903989,-0.471397,0.881921,-0.514103, -0.857729,-0.555570,0.831470,-0.595699,0.803208,-0.634393,0.773010,-0.671559,0.740951, --0.707107,0.707107,-0.740951,0.671559,-0.773010,0.634393,-0.803208,0.595699, --0.831470,0.555570,-0.857729,0.514103,-0.881921,0.471397,-0.903989,0.427555, 
--0.923880,0.382683,-0.941544,0.336890,-0.956940,0.290285,-0.970031,0.242980,-0.980785, -0.195090,-0.989177,0.146730,-0.995185,0.098017,-0.998795,0.049068, - -1.000000,0.000000,0.997290,0.073565,0.989177,0.146730,0.975702,0.219101,0.956940 -,0.290285,0.932993,0.359895,0.903989,0.427555,0.870087,0.492898,0.831470, -0.555570,0.788346,0.615232,0.740951,0.671559,0.689541,0.724247,0.634393,0.773010, -0.575808,0.817585,0.514103,0.857729,0.449611,0.893224,0.382683,0.923880,0.313682, -0.949528,0.242980,0.970031,0.170962,0.985278,0.098017,0.995185,0.024541,0.999699, --0.049068,0.998795,-0.122411,0.992480,-0.195090,0.980785,-0.266713,0.963776, --0.336890,0.941544,-0.405241,0.914210,-0.471397,0.881921,-0.534998,0.844854,-0.595699, -0.803208,-0.653173,0.757209,-0.707107,0.707107,-0.757209,0.653173,-0.803208, -0.595699,-0.844854,0.534998,-0.881921,0.471397,-0.914210,0.405241,-0.941544,0.336890, --0.963776,0.266713,-0.980785,0.195090,-0.992480,0.122411,-0.998795,0.049068, --0.999699,-0.024541,-0.995185,-0.098017,-0.985278,-0.170962,-0.970031,-0.242980, --0.949528,-0.313682,-0.923880,-0.382683,-0.893224,-0.449611,-0.857729,-0.514103, --0.817585,-0.575808,-0.773010,-0.634393,-0.724247,-0.689541,-0.671559,-0.740951, --0.615232,-0.788346,-0.555570,-0.831470,-0.492898,-0.870087,-0.427555,-0.903989, --0.359895,-0.932993,-0.290285,-0.956940,-0.219101,-0.975702,-0.146730,-0.989177, --0.073565,-0.997290, - - -1.000000,0.000000,0.999981,0.006136,0.999925,0.012272,0.999831,0.018407,0.999699 -,0.024541,0.999529,0.030675,0.999322,0.036807,0.999078,0.042938,0.998795, -0.049068,0.998476,0.055195,0.998118,0.061321,0.997723,0.067444,0.997290,0.073565, -0.996820,0.079682,0.996313,0.085797,0.995767,0.091909,0.995185,0.098017,0.994565, -0.104122,0.993907,0.110222,0.993212,0.116319,0.992480,0.122411,0.991710,0.128498, -0.990903,0.134581,0.990058,0.140658,0.989177,0.146730,0.988258,0.152797,0.987301, -0.158858,0.986308,0.164913,0.985278,0.170962,0.984210,0.177004,0.983105,0.183040, 
-0.981964,0.189069,0.980785,0.195090,0.979570,0.201105,0.978317,0.207111,0.977028, -0.213110,0.975702,0.219101,0.974339,0.225084,0.972940,0.231058,0.971504,0.237024, -0.970031,0.242980,0.968522,0.248928,0.966976,0.254866,0.965394,0.260794,0.963776 -,0.266713,0.962121,0.272621,0.960431,0.278520,0.958703,0.284408,0.956940, -0.290285,0.955141,0.296151,0.953306,0.302006,0.951435,0.307850,0.949528,0.313682, -0.947586,0.319502,0.945607,0.325310,0.943593,0.331106,0.941544,0.336890,0.939459, -0.342661,0.937339,0.348419,0.935184,0.354164,0.932993,0.359895,0.930767,0.365613, -0.928506,0.371317,0.926210,0.377007,0.923880,0.382683,0.921514,0.388345,0.919114, -0.393992,0.916679,0.399624,0.914210,0.405241,0.911706,0.410843,0.909168,0.416430, -0.906596,0.422000,0.903989,0.427555,0.901349,0.433094,0.898674,0.438616,0.895966, -0.444122,0.893224,0.449611,0.890449,0.455084,0.887640,0.460539,0.884797,0.465977, -0.881921,0.471397,0.879012,0.476799,0.876070,0.482184,0.873095,0.487550,0.870087 -,0.492898,0.867046,0.498228,0.863973,0.503538,0.860867,0.508830,0.857729, -0.514103,0.854558,0.519356,0.851355,0.524590,0.848120,0.529804,0.844854,0.534998, -0.841555,0.540171,0.838225,0.545325,0.834863,0.550458,0.831470,0.555570,0.828045, -0.560662,0.824589,0.565732,0.821102,0.570781,0.817585,0.575808,0.814036,0.580814, -0.810457,0.585798,0.806848,0.590760,0.803208,0.595699,0.799537,0.600616,0.795837, -0.605511,0.792107,0.610383,0.788346,0.615232,0.784557,0.620057,0.780737,0.624860, -0.776888,0.629638,0.773010,0.634393,0.769103,0.639124,0.765167,0.643832,0.761202, -0.648514,0.757209,0.653173,0.753187,0.657807,0.749136,0.662416,0.745058,0.667000, -0.740951,0.671559,0.736817,0.676093,0.732654,0.680601,0.728464,0.685084,0.724247 -,0.689541,0.720003,0.693971,0.715731,0.698376,0.711432,0.702755,0.707107, -0.707107,0.702755,0.711432,0.698376,0.715731,0.693971,0.720003,0.689541,0.724247, -0.685084,0.728464,0.680601,0.732654,0.676093,0.736817,0.671559,0.740951,0.667000, 
-0.745058,0.662416,0.749136,0.657807,0.753187,0.653173,0.757209,0.648514,0.761202, -0.643832,0.765167,0.639124,0.769103,0.634393,0.773010,0.629638,0.776888,0.624860, -0.780737,0.620057,0.784557,0.615232,0.788346,0.610383,0.792107,0.605511,0.795837, -0.600616,0.799537,0.595699,0.803208,0.590760,0.806848,0.585798,0.810457,0.580814, -0.814036,0.575808,0.817585,0.570781,0.821102,0.565732,0.824589,0.560662,0.828045, -0.555570,0.831470,0.550458,0.834863,0.545325,0.838225,0.540171,0.841555,0.534998 -,0.844854,0.529804,0.848120,0.524590,0.851355,0.519356,0.854558,0.514103, -0.857729,0.508830,0.860867,0.503538,0.863973,0.498228,0.867046,0.492898,0.870087, -0.487550,0.873095,0.482184,0.876070,0.476799,0.879012,0.471397,0.881921,0.465977, -0.884797,0.460539,0.887640,0.455084,0.890449,0.449611,0.893224,0.444122,0.895966, -0.438616,0.898674,0.433094,0.901349,0.427555,0.903989,0.422000,0.906596,0.416430, -0.909168,0.410843,0.911706,0.405241,0.914210,0.399624,0.916679,0.393992,0.919114, -0.388345,0.921514,0.382683,0.923880,0.377007,0.926210,0.371317,0.928506,0.365613, -0.930767,0.359895,0.932993,0.354164,0.935184,0.348419,0.937339,0.342661,0.939459, -0.336890,0.941544,0.331106,0.943593,0.325310,0.945607,0.319502,0.947586,0.313682 -,0.949528,0.307850,0.951435,0.302006,0.953306,0.296151,0.955141,0.290285, -0.956940,0.284408,0.958703,0.278520,0.960431,0.272621,0.962121,0.266713,0.963776, -0.260794,0.965394,0.254866,0.966976,0.248928,0.968522,0.242980,0.970031,0.237024, -0.971504,0.231058,0.972940,0.225084,0.974339,0.219101,0.975702,0.213110,0.977028, -0.207111,0.978317,0.201105,0.979570,0.195090,0.980785,0.189069,0.981964,0.183040, -0.983105,0.177004,0.984210,0.170962,0.985278,0.164913,0.986308,0.158858,0.987301, -0.152797,0.988258,0.146730,0.989177,0.140658,0.990058,0.134581,0.990903,0.128498, -0.991710,0.122411,0.992480,0.116319,0.993212,0.110222,0.993907,0.104122,0.994565, -0.098017,0.995185,0.091909,0.995767,0.085797,0.996313,0.079682,0.996820,0.073565 
-,0.997290,0.067444,0.997723,0.061321,0.998118,0.055195,0.998476,0.049068, -0.998795,0.042938,0.999078,0.036807,0.999322,0.030675,0.999529,0.024541,0.999699, -0.018407,0.999831,0.012272,0.999925,0.006136,0.999981, - - -1.000000,0.000000,0.999925,0.012272,0.999699,0.024541,0.999322,0.036807,0.998795 -,0.049068,0.998118,0.061321,0.997290,0.073565,0.996313,0.085797,0.995185, -0.098017,0.993907,0.110222,0.992480,0.122411,0.990903,0.134581,0.989177,0.146730, -0.987301,0.158858,0.985278,0.170962,0.983105,0.183040,0.980785,0.195090,0.978317, -0.207111,0.975702,0.219101,0.972940,0.231058,0.970031,0.242980,0.966976,0.254866, -0.963776,0.266713,0.960431,0.278520,0.956940,0.290285,0.953306,0.302006,0.949528, -0.313682,0.945607,0.325310,0.941544,0.336890,0.937339,0.348419,0.932993,0.359895, -0.928506,0.371317,0.923880,0.382683,0.919114,0.393992,0.914210,0.405241,0.909168, -0.416430,0.903989,0.427555,0.898674,0.438616,0.893224,0.449611,0.887640,0.460539, -0.881921,0.471397,0.876070,0.482184,0.870087,0.492898,0.863973,0.503538,0.857729 -,0.514103,0.851355,0.524590,0.844854,0.534998,0.838225,0.545325,0.831470, -0.555570,0.824589,0.565732,0.817585,0.575808,0.810457,0.585798,0.803208,0.595699, -0.795837,0.605511,0.788346,0.615232,0.780737,0.624860,0.773010,0.634393,0.765167, -0.643832,0.757209,0.653173,0.749136,0.662416,0.740951,0.671559,0.732654,0.680601, -0.724247,0.689541,0.715731,0.698376,0.707107,0.707107,0.698376,0.715731,0.689541, -0.724247,0.680601,0.732654,0.671559,0.740951,0.662416,0.749136,0.653173,0.757209, -0.643832,0.765167,0.634393,0.773010,0.624860,0.780737,0.615232,0.788346,0.605511, -0.795837,0.595699,0.803208,0.585798,0.810457,0.575808,0.817585,0.565732,0.824589, -0.555570,0.831470,0.545325,0.838225,0.534998,0.844854,0.524590,0.851355,0.514103 -,0.857729,0.503538,0.863973,0.492898,0.870087,0.482184,0.876070,0.471397, -0.881921,0.460539,0.887640,0.449611,0.893224,0.438616,0.898674,0.427555,0.903989, 
-0.416430,0.909168,0.405241,0.914210,0.393992,0.919114,0.382683,0.923880,0.371317, -0.928506,0.359895,0.932993,0.348419,0.937339,0.336890,0.941544,0.325310,0.945607, -0.313682,0.949528,0.302006,0.953306,0.290285,0.956940,0.278520,0.960431,0.266713, -0.963776,0.254866,0.966976,0.242980,0.970031,0.231058,0.972940,0.219101,0.975702, -0.207111,0.978317,0.195090,0.980785,0.183040,0.983105,0.170962,0.985278,0.158858, -0.987301,0.146730,0.989177,0.134581,0.990903,0.122411,0.992480,0.110222,0.993907, -0.098017,0.995185,0.085797,0.996313,0.073565,0.997290,0.061321,0.998118,0.049068 -,0.998795,0.036807,0.999322,0.024541,0.999699,0.012272,0.999925,0.000000, -1.000000,-0.012272,0.999925,-0.024541,0.999699,-0.036807,0.999322,-0.049068,0.998795, --0.061321,0.998118,-0.073565,0.997290,-0.085797,0.996313,-0.098017,0.995185, --0.110222,0.993907,-0.122411,0.992480,-0.134581,0.990903,-0.146730,0.989177,-0.158858, -0.987301,-0.170962,0.985278,-0.183040,0.983105,-0.195090,0.980785,-0.207111, -0.978317,-0.219101,0.975702,-0.231058,0.972940,-0.242980,0.970031,-0.254866, -0.966976,-0.266713,0.963776,-0.278520,0.960431,-0.290285,0.956940,-0.302006,0.953306, --0.313682,0.949528,-0.325310,0.945607,-0.336890,0.941544,-0.348419,0.937339, --0.359895,0.932993,-0.371317,0.928506,-0.382683,0.923880,-0.393992,0.919114,-0.405241, -0.914210,-0.416430,0.909168,-0.427555,0.903989,-0.438616,0.898674,-0.449611, -0.893224,-0.460539,0.887640,-0.471397,0.881921,-0.482184,0.876070,-0.492898,0.870087, --0.503538,0.863973,-0.514103,0.857729,-0.524590,0.851355,-0.534998,0.844854, --0.545325,0.838225,-0.555570,0.831470,-0.565732,0.824589,-0.575808,0.817585, --0.585798,0.810457,-0.595699,0.803208,-0.605511,0.795837,-0.615232,0.788346,-0.624860, -0.780737,-0.634393,0.773010,-0.643832,0.765167,-0.653173,0.757209,-0.662416, -0.749136,-0.671559,0.740951,-0.680601,0.732654,-0.689541,0.724247,-0.698376,0.715731, --0.707107,0.707107,-0.715731,0.698376,-0.724247,0.689541,-0.732654,0.680601, 
--0.740951,0.671559,-0.749136,0.662416,-0.757209,0.653173,-0.765167,0.643832, --0.773010,0.634393,-0.780737,0.624860,-0.788346,0.615232,-0.795837,0.605511,-0.803208, -0.595699,-0.810457,0.585798,-0.817585,0.575808,-0.824589,0.565732,-0.831470, -0.555570,-0.838225,0.545325,-0.844854,0.534998,-0.851355,0.524590,-0.857729,0.514103, --0.863973,0.503538,-0.870087,0.492898,-0.876070,0.482184,-0.881921,0.471397, --0.887640,0.460539,-0.893224,0.449611,-0.898674,0.438616,-0.903989,0.427555,-0.909168, -0.416430,-0.914210,0.405241,-0.919114,0.393992,-0.923880,0.382683,-0.928506, -0.371317,-0.932993,0.359895,-0.937339,0.348419,-0.941544,0.336890,-0.945607, -0.325310,-0.949528,0.313682,-0.953306,0.302006,-0.956940,0.290285,-0.960431,0.278520, --0.963776,0.266713,-0.966976,0.254866,-0.970031,0.242980,-0.972940,0.231058, --0.975702,0.219101,-0.978317,0.207111,-0.980785,0.195090,-0.983105,0.183040,-0.985278, -0.170962,-0.987301,0.158858,-0.989177,0.146730,-0.990903,0.134581,-0.992480, -0.122411,-0.993907,0.110222,-0.995185,0.098017,-0.996313,0.085797,-0.997290,0.073565, --0.998118,0.061321,-0.998795,0.049068,-0.999322,0.036807,-0.999699,0.024541, --0.999925,0.012272, - - - -1.000000,0.000000,0.999831,0.018407,0.999322,0.036807,0.998476,0.055195,0.997290 -,0.073565,0.995767,0.091909,0.993907,0.110222,0.991710,0.128498,0.989177, -0.146730,0.986308,0.164913,0.983105,0.183040,0.979570,0.201105,0.975702,0.219101, -0.971504,0.237024,0.966976,0.254866,0.962121,0.272621,0.956940,0.290285,0.951435, -0.307850,0.945607,0.325310,0.939459,0.342661,0.932993,0.359895,0.926210,0.377007, -0.919114,0.393992,0.911706,0.410843,0.903989,0.427555,0.895966,0.444122,0.887640, -0.460539,0.879012,0.476799,0.870087,0.492898,0.860867,0.508830,0.851355,0.524590, -0.841555,0.540171,0.831470,0.555570,0.821102,0.570781,0.810457,0.585798,0.799537, -0.600616,0.788346,0.615232,0.776888,0.629638,0.765167,0.643832,0.753187,0.657807, -0.740951,0.671559,0.728464,0.685084,0.715731,0.698376,0.702755,0.711432,0.689541 
-,0.724247,0.676093,0.736817,0.662416,0.749136,0.648514,0.761202,0.634393, -0.773010,0.620057,0.784557,0.605511,0.795837,0.590760,0.806848,0.575808,0.817585, -0.560662,0.828045,0.545325,0.838225,0.529804,0.848120,0.514103,0.857729,0.498228, -0.867046,0.482184,0.876070,0.465977,0.884797,0.449611,0.893224,0.433094,0.901349, -0.416430,0.909168,0.399624,0.916679,0.382683,0.923880,0.365613,0.930767,0.348419, -0.937339,0.331106,0.943593,0.313682,0.949528,0.296151,0.955141,0.278520,0.960431, -0.260794,0.965394,0.242980,0.970031,0.225084,0.974339,0.207111,0.978317,0.189069, -0.981964,0.170962,0.985278,0.152797,0.988258,0.134581,0.990903,0.116319,0.993212, -0.098017,0.995185,0.079682,0.996820,0.061321,0.998118,0.042938,0.999078,0.024541 -,0.999699,0.006136,0.999981,-0.012272,0.999925,-0.030675,0.999529,-0.049068, -0.998795,-0.067444,0.997723,-0.085797,0.996313,-0.104122,0.994565,-0.122411, -0.992480,-0.140658,0.990058,-0.158858,0.987301,-0.177004,0.984210,-0.195090,0.980785, --0.213110,0.977028,-0.231058,0.972940,-0.248928,0.968522,-0.266713,0.963776, --0.284408,0.958703,-0.302006,0.953306,-0.319502,0.947586,-0.336890,0.941544,-0.354164, -0.935184,-0.371317,0.928506,-0.388345,0.921514,-0.405241,0.914210,-0.422000, -0.906596,-0.438616,0.898674,-0.455084,0.890449,-0.471397,0.881921,-0.487550,0.873095, --0.503538,0.863973,-0.519356,0.854558,-0.534998,0.844854,-0.550458,0.834863, --0.565732,0.824589,-0.580814,0.814036,-0.595699,0.803208,-0.610383,0.792107, --0.624860,0.780737,-0.639124,0.769103,-0.653173,0.757209,-0.667000,0.745058,-0.680601, -0.732654,-0.693971,0.720003,-0.707107,0.707107,-0.720003,0.693971,-0.732654, -0.680601,-0.745058,0.667000,-0.757209,0.653173,-0.769103,0.639124,-0.780737,0.624860, --0.792107,0.610383,-0.803208,0.595699,-0.814036,0.580814,-0.824589,0.565732, --0.834863,0.550458,-0.844854,0.534998,-0.854558,0.519356,-0.863973,0.503538,-0.873095 -,0.487550,-0.881921,0.471397,-0.890449,0.455084,-0.898674,0.438616,-0.906596, 
-0.422000,-0.914210,0.405241,-0.921514,0.388345,-0.928506,0.371317,-0.935184, -0.354164,-0.941544,0.336890,-0.947586,0.319502,-0.953306,0.302006,-0.958703,0.284408, --0.963776,0.266713,-0.968522,0.248928,-0.972940,0.231058,-0.977028,0.213110, --0.980785,0.195090,-0.984210,0.177004,-0.987301,0.158858,-0.990058,0.140658,-0.992480, -0.122411,-0.994565,0.104122,-0.996313,0.085797,-0.997723,0.067444,-0.998795, -0.049068,-0.999529,0.030675,-0.999925,0.012272,-0.999981,-0.006136,-0.999699, --0.024541,-0.999078,-0.042938,-0.998118,-0.061321,-0.996820,-0.079682,-0.995185, --0.098017,-0.993212,-0.116319,-0.990903,-0.134581,-0.988258,-0.152797,-0.985278, --0.170962,-0.981964,-0.189069,-0.978317,-0.207111,-0.974339,-0.225084,-0.970031, --0.242980,-0.965394,-0.260794,-0.960431,-0.278520,-0.955141,-0.296151,-0.949528, --0.313682,-0.943593,-0.331106,-0.937339,-0.348419,-0.930767,-0.365613,-0.923880, --0.382683,-0.916679,-0.399624,-0.909168,-0.416430,-0.901349,-0.433094,-0.893224, --0.449611,-0.884797,-0.465977,-0.876070,-0.482184,-0.867046,-0.498228,-0.857729, --0.514103,-0.848120,-0.529804,-0.838225,-0.545325,-0.828045,-0.560662,-0.817585, --0.575808,-0.806848,-0.590760,-0.795837,-0.605511,-0.784557,-0.620057,-0.773010, --0.634393,-0.761202,-0.648514,-0.749136,-0.662416,-0.736817,-0.676093,-0.724247, --0.689541,-0.711432,-0.702755,-0.698376,-0.715731,-0.685084,-0.728464,-0.671559, --0.740951,-0.657807,-0.753187,-0.643832,-0.765167,-0.629638,-0.776888,-0.615232, --0.788346,-0.600616,-0.799537,-0.585798,-0.810457,-0.570781,-0.821102,-0.555570, --0.831470,-0.540171,-0.841555,-0.524590,-0.851355,-0.508830,-0.860867,-0.492898, --0.870087,-0.476799,-0.879012,-0.460539,-0.887640,-0.444122,-0.895966,-0.427555, --0.903989,-0.410843,-0.911706,-0.393992,-0.919114,-0.377007,-0.926210,-0.359895, --0.932993,-0.342661,-0.939459,-0.325310,-0.945607,-0.307850,-0.951435,-0.290285, --0.956940,-0.272621,-0.962121,-0.254866,-0.966976,-0.237024,-0.971504,-0.219101, 
--0.975702,-0.201105,-0.979570,-0.183040,-0.983105,-0.164913,-0.986308,-0.146730, --0.989177,-0.128498,-0.991710,-0.110222,-0.993907,-0.091909,-0.995767,-0.073565, --0.997290,-0.055195,-0.998476,-0.036807,-0.999322,-0.018407,-0.999831 - -}; - -/* -* @brief Initialization function for the floating point CFFT/CIFFT function. -* -* @param[in,out] *S points to an instance of the floating point CFFT/CIFFT function structure. -* @param[in] fftLen length of the CFFT/CIFFT . -* @param[in] ifft_flag Flag for the selection of CFFT or CIFFT -* @return The function returns NE10_OK if initialization was successful or NE10_ERR if -* fftLen is not a supported value. -* -* The function inialises the Twiddle factors table and bit reverse table -*/ - -ne10_result_t ne10_cfft_radix4_init_float( - ne10_cfft_radix4_instance_f32_t * S, - ne10_uint16_t fftLen, - ne10_uint8_t ifftFlag) -{ - ne10_uint32_t i,j; - /* Initialise the default arm status */ - ne10_result_t status = NE10_OK; - - /* Initialise the FFT length */ - S->fft_len = fftLen; - - /* Initialise the twiddle coef modifier value */ - S->twid_coef_modifier = 1u; - - /* Initialise the Flag for selection of CFFT or CIFFT */ - S->ifft_flag = ifftFlag; - - /* Initializations of structure parameters depending on the FFT length */ - switch (S->fft_len) - { - - case 1024u: - /* Initializations of structure parameters for 1024 point FFT */ - - /* Initialise the Twiddle coefficient pointer */ - S->p_twiddle = (ne10_float32_t *) cfft_twiddle_coef; - - /* Initialise the bit reversal table modifier */ - S->bit_rev_factor = 1u; - /* Initialise the 1/N Value */ - S->one_by_fft_len = 0.0009765625f; - break; - - - case 256u: - /* Initializations of structure parameters for 256 point FFT */ - - /* Initialise the Twiddle coefficient pointer */ - S->p_twiddle = (ne10_float32_t *) cfft_twiddle_coef; - S->bit_rev_factor = 4u; - S->one_by_fft_len = 0.00390625f; - break; - - case 64u: - /* Initializations of structure parameters for 64 point FFT */ - 
/* Initialise the Twiddle coefficient pointer */ - S->p_twiddle = (ne10_float32_t *) cfft_twiddle_coef; - S->bit_rev_factor = 16u; - S->one_by_fft_len = 0.015625f; - break; - - case 16u: - /* Initializations of structure parameters for 16 point FFT */ - - /* Initialise the Twiddle coefficient pointer */ - S->p_twiddle = (ne10_float32_t *) cfft_twiddle_coef; - - S->bit_rev_factor = 64u; - S->one_by_fft_len = 0.0625f; - break; - - - default: - /* Reporting argument error if fftSize is not valid value */ - status = NE10_ERR; - break; - } - return status; -} - diff --git a/modules/dsp/NE10_fft.h b/modules/dsp/NE10_fft.h index 65136a5..519179c 100644 --- a/modules/dsp/NE10_fft.h +++ b/modules/dsp/NE10_fft.h @@ -46,10 +46,6 @@ extern "C" { /*common fft functions */ /*common functions for float fft */ - extern void ne10_data_bitreversal_float32 (ne10_fft_cpx_float32_t * Fout, - const ne10_fft_cpx_float32_t * f, - ne10_int32_t fstride, - ne10_int32_t * factors); extern void ne10_fft_split_r2c_1d_float32 (ne10_fft_cpx_float32_t *dst, const ne10_fft_cpx_float32_t *src, ne10_fft_cpx_float32_t *twiddles, @@ -58,41 +54,17 @@ extern "C" { const ne10_fft_cpx_float32_t *src, ne10_fft_cpx_float32_t *twiddles, ne10_int32_t ncfft); - extern void ne10_radix4_butterfly_forward_float32_neon (ne10_fft_cpx_float32_t * Fout, + extern void ne10_mixed_radix_fft_forward_float32_neon (ne10_fft_cpx_float32_t * Fout, + ne10_fft_cpx_float32_t * Fin, ne10_int32_t * factors, ne10_fft_cpx_float32_t * twiddles) - asm ("ne10_radix4_butterfly_forward_float32_neon"); - extern void ne10_radix4_butterfly_backward_float32_neon (ne10_fft_cpx_float32_t * Fout, - ne10_int32_t * factors, - ne10_fft_cpx_float32_t * twiddles) - asm ("ne10_radix4_butterfly_backward_float32_neon"); + asm ("ne10_mixed_radix_fft_forward_float32_neon"); - extern void ne10_radix2_butterfly_forward_float32_neon (ne10_fft_cpx_float32_t * Fout, + extern void ne10_mixed_radix_fft_backward_float32_neon (ne10_fft_cpx_float32_t * Fout, + 
ne10_fft_cpx_float32_t * Fin, ne10_int32_t * factors, ne10_fft_cpx_float32_t * twiddles) - asm ("ne10_radix2_butterfly_forward_float32_neon"); - extern void ne10_radix2_butterfly_backward_float32_neon (ne10_fft_cpx_float32_t * Fout, - ne10_int32_t * factors, - ne10_fft_cpx_float32_t * twiddles) - asm ("ne10_radix2_butterfly_backward_float32_neon"); - - extern void ne10_mixed_radix_butterfly_length_even_power2_float32_neon (ne10_fft_cpx_float32_t * Fout, - ne10_int32_t * factors, - ne10_fft_cpx_float32_t * twiddles) - asm ("ne10_mixed_radix_butterfly_length_even_power2_float32_neon"); - extern void ne10_mixed_radix_butterfly_length_odd_power2_float32_neon (ne10_fft_cpx_float32_t * Fout, - ne10_int32_t * factors, - ne10_fft_cpx_float32_t * twiddles) - asm ("ne10_mixed_radix_butterfly_length_odd_power2_float32_neon"); - - extern void ne10_mixed_radix_butterfly_inverse_length_even_power2_float32_neon (ne10_fft_cpx_float32_t * Fout, - ne10_int32_t * factors, - ne10_fft_cpx_float32_t * twiddles) - asm ("ne10_mixed_radix_butterfly_inverse_length_even_power2_float32_neon"); - extern void ne10_mixed_radix_butterfly_inverse_length_odd_power2_float32_neon (ne10_fft_cpx_float32_t * Fout, - ne10_int32_t * factors, - ne10_fft_cpx_float32_t * twiddles) - asm ("ne10_mixed_radix_butterfly_inverse_length_odd_power2_float32_neon"); + asm ("ne10_mixed_radix_fft_backward_float32_neon"); /* common functions for fixed point fft */ /* bit reversal for int 16 */ @@ -188,6 +160,7 @@ extern "C" { ne10_fft_cpx_int32_t * twiddles) asm ("ne10_radix2_butterfly_backward_int32_scaled_neon"); + #ifdef __cplusplus } #endif diff --git a/modules/dsp/NE10_fft_float32.c b/modules/dsp/NE10_fft_float32.c index 03e7be2..37383d6 100644 --- a/modules/dsp/NE10_fft_float32.c +++ b/modules/dsp/NE10_fft_float32.c @@ -49,229 +49,796 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #include "NE10_fft.h" static void ne10_mixed_radix_butterfly_float32_c (ne10_fft_cpx_float32_t * 
Fout, + ne10_fft_cpx_float32_t * Fin, ne10_int32_t * factors, ne10_fft_cpx_float32_t * twiddles) { - ne10_int32_t i, j, mstride; + ne10_int32_t fstride, mstride, N; + ne10_int32_t fstride1; + ne10_int32_t f_count, m_count; ne10_int32_t stage_count; - ne10_int32_t fstride; - ne10_fft_cpx_float32_t tmp; - ne10_fft_cpx_float32_t scratch[6]; - ne10_fft_cpx_float32_t *tw, *tw1, *tw2, *tw3; - ne10_fft_cpx_float32_t * F; + ne10_fft_cpx_float32_t scratch_in[8]; + ne10_fft_cpx_float32_t scratch_out[8]; + ne10_fft_cpx_float32_t scratch[16]; + ne10_fft_cpx_float32_t scratch_tw[6]; + ne10_fft_cpx_float32_t *Fin1, *Fin2, *Fout1, *Fout2; + ne10_fft_cpx_float32_t *Fout_ls = Fout; + ne10_fft_cpx_float32_t *Ftmp; + ne10_fft_cpx_float32_t *tw, *tw1, *tw2; + const ne10_float32_t TW_81 = 0.70710678; + const ne10_float32_t TW_81N = -0.70710678; - // the first stage + // init fstride, mstride, N stage_count = factors[0]; fstride = factors[1]; - if (factors[2 * stage_count] == 2) // length of FFT is 2^n (n is odd) + mstride = factors[ (stage_count << 1) - 1 ]; + N = factors[ stage_count << 1 ]; // radix + + // the first stage + Fin1 = Fin; + Fout1 = Fout; + if (N == 2) // length of FFT is 2^n (n is odd) { - //fstride is nfft>>1 - for (i = 0; i < fstride; i++) + // radix 8 + N = fstride >> 1; // 1/4 of length of FFT + tw = twiddles; + fstride1 = fstride >> 2; + + Fin1 = Fin; + for (f_count = 0; f_count < fstride1; f_count ++) { - tmp.r = Fout[2 * i + 1].r; - tmp.i = Fout[2 * i + 1].i; - Fout[2 * i + 1].r = Fout[2 * i].r - tmp.r; - Fout[2 * i + 1].i = Fout[2 * i].i - tmp.i; - Fout[2 * i].r = Fout[2 * i].r + tmp.r; - Fout[2 * i].i = Fout[2 * i].i + tmp.i; - } + Fout1 = & Fout[ f_count * 8 ]; + // load + scratch_tw[0] = tw[0]; + scratch_tw[2] = tw[2]; + scratch_tw[4] = tw[4]; + scratch_tw[1] = tw[1]; + scratch_tw[3] = tw[3]; + scratch_tw[5] = tw[5]; + + scratch_in[0].r = Fin1[0].r + Fin1[0 + fstride].r; + scratch_in[0].i = Fin1[0].i + Fin1[0 + fstride].i; + scratch_in[1].r = Fin1[0].r - 
Fin1[0 + fstride].r; + scratch_in[1].i = Fin1[0].i - Fin1[0 + fstride].i; + scratch_in[2].r = Fin1[fstride1].r + Fin1[fstride1 + fstride].r; + scratch_in[2].i = Fin1[fstride1].i + Fin1[fstride1 + fstride].i; + scratch_in[3].r = Fin1[fstride1].r - Fin1[fstride1 + fstride].r; + scratch_in[3].i = Fin1[fstride1].i - Fin1[fstride1 + fstride].i; + scratch_in[4].r = Fin1[fstride1 * 2].r + Fin1[fstride1 * 2 + fstride].r; + scratch_in[4].i = Fin1[fstride1 * 2].i + Fin1[fstride1 * 2 + fstride].i; + scratch_in[5].r = Fin1[fstride1 * 2].r - Fin1[fstride1 * 2 + fstride].r; + scratch_in[5].i = Fin1[fstride1 * 2].i - Fin1[fstride1 * 2 + fstride].i; + scratch_in[6].r = Fin1[fstride1 * 3].r + Fin1[fstride1 * 3 + fstride].r; + scratch_in[6].i = Fin1[fstride1 * 3].i + Fin1[fstride1 * 3 + fstride].i; + scratch_in[7].r = Fin1[fstride1 * 3].r - Fin1[fstride1 * 3 + fstride].r; + scratch_in[7].i = Fin1[fstride1 * 3].i - Fin1[fstride1 * 3 + fstride].i; + + // radix 4 butterfly without twiddles + scratch[0] = scratch_in[0]; + scratch[1] = scratch_in[1]; + + scratch[2] = scratch_in[2]; + scratch[3].r = (scratch_in[3].r + scratch_in[3].i) * TW_81; + scratch[3].i = (scratch_in[3].i - scratch_in[3].r) * TW_81; + + scratch[4] = scratch_in[4]; + scratch[5].r = scratch_in[5].i; + scratch[5].i = -scratch_in[5].r; + + scratch[6].r = scratch_in[6].r; + scratch[6].i = scratch_in[6].i; + scratch[7].r = (scratch_in[7].r - scratch_in[7].i) * TW_81N; + scratch[7].i = (scratch_in[7].i + scratch_in[7].r) * TW_81N; + + // radix 2 butterfly + scratch[8].r = scratch[0].r + scratch[4].r; + scratch[8].i = scratch[0].i + scratch[4].i; + scratch[9].r = scratch[1].r + scratch[5].r; + scratch[9].i = scratch[1].i + scratch[5].i; + + scratch[10].r = scratch[0].r - scratch[4].r; + scratch[10].i = scratch[0].i - scratch[4].i; + scratch[11].r = scratch[1].r - scratch[5].r; + scratch[11].i = scratch[1].i - scratch[5].i; + + // radix 2 butterfly + scratch[12].r = scratch[2].r + scratch[6].r; + scratch[12].i = scratch[2].i 
+ scratch[6].i; + scratch[13].r = scratch[3].r + scratch[7].r; + scratch[13].i = scratch[3].i + scratch[7].i; + + scratch[14].r = scratch[2].r - scratch[6].r; + scratch[14].i = scratch[2].i - scratch[6].i; + scratch[15].r = scratch[3].r - scratch[7].r; + scratch[15].i = scratch[3].i - scratch[7].i; + + // third result + scratch_out[4].r = scratch[8].r - scratch[12].r; + scratch_out[4].i = scratch[8].i - scratch[12].i; + scratch_out[5].r = scratch[9].r - scratch[13].r; + scratch_out[5].i = scratch[9].i - scratch[13].i; + + // first result + scratch_out[0].r = scratch[8].r + scratch[12].r; + scratch_out[0].i = scratch[8].i + scratch[12].i; + scratch_out[1].r = scratch[9].r + scratch[13].r; + scratch_out[1].i = scratch[9].i + scratch[13].i; + + // second result + scratch_out[2].r = scratch[10].r + scratch[14].i; + scratch_out[2].i = scratch[10].i - scratch[14].r; + scratch_out[3].r = scratch[11].r + scratch[15].i; + scratch_out[3].i = scratch[11].i - scratch[15].r; + + // forth result + scratch_out[6].r = scratch[10].r - scratch[14].i; + scratch_out[6].i = scratch[10].i + scratch[14].r; + scratch_out[7].r = scratch[11].r - scratch[15].i; + scratch_out[7].i = scratch[11].i + scratch[15].r; + + // store + Fout1[0] = scratch_out[0]; + Fout1[1] = scratch_out[1]; + Fout1[2] = scratch_out[2]; + Fout1[3] = scratch_out[3]; + Fout1[4] = scratch_out[4]; + Fout1[5] = scratch_out[5]; + Fout1[6] = scratch_out[6]; + Fout1[7] = scratch_out[7]; + + Fin1 += 1; + } // f_count + tw += 6; + mstride <<= 2; + fstride >>= 4; + stage_count -= 2; + + // swap + Ftmp = Fin; + Fin = Fout; + Fout = Ftmp; } - else if (factors[2 * stage_count] == 4) // length of FFT is 2^n (n is even) + else if (N == 4) // length of FFT is 2^n (n is even) { //fstride is nfft>>2 - for (i = 0; i < fstride; i++) + for (f_count = fstride; f_count ; f_count --) { - scratch[2].r = Fout[4 * i].r - Fout[4 * i + 2].r; - scratch[2].i = Fout[4 * i].i - Fout[4 * i + 2].i; - - Fout[4 * i].r += Fout[4 * i + 2].r; - Fout[4 * i].i 
+= Fout[4 * i + 2].i; - - scratch[0].r = Fout[4 * i + 1].r + Fout[4 * i + 3].r; - scratch[0].i = Fout[4 * i + 1].i + Fout[4 * i + 3].i; - - scratch[1].r = Fout[4 * i + 1].r - Fout[4 * i + 3].r; - scratch[1].i = Fout[4 * i + 1].i - Fout[4 * i + 3].i; - Fout[4 * i + 2].r = Fout[4 * i].r - scratch[0].r; - Fout[4 * i + 2].i = Fout[4 * i].i - scratch[0].i; - - Fout[4 * i].r += scratch[0].r; - Fout[4 * i].i += scratch[0].i; - - Fout[4 * i + 1].r = scratch[2].r + scratch[1].i; - Fout[4 * i + 1].i = scratch[2].i - scratch[1].r; - Fout[4 * i + 3].r = scratch[2].r - scratch[1].i; - Fout[4 * i + 3].i = scratch[2].i + scratch[1].r; - } + // load + scratch_in[0] = *Fin1; + Fin2 = Fin1 + fstride; + scratch_in[1] = *Fin2; + Fin2 = Fin2 + fstride; + scratch_in[2] = *Fin2; + Fin2 = Fin2 + fstride; + scratch_in[3] = *Fin2; + + // radix 4 butterfly without twiddles + + // radix 2 butterfly + scratch[0].r = scratch_in[0].r + scratch_in[2].r; + scratch[0].i = scratch_in[0].i + scratch_in[2].i; + + scratch[1].r = scratch_in[0].r - scratch_in[2].r; + scratch[1].i = scratch_in[0].i - scratch_in[2].i; + + // radix 2 butterfly + scratch[2].r = scratch_in[1].r + scratch_in[3].r; + scratch[2].i = scratch_in[1].i + scratch_in[3].i; + + scratch[3].r = scratch_in[1].r - scratch_in[3].r; + scratch[3].i = scratch_in[1].i - scratch_in[3].i; + + // third result + scratch_out[2].r = scratch[0].r - scratch[2].r; + scratch_out[2].i = scratch[0].i - scratch[2].i; + + // first result + scratch_out[0].r = scratch[0].r + scratch[2].r; + scratch_out[0].i = scratch[0].i + scratch[2].i; + + // second result + scratch_out[1].r = scratch[1].r + scratch[3].i; + scratch_out[1].i = scratch[1].i - scratch[3].r; + + // forth result + scratch_out[3].r = scratch[1].r - scratch[3].i; + scratch_out[3].i = scratch[1].i + scratch[3].r; + + // store + * Fout1 ++ = scratch_out[0]; + * Fout1 ++ = scratch_out[1]; + * Fout1 ++ = scratch_out[2]; + * Fout1 ++ = scratch_out[3]; + + Fin1++; + } // f_count + + N = fstride; // 1/4 
of length of FFT + + // swap + Ftmp = Fin; + Fin = Fout; + Fout = Ftmp; + + // update address for other stages + stage_count--; + tw = twiddles; + fstride >>= 2; + // end of first stage } - stage_count--; - // other stages - mstride = factors[2 * stage_count + 1]; - tw = twiddles; - for (; stage_count > 0; stage_count--) + + // others but the last one + for (; stage_count > 1 ; stage_count--) { - fstride = fstride >> 2; - for (i = 0; i < fstride; i++) + Fin1 = Fin; + for (f_count = 0; f_count < fstride; f_count ++) { - F = &Fout[i * mstride * 4]; + Fout1 = & Fout[ f_count * mstride << 2 ]; tw1 = tw; - tw2 = tw + mstride; - tw3 = tw + mstride * 2; - for (j = 0; j < mstride; j++) + for (m_count = mstride; m_count ; m_count --) { - scratch[0].r = F[mstride].r * tw1->r - F[mstride].i * tw1->i; - scratch[0].i = F[mstride].r * tw1->i + F[mstride].i * tw1->r; - scratch[1].r = F[mstride * 2].r * tw2->r - F[mstride * 2].i * tw2->i; - scratch[1].i = F[mstride * 2].r * tw2->i + F[mstride * 2].i * tw2->r; - scratch[2].r = F[mstride * 3].r * tw3->r - F[mstride * 3].i * tw3->i; - scratch[2].i = F[mstride * 3].r * tw3->i + F[mstride * 3].i * tw3->r; - - scratch[5].r = F->r - scratch[1].r; - scratch[5].i = F->i - scratch[1].i; - F->r += scratch[1].r; - F->i += scratch[1].i; - - scratch[3].r = scratch[0].r + scratch[2].r; - scratch[3].i = scratch[0].i + scratch[2].i; - scratch[4].r = scratch[0].r - scratch[2].r; - scratch[4].i = scratch[0].i - scratch[2].i; - - F[mstride * 2].r = F->r - scratch[3].r; - F[mstride * 2].i = F->i - scratch[3].i; - F->r += scratch[3].r; - F->i += scratch[3].i; - - F[mstride].r = scratch[5].r + scratch[4].i; - F[mstride].i = scratch[5].i - scratch[4].r; - F[mstride * 3].r = scratch[5].r - scratch[4].i; - F[mstride * 3].i = scratch[5].i + scratch[4].r; + // load + scratch_tw[0] = *tw1; + tw2 = tw1 + mstride; + scratch_tw[1] = *tw2; + tw2 += mstride; + scratch_tw[2] = *tw2; + scratch_in[0] = * Fin1; + Fin2 = Fin1 + N; + scratch_in[1] = * Fin2; + Fin2 += N; 
+ scratch_in[2] = * Fin2; + Fin2 += N; + scratch_in[3] = * Fin2; + + // radix 4 butterfly with twiddles + + scratch[0] = scratch_in[0]; + scratch[1].r = scratch_in[1].r * scratch_tw[0].r - scratch_in[1].i * scratch_tw[0].i; + scratch[1].i = scratch_in[1].i * scratch_tw[0].r + scratch_in[1].r * scratch_tw[0].i; + + scratch[2].r = scratch_in[2].r * scratch_tw[1].r - scratch_in[2].i * scratch_tw[1].i; + scratch[2].i = scratch_in[2].i * scratch_tw[1].r + scratch_in[2].r * scratch_tw[1].i; + + scratch[3].r = scratch_in[3].r * scratch_tw[2].r - scratch_in[3].i * scratch_tw[2].i; + scratch[3].i = scratch_in[3].i * scratch_tw[2].r + scratch_in[3].r * scratch_tw[2].i; + + // radix 2 butterfly + scratch[4].r = scratch[0].r + scratch[2].r; + scratch[4].i = scratch[0].i + scratch[2].i; + + scratch[5].r = scratch[0].r - scratch[2].r; + scratch[5].i = scratch[0].i - scratch[2].i; + + // radix 2 butterfly + scratch[6].r = scratch[1].r + scratch[3].r; + scratch[6].i = scratch[1].i + scratch[3].i; + + scratch[7].r = scratch[1].r - scratch[3].r; + scratch[7].i = scratch[1].i - scratch[3].i; + + // third result + scratch_out[2].r = scratch[4].r - scratch[6].r; + scratch_out[2].i = scratch[4].i - scratch[6].i; + + // first result + scratch_out[0].r = scratch[4].r + scratch[6].r; + scratch_out[0].i = scratch[4].i + scratch[6].i; + + // second result + scratch_out[1].r = scratch[5].r + scratch[7].i; + scratch_out[1].i = scratch[5].i - scratch[7].r; + + // forth result + scratch_out[3].r = scratch[5].r - scratch[7].i; + scratch_out[3].i = scratch[5].i + scratch[7].r; + + // store + *Fout1 = scratch_out[0]; + Fout2 = Fout1 + mstride; + *Fout2 = scratch_out[1]; + Fout2 += mstride; + *Fout2 = scratch_out[2]; + Fout2 += mstride; + *Fout2 = scratch_out[3]; tw1++; - tw2++; - tw3++; - F++; - } - } + Fin1 ++; + Fout1 ++; + } // m_count + } // f_count tw += mstride * 3; mstride <<= 2; - } + // swap + Ftmp = Fin; + Fin = Fout; + Fout = Ftmp; + fstride >>= 2; + } // stage_count + + // the last one 
+ if (stage_count) + { + Fin1 = Fin; + // if stage count is even, output to the input array + Fout1 = Fout_ls; + for (f_count = 0; f_count < fstride; f_count ++) + { + tw1 = tw; + for (m_count = mstride; m_count ; m_count --) + { + // load + scratch_tw[0] = *tw1; + tw2 = tw1 + mstride; + scratch_tw[1] = *tw2; + tw2 += mstride; + scratch_tw[2] = *tw2; + scratch_in[0] = * Fin1; + Fin2 = Fin1 + N; + scratch_in[1] = * Fin2; + Fin2 += N; + scratch_in[2] = * Fin2; + Fin2 += N; + scratch_in[3] = * Fin2; + + // radix 4 butterfly with twiddles + + scratch[0] = scratch_in[0]; + scratch[1].r = scratch_in[1].r * scratch_tw[0].r - scratch_in[1].i * scratch_tw[0].i; + scratch[1].i = scratch_in[1].i * scratch_tw[0].r + scratch_in[1].r * scratch_tw[0].i; + + scratch[2].r = scratch_in[2].r * scratch_tw[1].r - scratch_in[2].i * scratch_tw[1].i; + scratch[2].i = scratch_in[2].i * scratch_tw[1].r + scratch_in[2].r * scratch_tw[1].i; + + scratch[3].r = scratch_in[3].r * scratch_tw[2].r - scratch_in[3].i * scratch_tw[2].i; + scratch[3].i = scratch_in[3].i * scratch_tw[2].r + scratch_in[3].r * scratch_tw[2].i; + + // radix 2 butterfly + scratch[4].r = scratch[0].r + scratch[2].r; + scratch[4].i = scratch[0].i + scratch[2].i; + + scratch[5].r = scratch[0].r - scratch[2].r; + scratch[5].i = scratch[0].i - scratch[2].i; + + // radix 2 butterfly + scratch[6].r = scratch[1].r + scratch[3].r; + scratch[6].i = scratch[1].i + scratch[3].i; + + scratch[7].r = scratch[1].r - scratch[3].r; + scratch[7].i = scratch[1].i - scratch[3].i; + + // third result + scratch_out[2].r = scratch[4].r - scratch[6].r; + scratch_out[2].i = scratch[4].i - scratch[6].i; + + // first result + scratch_out[0].r = scratch[4].r + scratch[6].r; + scratch_out[0].i = scratch[4].i + scratch[6].i; + + // second result + scratch_out[1].r = scratch[5].r + scratch[7].i; + scratch_out[1].i = scratch[5].i - scratch[7].r; + + // forth result + scratch_out[3].r = scratch[5].r - scratch[7].i; + scratch_out[3].i = scratch[5].i + 
scratch[7].r; + + // store + *Fout1 = scratch_out[0]; + Fout2 = Fout1 + N; + *Fout2 = scratch_out[1]; + Fout2 += N; + *Fout2 = scratch_out[2]; + Fout2 += N; + *Fout2 = scratch_out[3]; + + tw1 ++; + Fin1 ++; + Fout1 ++; + } // m_count + } // f_count + } // last stage } static void ne10_mixed_radix_butterfly_inverse_float32_c (ne10_fft_cpx_float32_t * Fout, + ne10_fft_cpx_float32_t * Fin, ne10_int32_t * factors, ne10_fft_cpx_float32_t * twiddles) - { - ne10_int32_t i, j, mstride; + ne10_int32_t fstride, mstride, N; + ne10_int32_t fstride1; + ne10_int32_t f_count, m_count; ne10_int32_t stage_count; - ne10_int32_t fstride; - ne10_fft_cpx_float32_t tmp; - ne10_fft_cpx_float32_t scratch[6]; - ne10_fft_cpx_float32_t *tw, *tw1, *tw2, *tw3; - ne10_fft_cpx_float32_t * F; + ne10_fft_cpx_float32_t scratch_in[8]; + ne10_fft_cpx_float32_t scratch_out[8]; + ne10_fft_cpx_float32_t scratch[16]; + ne10_fft_cpx_float32_t scratch_tw[6]; + ne10_fft_cpx_float32_t *Fin1, *Fin2, *Fout1, *Fout2; + ne10_fft_cpx_float32_t *Fout_ls = Fout; + ne10_fft_cpx_float32_t *Ftmp; + ne10_fft_cpx_float32_t *tw, *tw1, *tw2; + const ne10_float32_t TW_81 = 0.70710678; + const ne10_float32_t TW_81N = -0.70710678; - // the first stage + // init fstride, mstride, N stage_count = factors[0]; fstride = factors[1]; - if (factors[2 * stage_count] == 2) // length of FFT is 2^n (n is odd) + mstride = factors[ (stage_count << 1) - 1 ]; + N = factors[ stage_count << 1 ]; // radix + + // the first stage + Fin1 = Fin; + Fout1 = Fout; + if (N == 2) // length of FFT is 2^n (n is odd) { - //fstride is nfft>>1; - for (i = 0; i < fstride; i++) + // radix 8 + N = fstride >> 1; // 1/4 of length of FFT + tw = twiddles; + fstride1 = fstride >> 2; + + Fin1 = Fin; + for (f_count = 0; f_count < fstride1; f_count ++) { - tmp.r = Fout[2 * i + 1].r; - tmp.i = Fout[2 * i + 1].i; - Fout[2 * i + 1].r = Fout[2 * i].r - tmp.r; - Fout[2 * i + 1].i = Fout[2 * i].i - tmp.i; - Fout[2 * i].r = Fout[2 * i].r + tmp.r; - Fout[2 * i].i = Fout[2 * 
i].i + tmp.i; - } + Fout1 = & Fout[ f_count * 8 ]; + // load + scratch_tw[0] = tw[0]; + scratch_tw[2] = tw[2]; + scratch_tw[4] = tw[4]; + scratch_tw[1] = tw[1]; + scratch_tw[3] = tw[3]; + scratch_tw[5] = tw[5]; + + scratch_in[0].r = Fin1[0].r + Fin1[0 + fstride].r; + scratch_in[0].i = Fin1[0].i + Fin1[0 + fstride].i; + scratch_in[1].r = Fin1[0].r - Fin1[0 + fstride].r; + scratch_in[1].i = Fin1[0].i - Fin1[0 + fstride].i; + scratch_in[2].r = Fin1[fstride1].r + Fin1[fstride1 + fstride].r; + scratch_in[2].i = Fin1[fstride1].i + Fin1[fstride1 + fstride].i; + scratch_in[3].r = Fin1[fstride1].r - Fin1[fstride1 + fstride].r; + scratch_in[3].i = Fin1[fstride1].i - Fin1[fstride1 + fstride].i; + scratch_in[4].r = Fin1[fstride1 * 2].r + Fin1[fstride1 * 2 + fstride].r; + scratch_in[4].i = Fin1[fstride1 * 2].i + Fin1[fstride1 * 2 + fstride].i; + scratch_in[5].r = Fin1[fstride1 * 2].r - Fin1[fstride1 * 2 + fstride].r; + scratch_in[5].i = Fin1[fstride1 * 2].i - Fin1[fstride1 * 2 + fstride].i; + scratch_in[6].r = Fin1[fstride1 * 3].r + Fin1[fstride1 * 3 + fstride].r; + scratch_in[6].i = Fin1[fstride1 * 3].i + Fin1[fstride1 * 3 + fstride].i; + scratch_in[7].r = Fin1[fstride1 * 3].r - Fin1[fstride1 * 3 + fstride].r; + scratch_in[7].i = Fin1[fstride1 * 3].i - Fin1[fstride1 * 3 + fstride].i; + + // radix 4 butterfly with twiddles + + scratch[0] = scratch_in[0]; + scratch[1] = scratch_in[1]; + + scratch[2] = scratch_in[2]; + scratch[3].r = (scratch_in[3].r - scratch_in[3].i) * TW_81; + scratch[3].i = (scratch_in[3].i + scratch_in[3].r) * TW_81; + + scratch[4] = scratch_in[4]; + scratch[5].r = -scratch_in[5].i; + scratch[5].i = scratch_in[5].r; + + scratch[6].r = scratch_in[6].r; + scratch[6].i = scratch_in[6].i; + scratch[7].r = (scratch_in[7].r + scratch_in[7].i) * TW_81N; + scratch[7].i = (scratch_in[7].i - scratch_in[7].r) * TW_81N; + + // radix 2 butterfly + scratch[8].r = scratch[0].r + scratch[4].r; + scratch[8].i = scratch[0].i + scratch[4].i; + scratch[9].r = scratch[1].r + 
scratch[5].r; + scratch[9].i = scratch[1].i + scratch[5].i; + + scratch[10].r = scratch[0].r - scratch[4].r; + scratch[10].i = scratch[0].i - scratch[4].i; + scratch[11].r = scratch[1].r - scratch[5].r; + scratch[11].i = scratch[1].i - scratch[5].i; + + // radix 2 butterfly + scratch[12].r = scratch[2].r + scratch[6].r; + scratch[12].i = scratch[2].i + scratch[6].i; + scratch[13].r = scratch[3].r + scratch[7].r; + scratch[13].i = scratch[3].i + scratch[7].i; + + scratch[14].r = scratch[2].r - scratch[6].r; + scratch[14].i = scratch[2].i - scratch[6].i; + scratch[15].r = scratch[3].r - scratch[7].r; + scratch[15].i = scratch[3].i - scratch[7].i; + + // third result + scratch_out[4].r = scratch[8].r - scratch[12].r; + scratch_out[4].i = scratch[8].i - scratch[12].i; + scratch_out[5].r = scratch[9].r - scratch[13].r; + scratch_out[5].i = scratch[9].i - scratch[13].i; + + // first result + scratch_out[0].r = scratch[8].r + scratch[12].r; + scratch_out[0].i = scratch[8].i + scratch[12].i; + scratch_out[1].r = scratch[9].r + scratch[13].r; + scratch_out[1].i = scratch[9].i + scratch[13].i; + + // second result + scratch_out[2].r = scratch[10].r - scratch[14].i; + scratch_out[2].i = scratch[10].i + scratch[14].r; + scratch_out[3].r = scratch[11].r - scratch[15].i; + scratch_out[3].i = scratch[11].i + scratch[15].r; + + // forth result + scratch_out[6].r = scratch[10].r + scratch[14].i; + scratch_out[6].i = scratch[10].i - scratch[14].r; + scratch_out[7].r = scratch[11].r + scratch[15].i; + scratch_out[7].i = scratch[11].i - scratch[15].r; + + // store + Fout1[0] = scratch_out[0]; + Fout1[1] = scratch_out[1]; + Fout1[2] = scratch_out[2]; + Fout1[3] = scratch_out[3]; + Fout1[4] = scratch_out[4]; + Fout1[5] = scratch_out[5]; + Fout1[6] = scratch_out[6]; + Fout1[7] = scratch_out[7]; + + Fin1 += 1; + } // f_count + tw += 6; + mstride <<= 2; + fstride >>= 4; + stage_count -= 2; + + // swap + Ftmp = Fin; + Fin = Fout; + Fout = Ftmp; } - else if (factors[2 * stage_count] == 4) // 
length of FFT is 2^n (n is even) + else if (N == 4) // length of FFT is 2^n (n is even) { //fstride is nfft>>2 - for (i = 0; i < fstride; i++) + for (f_count = fstride; f_count ; f_count --) { - scratch[2].r = Fout[4 * i].r - Fout[4 * i + 2].r; - scratch[2].i = Fout[4 * i].i - Fout[4 * i + 2].i; - - Fout[4 * i].r += Fout[4 * i + 2].r; - Fout[4 * i].i += Fout[4 * i + 2].i; - - scratch[0].r = Fout[4 * i + 1].r + Fout[4 * i + 3].r; - scratch[0].i = Fout[4 * i + 1].i + Fout[4 * i + 3].i; - - scratch[1].r = Fout[4 * i + 1].r - Fout[4 * i + 3].r; - scratch[1].i = Fout[4 * i + 1].i - Fout[4 * i + 3].i; - Fout[4 * i + 2].r = Fout[4 * i].r - scratch[0].r; - Fout[4 * i + 2].i = Fout[4 * i].i - scratch[0].i; - - Fout[4 * i].r += scratch[0].r; - Fout[4 * i].i += scratch[0].i; - - Fout[4 * i + 1].r = scratch[2].r - scratch[1].i; - Fout[4 * i + 1].i = scratch[2].i + scratch[1].r; - Fout[4 * i + 3].r = scratch[2].r + scratch[1].i; - Fout[4 * i + 3].i = scratch[2].i - scratch[1].r; - } + // load + scratch_in[0] = *Fin1; + Fin2 = Fin1 + fstride; + scratch_in[1] = *Fin2; + Fin2 = Fin2 + fstride; + scratch_in[2] = *Fin2; + Fin2 = Fin2 + fstride; + scratch_in[3] = *Fin2; + + // radix 4 butterfly without twiddles + + // radix 2 butterfly + scratch[0].r = scratch_in[0].r + scratch_in[2].r; + scratch[0].i = scratch_in[0].i + scratch_in[2].i; + + scratch[1].r = scratch_in[0].r - scratch_in[2].r; + scratch[1].i = scratch_in[0].i - scratch_in[2].i; + + // radix 2 butterfly + scratch[2].r = scratch_in[1].r + scratch_in[3].r; + scratch[2].i = scratch_in[1].i + scratch_in[3].i; + + scratch[3].r = scratch_in[1].r - scratch_in[3].r; + scratch[3].i = scratch_in[1].i - scratch_in[3].i; + + // third result + scratch_out[2].r = scratch[0].r - scratch[2].r; + scratch_out[2].i = scratch[0].i - scratch[2].i; + + // first result + scratch_out[0].r = scratch[0].r + scratch[2].r; + scratch_out[0].i = scratch[0].i + scratch[2].i; + + // second result + scratch_out[1].r = scratch[1].r - scratch[3].i; + 
scratch_out[1].i = scratch[1].i + scratch[3].r; + + // forth result + scratch_out[3].r = scratch[1].r + scratch[3].i; + scratch_out[3].i = scratch[1].i - scratch[3].r; + + // store + * Fout1 ++ = scratch_out[0]; + * Fout1 ++ = scratch_out[1]; + * Fout1 ++ = scratch_out[2]; + * Fout1 ++ = scratch_out[3]; + + Fin1++; + } // f_count + + N = fstride; // 1/4 of length of FFT + + // swap + Ftmp = Fin; + Fin = Fout; + Fout = Ftmp; + + // update address for other stages + stage_count--; + tw = twiddles; + fstride >>= 2; + // end of first stage } - stage_count--; - // other stages - mstride = factors[2 * stage_count + 1]; - tw = twiddles; - for (; stage_count > 0; stage_count--) + + // others but the last one + for (; stage_count > 1 ; stage_count--) { - fstride = fstride >> 2; - for (i = 0; i < fstride; i++) + Fin1 = Fin; + for (f_count = 0; f_count < fstride; f_count ++) { - F = &Fout[i * mstride * 4]; + Fout1 = & Fout[ f_count * mstride << 2 ]; tw1 = tw; - tw2 = tw + mstride; - tw3 = tw + mstride * 2; - for (j = 0; j < mstride; j++) + for (m_count = mstride; m_count ; m_count --) { - scratch[0].r = F[mstride].r * tw1->r + F[mstride].i * tw1->i; - scratch[0].i = F[mstride].i * tw1->r - F[mstride].r * tw1->i; - scratch[1].r = F[mstride * 2].r * tw2->r + F[mstride * 2].i * tw2->i; - scratch[1].i = F[mstride * 2].i * tw2->r - F[mstride * 2].r * tw2->i; - scratch[2].r = F[mstride * 3].r * tw3->r + F[mstride * 3].i * tw3->i; - scratch[2].i = F[mstride * 3].i * tw3->r - F[mstride * 3].r * tw3->i; - - scratch[5].r = F->r - scratch[1].r; - scratch[5].i = F->i - scratch[1].i; - F->r += scratch[1].r; - F->i += scratch[1].i; - - scratch[3].r = scratch[0].r + scratch[2].r; - scratch[3].i = scratch[0].i + scratch[2].i; - scratch[4].r = scratch[0].r - scratch[2].r; - scratch[4].i = scratch[0].i - scratch[2].i; - - F[mstride * 2].r = F->r - scratch[3].r; - F[mstride * 2].i = F->i - scratch[3].i; - F->r += scratch[3].r; - F->i += scratch[3].i; - - F[mstride].r = scratch[5].r - 
scratch[4].i; - F[mstride].i = scratch[5].i + scratch[4].r; - F[mstride * 3].r = scratch[5].r + scratch[4].i; - F[mstride * 3].i = scratch[5].i - scratch[4].r; + // load + scratch_tw[0] = *tw1; + tw2 = tw1 + mstride; + scratch_tw[1] = *tw2; + tw2 += mstride; + scratch_tw[2] = *tw2; + scratch_in[0] = * Fin1; + Fin2 = Fin1 + N; + scratch_in[1] = * Fin2; + Fin2 += N; + scratch_in[2] = * Fin2; + Fin2 += N; + scratch_in[3] = * Fin2; + + // radix 4 butterfly with twiddles + + scratch[0] = scratch_in[0]; + scratch[1].r = scratch_in[1].r * scratch_tw[0].r + scratch_in[1].i * scratch_tw[0].i; + scratch[1].i = scratch_in[1].i * scratch_tw[0].r - scratch_in[1].r * scratch_tw[0].i; + + scratch[2].r = scratch_in[2].r * scratch_tw[1].r + scratch_in[2].i * scratch_tw[1].i; + scratch[2].i = scratch_in[2].i * scratch_tw[1].r - scratch_in[2].r * scratch_tw[1].i; + + scratch[3].r = scratch_in[3].r * scratch_tw[2].r + scratch_in[3].i * scratch_tw[2].i; + scratch[3].i = scratch_in[3].i * scratch_tw[2].r - scratch_in[3].r * scratch_tw[2].i; + + // radix 2 butterfly + scratch[4].r = scratch[0].r + scratch[2].r; + scratch[4].i = scratch[0].i + scratch[2].i; + + scratch[5].r = scratch[0].r - scratch[2].r; + scratch[5].i = scratch[0].i - scratch[2].i; + + // radix 2 butterfly + scratch[6].r = scratch[1].r + scratch[3].r; + scratch[6].i = scratch[1].i + scratch[3].i; + + scratch[7].r = scratch[1].r - scratch[3].r; + scratch[7].i = scratch[1].i - scratch[3].i; + + // third result + scratch_out[2].r = scratch[4].r - scratch[6].r; + scratch_out[2].i = scratch[4].i - scratch[6].i; + + // first result + scratch_out[0].r = scratch[4].r + scratch[6].r; + scratch_out[0].i = scratch[4].i + scratch[6].i; + + // second result + scratch_out[1].r = scratch[5].r - scratch[7].i; + scratch_out[1].i = scratch[5].i + scratch[7].r; + + // forth result + scratch_out[3].r = scratch[5].r + scratch[7].i; + scratch_out[3].i = scratch[5].i - scratch[7].r; + + // store + *Fout1 = scratch_out[0]; + Fout2 = Fout1 + 
mstride; + *Fout2 = scratch_out[1]; + Fout2 += mstride; + *Fout2 = scratch_out[2]; + Fout2 += mstride; + *Fout2 = scratch_out[3]; tw1++; - tw2++; - tw3++; - F++; - } - } + Fin1 ++; + Fout1 ++; + } // m_count + } // f_count tw += mstride * 3; mstride <<= 2; - } + // swap + Ftmp = Fin; + Fin = Fout; + Fout = Ftmp; + fstride >>= 2; + } // stage_count + + // the last one + if (stage_count) + { + Fin1 = Fin; + // if stage count is even, output to the input array + Fout1 = Fout_ls; + + for (f_count = 0; f_count < fstride; f_count ++) + { + tw1 = tw; + for (m_count = mstride; m_count ; m_count --) + { + // load + scratch_tw[0] = *tw1; + tw2 = tw1 + mstride; + scratch_tw[1] = *tw2; + tw2 += mstride; + scratch_tw[2] = *tw2; + scratch_in[0] = * Fin1; + Fin2 = Fin1 + N; + scratch_in[1] = * Fin2; + Fin2 += N; + scratch_in[2] = * Fin2; + Fin2 += N; + scratch_in[3] = * Fin2; + + // radix 4 butterfly with twiddles + + scratch[0] = scratch_in[0]; + scratch[1].r = scratch_in[1].r * scratch_tw[0].r + scratch_in[1].i * scratch_tw[0].i; + scratch[1].i = scratch_in[1].i * scratch_tw[0].r - scratch_in[1].r * scratch_tw[0].i; + + scratch[2].r = scratch_in[2].r * scratch_tw[1].r + scratch_in[2].i * scratch_tw[1].i; + scratch[2].i = scratch_in[2].i * scratch_tw[1].r - scratch_in[2].r * scratch_tw[1].i; + + scratch[3].r = scratch_in[3].r * scratch_tw[2].r + scratch_in[3].i * scratch_tw[2].i; + scratch[3].i = scratch_in[3].i * scratch_tw[2].r - scratch_in[3].r * scratch_tw[2].i; + + // radix 2 butterfly + scratch[4].r = scratch[0].r + scratch[2].r; + scratch[4].i = scratch[0].i + scratch[2].i; + + scratch[5].r = scratch[0].r - scratch[2].r; + scratch[5].i = scratch[0].i - scratch[2].i; + + // radix 2 butterfly + scratch[6].r = scratch[1].r + scratch[3].r; + scratch[6].i = scratch[1].i + scratch[3].i; + + scratch[7].r = scratch[1].r - scratch[3].r; + scratch[7].i = scratch[1].i - scratch[3].i; + + // third result + scratch_out[2].r = scratch[4].r - scratch[6].r; + scratch_out[2].i = 
scratch[4].i - scratch[6].i; + + // first result + scratch_out[0].r = scratch[4].r + scratch[6].r; + scratch_out[0].i = scratch[4].i + scratch[6].i; + + // second result + scratch_out[1].r = scratch[5].r - scratch[7].i; + scratch_out[1].i = scratch[5].i + scratch[7].r; + + // forth result + scratch_out[3].r = scratch[5].r + scratch[7].i; + scratch_out[3].i = scratch[5].i - scratch[7].r; + + // store + *Fout1 = scratch_out[0]; + Fout2 = Fout1 + N; + *Fout2 = scratch_out[1]; + Fout2 += N; + *Fout2 = scratch_out[2]; + Fout2 += N; + *Fout2 = scratch_out[3]; + + tw1 ++; + Fin1 ++; + Fout1 ++; + } // m_count + } // f_count + } // last stage } /* factors buffer: @@ -307,34 +874,6 @@ static ne10_int32_t ne10_factor (ne10_int32_t n, ne10_int32_t * facbuf) return NE10_OK; } -void ne10_data_bitreversal_float32 (ne10_fft_cpx_float32_t * Fout, - const ne10_fft_cpx_float32_t * f, - ne10_int32_t fstride, - ne10_int32_t * factors) -{ - const ne10_int32_t p = *factors++; /* the radix */ - const ne10_int32_t m = *factors++; /* stage's fft length/p */ - const ne10_fft_cpx_float32_t * Fout_end = Fout + p * m; - if (m == 1) - { - do - { - *Fout = *f; - f += fstride; - } - while (++Fout != Fout_end); - } - else - { - do - { - ne10_data_bitreversal_float32 (Fout, f, fstride * p, factors); - f += fstride; - } - while ( (Fout += m) != Fout_end); - } - -} void ne10_fft_split_r2c_1d_float32 (ne10_fft_cpx_float32_t *dst, const ne10_fft_cpx_float32_t *src, @@ -560,14 +1099,13 @@ void ne10_fft_c2c_1d_float32_c (ne10_fft_cpx_float32_t *fout, ne10_int32_t nfft, ne10_int32_t inverse_fft) { - // copy the data from input to output and bit reversal - ne10_data_bitreversal_float32 (fout, fin, 1, &factors[2]); if (inverse_fft) - ne10_mixed_radix_butterfly_inverse_float32_c (fout, factors, twiddles); + ne10_mixed_radix_butterfly_inverse_float32_c (fout, fin, factors, twiddles); else - ne10_mixed_radix_butterfly_float32_c (fout, factors, twiddles); + ne10_mixed_radix_butterfly_float32_c (fout, fin, 
factors, twiddles); } + /** * @} */ //end of C2C_FFT_IFFT group diff --git a/modules/dsp/NE10_fft_float32.neon.c b/modules/dsp/NE10_fft_float32.neon.c index 38b69d0..5d2234d 100644 --- a/modules/dsp/NE10_fft_float32.neon.c +++ b/modules/dsp/NE10_fft_float32.neon.c @@ -136,10 +136,10 @@ static inline void ne10_fft8_forward_float32 (ne10_fft_cpx_float32_t * Fout, Fout[6].r = t0_r - t3_i; Fout[6].i = t0_i + t3_r; - t4_r = (s3_r + s3_i) * TW_81; - t4_i = -(s3_r - s3_i) * TW_81; - t5_r = (s7_r - s7_i) * TW_81; - t5_i = (s7_r + s7_i) * TW_81; + t4_r = (s3_r + s3_i) * TW_81; + t4_i = - (s3_r - s3_i) * TW_81; + t5_r = (s7_r - s7_i) * TW_81; + t5_i = (s7_r + s7_i) * TW_81; t0_r = s1_r - s5_i; t0_i = s1_i + s5_r; @@ -205,7 +205,7 @@ static inline void ne10_fft8_backward_float32 (ne10_fft_cpx_float32_t * Fout, t4_r = (s3_r - s3_i) * TW_81; t4_i = (s3_r + s3_i) * TW_81; t5_r = (s7_r + s7_i) * TW_81; - t5_i = -(s7_r - s7_i) * TW_81; + t5_i = - (s7_r - s7_i) * TW_81; t0_r = s1_r + s5_i; t0_i = s1_i - s5_r; @@ -225,404 +225,6 @@ static inline void ne10_fft8_backward_float32 (ne10_fft_cpx_float32_t * Fout, Fout[7].i = t0_i - t3_r; } -static inline ne10_data_bitreversal_butterfly4_forward_float32 (ne10_fft_cpx_float32_t * out, - ne10_fft_cpx_float32_t * in, - ne10_int32_t fstride, - ne10_int32_t stride1) -{ - ne10_float32_t t0_r, t0_i, t1_r, t1_i, t2_r, t2_i, t3_r, t3_i, t4_r, t4_i, t5_r, t5_i; - t2_r = in[0].r - in[fstride * 2].r; - t2_i = in[0].i - in[fstride * 2].i; - t3_r = in[0].r + in[fstride * 2].r; - t3_i = in[0].i + in[fstride * 2].i; - t0_r = in[fstride].r + in[fstride * 3].r; - t0_i = in[fstride].i + in[fstride * 3].i; - t1_r = in[fstride].r - in[fstride * 3].r; - t1_i = in[fstride].i - in[fstride * 3].i; - out[2].r = t3_r - t0_r; - out[2].i = t3_i - t0_i; - out[0].r = t3_r + t0_r; - out[0].i = t3_i + t0_i; - out[1].r = t2_r + t1_i; - out[1].i = t2_i - t1_r; - out[3].r = t2_r - t1_i; - out[3].i = t2_i + t1_r; -} - -static inline 
ne10_data_bitreversal_butterfly4_backward_float32 (ne10_fft_cpx_float32_t * out, - ne10_fft_cpx_float32_t * in, - ne10_int32_t fstride, - ne10_int32_t stride1) -{ - ne10_float32_t t0_r, t0_i, t1_r, t1_i, t2_r, t2_i, t3_r, t3_i, t4_r, t4_i, t5_r, t5_i; - t2_r = in[0].r - in[fstride * 2].r; - t2_i = in[0].i - in[fstride * 2].i; - t3_r = in[0].r + in[fstride * 2].r; - t3_i = in[0].i + in[fstride * 2].i; - t0_r = in[fstride].r + in[fstride * 3].r; - t0_i = in[fstride].i + in[fstride * 3].i; - t1_r = in[fstride].r - in[fstride * 3].r; - t1_i = in[fstride].i - in[fstride * 3].i; - out[2].r = t3_r - t0_r; - out[2].i = t3_i - t0_i; - out[0].r = t3_r + t0_r; - out[0].i = t3_i + t0_i; - out[1].r = t2_r - t1_i; - out[1].i = t2_i + t1_r; - out[3].r = t2_r + t1_i; - out[3].i = t2_i - t1_r; -} - -#define ne10_data_bitreversal64_butterfly4_float32(inverse) \ -static void ne10_data_bitreversal64_butterfly4_##inverse##_float32 (ne10_fft_cpx_float32_t * Fout, \ - ne10_fft_cpx_float32_t * Fin) \ -{ \ - ne10_int32_t i, p; \ - ne10_int32_t fstride; \ - ne10_int32_t stride1; \ - ne10_int32_t t0_r, t0_i, t1_r, t1_i, t2_r, t2_i, t3_r, t3_i, t4_r, t4_i, t5_r, t5_i; \ - ne10_fft_cpx_float32_t * F; \ - ne10_fft_cpx_float32_t * in; \ - fstride = 16; \ - F = Fout; \ - in = Fin; \ - stride1 = fstride >> 2; \ - for (i = 0; i < 4; i++) \ - { \ - in = &Fin[i]; \ - for (p = 0; p < 4; p++) \ - { \ - ne10_data_bitreversal_butterfly4_##inverse##_float32 (F, in, fstride, stride1); \ - in += stride1; \ - F += 4; \ - } \ - } \ -} - -#define ne10_data_bitreversal256_butterfly4_float32(inverse) \ -static void ne10_data_bitreversal256_butterfly4_##inverse##_float32 (ne10_fft_cpx_float32_t * Fout, \ - ne10_fft_cpx_float32_t * Fin) \ -{ \ - ne10_int32_t i, j, p; \ - ne10_int32_t fstride; \ - ne10_int32_t stride1; \ - ne10_int32_t stride2; \ - ne10_int32_t t0_r, t0_i, t1_r, t1_i, t2_r, t2_i, t3_r, t3_i, t4_r, t4_i, t5_r, t5_i; \ - ne10_fft_cpx_float32_t * F; \ - ne10_fft_cpx_float32_t * in; \ - fstride = 64; \ 
- F = Fout; \ - in = Fin; \ - stride1 = fstride >> 2; \ - stride2 = stride1 >> 2; \ - for (j = 0; j < 4; j++) \ - { \ - for (i = 0; i < 4; i++) \ - { \ - in = &Fin[j + i * stride2]; \ - for (p = 0; p < 4; p++) \ - { \ - ne10_data_bitreversal_butterfly4_##inverse##_float32 (F, in, fstride, stride1); \ - in += stride1; \ - F += 4; \ - } \ - } \ - } \ -} - -#define ne10_data_bitreversal1024_butterfly4_float32(inverse) \ -static void ne10_data_bitreversal1024_butterfly4_##inverse##_float32 (ne10_fft_cpx_float32_t * Fout, \ - ne10_fft_cpx_float32_t * Fin) \ -{ \ - ne10_int32_t i, j, k, p; \ - ne10_int32_t fstride; \ - ne10_int32_t stride1; \ - ne10_int32_t stride2; \ - ne10_int32_t stride3; \ - ne10_int32_t t0_r, t0_i, t1_r, t1_i, t2_r, t2_i, t3_r, t3_i, t4_r, t4_i, t5_r, t5_i; \ - ne10_fft_cpx_float32_t * F; \ - ne10_fft_cpx_float32_t * in; \ - fstride = 256; \ - F = Fout; \ - in = Fin; \ - stride1 = fstride >> 2; \ - stride2 = stride1 >> 2; \ - stride3 = stride2 >> 2; \ - for (k = 0; k < 4; k++) \ - { \ - for (j = 0; j < 4; j++) \ - { \ - for (i = 0; i < 4; i++) \ - { \ - in = &Fin[k + j * stride3 + i * stride2]; \ - for (p = 0; p < 4; p++) \ - { \ - ne10_data_bitreversal_butterfly4_##inverse##_float32 (F, in, fstride, stride1); \ - in += stride1; \ - F += 4; \ - } \ - } \ - } \ - } \ -} - -#define ne10_data_bitreversal4096_butterfly4_float32(inverse) \ -static void ne10_data_bitreversal4096_butterfly4_##inverse##_float32 (ne10_fft_cpx_float32_t * Fout, \ - ne10_fft_cpx_float32_t * Fin) \ -{ \ - ne10_int32_t i, j, k, l, p; \ - ne10_int32_t fstride; \ - ne10_int32_t stride1; \ - ne10_int32_t stride2; \ - ne10_int32_t stride3; \ - ne10_int32_t stride4; \ - ne10_int32_t t0_r, t0_i, t1_r, t1_i, t2_r, t2_i, t3_r, t3_i, t4_r, t4_i, t5_r, t5_i; \ - ne10_fft_cpx_float32_t * F; \ - ne10_fft_cpx_float32_t * in; \ - fstride = 1024; \ - F = Fout; \ - in = Fin; \ - stride1 = fstride >> 2; \ - stride2 = stride1 >> 2; \ - stride3 = stride2 >> 2; \ - stride4 = stride3 >> 2; \ - for 
(l = 0; l < 4; l++) \ - { \ - for (k = 0; k < 4; k++) \ - { \ - for (j = 0; j < 4; j++) \ - { \ - for (i = 0; i < 4; i++) \ - { \ - in = &Fin[l + k*stride4 + j * stride3 + i * stride2]; \ - for (p = 0; p < 4; p++) \ - { \ - ne10_data_bitreversal_butterfly4_##inverse##_float32 (F, in, fstride, stride1); \ - in += stride1; \ - F += 4; \ - } \ - } \ - } \ - } \ - } \ -} - -#define ne10_butterfly_length_even_power2_float32_neon(inverse) \ -static void ne10_butterfly_##inverse##_length_even_power2_float32_neon (ne10_fft_cpx_float32_t * Fout, \ - ne10_fft_cpx_float32_t * Fin, \ - ne10_int32_t * factors, \ - ne10_fft_cpx_float32_t * twiddles) \ -{ \ - ne10_int32_t fstride = factors[1]; \ - if (fstride == 16) \ - ne10_data_bitreversal64_butterfly4_##inverse##_float32 (Fout, Fin); \ - else if (fstride == 64) \ - ne10_data_bitreversal256_butterfly4_##inverse##_float32 (Fout, Fin); \ - else if (fstride == 256) \ - ne10_data_bitreversal1024_butterfly4_##inverse##_float32 (Fout, Fin); \ - else if (fstride == 1024) \ - ne10_data_bitreversal4096_butterfly4_##inverse##_float32 (Fout, Fin); \ - ne10_radix4_butterfly_##inverse##_float32_neon (Fout, factors, twiddles);\ -} - -ne10_data_bitreversal64_butterfly4_float32 (forward) -ne10_data_bitreversal64_butterfly4_float32 (backward) -ne10_data_bitreversal256_butterfly4_float32 (forward) -ne10_data_bitreversal256_butterfly4_float32 (backward) -ne10_data_bitreversal1024_butterfly4_float32 (forward) -ne10_data_bitreversal1024_butterfly4_float32 (backward) -ne10_data_bitreversal4096_butterfly4_float32 (forward) -ne10_data_bitreversal4096_butterfly4_float32 (backward) - -ne10_butterfly_length_even_power2_float32_neon (forward) -ne10_butterfly_length_even_power2_float32_neon (backward) - -static inline ne10_data_bitreversal_butterfly2_float32_neon (ne10_fft_cpx_float32_t * out, - ne10_fft_cpx_float32_t * in, - ne10_int32_t fstride, - ne10_int32_t stride1) -{ - float32x2_t d_in0_0, d_in0_1; - float32x2_t d_in1_0, d_in1_1; - float32x2_t 
d_in2_0, d_in2_1; - float32x2_t d_in3_0, d_in3_1; - float32x4_t q_in01_0, q_in01_1, q_in23_0, q_in23_1; - float32x4_t q_out01_0, q_out01_1, q_out23_0, q_out23_1; - /* load loop */ - d_in0_0 = vld1_f32 ( (float32_t*) (&in[0])); - d_in0_1 = vld1_f32 ( (float32_t*) (&in[fstride])); - d_in1_0 = vld1_f32 ( (float32_t*) (&in[stride1])); - d_in1_1 = vld1_f32 ( (float32_t*) (&in[stride1 + fstride])); - d_in2_0 = vld1_f32 ( (float32_t*) (&in[stride1 * 2])); - d_in2_1 = vld1_f32 ( (float32_t*) (&in[stride1 * 2 + fstride])); - d_in3_0 = vld1_f32 ( (float32_t*) (&in[stride1 * 3])); - d_in3_1 = vld1_f32 ( (float32_t*) (&in[stride1 * 3 + fstride])); - /* calculate loop */ - q_in01_0 = vcombine_f32 (d_in0_0, d_in1_0); - q_in01_1 = vcombine_f32 (d_in0_1, d_in1_1); - q_in23_0 = vcombine_f32 (d_in2_0, d_in3_0); - q_in23_1 = vcombine_f32 (d_in2_1, d_in3_1); - q_out01_0 = vaddq_f32 (q_in01_0, q_in01_1); - q_out01_1 = vsubq_f32 (q_in01_0, q_in01_1); - q_out23_0 = vaddq_f32 (q_in23_0, q_in23_1); - q_out23_1 = vsubq_f32 (q_in23_0, q_in23_1); - /* store loop */ - vst1q_f32 ( (float32_t*) (&out[0]), vcombine_f32 (vget_low_f32 (q_out01_0), vget_low_f32 (q_out01_1))); - vst1q_f32 ( (float32_t*) (&out[2]), vcombine_f32 (vget_high_f32 (q_out01_0), vget_high_f32 (q_out01_1))); - vst1q_f32 ( (float32_t*) (&out[4]), vcombine_f32 (vget_low_f32 (q_out23_0), vget_low_f32 (q_out23_1))); - vst1q_f32 ( (float32_t*) (&out[6]), vcombine_f32 (vget_high_f32 (q_out23_0), vget_high_f32 (q_out23_1))); -} - -static void ne10_data_bitreversal32_float32_neon (ne10_fft_cpx_float32_t * Fout, - ne10_fft_cpx_float32_t * Fin) -{ - ne10_int32_t i; - ne10_int32_t fstride; - - ne10_fft_cpx_float32_t * F; - ne10_fft_cpx_float32_t * in; - ne10_int32_t stride1; - ne10_int32_t stride2; - - // get the input, resort, calculate the first stage - fstride = 16; - - F = Fout; - in = Fin; - stride1 = fstride >> 2; - stride2 = stride1 >> 2; - for (i = 0; i < 4; i++) - { - ne10_data_bitreversal_butterfly2_float32_neon (F, in, 
fstride, stride1); - F += 8; - in += stride2; - } -} - -static void ne10_data_bitreversal128_float32_neon (ne10_fft_cpx_float32_t * Fout, - ne10_fft_cpx_float32_t * Fin) -{ - ne10_int32_t i, j; - ne10_int32_t fstride; - ne10_int32_t stride1; - ne10_int32_t stride2; - - ne10_fft_cpx_float32_t * F; - ne10_fft_cpx_float32_t * in; - - // get the input, resort, calculate the first stage - fstride = 64; - F = Fout; - stride1 = fstride >> 2; - stride2 = stride1 >> 2; - for (j = 0; j < 4; j++) - { - in = &Fin[j]; - for (i = 0; i < 4; i++) - { - ne10_data_bitreversal_butterfly2_float32_neon (F, in, fstride, stride1); - F += 8; - in += stride2; - } - } -} - - -static void ne10_data_bitreversal512_float32_neon (ne10_fft_cpx_float32_t * Fout, - ne10_fft_cpx_float32_t * Fin) -{ - ne10_int32_t i, j, k; - ne10_int32_t fstride; - ne10_int32_t stride1; - ne10_int32_t stride2; - ne10_int32_t stride3; - - ne10_fft_cpx_float32_t * F; - ne10_fft_cpx_float32_t * in; - - // get the input, resort, calculate the first stage - fstride = 256; - - F = Fout; - stride1 = fstride >> 2; - stride2 = stride1 >> 2; - stride3 = stride2 >> 2; - for (k = 0; k < 4; k++) - { - for (j = 0; j < 4; j++) - { - in = &Fin[k + j * stride3]; - for (i = 0; i < 4; i++) - { - ne10_data_bitreversal_butterfly2_float32_neon (F, in, fstride, stride1); - F += 8; - in += stride2; - } - } - } -} -static void ne10_data_bitreversal2048_float32_neon (ne10_fft_cpx_float32_t * Fout, - ne10_fft_cpx_float32_t * Fin) -{ - ne10_int32_t i, j, k, l; - ne10_int32_t fstride; - ne10_int32_t stride1; - ne10_int32_t stride2; - ne10_int32_t stride3; - ne10_int32_t stride4; - - ne10_fft_cpx_float32_t * F; - ne10_fft_cpx_float32_t * in; - - // get the input, resort, calculate the first stage - fstride = 1024; - - F = Fout; - stride1 = fstride >> 2; - stride2 = stride1 >> 2; - stride3 = stride2 >> 2; - stride4 = stride3 >> 2; - for (l = 0; l < 4; l++) - { - for (k = 0; k < 4; k++) - { - for (j = 0; j < 4; j++) - { - in = &Fin[l + k * stride4 
+ j * stride3]; - for (i = 0; i < 4; i++) - { - ne10_data_bitreversal_butterfly2_float32_neon (F, in, fstride, stride1); - F += 8; - in += stride2; - } - } - } - } -} - - -#define ne10_butterfly_length_odd_power2_float32_neon(inverse) \ -static void ne10_butterfly_##inverse##_length_odd_power2_float32_neon (ne10_fft_cpx_float32_t * Fout, \ - ne10_fft_cpx_float32_t * Fin, \ - ne10_int32_t * factors, \ - ne10_fft_cpx_float32_t * twiddles) \ -{ \ - ne10_int32_t fstride = factors[1]; \ - ne10_int32_t i; \ - if (fstride == 16) \ - ne10_data_bitreversal32_float32_neon (Fout, Fin); \ - else if (fstride == 64) \ - ne10_data_bitreversal128_float32_neon (Fout, Fin); \ - else if (fstride == 256) \ - ne10_data_bitreversal512_float32_neon (Fout, Fin); \ - else if (fstride == 1024) \ - ne10_data_bitreversal2048_float32_neon (Fout, Fin); \ - ne10_radix2_butterfly_##inverse##_float32_neon (Fout, factors, twiddles); \ -} - -ne10_butterfly_length_odd_power2_float32_neon (forward) -ne10_butterfly_length_odd_power2_float32_neon (backward) - static void ne10_fft16_forward_float32_neon (ne10_fft_cpx_float32_t * Fout, ne10_fft_cpx_float32_t * Fin, ne10_fft_cpx_float32_t * twiddles) @@ -857,46 +459,6 @@ static void ne10_fft16_backward_float32_neon (ne10_fft_cpx_float32_t * Fout, vst2q_f32 (p_dst3, q2_out_cdef); } -static void ne10_mixed_radix_butterfly_forward_float32_neon (ne10_fft_cpx_float32_t * Fout, - ne10_int32_t * factors, - ne10_fft_cpx_float32_t * twiddles) -{ - ne10_int32_t stage_count; - - // the first stage - stage_count = factors[0]; - if (factors[2 * stage_count] == 2) - { - //radix 2/4, FFT length is 2^n (n is odd) - ne10_mixed_radix_butterfly_length_odd_power2_float32_neon (Fout, factors, twiddles); - } - else if (factors[2 * stage_count] == 4) - { - //radix 4, FFT length is 2^n (n is even) - ne10_mixed_radix_butterfly_length_even_power2_float32_neon (Fout, factors, twiddles); - } -} - -static void ne10_mixed_radix_butterfly_backward_float32_neon (ne10_fft_cpx_float32_t * 
Fout, - ne10_int32_t * factors, - ne10_fft_cpx_float32_t * twiddles) -{ - ne10_int32_t stage_count; - - stage_count = factors[0]; - if (factors[2 * stage_count] == 2) - { - //radix 2/4, FFT length is 2^n (n is odd) - ne10_mixed_radix_butterfly_inverse_length_odd_power2_float32_neon (Fout, factors, twiddles); - } - else if (factors[2 * stage_count] == 4) - { - //radix 4, FFT length is 2^n (n is even) - ne10_mixed_radix_butterfly_inverse_length_even_power2_float32_neon (Fout, factors, twiddles); - } -} - - void ne10_fft_split_r2c_1d_float32_neon (ne10_fft_cpx_float32_t *dst, const ne10_fft_cpx_float32_t *src, ne10_fft_cpx_float32_t *twiddles, @@ -1104,6 +666,7 @@ void ne10_fft_split_c2r_1d_float32_neon (ne10_fft_cpx_float32_t *dst, * Otherwise, this FFT is an out-of-place algorithm. When you want to get an in-place FFT, it creates a temp buffer as * output buffer and then copies the temp buffer back to input buffer. For the usage of this function, please check test/test_suite_fft_float32.c */ + void ne10_fft_c2c_1d_float32_neon (ne10_fft_cpx_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_cpx_float32_t *twiddles, @@ -1124,21 +687,8 @@ void ne10_fft_c2c_1d_float32_neon (ne10_fft_cpx_float32_t *fout, case 16: ne10_fft16_backward_float32_neon (fout, fin, twiddles); break; - case 32: - case 128: - case 512: - case 2048: - ne10_butterfly_backward_length_odd_power2_float32_neon (fout, fin, factors, twiddles); - break; - case 64: - case 256: - case 1024: - case 4096: - ne10_butterfly_backward_length_even_power2_float32_neon (fout, fin, factors, twiddles); - break; default: - ne10_data_bitreversal_float32 (fout, fin, 1, &factors[2]); - ne10_mixed_radix_butterfly_backward_float32_neon (fout, factors, twiddles); + ne10_mixed_radix_fft_backward_float32_neon (fout, fin, factors, twiddles); break; } } @@ -1155,21 +705,8 @@ void ne10_fft_c2c_1d_float32_neon (ne10_fft_cpx_float32_t *fout, case 16: ne10_fft16_forward_float32_neon (fout, fin, twiddles); break; - case 32: - 
case 128: - case 512: - case 2048: - ne10_butterfly_forward_length_odd_power2_float32_neon (fout, fin, factors, twiddles); - break; - case 64: - case 256: - case 1024: - case 4096: - ne10_butterfly_forward_length_even_power2_float32_neon (fout, fin, factors, twiddles); - break; default: - ne10_data_bitreversal_float32 (fout, fin, 1, &factors[2]); - ne10_mixed_radix_butterfly_forward_float32_neon (fout, factors, twiddles); + ne10_mixed_radix_fft_forward_float32_neon (fout, fin, factors, twiddles); break; } } diff --git a/modules/dsp/NE10_fft_float32.neon.s b/modules/dsp/NE10_fft_float32.neon.s index 1c0874f..254552b 100644 --- a/modules/dsp/NE10_fft_float32.neon.s +++ b/modules/dsp/NE10_fft_float32.neon.s @@ -35,788 +35,583 @@ /* Registers define*/ /*ARM Registers*/ p_fout .req r0 - p_factors .req r1 - p_twiddles .req r2 - p_fin .req r3 - p_fout0 .req r4 - p_fout1 .req r5 - p_fout2 .req r6 - p_fout3 .req r7 - stage_count .req r8 - fstride .req r9 - mstride .req r10 - count .req r1 - count_f .req r1 - count_m .req r12 - p_tw1 .req r3 - p_tw2 .req r11 - p_tw3 .req r14 - radix .req r5 - tmp0 .req r12 + p_fin .req r1 + p_factors .req r2 + p_twiddles .req r3 + stage_count .req r4 + fstride .req r5 + mstride .req r6 + + radix .req r12 + p_fin0 .req r7 + p_fin1 .req r8 + p_fin2 .req r9 + p_fin3 .req r10 + p_tmp .req r11 + count .req r2 + fstride1 .req r2 + fstep .req r7 + + p_out_ls .req r14 + nstep .req r2 + mstep .req r7 + count_f .req r8 + count_m .req r9 + p_tw1 .req r10 + p_in1 .req r11 + p_out1 .req r12 + tmp0 .req r9 /*NEON variale Declaration for the first stage*/ - d_in0_r01 .dn d0 - d_in0_i01 .dn d2 - d_in1_r01 .dn d4 - d_in1_i01 .dn d6 - d_in0_r23 .dn d1 - d_in0_i23 .dn d3 - d_in1_r23 .dn d5 - d_in1_i23 .dn d7 - q_in0_r0123 .qn q0 - q_in0_i0123 .qn q1 - q_in1_r0123 .qn q2 - q_in1_i0123 .qn q3 - d_out0_r01 .dn d16 - d_out0_i01 .dn d18 - d_out1_r01 .dn d20 - d_out1_i01 .dn d22 - d_out0_r23 .dn d17 - d_out0_i23 .dn d19 - d_out1_r23 .dn d21 - d_out1_i23 .dn d23 - 
q_out0_r0123 .qn q8 - q_out0_i0123 .qn q9 - q_out1_r0123 .qn q10 - q_out1_i0123 .qn q11 - - d_in0_0 .dn d0 - d_in1_0 .dn d1 - d_in2_0 .dn d2 - d_in3_0 .dn d3 - d_in0_1 .dn d4 - d_in1_1 .dn d5 - d_in2_1 .dn d6 - d_in3_1 .dn d7 q_in0_01 .qn q0 - q_in1_01 .qn q2 - q_in2_01 .qn q1 + q_in1_01 .qn q1 + q_in2_01 .qn q2 q_in3_01 .qn q3 - d_out0_0 .dn d16 - d_out1_0 .dn d17 - d_out2_0 .dn d18 - d_out3_0 .dn d19 - d_out0_1 .dn d20 - d_out1_1 .dn d21 - d_out2_1 .dn d22 - d_out3_1 .dn d23 - q_out0_01 .qn q8 - q_out1_01 .qn q10 - q_out2_01 .qn q9 - q_out3_01 .qn q11 - d_s0 .dn d24 - q_s0_01 .qn q12 - d_s1 .dn d26 - q_s1_01 .qn q13 - d_s2 .dn d28 - q_s2_01 .qn q14 + q_s0_2 .qn q4 + q_s1_2 .qn q5 + q_s2_2 .qn q6 + q_s3_2 .qn q7 + d_s1_r2 .dn d10 + d_s1_i2 .dn d11 + d_s3_r2 .dn d14 + d_s3_i2 .dn d15 + q_out0_2 .qn q8 + q_out1_2 .qn q9 + q_out2_2 .qn q10 + q_out3_2 .qn q11 + d_out1_r15 .dn d18 + d_out1_i15 .dn d19 + d_out3_r37 .dn d22 + d_out3_i37 .dn d23 + + d_in0_r .dn d0 + d_in0_i .dn d1 + d_in1_r .dn d2 + d_in1_i .dn d3 + d_in2_r .dn d4 + d_in2_i .dn d5 + d_in3_r .dn d6 + d_in3_i .dn d7 + d_in4_r .dn d8 + d_in4_i .dn d9 + d_in5_r .dn d10 + d_in5_i .dn d11 + d_in6_r .dn d12 + d_in6_i .dn d13 + d_in7_r .dn d14 + d_in7_i .dn d15 + q_in0 .qn q0 + q_in1 .qn q1 + q_in2 .qn q2 + q_in3 .qn q3 + q_in4 .qn q4 + q_in5 .qn q5 + q_in6 .qn q6 + q_in7 .qn q7 + q_sin0 .qn q8 + q_sin1 .qn q9 + q_sin2 .qn q10 + q_sin3 .qn q11 + q_sin4 .qn q12 + q_sin5 .qn q13 + q_sin6 .qn q14 + q_sin7 .qn q15 + d_sin3_r .dn d22 + d_sin3_i .dn d23 + d_sin5_r .dn d26 + d_sin5_i .dn d27 + d_sin7_r .dn d30 + d_sin7_i .dn d31 + + d_tw_twn .dn d0 + d_s3_r .dn d2 + d_s3_i .dn d3 + d_s7_r .dn d4 + d_s7_i .dn d5 + q_s3 .qn q1 + q_s7 .qn q2 + q_s8 .qn q11 + q_s9 .qn q15 + q_s10 .qn q3 + q_s11 .qn q4 + q_s12 .qn q5 + q_s13 .qn q6 + q_s14 .qn q7 + q_s15 .qn q0 + q_out0 .qn q1 + q_out1 .qn q2 + q_out2 .qn q8 + q_out3 .qn q9 + q_out4 .qn q10 + q_out5 .qn q12 + q_out6 .qn q13 + q_out7 .qn q14 + d_s10_r .dn d6 + d_s10_i .dn d7 
+ d_s11_r .dn d8 + d_s11_i .dn d9 + d_s14_r .dn d14 + d_s14_i .dn d15 + d_s15_r .dn d0 + d_s15_i .dn d1 + d_out2_r .dn d16 + d_out2_i .dn d17 + d_out3_r .dn d18 + d_out3_i .dn d19 + d_out6_r .dn d26 + d_out6_i .dn d27 + d_out7_r .dn d28 + d_out7_i .dn d29 - /*NEON variale Declaration for mstride loop */ - q_fin0_r .qn q0 - q_fin0_i .qn q1 - q_fin1_r .qn q0 - q_fin1_i .qn q1 - q_tw1_r .qn q2 - q_tw1_i .qn q3 - q_fin2_r .qn q8 - q_fin2_i .qn q9 - q_tw2_r .qn q10 - q_tw2_i .qn q11 - q_fin3_r .qn q4 - q_fin3_i .qn q5 - q_tw3_r .qn q6 - q_tw3_i .qn q7 - q_s0_r .qn q12 - q_s0_i .qn q13 - q_s1_r .qn q14 - q_s1_i .qn q15 - q_s2_r .qn q2 - q_s2_i .qn q10 - q_s5_r .qn q4 - q_s5_i .qn q5 - q_s4_r .qn q6 - q_s4_i .qn q7 - q_s3_r .qn q8 - q_s3_i .qn q9 - q_fout0_r .qn q0 - q_fout0_i .qn q1 - q_fout2_r .qn q2 - q_fout2_i .qn q3 - q_fout1_r .qn q12 - q_fout1_i .qn q13 - q_fout3_r .qn q14 - q_fout3_i .qn q15 - - /*NEON variale Declaration for mstride 2 loop */ - d_tw1_r01 .dn d16 - d_tw2_r01 .dn d17 - d_tw1_i01 .dn d18 - d_tw2_i01 .dn d19 - d_tw3_r01 .dn d20 - d_tw3_i01 .dn d21 - q_fin0_r0123 .qn q0 - q_fin0_i0123 .qn q1 - d_fin0_r01 .dn d0 - d_fin1_r01 .dn d1 - d_fin0_i01 .dn d2 - d_fin1_i01 .dn d3 - d_fin2_r01 .dn d4 - d_fin3_r01 .dn d5 - d_fin2_i01 .dn d6 - d_fin3_i01 .dn d7 - d_fin0_r23 .dn d22 - d_fin1_r23 .dn d23 - d_fin0_i23 .dn d24 - d_fin1_i23 .dn d25 - d_fin2_r23 .dn d26 - d_fin3_r23 .dn d27 - d_fin2_i23 .dn d28 - d_fin3_i23 .dn d29 - q_s0_r0123 .qn q13 - q_s0_i0123 .qn q14 - d_s0_r01 .dn d26 - d_s0_r23 .dn d27 - d_s0_i01 .dn d28 - d_s0_i23 .dn d29 - q_s1_r0123 .qn q5 - q_s1_i0123 .qn q6 - d_s1_r01 .dn d10 - d_s1_r23 .dn d11 - d_s1_i01 .dn d12 - d_s1_i23 .dn d13 - q_s2_r0123 .qn q15 - q_s2_i0123 .qn q4 - d_s2_r01 .dn d30 - d_s2_r23 .dn d31 - d_s2_i01 .dn d8 - d_s2_i23 .dn d9 - q_s5_r0123 .qn q11 - q_s5_i0123 .qn q12 - q_s4_r0123 .qn q5 - q_s4_i0123 .qn q10 - q_s3_r0123 .qn q6 - q_s3_i0123 .qn q7 - q_fout0_r0123 .qn q0 - q_fout0_i0123 .qn q1 - q_fout2_r0123 .qn q2 - 
q_fout2_i0123 .qn q3 - q_fout1_r0123 .qn q13 - q_fout1_i0123 .qn q14 - q_fout3_r0123 .qn q6 - q_fout3_i0123 .qn q7 - d_fout0_r01 .dn d0 - d_fout1_r01 .dn d1 - d_fout0_i01 .dn d2 - d_fout1_i01 .dn d3 - d_fout2_r01 .dn d4 - d_fout3_r01 .dn d5 - d_fout2_i01 .dn d6 - d_fout3_i01 .dn d7 - d_fout0_r23 .dn d26 - d_fout1_r23 .dn d27 - d_fout0_i23 .dn d28 - d_fout1_i23 .dn d29 - d_fout2_r23 .dn d12 - d_fout3_r23 .dn d13 - d_fout2_i23 .dn d14 - d_fout3_i23 .dn d15 - - d_tmp0 .dn d30 - d_tmp1 .dn d31 - q_tmp .qn q15 - d_tmp2_0 .dn d28 - d_tmp2_1 .dn d29 - q_tmp2 .qn q14 - - .macro RADIX4_BUTTERFLY_P4 - vld2.32 {q_fin1_r, q_fin1_i}, [p_fout1] - vld2.32 {q_tw1_r, q_tw1_i}, [p_tw1]! - vld2.32 {q_fin2_r, q_fin2_i}, [p_fout2] - vld2.32 {q_tw2_r, q_tw2_i}, [p_tw2]! - vld2.32 {q_fin3_r, q_fin3_i}, [p_fout3] - vld2.32 {q_tw3_r, q_tw3_i}, [p_tw3]! - - vmul.f32 q_s0_r, q_fin1_r, q_tw1_r - vmul.f32 q_s0_i, q_fin1_i, q_tw1_r - vmul.f32 q_s1_r, q_fin2_r, q_tw2_r - vmul.f32 q_s1_i, q_fin2_i, q_tw2_r - vmul.f32 q_s2_r, q_fin3_r, q_tw3_r - vmul.f32 q_s2_i, q_fin3_i, q_tw3_r - vmls.f32 q_s0_r, q_fin1_i, q_tw1_i - vmla.f32 q_s0_i, q_fin1_r, q_tw1_i - vmls.f32 q_s1_r, q_fin2_i, q_tw2_i - vmla.f32 q_s1_i, q_fin2_r, q_tw2_i - vld2.32 {q_fin0_r, q_fin0_i}, [p_fout0] - vmls.f32 q_s2_r, q_fin3_i, q_tw3_i - vmla.f32 q_s2_i, q_fin3_r, q_tw3_i - - vsub.f32 q_s5_r, q_fin0_r, q_s1_r - vsub.f32 q_s5_i, q_fin0_i, q_s1_i - vadd.f32 q_fout0_r, q_fin0_r, q_s1_r - vadd.f32 q_fout0_i, q_fin0_i, q_s1_i - - vadd.f32 q_s3_r, q_s0_r, q_s2_r - vadd.f32 q_s3_i, q_s0_i, q_s2_i - vsub.f32 q_s4_r, q_s0_r, q_s2_r - vsub.f32 q_s4_i, q_s0_i, q_s2_i - - vsub.f32 q_fout2_r, q_fout0_r, q_s3_r - vsub.f32 q_fout2_i, q_fout0_i, q_s3_i - vadd.f32 q_fout0_r, q_fout0_r, q_s3_r - vadd.f32 q_fout0_i, q_fout0_i, q_s3_i - - vadd.f32 q_fout1_r, q_s5_r, q_s4_i - vsub.f32 q_fout1_i, q_s5_i, q_s4_r - vsub.f32 q_fout3_r, q_s5_r, q_s4_i - vadd.f32 q_fout3_i, q_s5_i, q_s4_r - - vst2.32 {q_fout2_r, q_fout2_i}, [p_fout2]! 
- vst2.32 {q_fout0_r, q_fout0_i}, [p_fout0]! - vst2.32 {q_fout1_r, q_fout1_i}, [p_fout1]! - vst2.32 {q_fout3_r, q_fout3_i}, [p_fout3]! - .endm - .macro RADIX4_BUTTERFLY_INVERSE_P4 - vld2.32 {q_fin1_r, q_fin1_i}, [p_fout1] - vld2.32 {q_tw1_r, q_tw1_i}, [p_tw1]! - vld2.32 {q_fin2_r, q_fin2_i}, [p_fout2] - vld2.32 {q_tw2_r, q_tw2_i}, [p_tw2]! - vmul.f32 q_s0_r, q_fin1_r, q_tw1_r - vmul.f32 q_s0_i, q_fin1_i, q_tw1_r - vmla.f32 q_s0_r, q_fin1_i, q_tw1_i - vmls.f32 q_s0_i, q_fin1_r, q_tw1_i - - vld2.32 {q_fin3_r, q_fin3_i}, [p_fout3] - vld2.32 {q_tw3_r, q_tw3_i}, [p_tw3]! - vmul.f32 q_s1_r, q_fin2_r, q_tw2_r - vmul.f32 q_s1_i, q_fin2_i, q_tw2_r - vmla.f32 q_s1_r, q_fin2_i, q_tw2_i - vmls.f32 q_s1_i, q_fin2_r, q_tw2_i - - vld2.32 {q_fin0_r, q_fin0_i}, [p_fout0] - vmul.f32 q_s2_r, q_fin3_r, q_tw3_r - vmul.f32 q_s2_i, q_fin3_i, q_tw3_r - vmla.f32 q_s2_r, q_fin3_i, q_tw3_i - vmls.f32 q_s2_i, q_fin3_r, q_tw3_i - - vsub.f32 q_s5_r, q_fin0_r, q_s1_r - vsub.f32 q_s5_i, q_fin0_i, q_s1_i - vadd.f32 q_fout0_r, q_fin0_r, q_s1_r - vadd.f32 q_fout0_i, q_fin0_i, q_s1_i - - vadd.f32 q_s3_r, q_s0_r, q_s2_r - vadd.f32 q_s3_i, q_s0_i, q_s2_i - vsub.f32 q_s4_r, q_s0_r, q_s2_r - vsub.f32 q_s4_i, q_s0_i, q_s2_i - - vsub.f32 q_fout2_r, q_fout0_r, q_s3_r - vsub.f32 q_fout2_i, q_fout0_i, q_s3_i - vadd.f32 q_fout0_r, q_fout0_r, q_s3_r - vadd.f32 q_fout0_i, q_fout0_i, q_s3_i - vst2.32 {q_fout2_r, q_fout2_i}, [p_fout2]! - - vsub.f32 q_fout1_r, q_s5_r, q_s4_i - vadd.f32 q_fout1_i, q_s5_i, q_s4_r - vadd.f32 q_fout3_r, q_s5_r, q_s4_i - vsub.f32 q_fout3_i, q_s5_i, q_s4_r - vst2.32 {q_fout0_r, q_fout0_i}, [p_fout0]! - vst2.32 {q_fout1_r, q_fout1_i}, [p_fout1]! - vst2.32 {q_fout3_r, q_fout3_i}, [p_fout3]! 
+ /*NEON variale Declaration for mstride loop */ + d_fin0_r .dn d0 + d_fin0_i .dn d1 + d_fin1_r .dn d2 + d_fin1_i .dn d3 + d_fin2_r .dn d4 + d_fin2_i .dn d5 + d_fin3_r .dn d6 + d_fin3_i .dn d7 + d_tw0_r .dn d8 + d_tw0_i .dn d9 + d_tw1_r .dn d10 + d_tw1_i .dn d11 + d_tw2_r .dn d12 + d_tw2_i .dn d13 + q_fin0 .qn q0 + q_scr0 .qn q15 + q_scr1 .qn q7 + q_scr2 .qn q8 + q_scr3 .qn q9 + q_scr4 .qn q10 + q_scr5 .qn q11 + q_scr6 .qn q12 + q_scr7 .qn q13 + d_scr1_r .dn d14 + d_scr1_i .dn d15 + d_scr2_r .dn d16 + d_scr2_i .dn d17 + d_scr3_r .dn d18 + d_scr3_i .dn d19 + d_scr5_r .dn d22 + d_scr5_i .dn d23 + d_scr7_r .dn d26 + d_scr7_i .dn d27 + q_fout0 .qn q7 + q_fout2 .qn q8 + d_fout0_r .dn d14 + d_fout0_i .dn d15 + d_fout1_r .dn d28 + d_fout1_i .dn d29 + d_fout2_r .dn d16 + d_fout2_i .dn d17 + d_fout3_r .dn d30 + d_fout3_i .dn d31 + + .macro BUTTERFLY4X2_WITHOUT_TWIDDLES inverse + + /* radix 4 butterfly without twiddles */ + vadd.f32 q_s0_2, q_in0_01, q_in2_01 + vsub.f32 q_s1_2, q_in0_01, q_in2_01 + vld2.32 {q_in0_01}, [p_fin0:64]! + vld2.32 {q_in2_01}, [p_fin2:64]! + vadd.f32 q_s2_2, q_in1_01, q_in3_01 + vsub.f32 q_s3_2, q_in1_01, q_in3_01 + vld2.32 {q_in1_01}, [p_fin1:64]! + vld2.32 {q_in3_01}, [p_fin3:64]! + + vsub.f32 q_out2_2, q_s0_2, q_s2_2 + vadd.f32 q_out0_2, q_s0_2, q_s2_2 + + .ifeqs "\inverse", "TRUE" + vsub.f32 d_out1_r15, d_s1_r2, d_s3_i2 + vadd.f32 d_out1_i15, d_s1_i2, d_s3_r2 + vadd.f32 d_out3_r37, d_s1_r2, d_s3_i2 + vsub.f32 d_out3_i37, d_s1_i2, d_s3_r2 + .else + vadd.f32 d_out1_r15, d_s1_r2, d_s3_i2 + vsub.f32 d_out1_i15, d_s1_i2, d_s3_r2 + vsub.f32 d_out3_r37, d_s1_r2, d_s3_i2 + vadd.f32 d_out3_i37, d_s1_i2, d_s3_r2 + .endif + + vtrn.32 q_out0_2, q_out1_2 + vtrn.32 q_out2_2, q_out3_2 + vst2.32 {q_out0_2}, [p_tmp]! + vst2.32 {q_out2_2}, [p_tmp]! + vst2.32 {q_out1_2}, [p_tmp]! + vst2.32 {q_out3_2}, [p_tmp]! .endm - .macro RADIX24_BUTTERFLY_P4 - vld2.32 {d_tw3_r01, d_tw3_i01}, [p_tw1] - vld2.32 {d_fin0_r01, d_fin1_r01, d_fin0_i01, d_fin1_i01}, [p_fout0]! 
- vld2.32 {d_fin2_r01, d_fin3_r01, d_fin2_i01, d_fin3_i01}, [p_fout0], tmp0 - vld2.32 {d_fin0_r23, d_fin1_r23, d_fin0_i23, d_fin1_i23}, [p_fout1]! - vld2.32 {d_fin2_r23, d_fin3_r23, d_fin2_i23, d_fin3_i23}, [p_fout1], tmp0 - - vmul.f32 d_s2_r01, d_fin3_r01, d_tw3_r01 - vmul.f32 d_s2_i01, d_fin3_r01, d_tw3_i01 - vmul.f32 d_s2_r23, d_fin3_r23, d_tw3_r01 - vmul.f32 d_s2_i23, d_fin3_r23, d_tw3_i01 - vmls.f32 d_s2_r01, d_fin3_i01, d_tw3_i01 - vmla.f32 d_s2_i01, d_fin3_i01, d_tw3_r01 - vmls.f32 d_s2_r23, d_fin3_i23, d_tw3_i01 - vmla.f32 d_s2_i23, d_fin3_i23, d_tw3_r01 - - vmul.f32 d_s1_r01, d_fin2_r01, d_tw2_r01 - vmul.f32 d_s1_r23, d_fin2_r23, d_tw2_r01 - vmul.f32 d_s1_i01, d_fin2_r01, d_tw2_i01 - vmul.f32 d_s1_i23, d_fin2_r23, d_tw2_i01 - vmls.f32 d_s1_r01, d_fin2_i01, d_tw2_i01 - vmls.f32 d_s1_r23, d_fin2_i23, d_tw2_i01 - vmla.f32 d_s1_i01, d_fin2_i01, d_tw2_r01 - vmla.f32 d_s1_i23, d_fin2_i23, d_tw2_r01 - - vmul.f32 d_s0_r01, d_fin1_r01, d_tw1_r01 - vmul.f32 d_s0_r23, d_fin1_r23, d_tw1_r01 - vmul.f32 d_s0_i01, d_fin1_r01, d_tw1_i01 - vmul.f32 d_s0_i23, d_fin1_r23, d_tw1_i01 - vmls.f32 d_s0_r01, d_fin1_i01, d_tw1_i01 - vmls.f32 d_s0_r23, d_fin1_i23, d_tw1_i01 - vmla.f32 d_s0_i01, d_fin1_i01, d_tw1_r01 - vmla.f32 d_s0_i23, d_fin1_i23, d_tw1_r01 - - vmov d_fin1_r01, d_fin0_r23 - vmov d_fin1_i01, d_fin0_i23 - - vsub.f32 q_s5_r0123, q_fin0_r0123, q_s1_r0123 - vsub.f32 q_s5_i0123, q_fin0_i0123, q_s1_i0123 - vadd.f32 q_fout0_r0123, q_fin0_r0123, q_s1_r0123 - vadd.f32 q_fout0_i0123, q_fin0_i0123, q_s1_i0123 - - vadd.f32 q_s3_r0123, q_s0_r0123, q_s2_r0123 - vadd.f32 q_s3_i0123, q_s0_i0123, q_s2_i0123 - vsub.f32 q_s4_r0123, q_s0_r0123, q_s2_r0123 - vsub.f32 q_s4_i0123, q_s0_i0123, q_s2_i0123 - vsub.f32 q_fout2_r0123, q_fout0_r0123, q_s3_r0123 - vsub.f32 q_fout2_i0123, q_fout0_i0123, q_s3_i0123 - vadd.f32 q_fout0_r0123, q_fout0_r0123, q_s3_r0123 - vadd.f32 q_fout0_i0123, q_fout0_i0123, q_s3_i0123 - - vadd.f32 q_fout1_r0123, q_s5_r0123, q_s4_i0123 - vsub.f32 q_fout1_i0123, 
q_s5_i0123, q_s4_r0123 - vsub.f32 q_fout3_r0123, q_s5_r0123, q_s4_i0123 - vadd.f32 q_fout3_i0123, q_s5_i0123, q_s4_r0123 - - vswp d_fout1_r01, d_fout0_r23 - vswp d_fout1_i01, d_fout0_i23 - vswp d_fout3_r01, d_fout2_r23 - vswp d_fout3_i01, d_fout2_i23 - - vst2.32 {d_fout0_r01, d_fout1_r01, d_fout0_i01, d_fout1_i01}, [p_fout2]! - vst2.32 {d_fout0_r23, d_fout1_r23, d_fout0_i23, d_fout1_i23}, [p_fout3]! - vst2.32 {d_fout2_r01, d_fout3_r01, d_fout2_i01, d_fout3_i01}, [p_fout2], tmp0 - vst2.32 {d_fout2_r23, d_fout3_r23, d_fout2_i23, d_fout3_i23}, [p_fout3], tmp0 - .endm + .macro BUTTERFLY4X2_WITH_TWIDDLES inverse + + sub p_in1, p_in1, nstep, lsl #2 + add p_in1, p_in1, #16 + sub p_tw1, p_tw1, mstep, lsl #1 + add p_tw1, p_tw1, #16 + vmov q_scr0, q_fin0 + vmul.f32 d_scr1_r, d_fin1_r, d_tw0_r + vmul.f32 d_scr1_i, d_fin1_i, d_tw0_r + vmul.f32 d_scr2_r, d_fin2_r, d_tw1_r + vmul.f32 d_scr2_i, d_fin2_i, d_tw1_r + vmul.f32 d_scr3_r, d_fin3_r, d_tw2_r + vmul.f32 d_scr3_i, d_fin3_i, d_tw2_r + vld2.32 {d_fin0_r, d_fin0_i}, [p_in1:64], nstep + + .ifeqs "\inverse", "TRUE" + vmla.f32 d_scr1_r, d_fin1_i, d_tw0_i + vmls.f32 d_scr1_i, d_fin1_r, d_tw0_i + vmla.f32 d_scr2_r, d_fin2_i, d_tw1_i + vmls.f32 d_scr2_i, d_fin2_r, d_tw1_i + vmla.f32 d_scr3_r, d_fin3_i, d_tw2_i + vmls.f32 d_scr3_i, d_fin3_r, d_tw2_i + .else + vmls.f32 d_scr1_r, d_fin1_i, d_tw0_i + vmla.f32 d_scr1_i, d_fin1_r, d_tw0_i + vmls.f32 d_scr2_r, d_fin2_i, d_tw1_i + vmla.f32 d_scr2_i, d_fin2_r, d_tw1_i + vmls.f32 d_scr3_r, d_fin3_i, d_tw2_i + vmla.f32 d_scr3_i, d_fin3_r, d_tw2_i + .endif + + vld2.32 {d_fin1_r, d_fin1_i}, [p_in1:64], nstep + vld2.32 {d_tw0_r, d_tw0_i}, [p_tw1:64], mstep + vld2.32 {d_fin2_r, d_fin2_i}, [p_in1:64], nstep + vld2.32 {d_fin3_r, d_fin3_i}, [p_in1:64], nstep + vld2.32 {d_tw1_r, d_tw1_i}, [p_tw1:64], mstep + vld2.32 {d_tw2_r, d_tw2_i}, [p_tw1:64] + + vadd.f32 q_scr4, q_scr0, q_scr2 + vsub.f32 q_scr5, q_scr0, q_scr2 + vadd.f32 q_scr6, q_scr1, q_scr3 + vsub.f32 q_scr7, q_scr1, q_scr3 + + vadd.f32 
q_fout0, q_scr4, q_scr6 + vsub.f32 q_fout2, q_scr4, q_scr6 + + .ifeqs "\inverse", "TRUE" + vsub.f32 d_fout1_r, d_scr5_r, d_scr7_i + vadd.f32 d_fout1_i, d_scr5_i, d_scr7_r + vadd.f32 d_fout3_r, d_scr5_r, d_scr7_i + vsub.f32 d_fout3_i, d_scr5_i, d_scr7_r + .else + vadd.f32 d_fout1_r, d_scr5_r, d_scr7_i + vsub.f32 d_fout1_i, d_scr5_i, d_scr7_r + vsub.f32 d_fout3_r, d_scr5_r, d_scr7_i + vadd.f32 d_fout3_i, d_scr5_i, d_scr7_r + .endif + + vst2.32 {d_fout0_r, d_fout0_i}, [p_out1], mstep + vst2.32 {d_fout1_r, d_fout1_i}, [p_out1], mstep + vst2.32 {d_fout2_r, d_fout2_i}, [p_out1], mstep + vst2.32 {d_fout3_r, d_fout3_i}, [p_out1], mstep + sub p_out1, p_out1, mstep, lsl #2 + add p_out1, p_out1, #16 - .macro RADIX24_BUTTERFLY_INVERSE_P4 - vld2.32 {d_tw3_r01, d_tw3_i01}, [p_tw1] - vld2.32 {d_fin0_r01, d_fin1_r01, d_fin0_i01, d_fin1_i01}, [p_fout0]! - vld2.32 {d_fin2_r01, d_fin3_r01, d_fin2_i01, d_fin3_i01}, [p_fout0], tmp0 - vld2.32 {d_fin0_r23, d_fin1_r23, d_fin0_i23, d_fin1_i23}, [p_fout1]! - vld2.32 {d_fin2_r23, d_fin3_r23, d_fin2_i23, d_fin3_i23}, [p_fout1], tmp0 - - vmul.f32 d_s2_r01, d_fin3_r01, d_tw3_r01 - vmul.f32 d_s2_i01, d_fin3_i01, d_tw3_r01 - vmul.f32 d_s2_r23, d_fin3_r23, d_tw3_r01 - vmul.f32 d_s2_i23, d_fin3_i23, d_tw3_r01 - vmla.f32 d_s2_r01, d_fin3_i01, d_tw3_i01 - vmls.f32 d_s2_i01, d_fin3_r01, d_tw3_i01 - vmla.f32 d_s2_r23, d_fin3_i23, d_tw3_i01 - vmls.f32 d_s2_i23, d_fin3_r23, d_tw3_i01 - - vmul.f32 d_s1_r01, d_fin2_r01, d_tw2_r01 - vmul.f32 d_s1_r23, d_fin2_r23, d_tw2_r01 - vmul.f32 d_s1_i01, d_fin2_i01, d_tw2_r01 - vmul.f32 d_s1_i23, d_fin2_i23, d_tw2_r01 - vmla.f32 d_s1_r01, d_fin2_i01, d_tw2_i01 - vmla.f32 d_s1_r23, d_fin2_i23, d_tw2_i01 - vmls.f32 d_s1_i01, d_fin2_r01, d_tw2_i01 - vmls.f32 d_s1_i23, d_fin2_r23, d_tw2_i01 - - vmul.f32 d_s0_r01, d_fin1_r01, d_tw1_r01 - vmul.f32 d_s0_r23, d_fin1_r23, d_tw1_r01 - vmul.f32 d_s0_i01, d_fin1_i01, d_tw1_r01 - vmul.f32 d_s0_i23, d_fin1_i23, d_tw1_r01 - vmla.f32 d_s0_r01, d_fin1_i01, d_tw1_i01 - vmla.f32 
d_s0_r23, d_fin1_i23, d_tw1_i01 - vmls.f32 d_s0_i01, d_fin1_r01, d_tw1_i01 - vmls.f32 d_s0_i23, d_fin1_r23, d_tw1_i01 - - vmov d_fin1_r01, d_fin0_r23 - vmov d_fin1_i01, d_fin0_i23 - - vsub.f32 q_s5_r0123, q_fin0_r0123, q_s1_r0123 - vsub.f32 q_s5_i0123, q_fin0_i0123, q_s1_i0123 - vadd.f32 q_fout0_r0123, q_fin0_r0123, q_s1_r0123 - vadd.f32 q_fout0_i0123, q_fin0_i0123, q_s1_i0123 - - vadd.f32 q_s3_r0123, q_s0_r0123, q_s2_r0123 - vadd.f32 q_s3_i0123, q_s0_i0123, q_s2_i0123 - vsub.f32 q_s4_r0123, q_s0_r0123, q_s2_r0123 - vsub.f32 q_s4_i0123, q_s0_i0123, q_s2_i0123 - vsub.f32 q_fout2_r0123, q_fout0_r0123, q_s3_r0123 - vsub.f32 q_fout2_i0123, q_fout0_i0123, q_s3_i0123 - vadd.f32 q_fout0_r0123, q_fout0_r0123, q_s3_r0123 - vadd.f32 q_fout0_i0123, q_fout0_i0123, q_s3_i0123 - - vsub.f32 q_fout1_r0123, q_s5_r0123, q_s4_i0123 - vadd.f32 q_fout1_i0123, q_s5_i0123, q_s4_r0123 - vadd.f32 q_fout3_r0123, q_s5_r0123, q_s4_i0123 - vsub.f32 q_fout3_i0123, q_s5_i0123, q_s4_r0123 - - vswp d_fout1_r01, d_fout0_r23 - vswp d_fout1_i01, d_fout0_i23 - vswp d_fout3_r01, d_fout2_r23 - vswp d_fout3_i01, d_fout2_i23 - - vst2.32 {d_fout0_r01, d_fout1_r01, d_fout0_i01, d_fout1_i01}, [p_fout2]! - vst2.32 {d_fout0_r23, d_fout1_r23, d_fout0_i23, d_fout1_i23}, [p_fout3]! 
- vst2.32 {d_fout2_r01, d_fout3_r01, d_fout2_i01, d_fout3_i01}, [p_fout2], tmp0 - vst2.32 {d_fout2_r23, d_fout3_r23, d_fout2_i23, d_fout3_i23}, [p_fout3], tmp0 .endm - .align 4 - .global ne10_radix4_butterfly_forward_float32_neon - .thumb - .thumb_func - -ne10_radix4_butterfly_forward_float32_neon: - - push {r4-r12,lr} - - ldr stage_count, [p_factors] /* get factors[0]---stage_count */ - ldr fstride, [p_factors, #4] /* get factors[1]---fstride */ - add p_factors, p_factors, stage_count, lsl #3 /* get the address of factors[2*stage_count] */ - ldr mstride, [p_factors, #-4] /* get factors[2*stage_count-1]--- mstride */ - sub stage_count, stage_count, #1 - - /* loop of the stages */ -.L_ne10_radix4_butterfly_forward_stages: - lsr fstride, fstride, #2 - - /* loop of fstride */ - mov count_f, fstride -.L_ne10_radix4_butterfly_forward_stages_fstride: - sub tmp0, fstride, count_f - mul tmp0, tmp0, mstride - add p_fout0, p_fout, tmp0, lsl #5 - add p_fout2, p_fout0, mstride, lsl #4 /* get the address of F[mstride*2] */ - add p_fout1, p_fout0, mstride, lsl #3 /* get the address of F[mstride] */ - add p_fout3, p_fout2, mstride, lsl #3 /* get the address of F[mstride*3] */ - mov p_tw1, p_twiddles - add p_tw2, p_tw1, mstride, lsl #3 /* get the address of tw2 */ - add p_tw3, p_tw1, mstride, lsl #4 /* get the address of tw3 */ - - /* loop of mstride */ - mov count_m, mstride - -.L_ne10_radix4_butterfly_forward_stages_mstride: - - RADIX4_BUTTERFLY_P4 - - subs count_m, count_m, #4 - bgt .L_ne10_radix4_butterfly_forward_stages_mstride - - /* end of mstride_loop */ + .macro BUTTERFLY8X2_WITHOUT_TWIDDLES inverse + /** + * q_in0: Fin1[0] + * q_in1: Fin1[0 + fstride] + * q_in2: Fin1[fstride1] + * q_in3: Fin1[fstride1 + fstride] + * q_in4: Fin1[fstride1*2] + * q_in5: Fin1[fstride1*2 + fstride] + * q_in6: Fin1[fstride1*3] + * q_in7: Fin1[fstride1*3 + fstride] + * + */ - subs count_f, count_f, #1 - bgt .L_ne10_radix4_butterfly_forward_stages_fstride + ldr tmp0, =TW_81 + vld2.32 {d_in0_r, 
d_in0_i}, [p_in1:64], fstep + vld2.32 {d_in2_r, d_in2_i}, [p_in1:64], fstep + vld2.32 {d_in4_r, d_in4_i}, [p_in1:64], fstep + vld2.32 {d_in6_r, d_in6_i}, [p_in1:64], fstep + vld2.32 {d_in1_r, d_in1_i}, [p_in1:64], fstep + vld2.32 {d_in3_r, d_in3_i}, [p_in1:64], fstep + vld2.32 {d_in5_r, d_in5_i}, [p_in1:64], fstep + vld2.32 {d_in7_r, d_in7_i}, [p_in1:64], fstep + + // radix 4 butterfly without twiddles + vadd.f32 q_sin0, q_in0, q_in1 + vsub.f32 q_sin1, q_in0, q_in1 + vld1.32 {d_tw_twn}, [tmp0] + vadd.f32 q_sin2, q_in2, q_in3 + vsub.f32 q_sin3, q_in2, q_in3 + vadd.f32 q_sin4, q_in4, q_in5 + vsub.f32 q_sin5, q_in4, q_in5 + vadd.f32 q_sin6, q_in6, q_in7 + vsub.f32 q_sin7, q_in6, q_in7 + + .ifeqs "\inverse", "TRUE" + vneg.f32 d_sin5_i, d_sin5_i + vsub.f32 d_s3_r, d_sin3_r, d_sin3_i + vadd.f32 d_s3_i, d_sin3_i, d_sin3_r + vadd.f32 d_s7_r, d_sin7_r, d_sin7_i + vsub.f32 d_s7_i, d_sin7_i, d_sin7_r + .else + vneg.f32 d_sin5_r, d_sin5_r + vadd.f32 d_s3_r, d_sin3_r, d_sin3_i + vsub.f32 d_s3_i, d_sin3_i, d_sin3_r + vsub.f32 d_s7_r, d_sin7_r, d_sin7_i + vadd.f32 d_s7_i, d_sin7_i, d_sin7_r + .endif + vswp d_sin5_r, d_sin5_i + + vmul.f32 q_s3, q_s3, d_tw_twn[0] + vmul.f32 q_s7, q_s7, d_tw_twn[1] + + // radix 2 butterfly + vadd.f32 q_s8, q_sin0, q_sin4 + vadd.f32 q_s9, q_sin1, q_sin5 + vsub.f32 q_s10, q_sin0, q_sin4 + vsub.f32 q_s11, q_sin1, q_sin5 + + // radix 2 butterfly + vadd.f32 q_s12, q_sin2, q_sin6 + vadd.f32 q_s13, q_s3, q_s7 + vsub.f32 q_s14, q_sin2, q_sin6 + vsub.f32 q_s15, q_s3, q_s7 + + vsub.f32 q_out4, q_s8, q_s12 + vsub.f32 q_out5, q_s9, q_s13 + vadd.f32 q_out0, q_s8, q_s12 + vadd.f32 q_out1, q_s9, q_s13 + + .ifeqs "\inverse", "TRUE" + vsub.f32 d_out2_r, d_s10_r, d_s14_i + vadd.f32 d_out2_i, d_s10_i, d_s14_r + vsub.f32 d_out3_r, d_s11_r, d_s15_i + vadd.f32 d_out3_i, d_s11_i, d_s15_r + vadd.f32 d_out6_r, d_s10_r, d_s14_i + vsub.f32 d_out6_i, d_s10_i, d_s14_r + vadd.f32 d_out7_r, d_s11_r, d_s15_i + vsub.f32 d_out7_i, d_s11_i, d_s15_r + .else + vadd.f32 d_out2_r, 
d_s10_r, d_s14_i + vsub.f32 d_out2_i, d_s10_i, d_s14_r + vadd.f32 d_out3_r, d_s11_r, d_s15_i + vsub.f32 d_out3_i, d_s11_i, d_s15_r + vsub.f32 d_out6_r, d_s10_r, d_s14_i + vadd.f32 d_out6_i, d_s10_i, d_s14_r + vsub.f32 d_out7_r, d_s11_r, d_s15_i + vadd.f32 d_out7_i, d_s11_i, d_s15_r + .endif + + vtrn.32 q_out0, q_out1 + vtrn.32 q_out2, q_out3 + vtrn.32 q_out4, q_out5 + vtrn.32 q_out6, q_out7 + + + vst2.32 {q_out0}, [p_out1]! + vst2.32 {q_out2}, [p_out1]! + vst2.32 {q_out4}, [p_out1]! + vst2.32 {q_out6}, [p_out1]! + vst2.32 {q_out1}, [p_out1]! + vst2.32 {q_out3}, [p_out1]! + vst2.32 {q_out5}, [p_out1]! + vst2.32 {q_out7}, [p_out1]! + + sub p_in1, p_in1, fstep, lsl #3 + add p_in1, p_in1, #16 - add p_twiddles, p_twiddles, mstride, lsl #4 - add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */ - lsl mstride, mstride, #2 + .endm - subs stage_count, stage_count, #1 - bgt .L_ne10_radix4_butterfly_forward_stages + .global TW_81 +TW_81: +.float 0.70710678 +.float -0.70710678 -.L_ne10_radix4_butterfly_forward_end: - /*Return From Function*/ - pop {r4-r12,pc} + /** + * @details + * This function implements the radix4/8 forward FFT + * + * @param[in/out] *Fout points to input/output pointers + * @param[in] *factors factors pointer: + * 0: stage number + * 1: stride for the first stage + * others: factor out powers of 4, powers of 2 + * @param[in] *twiddles twiddles coeffs of FFT + */ .align 4 - .global ne10_radix2_butterfly_forward_float32_neon + .global ne10_mixed_radix_fft_forward_float32_neon .thumb .thumb_func -ne10_radix2_butterfly_forward_float32_neon: - +ne10_mixed_radix_fft_forward_float32_neon: push {r4-r12,lr} vpush {q4-q7} ldr stage_count, [p_factors] /* get factors[0]---stage_count */ ldr fstride, [p_factors, #4] /* get factors[1]---fstride */ add p_factors, p_factors, stage_count, lsl #3 /* get the address of factors[2*stage_count] */ + ldr radix, [p_factors] /* get factors[2*stage_count]--- the first radix */ ldr mstride, 
[p_factors, #-4] /* get factors[2*stage_count-1]--- mstride */ - sub stage_count, stage_count, #2 + /* save the output buffer for the last stage */ + mov p_out_ls, p_fout - /* loop of the second stages */ -.L_ne10_radix2_butterfly_forwards_second_stage: - lsr fstride, fstride, #2 + /* ---------------the first stage--------------- */ + /* judge the radix is 2 or 4 */ + cmp radix, #2 + beq .L_ne10_radix8_butterfly_first_stage - /* loop of fstride */ - mov count_f, fstride - mov p_tw1, p_twiddles - mov p_fout0, p_fout - add p_fout1, p_fout, mstride, lsl #5 - mov p_fout2, p_fout - mov p_fout3, p_fout1 - mov tmp0, #96 - vld2.32 {d_tw1_r01, d_tw2_r01, d_tw1_i01, d_tw2_i01}, [p_tw1]! - -.L_ne10_radix2_butterfly_forwards_second_stage_fstride: - @RADIX24_BUTTERFLY_P4 - vld2.32 {d_tw3_r01, d_tw3_i01}, [p_tw1] - vld2.32 {d_fin0_r01, d_fin1_r01, d_fin0_i01, d_fin1_i01}, [p_fout0]! - vld2.32 {d_fin2_r01, d_fin3_r01, d_fin2_i01, d_fin3_i01}, [p_fout0], tmp0 - vld2.32 {d_fin0_r23, d_fin1_r23, d_fin0_i23, d_fin1_i23}, [p_fout1]! 
- vld2.32 {d_fin2_r23, d_fin3_r23, d_fin2_i23, d_fin3_i23}, [p_fout1], tmp0 - - vmul.f32 d_s2_r01, d_fin3_r01, d_tw3_r01 - vmul.f32 d_s2_i01, d_fin3_r01, d_tw3_i01 - vmul.f32 d_s2_r23, d_fin3_r23, d_tw3_r01 - vmul.f32 d_s2_i23, d_fin3_r23, d_tw3_i01 - vmls.f32 d_s2_r01, d_fin3_i01, d_tw3_i01 - vmla.f32 d_s2_i01, d_fin3_i01, d_tw3_r01 - vmls.f32 d_s2_r23, d_fin3_i23, d_tw3_i01 - vmla.f32 d_s2_i23, d_fin3_i23, d_tw3_r01 - - vmul.f32 d_s1_r01, d_fin2_r01, d_tw2_r01 - vmul.f32 d_s1_r23, d_fin2_r23, d_tw2_r01 - vmul.f32 d_s1_i01, d_fin2_r01, d_tw2_i01 - vmul.f32 d_s1_i23, d_fin2_r23, d_tw2_i01 - vmls.f32 d_s1_r01, d_fin2_i01, d_tw2_i01 - vmls.f32 d_s1_r23, d_fin2_i23, d_tw2_i01 - vmla.f32 d_s1_i01, d_fin2_i01, d_tw2_r01 - vmla.f32 d_s1_i23, d_fin2_i23, d_tw2_r01 - - vmul.f32 d_s0_r01, d_fin1_r01, d_tw1_r01 - vmul.f32 d_s0_r23, d_fin1_r23, d_tw1_r01 - vmul.f32 d_s0_i01, d_fin1_r01, d_tw1_i01 - vmul.f32 d_s0_i23, d_fin1_r23, d_tw1_i01 - vmls.f32 d_s0_r01, d_fin1_i01, d_tw1_i01 - vmls.f32 d_s0_r23, d_fin1_i23, d_tw1_i01 - vmla.f32 d_s0_i01, d_fin1_i01, d_tw1_r01 - vmla.f32 d_s0_i23, d_fin1_i23, d_tw1_r01 - - vmov d_fin1_r01, d_fin0_r23 - vmov d_fin1_i01, d_fin0_i23 - - vsub.f32 q_s5_r0123, q_fin0_r0123, q_s1_r0123 - vsub.f32 q_s5_i0123, q_fin0_i0123, q_s1_i0123 - vadd.f32 q_fout0_r0123, q_fin0_r0123, q_s1_r0123 - vadd.f32 q_fout0_i0123, q_fin0_i0123, q_s1_i0123 - - vadd.f32 q_s3_r0123, q_s0_r0123, q_s2_r0123 - vadd.f32 q_s3_i0123, q_s0_i0123, q_s2_i0123 - vsub.f32 q_s4_r0123, q_s0_r0123, q_s2_r0123 - vsub.f32 q_s4_i0123, q_s0_i0123, q_s2_i0123 - vsub.f32 q_fout2_r0123, q_fout0_r0123, q_s3_r0123 - vsub.f32 q_fout2_i0123, q_fout0_i0123, q_s3_i0123 - vadd.f32 q_fout0_r0123, q_fout0_r0123, q_s3_r0123 - vadd.f32 q_fout0_i0123, q_fout0_i0123, q_s3_i0123 - - vadd.f32 q_fout1_r0123, q_s5_r0123, q_s4_i0123 - vsub.f32 q_fout1_i0123, q_s5_i0123, q_s4_r0123 - vsub.f32 q_fout3_r0123, q_s5_r0123, q_s4_i0123 - vadd.f32 q_fout3_i0123, q_s5_i0123, q_s4_r0123 - - vswp d_fout1_r01, 
d_fout0_r23 - vswp d_fout1_i01, d_fout0_i23 - vswp d_fout3_r01, d_fout2_r23 - vswp d_fout3_i01, d_fout2_i23 - - vst2.32 {d_fout0_r01, d_fout1_r01, d_fout0_i01, d_fout1_i01}, [p_fout2]! - vst2.32 {d_fout0_r23, d_fout1_r23, d_fout0_i23, d_fout1_i23}, [p_fout3]! - vst2.32 {d_fout2_r01, d_fout3_r01, d_fout2_i01, d_fout3_i01}, [p_fout2], tmp0 - vst2.32 {d_fout2_r23, d_fout3_r23, d_fout2_i23, d_fout3_i23}, [p_fout3], tmp0 - subs count_f, count_f, #2 - bgt .L_ne10_radix2_butterfly_forwards_second_stage_fstride + /* ---------------first stage: radix 4 */ + mov count, fstride + mov p_fin0, p_fin + mov p_tmp, p_fout + add p_fin2, p_fin0, fstride, lsl #4 /* get the address of F[fstride*2] */ + add p_fin1, p_fin0, fstride, lsl #3 /* get the address of F[fstride] */ + add p_fin3, p_fin2, fstride, lsl #3 /* get the address of F[fstride*3] */ + vld2.32 {q_in0_01}, [p_fin0:64]! + vld2.32 {q_in2_01}, [p_fin2:64]! + vld2.32 {q_in1_01}, [p_fin1:64]! + vld2.32 {q_in3_01}, [p_fin3:64]! + +.L_ne10_radix4_butterfly_first_stage_fstride: + BUTTERFLY4X2_WITHOUT_TWIDDLES "FALSE" - add p_twiddles, p_twiddles, mstride, lsl #4 - add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */ - lsl mstride, mstride, #2 + subs count, count, #2 + bgt .L_ne10_radix4_butterfly_first_stage_fstride + /* swap input/output buffer */ + mov tmp0, p_fout + mov p_fout, p_fin + mov p_fin, tmp0 - /* loop of the other stages */ -.L_ne10_radix2_butterfly_forwards_stages: + /* (stage_count-2): reduce the counter for the last stage */ + sub stage_count, stage_count, #2 + lsl nstep, fstride, #3 lsr fstride, fstride, #2 - /* loop of fstride */ - mov count_f, fstride -.L_ne10_radix2_butterfly_forwards_stages_fstride: - sub tmp0, fstride, count_f - mul tmp0, tmp0, mstride - add p_fout0, p_fout, tmp0, lsl #5 - add p_fout2, p_fout0, mstride, lsl #4 /* get the address of F[mstride*2] */ - add p_fout1, p_fout0, mstride, lsl #3 /* get the address of F[mstride] */ - add p_fout3, p_fout2, mstride, 
lsl #3 /* get the address of F[mstride*3] */ - mov p_tw1, p_twiddles - add p_tw2, p_tw1, mstride, lsl #3 /* get the address of tw2 */ - add p_tw3, p_tw1, mstride, lsl #4 /* get the address of tw3 */ + b .L_ne10_butterfly_other_stages + /* ---------------end of first stage: radix 4 */ - /* loop of mstride */ - mov count_m, mstride -.L_ne10_radix2_butterfly_forwards_stages_mstride: - RADIX4_BUTTERFLY_P4 - subs count_m, count_m, #4 - bgt .L_ne10_radix2_butterfly_forwards_stages_mstride + /* ---------------first stage: radix 8 */ +.L_ne10_radix8_butterfly_first_stage: + lsr fstride1, fstride, #2 + mov p_in1, p_fin + mov p_out1, p_fout + lsl fstep, fstride, #1 - /* end of mstride_loop */ +.L_ne10_radix8_butterfly_first_stage_fstride1: + BUTTERFLY8X2_WITHOUT_TWIDDLES "FALSE" - subs count_f, count_f, #1 - bgt .L_ne10_radix2_butterfly_forwards_stages_fstride + subs fstride1, fstride1, #2 + bgt .L_ne10_radix8_butterfly_first_stage_fstride1 - add p_twiddles, p_twiddles, mstride, lsl #4 - add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */ + lsl nstep, fstride, #2 + sub stage_count, stage_count, #2 + lsr fstride, fstride, #4 lsl mstride, mstride, #2 + add p_twiddles, p_twiddles, #48 /* get the address of twiddles += 6 */ - subs stage_count, stage_count, #1 - bgt .L_ne10_radix2_butterfly_forwards_stages + /* swap input/output buffer */ + mov tmp0, p_fout + mov p_fout, p_fin + mov p_fin, tmp0 -.L_ne10_radix2_butterfly_forwards_end: - /*Return From Function*/ - vpop {q4-q7} - pop {r4-r12,pc} + /* if the last stage */ + cmp stage_count, #1 + beq .L_ne10_butterfly_last_stages - .align 4 - .global ne10_radix2_butterfly_backward_float32_neon - .thumb - .thumb_func + /* (stage_count-1): reduce the counter for the last stage */ + sub stage_count, stage_count, #1 + /*--------------- end of first stage: radix 8 */ + /* ---------------end of first stage--------------- */ -ne10_radix2_butterfly_backward_float32_neon: - push {r4-r12,lr} - vpush 
{q4-q7} - - ldr stage_count, [p_factors] /* get factors[0]---stage_count */ - ldr fstride, [p_factors, #4] /* get factors[1]---fstride */ - add p_factors, p_factors, stage_count, lsl #3 /* get the address of factors[2*stage_count] */ - ldr mstride, [p_factors, #-4] /* get factors[2*stage_count-1]--- mstride */ - sub stage_count, stage_count, #2 - - - /* loop of the second stages */ -.L_ne10_radix2_butterfly_backward_second_stage: - lsr fstride, fstride, #2 + /* ---------------other stages except last stage--------------- */ + /* loop of other stages */ +.L_ne10_butterfly_other_stages: + lsl mstep, mstride, #3 + mov p_in1, p_fin + vld2.32 {d_fin0_r, d_fin0_i}, [p_in1:64], nstep + vld2.32 {d_fin1_r, d_fin1_i}, [p_in1:64], nstep + vld2.32 {d_fin2_r, d_fin2_i}, [p_in1:64], nstep + vld2.32 {d_fin3_r, d_fin3_i}, [p_in1:64], nstep /* loop of fstride */ mov count_f, fstride +.L_ne10_butterfly_other_stages_fstride: mov p_tw1, p_twiddles - mov p_fout0, p_fout - add p_fout1, p_fout, mstride, lsl #5 - mov p_fout2, p_fout - mov p_fout3, p_fout1 - mov tmp0, #96 - vld2.32 {d_tw1_r01, d_tw2_r01, d_tw1_i01, d_tw2_i01}, [p_tw1]! 
- -.L_ne10_radix2_butterfly_backward_second_stage_fstride: - RADIX24_BUTTERFLY_INVERSE_P4 - - subs count_f, count_f, #2 - bgt .L_ne10_radix2_butterfly_backward_second_stage_fstride - - add p_twiddles, p_twiddles, mstride, lsl #4 - add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */ - lsl mstride, mstride, #2 - - - /* loop of the other stages */ -.L_ne10_radix2_butterfly_backward_stages: - lsr fstride, fstride, #2 - - /* loop of fstride */ - mov count_f, fstride -.L_ne10_radix2_butterfly_backward_stages_fstride: sub tmp0, fstride, count_f mul tmp0, tmp0, mstride - add p_fout0, p_fout, tmp0, lsl #5 - add p_fout2, p_fout0, mstride, lsl #4 /* get the address of F[mstride*2] */ - add p_fout1, p_fout0, mstride, lsl #3 /* get the address of F[mstride] */ - add p_fout3, p_fout2, mstride, lsl #3 /* get the address of F[mstride*3] */ - mov p_tw1, p_twiddles - add p_tw2, p_tw1, mstride, lsl #3 /* get the address of tw2 */ - add p_tw3, p_tw1, mstride, lsl #4 /* get the address of tw3 */ + add p_out1, p_fout, tmp0, lsl #5 + vld2.32 {d_tw0_r, d_tw0_i}, [p_tw1:64], mstep + vld2.32 {d_tw1_r, d_tw1_i}, [p_tw1:64], mstep + vld2.32 {d_tw2_r, d_tw2_i}, [p_tw1:64] /* loop of mstride */ mov count_m, mstride -.L_ne10_radix2_butterfly_backward_stages_mstride: - RADIX4_BUTTERFLY_INVERSE_P4 +.L_ne10_butterfly_other_stages_mstride: + BUTTERFLY4X2_WITH_TWIDDLES "FALSE" - subs count_m, count_m, #4 - bgt .L_ne10_radix2_butterfly_backward_stages_mstride - - /* end of mstride_loop */ + subs count_m, count_m, #2 + bgt .L_ne10_butterfly_other_stages_mstride + /* end of mstride loop */ subs count_f, count_f, #1 - bgt .L_ne10_radix2_butterfly_backward_stages_fstride + bgt .L_ne10_butterfly_other_stages_fstride add p_twiddles, p_twiddles, mstride, lsl #4 add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */ lsl mstride, mstride, #2 + lsr fstride, fstride, #2 - subs stage_count, stage_count, #1 - bgt 
.L_ne10_radix2_butterfly_backward_stages - - -.L_ne10_radix2_butterfly_backward_end: - /*Return From Function*/ - vpop {q4-q7} - pop {r4-r12,pc} - - - .align 4 - .global ne10_radix4_butterfly_backward_float32_neon - .thumb - .thumb_func - -ne10_radix4_butterfly_backward_float32_neon: - - push {r4-r12,lr} - vpush {q4-q7} + /* swap input/output buffer */ + mov tmp0, p_fout + mov p_fout, p_fin + mov p_fin, tmp0 - ldr stage_count, [p_factors] /* get factors[0]---stage_count */ - ldr fstride, [p_factors, #4] /* get factors[1]---fstride */ - add p_factors, p_factors, stage_count, lsl #3 /* get the address of factors[2*stage_count] */ - ldr mstride, [p_factors, #-4] /* get factors[2*stage_count-1]--- mstride */ - sub stage_count, stage_count, #1 + subs stage_count, stage_count, #1 + bgt .L_ne10_butterfly_other_stages + /* ---------------end other stages except last stage--------------- */ - /* loop of the stages */ -.L_ne10_radix4_butterfly_backward_stages: - lsr fstride, fstride, #2 - /* loop of fstride */ - mov count_f, fstride -.L_ne10_radix4_butterfly_backward_stages_fstride: - sub tmp0, fstride, count_f - mul tmp0, tmp0, mstride - add p_fout0, p_fout, tmp0, lsl #5 - add p_fout2, p_fout0, mstride, lsl #4 /* get the address of F[mstride*2] */ - add p_fout1, p_fout0, mstride, lsl #3 /* get the address of F[mstride] */ - add p_fout3, p_fout2, mstride, lsl #3 /* get the address of F[mstride*3] */ + /* ---------------last stage--------------- */ +.L_ne10_butterfly_last_stages: + mov p_in1, p_fin + mov p_out1, p_out_ls mov p_tw1, p_twiddles - add p_tw2, p_tw1, mstride, lsl #3 /* get the address of tw2 */ - add p_tw3, p_tw1, mstride, lsl #4 /* get the address of tw3 */ + mov mstep, nstep + vld2.32 {d_fin0_r, d_fin0_i}, [p_in1:64], nstep + vld2.32 {d_fin1_r, d_fin1_i}, [p_in1:64], nstep + vld2.32 {d_fin2_r, d_fin2_i}, [p_in1:64], nstep + vld2.32 {d_fin3_r, d_fin3_i}, [p_in1:64], nstep + vld2.32 {d_tw0_r, d_tw0_i}, [p_tw1:64], mstep + vld2.32 {d_tw1_r, d_tw1_i}, [p_tw1:64], 
mstep + vld2.32 {d_tw2_r, d_tw2_i}, [p_tw1:64] /* loop of mstride */ mov count_m, mstride +.L_ne10_butterfly_last_stages_mstride: + BUTTERFLY4X2_WITH_TWIDDLES "FALSE" -.L_ne10_radix4_butterfly_backward_stages_mstride: - RADIX4_BUTTERFLY_INVERSE_P4 - subs count_m, count_m, #4 - bgt .L_ne10_radix4_butterfly_backward_stages_mstride - - /* end of mstride_loop */ - - subs count_f, count_f, #1 - bgt .L_ne10_radix4_butterfly_backward_stages_fstride - - add p_twiddles, p_twiddles, mstride, lsl #4 - add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */ - lsl mstride, mstride, #2 - - subs stage_count, stage_count, #1 - bgt .L_ne10_radix4_butterfly_backward_stages - + subs count_m, count_m, #2 + bgt .L_ne10_butterfly_last_stages_mstride + /* end of mstride loop */ + /* ---------------end of last stage--------------- */ -.L_ne10_radix4_inverse_butterfly_backward_end: +.L_ne10_butterfly_end: /*Return From Function*/ vpop {q4-q7} pop {r4-r12,pc} - + /* end of ne10_mixed_radix_fft_forward_float32_neon */ /** * @details - * This function implements the 4 butterfly + * This function implements the radix4/8 backward FFT * * @param[in/out] *Fout points to input/output pointers * @param[in] *factors factors pointer: @@ -827,11 +622,11 @@ ne10_radix4_butterfly_backward_float32_neon: */ .align 4 - .global ne10_mixed_radix_butterfly_length_odd_power2_float32_neon + .global ne10_mixed_radix_fft_backward_float32_neon .thumb .thumb_func -ne10_mixed_radix_butterfly_length_odd_power2_float32_neon: +ne10_mixed_radix_fft_backward_float32_neon: push {r4-r12,lr} vpush {q4-q7} @@ -841,420 +636,162 @@ ne10_mixed_radix_butterfly_length_odd_power2_float32_neon: ldr radix, [p_factors] /* get factors[2*stage_count]--- the first radix */ ldr mstride, [p_factors, #-4] /* get factors[2*stage_count-1]--- mstride */ - mov p_fin, p_fout - mov p_fout0, p_fout - mov count, fstride - - /* the first stage */ -.L_ne10_butterfly_length_odd_power2_first_stage: - vld4.32 
{d_in0_r01, d_in0_i01, d_in1_r01, d_in1_i01}, [p_fin]! - vld4.32 {d_in0_r23, d_in0_i23, d_in1_r23, d_in1_i23}, [p_fin]! - vsub.f32 q_out1_r0123, q_in0_r0123, q_in1_r0123 - vsub.f32 q_out1_i0123, q_in0_i0123, q_in1_i0123 - vadd.f32 q_out0_r0123, q_in0_r0123, q_in1_r0123 - vadd.f32 q_out0_i0123, q_in0_i0123, q_in1_i0123 - subs count, count, #4 - vst4.32 {d_out0_r01, d_out0_i01, d_out1_r01, d_out1_i01}, [p_fout0]! - vst4.32 {d_out0_r23, d_out0_i23, d_out1_r23, d_out1_i23}, [p_fout0]! - - bgt .L_ne10_butterfly_length_odd_power2_first_stage - - /* the second stages */ - subs stage_count, stage_count, #1 - lsr fstride, fstride, #2 - - /* loop of fstride */ - mov count_f, fstride - mov p_tw1, p_twiddles - mov p_fout0, p_fout - add p_fout1, p_fout, mstride, lsl #5 - mov p_fout2, p_fout - mov p_fout3, p_fout1 - mov tmp0, #96 - vld2.32 {d_tw1_r01, d_tw2_r01, d_tw1_i01, d_tw2_i01}, [p_tw1]! + /* save the output buffer for the last stage */ + mov p_out_ls, p_fout -.L_ne10_butterfly_length_odd_power2_second_stage: - RADIX24_BUTTERFLY_P4 + /* ---------------the first stage--------------- */ + /* judge the radix is 2 or 4 */ + cmp radix, #2 + beq .L_ne10_radix8_butterfly_inverse_first_stage - subs count_f, count_f, #2 - bgt .L_ne10_butterfly_length_odd_power2_second_stage + /* ---------------first stage: radix 4 */ + mov count, fstride + mov p_fin0, p_fin + mov p_tmp, p_fout + add p_fin2, p_fin0, fstride, lsl #4 /* get the address of F[fstride*2] */ + add p_fin1, p_fin0, fstride, lsl #3 /* get the address of F[fstride] */ + add p_fin3, p_fin2, fstride, lsl #3 /* get the address of F[fstride*3] */ + vld2.32 {q_in0_01}, [p_fin0:64]! + vld2.32 {q_in2_01}, [p_fin2:64]! + vld2.32 {q_in1_01}, [p_fin1:64]! + vld2.32 {q_in3_01}, [p_fin3:64]! 
+ +.L_ne10_radix4_butterfly_inverse_first_stage_fstride: + BUTTERFLY4X2_WITHOUT_TWIDDLES "TRUE" - add p_twiddles, p_twiddles, mstride, lsl #4 - add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */ - lsl mstride, mstride, #2 + subs count, count, #2 + bgt .L_ne10_radix4_butterfly_inverse_first_stage_fstride - /* other stages */ - subs stage_count, stage_count, #1 + /* swap input/output buffer */ + mov tmp0, p_fout + mov p_fout, p_fin + mov p_fin, tmp0 - /* loop of other stages */ -.L_ne10_butterfly_length_odd_power2_other_stages: + /* (stage_count-2): reduce the counter for the last stage */ + sub stage_count, stage_count, #2 + lsl nstep, fstride, #3 lsr fstride, fstride, #2 - /* loop of fstride */ - mov count_f, fstride -.L_ne10_butterfly_length_odd_power2_other_stages_fstride: - sub tmp0, fstride, count_f - mul tmp0, tmp0, mstride - add p_fout0, p_fout, tmp0, lsl #5 - add p_fout2, p_fout0, mstride, lsl #4 /* get the address of F[mstride*2] */ - add p_fout1, p_fout0, mstride, lsl #3 /* get the address of F[mstride] */ - add p_fout3, p_fout2, mstride, lsl #3 /* get the address of F[mstride*3] */ - mov p_tw1, p_twiddles - add p_tw2, p_tw1, mstride, lsl #3 /* get the address of tw2 */ - add p_tw3, p_tw1, mstride, lsl #4 /* get the address of tw3 */ + b .L_ne10_butterfly_inverse_other_stages + /* ---------------end of first stage: radix 4 */ - /* loop of mstride */ - mov count_m, mstride -.L_ne10_butterfly_length_odd_power2_other_stages_mstride: - RADIX4_BUTTERFLY_P4 - subs count_m, count_m, #4 - bgt .L_ne10_butterfly_length_odd_power2_other_stages_mstride - /* end of mstride loop */ + /* ---------------first stage: radix 8 */ +.L_ne10_radix8_butterfly_inverse_first_stage: + lsr fstride1, fstride, #2 + mov p_in1, p_fin + mov p_out1, p_fout + lsl fstep, fstride, #1 - subs count_f, count_f, #1 - bgt .L_ne10_butterfly_length_odd_power2_other_stages_fstride +.L_ne10_radix8_butterfly_inverse_first_stage_fstride1: + 
BUTTERFLY8X2_WITHOUT_TWIDDLES "TRUE" - add p_twiddles, p_twiddles, mstride, lsl #4 - add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */ - lsl mstride, mstride, #2 + subs fstride1, fstride1, #2 + bgt .L_ne10_radix8_butterfly_inverse_first_stage_fstride1 - subs stage_count, stage_count, #1 - bgt .L_ne10_butterfly_length_odd_power2_other_stages - -.L_ne10_butterfly_length_odd_power2_end: - /*Return From Function*/ - vpop {q4-q7} - pop {r4-r12,pc} - - /* end of ne10_butterfly_length_odd_power2_float32_neon */ - - - /** - * @details - * This function implements the 4 butterfly - * - * @param[in/out] *Fout points to input/output pointers - * @param[in] *factors factors pointer: - * 0: stage number - * 1: stride for the first stage - * others: factor out powers of 4, powers of 2 - * @param[in] *twiddles twiddles coeffs of FFT - */ - - .align 4 - .global ne10_mixed_radix_butterfly_length_even_power2_float32_neon - .thumb - .thumb_func - -ne10_mixed_radix_butterfly_length_even_power2_float32_neon: - push {r4-r12,lr} - vpush {q4-q7} + lsl nstep, fstride, #2 + sub stage_count, stage_count, #2 + lsr fstride, fstride, #4 + lsl mstride, mstride, #2 + add p_twiddles, p_twiddles, #48 /* get the address of twiddles += 6 */ - ldr stage_count, [p_factors] /* get factors[0]---stage_count */ - ldr fstride, [p_factors, #4] /* get factors[1]---fstride */ - add p_factors, p_factors, stage_count, lsl #3 /* get the address of factors[2*stage_count] */ - ldr radix, [p_factors] /* get factors[2*stage_count]--- the first radix */ - ldr mstride, [p_factors, #-4] /* get factors[2*stage_count-1]--- mstride */ + /* swap input/output buffer */ + mov tmp0, p_fout + mov p_fout, p_fin + mov p_fin, tmp0 - mov p_fin, p_fout - mov p_fout0, p_fout - mov count, fstride + /* if the last stage */ + cmp stage_count, #1 + beq .L_ne10_butterfly_inverse_last_stages - /* the first stage */ -.L_ne10_butterfly_length_even_power2_first_stage: - vld1.32 {d_in0_0, d_in1_0, 
d_in2_0, d_in3_0}, [p_fin]! - vld1.32 {d_in0_1, d_in1_1, d_in2_1, d_in3_1}, [p_fin]! - vswp d_in1_0, d_in0_1 - vswp d_in3_0, d_in2_1 - vsub.f32 q_s2_01, q_in0_01, q_in2_01 - vadd.f32 q_out0_01, q_in0_01, q_in2_01 - vadd.f32 q_s0_01, q_in1_01, q_in3_01 - vsub.f32 q_s1_01, q_in1_01, q_in3_01 - vsub.f32 q_out2_01, q_out0_01, q_s0_01 - vrev64.32 q_s1_01, q_s1_01 - vadd.f32 q_out0_01, q_out0_01, q_s0_01 - vadd.f32 q_out1_01, q_s2_01, q_s1_01 - vsub.f32 q_out3_01, q_s2_01, q_s1_01 - vrev64.32 q_tmp, q_out1_01 - vrev64.32 q_tmp2, q_out3_01 - vtrn.32 q_out3_01, q_tmp - vtrn.32 q_out1_01, q_tmp2 - vswp d_out1_0, d_out0_1 - vswp d_out3_0, d_out2_1 - subs count, count, #2 - vst1.32 {d_out0_0, d_out1_0, d_out2_0, d_out3_0}, [p_fout0]! - vst1.32 {d_out0_1, d_out1_1, d_out2_1, d_out3_1}, [p_fout0]! - bgt .L_ne10_butterfly_length_even_power2_first_stage + /* (stage_count-1): reduce the counter for the last stage */ + sub stage_count, stage_count, #1 + /*--------------- end of first stage: radix 8 */ + /* ---------------end of first stage--------------- */ - /* other stages */ - subs stage_count, stage_count, #1 + /* ---------------other stages except last stage--------------- */ /* loop of other stages */ -.L_ne10_butterfly_length_even_power2_other_stages: - lsr fstride, fstride, #2 +.L_ne10_butterfly_inverse_other_stages: + lsl mstep, mstride, #3 + mov p_in1, p_fin + vld2.32 {d_fin0_r, d_fin0_i}, [p_in1:64], nstep + vld2.32 {d_fin1_r, d_fin1_i}, [p_in1:64], nstep + vld2.32 {d_fin2_r, d_fin2_i}, [p_in1:64], nstep + vld2.32 {d_fin3_r, d_fin3_i}, [p_in1:64], nstep /* loop of fstride */ mov count_f, fstride -.L_ne10_butterfly_length_even_power2_other_stages_fstride: +.L_ne10_butterfly_inverse_other_stages_fstride: + mov p_tw1, p_twiddles sub tmp0, fstride, count_f mul tmp0, tmp0, mstride - add p_fout0, p_fout, tmp0, lsl #5 - add p_fout2, p_fout0, mstride, lsl #4 /* get the address of F[mstride*2] */ - add p_fout1, p_fout0, mstride, lsl #3 /* get the address of F[mstride] */ - add 
p_fout3, p_fout2, mstride, lsl #3 /* get the address of F[mstride*3] */ - mov p_tw1, p_twiddles - add p_tw2, p_tw1, mstride, lsl #3 /* get the address of tw2 */ - add p_tw3, p_tw1, mstride, lsl #4 /* get the address of tw3 */ + add p_out1, p_fout, tmp0, lsl #5 + vld2.32 {d_tw0_r, d_tw0_i}, [p_tw1:64], mstep + vld2.32 {d_tw1_r, d_tw1_i}, [p_tw1:64], mstep + vld2.32 {d_tw2_r, d_tw2_i}, [p_tw1:64] /* loop of mstride */ mov count_m, mstride -.L_ne10_butterfly_length_even_power2_other_stages_mstride: - RADIX4_BUTTERFLY_P4 +.L_ne10_butterfly_inverse_other_stages_mstride: + BUTTERFLY4X2_WITH_TWIDDLES "TRUE" - subs count_m, count_m, #4 - bgt .L_ne10_butterfly_length_even_power2_other_stages_mstride + subs count_m, count_m, #2 + bgt .L_ne10_butterfly_inverse_other_stages_mstride /* end of mstride loop */ subs count_f, count_f, #1 - bgt .L_ne10_butterfly_length_even_power2_other_stages_fstride + bgt .L_ne10_butterfly_inverse_other_stages_fstride add p_twiddles, p_twiddles, mstride, lsl #4 add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */ lsl mstride, mstride, #2 - - subs stage_count, stage_count, #1 - bgt .L_ne10_butterfly_length_even_power2_other_stages - -.L_ne10_butterfly_length_even_power2_end: - /*Return From Function*/ - vpop {q4-q7} - pop {r4-r12,pc} - - /* end of ne10_butterfly_length_even_power2_float32_neon */ - - - /** - * @details - * This function implements the 4 butterfly - * - * @param[in/out] *Fout points to input/output pointers - * @param[in] *factors factors pointer: - * 0: stage number - * 1: stride for the first stage - * others: factor out powers of 4, powers of 2 - * @param[in] *twiddles twiddles coeffs of FFT - */ - - .align 4 - .global ne10_mixed_radix_butterfly_inverse_length_odd_power2_float32_neon - .thumb - .thumb_func - -ne10_mixed_radix_butterfly_inverse_length_odd_power2_float32_neon: - push {r4-r12,lr} - vpush {q4-q7} - - ldr stage_count, [p_factors] /* get factors[0]---stage_count */ - ldr fstride, 
[p_factors, #4] /* get factors[1]---fstride */ - add p_factors, p_factors, stage_count, lsl #3 /* get the address of factors[2*stage_count] */ - ldr radix, [p_factors] /* get factors[2*stage_count]--- the first radix */ - ldr mstride, [p_factors, #-4] /* get factors[2*stage_count-1]--- mstride */ - - mov p_fin, p_fout - mov p_fout0, p_fout - mov count, fstride - - /* the first stage */ -.L_ne10_butterfly_inverse_length_odd_power2_first_stage: - vld4.32 {d_in0_r01, d_in0_i01, d_in1_r01, d_in1_i01}, [p_fin]! - vld4.32 {d_in0_r23, d_in0_i23, d_in1_r23, d_in1_i23}, [p_fin]! - vsub.f32 q_out1_r0123, q_in0_r0123, q_in1_r0123 - vsub.f32 q_out1_i0123, q_in0_i0123, q_in1_i0123 - vadd.f32 q_out0_r0123, q_in0_r0123, q_in1_r0123 - vadd.f32 q_out0_i0123, q_in0_i0123, q_in1_i0123 - subs count, count, #4 - vst4.32 {d_out0_r01, d_out0_i01, d_out1_r01, d_out1_i01}, [p_fout0]! - vst4.32 {d_out0_r23, d_out0_i23, d_out1_r23, d_out1_i23}, [p_fout0]! - - bgt .L_ne10_butterfly_inverse_length_odd_power2_first_stage - - /* the second stages */ - subs stage_count, stage_count, #1 lsr fstride, fstride, #2 - /* loop of fstride */ - mov count_f, fstride - mov p_tw1, p_twiddles - mov p_fout0, p_fout - add p_fout1, p_fout, mstride, lsl #5 - mov p_fout2, p_fout - mov p_fout3, p_fout1 - mov tmp0, #96 - vld2.32 {d_tw1_r01, d_tw2_r01, d_tw1_i01, d_tw2_i01}, [p_tw1]! 
- -.L_ne10_butterfly_inverse_length_odd_power2_second_stage: - RADIX24_BUTTERFLY_INVERSE_P4 - - subs count_f, count_f, #2 - bgt .L_ne10_butterfly_inverse_length_odd_power2_second_stage - - add p_twiddles, p_twiddles, mstride, lsl #4 - add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */ - lsl mstride, mstride, #2 + /* swap input/output buffer */ + mov tmp0, p_fout + mov p_fout, p_fin + mov p_fin, tmp0 - /* other stages */ subs stage_count, stage_count, #1 + bgt .L_ne10_butterfly_inverse_other_stages + /* ---------------end other stages except last stage--------------- */ - /* loop of other stages */ -.L_ne10_butterfly_inverse_length_odd_power2_other_stages: - lsr fstride, fstride, #2 - /* loop of fstride */ - mov count_f, fstride -.L_ne10_butterfly_inverse_length_odd_power2_other_stages_fstride: - sub tmp0, fstride, count_f - mul tmp0, tmp0, mstride - add p_fout0, p_fout, tmp0, lsl #5 - add p_fout2, p_fout0, mstride, lsl #4 /* get the address of F[mstride*2] */ - add p_fout1, p_fout0, mstride, lsl #3 /* get the address of F[mstride] */ - add p_fout3, p_fout2, mstride, lsl #3 /* get the address of F[mstride*3] */ + /* ---------------last stage--------------- */ +.L_ne10_butterfly_inverse_last_stages: + mov p_in1, p_fin + mov p_out1, p_out_ls mov p_tw1, p_twiddles - add p_tw2, p_tw1, mstride, lsl #3 /* get the address of tw2 */ - add p_tw3, p_tw1, mstride, lsl #4 /* get the address of tw3 */ + mov mstep, nstep + vld2.32 {d_fin0_r, d_fin0_i}, [p_in1:64], nstep + vld2.32 {d_fin1_r, d_fin1_i}, [p_in1:64], nstep + vld2.32 {d_fin2_r, d_fin2_i}, [p_in1:64], nstep + vld2.32 {d_fin3_r, d_fin3_i}, [p_in1:64], nstep + vld2.32 {d_tw0_r, d_tw0_i}, [p_tw1:64], mstep + vld2.32 {d_tw1_r, d_tw1_i}, [p_tw1:64], mstep + vld2.32 {d_tw2_r, d_tw2_i}, [p_tw1:64] /* loop of mstride */ mov count_m, mstride +.L_ne10_butterfly_inverse_last_stages_mstride: + BUTTERFLY4X2_WITH_TWIDDLES "TRUE" 
-.L_ne10_butterfly_inverse_length_odd_power2_other_stages_mstride: - RADIX4_BUTTERFLY_INVERSE_P4 - - subs count_m, count_m, #4 - bgt .L_ne10_butterfly_inverse_length_odd_power2_other_stages_mstride + subs count_m, count_m, #2 + bgt .L_ne10_butterfly_inverse_last_stages_mstride /* end of mstride loop */ + /* ---------------end of last stage--------------- */ - subs count_f, count_f, #1 - bgt .L_ne10_butterfly_inverse_length_odd_power2_other_stages_fstride - - add p_twiddles, p_twiddles, mstride, lsl #4 - add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */ - lsl mstride, mstride, #2 - - subs stage_count, stage_count, #1 - bgt .L_ne10_butterfly_inverse_length_odd_power2_other_stages - -.L_ne10_butterfly_inverse_length_odd_power2_end: +.L_ne10_butterfly_inverse_end: /*Return From Function*/ vpop {q4-q7} pop {r4-r12,pc} - /* end of ne10_butterfly_inverse_length_odd_power2_float32_neon */ - - - /** - * @details - * This function implements the 4 butterfly - * - * @param[in/out] *Fout points to input/output pointers - * @param[in] *factors factors pointer: - * 0: stage number - * 1: stride for the first stage - * others: factor out powers of 4, powers of 2 - * @param[in] *twiddles twiddles coeffs of FFT - */ - - .align 4 - .global ne10_mixed_radix_butterfly_inverse_length_even_power2_float32_neon - .thumb - .thumb_func - -ne10_mixed_radix_butterfly_inverse_length_even_power2_float32_neon: - push {r4-r12,lr} - vpush {q4-q7} + /* end of the inverse mixed radix FFT function */ - ldr stage_count, [p_factors] /* get factors[0]---stage_count */ - ldr fstride, [p_factors, #4] /* get factors[1]---fstride */ - add p_factors, p_factors, stage_count, lsl #3 /* get the address of factors[2*stage_count] */ - ldr radix, [p_factors] /* get factors[2*stage_count]--- the first radix */ - ldr mstride, [p_factors, #-4] /* get factors[2*stage_count-1]--- mstride */ - - mov p_fin, p_fout - mov p_fout0, p_fout - mov count, fstride - /* the first
stage */ -.L_ne10_butterfly_inverse_length_even_power2_first_stage: - vld1.32 {d_in0_0, d_in1_0, d_in2_0, d_in3_0}, [p_fin]! - vld1.32 {d_in0_1, d_in1_1, d_in2_1, d_in3_1}, [p_fin]! - vswp d_in1_0, d_in0_1 - vswp d_in3_0, d_in2_1 - vsub.f32 q_s2_01, q_in0_01, q_in2_01 - vadd.f32 q_out0_01, q_in0_01, q_in2_01 - vadd.f32 q_s0_01, q_in1_01, q_in3_01 - vsub.f32 q_s1_01, q_in1_01, q_in3_01 - vsub.f32 q_out2_01, q_out0_01, q_s0_01 - vrev64.32 q_s1_01, q_s1_01 - vadd.f32 q_out0_01, q_out0_01, q_s0_01 - vsub.f32 q_out1_01, q_s2_01, q_s1_01 - vadd.f32 q_out3_01, q_s2_01, q_s1_01 - vrev64.32 q_tmp, q_out1_01 - vrev64.32 q_tmp2, q_out3_01 - vtrn.32 q_out3_01, q_tmp - vtrn.32 q_out1_01, q_tmp2 - vswp d_out1_0, d_out0_1 - vswp d_out3_0, d_out2_1 - subs count, count, #2 - vst1.32 {d_out0_0, d_out1_0, d_out2_0, d_out3_0}, [p_fout0]! - vst1.32 {d_out0_1, d_out1_1, d_out2_1, d_out3_1}, [p_fout0]! - bgt .L_ne10_butterfly_inverse_length_even_power2_first_stage - - /* other stages */ - subs stage_count, stage_count, #1 - - /* loop of other stages */ -.L_ne10_butterfly_inverse_length_even_power2_other_stages: - lsr fstride, fstride, #2 - - /* loop of fstride */ - mov count_f, fstride -.L_ne10_butterfly_inverse_length_even_power2_other_stages_fstride: - sub tmp0, fstride, count_f - mul tmp0, tmp0, mstride - add p_fout0, p_fout, tmp0, lsl #5 - add p_fout2, p_fout0, mstride, lsl #4 /* get the address of F[mstride*2] */ - add p_fout1, p_fout0, mstride, lsl #3 /* get the address of F[mstride] */ - add p_fout3, p_fout2, mstride, lsl #3 /* get the address of F[mstride*3] */ - mov p_tw1, p_twiddles - add p_tw2, p_tw1, mstride, lsl #3 /* get the address of tw2 */ - add p_tw3, p_tw1, mstride, lsl #4 /* get the address of tw3 */ - - /* loop of mstride */ - mov count_m, mstride - -.L_ne10_butterfly_inverse_length_even_power2_other_stages_mstride: - RADIX4_BUTTERFLY_INVERSE_P4 - - subs count_m, count_m, #4 - bgt .L_ne10_butterfly_inverse_length_even_power2_other_stages_mstride - /* end of mstride 
loop */ - - subs count_f, count_f, #1 - bgt .L_ne10_butterfly_inverse_length_even_power2_other_stages_fstride - - add p_twiddles, p_twiddles, mstride, lsl #4 - add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */ - lsl mstride, mstride, #2 - - subs stage_count, stage_count, #1 - bgt .L_ne10_butterfly_inverse_length_even_power2_other_stages - -.L_ne10_butterfly_inverse_length_even_power2_end: - /*Return From Function*/ - vpop {q4-q7} - pop {r4-r12,pc} - /* end of ne10_butterfly_inverse_length_even_power2_float32_neon */ /* end of the file */ diff --git a/modules/dsp/NE10_init_dsp.c b/modules/dsp/NE10_init_dsp.c index 8ed1002..2240ba2 100644 --- a/modules/dsp/NE10_init_dsp.c +++ b/modules/dsp/NE10_init_dsp.c @@ -33,10 +33,6 @@ ne10_result_t ne10_init_dsp (ne10_int32_t is_NEON_available) { if (NE10_OK == is_NEON_available) { - ne10_radix4_butterfly_float = ne10_radix4_butterfly_float_neon; - ne10_radix4_butterfly_inverse_float = ne10_radix4_butterfly_inverse_float_neon; - ne10_rfft_float = ne10_rfft_float_neon; - ne10_fft_c2c_1d_float32 = ne10_fft_c2c_1d_float32_neon; ne10_fft_c2c_1d_int32_unscaled = ne10_fft_c2c_1d_int32_unscaled_neon; ne10_fft_c2c_1d_int32_scaled = ne10_fft_c2c_1d_int32_scaled_neon; @@ -60,10 +56,6 @@ ne10_result_t ne10_init_dsp (ne10_int32_t is_NEON_available) } else { - ne10_radix4_butterfly_float = ne10_radix4_butterfly_float_c; - ne10_radix4_butterfly_inverse_float = ne10_radix4_butterfly_inverse_float_c; - ne10_rfft_float = ne10_rfft_float_c; - ne10_fft_c2c_1d_float32 = ne10_fft_c2c_1d_float32_c; ne10_fft_c2c_1d_int32_unscaled = ne10_fft_c2c_1d_int32_unscaled_c; ne10_fft_c2c_1d_int32_scaled = ne10_fft_c2c_1d_int32_scaled_c; @@ -89,22 +81,6 @@ ne10_result_t ne10_init_dsp (ne10_int32_t is_NEON_available) } // These are actual definitions of our function pointers that are declared in inc/NE10_dsp.h -void (*ne10_radix4_butterfly_float) (ne10_float32_t *pDst, - ne10_float32_t *pSrc, - ne10_uint16_t N, - 
ne10_float32_t *pCoef); - -void (*ne10_radix4_butterfly_inverse_float) (ne10_float32_t *pDst, - ne10_float32_t *pSrc, - ne10_uint16_t N, - ne10_float32_t *pCoef, - ne10_float32_t onebyN); - -void (*ne10_rfft_float) (const ne10_rfft_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_float32_t * pTemp); - void (*ne10_fft_c2c_1d_float32) (ne10_fft_cpx_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_cpx_float32_t *twiddles, diff --git a/modules/dsp/NE10_rfft.c b/modules/dsp/NE10_rfft.c deleted file mode 100644 index af6737b..0000000 --- a/modules/dsp/NE10_rfft.c +++ /dev/null @@ -1,292 +0,0 @@ -/* - * Copyright 2012-14 ARM Limited - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of ARM Limited nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * NE10 Library : dsp/NE10_rfft.c - */ - -#include "NE10_types.h" -#include "NE10_dsp.h" - -/** - * @ingroup groupDSPs - */ - -/** - * @defgroup RFFT_RIFFT Real FFT - * - * \par - * Complex FFT/IFFT typically assumes complex input and output. However many applications use real valued data in time domain. - * Real FFT/IFFT efficiently process real valued sequences with the advantage of requirement of low memory and with less complexity. - * - * \par - * This set of functions implements Real Fast Fourier Transforms(RFFT) and Real Inverse Fast Fourier Transform(RIFFT) - * for floating-point data types. - * - * - * \par Algorithm: - * - * Real Fast Fourier Transform: - * \par - * Real FFT of N-point is calculated using CFFT of N/2-point and Split RFFT process as shown below figure. - * \par - * \image html RFFT.gif "Real Fast Fourier Transform" - * \par - * The RFFT functions operate on blocks of input and output data and each call to the function processes - * fftLenR samples through the transform. pSrc points to input array containing fftLenR values. - * pDst points to output array containing 2*fftLenR values. \n - * Input for real FFT is in the order of - *
{real[0], real[1], real[2], real[3], ..}
- * Output for real FFT is complex and are in the order of - *
{real(0), imag(0), real(1), imag(1), ...}
- * - * Real Inverse Fast Fourier Transform: - * \par - * Real IFFT of N-point is calculated using Split RIFFT process and CFFT of N/2-point as shown below figure. - * \par - * \image html RIFFT.gif "Real Inverse Fast Fourier Transform" - * \par - * The RIFFT functions operate on blocks of input and output data and each call to the function processes - * 2*fftLenR samples through the transform. pSrc points to input array containing 2*fftLenR values. - * pDst points to output array containing fftLenR values. \n - * Input for real IFFT is complex and are in the order of - *
{real(0), imag(0), real(1), imag(1), ...}
- * Output for real IFFT is real and in the order of - *
{real[0], real[1], real[2], real[3], ..}
- * - * \par Lengths supported by the transform: - * \par - * Real FFT/IFFT supports the lengths [128, 512, 2048], as it internally uses CFFT/CIFFT. - * - * \par Instance Structure - * A separate instance structure must be defined for each Instance but the twiddle factors can be reused. - * There are separate instance structure declarations for each of the 3 supported data types. - * - * \par Initialization Functions - * There is also an associated initialization function for each data type. - * The initialization function performs the following operations: - * - Sets the values of the internal structure fields. - * - Initializes twiddle factor tables. - * - Initializes CFFT data structure fields. - * \par - * Use of the initialization function is optional. - * However, if the initialization function is used, then the instance structure cannot be placed into a const data section. - * To place an instance structure into a const data section, the instance structure must be manually initialized. - * Manually initialize the instance structure as follows: - *
- *ne10_rfft_instance_f32_t S = {fft_len_real, fft_len_by2, ifft_flag_r, bit_reverse_flag_r, twid_coef_r_modifier, p_twiddle_A_real, p_twiddle_B_real, p_cfft};
- * 
- * where fft_len_real length of RFFT/RIFFT; fft_len_by2 length of CFFT/CIFFT. - * ifft_flag_r Flag for selection of RFFT or RIFFT(Set ifftFlagR to calculate RIFFT otherwise calculates RFFT); - * bit_reverse_flag_r Flag for selection of output order(Set bitReverseFlagR to output in normal order otherwise output in bit reversed order); - * twid_coef_r_modifier modifier for twiddle factor table which supports 128, 512, 2048 RFFT lengths with same table; - * p_twiddle_A_realpoints to A array of twiddle coefficients; p_twiddle_B_realpoints to B array of twiddle coefficients; - * p_cfft points to the CFFT Instance structure. The CFFT structure also needs to be initialized, refer to arm_cfft_radix4_f32() for details regarding - * static initialization of cfft structure. - * - */ - -/** - * @brief Core Real FFT process - * @param[in] *pSrc points to the Input buffer - * @param[in] N length of Real FFT - * @param[in] *pATable points to the twiddle Coef A buffer - * @param[in] *pBTable points to the twiddle Coef B buffer - * @param[out] *pDst points to the Output buffer - * @return none. 
- * The function implements a Real FFT - */ - -static void ne10_split_rfft_float_c( - ne10_float32_t * pSrc, - ne10_uint32_t N, - ne10_float32_t * pReTable, - ne10_float32_t * pImTable, - ne10_float32_t * pDst) -{ - ne10_uint32_t k; /* Loop Counter */ - ne10_float32_t uRe,vRe,uIm,vIm; /* Temporary variables for output */ - ne10_float32_t reTwd,imTwd,reTmp,imTmp; - ne10_float32_t *pCoefRe,*pCoefIm; /* Temporary pointers for twiddle factors */ - ne10_uint32_t NBy2 = N>>1; - - pCoefRe = pReTable; - pCoefIm = pImTable; - - /*First Result*/ - pDst[0] = pSrc[0] + pSrc[1]; - pDst[1] = 0; - /*N/2 th Result*/ - pDst[N] = pSrc[0] - pSrc[1]; - pDst[N+1] = 0; - - /*for k=1 to N/4-1 and k=N/4+1 to K=N/2-1*/ - for(k=1;k<(N>>2);k++) - { - /*uRe = (a[k]+a[N/2-k])/2*/ - uRe = (pSrc[2*k]+pSrc[2*(NBy2-k)])*0.5; - /*uIm = (b[k]-b[N/2-k])/2*/ - uIm = (pSrc[2*k+1]-pSrc[2*(NBy2-k)+1])*0.5; - /*VRe = (b[k]+b[N/2-k])/2*/ - vRe = (pSrc[2*k+1]+pSrc[2*(NBy2-k)+1])*0.5; - /*Vim = -(a[k]-a[N/2-k])/2*/ - vIm = (pSrc[2*(NBy2-k)]-pSrc[2*k])*0.5; - reTwd = pCoefRe[k]; - imTwd = pCoefIm[k]; - reTmp = vRe*reTwd + vIm*imTwd; - imTmp = vIm*reTwd - vRe*imTwd; - pDst[2*k] = uRe + reTmp; - pDst[2*k+1] = uIm + imTmp; - pDst[2*(NBy2-k)] = uRe-reTmp; - pDst[2*(NBy2-k)+1] = imTmp-uIm; - - /*Out Put from K=N/2+1 till k=N-1*/ - /*y[N-k] = conjugate(y[k] k=0 to k>1; - - pCoefRe = pReTable; - pCoefIm = pImTable; - - /*First Result*/ - pDst[0] = (pSrc[0] + pSrc[N])*0.5; - pDst[1] = (pSrc[0] - pSrc[N])*0.5; - - /*for k=1 to N/4-1 and k=N/4+1 to K=N/2-1*/ - for(k=1;k< (N>>2);k++) - { - /*uRe = (a[k]+a[N/2-k])/2*/ - uRe = (pSrc[2*k] + pSrc[2*(NBy2-k)])*0.5; - /*uIm = (b[k]-b[N/2-k])/2*/ - uIm = (pSrc[2*k+1] - pSrc[2*(NBy2-k)+1])*0.5; - - reTmp = (pSrc[2*k] - pSrc[2*(NBy2-k)])*0.5; - imTmp = (pSrc[2*k+1] + pSrc[2*(NBy2-k)+1])*0.5; - - reTwd = pCoefRe[k]; - imTwd = pCoefIm[k]; - - /*VRe = (b[k]+b[N/2-k])/2*/ - vRe = reTmp*reTwd - imTmp*imTwd; - /*Vim = -(a[k]-a[N/2-k])/2*/ - vIm = imTmp*reTwd + reTmp*imTwd; - pDst[2*k] 
= (uRe-vIm); - pDst[2*k+1] = (uIm+vRe); - - pDst[2*(NBy2-k)] = (uRe+vIm); - pDst[2*(NBy2-k)+1] = (vRe-uIm); - } - /*y[N/4] = a[N/4]-jb[N/4]*/ - pDst[NBy2] = pSrc[NBy2]; - pDst[NBy2+1] = -pSrc[NBy2+1]; - -} - -/** - * @addtogroup RFFT_RIFFT - * @{ - */ - -/** - * @brief Real FFT process - * @param[in] *S is an instance for the structure - * @param[in] *pSrc point to the input buffer (out-of-place: it's also a tmp buffer, so the input buffer is destroyed) - * @param[out] *pDst point to the output buffer (out-of-place) - * @param[in] *pTemp point to the temp buffer (used for intermedia buffer) - * @return none. - * The function implements a Real FFT/ Real IFFT depending - * on the direction flag - * Can support FFT lengths of 128, 512, 2048 - * - */ -void ne10_rfft_float_c( - const ne10_rfft_instance_f32_t * S, - ne10_float32_t * pSrc, - ne10_float32_t * pDst, - ne10_float32_t * pTemp) -{ - const ne10_cfft_radix4_instance_f32_t *S_CFFT = S->p_cfft; - - /* Caluclation of Real IFFT of input */ - if(S->ifft_flag_r == 1u) - { - /* Real IFFT core process */ - ne10_split_rifft_float_c(pSrc, S->fft_len_real, S->p_twiddle_A_real, - S->p_twiddle_B_real, pTemp); - /* Complex radix-4 IFFT process */ - ne10_radix4_butterfly_inverse_float_c(pDst, pTemp, S_CFFT->fft_len, S_CFFT->p_twiddle, S_CFFT->one_by_fft_len); - } - else - { - /* Complex radix-4 FFT process */ - ne10_radix4_butterfly_float_c(pTemp, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle); - /* Real FFT core process */ - ne10_split_rfft_float_c(pTemp, S->fft_len_real, S->p_twiddle_A_real, - S->p_twiddle_B_real, pDst); - } - -} - -/** - * @} end of RFFT_RIFFT group - */ diff --git a/modules/dsp/NE10_rfft.neon.c b/modules/dsp/NE10_rfft.neon.c deleted file mode 100644 index d6518d8..0000000 --- a/modules/dsp/NE10_rfft.neon.c +++ /dev/null @@ -1,508 +0,0 @@ -/* - * Copyright 2012-14 ARM Limited - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of ARM Limited nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * NE10 Library : dsp/NE10_rfft.neon.c - */ -#include - -#include "NE10_types.h" -#include "NE10_mask_table.h" -#include "NE10_dsp.h" -/** - * @brief Core Real FFT process - * @param[in] *pSrc points to the Input buffer - * @param[in] N length of Real FFT - * @param[in] *pATable points to the twiddle Coef A buffer - * @param[in] *pBTable points to the twiddle Coef B buffer - * @param[out] *pDst points to the Output buffer - * @return none. 
- * The function implements a Real FFT - */ - -static void ne10_split_rfft_float_neon( - ne10_float32_t * pSrc, - ne10_uint32_t N, - ne10_float32_t * pReTable, - ne10_float32_t * pImTable, - ne10_float32_t * pDst) -{ - ne10_uint32_t k,Cnt; /* Loop Counter */ - ne10_float32_t *pCoefRe,*pCoefIm,*pOut1,*pIn1,*pOut2,*pIn2; /* Temporary pointers for twiddle factors */ - ne10_uint32_t NBy2 = N>>1; - /*NEON Variable Declarations*/ - float32x4x2_t vin1q2_f32,vin2q2_f32,vtmpq2_f32; - float32x4_t vtmp1q_f32,vtmp2q_f32; - float32x4_t vureq_f32,vuimq_f32,vvreq_f32,vvimq_f32; - float32x4_t vretwdq_f32,vimtwdq_f32; - float32x4_t vhalfq_f32; - uint32x4_t vmaskq_u32,vmask1q_u32; - - /*Mask value to select three entries*/ - vmaskq_u32 = vld1q_u32(ne10_qMaskTable32+12); - vmask1q_u32 = vld1q_u32(ne10_qMaskTable32+4); - - - - pCoefRe = pReTable+1; - pCoefIm = pImTable+1; - - /*First Result*/ - pDst[0] = pSrc[0] + pSrc[1]; - pDst[1] = 0; - /*N/2 th Result*/ - pDst[N] = pSrc[0] - pSrc[1]; - pDst[N+1] = 0; - - pOut1=pDst+2; - pOut2=pDst+N-8; - pIn1 = pSrc+2; - pIn2 = pSrc +N -8; - - Cnt = ((N>>2)-1)>>2; - vhalfq_f32 = vdupq_n_f32(0.5); - - /*for k=1 to N/4-1 and k=N/4+1 to K=N/2-1*/ - for(k=0;k>1; - - /*NEON Variable Declarations*/ - float32x4x2_t vin1q2_f32,vin2q2_f32,vtmpq2_f32; - float32x4_t vtmp1q_f32,vtmp2q_f32; - float32x4_t vureq_f32,vuimq_f32,vvreq_f32,vvimq_f32; - float32x4_t vretwdq_f32,vimtwdq_f32; - float32x4_t vhalfq_f32; - uint32x4_t vmaskq_u32,vmask1q_u32; - - /*Mask value to select three entries*/ - vmaskq_u32 = vld1q_u32(ne10_qMaskTable32+12); - vmask1q_u32 = vld1q_u32(ne10_qMaskTable32+4); - - pCoefRe = pReTable+1; - pCoefIm = pImTable+1; - - /*First Result*/ - pDst[0] = (pSrc[0] + pSrc[N])*0.5; - pDst[1] = (pSrc[0] - pSrc[N])*0.5; - - pOut1=pDst+2; - pOut2=pDst+N-8; - pIn1 = pSrc+2; - pIn2 = pSrc +N -8; - - Cnt = ((N>>2)-1)>>2; - vhalfq_f32 = vdupq_n_f32(0.5); - - /*for k=1 to N/4-1 and k=N/4+1 to K=N/2-1*/ - for(k=0;kp_cfft; - - /* Caluclation of Real IFFT of input 
*/ - if(S->ifft_flag_r == 1u) - { - /* Real IFFT core process */ - ne10_split_rifft_float_neon(pSrc, S->fft_len_real, S->p_twiddle_A_real, - S->p_twiddle_B_real, pTemp); - /* Complex radix-4 IFFT process */ - ne10_radix4_butterfly_inverse_float_neon(pDst, pTemp, S_CFFT->fft_len, S_CFFT->p_twiddle, S_CFFT->one_by_fft_len); - } - else - { - /* Complex radix-4 FFT process */ - ne10_radix4_butterfly_float_neon(pTemp, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle); - /* Real FFT core process */ - ne10_split_rfft_float_neon(pTemp, S->fft_len_real, S->p_twiddle_A_real, - S->p_twiddle_B_real, pDst); - } - -} -/** - * @} end of RFFT_RIFFT group - */ - diff --git a/modules/dsp/NE10_rfft_init.c b/modules/dsp/NE10_rfft_init.c deleted file mode 100644 index b028129..0000000 --- a/modules/dsp/NE10_rfft_init.c +++ /dev/null @@ -1,1180 +0,0 @@ -/* - * Copyright 2012-14 ARM Limited - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of ARM Limited nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "NE10_types.h" -#include "NE10_dsp.h" - -/* -* @brief Twiddle factors Table -*/ - -/** Pseudo code for Twiddle factor Tables Generation: - -for i=1 to N - rfft_twiddlecoef(2*i) = cos((i-1) * 2*PI/(float)N)) - rfft_rfft_twiddle_coef(2*i + 1) = sin((i-1) * 2*PI/(float)N)) -end - -where N = 1024 and PI = 3.14159265358979 - -N is the maximum FFT Size supported and -Cos and Sin values are interleaved fashion -*/ - -static const ne10_float32_t rfft_twiddle_coef[2048] = { - 1.000000000000000000f, 0.000000000000000000f, - 0.999981175282601110f, 0.006135884649154475f, - 0.999924701839144500f, 0.012271538285719925f, - 0.999830581795823400f, 0.018406729905804820f, - 0.999698818696204250f, 0.024541228522912288f, - 0.999529417501093140f, 0.030674803176636626f, - 0.999322384588349540f, 0.036807222941358832f, - 0.999077727752645360f, 0.042938256934940820f, - 0.998795456205172410f, 0.049067674327418015f, - 0.998475580573294770f, 0.055195244349689934f, - 0.998118112900149180f, 0.061320736302208578f, - 0.997723066644191640f, 0.067443919563664051f, - 0.997290456678690210f, 0.073564563599667426f, - 0.996820299291165670f, 0.079682437971430126f, - 0.996312612182778000f, 0.085797312344439894f, - 0.995767414467659820f, 0.091908956497132724f, - 0.995184726672196930f, 0.098017140329560604f, - 0.994564570734255420f, 0.104121633872054590f, - 0.993906970002356060f, 0.110222207293883060f, - 0.993211949234794500f, 0.116318630911904750f, - 0.992479534598709970f, 
0.122410675199216200f, - 0.991709753669099530f, 0.128498110793793170f, - 0.990902635427780010f, 0.134580708507126170f, - 0.990058210262297120f, 0.140658239332849210f, - 0.989176509964781010f, 0.146730474455361750f, - 0.988257567730749460f, 0.152797185258443440f, - 0.987301418157858430f, 0.158858143333861450f, - 0.986308097244598670f, 0.164913120489969890f, - 0.985277642388941220f, 0.170961888760301220f, - 0.984210092386929030f, 0.177004220412148750f, - 0.983105487431216290f, 0.183039887955140950f, - 0.981963869109555240f, 0.189068664149806190f, - 0.980785280403230430f, 0.195090322016128250f, - 0.979569765685440520f, 0.201104634842091900f, - 0.978317370719627650f, 0.207111376192218560f, - 0.977028142657754390f, 0.213110319916091360f, - 0.975702130038528570f, 0.219101240156869800f, - 0.974339382785575860f, 0.225083911359792830f, - 0.972939952205560180f, 0.231058108280671110f, - 0.971503890986251780f, 0.237023605994367200f, - 0.970031253194543970f, 0.242980179903263870f, - 0.968522094274417380f, 0.248927605745720150f, - 0.966976471044852070f, 0.254865659604514570f, - 0.965394441697689400f, 0.260794117915275510f, - 0.963776065795439840f, 0.266712757474898370f, - 0.962121404269041580f, 0.272621355449948980f, - 0.960430519415565790f, 0.278519689385053060f, - 0.958703474895871600f, 0.284407537211271880f, - 0.956940335732208820f, 0.290284677254462330f, - 0.955141168305770780f, 0.296150888243623790f, - 0.953306040354193860f, 0.302005949319228080f, - 0.951435020969008340f, 0.307849640041534870f, - 0.949528180593036670f, 0.313681740398891520f, - 0.947585591017741090f, 0.319502030816015690f, - 0.945607325380521280f, 0.325310292162262930f, - 0.943593458161960390f, 0.331106305759876430f, - 0.941544065183020810f, 0.336889853392220050f, - 0.939459223602189920f, 0.342660717311994380f, - 0.937339011912574960f, 0.348418680249434560f, - 0.935183509938947610f, 0.354163525420490340f, - 0.932992798834738960f, 0.359895036534988110f, - 0.930766961078983710f, 0.365612997804773850f, - 
0.928506080473215590f, 0.371317193951837540f, - 0.926210242138311380f, 0.377007410216418260f, - 0.923879532511286740f, 0.382683432365089780f, - 0.921514039342042010f, 0.388345046698826250f, - 0.919113851690057770f, 0.393992040061048100f, - 0.916679059921042700f, 0.399624199845646790f, - 0.914209755703530690f, 0.405241314004989860f, - 0.911706032005429880f, 0.410843171057903910f, - 0.909167983090522380f, 0.416429560097637150f, - 0.906595704514915330f, 0.422000270799799680f, - 0.903989293123443340f, 0.427555093430282080f, - 0.901348847046022030f, 0.433093818853151960f, - 0.898674465693953820f, 0.438616238538527660f, - 0.895966249756185220f, 0.444122144570429200f, - 0.893224301195515320f, 0.449611329654606540f, - 0.890448723244757880f, 0.455083587126343840f, - 0.887639620402853930f, 0.460538710958240010f, - 0.884797098430937790f, 0.465976495767966180f, - 0.881921264348355050f, 0.471396736825997640f, - 0.879012226428633530f, 0.476799230063322090f, - 0.876070094195406600f, 0.482183772079122720f, - 0.873094978418290090f, 0.487550160148436000f, - 0.870086991108711460f, 0.492898192229784040f, - 0.867046245515692650f, 0.498227666972781870f, - 0.863972856121586810f, 0.503538383725717580f, - 0.860866938637767310f, 0.508830142543106990f, - 0.857728610000272120f, 0.514102744193221660f, - 0.854557988365400530f, 0.519355990165589640f, - 0.851355193105265200f, 0.524589682678468950f, - 0.848120344803297230f, 0.529803624686294610f, - 0.844853565249707120f, 0.534997619887097150f, - 0.841554977436898440f, 0.540171472729892850f, - 0.838224705554838080f, 0.545324988422046460f, - 0.834862874986380010f, 0.550457972936604810f, - 0.831469612302545240f, 0.555570233019602180f, - 0.828045045257755800f, 0.560661576197336030f, - 0.824589302785025290f, 0.565731810783613120f, - 0.821102514991104650f, 0.570780745886967260f, - 0.817584813151583710f, 0.575808191417845340f, - 0.814036329705948410f, 0.580813958095764530f, - 0.810457198252594770f, 0.585797857456438860f, - 0.806847553543799330f, 
0.590759701858874160f, - 0.803207531480644940f, 0.595699304492433360f, - 0.799537269107905010f, 0.600616479383868970f, - 0.795836904608883570f, 0.605511041404325550f, - 0.792106577300212390f, 0.610382806276309480f, - 0.788346427626606340f, 0.615231590580626820f, - 0.784556597155575240f, 0.620057211763289100f, - 0.780737228572094490f, 0.624859488142386340f, - 0.776888465673232440f, 0.629638238914926980f, - 0.773010453362736990f, 0.634393284163645490f, - 0.769103337645579700f, 0.639124444863775730f, - 0.765167265622458960f, 0.643831542889791390f, - 0.761202385484261780f, 0.648514401022112440f, - 0.757208846506484570f, 0.653172842953776760f, - 0.753186799043612520f, 0.657806693297078640f, - 0.749136394523459370f, 0.662415777590171780f, - 0.745057785441466060f, 0.666999922303637470f, - 0.740951125354959110f, 0.671558954847018330f, - 0.736816568877369900f, 0.676092703575315920f, - 0.732654271672412820f, 0.680600997795453020f, - 0.728464390448225200f, 0.685083667772700360f, - 0.724247082951467000f, 0.689540544737066830f, - 0.720002507961381650f, 0.693971460889654000f, - 0.715730825283818590f, 0.698376249408972920f, - 0.711432195745216430f, 0.702754744457225300f, - 0.707106781186547570f, 0.707106781186547460f, - 0.702754744457225300f, 0.711432195745216430f, - 0.698376249408972920f, 0.715730825283818590f, - 0.693971460889654000f, 0.720002507961381650f, - 0.689540544737066940f, 0.724247082951466890f, - 0.685083667772700360f, 0.728464390448225200f, - 0.680600997795453130f, 0.732654271672412820f, - 0.676092703575316030f, 0.736816568877369790f, - 0.671558954847018330f, 0.740951125354959110f, - 0.666999922303637470f, 0.745057785441465950f, - 0.662415777590171780f, 0.749136394523459260f, - 0.657806693297078640f, 0.753186799043612410f, - 0.653172842953776760f, 0.757208846506484460f, - 0.648514401022112550f, 0.761202385484261780f, - 0.643831542889791500f, 0.765167265622458960f, - 0.639124444863775730f, 0.769103337645579590f, - 0.634393284163645490f, 0.773010453362736990f, - 
0.629638238914927100f, 0.776888465673232440f, - 0.624859488142386450f, 0.780737228572094380f, - 0.620057211763289210f, 0.784556597155575240f, - 0.615231590580626820f, 0.788346427626606230f, - 0.610382806276309480f, 0.792106577300212390f, - 0.605511041404325550f, 0.795836904608883460f, - 0.600616479383868970f, 0.799537269107905010f, - 0.595699304492433470f, 0.803207531480644830f, - 0.590759701858874280f, 0.806847553543799220f, - 0.585797857456438860f, 0.810457198252594770f, - 0.580813958095764530f, 0.814036329705948300f, - 0.575808191417845340f, 0.817584813151583710f, - 0.570780745886967370f, 0.821102514991104650f, - 0.565731810783613230f, 0.824589302785025290f, - 0.560661576197336030f, 0.828045045257755800f, - 0.555570233019602290f, 0.831469612302545240f, - 0.550457972936604810f, 0.834862874986380010f, - 0.545324988422046460f, 0.838224705554837970f, - 0.540171472729892970f, 0.841554977436898330f, - 0.534997619887097260f, 0.844853565249707010f, - 0.529803624686294830f, 0.848120344803297120f, - 0.524589682678468840f, 0.851355193105265200f, - 0.519355990165589530f, 0.854557988365400530f, - 0.514102744193221660f, 0.857728610000272120f, - 0.508830142543106990f, 0.860866938637767310f, - 0.503538383725717580f, 0.863972856121586700f, - 0.498227666972781870f, 0.867046245515692650f, - 0.492898192229784090f, 0.870086991108711350f, - 0.487550160148436050f, 0.873094978418290090f, - 0.482183772079122830f, 0.876070094195406600f, - 0.476799230063322250f, 0.879012226428633410f, - 0.471396736825997810f, 0.881921264348354940f, - 0.465976495767966130f, 0.884797098430937790f, - 0.460538710958240010f, 0.887639620402853930f, - 0.455083587126343840f, 0.890448723244757880f, - 0.449611329654606600f, 0.893224301195515320f, - 0.444122144570429260f, 0.895966249756185110f, - 0.438616238538527710f, 0.898674465693953820f, - 0.433093818853152010f, 0.901348847046022030f, - 0.427555093430282200f, 0.903989293123443340f, - 0.422000270799799790f, 0.906595704514915330f, - 0.416429560097637320f, 
0.909167983090522270f, - 0.410843171057903910f, 0.911706032005429880f, - 0.405241314004989860f, 0.914209755703530690f, - 0.399624199845646790f, 0.916679059921042700f, - 0.393992040061048100f, 0.919113851690057770f, - 0.388345046698826300f, 0.921514039342041900f, - 0.382683432365089840f, 0.923879532511286740f, - 0.377007410216418310f, 0.926210242138311270f, - 0.371317193951837600f, 0.928506080473215480f, - 0.365612997804773960f, 0.930766961078983710f, - 0.359895036534988280f, 0.932992798834738850f, - 0.354163525420490510f, 0.935183509938947500f, - 0.348418680249434510f, 0.937339011912574960f, - 0.342660717311994380f, 0.939459223602189920f, - 0.336889853392220050f, 0.941544065183020810f, - 0.331106305759876430f, 0.943593458161960390f, - 0.325310292162262980f, 0.945607325380521280f, - 0.319502030816015750f, 0.947585591017741090f, - 0.313681740398891570f, 0.949528180593036670f, - 0.307849640041534980f, 0.951435020969008340f, - 0.302005949319228200f, 0.953306040354193750f, - 0.296150888243623960f, 0.955141168305770670f, - 0.290284677254462330f, 0.956940335732208940f, - 0.284407537211271820f, 0.958703474895871600f, - 0.278519689385053060f, 0.960430519415565790f, - 0.272621355449948980f, 0.962121404269041580f, - 0.266712757474898420f, 0.963776065795439840f, - 0.260794117915275570f, 0.965394441697689400f, - 0.254865659604514630f, 0.966976471044852070f, - 0.248927605745720260f, 0.968522094274417270f, - 0.242980179903263980f, 0.970031253194543970f, - 0.237023605994367340f, 0.971503890986251780f, - 0.231058108280671280f, 0.972939952205560070f, - 0.225083911359792780f, 0.974339382785575860f, - 0.219101240156869770f, 0.975702130038528570f, - 0.213110319916091360f, 0.977028142657754390f, - 0.207111376192218560f, 0.978317370719627650f, - 0.201104634842091960f, 0.979569765685440520f, - 0.195090322016128330f, 0.980785280403230430f, - 0.189068664149806280f, 0.981963869109555240f, - 0.183039887955141060f, 0.983105487431216290f, - 0.177004220412148860f, 0.984210092386929030f, - 
0.170961888760301360f, 0.985277642388941220f, - 0.164913120489970090f, 0.986308097244598670f, - 0.158858143333861390f, 0.987301418157858430f, - 0.152797185258443410f, 0.988257567730749460f, - 0.146730474455361750f, 0.989176509964781010f, - 0.140658239332849240f, 0.990058210262297120f, - 0.134580708507126220f, 0.990902635427780010f, - 0.128498110793793220f, 0.991709753669099530f, - 0.122410675199216280f, 0.992479534598709970f, - 0.116318630911904880f, 0.993211949234794500f, - 0.110222207293883180f, 0.993906970002356060f, - 0.104121633872054730f, 0.994564570734255420f, - 0.098017140329560770f, 0.995184726672196820f, - 0.091908956497132696f, 0.995767414467659820f, - 0.085797312344439880f, 0.996312612182778000f, - 0.079682437971430126f, 0.996820299291165670f, - 0.073564563599667454f, 0.997290456678690210f, - 0.067443919563664106f, 0.997723066644191640f, - 0.061320736302208648f, 0.998118112900149180f, - 0.055195244349690031f, 0.998475580573294770f, - 0.049067674327418126f, 0.998795456205172410f, - 0.042938256934940959f, 0.999077727752645360f, - 0.036807222941358991f, 0.999322384588349540f, - 0.030674803176636581f, 0.999529417501093140f, - 0.024541228522912264f, 0.999698818696204250f, - 0.018406729905804820f, 0.999830581795823400f, - 0.012271538285719944f, 0.999924701839144500f, - 0.006135884649154515f, 0.999981175282601110f, - 0.000000000000000061f, 1.000000000000000000f, - -0.006135884649154393f, 0.999981175282601110f, - -0.012271538285719823f, 0.999924701839144500f, - -0.018406729905804695f, 0.999830581795823400f, - -0.024541228522912142f, 0.999698818696204250f, - -0.030674803176636459f, 0.999529417501093140f, - -0.036807222941358866f, 0.999322384588349540f, - -0.042938256934940834f, 0.999077727752645360f, - -0.049067674327418008f, 0.998795456205172410f, - -0.055195244349689913f, 0.998475580573294770f, - -0.061320736302208530f, 0.998118112900149180f, - -0.067443919563663982f, 0.997723066644191640f, - -0.073564563599667329f, 0.997290456678690210f, - 
-0.079682437971430015f, 0.996820299291165780f, - -0.085797312344439755f, 0.996312612182778000f, - -0.091908956497132571f, 0.995767414467659820f, - -0.098017140329560645f, 0.995184726672196930f, - -0.104121633872054600f, 0.994564570734255420f, - -0.110222207293883060f, 0.993906970002356060f, - -0.116318630911904750f, 0.993211949234794500f, - -0.122410675199216150f, 0.992479534598709970f, - -0.128498110793793110f, 0.991709753669099530f, - -0.134580708507126110f, 0.990902635427780010f, - -0.140658239332849130f, 0.990058210262297120f, - -0.146730474455361640f, 0.989176509964781010f, - -0.152797185258443300f, 0.988257567730749460f, - -0.158858143333861280f, 0.987301418157858430f, - -0.164913120489969950f, 0.986308097244598670f, - -0.170961888760301240f, 0.985277642388941220f, - -0.177004220412148750f, 0.984210092386929030f, - -0.183039887955140920f, 0.983105487431216290f, - -0.189068664149806160f, 0.981963869109555240f, - -0.195090322016128190f, 0.980785280403230430f, - -0.201104634842091820f, 0.979569765685440520f, - -0.207111376192218450f, 0.978317370719627650f, - -0.213110319916091250f, 0.977028142657754390f, - -0.219101240156869660f, 0.975702130038528570f, - -0.225083911359792670f, 0.974339382785575860f, - -0.231058108280671140f, 0.972939952205560180f, - -0.237023605994367230f, 0.971503890986251780f, - -0.242980179903263870f, 0.970031253194543970f, - -0.248927605745720120f, 0.968522094274417380f, - -0.254865659604514520f, 0.966976471044852070f, - -0.260794117915275460f, 0.965394441697689400f, - -0.266712757474898310f, 0.963776065795439840f, - -0.272621355449948870f, 0.962121404269041580f, - -0.278519689385052950f, 0.960430519415565900f, - -0.284407537211271710f, 0.958703474895871600f, - -0.290284677254462160f, 0.956940335732208940f, - -0.296150888243623840f, 0.955141168305770670f, - -0.302005949319228080f, 0.953306040354193860f, - -0.307849640041534870f, 0.951435020969008340f, - -0.313681740398891410f, 0.949528180593036670f, - -0.319502030816015640f, 
0.947585591017741200f, - -0.325310292162262870f, 0.945607325380521390f, - -0.331106305759876320f, 0.943593458161960390f, - -0.336889853392219940f, 0.941544065183020810f, - -0.342660717311994270f, 0.939459223602189920f, - -0.348418680249434400f, 0.937339011912574960f, - -0.354163525420490400f, 0.935183509938947610f, - -0.359895036534988170f, 0.932992798834738850f, - -0.365612997804773850f, 0.930766961078983710f, - -0.371317193951837490f, 0.928506080473215590f, - -0.377007410216418200f, 0.926210242138311380f, - -0.382683432365089730f, 0.923879532511286740f, - -0.388345046698826190f, 0.921514039342042010f, - -0.393992040061047990f, 0.919113851690057770f, - -0.399624199845646680f, 0.916679059921042700f, - -0.405241314004989750f, 0.914209755703530690f, - -0.410843171057903800f, 0.911706032005429880f, - -0.416429560097636990f, 0.909167983090522490f, - -0.422000270799799680f, 0.906595704514915330f, - -0.427555093430281860f, 0.903989293123443450f, - -0.433093818853151900f, 0.901348847046022030f, - -0.438616238538527380f, 0.898674465693953930f, - -0.444122144570429140f, 0.895966249756185220f, - -0.449611329654606710f, 0.893224301195515210f, - -0.455083587126343720f, 0.890448723244757990f, - -0.460538710958240060f, 0.887639620402853930f, - -0.465976495767966010f, 0.884797098430937900f, - -0.471396736825997700f, 0.881921264348355050f, - -0.476799230063321920f, 0.879012226428633530f, - -0.482183772079122720f, 0.876070094195406600f, - -0.487550160148435720f, 0.873094978418290200f, - -0.492898192229783980f, 0.870086991108711460f, - -0.498227666972781590f, 0.867046245515692760f, - -0.503538383725717460f, 0.863972856121586810f, - -0.508830142543107100f, 0.860866938637767200f, - -0.514102744193221660f, 0.857728610000272120f, - -0.519355990165589640f, 0.854557988365400530f, - -0.524589682678468730f, 0.851355193105265200f, - -0.529803624686294720f, 0.848120344803297230f, - -0.534997619887097040f, 0.844853565249707230f, - -0.540171472729892850f, 0.841554977436898440f, - 
-0.545324988422046240f, 0.838224705554838190f, - -0.550457972936604700f, 0.834862874986380120f, - -0.555570233019601960f, 0.831469612302545460f, - -0.560661576197335920f, 0.828045045257755800f, - -0.565731810783613230f, 0.824589302785025180f, - -0.570780745886967140f, 0.821102514991104760f, - -0.575808191417845340f, 0.817584813151583710f, - -0.580813958095764420f, 0.814036329705948520f, - -0.585797857456438860f, 0.810457198252594770f, - -0.590759701858874050f, 0.806847553543799450f, - -0.595699304492433360f, 0.803207531480644940f, - -0.600616479383868750f, 0.799537269107905240f, - -0.605511041404325430f, 0.795836904608883570f, - -0.610382806276309590f, 0.792106577300212280f, - -0.615231590580626710f, 0.788346427626606340f, - -0.620057211763289210f, 0.784556597155575130f, - -0.624859488142386230f, 0.780737228572094600f, - -0.629638238914927100f, 0.776888465673232440f, - -0.634393284163645380f, 0.773010453362737100f, - -0.639124444863775730f, 0.769103337645579590f, - -0.643831542889791280f, 0.765167265622459070f, - -0.648514401022112440f, 0.761202385484261890f, - -0.653172842953776530f, 0.757208846506484680f, - -0.657806693297078640f, 0.753186799043612520f, - -0.662415777590171890f, 0.749136394523459260f, - -0.666999922303637360f, 0.745057785441466060f, - -0.671558954847018440f, 0.740951125354958990f, - -0.676092703575315810f, 0.736816568877370020f, - -0.680600997795453020f, 0.732654271672412820f, - -0.685083667772700240f, 0.728464390448225310f, - -0.689540544737066940f, 0.724247082951466890f, - -0.693971460889653780f, 0.720002507961381770f, - -0.698376249408972800f, 0.715730825283818710f, - -0.702754744457225080f, 0.711432195745216660f, - -0.707106781186547460f, 0.707106781186547570f, - -0.711432195745216540f, 0.702754744457225190f, - -0.715730825283818590f, 0.698376249408972920f, - -0.720002507961381650f, 0.693971460889654000f, - -0.724247082951466780f, 0.689540544737067050f, - -0.728464390448225200f, 0.685083667772700360f, - -0.732654271672412700f, 
0.680600997795453240f, - -0.736816568877369900f, 0.676092703575315920f, - -0.740951125354958880f, 0.671558954847018550f, - -0.745057785441465950f, 0.666999922303637580f, - -0.749136394523459150f, 0.662415777590172010f, - -0.753186799043612410f, 0.657806693297078750f, - -0.757208846506484570f, 0.653172842953776640f, - -0.761202385484261670f, 0.648514401022112550f, - -0.765167265622458960f, 0.643831542889791390f, - -0.769103337645579480f, 0.639124444863775840f, - -0.773010453362736990f, 0.634393284163645490f, - -0.776888465673232330f, 0.629638238914927210f, - -0.780737228572094490f, 0.624859488142386340f, - -0.784556597155575020f, 0.620057211763289430f, - -0.788346427626606230f, 0.615231590580626930f, - -0.792106577300212170f, 0.610382806276309700f, - -0.795836904608883460f, 0.605511041404325660f, - -0.799537269107905120f, 0.600616479383868860f, - -0.803207531480644830f, 0.595699304492433470f, - -0.806847553543799330f, 0.590759701858874160f, - -0.810457198252594660f, 0.585797857456438980f, - -0.814036329705948410f, 0.580813958095764530f, - -0.817584813151583600f, 0.575808191417845450f, - -0.821102514991104650f, 0.570780745886967260f, - -0.824589302785025070f, 0.565731810783613450f, - -0.828045045257755690f, 0.560661576197336140f, - -0.831469612302545350f, 0.555570233019602180f, - -0.834862874986380010f, 0.550457972936604920f, - -0.838224705554838080f, 0.545324988422046350f, - -0.841554977436898330f, 0.540171472729892970f, - -0.844853565249707120f, 0.534997619887097150f, - -0.848120344803297120f, 0.529803624686294830f, - -0.851355193105265200f, 0.524589682678468950f, - -0.854557988365400420f, 0.519355990165589750f, - -0.857728610000272010f, 0.514102744193221770f, - -0.860866938637767090f, 0.508830142543107320f, - -0.863972856121586700f, 0.503538383725717690f, - -0.867046245515692760f, 0.498227666972781760f, - -0.870086991108711350f, 0.492898192229784150f, - -0.873094978418290090f, 0.487550160148435880f, - -0.876070094195406490f, 0.482183772079122890f, - 
-0.879012226428633530f, 0.476799230063322090f, - -0.881921264348354940f, 0.471396736825997860f, - -0.884797098430937790f, 0.465976495767966180f, - -0.887639620402853820f, 0.460538710958240230f, - -0.890448723244757880f, 0.455083587126343890f, - -0.893224301195515210f, 0.449611329654606870f, - -0.895966249756185110f, 0.444122144570429310f, - -0.898674465693953930f, 0.438616238538527550f, - -0.901348847046021920f, 0.433093818853152070f, - -0.903989293123443340f, 0.427555093430282030f, - -0.906595704514915330f, 0.422000270799799850f, - -0.909167983090522380f, 0.416429560097637150f, - -0.911706032005429770f, 0.410843171057904130f, - -0.914209755703530690f, 0.405241314004989920f, - -0.916679059921042590f, 0.399624199845647070f, - -0.919113851690057770f, 0.393992040061048150f, - -0.921514039342041790f, 0.388345046698826580f, - -0.923879532511286740f, 0.382683432365089890f, - -0.926210242138311380f, 0.377007410216418150f, - -0.928506080473215480f, 0.371317193951837710f, - -0.930766961078983710f, 0.365612997804773800f, - -0.932992798834738850f, 0.359895036534988330f, - -0.935183509938947610f, 0.354163525420490400f, - -0.937339011912574850f, 0.348418680249434790f, - -0.939459223602189920f, 0.342660717311994430f, - -0.941544065183020700f, 0.336889853392220330f, - -0.943593458161960390f, 0.331106305759876480f, - -0.945607325380521170f, 0.325310292162263260f, - -0.947585591017741090f, 0.319502030816015800f, - -0.949528180593036670f, 0.313681740398891410f, - -0.951435020969008340f, 0.307849640041535030f, - -0.953306040354193860f, 0.302005949319228030f, - -0.955141168305770670f, 0.296150888243624010f, - -0.956940335732208820f, 0.290284677254462390f, - -0.958703474895871490f, 0.284407537211272100f, - -0.960430519415565790f, 0.278519689385053170f, - -0.962121404269041470f, 0.272621355449949250f, - -0.963776065795439840f, 0.266712757474898480f, - -0.965394441697689290f, 0.260794117915275850f, - -0.966976471044852070f, 0.254865659604514680f, - -0.968522094274417380f, 
0.248927605745720090f, - -0.970031253194543970f, 0.242980179903264070f, - -0.971503890986251780f, 0.237023605994367170f, - -0.972939952205560070f, 0.231058108280671330f, - -0.974339382785575860f, 0.225083911359792830f, - -0.975702130038528460f, 0.219101240156870050f, - -0.977028142657754390f, 0.213110319916091420f, - -0.978317370719627540f, 0.207111376192218840f, - -0.979569765685440520f, 0.201104634842092010f, - -0.980785280403230430f, 0.195090322016128610f, - -0.981963869109555240f, 0.189068664149806360f, - -0.983105487431216290f, 0.183039887955140900f, - -0.984210092386929030f, 0.177004220412148940f, - -0.985277642388941220f, 0.170961888760301220f, - -0.986308097244598560f, 0.164913120489970140f, - -0.987301418157858430f, 0.158858143333861470f, - -0.988257567730749460f, 0.152797185258443690f, - -0.989176509964781010f, 0.146730474455361800f, - -0.990058210262297010f, 0.140658239332849540f, - -0.990902635427780010f, 0.134580708507126280f, - -0.991709753669099530f, 0.128498110793793090f, - -0.992479534598709970f, 0.122410675199216350f, - -0.993211949234794500f, 0.116318630911904710f, - -0.993906970002356060f, 0.110222207293883240f, - -0.994564570734255420f, 0.104121633872054570f, - -0.995184726672196820f, 0.098017140329560826f, - -0.995767414467659820f, 0.091908956497132752f, - -0.996312612182778000f, 0.085797312344440158f, - -0.996820299291165670f, 0.079682437971430195f, - -0.997290456678690210f, 0.073564563599667732f, - -0.997723066644191640f, 0.067443919563664176f, - -0.998118112900149180f, 0.061320736302208488f, - -0.998475580573294770f, 0.055195244349690094f, - -0.998795456205172410f, 0.049067674327417966f, - -0.999077727752645360f, 0.042938256934941021f, - -0.999322384588349540f, 0.036807222941358832f, - -0.999529417501093140f, 0.030674803176636865f, - -0.999698818696204250f, 0.024541228522912326f, - -0.999830581795823400f, 0.018406729905805101f, - -0.999924701839144500f, 0.012271538285720007f, - -0.999981175282601110f, 0.006135884649154799f, - 
-1.000000000000000000f, 0.000000000000000122f, - -0.999981175282601110f, -0.006135884649154554f, - -0.999924701839144500f, -0.012271538285719762f, - -0.999830581795823400f, -0.018406729905804858f, - -0.999698818696204250f, -0.024541228522912080f, - -0.999529417501093140f, -0.030674803176636619f, - -0.999322384588349540f, -0.036807222941358582f, - -0.999077727752645360f, -0.042938256934940779f, - -0.998795456205172410f, -0.049067674327417724f, - -0.998475580573294770f, -0.055195244349689851f, - -0.998118112900149180f, -0.061320736302208245f, - -0.997723066644191640f, -0.067443919563663926f, - -0.997290456678690210f, -0.073564563599667496f, - -0.996820299291165780f, -0.079682437971429945f, - -0.996312612182778000f, -0.085797312344439922f, - -0.995767414467659820f, -0.091908956497132516f, - -0.995184726672196930f, -0.098017140329560590f, - -0.994564570734255530f, -0.104121633872054320f, - -0.993906970002356060f, -0.110222207293883000f, - -0.993211949234794610f, -0.116318630911904470f, - -0.992479534598709970f, -0.122410675199216100f, - -0.991709753669099530f, -0.128498110793792840f, - -0.990902635427780010f, -0.134580708507126060f, - -0.990058210262297120f, -0.140658239332849290f, - -0.989176509964781010f, -0.146730474455361580f, - -0.988257567730749460f, -0.152797185258443440f, - -0.987301418157858430f, -0.158858143333861220f, - -0.986308097244598670f, -0.164913120489969890f, - -0.985277642388941330f, -0.170961888760300970f, - -0.984210092386929140f, -0.177004220412148690f, - -0.983105487431216400f, -0.183039887955140650f, - -0.981963869109555240f, -0.189068664149806110f, - -0.980785280403230430f, -0.195090322016128360f, - -0.979569765685440520f, -0.201104634842091760f, - -0.978317370719627650f, -0.207111376192218590f, - -0.977028142657754390f, -0.213110319916091200f, - -0.975702130038528570f, -0.219101240156869800f, - -0.974339382785575860f, -0.225083911359792610f, - -0.972939952205560180f, -0.231058108280671080f, - -0.971503890986251890f, -0.237023605994366950f, - 
-0.970031253194543970f, -0.242980179903263820f, - -0.968522094274417380f, -0.248927605745719870f, - -0.966976471044852180f, -0.254865659604514460f, - -0.965394441697689400f, -0.260794117915275630f, - -0.963776065795439950f, -0.266712757474898250f, - -0.962121404269041580f, -0.272621355449949030f, - -0.960430519415565900f, -0.278519689385052890f, - -0.958703474895871600f, -0.284407537211271820f, - -0.956940335732208940f, -0.290284677254462110f, - -0.955141168305770780f, -0.296150888243623790f, - -0.953306040354193970f, -0.302005949319227810f, - -0.951435020969008450f, -0.307849640041534810f, - -0.949528180593036790f, -0.313681740398891180f, - -0.947585591017741200f, -0.319502030816015580f, - -0.945607325380521280f, -0.325310292162262980f, - -0.943593458161960390f, -0.331106305759876260f, - -0.941544065183020810f, -0.336889853392220110f, - -0.939459223602190030f, -0.342660717311994210f, - -0.937339011912574960f, -0.348418680249434560f, - -0.935183509938947720f, -0.354163525420490120f, - -0.932992798834738960f, -0.359895036534988110f, - -0.930766961078983820f, -0.365612997804773580f, - -0.928506080473215590f, -0.371317193951837430f, - -0.926210242138311490f, -0.377007410216417930f, - -0.923879532511286850f, -0.382683432365089670f, - -0.921514039342041900f, -0.388345046698826360f, - -0.919113851690057770f, -0.393992040061047930f, - -0.916679059921042700f, -0.399624199845646840f, - -0.914209755703530690f, -0.405241314004989690f, - -0.911706032005429880f, -0.410843171057903910f, - -0.909167983090522490f, -0.416429560097636930f, - -0.906595704514915450f, -0.422000270799799630f, - -0.903989293123443450f, -0.427555093430281810f, - -0.901348847046022030f, -0.433093818853151850f, - -0.898674465693954040f, -0.438616238538527330f, - -0.895966249756185220f, -0.444122144570429090f, - -0.893224301195515320f, -0.449611329654606650f, - -0.890448723244757990f, -0.455083587126343670f, - -0.887639620402853930f, -0.460538710958240060f, - -0.884797098430937900f, -0.465976495767965960f, - 
-0.881921264348355050f, -0.471396736825997640f, - -0.879012226428633640f, -0.476799230063321870f, - -0.876070094195406600f, -0.482183772079122660f, - -0.873094978418290200f, -0.487550160148435660f, - -0.870086991108711460f, -0.492898192229783930f, - -0.867046245515692870f, -0.498227666972781540f, - -0.863972856121586810f, -0.503538383725717460f, - -0.860866938637767310f, -0.508830142543107100f, - -0.857728610000272120f, -0.514102744193221550f, - -0.854557988365400530f, -0.519355990165589640f, - -0.851355193105265310f, -0.524589682678468730f, - -0.848120344803297230f, -0.529803624686294610f, - -0.844853565249707230f, -0.534997619887096930f, - -0.841554977436898440f, -0.540171472729892850f, - -0.838224705554838190f, -0.545324988422046130f, - -0.834862874986380120f, -0.550457972936604700f, - -0.831469612302545460f, -0.555570233019601960f, - -0.828045045257755800f, -0.560661576197335920f, - -0.824589302785025290f, -0.565731810783613230f, - -0.821102514991104760f, -0.570780745886967140f, - -0.817584813151583710f, -0.575808191417845340f, - -0.814036329705948520f, -0.580813958095764300f, - -0.810457198252594770f, -0.585797857456438860f, - -0.806847553543799450f, -0.590759701858873940f, - -0.803207531480644940f, -0.595699304492433250f, - -0.799537269107905240f, -0.600616479383868640f, - -0.795836904608883570f, -0.605511041404325430f, - -0.792106577300212280f, -0.610382806276309480f, - -0.788346427626606340f, -0.615231590580626710f, - -0.784556597155575240f, -0.620057211763289210f, - -0.780737228572094600f, -0.624859488142386230f, - -0.776888465673232440f, -0.629638238914926980f, - -0.773010453362737100f, -0.634393284163645270f, - -0.769103337645579700f, -0.639124444863775730f, - -0.765167265622459070f, -0.643831542889791280f, - -0.761202385484261890f, -0.648514401022112330f, - -0.757208846506484790f, -0.653172842953776530f, - -0.753186799043612630f, -0.657806693297078530f, - -0.749136394523459260f, -0.662415777590171780f, - -0.745057785441466060f, -0.666999922303637360f, - 
-0.740951125354959110f, -0.671558954847018440f, - -0.736816568877370020f, -0.676092703575315810f, - -0.732654271672412820f, -0.680600997795453020f, - -0.728464390448225420f, -0.685083667772700130f, - -0.724247082951467000f, -0.689540544737066830f, - -0.720002507961381880f, -0.693971460889653780f, - -0.715730825283818710f, -0.698376249408972800f, - -0.711432195745216660f, -0.702754744457225080f, - -0.707106781186547680f, -0.707106781186547460f, - -0.702754744457225300f, -0.711432195745216430f, - -0.698376249408973030f, -0.715730825283818480f, - -0.693971460889654000f, -0.720002507961381650f, - -0.689540544737067050f, -0.724247082951466780f, - -0.685083667772700360f, -0.728464390448225200f, - -0.680600997795453240f, -0.732654271672412590f, - -0.676092703575316030f, -0.736816568877369790f, - -0.671558954847018660f, -0.740951125354958880f, - -0.666999922303637580f, -0.745057785441465840f, - -0.662415777590172010f, -0.749136394523459040f, - -0.657806693297078750f, -0.753186799043612410f, - -0.653172842953777090f, -0.757208846506484230f, - -0.648514401022112220f, -0.761202385484262000f, - -0.643831542889791500f, -0.765167265622458960f, - -0.639124444863775950f, -0.769103337645579480f, - -0.634393284163645930f, -0.773010453362736660f, - -0.629638238914926870f, -0.776888465673232550f, - -0.624859488142386450f, -0.780737228572094380f, - -0.620057211763289430f, -0.784556597155575020f, - -0.615231590580627260f, -0.788346427626605890f, - -0.610382806276309360f, -0.792106577300212390f, - -0.605511041404325660f, -0.795836904608883460f, - -0.600616479383869310f, -0.799537269107904790f, - -0.595699304492433130f, -0.803207531480645050f, - -0.590759701858874280f, -0.806847553543799220f, - -0.585797857456439090f, -0.810457198252594660f, - -0.580813958095764970f, -0.814036329705948080f, - -0.575808191417845230f, -0.817584813151583820f, - -0.570780745886967370f, -0.821102514991104650f, - -0.565731810783613450f, -0.824589302785025070f, - -0.560661576197336480f, -0.828045045257755460f, - 
-0.555570233019602180f, -0.831469612302545240f, - -0.550457972936604920f, -0.834862874986380010f, - -0.545324988422046800f, -0.838224705554837860f, - -0.540171472729892740f, -0.841554977436898550f, - -0.534997619887097260f, -0.844853565249707010f, - -0.529803624686294940f, -0.848120344803297120f, - -0.524589682678469390f, -0.851355193105264860f, - -0.519355990165589420f, -0.854557988365400640f, - -0.514102744193221770f, -0.857728610000272010f, - -0.508830142543107320f, -0.860866938637767090f, - -0.503538383725718020f, -0.863972856121586470f, - -0.498227666972781810f, -0.867046245515692650f, - -0.492898192229784200f, -0.870086991108711350f, - -0.487550160148436330f, -0.873094978418289870f, - -0.482183772079122550f, -0.876070094195406710f, - -0.476799230063322140f, -0.879012226428633410f, - -0.471396736825997860f, -0.881921264348354940f, - -0.465976495767966630f, -0.884797098430937570f, - -0.460538710958239890f, -0.887639620402854050f, - -0.455083587126343950f, -0.890448723244757880f, - -0.449611329654606930f, -0.893224301195515210f, - -0.444122144570429760f, -0.895966249756184880f, - -0.438616238538527600f, -0.898674465693953820f, - -0.433093818853152120f, -0.901348847046021920f, - -0.427555093430282470f, -0.903989293123443120f, - -0.422000270799799520f, -0.906595704514915450f, - -0.416429560097637210f, -0.909167983090522380f, - -0.410843171057904190f, -0.911706032005429770f, - -0.405241314004990360f, -0.914209755703530470f, - -0.399624199845646730f, -0.916679059921042700f, - -0.393992040061048210f, -0.919113851690057660f, - -0.388345046698826630f, -0.921514039342041790f, - -0.382683432365090340f, -0.923879532511286520f, - -0.377007410216418200f, -0.926210242138311380f, - -0.371317193951837770f, -0.928506080473215480f, - -0.365612997804774300f, -0.930766961078983600f, - -0.359895036534987940f, -0.932992798834738960f, - -0.354163525420490450f, -0.935183509938947610f, - -0.348418680249434840f, -0.937339011912574850f, - -0.342660717311994880f, -0.939459223602189700f, - 
-0.336889853392219940f, -0.941544065183020810f, - -0.331106305759876540f, -0.943593458161960270f, - -0.325310292162263310f, -0.945607325380521170f, - -0.319502030816015410f, -0.947585591017741200f, - -0.313681740398891460f, -0.949528180593036670f, - -0.307849640041535090f, -0.951435020969008340f, - -0.302005949319228530f, -0.953306040354193750f, - -0.296150888243623680f, -0.955141168305770780f, - -0.290284677254462440f, -0.956940335732208820f, - -0.284407537211272150f, -0.958703474895871490f, - -0.278519689385053610f, -0.960430519415565680f, - -0.272621355449948870f, -0.962121404269041580f, - -0.266712757474898530f, -0.963776065795439840f, - -0.260794117915275900f, -0.965394441697689290f, - -0.254865659604514350f, -0.966976471044852180f, - -0.248927605745720150f, -0.968522094274417270f, - -0.242980179903264120f, -0.970031253194543970f, - -0.237023605994367670f, -0.971503890986251670f, - -0.231058108280670940f, -0.972939952205560180f, - -0.225083911359792920f, -0.974339382785575860f, - -0.219101240156870100f, -0.975702130038528460f, - -0.213110319916091920f, -0.977028142657754280f, - -0.207111376192218480f, -0.978317370719627650f, - -0.201104634842092070f, -0.979569765685440520f, - -0.195090322016128660f, -0.980785280403230320f, - -0.189068664149805970f, -0.981963869109555350f, - -0.183039887955140950f, -0.983105487431216290f, - -0.177004220412149000f, -0.984210092386929030f, - -0.170961888760301690f, -0.985277642388941110f, - -0.164913120489969760f, -0.986308097244598670f, - -0.158858143333861530f, -0.987301418157858320f, - -0.152797185258443740f, -0.988257567730749460f, - -0.146730474455362300f, -0.989176509964780900f, - -0.140658239332849160f, -0.990058210262297120f, - -0.134580708507126360f, -0.990902635427780010f, - -0.128498110793793590f, -0.991709753669099530f, - -0.122410675199215960f, -0.992479534598710080f, - -0.116318630911904770f, -0.993211949234794500f, - -0.110222207293883310f, -0.993906970002356060f, - -0.104121633872055070f, -0.994564570734255420f, - 
-0.098017140329560451f, -0.995184726672196930f, - -0.091908956497132821f, -0.995767414467659820f, - -0.085797312344440227f, -0.996312612182778000f, - -0.079682437971430695f, -0.996820299291165670f, - -0.073564563599667357f, -0.997290456678690210f, - -0.067443919563664231f, -0.997723066644191640f, - -0.061320736302208995f, -0.998118112900149180f, - -0.055195244349689712f, -0.998475580573294770f, - -0.049067674327418029f, -0.998795456205172410f, - -0.042938256934941084f, -0.999077727752645360f, - -0.036807222941359331f, -0.999322384588349430f, - -0.030674803176636484f, -0.999529417501093140f, - -0.024541228522912389f, -0.999698818696204250f, - -0.018406729905805164f, -0.999830581795823400f, - -0.012271538285720512f, -0.999924701839144500f, - -0.006135884649154416f, -0.999981175282601110f, - -0.000000000000000184f, -1.000000000000000000f, - 0.006135884649154049f, -0.999981175282601110f, - 0.012271538285720144f, -0.999924701839144500f, - 0.018406729905804796f, -0.999830581795823400f, - 0.024541228522912021f, -0.999698818696204250f, - 0.030674803176636116f, -0.999529417501093140f, - 0.036807222941358964f, -0.999322384588349540f, - 0.042938256934940716f, -0.999077727752645360f, - 0.049067674327417661f, -0.998795456205172410f, - 0.055195244349689344f, -0.998475580573294770f, - 0.061320736302208627f, -0.998118112900149180f, - 0.067443919563663871f, -0.997723066644191640f, - 0.073564563599666982f, -0.997290456678690210f, - 0.079682437971430334f, -0.996820299291165670f, - 0.085797312344439852f, -0.996312612182778000f, - 0.091908956497132446f, -0.995767414467659820f, - 0.098017140329560090f, -0.995184726672196930f, - 0.104121633872054700f, -0.994564570734255420f, - 0.110222207293882930f, -0.993906970002356060f, - 0.116318630911904410f, -0.993211949234794610f, - 0.122410675199215600f, -0.992479534598710080f, - 0.128498110793793220f, -0.991709753669099530f, - 0.134580708507125970f, -0.990902635427780010f, - 0.140658239332848790f, -0.990058210262297120f, - 0.146730474455361940f, 
-0.989176509964780900f, - 0.152797185258443380f, -0.988257567730749460f, - 0.158858143333861170f, -0.987301418157858430f, - 0.164913120489969390f, -0.986308097244598780f, - 0.170961888760301330f, -0.985277642388941220f, - 0.177004220412148640f, -0.984210092386929140f, - 0.183039887955140590f, -0.983105487431216400f, - 0.189068664149805610f, -0.981963869109555350f, - 0.195090322016128300f, -0.980785280403230430f, - 0.201104634842091710f, -0.979569765685440630f, - 0.207111376192218120f, -0.978317370719627770f, - 0.213110319916091560f, -0.977028142657754280f, - 0.219101240156869740f, -0.975702130038528570f, - 0.225083911359792550f, -0.974339382785575970f, - 0.231058108280670580f, -0.972939952205560290f, - 0.237023605994367310f, -0.971503890986251780f, - 0.242980179903263760f, -0.970031253194543970f, - 0.248927605745719790f, -0.968522094274417380f, - 0.254865659604513960f, -0.966976471044852290f, - 0.260794117915275510f, -0.965394441697689400f, - 0.266712757474898200f, -0.963776065795439950f, - 0.272621355449948530f, -0.962121404269041690f, - 0.278519689385053280f, -0.960430519415565790f, - 0.284407537211271770f, -0.958703474895871600f, - 0.290284677254462050f, -0.956940335732208940f, - 0.296150888243623290f, -0.955141168305770890f, - 0.302005949319228140f, -0.953306040354193860f, - 0.307849640041534760f, -0.951435020969008450f, - 0.313681740398891130f, -0.949528180593036790f, - 0.319502030816015080f, -0.947585591017741310f, - 0.325310292162262930f, -0.945607325380521280f, - 0.331106305759876210f, -0.943593458161960390f, - 0.336889853392219610f, -0.941544065183020920f, - 0.342660717311994540f, -0.939459223602189810f, - 0.348418680249434510f, -0.937339011912574960f, - 0.354163525420490070f, -0.935183509938947720f, - 0.359895036534987610f, -0.932992798834739070f, - 0.365612997804773960f, -0.930766961078983710f, - 0.371317193951837380f, -0.928506080473215590f, - 0.377007410216417870f, -0.926210242138311490f, - 0.382683432365090000f, -0.923879532511286630f, - 
0.388345046698826300f, -0.921514039342041900f, - 0.393992040061047880f, -0.919113851690057880f, - 0.399624199845646400f, -0.916679059921042820f, - 0.405241314004990030f, -0.914209755703530580f, - 0.410843171057903860f, -0.911706032005429880f, - 0.416429560097636870f, -0.909167983090522490f, - 0.422000270799799180f, -0.906595704514915560f, - 0.427555093430282140f, -0.903989293123443340f, - 0.433093818853151790f, -0.901348847046022140f, - 0.438616238538527270f, -0.898674465693954040f, - 0.444122144570429420f, -0.895966249756185000f, - 0.449611329654606600f, -0.893224301195515320f, - 0.455083587126343610f, -0.890448723244757990f, - 0.460538710958239560f, -0.887639620402854160f, - 0.465976495767966290f, -0.884797098430937680f, - 0.471396736825997590f, -0.881921264348355050f, - 0.476799230063321870f, -0.879012226428633640f, - 0.482183772079122220f, -0.876070094195406930f, - 0.487550160148436000f, -0.873094978418290090f, - 0.492898192229783870f, -0.870086991108711460f, - 0.498227666972781480f, -0.867046245515692870f, - 0.503538383725717800f, -0.863972856121586590f, - 0.508830142543106990f, -0.860866938637767310f, - 0.514102744193221550f, -0.857728610000272230f, - 0.519355990165589200f, -0.854557988365400760f, - 0.524589682678469060f, -0.851355193105265080f, - 0.529803624686294610f, -0.848120344803297340f, - 0.534997619887096930f, -0.844853565249707230f, - 0.540171472729892410f, -0.841554977436898780f, - 0.545324988422046460f, -0.838224705554837970f, - 0.550457972936604700f, -0.834862874986380120f, - 0.555570233019601840f, -0.831469612302545460f, - 0.560661576197336250f, -0.828045045257755690f, - 0.565731810783613120f, -0.824589302785025290f, - 0.570780745886967030f, -0.821102514991104870f, - 0.575808191417844890f, -0.817584813151584040f, - 0.580813958095764640f, -0.814036329705948300f, - 0.585797857456438750f, -0.810457198252594880f, - 0.590759701858873940f, -0.806847553543799450f, - 0.595699304492432910f, -0.803207531480645280f, - 0.600616479383868970f, 
-0.799537269107905010f, - 0.605511041404325320f, -0.795836904608883680f, - 0.610382806276309140f, -0.792106577300212610f, - 0.615231590580627040f, -0.788346427626606120f, - 0.620057211763289100f, -0.784556597155575240f, - 0.624859488142386120f, -0.780737228572094600f, - 0.629638238914926650f, -0.776888465673232780f, - 0.634393284163645600f, -0.773010453362736880f, - 0.639124444863775620f, -0.769103337645579700f, - 0.643831542889791160f, -0.765167265622459180f, - 0.648514401022112000f, -0.761202385484262220f, - 0.653172842953776760f, -0.757208846506484570f, - 0.657806693297078530f, -0.753186799043612630f, - 0.662415777590171450f, -0.749136394523459590f, - 0.666999922303637690f, -0.745057785441465840f, - 0.671558954847018330f, -0.740951125354959110f, - 0.676092703575315700f, -0.736816568877370020f, - 0.680600997795452690f, -0.732654271672413150f, - 0.685083667772700470f, -0.728464390448225090f, - 0.689540544737066830f, -0.724247082951467000f, - 0.693971460889653780f, -0.720002507961381880f, - 0.698376249408972360f, -0.715730825283819040f, - 0.702754744457225300f, -0.711432195745216430f, - 0.707106781186547350f, -0.707106781186547680f, - 0.711432195745216100f, -0.702754744457225630f, - 0.715730825283818820f, -0.698376249408972690f, - 0.720002507961381540f, -0.693971460889654000f, - 0.724247082951466670f, -0.689540544737067160f, - 0.728464390448224860f, -0.685083667772700800f, - 0.732654271672412930f, -0.680600997795453020f, - 0.736816568877369790f, -0.676092703575316030f, - 0.740951125354958880f, -0.671558954847018660f, - 0.745057785441465500f, -0.666999922303638030f, - 0.749136394523459370f, -0.662415777590171780f, - 0.753186799043612300f, -0.657806693297078860f, - 0.757208846506484230f, -0.653172842953777090f, - 0.761202385484261890f, -0.648514401022112330f, - 0.765167265622458850f, -0.643831542889791500f, - 0.769103337645579480f, -0.639124444863775950f, - 0.773010453362736660f, -0.634393284163645930f, - 0.776888465673232550f, -0.629638238914926980f, - 
0.780737228572094380f, -0.624859488142386450f, - 0.784556597155575020f, -0.620057211763289540f, - 0.788346427626605890f, -0.615231590580627370f, - 0.792106577300212390f, -0.610382806276309480f, - 0.795836904608883340f, -0.605511041404325660f, - 0.799537269107904790f, -0.600616479383869310f, - 0.803207531480645050f, -0.595699304492433250f, - 0.806847553543799220f, -0.590759701858874280f, - 0.810457198252594660f, -0.585797857456439090f, - 0.814036329705948080f, -0.580813958095764970f, - 0.817584813151583710f, -0.575808191417845230f, - 0.821102514991104540f, -0.570780745886967370f, - 0.824589302785025070f, -0.565731810783613560f, - 0.828045045257755350f, -0.560661576197336590f, - 0.831469612302545240f, -0.555570233019602180f, - 0.834862874986379900f, -0.550457972936605030f, - 0.838224705554837750f, -0.545324988422046800f, - 0.841554977436898440f, -0.540171472729892740f, - 0.844853565249707010f, -0.534997619887097260f, - 0.848120344803297120f, -0.529803624686294940f, - 0.851355193105264860f, -0.524589682678469390f, - 0.854557988365400530f, -0.519355990165589530f, - 0.857728610000272010f, -0.514102744193221880f, - 0.860866938637767090f, -0.508830142543107430f, - 0.863972856121586360f, -0.503538383725718130f, - 0.867046245515692650f, -0.498227666972781870f, - 0.870086991108711350f, -0.492898192229784260f, - 0.873094978418289870f, -0.487550160148436380f, - 0.876070094195406710f, -0.482183772079122610f, - 0.879012226428633410f, -0.476799230063322200f, - 0.881921264348354830f, -0.471396736825997920f, - 0.884797098430937460f, -0.465976495767966680f, - 0.887639620402853930f, -0.460538710958239950f, - 0.890448723244757770f, -0.455083587126344000f, - 0.893224301195515100f, -0.449611329654606980f, - 0.895966249756184880f, -0.444122144570429810f, - 0.898674465693953820f, -0.438616238538527660f, - 0.901348847046021920f, -0.433093818853152180f, - 0.903989293123443120f, -0.427555093430282530f, - 0.906595704514915450f, -0.422000270799799570f, - 0.909167983090522380f, 
-0.416429560097637260f, - 0.911706032005429660f, -0.410843171057904240f, - 0.914209755703530470f, -0.405241314004990420f, - 0.916679059921042700f, -0.399624199845646790f, - 0.919113851690057660f, -0.393992040061048270f, - 0.921514039342041790f, -0.388345046698826690f, - 0.923879532511286520f, -0.382683432365090390f, - 0.926210242138311380f, -0.377007410216418260f, - 0.928506080473215480f, -0.371317193951837820f, - 0.930766961078983490f, -0.365612997804774350f, - 0.932992798834738960f, -0.359895036534988000f, - 0.935183509938947500f, -0.354163525420490510f, - 0.937339011912574850f, -0.348418680249434900f, - 0.939459223602189700f, -0.342660717311994930f, - 0.941544065183020810f, -0.336889853392220000f, - 0.943593458161960270f, -0.331106305759876600f, - 0.945607325380521170f, -0.325310292162263370f, - 0.947585591017741200f, -0.319502030816015470f, - 0.949528180593036670f, -0.313681740398891520f, - 0.951435020969008340f, -0.307849640041535140f, - 0.953306040354193640f, -0.302005949319228580f, - 0.955141168305770780f, -0.296150888243623730f, - 0.956940335732208820f, -0.290284677254462500f, - 0.958703474895871490f, -0.284407537211272210f, - 0.960430519415565680f, -0.278519689385053670f, - 0.962121404269041580f, -0.272621355449948980f, - 0.963776065795439840f, -0.266712757474898590f, - 0.965394441697689290f, -0.260794117915275960f, - 0.966976471044852180f, -0.254865659604514410f, - 0.968522094274417270f, -0.248927605745720200f, - 0.970031253194543970f, -0.242980179903264180f, - 0.971503890986251670f, -0.237023605994367730f, - 0.972939952205560180f, -0.231058108280671000f, - 0.974339382785575860f, -0.225083911359792970f, - 0.975702130038528460f, -0.219101240156870160f, - 0.977028142657754170f, -0.213110319916091970f, - 0.978317370719627650f, -0.207111376192218530f, - 0.979569765685440520f, -0.201104634842092120f, - 0.980785280403230320f, -0.195090322016128720f, - 0.981963869109555350f, -0.189068664149806030f, - 0.983105487431216290f, -0.183039887955141010f, - 
0.984210092386929030f, -0.177004220412149050f, - 0.985277642388941110f, -0.170961888760301770f, - 0.986308097244598670f, -0.164913120489969810f, - 0.987301418157858320f, -0.158858143333861580f, - 0.988257567730749460f, -0.152797185258443800f, - 0.989176509964780900f, -0.146730474455362390f, - 0.990058210262297120f, -0.140658239332849210f, - 0.990902635427780010f, -0.134580708507126420f, - 0.991709753669099410f, -0.128498110793793640f, - 0.992479534598709970f, -0.122410675199216030f, - 0.993211949234794500f, -0.116318630911904840f, - 0.993906970002356060f, -0.110222207293883360f, - 0.994564570734255420f, -0.104121633872055130f, - 0.995184726672196930f, -0.098017140329560506f, - 0.995767414467659820f, -0.091908956497132877f, - 0.996312612182778000f, -0.085797312344440282f, - 0.996820299291165670f, -0.079682437971430750f, - 0.997290456678690210f, -0.073564563599667412f, - 0.997723066644191640f, -0.067443919563664287f, - 0.998118112900149180f, -0.061320736302209057f, - 0.998475580573294770f, -0.055195244349689775f, - 0.998795456205172410f, -0.049067674327418091f, - 0.999077727752645360f, -0.042938256934941139f, - 0.999322384588349430f, -0.036807222941359394f, - 0.999529417501093140f, -0.030674803176636543f, - 0.999698818696204250f, -0.024541228522912448f, - 0.999830581795823400f, -0.018406729905805226f, - 0.999924701839144500f, -0.012271538285720572f, - 0.999981175282601110f, -0.006135884649154477f -}; - -static ne10_float32_t rfft_twiddle_coef_re[1024]; -static ne10_float32_t rfft_twiddle_coef_im[1024]; - - -/** -* @brief Initializations for Real FFT module -* @param[in] *S Instance pointer of Real FFT data structure. -* @param[in] *S_CFFT Instance pointer of Complex FFT data structure. -* @param[in] fftLen FFT length. -* @param[in] ifftFlagR 0 = forward Real FFT. 1 = inverse Real FFT -* @param[in] bitReverseFlag 0 = Result will be in bit-reversed order. 1 = Result will be in normal order -* @return none. 
-* The function initializes the Twiddle factors table and bit reverse table -*/ - -ne10_result_t ne10_rfft_init_float( - ne10_rfft_instance_f32_t * S, - ne10_cfft_radix4_instance_f32_t * S_CFFT, - ne10_uint32_t fftLen, - ne10_uint32_t ifftFlagR) -{ - ne10_uint32_t i,j; - - /* Initialise the default arm status */ - ne10_result_t status = NE10_OK; - - /* Initialize the Real FFT length */ - S->fft_len_real = (ne10_uint16_t) fftLen; - - /* Initialize the Complex FFT length */ - S->fft_len_by2 = (ne10_uint16_t) fftLen / 2u; - - /* Initialize the Flag for selection of RFFT or RIFFT */ - S->ifft_flag_r = (ne10_uint8_t) ifftFlagR; - - /* Initialize the Flag for calculation Bit reversal or not */ - //S->bit_reverse_flag_r = (ne10_uint8_t) bitReverseFlag; - - S->twid_coef_r_modifier = 1u; - - /* Initializations of structure parameters depending on the FFT length */ - switch (S->fft_len_real) - { - /* Init table modifier value */ - case 2048u: - for(i=0,j=0;i<1024;i++) - { - rfft_twiddle_coef_re[i] = rfft_twiddle_coef[2*j]; - rfft_twiddle_coef_im[i] = rfft_twiddle_coef[2*j+1]; - j= j+ 1; - } - /* Initialize the Twiddle coefficientA pointer */ - S->p_twiddle_A_real = (ne10_float32_t *) rfft_twiddle_coef_re; - /* Initialize the Twiddle coefficientB pointer */ - S->p_twiddle_B_real = (ne10_float32_t *) rfft_twiddle_coef_im; - break; - case 512u: - for(i=0,j=0;i<512;i++) - { - rfft_twiddle_coef_re[i] = rfft_twiddle_coef[2*j]; - rfft_twiddle_coef_im[i] = rfft_twiddle_coef[2*j+1]; - j= j+ 2; - } - /* Initialize the Twiddle coefficientA pointer */ - S->p_twiddle_A_real = (ne10_float32_t *) rfft_twiddle_coef_re; - /* Initialize the Twiddle coefficientB pointer */ - S->p_twiddle_B_real = (ne10_float32_t *) rfft_twiddle_coef_im; - break; - case 128u: - for(i=0,j=0;i<128;i++) - { - rfft_twiddle_coef_re[i] = rfft_twiddle_coef[2*j]; - rfft_twiddle_coef_im[i] = rfft_twiddle_coef[2*j+1]; - j= j+ 8; - } - /* Initialize the Twiddle coefficientA pointer */ - S->p_twiddle_A_real = 
(ne10_float32_t *) rfft_twiddle_coef_re; - /* Initialize the Twiddle coefficientB pointer */ - S->p_twiddle_B_real = (ne10_float32_t *) rfft_twiddle_coef_im; - break; - default: - /* Reporting argument error if rfftSize is not valid value */ - status = NE10_ERR; - break; - } - - /* Init Complex FFT Instance */ - S->p_cfft = S_CFFT; - - if(S->ifft_flag_r) - { - /* Initializes the CIFFT Module for Nreal/2 length */ - ne10_cfft_radix4_init_float(S->p_cfft, S->fft_len_by2, 1u); - } - else - { - /* Initializes the CFFT Module for Nreal/2 length */ - ne10_cfft_radix4_init_float(S->p_cfft, S->fft_len_by2, 0u); - } - - /* return the status of RFFT Init function */ - return (status); - -} - - diff --git a/modules/dsp/test/test_main.c b/modules/dsp/test/test_main.c index 34588f1..2054e74 100644 --- a/modules/dsp/test/test_main.c +++ b/modules/dsp/test/test_main.c @@ -31,14 +31,12 @@ #include "seatest.h" -void test_fixture_cfft (void); -void test_fixture_rfft (void); -void test_fixture_fft_c2c_1d_float32(void); -void test_fixture_fft_c2c_1d_int32(void); -void test_fixture_fft_c2c_1d_int16(void); -void test_fixture_fft_r2c_1d_float32(void); -void test_fixture_fft_r2c_1d_int32(void); -void test_fixture_fft_r2c_1d_int16(void); +void test_fixture_fft_c2c_1d_float32 (void); +void test_fixture_fft_c2c_1d_int32 (void); +void test_fixture_fft_c2c_1d_int16 (void); +void test_fixture_fft_r2c_1d_float32 (void); +void test_fixture_fft_r2c_1d_int32 (void); +void test_fixture_fft_r2c_1d_int16 (void); void test_fixture_fir (void); void test_fixture_fir_decimate (void); void test_fixture_fir_interpolate (void); @@ -48,8 +46,6 @@ void test_fixture_iir_lattice (void); void all_tests (void) { - test_fixture_cfft(); - test_fixture_rfft(); test_fixture_fft_c2c_1d_float32(); test_fixture_fft_c2c_1d_int32(); test_fixture_fft_c2c_1d_int16(); diff --git a/modules/dsp/test/test_suite_cfft.c b/modules/dsp/test/test_suite_cfft.c deleted file mode 100644 index cc640bc..0000000 --- 
a/modules/dsp/test/test_suite_cfft.c +++ /dev/null @@ -1,648 +0,0 @@ -/* - * Copyright 2012-14 ARM Limited - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of ARM Limited nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -/* - * NE10 Library : test_suite_cfft.c - */ - -#include -#include -#include -#include - -#include "NE10_dsp.h" -#include "seatest.h" -#include "unit_test_common.h" - -/* ---------------------------------------------------------------------- -** Global defines -** ------------------------------------------------------------------- */ - -/* Max FFT Length 1024 and double buffer for real and imag */ -#define TEST_LENGTH_SAMPLES (1024 * 2) - -#define TEST_COUNT 5000 - -/* ---------------------------------------------------------------------- -** Test input data for F32 -** Generated by the MATLAB rand() function -** ------------------------------------------------------------------- */ - -static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] = -{ - -0.432565, 0.864397, -1.665584, 0.094203, 0.125332, -0.851909, 0.287676, 0.873504, - -1.146471, -0.438039, 1.190915, -0.429661, 1.189164, -1.102729, -0.037633, 0.396247, - 0.327292, -0.964925, 0.174639, 0.168449, -0.186709, -1.965359, 0.725791, -0.744302, - -0.588317, -0.552307, 2.183186, -0.819726, -0.136396, 1.109142, 0.113931, -0.614946, - 1.066768, -0.254635, 0.059281, -0.269830, -0.095648, -1.671994, -0.832349, -1.876045, - 0.294411, 0.575006, -1.336182, -0.866133, 0.714325, -2.116523, 1.623562, -0.964466, - -0.691776, 0.212729, 0.857997, 0.477917, 1.254001, 0.100658, -1.593730, 0.297433, - -1.440964, 0.570148, 0.571148, -1.624496, -0.399886, 0.643443, 0.689997, 0.681861, - 0.815622, 0.014655, 0.711908, -1.301541, 1.290250, -1.284587, 0.668601, 0.812213, - 1.190838, 0.838548, -1.202457, 1.420321, -0.019790, -0.989752, -0.156717, -1.183229, - -1.604086, -0.466259, 0.257304, -0.365943, -1.056473, 1.118333, 1.415141, -0.465615, - -0.805090, -1.560800, 0.528743, -0.283103, 0.219321, -1.322941, -0.921902, -0.196238, - -2.170674, 0.419039, -0.059188, 0.742318, -1.010634, -0.143032, 0.614463, -2.161943, - 0.507741, -0.644226, 1.692430, 1.439590, 0.591283, -0.846917, -0.643595, 0.057340, - 0.380337, 0.643408, 
-1.009116, -0.670431, -0.019511, -0.003142, -0.048221, 0.352931, - 0.000043, 1.179502, -0.317859, -0.685902, 1.095004, 1.676789, -1.873990, -0.255309, - 0.428183, -0.647548, 0.895638, -0.182214, 0.730957, 0.851800, 0.577857, -0.306550, - 0.040314, -0.440529, 0.677089, -0.611472, 0.568900, -0.485207, -0.255645, 1.197019, - -0.377469, 1.394788, -0.295887, 0.165368, -1.475135, -0.509967, -0.234004, 1.377717, - 0.118445, 1.298518, 0.314809, -0.130117, 1.443508, 0.740249, -0.350975, 1.332017, - 0.623234, -0.278071, 0.799049, -0.327993, 0.940890, -0.012527, -0.992092, 0.903179, - 0.212035, -1.112463, 0.237882, -0.839211, -1.007763, 0.035534, -0.742045, -1.246529, - 1.082295, 0.884505, -0.131500, 2.538334, 0.389880, 1.316795, 0.087987, 1.442213, - -0.635465, 1.466919, -0.559573, -1.107052, 0.443653, -0.460936, -0.949904, -0.020296, - 0.781182, -0.045998, 0.568961, -0.544487, -0.821714, 0.917035, -0.265607, -0.019418, - -1.187777, 0.774630, -2.202321, -0.594053, 0.986337, 1.820276, -0.518635, 0.524719, - 0.327368, 0.685938, 0.234057, -0.901304, 0.021466, 2.136023, -1.003944, 0.320126, - -0.947146, -1.584119, -0.374429, -0.502514, -1.185886, 0.737926, -1.055903, -0.525392, - 1.472480, -1.532115, 0.055744, -0.153786, -1.217317, -0.646732, -0.041227, -1.341450, - -1.128344, 0.271534, -1.349278, 0.339541, -0.261102, 1.674580, 0.953465, 0.335636, - 0.128644, -0.550556, 0.656468, -0.286507, -1.167819, -0.814791, -0.460605, 0.053508, - -0.262440, -0.427841, -1.213152, 0.463860, -1.319437, 0.416588, 0.931218, 0.191634, - 0.011245, -1.284328, -0.645146, -1.006709, 0.805729, 0.041786, 0.231626, -0.757276, - -0.989760, 2.278871, 1.339586, -1.800414, 0.289502, 0.176299, 1.478917, -0.263794, - 1.138028, -0.833888, -0.684139, 0.220767, -1.291936, -0.882230, -0.072926, 0.856510, - -0.330599, -0.925690, -0.843628, -0.914070, 0.497770, -1.327629, 1.488490, 1.611727, - -0.546476, -0.561827, -0.846758, 0.276041, -0.246337, -0.227653, 0.663024, 0.184183, - -0.854197, 0.082830, -1.201315, 
0.452035, -0.119869, 0.101411, -0.065294, -0.365760, - 0.485296, -0.091035, -0.595491, 0.739457, -0.149668, 0.940328, -0.434752, -0.028961, - -0.079330, -0.928710, 1.535152, 0.745038, -0.606483, 2.488098, -1.347363, 0.691925, - 0.469383, -0.941710, -0.903567, 0.384997, 0.035880, -0.278887, -0.627531, -0.982944, - 0.535398, 1.620751, 0.552884, -3.051825, -0.203690, -0.048454, -2.054325, 0.318202, - 0.132561, -0.635514, 1.592941, -1.028736, 1.018412, 1.641380, -1.580402, 0.019495, - -0.078662, -2.047269, -0.681657, -1.129305, -1.024553, -2.355586, -1.234353, -0.561249, - 0.288807, -0.087973, -0.429303, 1.073777, 0.055801, -0.311909, -0.367874, -1.478774, - -0.464973, -0.043979, 0.370961, -0.799868, 0.728283, -0.865158, 2.112160, -0.119007, - -1.357298, -0.214830, -1.022610, 0.007315, 1.037834, -1.039472, -0.389800, 0.832836, - -1.381266, -0.746695, 0.315543, 0.349276, 1.553243, 0.484013, 0.707894, -1.007859, - 1.957385, 1.003469, 0.504542, -2.676089, 1.864529, 0.016822, -0.339812, -1.443245, - -1.139779, 0.106502, -0.211123, -0.523471, 1.190245, 0.968581, -1.116209, -0.675762, - 0.635274, -1.086512, -0.601412, 0.792917, 0.551185, 1.607967, -1.099840, -1.386200, - 0.085991, 0.858656, -2.004563, 0.207575, -0.493088, 1.048865, 0.462048, -0.784071, - -0.321005, -0.326146, 1.236556, -0.415365, -0.631280, -0.340785, -2.325211, 0.565016, - -1.231637, 0.441829, 1.055648, -0.109207, -0.113224, 0.430549, 0.379224, 0.693041, - 0.944200, -0.547589, -2.120427, 0.944736, -0.644679, -0.792557, -0.704302, 0.280168, - -1.018137, -1.642974, -0.182082, 0.314746, 1.521013, 1.030286, -0.038439, 1.751701, - 1.227448, -0.251608, -0.696205, 1.819214, 0.007524, 1.234399, -0.782893, -2.339612, - 0.586939, -0.038625, -0.251207, 0.007293, 0.480136, -0.565029, 0.668155, 1.108257, - -0.078321, 0.520474, 0.889173, -0.497671, 2.309287, -0.177898, 0.524639, 1.091016, - -0.011787, 1.159731, 0.913141, 0.675004, 0.055941, 2.291756, -1.107070, -1.398845, - 0.485498, -1.532820, -0.005005, 0.403012, 
-0.276218, -0.466509, 1.276452, 0.428272, - 1.863401, -1.390515, -0.522559, -0.613866, 0.103424, -0.995531, -0.807649, -1.106047, - 0.680439, 0.345156, -2.364590, 1.638406, 0.990115, -0.550912, 0.218899, 1.664607, - 0.261662, -0.048037, 1.213444, 0.662008, -0.274667, -0.296988, -0.133134, 1.660689, - -1.270500, 0.057131, -1.663606, -2.227418, -0.703554, 1.245199, 0.280880, -1.158628, - -0.541209, 0.867397, -1.333531, -0.801315, 1.072686, -0.263610, -0.712085, 0.751058, - -0.011286, 1.795228, -0.000817, 0.984351, -0.249436, 0.046669, 0.396575, 0.323443, - -0.264013, 0.522442, -1.664011, -0.788527, -1.028975, 0.734071, 0.243095, 0.080416, - -1.256590, -0.543988, -0.347183, 0.316257, -0.941372, -1.408710, -1.174560, 0.186814, - -1.021142, -2.262433, -0.401667, 0.500375, 0.173666, -0.224826, -0.116118, -1.455474, - 1.064119, -0.015503, -0.245386, -0.437796, -1.517539, 0.907150, 0.009734, 1.284133, - 0.071373, -0.730091, 0.316536, -1.472669, 0.499826, -1.594354, 1.278084, 0.497586, - -0.547816, 0.741050, 0.260808, -0.355039, -0.013177, -0.810574, -0.580264, 0.238212, - 2.136308, 1.505073, -0.257617, -1.189561, -1.409528, -0.194823, 1.770101, 0.624787, - 0.325546, -1.278067, -1.119040, 0.100259, 0.620350, -0.342182, 1.269782, -0.002015, - -0.896043, -0.498406, 0.135175, 1.049755, -0.139040, -1.670559, -1.163395, -2.014370, - 1.183720, 0.986616, -0.015430, -0.060483, 0.536219, 1.192941, -0.716429, 2.685580, - -0.655559, 0.853734, 0.314363, 1.005549, 0.106814, -0.000982, 1.848216, -0.560458, - -0.275106, -0.191396, 2.212554, -0.048913, 1.508526, 0.600460, -1.945079, -1.994642, - -1.680543, -0.965134, -0.573534, -0.943199, -0.185817, -0.200671, 0.008934, 0.556167, - 0.836950, 2.018381, -0.722271, 1.813736, -0.721490, -0.112448, -0.201181, -0.889976, - -0.020464, -0.726843, 0.278890, 0.763502, 1.058295, -0.598514, 0.621673, 0.723730, - -1.750615, -0.867938, 0.697348, 0.841673, 0.811486, -0.850938, 0.636345, 0.933427, - 1.310080, 0.485960, 0.327098, -0.216203, -0.672993, 
-0.381497, -0.149327, -1.427041, - -2.449018, -1.487669, 0.473286, -2.515103, 0.116946, -1.306210, -0.591104, -0.376950, - -0.654708, -1.107504, -1.080662, 0.312778, -0.047731, -0.845240, 0.379345, 0.237598, - -0.330361, -0.918767, -0.499898, 2.441691, -0.035979, 0.083121, -0.174760, 0.266263, - -0.957265, -0.762727, 1.292548, -2.492805, 0.440910, -0.163872, 1.280941, 0.701879, - -0.497730, -0.855063, -1.118717, 0.373834, 0.807650, -0.504156, 0.041200, -1.074581, - -0.756209, -0.632952, -0.089129, 1.854859, -2.008850, 0.467423, 1.083918, 1.316068, - -0.981191, 1.779038, -0.688489, -0.384638, 1.339479, 0.895129, -0.909243, 0.473642, - -0.412858, -0.023571, -0.506163, 1.612449, 1.619748, 0.839672, 0.080901, 0.247906, - -1.081056, -0.540454, -1.124518, -1.808434, 1.735676, -0.266203, 1.937459, 0.769024, - 1.635068, 0.076724, -1.255940, 0.078595, -0.213538, 1.063096, -0.198932, 0.349197, - 0.307499, 0.755430, -0.572325, -0.624003, -0.977648, -0.421374, -0.446809, 0.596029, - 1.082092, -1.389987, 2.372648, -2.634668, 0.229288, -0.806934, -0.266623, 0.091930, - 0.701672, 2.371014, -0.487590, -0.008736, 1.862480, 2.122155, 1.106851, -0.684231, - -1.227566, -0.413033, -0.669885, -0.857683, 1.340929, 0.970899, 0.388083, -1.064209, - 0.393059, 1.500750, -1.707334, -0.470707, 0.227859, 1.549526, 0.685633, 0.089955, - -0.636790, -1.859541, -1.002606, -1.408604, -0.185621, 0.115434, -1.054033, -0.480661, - -0.071539, 1.236739, 0.279198, -2.015435, 1.373275, 0.563520, 0.179841, -0.043520, - -0.542017, 0.460448, 1.634191, 0.282654, 0.825215, 1.060032, 0.230761, 0.547056, - 0.671634, 0.220117, -0.508078, -1.909701, 0.856352, 1.117189, 0.268503, -1.607931, - 0.624975, -1.443700, -1.047338, -0.314551, 1.535670, 0.766433, 0.434426, 0.174865, - -1.917136, 1.316849, 0.469940, 0.958586, 1.274351, 0.647691, 0.638542, 0.092485, - 1.380782, -0.411274, 1.319843, 0.346629, -0.909429, -0.348980, -2.305605, -0.200402, - 1.788730, 0.393261, 0.390798, -1.852647, 0.020324, 0.996919, -0.405977, 
-0.481047, - -1.534895, -0.295456, 0.221373, -0.309043, -1.374479, -0.383007, -0.839286, 1.023837, - -0.208643, 1.360480, 0.755913, -0.705832, 0.375734, -0.609368, -1.345413, -0.112009, - 1.481876, 0.905851, 0.032736, -0.592901, 1.870453, 2.144165, -1.208991, 0.748569, - -0.782632, -1.654092, -0.767299, -0.977911, -0.107200, -0.347368, -0.977057, -0.107734, - -0.963988, -0.402626, -2.379172, -1.065617, -0.838188, 0.878523, 0.257346, 0.460551, - -0.183834, -1.078622, -0.167615, 0.644741, -0.116989, 0.605399, 0.168488, 0.055073, - -0.501206, -0.005505, -0.705076, -0.099485, 0.508165, -0.225578, -0.420922, -1.026005, - 0.229133, -0.732352, -0.959497, -1.405453, -0.146043, -1.119476, 0.744538, 0.186157, - -0.890496, -0.314564, 0.139062, -0.088767, -0.236144, -0.160919, -0.075459, -1.936278, - -0.358572, 2.751755, -2.077635, 1.292404, -0.143546, -0.233895, 1.393341, -0.193140, - 0.651804, -0.104019, -0.377134, -0.814926, -0.661443, -0.108576, 0.248958, -1.569143, - -0.383516, 0.212114, -0.528480, 1.678775, 0.055388, 0.379010, 1.253769, -0.668419, - -2.520004, 1.727974, 0.584856, 1.693388, -1.008064, -0.787045, 0.944285, -1.874471, - -2.423957, 0.023853, -0.223831, 1.518454, 0.058070, 0.534477, -0.424614, -1.355467, - -0.202918, 0.280923, -1.513077, 0.182100, -1.126352, -0.256567, -0.815002, 0.858411, - 0.366614, 0.057070, -0.586107, -1.462498, 1.537409, -2.326166, 0.140072, 2.562645, - -1.862767, -0.639321, -0.454193, 0.706010, -0.652074, 0.627374, 0.103318, -1.465271, - -0.220632, 0.548954, -0.279043, 1.894620, -0.733662, 0.901939, -0.064534, -0.684842, - -1.444004, -0.410065, 0.612340, -1.834344, -1.323503, -0.357176, -0.661577, -0.081545, - -0.146115, -0.557160, 0.248085, -1.778299, -0.076633, 0.038674, 1.738170, 1.603402, - 1.621972, 0.428308, 0.626436, -0.321679, 0.091814, 0.158667, -0.807607, -1.831225, - -0.461337, 1.083138, -1.405969, -0.442318, -0.374530, 0.213002, -0.470911, -0.429068, - 1.751296, 1.112692, 0.753225, 1.054038, 0.064989, 0.192183, -0.292764, 
-0.175647, - 0.082823, 0.561421, 0.766191, 1.251021, 2.236850, -0.419377, 0.326887, -1.464906, - 0.863304, -0.953308, 0.679387, 1.384259, 0.554758, -0.966553, 1.001630, -0.002071, - 1.259365, 0.508627, 0.044151, 0.346342, -0.314138, -1.396941, 0.226708, 0.520130, - 0.996692, -0.349830, 1.215912, 0.530292, -0.542702, -0.256369, 0.912228, -1.617286, - -0.172141, 1.556859, -0.335955, 0.821068, 0.541487, 0.206095, 0.932111, -1.697353, - -0.570253, -0.168337, -1.498605, 0.828194, -0.050346, 0.047643, 0.553025, -0.815924, - 0.083498, 0.927294, 1.577524, 1.072150, -0.330774, 0.775039, 0.795155, -1.018418, - -0.784800, -1.575652, -1.263121, 1.943766, 0.666655, 1.479345, -1.392632, 1.581105, - -1.300562, -0.514692, -0.605022, -0.907108, -1.488565, 2.258803, 0.558543, 0.040773, - -0.277354, 0.242866, -1.293685, -0.346606, -0.888435, 1.047313, -0.986520, -0.267101, - -0.071618, -0.821778, -2.414591, 0.035640, -0.694349, 1.483087, -1.391389, 0.361272, - 0.329648, 0.623759, 0.598544, -0.910249, 0.147175, -2.556832, -0.101439, 1.665057, - -2.634981, -0.959581, 0.028053, -0.516870, -0.876310, -0.004631, -0.265477, -0.435447, - -0.327578, 0.881754, -1.158247, 0.497467, 0.580053, -0.853947, 0.239756, 0.541670, - -0.350885, 0.551414, 0.892098, -0.137816, 1.578299, -0.643850, -1.108174, -1.300456, - -0.025931, -1.254519, -1.110628, 1.840194, 0.750834, -0.658852, 0.500167, -0.275497, - -0.517261, 1.482824, -0.559209, -0.008348, -0.753371, 0.090242, 0.925813, -1.871995, - -0.248520, -2.196485, -0.149835, -1.042585, -1.258415, 0.545135, 0.312620, -1.164465, - 2.690277, 0.796787, 0.289696, -0.250295, -1.422803, -1.112213, 0.246786, -0.273161, - -1.435773, -1.013451, 0.148573, 0.872165, -1.693073, -1.055581, 0.719188, 0.848015, - 1.141773, 0.301299, 1.551936, -0.682287, 1.383630, -0.507902, -0.758092, -1.029466, - 0.442663, -0.285836, 0.911098, -1.676208, -1.074086, -0.497489, 0.201762, -0.386898, - 0.762863, 0.043459, -1.288187, -0.655169, -0.952962, -0.146682, 0.778175, 0.085724, - 
-0.006331, -0.961628, 0.524487, 0.459634, 1.364272, -0.516323, 0.482039, -0.735290, - -0.787066, 1.470784, 0.751999, 0.997273, -0.166888, 1.306983, -0.816228, 0.101254, - 2.094065, 1.577574, 0.080153, 2.966203, -0.937295, -0.293681, 0.635739, 1.343905, - 1.682028, -0.749792, 0.593634, -0.698793, 0.790153, -1.302117, 0.105254, -0.171760, - -0.158579, 0.711281, 0.870907, -0.161837, -0.194759, 0.203779, 0.075474, 0.314225, - -0.526635, 0.216177, -0.685484, 0.249631, -0.268388, -1.610941, -1.188346, -0.451156, - 0.248579, -1.600001, 0.102452, -0.145813, -0.041007, 1.192038, -2.247582, 0.285689, - -0.510776, 0.951135, 0.249243, -0.965380, 0.369197, -1.109424, 0.179197, -0.616816, - -0.037283, -1.160418, -1.603310, 0.271828, 0.339372, -1.964992, -0.131135, -0.199710, - 0.485190, 1.792235, 0.598751, -0.079401, -0.086031, 0.764729, 0.325292, 0.660399, - -0.335143, -1.688575, -0.322449, -0.429974, -0.382374, 0.072841, -0.953371, 1.479787, - 0.233576, -0.178427, 1.235245, -1.206583, -0.578532, 0.391987, -0.501537, -0.046549, - 0.722864, 0.952528, 0.039498, 0.492656, 1.541279, 0.307890, -1.701053, -1.667987, - -1.033741, 0.978541, -0.763708, -0.857147, 2.176426, -0.442284, 0.431612, 0.503775, - -0.443765, -0.188553, 0.029996, -0.521717, -0.315671, 0.211892, 0.977846, -0.686392, - 0.018295, -0.884268, 0.817963, -0.059569, 0.702341, -2.475835, -0.231271, 0.565874, - -0.113690, -0.925429, 0.127941, -0.941007, -0.799410, -0.190420, -0.238612, 0.128090, - -0.089463, -0.067882, -1.023264, 1.471262, 0.937538, 1.067682, -1.131719, 0.229875, - -0.710702, -0.005993, -1.169501, -1.168195, 1.065437, -0.901779, -0.680394, 0.323208, - -1.725773, -0.012327, 0.813200, 0.554138, 1.441867, 0.062695, 0.672272, -0.642997, - 0.138665, -0.331304, -0.859534, -0.267175, -0.752251, -0.247761, 1.229615, 0.777400, - 1.150754, 0.343907, -0.608025, 0.863760, 0.806158, 0.858534, 0.217133, 0.687307, - -0.373461, -1.299311, -0.832030, 0.603825, 0.286866, -1.623527, -1.818892, -0.620491, - -1.573051, 
0.643601, 2.015666, -1.145666, -0.071982, 0.844191, 2.628909, -0.042906, - -0.243317, -0.504335, 0.173276, -0.443272, 0.923207, 2.083052, -0.178553, 1.858875, - -0.521705, 0.926594, 1.431962, 0.295415, -0.870117, -0.266329, 0.807542, 0.742388, - -0.510635, -0.080934, 0.743514, 0.935612, 0.847898, -0.835204, -0.829901, -0.745189, - 0.532994, 1.361685, 1.032848, -0.306150, -1.052024, 0.878438, 0.362114, -1.100646, - -0.036787, -0.489116, -1.227636, -1.350240, -0.275099, 0.787780, -0.160435, 0.823409, - -1.083575, -0.679319, -1.954213, 0.597177, -0.909487, -1.171166, -0.005579, 2.037004, - -1.723490, -0.440698, 1.263077, -0.278440, -0.600433, 0.270728, -2.063925, 0.400994, - 0.110911, 0.073894, 1.487614, -1.040991, 0.053002, -1.453535, 0.161981, 0.234838, - -0.026878, 1.049677, 0.173576, 0.341401, 0.882168, -0.992679, 0.182294, -1.617417, - 0.755295, -0.444344, 0.508035, -1.055734, 0.131880, -1.498971, 0.280104, 0.178499, - -0.982848, -0.957286, -0.944087, 1.314400, -0.013058, 0.030501, 0.354345, 0.072074, - -0.894709, 0.555023, 0.812111, -0.729819, 0.109537, 1.096371, 2.731644, 1.335793, - 0.411079, 0.411439, -1.306862, 1.632891, 0.383806, 0.243401, 0.499504, -0.003108, - -0.510786, -0.738833, 0.234922, -1.767899, -0.597825, 1.794224, 0.020771, 1.281544, - 0.419443, 0.128371, 1.191104, -0.214895, 0.771214, -0.370359, -2.644222, -1.158590, - 0.285430, -1.478329, 0.826093, -1.475635, -0.008122, 0.651251, 0.858438, -0.092348, - 0.774788, -0.367252, 1.305945, 0.817150, 1.231503, 1.235605, 0.958564, 0.336264, - -1.654548, 0.231398, -0.990396, 0.046288, 0.685236, -0.313591, -0.974870, -1.073320, - -0.606726, -0.063315, 0.686794, 0.915108, 0.020049, -1.675039, 1.063801, 0.918174, - -1.341050, 1.023589, 0.479510, -0.904933, -1.633974, -1.921451, -1.442665, -0.136733, - 0.293781, 1.363955, -0.140364, 0.783375, -1.130341, 0.527358, -0.292538, -0.746975, - -0.582536, 1.711351, -0.896348, -0.151251, 0.248601, 1.519014, -1.489663, -0.399837, - 0.313509, -2.012764, -2.025084, 
0.714259, 0.528990, -1.927481, 0.343471, -0.873411, - 0.758193, -0.361042, -0.691940, -1.607898, 0.680179, -0.776993, -1.072541, -0.320873, - 0.899772, -1.313487, -2.123092, -0.108506, 0.284712, -1.017612, -0.733323, 1.300697, - -0.773376, 1.216150, 0.151842, -1.046754, -0.336843, 0.123953, 0.970761, -1.106525, - -0.107236, 0.490938, 1.013492, -1.681596, -0.475347, -0.171544, 0.068948, 0.723101, - 0.398592, -0.777245, 1.116326, -0.093156, 0.620451, 0.167638, -0.287674, -0.637968, - -1.371773, -0.104036, -0.685868, 0.631968, 0.331685, -1.687695, -0.997722, -0.517832, - 0.291418, 0.086520, 1.107078, 2.199959, 0.244959, 0.760919, 0.164976, -1.456448, - 0.406231, -1.774895, 1.215981, 0.295850, 1.448424, 1.018757, -1.025137, -0.643993, - 0.205418, -1.111593, 0.588882, 1.458524, -0.264024, 0.103186, 2.495318, -0.638423, - 0.855948, -0.025377, -0.850954, -1.301284, 0.811879, 0.344693, 0.700242, -1.360544, - 0.759938, 0.235772, -1.712909, 2.432551, 1.537021, -0.352882, -1.609847, -0.253408, - 1.109526, -0.078679, -1.109704, -1.203886, 0.385469, 0.454205, 0.965231, 0.669661, - 0.818297, -0.402472, 0.037049, 0.759026, -0.926012, 1.281841, -0.111919, 0.803598, - -0.803030, -1.204083, -1.665006, -0.826183, -0.901401, -0.711036, 0.588350, 0.436303, - 0.554159, 1.021926, -0.415173, -0.362657, 0.061795, -0.298298, 0.457432, 0.733463, - 0.199014, 0.340668, 0.257558, -1.106307, 2.080730, -2.043328, -2.277237, -0.358905, - 0.339022, 0.595400, 0.289894, 0.375452, 0.662261, 1.202134, -0.580860, 0.543575, - 0.887752, 0.288461, 0.171871, -0.665957, 0.848821, -0.151442, 0.963769, -0.659762, - 1.321918, -1.980876, -0.064345, -1.824813, 1.317053, -0.255301, 0.228017, -0.826776, - -1.429637, 1.532493, -0.149701, 1.704903, -0.504968, -0.214990, -1.729141, 1.705440, - -0.417472, 0.371870, -0.614969, -0.264290, 0.720777, 2.503227, 0.339364, 0.735706, - 0.882845, -1.099957, 0.284245, -1.292489, -0.145541, 1.249176, -0.089646, 0.198285, - 0.289161, -0.704900, 1.164831, 0.384689, 0.805729, 
-0.744461, -1.355643, -0.085510, - 0.120893, -0.760827, -0.222178, 0.588159, 0.571732, -0.488786, -0.300140, -0.790720, - 1.134277, 0.186925, -0.179356, 1.323236, -1.467067, -0.252240, 1.395346, 0.394448, - 0.440836, 1.221421, 0.565384, -0.630894, -0.693623, -0.172785, 0.833869, 0.590400, - -2.237378, 0.485708, 1.097644, -0.345472, -0.001617, 0.387311, -1.614573, 0.004570, - -1.228727, 0.384520, 0.207405, -1.412140, 0.220942, -1.196011, -1.006073, 0.047957, - -0.453067, 0.422308, 1.399453, 1.080871, -0.461964, -0.072034, 0.032716, -0.752875, - 0.798783, -0.555757, 0.896816, -1.304965, 0.137892, -0.112053, -1.619146, 0.367034, - -1.646606, -0.327046, 0.428707, -0.336445, -0.737231, -0.388655, 0.564926, 1.680910, - -1.384167, 0.707246, 0.460268, 1.030518, 0.629384, 0.305059, 0.379847, -1.121984, - -1.013330, -0.122902, -0.347243, -0.693724, 0.441912, 0.875911, -1.590240, -1.094234, - -0.701417, 0.925002, -1.077601, -0.229572, 1.002220, 0.225260, 1.729481, -0.335907, - 0.709032, 1.218315, -0.747897, -0.096137, 0.228862, 0.120568, -0.223497, 1.004884, - -0.853275, -0.657371, 0.345627, 0.405173, 0.109764, 0.890271, -1.133039, 1.449045, - -0.683124, 1.382923, -0.277856, 1.176089, 0.654790, -1.729798, -1.248394, 0.104649, - -0.597539, -1.487626, -0.481813, -1.743067, 0.983372, -0.510919, 1.762121, -0.067293, - 1.427402, -0.063941, 0.911763, -2.196356, 0.326823, 1.106144, 0.069619, 1.526127, - -1.499763, -0.687166, -0.418223, 1.160927, -0.021037, -0.425076, 0.228425, -0.060661, - -1.008196, -1.899981, -0.664622, 1.219038, 0.558177, 0.901112, -1.188542, 0.823237, - -0.775481, 1.882210, 0.271042, 0.238406, 1.534976, -0.429217, -1.052283, -1.797562, - 0.625559, 1.467291, -0.797626, 1.030351, -0.313522, 0.892838, -0.602210, 1.395587, - 1.259060, 0.416488, 0.858484, 1.545120, -2.105292, 0.664929, -0.360937, 0.706299, - 0.553557, 2.759293, -1.556384, -0.051700, -0.206666, -0.839668, -0.425568, 1.555326, - 0.493778, 0.149258, -0.870908, -1.684651, 0.079828, -0.569951, -0.521619, 
0.488593, - -1.413861, -0.029233, -0.384293, -2.238255, -0.457922, -2.117238, -0.291471, 0.152666, - -0.301224, -1.353589, -1.588594, -0.206453, 1.094287, -1.204119, 1.324167, -0.436854, - -0.126480, 0.047149, -0.737164, 2.478964, 0.213719, -1.288683, -0.400529, 0.565879, - 0.064938, -0.489134, -1.757996, 0.571975, 1.686748, -0.533281, 0.327400, 0.764733, - 0.715967, -1.748576, 1.598648, -0.729925, -2.064741, -0.004472, -0.743632, 0.535993, - 0.176185, -0.021122, 0.527839, -0.669683, -0.553153, -0.056435, 0.298280, -0.213079, - -1.226607, 0.432893, -0.189676, -0.065721, -0.301713, -2.272297, 0.956956, -1.046249, - -0.533366, -0.478385, -0.901082, -0.765758, -0.892552, -0.093739, 0.278717, -1.139068, - -0.745807, -0.691504, 1.603464, -3.596550, 0.574270, 0.463068, 0.320655, -1.966329, - -0.151383, 1.222704, 0.315762, 0.237313, 1.343703, -1.015985, -2.237832, 0.640365 -}; - -/* ---------------------------------------------------------------------- -** Defines each of the tests performed -** ------------------------------------------------------------------- */ - -typedef struct -{ - ne10_uint32_t fftSize; - ne10_uint32_t ifftFlag; - ne10_uint32_t doBitReverse; - ne10_float32_t *inputF32; -} test_config_cfft; - -static test_config_cfft CONFIG_CFFT[] = -{ - {1024, 0, 1, &testInput_f32[0]}, - {256, 0, 1, &testInput_f32[0]}, - {64, 0, 1, &testInput_f32[0]}, - {16, 0, 1, &testInput_f32[0]}, -}; -static test_config_cfft CONFIG_CFFT_PERF[] = -{ - {1024, 0, 1, &testInput_f32[0]}, - {256, 0, 1, &testInput_f32[0]}, - {64, 0, 1, &testInput_f32[0]}, - {16, 0, 1, &testInput_f32[0]}, -}; - -#define CFFT_NUM_TESTS (sizeof(CONFIG_CFFT) / sizeof(CONFIG_CFFT[0]) ) -#define CFFT_NUM_PERF_TESTS (sizeof(CONFIG_CFFT_PERF) / sizeof(CONFIG_CFFT_PERF[0]) ) - -//input and output -static ne10_float32_t * guarded_in_c = NULL; -static ne10_float32_t * guarded_in_neon = NULL; -static ne10_float32_t * in_c = NULL; -static ne10_float32_t * in_neon = NULL; - -static ne10_float32_t * guarded_out_c = 
NULL; -static ne10_float32_t * guarded_out_neon = NULL; -static ne10_float32_t * out_c = NULL; -static ne10_float32_t * out_neon = NULL; - -static ne10_float32_t snr = 0.0f; - -#ifdef PERFORMANCE_TEST -static ne10_int64_t time_c = 0; -static ne10_int64_t time_neon = 0; -static ne10_int64_t time_overhead_c = 0; -static ne10_int64_t time_overhead_neon = 0; -static ne10_float32_t time_speedup = 0.0f; -static ne10_float32_t time_savings = 0.0f; -#endif - -void test_cfft_case0() -{ - ne10_float32_t *p_src = testInput_f32; - ne10_cfft_radix4_instance_f32_t S; - - ne10_uint16_t loop = 0; - ne10_uint16_t k = 0; - ne10_uint16_t i = 0; - ne10_uint16_t pos = 0; - - test_config_cfft *config; - ne10_result_t status = NE10_OK; - - fprintf (stdout, "----------%30s start\n", __FUNCTION__); - - /* init input memory */ - NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the begining and 16 extra bytes at the end - NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the begining and 16 extra bytes at the end - - /* init dst memory */ - NE10_DST_ALLOC (out_c, guarded_out_c, TEST_LENGTH_SAMPLES); - NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES); - -#if defined (SMOKE_TEST)||(REGRESSION_TEST) - for (loop = 0; loop < CFFT_NUM_TESTS; loop++) - { - config = &CONFIG_CFFT[loop]; - - /* Initialize the CFFT/CIFFT module */ - status = ne10_cfft_radix4_init_float (&S, config->fftSize, config->ifftFlag); - - if (status == NE10_ERR) - { - printf ("fft init error!\n"); - } - - /* copy input to input buffer and clear the output buffer */ - for (i = 0; i < 2 * config->fftSize; i++) - { - in_c[i] = testInput_f32[i]; - in_neon[i] = testInput_f32[i]; - } - - /* FFT test */ - GUARD_ARRAY (out_c, config->fftSize * 2); - GUARD_ARRAY (out_neon, config->fftSize * 2); - - ne10_radix4_butterfly_float_c (out_c, in_c, S.fft_len, S.p_twiddle); - ne10_radix4_butterfly_float_neon (out_neon, in_neon, S.fft_len, S.p_twiddle); - - 
CHECK_ARRAY_GUARD (out_c, config->fftSize * 2); - CHECK_ARRAY_GUARD (out_neon, config->fftSize * 2); - - //conformance test 1: compare snr - snr = CAL_SNR_FLOAT32 (out_c, out_neon, 2 * config->fftSize); - assert_false ( (snr < SNR_THRESHOLD)); - - //conformance test 2: compare output of C and neon -#if defined (DEBUG_TRACE) - printf ("--------------------config %d\n", loop); - printf ("fftSize: %d ifftFlag: %d\n", config->fftSize, config->ifftFlag); -#endif - for (pos = 0; pos < config->fftSize * 2; pos++) - { -#if defined (DEBUG_TRACE) - printf ("pos %d \n", pos); - printf ("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], * (ne10_uint32_t*) &out_c[pos], out_neon[pos], * (ne10_uint32_t*) &out_neon[pos]); -#endif - assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1); - } - - /* IFFT test */ - /* copy input to input buffer and clear the output buffer */ - for (i = 0; i < 2 * config->fftSize; i++) - { - in_c[i] = out_c[i]; - in_neon[i] = out_neon[i]; - } - - GUARD_ARRAY (out_c, config->fftSize * 2); - GUARD_ARRAY (out_neon, config->fftSize * 2); - - ne10_radix4_butterfly_inverse_float_c (out_c, in_c, S.fft_len, S.p_twiddle, S.one_by_fft_len); - ne10_radix4_butterfly_inverse_float_neon (out_neon, in_neon, S.fft_len, S.p_twiddle, S.one_by_fft_len); - - CHECK_ARRAY_GUARD (out_c, config->fftSize * 2); - CHECK_ARRAY_GUARD (out_neon, config->fftSize * 2); - - //conformance test 1: compare snr - snr = CAL_SNR_FLOAT32 (out_c, out_neon, 2 * config->fftSize); - assert_false ( (snr < SNR_THRESHOLD)); - - //conformance test 2: compare output of C and neon -#if defined (DEBUG_TRACE) - printf ("--------------------config %d\n", loop); - printf ("fftSize: %d ifftFlag: %d\n", config->fftSize, config->ifftFlag); - printf ("snr: %f\n", snr); -#endif - for (pos = 0; pos < config->fftSize * 2; pos++) - { -#if defined (DEBUG_TRACE) - printf ("pos %d \n", pos); - printf ("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], * (ne10_uint32_t*) &out_c[pos], out_neon[pos], * 
(ne10_uint32_t*) &out_neon[pos]); -#endif - assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1); - } - } -#endif - -#ifdef PERFORMANCE_TEST - fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); - for (loop = 0; loop < CFFT_NUM_PERF_TESTS; loop++) - { - config = &CONFIG_CFFT_PERF[loop]; - - /* Initialize the CFFT/CIFFT module */ - status = ne10_cfft_radix4_init_float (&S, config->fftSize, config->ifftFlag); - - if (status == NE10_ERR) - { - printf ("fft init error!\n"); - } - - /* FFT test */ - GET_TIME - ( - time_overhead_c, - { - for (k = 0; k < TEST_COUNT; k++) - { - for (i = 0; i < 2 * config->fftSize; i++) - { - in_c[i] = testInput_f32[i]; - } - } - } - ); - - GET_TIME - ( - time_c, - { - for (k = 0; k < TEST_COUNT; k++) - { - for (i = 0; i < 2 * config->fftSize; i++) - { - in_c[i] = testInput_f32[i]; - } - ne10_radix4_butterfly_float_c (out_c, in_c, S.fft_len, S.p_twiddle); - } - } - ); - - GET_TIME - ( - time_overhead_neon, - { - for (k = 0; k < TEST_COUNT; k++) - { - for (i = 0; i < 2 * config->fftSize; i++) - { - in_neon[i] = testInput_f32[i]; - } - } - } - ); - - GET_TIME - ( - time_neon, - { - for (k = 0; k < TEST_COUNT; k++) - { - for (i = 0; i < 2 * config->fftSize; i++) - { - in_neon[i] = testInput_f32[i]; - } - ne10_radix4_butterfly_float_neon (out_neon, in_neon, S.fft_len, S.p_twiddle); - } - } - ); - - time_c = time_c - time_overhead_c; - time_neon = time_neon - time_overhead_neon; - time_speedup = (ne10_float32_t) time_c / time_neon; - time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; - ne10_log (__FUNCTION__, "CFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", S.fft_len, time_c, time_neon, time_savings, time_speedup); - - /* IFFT test */ - GET_TIME - ( - time_overhead_c, - { - for (k = 0; k < TEST_COUNT; k++) - { - for (i = 0; i < 2 * config->fftSize; i++) - { - in_c[i] = out_c[i]; - } - } - } - ); - - GET_TIME - ( - time_c, - { - 
for (k = 0; k < TEST_COUNT; k++) - { - for (i = 0; i < 2 * config->fftSize; i++) - { - in_c[i] = out_c[i]; - } - ne10_radix4_butterfly_inverse_float_c (out_c, in_c, S.fft_len, S.p_twiddle, S.one_by_fft_len); - } - } - ); - - GET_TIME - ( - time_overhead_neon, - { - for (k = 0; k < TEST_COUNT; k++) - { - for (i = 0; i < 2 * config->fftSize; i++) - { - in_neon[i] = out_neon[i]; - } - } - } - ); - - GET_TIME - ( - time_neon, - { - for (k = 0; k < TEST_COUNT; k++) - { - for (i = 0; i < 2 * config->fftSize; i++) - { - in_neon[i] = out_neon[i]; - } - ne10_radix4_butterfly_inverse_float_neon (out_neon, in_neon, S.fft_len, S.p_twiddle, S.one_by_fft_len); - } - } - ); - - time_c = time_c - time_overhead_c; - time_neon = time_neon - time_overhead_neon; - time_speedup = (ne10_float32_t) time_c / time_neon; - time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; - ne10_log (__FUNCTION__, "CIFFT%20d%20lld%20lld%19.2f%%%18.2f:1\n", S.fft_len, time_c, time_neon, time_savings, time_speedup); - } -#endif - - free (guarded_in_c); - free (guarded_in_neon); - free (guarded_out_c); - free (guarded_out_neon); - fprintf (stdout, "----------%30s end\n", __FUNCTION__); -} - -void test_cfft() -{ - test_cfft_case0(); -} - -static void my_test_setup (void) -{ - ne10_log_buffer_ptr = ne10_log_buffer; -} - -void test_fixture_cfft (void) -{ - test_fixture_start(); // starts a fixture - - fixture_setup (my_test_setup); - - run_test (test_cfft); // run tests - - test_fixture_end(); // ends a fixture -} diff --git a/modules/dsp/test/test_suite_fft_float32.c b/modules/dsp/test/test_suite_fft_float32.c index 597abf4..05a108c 100644 --- a/modules/dsp/test/test_suite_fft_float32.c +++ b/modules/dsp/test/test_suite_fft_float32.c @@ -44,11 +44,11 @@ ** ------------------------------------------------------------------- */ /* Max FFT Length and double buffer for real and imag */ -#define TEST_LENGTH_SAMPLES (16384) +#define TEST_LENGTH_SAMPLES (32768) #define MIN_LENGTH_SAMPLES_CPX 
(4) #define MIN_LENGTH_SAMPLES_REAL (MIN_LENGTH_SAMPLES_CPX*2) -#define TEST_COUNT 250000 +#define TEST_COUNT 10000000 /* ---------------------------------------------------------------------- ** Test input data for F32 @@ -62,18 +62,26 @@ static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES * 2]; ** ------------------------------------------------------------------- */ //input and output -static ne10_float32_t * in = NULL; static ne10_float32_t * guarded_in_c = NULL; static ne10_float32_t * guarded_in_neon = NULL; static ne10_float32_t * in_c = NULL; static ne10_float32_t * in_neon = NULL; +static ne10_float32_t * in_c2 = NULL; +static ne10_float32_t * in_neon2 = NULL; +static ne10_float32_t * guarded_in_c2 = NULL; +static ne10_float32_t * guarded_in_neon2 = NULL; static ne10_float32_t * guarded_out_c = NULL; static ne10_float32_t * guarded_out_neon = NULL; static ne10_float32_t * out_c = NULL; static ne10_float32_t * out_neon = NULL; +static ne10_float32_t * guarded_out_c2 = NULL; +static ne10_float32_t * guarded_out_neon2 = NULL; +static ne10_float32_t * out_c2 = NULL; +static ne10_float32_t * out_neon2 = NULL; static ne10_float32_t snr = 0.0f; +static ne10_float32_t snr2 = 0.0f; static ne10_int64_t time_c = 0; static ne10_int64_t time_neon = 0; @@ -81,6 +89,8 @@ static ne10_int64_t time_overhead_c = 0; static ne10_int64_t time_overhead_neon = 0; static ne10_float32_t time_speedup = 0.0f; static ne10_float32_t time_savings = 0.0f; +static ne10_int64_t time_c2 = 0; +static ne10_int64_t time_neon2 = 0; void test_fft_c2c_1d_float32_conformance() { @@ -190,7 +200,7 @@ void test_fft_c2c_1d_float32_performance() memcpy (in_c, testInput_f32, 2 * fftSize * sizeof (ne10_float32_t)); memcpy (in_neon, testInput_f32, 2 * fftSize * sizeof (ne10_float32_t)); cfg = ne10_fft_alloc_c2c_float32 (fftSize); - test_loop = TEST_COUNT/fftSize; + test_loop = TEST_COUNT / fftSize; GET_TIME ( @@ -365,7 +375,7 @@ void test_fft_r2c_1d_float32_performance() memcpy (in_c, testInput_f32, 
fftSize * sizeof (ne10_float32_t)); memcpy (in_neon, testInput_f32, fftSize * sizeof (ne10_float32_t)); cfg = ne10_fft_alloc_r2c_float32 (fftSize); - test_loop = TEST_COUNT/fftSize; + test_loop = TEST_COUNT / fftSize; GET_TIME ( diff --git a/modules/dsp/test/test_suite_fft_int16.c b/modules/dsp/test/test_suite_fft_int16.c index 5ebcd0a..cad4882 100644 --- a/modules/dsp/test/test_suite_fft_int16.c +++ b/modules/dsp/test/test_suite_fft_int16.c @@ -119,9 +119,9 @@ void test_fft_c2c_1d_int16_conformance() GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t)); ne10_fft_c2c_1d_int16_scaled_c ( (ne10_fft_cpx_int16_t*) out_c, (ne10_fft_cpx_int16_t*) in_c, - cfg->twiddles, cfg->factors, fftSize, 0); + cfg->twiddles, cfg->factors, fftSize, 0); ne10_fft_c2c_1d_int16_scaled_neon ( (ne10_fft_cpx_int16_t*) out_neon, (ne10_fft_cpx_int16_t*) in_neon, - cfg->twiddles, cfg->factors, fftSize, 0); + cfg->twiddles, cfg->factors, fftSize, 0); CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int16_t)); CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t)); @@ -143,9 +143,9 @@ void test_fft_c2c_1d_int16_conformance() GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t)); ne10_fft_c2c_1d_int16_scaled_c ( (ne10_fft_cpx_int16_t*) out_c, (ne10_fft_cpx_int16_t*) in_c, - cfg->twiddles, cfg->factors, fftSize, 1); + cfg->twiddles, cfg->factors, fftSize, 1); ne10_fft_c2c_1d_int16_scaled_neon ( (ne10_fft_cpx_int16_t*) out_neon, (ne10_fft_cpx_int16_t*) in_neon, - cfg->twiddles, cfg->factors, fftSize, 1); + cfg->twiddles, cfg->factors, fftSize, 1); CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int16_t)); CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t)); @@ -205,7 +205,7 @@ void test_fft_c2c_1d_int16_performance() memcpy (in_c, testInput_i16, 2 * fftSize * sizeof (ne10_int16_t)); memcpy (in_neon, testInput_i16, 2 * 
fftSize * sizeof (ne10_int16_t)); cfg = ne10_fft_alloc_c2c_int16 (fftSize); - test_loop = TEST_COUNT/fftSize; + test_loop = TEST_COUNT / fftSize; GET_TIME ( @@ -397,7 +397,7 @@ void test_fft_r2c_1d_int16_performance() memcpy (in_c, testInput_i16, fftSize * sizeof (ne10_int16_t)); memcpy (in_neon, testInput_i16, fftSize * sizeof (ne10_int16_t)); cfg = ne10_fft_alloc_r2c_int16 (fftSize); - test_loop = TEST_COUNT/fftSize; + test_loop = TEST_COUNT / fftSize; GET_TIME ( diff --git a/modules/dsp/test/test_suite_fft_int32.c b/modules/dsp/test/test_suite_fft_int32.c index 9dd8531..3cc77f9 100644 --- a/modules/dsp/test/test_suite_fft_int32.c +++ b/modules/dsp/test/test_suite_fft_int32.c @@ -106,7 +106,7 @@ void test_fft_c2c_1d_int32_conformance() for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++) { testInput_i32[i] = (ne10_int32_t) (drand48() * 8192) - 4096; - testInput2_i32[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX/2; + testInput2_i32[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX / 2; } for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2) { @@ -121,9 +121,9 @@ void test_fft_c2c_1d_int32_conformance() GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t)); GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t)); ne10_fft_c2c_1d_int32_unscaled_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, - cfg->twiddles, cfg->factors, fftSize, 0); + cfg->twiddles, cfg->factors, fftSize, 0); ne10_fft_c2c_1d_int32_unscaled_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, - cfg->twiddles, cfg->factors, fftSize, 0); + cfg->twiddles, cfg->factors, fftSize, 0); CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t)); CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t)); @@ -143,9 +143,9 @@ void test_fft_c2c_1d_int32_conformance() GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) 
out_c, fftSize * 2 * sizeof (ne10_int32_t)); GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t)); ne10_fft_c2c_1d_int32_unscaled_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, - cfg->twiddles, cfg->factors, fftSize, 1); + cfg->twiddles, cfg->factors, fftSize, 1); ne10_fft_c2c_1d_int32_unscaled_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, - cfg->twiddles, cfg->factors, fftSize, 1); + cfg->twiddles, cfg->factors, fftSize, 1); CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t)); CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t)); @@ -165,9 +165,9 @@ void test_fft_c2c_1d_int32_conformance() GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t)); GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t)); ne10_fft_c2c_1d_int32_scaled_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, - cfg->twiddles, cfg->factors, fftSize, 0); + cfg->twiddles, cfg->factors, fftSize, 0); ne10_fft_c2c_1d_int32_scaled_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, - cfg->twiddles, cfg->factors, fftSize, 0); + cfg->twiddles, cfg->factors, fftSize, 0); CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t)); CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t)); @@ -187,9 +187,9 @@ void test_fft_c2c_1d_int32_conformance() GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t)); GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t)); ne10_fft_c2c_1d_int32_scaled_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, - cfg->twiddles, cfg->factors, fftSize, 1); + cfg->twiddles, cfg->factors, fftSize, 1); ne10_fft_c2c_1d_int32_scaled_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, - cfg->twiddles, 
cfg->factors, fftSize, 1); + cfg->twiddles, cfg->factors, fftSize, 1); CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t)); CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t)); @@ -239,7 +239,7 @@ void test_fft_c2c_1d_int32_performance() for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++) { testInput_i32[i] = (ne10_int32_t) (drand48() * 8192) - 4096; - testInput2_i32[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX/2; + testInput2_i32[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX / 2; } for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2) { @@ -249,7 +249,7 @@ void test_fft_c2c_1d_int32_performance() memcpy (in_c, testInput_i32, 2 * fftSize * sizeof (ne10_int32_t)); memcpy (in_neon, testInput_i32, 2 * fftSize * sizeof (ne10_int32_t)); cfg = ne10_fft_alloc_c2c_int32 (fftSize); - test_loop = TEST_COUNT/fftSize; + test_loop = TEST_COUNT / fftSize; GET_TIME ( @@ -489,7 +489,7 @@ void test_fft_r2c_1d_int32_performance() memcpy (in_c, testInput_i32, fftSize * sizeof (ne10_int32_t)); memcpy (in_neon, testInput_i32, fftSize * sizeof (ne10_int32_t)); cfg = ne10_fft_alloc_r2c_int32 (fftSize); - test_loop = TEST_COUNT/fftSize; + test_loop = TEST_COUNT / fftSize; GET_TIME ( diff --git a/modules/dsp/test/test_suite_rfft.c b/modules/dsp/test/test_suite_rfft.c deleted file mode 100644 index 7542843..0000000 --- a/modules/dsp/test/test_suite_rfft.c +++ /dev/null @@ -1,663 +0,0 @@ -/* - * Copyright 2012-14 ARM Limited - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of ARM Limited nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -/* - * NE10 Library : test_suite_rfft.c - */ - -#include -#include -#include - -#include "NE10_dsp.h" -#include "seatest.h" - - -/* ---------------------------------------------------------------------- -** Global defines -** ------------------------------------------------------------------- */ - -/* Max FFT Length 1024 and double buffer for real and imag */ -#define TEST_LENGTH_SAMPLES (1024 * 2) - -#define TEST_COUNT 5000 - -/* ---------------------------------------------------------------------- -** Test input data for F32 -** Generated by the MATLAB rand() function -** ------------------------------------------------------------------- */ - -static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] = -{ - -0.432565, -1.665584, 0.125332, 0.287676, -1.146471, 1.190915, 1.189164, -0.037633, - 0.327292, 0.174639, -0.186709, 0.725791, -0.588317, 2.183186, -0.136396, 0.113931, - 1.066768, 0.059281, -0.095648, -0.832349, 0.294411, -1.336182, 0.714325, 1.623562, - -0.691776, 0.857997, 1.254001, -1.593730, -1.440964, 0.571148, -0.399886, 0.689997, - 0.815622, 0.711908, 1.290250, 0.668601, 1.190838, -1.202457, -0.019790, -0.156717, - -1.604086, 0.257304, -1.056473, 1.415141, -0.805090, 0.528743, 0.219321, -0.921902, - -2.170674, -0.059188, -1.010634, 0.614463, 0.507741, 1.692430, 0.591283, -0.643595, - 0.380337, -1.009116, -0.019511, -0.048221, 0.000043, -0.317859, 1.095004, -1.873990, - 0.428183, 0.895638, 0.730957, 0.577857, 0.040314, 0.677089, 0.568900, -0.255645, - -0.377469, -0.295887, -1.475135, -0.234004, 0.118445, 0.314809, 1.443508, -0.350975, - 0.623234, 0.799049, 0.940890, -0.992092, 0.212035, 0.237882, -1.007763, -0.742045, - 1.082295, -0.131500, 0.389880, 0.087987, -0.635465, -0.559573, 0.443653, -0.949904, - 0.781182, 0.568961, -0.821714, -0.265607, -1.187777, -2.202321, 0.986337, -0.518635, - 0.327368, 0.234057, 0.021466, -1.003944, -0.947146, -0.374429, -1.185886, -1.055903, - 1.472480, 0.055744, -1.217317, -0.041227, -1.128344, -1.349278, 
-0.261102, 0.953465, - 0.128644, 0.656468, -1.167819, -0.460605, -0.262440, -1.213152, -1.319437, 0.931218, - 0.011245, -0.645146, 0.805729, 0.231626, -0.989760, 1.339586, 0.289502, 1.478917, - 1.138028, -0.684139, -1.291936, -0.072926, -0.330599, -0.843628, 0.497770, 1.488490, - -0.546476, -0.846758, -0.246337, 0.663024, -0.854197, -1.201315, -0.119869, -0.065294, - 0.485296, -0.595491, -0.149668, -0.434752, -0.079330, 1.535152, -0.606483, -1.347363, - 0.469383, -0.903567, 0.035880, -0.627531, 0.535398, 0.552884, -0.203690, -2.054325, - 0.132561, 1.592941, 1.018412, -1.580402, -0.078662, -0.681657, -1.024553, -1.234353, - 0.288807, -0.429303, 0.055801, -0.367874, -0.464973, 0.370961, 0.728283, 2.112160, - -1.357298, -1.022610, 1.037834, -0.389800, -1.381266, 0.315543, 1.553243, 0.707894, - 1.957385, 0.504542, 1.864529, -0.339812, -1.139779, -0.211123, 1.190245, -1.116209, - 0.635274, -0.601412, 0.551185, -1.099840, 0.085991, -2.004563, -0.493088, 0.462048, - -0.321005, 1.236556, -0.631280, -2.325211, -1.231637, 1.055648, -0.113224, 0.379224, - 0.944200, -2.120427, -0.644679, -0.704302, -1.018137, -0.182082, 1.521013, -0.038439, - 1.227448, -0.696205, 0.007524, -0.782893, 0.586939, -0.251207, 0.480136, 0.668155, - -0.078321, 0.889173, 2.309287, 0.524639, -0.011787, 0.913141, 0.055941, -1.107070, - 0.485498, -0.005005, -0.276218, 1.276452, 1.863401, -0.522559, 0.103424, -0.807649, - 0.680439, -2.364590, 0.990115, 0.218899, 0.261662, 1.213444, -0.274667, -0.133134, - -1.270500, -1.663606, -0.703554, 0.280880, -0.541209, -1.333531, 1.072686, -0.712085, - -0.011286, -0.000817, -0.249436, 0.396575, -0.264013, -1.664011, -1.028975, 0.243095, - -1.256590, -0.347183, -0.941372, -1.174560, -1.021142, -0.401667, 0.173666, -0.116118, - 1.064119, -0.245386, -1.517539, 0.009734, 0.071373, 0.316536, 0.499826, 1.278084, - -0.547816, 0.260808, -0.013177, -0.580264, 2.136308, -0.257617, -1.409528, 1.770101, - 0.325546, -1.119040, 0.620350, 1.269782, -0.896043, 0.135175, -0.139040, 
-1.163395, - 1.183720, -0.015430, 0.536219, -0.716429, -0.655559, 0.314363, 0.106814, 1.848216, - -0.275106, 2.212554, 1.508526, -1.945079, -1.680543, -0.573534, -0.185817, 0.008934, - 0.836950, -0.722271, -0.721490, -0.201181, -0.020464, 0.278890, 1.058295, 0.621673, - -1.750615, 0.697348, 0.811486, 0.636345, 1.310080, 0.327098, -0.672993, -0.149327, - -2.449018, 0.473286, 0.116946, -0.591104, -0.654708, -1.080662, -0.047731, 0.379345, - -0.330361, -0.499898, -0.035979, -0.174760, -0.957265, 1.292548, 0.440910, 1.280941, - -0.497730, -1.118717, 0.807650, 0.041200, -0.756209, -0.089129, -2.008850, 1.083918, - -0.981191, -0.688489, 1.339479, -0.909243, -0.412858, -0.506163, 1.619748, 0.080901, - -1.081056, -1.124518, 1.735676, 1.937459, 1.635068, -1.255940, -0.213538, -0.198932, - 0.307499, -0.572325, -0.977648, -0.446809, 1.082092, 2.372648, 0.229288, -0.266623, - 0.701672, -0.487590, 1.862480, 1.106851, -1.227566, -0.669885, 1.340929, 0.388083, - 0.393059, -1.707334, 0.227859, 0.685633, -0.636790, -1.002606, -0.185621, -1.054033, - -0.071539, 0.279198, 1.373275, 0.179841, -0.542017, 1.634191, 0.825215, 0.230761, - 0.671634, -0.508078, 0.856352, 0.268503, 0.624975, -1.047338, 1.535670, 0.434426, - -1.917136, 0.469940, 1.274351, 0.638542, 1.380782, 1.319843, -0.909429, -2.305605, - 1.788730, 0.390798, 0.020324, -0.405977, -1.534895, 0.221373, -1.374479, -0.839286, - -0.208643, 0.755913, 0.375734, -1.345413, 1.481876, 0.032736, 1.870453, -1.208991, - -0.782632, -0.767299, -0.107200, -0.977057, -0.963988, -2.379172, -0.838188, 0.257346, - -0.183834, -0.167615, -0.116989, 0.168488, -0.501206, -0.705076, 0.508165, -0.420922, - 0.229133, -0.959497, -0.146043, 0.744538, -0.890496, 0.139062, -0.236144, -0.075459, - -0.358572, -2.077635, -0.143546, 1.393341, 0.651804, -0.377134, -0.661443, 0.248958, - -0.383516, -0.528480, 0.055388, 1.253769, -2.520004, 0.584856, -1.008064, 0.944285, - -2.423957, -0.223831, 0.058070, -0.424614, -0.202918, -1.513077, -1.126352, -0.815002, - 
0.366614, -0.586107, 1.537409, 0.140072, -1.862767, -0.454193, -0.652074, 0.103318, - -0.220632, -0.279043, -0.733662, -0.064534, -1.444004, 0.612340, -1.323503, -0.661577, - -0.146115, 0.248085, -0.076633, 1.738170, 1.621972, 0.626436, 0.091814, -0.807607, - -0.461337, -1.405969, -0.374530, -0.470911, 1.751296, 0.753225, 0.064989, -0.292764, - 0.082823, 0.766191, 2.236850, 0.326887, 0.863304, 0.679387, 0.554758, 1.001630, - 1.259365, 0.044151, -0.314138, 0.226708, 0.996692, 1.215912, -0.542702, 0.912228, - -0.172141, -0.335955, 0.541487, 0.932111, -0.570253, -1.498605, -0.050346, 0.553025, - 0.083498, 1.577524, -0.330774, 0.795155, -0.784800, -1.263121, 0.666655, -1.392632, - -1.300562, -0.605022, -1.488565, 0.558543, -0.277354, -1.293685, -0.888435, -0.986520, - -0.071618, -2.414591, -0.694349, -1.391389, 0.329648, 0.598544, 0.147175, -0.101439, - -2.634981, 0.028053, -0.876310, -0.265477, -0.327578, -1.158247, 0.580053, 0.239756, - -0.350885, 0.892098, 1.578299, -1.108174, -0.025931, -1.110628, 0.750834, 0.500167, - -0.517261, -0.559209, -0.753371, 0.925813, -0.248520, -0.149835, -1.258415, 0.312620, - 2.690277, 0.289696, -1.422803, 0.246786, -1.435773, 0.148573, -1.693073, 0.719188, - 1.141773, 1.551936, 1.383630, -0.758092, 0.442663, 0.911098, -1.074086, 0.201762, - 0.762863, -1.288187, -0.952962, 0.778175, -0.006331, 0.524487, 1.364272, 0.482039, - -0.787066, 0.751999, -0.166888, -0.816228, 2.094065, 0.080153, -0.937295, 0.635739, - 1.682028, 0.593634, 0.790153, 0.105254, -0.158579, 0.870907, -0.194759, 0.075474, - -0.526635, -0.685484, -0.268388, -1.188346, 0.248579, 0.102452, -0.041007, -2.247582, - -0.510776, 0.249243, 0.369197, 0.179197, -0.037283, -1.603310, 0.339372, -0.131135, - 0.485190, 0.598751, -0.086031, 0.325292, -0.335143, -0.322449, -0.382374, -0.953371, - 0.233576, 1.235245, -0.578532, -0.501537, 0.722864, 0.039498, 1.541279, -1.701053, - -1.033741, -0.763708, 2.176426, 0.431612, -0.443765, 0.029996, -0.315671, 0.977846, - 0.018295, 0.817963, 
0.702341, -0.231271, -0.113690, 0.127941, -0.799410, -0.238612, - -0.089463, -1.023264, 0.937538, -1.131719, -0.710702, -1.169501, 1.065437, -0.680394, - -1.725773, 0.813200, 1.441867, 0.672272, 0.138665, -0.859534, -0.752251, 1.229615, - 1.150754, -0.608025, 0.806158, 0.217133, -0.373461, -0.832030, 0.286866, -1.818892, - -1.573051, 2.015666, -0.071982, 2.628909, -0.243317, 0.173276, 0.923207, -0.178553, - -0.521705, 1.431962, -0.870117, 0.807542, -0.510635, 0.743514, 0.847898, -0.829901, - 0.532994, 1.032848, -1.052024, 0.362114, -0.036787, -1.227636, -0.275099, -0.160435, - -1.083575, -1.954213, -0.909487, -0.005579, -1.723490, 1.263077, -0.600433, -2.063925, - 0.110911, 1.487614, 0.053002, 0.161981, -0.026878, 0.173576, 0.882168, 0.182294, - 0.755295, 0.508035, 0.131880, 0.280104, -0.982848, -0.944087, -0.013058, 0.354345, - -0.894709, 0.812111, 0.109537, 2.731644, 0.411079, -1.306862, 0.383806, 0.499504, - -0.510786, 0.234922, -0.597825, 0.020771, 0.419443, 1.191104, 0.771214, -2.644222, - 0.285430, 0.826093, -0.008122, 0.858438, 0.774788, 1.305945, 1.231503, 0.958564, - -1.654548, -0.990396, 0.685236, -0.974870, -0.606726, 0.686794, 0.020049, 1.063801, - -1.341050, 0.479510, -1.633974, -1.442665, 0.293781, -0.140364, -1.130341, -0.292538, - -0.582536, -0.896348, 0.248601, -1.489663, 0.313509, -2.025084, 0.528990, 0.343471, - 0.758193, -0.691940, 0.680179, -1.072541, 0.899772, -2.123092, 0.284712, -0.733323, - -0.773376, 0.151842, -0.336843, 0.970761, -0.107236, 1.013492, -0.475347, 0.068948, - 0.398592, 1.116326, 0.620451, -0.287674, -1.371773, -0.685868, 0.331685, -0.997722, - 0.291418, 1.107078, 0.244959, 0.164976, 0.406231, 1.215981, 1.448424, -1.025137, - 0.205418, 0.588882, -0.264024, 2.495318, 0.855948, -0.850954, 0.811879, 0.700242, - 0.759938, -1.712909, 1.537021, -1.609847, 1.109526, -1.109704, 0.385469, 0.965231, - 0.818297, 0.037049, -0.926012, -0.111919, -0.803030, -1.665006, -0.901401, 0.588350, - 0.554159, -0.415173, 0.061795, 0.457432, 
0.199014, 0.257558, 2.080730, -2.277237, - 0.339022, 0.289894, 0.662261, -0.580860, 0.887752, 0.171871, 0.848821, 0.963769, - 1.321918, -0.064345, 1.317053, 0.228017, -1.429637, -0.149701, -0.504968, -1.729141, - -0.417472, -0.614969, 0.720777, 0.339364, 0.882845, 0.284245, -0.145541, -0.089646, - 0.289161, 1.164831, 0.805729, -1.355643, 0.120893, -0.222178, 0.571732, -0.300140, - 1.134277, -0.179356, -1.467067, 1.395346, 0.440836, 0.565384, -0.693623, 0.833869, - -2.237378, 1.097644, -0.001617, -1.614573, -1.228727, 0.207405, 0.220942, -1.006073, - -0.453067, 1.399453, -0.461964, 0.032716, 0.798783, 0.896816, 0.137892, -1.619146, - -1.646606, 0.428707, -0.737231, 0.564926, -1.384167, 0.460268, 0.629384, 0.379847, - -1.013330, -0.347243, 0.441912, -1.590240, -0.701417, -1.077601, 1.002220, 1.729481, - 0.709032, -0.747897, 0.228862, -0.223497, -0.853275, 0.345627, 0.109764, -1.133039, - -0.683124, -0.277856, 0.654790, -1.248394, -0.597539, -0.481813, 0.983372, 1.762121, - 1.427402, 0.911763, 0.326823, 0.069619, -1.499763, -0.418223, -0.021037, 0.228425, - -1.008196, -0.664622, 0.558177, -1.188542, -0.775481, 0.271042, 1.534976, -1.052283, - 0.625559, -0.797626, -0.313522, -0.602210, 1.259060, 0.858484, -2.105292, -0.360937, - 0.553557, -1.556384, -0.206666, -0.425568, 0.493778, -0.870908, 0.079828, -0.521619, - -1.413861, -0.384293, -0.457922, -0.291471, -0.301224, -1.588594, 1.094287, 1.324167, - -0.126480, -0.737164, 0.213719, -0.400529, 0.064938, -1.757996, 1.686748, 0.327400, - 0.715967, 1.598648, -2.064741, -0.743632, 0.176185, 0.527839, -0.553153, 0.298280, - -1.226607, -0.189676, -0.301713, 0.956956, -0.533366, -0.901082, -0.892552, 0.278717, - -0.745807, 1.603464, 0.574270, 0.320655, -0.151383, 0.315762, 1.343703, -2.237832, - 1.292906, -0.378459, 0.002521, 0.884641, 0.582450, -1.614244, -1.503666, 0.573586, - -0.910537, -1.631277, -0.359138, -0.397616, -1.161307, -1.109838, 0.290672, -1.910239, - 1.314768, 0.665319, -0.275115, -0.023022, -0.907976, 
-1.043657, 0.373516, 0.901532, - 1.278539, -0.128456, 0.612821, 1.956518, 2.266326, -0.373959, 2.238039, -0.159580, - -0.703281, 0.563477, -0.050296, 1.163593, 0.658808, -1.550089, -3.029118, 0.540578, - -1.008998, 0.908047, 1.582303, -0.979088, 1.007902, 0.158491, -0.586927, 1.574082, - -0.516649, 1.227800, 1.583876, -2.088950, 2.949545, 1.356125, 1.050068, -0.767170, - -0.257653, -1.371845, -1.267656, -0.894948, 0.589089, 1.842629, 1.347967, -0.491253, - -2.177568, 0.237000, -0.735411, -1.779419, 0.448030, 0.581214, 0.856607, -0.266263, - -0.417470, -0.205806, -0.174323, 0.217577, 1.684295, 0.119528, 0.650667, 2.080061, - -0.339225, 0.730113, 0.293969, -0.849109, -2.533858, -2.378941, -0.346276, -0.610937, - -0.408192, -1.415611, 0.227122, 0.207974, -0.719718, 0.757762, -1.643135, -1.056813, - -0.251662, -1.298441, 1.233255, 1.494625, 0.235938, -1.404359, 0.658791, -2.556613, - -0.534945, 3.202525, 0.439198, -1.149901, 0.886765, -0.283386, 1.035336, -0.364878, - 1.341987, 1.008872, 0.213874, -0.299264, 0.255849, -0.190826, -0.079060, 0.699851, - -0.796540, -0.801284, -0.007599, -0.726810, -1.490902, 0.870335, -0.265675, -1.566695, - -0.394636, -0.143855, -2.334247, -1.357539, -1.815689, 1.108422, -0.142115, 1.112757, - 0.559264, 0.478370, -0.679385, 0.284967, -1.332935, -0.723980, -0.663600, 0.198443, - -1.794868, -1.387673, 0.197768, 1.469328, 0.366493, -0.442775, -0.048563, 0.077709, - 1.957910, -0.072848, 0.938810, -0.079608, -0.800959, 0.309424, 1.051826, -1.664211, - -1.090792, -0.191731, 0.463401, -0.924147, -0.649657, 0.622893, -1.335107, 1.047689, - 0.863327, -0.642411, 0.660010, 1.294116, 0.314579, 0.859573, 0.128670, 0.016568, - -0.072801, -0.994310, -0.747358, -0.030814, 0.988355, -0.599017, 1.476644, -0.813801, - 0.645040, -1.309919, -0.867425, -0.474233, 0.222417, 1.871323, 0.110001, -0.411341, - 0.511242, -1.199117, -0.096361, 0.445817, -0.295825, -0.167996, 0.179543, 0.421118, - 1.677678, 1.996949, 0.696964, -1.366382, 0.363045, -0.567044, 
-1.044154, 0.697139, - 0.484026, -0.193751, -0.378095, -0.886374, -1.840197, -1.628195, -1.173789, -0.415411, - 0.175088, 0.229433, -1.240889, 0.700004, 0.426877, 1.454803, -0.510186, -0.006657, - -0.525496, 0.717698, 1.088374, 0.500552, 2.771790, -0.160309, 0.429489, -1.966817, - -0.546019, -1.888395, -0.107952, -1.316144, -0.672632, -0.902365, -0.154798, 0.947242, - 1.550375, 0.429040, -0.560795, 0.179304, -0.771509, -0.943390, -1.407569, -1.906131, - -0.065293, 0.672149, 0.206147, -0.008124, 0.020042, -0.558447, 1.886079, -0.219975, - -1.414395, -0.302811, -0.569574, -0.121495, -0.390171, -0.844287, -1.737757, -0.449520, - -1.547933, -0.095776, 0.907714, 2.369602, 0.519768, 0.410525, 1.052585, 0.428784, - 1.295088, -0.186053, 0.130733, -0.657627, -0.759267, -0.595170, 0.812400, 0.069541, - -1.833687, 1.827363, 0.654075, -1.544769, -0.375109, 0.207688, -0.765615, -0.106355, - 0.338769, 1.033461, -1.404822, -1.030570, -0.643372, 0.170787, 1.344839, 1.936273, - 0.741336, 0.811980, -0.142808, -0.099858, -0.800131, 0.493249, 1.237574, 1.295951, - -0.278196, 0.217127, 0.630728, -0.548549, 0.229632, 0.355311, 0.521284, -0.615971, - 1.345803, 0.974922, -2.377934, -1.092319, -0.325710, -2.012228, 1.567660, 0.233337, - 0.646420, -1.129412, 0.197038, 1.696870, 0.726034, 0.792526, 0.603357, -0.058405, - -1.108666, 2.144229, -1.352821, 0.457021, 0.391175, 2.073013, -0.323318, 1.468132, - -0.502399, 0.209593, 0.754800, -0.948189, 0.613157, 1.760503, 0.088762, 2.595570, - -0.675470, 2.786804, -0.016827, 0.271651, -0.914102, -1.951371, -0.317418, 0.588333, - 0.828996, -1.674851, -1.922293, -0.436662, 0.044974, 2.416609, -0.309892, 0.187583, - 0.947699, -0.525703, -1.115605, -1.592320, 1.174844, 0.485144, 1.645480, -0.454233, - 1.008768, 2.049403, 0.602020, 0.017860, -1.610426, 1.238752, 0.683587, -0.780716, - 0.530979, 2.134498, 0.354361, 0.231700, 1.287980, -0.013488, -1.333345, -0.556343, - 0.755597, -0.911854, 1.371684, 0.245580, 0.118845, 0.384690, -0.070152, -0.578309, - 
0.469308, 1.299687, 1.634798, -0.702809, 0.807253, -1.027451, 1.294496, 0.014930, - 0.218705, 1.713188, -2.078805, 0.112917, -1.086491, -1.558311, 0.637406, -0.404576, - -0.403325, 0.084076, -0.435349, -0.562623, 0.878062, -0.814650, -0.258363, 0.493299, - -0.802694, -0.008329, 0.627571, 0.154382, 2.580735, -1.306246, 1.023526, 0.777795, - -0.833884, -0.586663, 0.065664, -0.012342, -0.076987, -1.558587, 1.702607, -0.468984, - 0.094619, 0.287071, 0.919354, 0.510136, 0.245440, -1.400519, 0.969571, 1.593698, - -1.437917, -1.534230, -0.074710, 0.081459, -0.843240, -0.564640, -0.028207, -1.243702, - 0.733039, 0.059580, 0.149144, 1.595857, -0.777250, 1.550277, 1.055002, -0.166654, - 0.314484, 1.419571, 0.327348, 0.475653, 0.398754, -0.072770, 1.314784, 0.978279, - 1.722114, -0.412302, 0.565133, 0.739851, 0.220138, 1.312807, 0.629152, -1.107987, - -0.447001, -0.725993, 0.354045, -0.506772, -2.103747, -0.664684, 1.450110, -0.329805, - 2.701872, -1.634939, -0.536325, 0.547223, 1.492603, -0.455243, -0.496416, 1.235260, - 0.040926, 0.748467, 1.230764, 0.304903, 1.077771, 0.765151, -1.319580, -0.509191, - 0.555116, -1.957625, -0.760453, -2.443886, -0.659366, -0.114779, 0.300079, -0.583996, - -3.073745, 1.551042, -0.407369, 1.428095, -1.353242, 0.903970, 0.541671, -0.465020, - 2.430415, 2.020479, 0.797287, 0.030996, 0.540738, 0.683921, -0.590052, -0.261084, - 1.517068, 1.007259, 0.303421, -0.817081, -0.491192, 0.867467, 0.360790, -0.080371, - 0.749301, -1.791968, 1.213226, -0.060524, -0.392520, 0.609547, 0.643580, 1.019521, - 0.934437, 1.228582, -0.249486, -0.707583, -0.593824, -0.262310, 1.242847, -1.548902, - -0.386760, 0.275098, 0.826154, -0.979279, -0.104297, 0.127849, 0.062544, 0.371624, - -0.103963, -0.696775, -0.386823, 0.016134, 1.369212, 0.416877, 0.068741, 0.294187, - 0.472633, 1.782735, 0.260577, 1.510728, 0.316968, 0.803473, 0.580874, 1.778584, - -0.938075, -0.916672, 0.376006, 0.909780, 0.154250, -0.202264, 1.488708, -0.621639, - 0.809537, 1.928793, 0.396057, 
-0.861399, 2.431936, -0.840518, 0.280451, 0.820416, - 1.227828, -0.063565, 0.645265, -1.771318, 0.059612, -0.760177, -1.690901, 1.103672, - 1.462500, 0.236213, -1.097691, 2.415233, -0.402112, 0.914131, -0.135959, 1.314193, - 0.322361, -0.476496, 0.076162, -0.105147, 1.417013, 0.707911, 0.367918, -0.602844, - -0.852110, 0.655122, 1.470184, -0.810403, -1.276157, 1.722268, 0.101878, -0.801997, - -1.250837, 1.237717, 1.528165, 1.776923, 0.631168, 0.083259, 2.140043, 1.263469, - -1.750645, -0.014432, 2.468102, -0.669158, 0.259927, -0.372328, 1.318554, -0.653081, - 0.062179, -0.735873, -0.179324, 1.084675, 0.136915, -0.015608, -0.938491, -1.478085, - 0.361931, 0.477791, 0.321742, -1.877574, 0.680526, 0.233398, 1.239492, 0.125661, - 0.179721, -0.605061, -1.036850, -0.295278, 1.456114, 1.802525, -1.333614, 0.387257, - -0.022809, 0.110596, 0.812811, -1.009099, -1.004572, 0.282958, 0.289750, -0.247297, - -0.218864, 0.898687, -0.642213, -0.180445, 0.717913, 0.301386, 1.548895, -0.044242, - -0.029651, -0.382110, -0.553929, 0.932358, -1.315840, -0.301519, -2.599588, 0.780078, - 0.602941, 0.942799, -1.023913, -0.067830, 0.081760, -1.767027, -1.781264, -0.660354, - 1.351417, 2.136370, 0.166783, -1.705227, 0.276528, 0.394512, -0.098555, 0.176450, - -1.837854, -1.502291, 0.819197, -0.234568, -1.631598, -0.317939, -0.796289, 0.690800, - -0.042010, 0.324041, 0.506456, -1.028590, 0.099426, -0.116351, 0.689239, 1.883291, - 0.325435, -0.095213, 0.031172, -0.613800, -1.731258, 0.478775, -0.447835, 0.386815, - 0.052959, -0.486085, 0.244473, 0.718309, 0.153485, 0.133783, -1.006194, 1.306469, - 1.199137, -2.577336, -2.086270, 0.386132, -0.861031, -1.230808, 2.641554, -0.904404, - -1.223338, 0.303205, -0.730097, -1.143570, -1.413193, -0.591818, 0.518888, -1.492811, - -0.086684, -0.012620, -0.345858, 0.986311, 0.643256, 2.919944, -1.248585, 0.157115, - 0.788733, -0.577083, 0.527634, 1.671694, 0.800079, 0.883787, -0.224185, 0.296991, - -0.521008, -0.155359, -0.098498, 0.997170, 0.434470, 
-0.025721, -0.379934, -0.242396, - -1.165114, 0.756605, 1.164162, -1.023455, 1.701589, -0.494172, 0.172714, 0.354061, - -0.246258, -0.145741, -1.169008, -0.022011, 0.618278, 1.865865, 0.081875, 1.607995, - -0.380666, -1.299588, -0.723958, -0.564984, 0.621664, -1.335471, -0.123108, -1.102815, - -2.753176, 0.252017, -0.858148, 1.135363, -0.297908, 1.154331, 1.046076, 2.126874, - -0.655774, -1.142368, 0.949039, -0.404608, -0.384329, 0.482020, 0.443774, 0.381100, - 1.102348, 0.856447, -1.178509, 0.401970, -0.584228, -0.979486, 0.115106, 0.068471, - -0.529900, 0.541112, 0.681720, 0.538565, -0.510035, -1.322111, -0.610659, -0.565309, - 0.086175, 0.691501, 2.133751, -0.002864, -0.089523, -0.254982, -0.874212, 0.422928, - -0.133399, 0.539578, 0.875171, -1.250776, 0.868311, -0.804806, -0.752693, -0.745812, - -0.309654, -1.521891, 0.826531, -0.612987, 0.959728, 1.972988, 0.294958, -0.392651, - 0.575927, -1.141419, 0.061069, 0.012318, -0.168118, -0.687349, -0.990650, -0.049762, - 0.719301, -0.283063, -1.424966, 0.461549, 1.091484, -1.044295, -2.842784, 0.996824, - 0.076534, -1.866737, -0.613614, 1.169354, -0.575013, -0.264795, 0.004722, -0.039410, - -0.505393, -1.157832, 0.710427, 0.728172, 0.866884, 2.431569, 0.110204, 0.026449, - 0.970324, -0.005260, 1.409542, 1.757851, 0.885011, 1.140862, 0.403216, 0.191009, - -0.693627, 0.011036, -1.105586, 1.907973, -0.165412, -0.732430, -0.990741, 0.894305, - 0.448227, 0.889219, 1.073337, -0.104734, 1.547319, 0.169834, 0.804048, -1.724029, - 0.174133, -0.484085, -0.731627, -2.131905, -1.810366, -0.052338, -0.086212, -1.189738, - -0.754141, 0.947278, -0.182628, -0.066268, 0.905018, 1.458216, -1.117984, 1.813295, - 0.150753, -0.282994, 1.650122, 0.666378, -0.346362, -0.264042, -0.644349, -0.905540, - 0.716679, -0.007336, -2.814799, -0.149546, 0.577495, 0.753117, -0.166985, -0.581816, - 0.365758, -0.548919, 0.578737, -1.955799, 0.522006, 1.601135, 0.732559, 0.555747, - -0.813346, -0.538975, 1.307876, -0.482579, -1.752447, -0.926570, 
0.922440, 0.041001, - 0.413647, 0.597244, 1.924270, 0.714119, -2.312337, 1.380715, 1.390703, -0.453904, - -0.628305, 1.023225, -0.489111, -0.402405, 1.399683, 0.280561, 1.880872, -0.799673, - -0.560699, 1.708875, -0.644810, -1.422496, -0.755937, 0.157520, 0.378346, 0.178665, - -0.602775, -0.993406, 1.188948, 2.388009, 2.265523, 2.301073, -0.270076, 0.502837, - -0.119191, -0.001889, -0.432649, -0.194822, 0.985351, 0.468596, -1.364901, 0.273689, - 2.646683, -0.053754, 0.472511, -2.080034, -0.802494, -0.456793, 0.193857, 0.889525, - -1.591669, -0.321976, -0.703798, -0.744287, 0.371287, 1.437276, 0.459913, 0.660738, - 1.124368, 0.979412, -1.316431, -0.023211, 0.134547, 2.408125, 0.901705, 0.076185, - 0.361743, -2.058669, -2.332033, -0.370905, 1.285684, 0.557046, -0.180229, -0.035676 -}; -ne10_float32_t tmp_buffer[TEST_LENGTH_SAMPLES]; -/* ---------------------------------------------------------------------- -** Defines each of the tests performed -** ------------------------------------------------------------------- */ - -typedef struct -{ - ne10_uint32_t fftSize; - ne10_uint32_t ifftFlag; - ne10_uint32_t doBitReverse; - ne10_float32_t *inputF32; -} test_config_rfft; - -static test_config_rfft CONFIG_RFFT[] = -{ - {128, 0, 1, &testInput_f32[0]}, - {512, 0, 1, &testInput_f32[0]}, -}; - -static test_config_rfft CONFIG_RFFT_PERF[] = -{ - {128, 0, 1, &testInput_f32[0]}, - {512, 0, 1, &testInput_f32[0]}, -}; - -#define RFFT_NUM_TESTS (sizeof(CONFIG_RFFT) / sizeof(CONFIG_RFFT[0]) ) -#define RFFT_NUM_PERF_TESTS (sizeof(CONFIG_RFFT_PERF) / sizeof(CONFIG_RFFT_PERF[0]) ) - -//input and output -static ne10_float32_t * guarded_in_c = NULL; -static ne10_float32_t * guarded_in_neon = NULL; -static ne10_float32_t * in_c = NULL; -static ne10_float32_t * in_neon = NULL; - -static ne10_float32_t * guarded_out_c = NULL; -static ne10_float32_t * guarded_out_neon = NULL; -static ne10_float32_t * out_c = NULL; -static ne10_float32_t * out_neon = NULL; - -static ne10_float32_t snr = 0.0f; 
- -#ifdef PERFORMANCE_TEST -static ne10_int64_t time_c = 0; -static ne10_int64_t time_neon = 0; -static ne10_int64_t time_overhead_c = 0; -static ne10_int64_t time_overhead_neon = 0; -static ne10_float32_t time_speedup = 0.0f; -static ne10_float32_t time_savings = 0.0f; -#endif - -void test_rfft_case0() -{ - ne10_float32_t *p_src = testInput_f32; - ne10_rfft_instance_f32_t S; - ne10_cfft_radix4_instance_f32_t S_CFFT; - - ne10_uint16_t loop = 0; - ne10_uint16_t k = 0; - ne10_uint16_t i = 0; - ne10_uint16_t pos = 0; - - test_config_rfft *config; - ne10_result_t status = NE10_OK; - - fprintf (stdout, "----------%30s start\n", __FUNCTION__); - - /* init input memory */ - NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the begining and 16 extra bytes at the end - NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the begining and 16 extra bytes at the end - - /* init dst memory */ - NE10_DST_ALLOC (out_c, guarded_out_c, TEST_LENGTH_SAMPLES); - NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES); - -#if defined (SMOKE_TEST)||(REGRESSION_TEST) - for (loop = 0; loop < RFFT_NUM_TESTS; loop++) - { - config = &CONFIG_RFFT[loop]; - - /* Initialize the RFFT/RIFFT module */ - status = ne10_rfft_init_float (&S, &S_CFFT, config->fftSize, config->ifftFlag); - - if (status == NE10_ERR) - { - printf ("fft init error!\n"); - } - - /* copy input to input buffer and clear the output buffer */ - for (i = 0; i < config->fftSize; i++) - { - in_c[i] = testInput_f32[i]; - in_neon[i] = testInput_f32[i]; - } - - /* FFT test */ - GUARD_ARRAY (out_c, config->fftSize * 2); - GUARD_ARRAY (out_neon, config->fftSize * 2); - - ne10_rfft_float_c (&S, in_c, out_c, tmp_buffer); - ne10_rfft_float_neon (&S, in_neon, out_neon, tmp_buffer); - - - CHECK_ARRAY_GUARD (out_c, config->fftSize * 2); - CHECK_ARRAY_GUARD (out_neon, config->fftSize * 2); - - //conformance test 1: compare snr - snr = CAL_SNR_FLOAT32 (out_c, out_neon, 
config->fftSize * 2); - assert_false ( (snr < SNR_THRESHOLD)); - - //conformance test 2: compare output of C and neon -#if defined (DEBUG_TRACE) - printf ("-----------RFFT------------\n"); - printf ("--------------------config %d\n", loop); - printf ("fftSize: %d\n", config->fftSize); - printf ("snr: %f\n", snr); -#endif - for (pos = 0; pos < config->fftSize * 2; pos++) - { -#if defined (DEBUG_TRACE) - printf ("pos %d \n", pos); - printf ("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], * (ne10_uint32_t*) &out_c[pos], out_neon[pos], * (ne10_uint32_t*) &out_neon[pos]); -#endif - assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1); - } - - /* IFFT test */ - /* Initialize the RFFT/RIFFT module */ - status = ne10_rfft_init_float (&S, &S_CFFT, config->fftSize, 1); - - if (status == NE10_ERR) - { - printf ("fft init error!\n"); - } - - /* copy input to input buffer and clear the output buffer */ - for (i = 0; i < config->fftSize * 2; i++) - { - in_c[i] = out_c[i]; - in_neon[i] = out_neon[i]; - } - - GUARD_ARRAY (out_c, config->fftSize * 2); - GUARD_ARRAY (out_neon, config->fftSize * 2); - - ne10_rfft_float_c (&S, in_c, out_c, tmp_buffer); - ne10_rfft_float_neon (&S, in_neon, out_neon, tmp_buffer); - - CHECK_ARRAY_GUARD (out_c, config->fftSize * 2); - CHECK_ARRAY_GUARD (out_neon, config->fftSize * 2); - - //conformance test 1: compare snr - snr = CAL_SNR_FLOAT32 (out_c, out_neon, config->fftSize); - assert_false ( (snr < SNR_THRESHOLD)); - - //conformance test 2: compare output of C and neon -#if defined (DEBUG_TRACE) - printf ("-----------RIFFT------------\n"); - printf ("--------------------config %d\n", loop); - printf ("fftSize: %d\n", config->fftSize); - printf ("snr: %f\n", snr); -#endif - for (pos = 0; pos < config->fftSize; pos++) - { -#if defined (DEBUG_TRACE) - printf ("pos %d \n", pos); - printf ("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], * (ne10_uint32_t*) &out_c[pos], out_neon[pos], * (ne10_uint32_t*) &out_neon[pos]); -#endif - 
assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1); - } - } -#endif - -#ifdef PERFORMANCE_TEST - fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio"); - for (loop = 0; loop < RFFT_NUM_PERF_TESTS; loop++) - { - config = &CONFIG_RFFT_PERF[loop]; - - /* Initialize the RFFT/RIFFT module */ - status = ne10_rfft_init_float (&S, &S_CFFT, config->fftSize, config->ifftFlag); - - if (status == NE10_ERR) - { - printf ("fft init error!\n"); - } - - /* FFT test */ - /* Initialize the RFFT/RIFFT module */ - status = ne10_rfft_init_float (&S, &S_CFFT, config->fftSize, config->ifftFlag); - - GET_TIME - ( - time_overhead_c, - { - for (k = 0; k < TEST_COUNT; k++) - { - for (i = 0; i < config->fftSize; i++) - { - in_c[i] = testInput_f32[i]; - } - } - } - ); - - GET_TIME - ( - time_c, - { - for (k = 0; k < TEST_COUNT; k++) - { - for (i = 0; i < config->fftSize; i++) - { - in_c[i] = testInput_f32[i]; - } - ne10_rfft_float_c (&S, in_c, out_c, tmp_buffer); - } - } - ); - - GET_TIME - ( - time_overhead_neon, - { - for (k = 0; k < TEST_COUNT; k++) - { - for (i = 0; i < config->fftSize; i++) - { - in_neon[i] = testInput_f32[i]; - } - } - } - ); - - GET_TIME - ( - time_neon, - { - for (k = 0; k < TEST_COUNT; k++) - { - for (i = 0; i < config->fftSize; i++) - { - in_neon[i] = testInput_f32[i]; - } - ne10_rfft_float_neon (&S, in_neon, out_neon, tmp_buffer); - } - } - ); - - time_c = time_c - time_overhead_c; - time_neon = time_neon - time_overhead_neon; - time_speedup = (ne10_float32_t) time_c / time_neon; - time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; - ne10_log (__FUNCTION__, "RFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", config->fftSize, time_c, time_neon, time_savings, time_speedup); - - /* IFFT test */ - /* Initialize the RFFT/RIFFT module */ - status = ne10_rfft_init_float (&S, &S_CFFT, config->fftSize, 1); - - GET_TIME - ( - time_overhead_c, - { - for (k = 0; k < 
TEST_COUNT; k++) - { - for (i = 0; i < 2 * config->fftSize; i++) - { - in_c[i] = out_c[i]; - } - } - } - ); - - GET_TIME - ( - time_c, - { - for (k = 0; k < TEST_COUNT; k++) - { - for (i = 0; i < 2 * config->fftSize; i++) - { - in_c[i] = out_c[i]; - } - ne10_rfft_float_c (&S, in_c, out_c, tmp_buffer); - } - } - ); - - GET_TIME - ( - time_overhead_neon, - { - for (k = 0; k < TEST_COUNT; k++) - { - for (i = 0; i < 2 * config->fftSize; i++) - { - in_neon[i] = out_neon[i]; - } - } - } - ); - - GET_TIME - ( - time_neon, - { - for (k = 0; k < TEST_COUNT; k++) - { - for (i = 0; i < 2 * config->fftSize; i++) - { - in_neon[i] = out_neon[i]; - } - ne10_rfft_float_neon (&S, in_neon, out_neon, tmp_buffer); - } - } - ); - - time_c = time_c - time_overhead_c; - time_neon = time_neon - time_overhead_neon; - time_speedup = (ne10_float32_t) time_c / time_neon; - time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100; - ne10_log (__FUNCTION__, "RIFFT%20d%20lld%20lld%19.2f%%%18.2f:1\n", config->fftSize, time_c, time_neon, time_savings, time_speedup); - } -#endif - - free (guarded_in_c); - free (guarded_in_neon); - free (guarded_out_c); - free (guarded_out_neon); - fprintf (stdout, "----------%30s end\n", __FUNCTION__); -} - -void test_rfft() -{ - test_rfft_case0(); -} - -static void my_test_setup (void) -{ - ne10_log_buffer_ptr = ne10_log_buffer; -} - -void test_fixture_rfft (void) -{ - test_fixture_start(); // starts a fixture - - fixture_setup (my_test_setup); - - run_test (test_rfft); - - test_fixture_end(); // ends a fixture -} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index b217a12..7b466e3 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -97,8 +97,6 @@ if(NE10_ENABLE_DSP) # Define dsp test files. 
set(NE10_TEST_DSP_SRCS ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_main.c - ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_cfft.c - ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_rfft.c ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_fft_float32.c ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_fft_int32.c ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_fft_int16.c -- 2.7.4