1. To optimize the FFT, the algorithm is changed: bit reversal is removed and radix-8 is added.
2. In testing, the optimized FFT shows the best performance, so the old implementations are removed.
The performance results are as follows:
toolchain: gcc 4.8 at -O2
The omx FFT's execution time is the baseline (100%). The lower the ratio, the better the performance.
panda board A9:
| |16 |32 |64 |128 |256 |512 |1024 |2048 |4096 |
|Ne10 |84.27%|89.57%|85.63%|85.79%|87.89%|87.91%|83.51%|97.08%|92.68%|
|omx |100% |100% |100% |100% |100% |100% |100% |100% |100% |
nexus10 A15:
| |16 |32 |64 |128 |256 |512 |1024 |2048 |4096 |
|Ne10 |84.88%|98.43%|89.46%|101.0%|99.24%|103.2%|93.80%|105.1%|97.44%|
|omx |100% |100% |100% |100% |100% |100% |100% |100% |100% |
Change-Id: I363ee1602f08532e566d3a5a4f3d7a99972a1283
/* fft functions*/
/* function pointers*/
- extern void (*ne10_radix4_butterfly_float) (ne10_float32_t *pDst,
- ne10_float32_t *pSrc,
- ne10_uint16_t N,
- ne10_float32_t *pCoef);
-
- extern void (*ne10_radix4_butterfly_inverse_float) (ne10_float32_t *pDst,
- ne10_float32_t *pSrc,
- ne10_uint16_t N,
- ne10_float32_t *pCoef,
- ne10_float32_t onebyN);
-
- extern void (*ne10_rfft_float) (const ne10_rfft_instance_f32_t * S,
- ne10_float32_t * pSrc,
- ne10_float32_t * pDst,
- ne10_float32_t * pTemp);
-
extern void (*ne10_fft_c2c_1d_float32) (ne10_fft_cpx_float32_t *fout,
ne10_fft_cpx_float32_t *fin,
ne10_fft_cpx_float32_t *twiddles,
extern ne10_fft_r2c_cfg_int16_t ne10_fft_alloc_r2c_int16 (ne10_int32_t nfft);
/* C version*/
- extern void ne10_radix4_butterfly_float_c (ne10_float32_t *pDst,
- ne10_float32_t *pSrc,
- ne10_uint16_t N,
- ne10_float32_t *pCoef);
-
- extern void ne10_radix4_butterfly_inverse_float_c (ne10_float32_t *pDst,
- ne10_float32_t *pSrc,
- ne10_uint16_t N,
- ne10_float32_t *pCoef,
- ne10_float32_t onebyN);
-
- extern void ne10_rfft_float_c (const ne10_rfft_instance_f32_t * S,
- ne10_float32_t * pSrc,
- ne10_float32_t * pDst,
- ne10_float32_t * pTemp);
-
extern void ne10_fft_c2c_1d_float32_c (ne10_fft_cpx_float32_t *fout,
ne10_fft_cpx_float32_t *fin,
ne10_fft_cpx_float32_t *twiddles,
/* NEON version*/
- /**
- * @addtogroup CFFT_CIFFT
- * @{
- */
- extern void ne10_radix4_butterfly_float_neon (ne10_float32_t *pDst,
- ne10_float32_t *pSrc,
- ne10_uint16_t N,
- ne10_float32_t *pCoef)
- asm ("ne10_radix4_butterfly_float_neon");
-
- extern void ne10_radix4_butterfly_inverse_float_neon (ne10_float32_t *pDst,
- ne10_float32_t *pSrc,
- ne10_uint16_t N,
- ne10_float32_t *pCoef,
- ne10_float32_t onebyN)
- asm ("ne10_radix4_butterfly_inverse_float_neon");
- /** @} */ //end of CFFT_CIFFT group
-
-
extern void ne10_rfft_float_neon (const ne10_rfft_instance_f32_t * S,
ne10_float32_t * pSrc,
ne10_float32_t * pDst,
# Add dsp C files.
set(NE10_DSP_C_SRCS
${PROJECT_SOURCE_DIR}/common/NE10_mask_table.c
- ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_cfft.c
- ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_cfft_init.c
- ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_rfft.c
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_float32.c
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_int32.c
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_int16.c
- ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_rfft_init.c
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fir.c
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fir_init.c
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_iir.c
# Add dsp intrinsic NEON files.
set(NE10_DSP_INTRINSIC_SRCS
- ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_rfft.neon.c
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_float32.neon.c
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_int32.neon.c
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_int16.neon.c
# Add dsp NEON files.
set(NE10_DSP_NEON_SRCS
- ${PROJECT_SOURCE_DIR}/modules/dsp/NE10_cfft.neon.s
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_float32.neon.s
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_int32.neon.s
${PROJECT_SOURCE_DIR}/modules/dsp/NE10_fft_int16.neon.s
+++ /dev/null
-/*
- * Copyright 2012-14 ARM Limited
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of ARM Limited nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * NE10 Library : dsp/NE10_cfft.c
- */
-
-#include "NE10_types.h"
-/**
- * @ingroup groupDSPs
- */
-
-/**
- * @defgroup CFFT_CIFFT Complex FFT
- *
- * \par
- * Complex Fast Fourier Transform(CFFT) and Complex Inverse Fast Fourier Transform(CIFFT) is an efficient algorithm to compute Discrete Fourier Transform(DFT) and Inverse Discrete Fourier Transform(IDFT).
- * Computational complexity of CFFT reduces drastically when compared to DFT.
- * \par
- * This set of functions implements CFFT/CIFFT
- * for floating-point data types. The functions operate on out-of-place buffer which use different buffer for input and output.
- * Complex input is stored in input buffer in an interleaved fashion.
- *
- * \par
- * The functions operate on blocks of input and output data and each call to the function processes
- * <code>2*fftLen</code> samples through the transform. <code>pSrc</code> points to input arrays containing <code>2*fftLen</code> values.
- * \par
- * The <code>pDst</code> points to the array of output buffer of size <code>2*fftLen</code> and inputs and outputs are stored in an interleaved fashion as shown below.
- * <pre> {real[0], imag[0], real[1], imag[1],..} </pre>
- *
- * \par Lengths supported by the transform:
- * \par
- * Internally, the functions utilize a radix-4 decimation in frequency(DIF) algorithm
- * and the size of the FFT supported are of the lengths [16, 64, 256, 1024].
- *
- *
- * \par Algorithm:
- *
- * <b>Complex Fast Fourier Transform:</b>
- * \par
- * Input real and imaginary data:
- * <pre>
- * x(n) = xa + j * ya
- * x(n+N/4 ) = xb + j * yb
- * x(n+N/2 ) = xc + j * yc
- * x(n+3N 4) = xd + j * yd
- * </pre>
- * where N is length of FFT
- * \par
- * Output real and imaginary data:
- * <pre>
- * X(4r) = xa'+ j * ya'
- * X(4r+1) = xb'+ j * yb'
- * X(4r+2) = xc'+ j * yc'
- * X(4r+3) = xd'+ j * yd'
- * </pre>
- * \par
- * Twiddle factors for radix-4 FFT:
- * <pre>
- * Wn = co1 + j * (- si1)
- * W2n = co2 + j * (- si2)
- * W3n = co3 + j * (- si3)
- * </pre>
- *
- * \par
- * \image html CFFT.gif "Radix-4 Decimation-in Frequency Complex Fast Fourier Transform"
- *
- * \par
- * Output from Radix-4 CFFT Results in Digit reversal order. Interchange middle two branches of every butterfly results in Bit reversed output.
- * \par
- * <b> Butterfly CFFT equations:</b>
- * <pre>
- * xa' = xa + xb + xc + xd
- * ya' = ya + yb + yc + yd
- * xc' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
- * yc' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
- * xb' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
- * yb' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
- * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
- * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
- * </pre>
- *
- *
- * <b>Complex Inverse Fast Fourier Transform:</b>
- * \par
- * CIFFT uses same twiddle factor table as CFFT with modifications in the design equation as shown below.
- *
- * \par
- * <b> Modified Butterfly CIFFT equations:</b>
- * <pre>
- * xa' = xa + xb + xc + xd
- * ya' = ya + yb + yc + yd
- * xc' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
- * yc' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
- * xb' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
- * yb' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
- * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
- * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
- * </pre>
- *
- * \par Instance Structure
- * A separate instance structure must be defined for each Instance but the twiddle factors and bit reversal tables can be reused.
- * There are separate instance structure declarations for each of the 3 supported data types.
- *
- * \par Initialization Functions
- * There is also an associated initialization function for each data type.
- * The initialization function performs the following operations:
- * - Sets the values of the internal structure fields.
- * - Initializes twiddle factor table and bit reversal table pointers
- * \par
- * Use of the initialization function is optional.
- * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
- * To place an instance structure into a const data section, the instance structure must be manually initialized.
- * Manually initialize the instance structure as follows:
- * <pre>
- *ne10_cfft_radix4_instance_f32_t = {fft_len, ifft_flag, bit_reverse_flag, p_twiddle, p_bit_rev_table, twid_coef_modifier, bit_rev_factor, one_by_fft_len};
- * </pre>
- * \par
- * where <code>fftLen</code> length of CFFT/CIFFT; <code>ifft_flag</code> Flag for selection of CFFT or CIFFT(Set ifft_flag to calculate CIFFT otherwise calculates CFFT);
- * <code>bit_reverse_flag</code> Flag for selection of output order(Set bitReverseFlag to output in normal order otherwise output in bit reversed order);
- * <code>p_twiddle</code>points to array of twiddle coefficients; <code>pBitRevTable</code> points to the array of bit reversal table.
- * <code>p_bit_rev_table</code> modifier for bit reversal table which supports all FFT lengths with same table.
- * <code>twid_coef_modifier</code> modifier for twiddle factor table which supports all FFT lengths with same table;
- * <code>one_by_fft_len</code> value of 1/fftLen to calculate CIFFT;
- *
- */
-
-
-/**
- * @addtogroup CFFT_CIFFT
- * @{
- */
-
-/**
- * @brief Core radix-4 FFT of floating-point data.
- * @param[out] *pDst point to the output buffer (out-of-place)
- * @param[in] *pSrc point to the input buffer (out-of-place: the pSrc is used for intermedia buffer, so the input buffer is destroyed)
- * @param[in] N length of FFT
- * @param[in] *pCoef point to the twiddle factors
- * @return none.
- * The function implements a Radix-4 Complex FFT
- * Can support FFT lengths of 16, 64, 256, 1024
- */
-
-void ne10_radix4_butterfly_float_c(
- ne10_float32_t *pDst,
- ne10_float32_t *pSrc,
- ne10_uint16_t N,
- ne10_float32_t *pCoef)
-{
- ne10_int32_t set,grp;
- ne10_int32_t setCount,grpCount,grpStep,twidStep;
- ne10_float32_t *pTw2,*pTw3,*pTw4,*pT1,*pTmp;
- ne10_float32_t *pOut1,*pOut2,*pOut3,*pOut4;
- ne10_int32_t SubFFTSize=4,SubFFTNum=N/4; /*Intial Number of Groups and Group Size*/
- ne10_float32_t InpRe1,InpIm1,InpRe2,InpIm2,InpRe3,InpIm3,InpRe4,InpIm4;
- ne10_float32_t OutRe1,OutIm1,OutRe2,OutIm2,OutRe3,OutIm3,OutRe4,OutIm4;
- ne10_float32_t ReTmp1,ImTmp1,ReTmp2,ImTmp2,ReTmp3,ImTmp3,ReTmp4,ImTmp4;
- ne10_float32_t TwRe2,TwIm2,TwRe3,TwIm3,TwRe4,TwIm4;
- ne10_float32_t ReTmpT2,ImTmpT2,ReTmpT3,ImTmpT3,ReTmpT4,ImTmpT4;
- ne10_int32_t InpStep=(N>>2),OutStep=1;
-
- /*First stage*/
- grpCount = SubFFTNum;
- pT1 = pSrc;
- pOut1 = pDst;
-
- for(grp = 0; grp < grpCount; grp++)
- {
- InpRe1 = pT1[0];
- InpIm1 = pT1[1];
- InpRe2 = pT1[InpStep<<1];
- InpIm2 = pT1[(InpStep<<1)+1];
- InpRe3 = pT1[(InpStep<<2)];
- InpIm3 = pT1[(InpStep<<2)+1];
- InpRe4 = pT1[3*(InpStep<<1)];
- InpIm4 = pT1[3*(InpStep<<1)+1];
-
- //Inp1 + Inp3
- ReTmp1 = InpRe1 + InpRe3;
- ImTmp1 = InpIm1 + InpIm3;
- //Inp1 - Inp3
- ReTmp2 = InpRe1 - InpRe3;
- ImTmp2 = InpIm1 - InpIm3;
-
- //Inp2 + Inp4
- ReTmp3 = InpRe2 + InpRe4;
- ImTmp3 = InpIm2 + InpIm4;
-
- //Inp2 - Inp4
- ReTmp4 = InpRe2 - InpRe4;
- ImTmp4 = InpIm2 - InpIm4;
-
- /*Radix-4 Butterfly calculation*/
- /*Third Result*/
- OutRe3 = ReTmp1 - ReTmp3;
- OutIm3 = ImTmp1 - ImTmp3;
- /*First Result*/
- OutRe1 = ReTmp1 + ReTmp3;
- OutIm1 = ImTmp1 + ImTmp3;
- /*Second result*/
- OutRe2 = ReTmp2 + ImTmp4;
- OutIm2 = ImTmp2 - ReTmp4;
- /*Fourth Result*/
- OutRe4 = ReTmp2 - ImTmp4;
- OutIm4 = ImTmp2 + ReTmp4;
-
- *pOut1++ = OutRe1;
- *pOut1++ = OutIm1;
- *pOut1++ = OutRe2;
- *pOut1++ = OutIm2;
- *pOut1++ = OutRe3;
- *pOut1++ = OutIm3;
- *pOut1++ = OutRe4;
- *pOut1++ = OutIm4;
- pT1+=2;
-
- }
- /*Remaining FFT Stages: Second Stage to Last Stage*/
- /* Update the Grp count and size for the next stage */
- SubFFTSize = SubFFTSize<<2;
- SubFFTNum = SubFFTNum>>2;
- twidStep = 0;
- /*Swap Input and Output*/
- pTmp = pDst;
- pDst = pSrc;
- pSrc= pTmp;
-
- while(SubFFTNum > 0)
- {
- grpCount = SubFFTNum; /*Number of Blocks*/
- setCount = SubFFTSize>>2; /* setCount is number of Butterflies */
- grpStep = 0;
- OutStep = (OutStep<<2);
- pT1 = pSrc;
- for(grp = 0; grp < grpCount; grp++)
- {
- pOut1 = pDst + (grpStep<<1);
- pOut2 = pOut1 + (OutStep<<1);
- pOut3 = pOut2 + (OutStep<<1);
- pOut4 = pOut3 + (OutStep<<1);
-
- pTw2 = pCoef + twidStep;
- pTw3 = pTw2 + (setCount<<1);
- pTw4 = pTw3 + (setCount<<1);
- for(set = 0; set < setCount; set++)
- {
- InpRe1 = pT1[0];
- InpIm1 = pT1[1];
- InpRe2 = pT1[InpStep<<1];
- InpIm2 = pT1[(InpStep<<1)+1];
- InpRe3 = pT1[(InpStep<<2)];
- InpIm3 = pT1[(InpStep<<2)+1];
- InpRe4 = pT1[3*(InpStep<<1)];
- InpIm4 = pT1[3*(InpStep<<1)+1];
-
- /*Load Twiddles*/
- TwRe2 = *pTw2++;
- TwIm2 = *pTw2++;
- TwRe3 = *pTw3++;
- TwIm3 = *pTw3++;
- TwRe4 = *pTw4++;
- TwIm4 = *pTw4++;
-
-
- /*Butterfly calculation*/
- //CPLX_MUL (pTmpT2, pTw2, Inp2);
- ReTmpT2 = InpRe2*TwRe2 + InpIm2*TwIm2;
- ImTmpT2 = InpIm2*TwRe2 - InpRe2*TwIm2;
-
- //CPLX_MUL (pTmpT3, pTw3, Inp3);
- ReTmpT3 = InpRe3*TwRe3 + InpIm3*TwIm3;
- ImTmpT3 = InpIm3*TwRe3 - InpRe3*TwIm3;
-
-
- //CPLX_MUL (pTmpT4, pTw4, Inp4);
- ReTmpT4 = InpRe4*TwRe4 + InpIm4*TwIm4;
- ImTmpT4 = InpIm4*TwRe4 - InpRe4*TwIm4;
- //CPLX_ADD (pTmp1, Inp1, pTmpT3);
- ReTmp1 = InpRe1 + ReTmpT3;
- ImTmp1 = InpIm1 + ImTmpT3;
-
- //CPLX_SUB (pTmp2, pT1, pTmpT3);
- ReTmp2 = InpRe1 - ReTmpT3;
- ImTmp2 = InpIm1 - ImTmpT3;
-
- //CPLX_ADD (pTmp3, pTmpT2, pTmpT4);
- ReTmp3 = ReTmpT2 + ReTmpT4;
- ImTmp3 = ImTmpT2 + ImTmpT4;
-
- //CPLX_SUB (pTmp4, pTmpT2, pTmpT4);
- ReTmp4 = ReTmpT2 - ReTmpT4;
- ImTmp4 = ImTmpT2 - ImTmpT4;
-
-
-
- /*Third Result*/
- //CPLX_SUB (pT3, pTmp1, pTmp3);
- OutRe3 = ReTmp1 - ReTmp3;
- OutIm3 = ImTmp1 - ImTmp3;
- /*First Result*/
- //CPLX_ADD (pT1, pTmp1, pTmp3);
-
- OutRe1 = ReTmp1 + ReTmp3;
- OutIm1 = ImTmp1 + ImTmp3;
- /*Second result*/
- //CPLX_ADD_SUB_X (pT2, pTmp2, pTmp4);
- OutRe2 = ReTmp2 + ImTmp4;
- OutIm2 = ImTmp2 - ReTmp4;
- /*Fourth Result*/
- //CPLX_SUB_ADD_X (pT4, pTmp2, pTmp4);
- OutRe4 = ReTmp2 - ImTmp4;
- OutIm4 = ImTmp2 + ReTmp4;
- /*Store the Result*/
- *pOut1++ = OutRe1;
- *pOut1++ = OutIm1;
- *pOut2++ = OutRe2;
- *pOut2++ = OutIm2;
- *pOut3++ = OutRe3;
- *pOut3++ = OutIm3;
- *pOut4++ = OutRe4;
- *pOut4++ = OutIm4;
-
- pT1+=2;
- }
- grpStep = grpStep + SubFFTSize;
- }
- /* Update the Grp count and size for the next stage */
- twidStep+= (3*(setCount<<1));
- SubFFTSize = SubFFTSize<<2;
- SubFFTNum = SubFFTNum>>2;
- /*Swap Input and Output*/
- pTmp = pDst;
- pDst = pSrc;
- pSrc= pTmp;
- }
-
- /* if the N is even power of 4, copy the output to dst buffer */
- SubFFTNum = 0;
- set = N;
- while (set > 1)
- {
- set = set>>2;
- SubFFTNum++;
- }
-
- if((SubFFTNum&1) == 0)
- {
- pT1 = pSrc;
- pOut1 = pDst;
- for(grpCount = 0; grpCount < N; grpCount++)
- {
- *pOut1++ = *pT1++;
- *pOut1++ = *pT1++;
- };
- }
-}
-
-
-/**
- * @brief Core radix-4 IFFT of floating-point data.
- * @param[out] *pDst point to the output buffer (out-of-place)
- * @param[in] *pSrc point to the input buffer (out-of-place: the pSrc is used for intermedia buffer, so the input buffer is destroyed)
- * @param[in] N length of FFT
- * @param[in] *pCoef point to the twiddle factors
- * @return none.
- * The function implements a Radix-4 Complex IFFT
- */
-
-void ne10_radix4_butterfly_inverse_float_c(
- ne10_float32_t *pDst,
- ne10_float32_t *pSrc,
- ne10_uint16_t N,
- ne10_float32_t *pCoef,
- ne10_float32_t onebyN)
-{
- ne10_int32_t set,grp;
- ne10_int32_t setCount,grpCount,grpStep,twidStep;
- ne10_float32_t *pTw2,*pTw3,*pTw4,*pT1,*pTmp;
- ne10_float32_t *pOut1,*pOut2,*pOut3,*pOut4;
- ne10_int32_t SubFFTSize=4,SubFFTNum=N/4; /*Intial Number of Groups and Group Size*/
- ne10_float32_t InpRe1,InpIm1,InpRe2,InpIm2,InpRe3,InpIm3,InpRe4,InpIm4;
- ne10_float32_t OutRe1,OutIm1,OutRe2,OutIm2,OutRe3,OutIm3,OutRe4,OutIm4;
- ne10_float32_t ReTmp1,ImTmp1,ReTmp2,ImTmp2,ReTmp3,ImTmp3,ReTmp4,ImTmp4;
- ne10_float32_t TwRe2,TwIm2,TwRe3,TwIm3,TwRe4,TwIm4;
- ne10_float32_t ReTmpT2,ImTmpT2,ReTmpT3,ImTmpT3,ReTmpT4,ImTmpT4;
- ne10_int32_t InpStep=(N>>2),OutStep=1;
-
- /*First stage*/
- grpCount = SubFFTNum;
- pT1 = pSrc;
- pOut1 = pDst;
-
- for(grp = 0; grp < grpCount; grp++)
- {
- InpRe1 = pT1[0];
- InpIm1 = pT1[1];
- InpRe2 = pT1[InpStep<<1];
- InpIm2 = pT1[(InpStep<<1)+1];
- InpRe3 = pT1[(InpStep<<2)];
- InpIm3 = pT1[(InpStep<<2)+1];
- InpRe4 = pT1[3*(InpStep<<1)];
- InpIm4 = pT1[3*(InpStep<<1)+1];
-
- //Inp1 + Inp3
- ReTmp1 = InpRe1 + InpRe3;
- ImTmp1 = InpIm1 + InpIm3;
- //Inp1 - Inp3
- ReTmp2 = InpRe1 - InpRe3;
- ImTmp2 = InpIm1 - InpIm3;
-
- //Inp2 + Inp4
- ReTmp3 = InpRe2 + InpRe4;
- ImTmp3 = InpIm2 + InpIm4;
-
- //Inp2 - Inp4
- ReTmp4 = InpRe2 - InpRe4;
- ImTmp4 = InpIm2 - InpIm4;
-
- /*Radix-4 Butterfly calculation*/
- /*Third Result*/
- OutRe3 = ReTmp1 - ReTmp3;
- OutIm3 = ImTmp1 - ImTmp3;
- /*First Result*/
- OutRe1 = ReTmp1 + ReTmp3;
- OutIm1 = ImTmp1 + ImTmp3;
- /*Second result*/
- OutRe2 = ReTmp2 - ImTmp4;
- OutIm2 = ImTmp2 + ReTmp4;
- /*Fourth Result*/
- OutRe4 = ReTmp2 + ImTmp4;
- OutIm4 = ImTmp2 - ReTmp4;
-
- *pOut1++ = OutRe1;
- *pOut1++ = OutIm1;
- *pOut1++ = OutRe2;
- *pOut1++ = OutIm2;
- *pOut1++ = OutRe3;
- *pOut1++ = OutIm3;
- *pOut1++ = OutRe4;
- *pOut1++ = OutIm4;
- pT1+=2;
-
- }
- /*Intermediate FFT Stages: Second Stage to Last but one Stage*/
- /* Update the Grp count and size for the next stage */
- SubFFTSize = SubFFTSize<<2;
- SubFFTNum = SubFFTNum>>2;
- twidStep = 0;
- /*Swap Input and Output*/
- pTmp = pDst;
- pDst = pSrc;
- pSrc= pTmp;
-
- while(SubFFTNum > 1)
- {
- grpCount = SubFFTNum; /*Number of Blocks*/
- setCount = SubFFTSize>>2; /* setCount is number of Butterflies */
- grpStep = 0;
- OutStep = (OutStep<<2);
- pT1 = pSrc;
- for(grp = 0; grp < grpCount; grp++)
- {
- pOut1 = pDst + (grpStep<<1);
- pOut2 = pOut1 + (OutStep<<1);
- pOut3 = pOut2 + (OutStep<<1);
- pOut4 = pOut3 + (OutStep<<1);
-
- pTw2 = pCoef + twidStep;
- pTw3 = pTw2 + (setCount<<1);
- pTw4 = pTw3 + (setCount<<1);
- for(set = 0; set < setCount; set++)
- {
- InpRe1 = pT1[0];
- InpIm1 = pT1[1];
- InpRe2 = pT1[InpStep<<1];
- InpIm2 = pT1[(InpStep<<1)+1];
- InpRe3 = pT1[(InpStep<<2)];
- InpIm3 = pT1[(InpStep<<2)+1];
- InpRe4 = pT1[3*(InpStep<<1)];
- InpIm4 = pT1[3*(InpStep<<1)+1];
-
- /*Load Twiddles*/
- TwRe2 = *pTw2++;
- TwIm2 = *pTw2++;
- TwRe3 = *pTw3++;
- TwIm3 = *pTw3++;
- TwRe4 = *pTw4++;
- TwIm4 = *pTw4++;
-
-
- /*Butterfly calculation*/
- //CPLX_MUL (pTmpT2, pTw2, Inp2);
- ReTmpT2 = InpRe2*TwRe2 - InpIm2*TwIm2;
- ImTmpT2 = InpIm2*TwRe2 + InpRe2*TwIm2;
-
- //CPLX_MUL (pTmpT3, pTw3, Inp3);
- ReTmpT3 = InpRe3*TwRe3 - InpIm3*TwIm3;
- ImTmpT3 = InpIm3*TwRe3 + InpRe3*TwIm3;
-
- //CPLX_MUL (pTmpT4, pTw4, Inp4);
- ReTmpT4 = InpRe4*TwRe4 - InpIm4*TwIm4;
- ImTmpT4 = InpIm4*TwRe4 + InpRe4*TwIm4;
-
- //CPLX_ADD (pTmp1, Inp1, pTmpT3);
- ReTmp1 = InpRe1 + ReTmpT3;
- ImTmp1 = InpIm1 + ImTmpT3;
-
- //CPLX_SUB (pTmp2, pT1, pTmpT3);
- ReTmp2 = InpRe1 - ReTmpT3;
- ImTmp2 = InpIm1 - ImTmpT3;
-
- //CPLX_ADD (pTmp3, pTmpT2, pTmpT4);
- ReTmp3 = ReTmpT2 + ReTmpT4;
- ImTmp3 = ImTmpT2 + ImTmpT4;
-
- //CPLX_SUB (pTmp4, pTmpT2, pTmpT4);
- ReTmp4 = ReTmpT2 - ReTmpT4;
- ImTmp4 = ImTmpT2 - ImTmpT4;
-
-
- /*Third Result*/
- //CPLX_SUB (pT3, pTmp1, pTmp3);
- OutRe3 = ReTmp1 - ReTmp3;
- OutIm3 = ImTmp1 - ImTmp3;
-
- /*First Result*/
- //CPLX_ADD (pT1, pTmp1, pTmp3);
- OutRe1 = ReTmp1 + ReTmp3;
- OutIm1 = ImTmp1 + ImTmp3;
-
- /*Second result*/
- //CPLX_SUB_ADD_X (pT2, pTmp2, pTmp4);
- OutRe2 = ReTmp2 - ImTmp4;
- OutIm2 = ImTmp2 + ReTmp4;
-
- /*Fourth Result*/
- //CPLX_ADD_SUB_X (pT4, pTmp2, pTmp4);
- OutRe4 = ReTmp2 + ImTmp4;
- OutIm4 = ImTmp2 - ReTmp4;
-
- /*Store the Result*/
- *pOut1++ = OutRe1;
- *pOut1++ = OutIm1;
- *pOut2++ = OutRe2;
- *pOut2++ = OutIm2;
- *pOut3++ = OutRe3;
- *pOut3++ = OutIm3;
- *pOut4++ = OutRe4;
- *pOut4++ = OutIm4;
-
- pT1+=2;
- }
- grpStep = grpStep + SubFFTSize;
- }
- /* Update the Grp count and size for the next stage */
- twidStep+= (3*(setCount<<1));
- SubFFTSize = SubFFTSize<<2;
- SubFFTNum = SubFFTNum>>2;
- /*Swap Input and Output*/
- pTmp = pDst;
- pDst = pSrc;
- pSrc= pTmp;
- }
-
- /* last stage */
- setCount = SubFFTSize>>2; /* setCount is number of Butterflies */
- grpStep = 0;
- OutStep = (OutStep<<2);
- pT1 = pSrc;
-
- pOut1 = pDst + (grpStep<<1);
- pOut2 = pOut1 + (OutStep<<1);
- pOut3 = pOut2 + (OutStep<<1);
- pOut4 = pOut3 + (OutStep<<1);
-
- pTw2 = pCoef + twidStep;
- pTw3 = pTw2 + (setCount<<1);
- pTw4 = pTw3 + (setCount<<1);
-
- for(set = 0; set < setCount; set++)
- {
- InpRe1 = pT1[0];
- InpIm1 = pT1[1];
- InpRe2 = pT1[InpStep<<1];
- InpIm2 = pT1[(InpStep<<1)+1];
- InpRe3 = pT1[(InpStep<<2)];
- InpIm3 = pT1[(InpStep<<2)+1];
- InpRe4 = pT1[3*(InpStep<<1)];
- InpIm4 = pT1[3*(InpStep<<1)+1];
-
- /*Load Twiddles*/
- TwRe2 = *pTw2++;
- TwIm2 = *pTw2++;
- TwRe3 = *pTw3++;
- TwIm3 = *pTw3++;
- TwRe4 = *pTw4++;
- TwIm4 = *pTw4++;
-
-
- /*Butterfly calculation*/
- //CPLX_MUL (pTmpT2, pTw2, Inp2);
- ReTmpT2 = InpRe2*TwRe2 - InpIm2*TwIm2;
- ImTmpT2 = InpIm2*TwRe2 + InpRe2*TwIm2;
-
- //CPLX_MUL (pTmpT3, pTw3, Inp3);
- ReTmpT3 = InpRe3*TwRe3 - InpIm3*TwIm3;
- ImTmpT3 = InpIm3*TwRe3 + InpRe3*TwIm3;
-
- //CPLX_MUL (pTmpT4, pTw4, Inp4);
- ReTmpT4 = InpRe4*TwRe4 - InpIm4*TwIm4;
- ImTmpT4 = InpIm4*TwRe4 + InpRe4*TwIm4;
-
- //CPLX_ADD (pTmp1, Inp1, pTmpT3);
- ReTmp1 = InpRe1 + ReTmpT3;
- ImTmp1 = InpIm1 + ImTmpT3;
-
- //CPLX_SUB (pTmp2, pT1, pTmpT3);
- ReTmp2 = InpRe1 - ReTmpT3;
- ImTmp2 = InpIm1 - ImTmpT3;
-
- //CPLX_ADD (pTmp3, pTmpT2, pTmpT4);
- ReTmp3 = ReTmpT2 + ReTmpT4;
- ImTmp3 = ImTmpT2 + ImTmpT4;
-
- //CPLX_SUB (pTmp4, pTmpT2, pTmpT4);
- ReTmp4 = ReTmpT2 - ReTmpT4;
- ImTmp4 = ImTmpT2 - ImTmpT4;
-
-
- /*Third Result*/
- //CPLX_SUB (pT3, pTmp1, pTmp3);
- OutRe3 = ReTmp1 - ReTmp3;
- OutIm3 = ImTmp1 - ImTmp3;
-
- /*First Result*/
- //CPLX_ADD (pT1, pTmp1, pTmp3);
- OutRe1 = ReTmp1 + ReTmp3;
- OutIm1 = ImTmp1 + ImTmp3;
-
- /*Second result*/
- //CPLX_SUB_ADD_X (pT2, pTmp2, pTmp4);
- OutRe2 = ReTmp2 - ImTmp4;
- OutIm2 = ImTmp2 + ReTmp4;
-
- /*Fourth Result*/
- //CPLX_ADD_SUB_X (pT4, pTmp2, pTmp4);
- OutRe4 = ReTmp2 + ImTmp4;
- OutIm4 = ImTmp2 - ReTmp4;
-
- /*Store the Result*/
- *pOut1++ = OutRe1 * onebyN;
- *pOut1++ = OutIm1 * onebyN;
- *pOut2++ = OutRe2 * onebyN;
- *pOut2++ = OutIm2 * onebyN;
- *pOut3++ = OutRe3 * onebyN;
- *pOut3++ = OutIm3 * onebyN;
- *pOut4++ = OutRe4 * onebyN;
- *pOut4++ = OutIm4 * onebyN;
-
- pT1+=2;
- }
-
- /*Swap Input and Output*/
- pTmp = pDst;
- pDst = pSrc;
- pSrc= pTmp;
-
-
- /* if the N is even power of 4, copy the output to dst buffer */
- SubFFTNum = 0;
- set = N;
- while (set > 1)
- {
- set = set>>2;
- SubFFTNum++;
- }
-
- if((SubFFTNum&1) == 0)
- {
- pT1 = pSrc;
- pOut1 = pDst;
- for(grpCount = 0; grpCount < N; grpCount++)
- {
- *pOut1++ = *pT1++;
- *pOut1++ = *pT1++;
- };
- }
-}
-
-
-/**
- * @} end of CFFT_CIFFT group
- */
+++ /dev/null
-@
-@ Copyright 2012-14 ARM Limited
-@ All rights reserved.
-@
-@ Redistribution and use in source and binary forms, with or without
-@ modification, are permitted provided that the following conditions are met:
-@ * Redistributions of source code must retain the above copyright
-@ notice, this list of conditions and the following disclaimer.
-@ * Redistributions in binary form must reproduce the above copyright
-@ notice, this list of conditions and the following disclaimer in the
-@ documentation and/or other materials provided with the distribution.
-@ * Neither the name of ARM Limited nor the
-@ names of its contributors may be used to endorse or promote products
-@ derived from this software without specific prior written permission.
-@
-@ THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
-@ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-@ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-@ DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
-@ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-@ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-@ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-@ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-@ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-@
-
-@/*
-@ * NE10 Library : dsp/NE10_cfft.neon.s
-@ */
-
-@/*
-@ * Note:
-@ * 1. Currently, this is for soft VFP EABI, not for hard vfpv3 ABI yet
-@ * 2. In the assembly code, we use D0-D31 registers. So VFPv3-D32 is used. In VFPv3-D16, there will be failure
-@ */
-
- .text
- .syntax unified
-
- @/*
- @ * @brief Core radix-4 FFT of floating-point data. Do not call this function directly.
- @ * @param[out] *pDst points to the output buffer
- @ * @param[in] *pSrc points to the input buffer
- @ * @param[in] N length of FFT
- @ * @param[in] *pCoef points to the twiddle factors
- @ * @retureq none.
- @ * The function implements a Radix-4 Complex FFT
- @ */
-
- .align 4
- .global ne10_radix4_butterfly_float_neon
- .thumb
- .thumb_func
-
-ne10_radix4_butterfly_float_neon:
-
- PUSH {r4-r12,lr} @push r12: to keep stack 8 bytes aligned
- VPUSH {d8-d15}
-
- qInp1 .qn Q0.F32
- qInp2 .qn Q1.F32
- qInp3 .qn Q2.F32
- qInp4 .qn Q3.F32
- qInp5 .qn Q4.F32
- qInp6 .qn Q5.F32
- qInp7 .qn Q6.F32
- qInp8 .qn Q7.F32
-
- qTwd2Re .qn Q8.F32
- qTwd2Im .qn Q9.F32
- qTwd3Re .qn Q10.F32
- qTwd3Im .qn Q11.F32
- qTwd4Re .qn Q12.F32
- qTwd4Im .qn Q13.F32
-
- qReTmpT2 .qn Q14.F32
- qImTmpT2 .qn Q15.F32
-
- qReTmpT3 .qn Q2.F32
- qImTmpT3 .qn Q3.F32
-
- qReTmpT4 .qn Q4.F32
- qImTmpT4 .qn Q5.F32
-
- qRe1 .qn Q8.F32
- qIm1 .qn Q9.F32
- qRe2 .qn Q10.F32
- qIm2 .qn Q11.F32
- qRe3 .qn Q12.F32
- qIm3 .qn Q13.F32
- qRe4 .qn Q14.F32
- qIm4 .qn Q15.F32
-
- pDst .req R0
- pSrc .req R1
- fftSize .req R2
- pCoef .req R3
-
-
- SubFFTSize .req R4
- SubFFTNum .req R5
- grpCount .req R6
- twidStep .req R8
- setCount .req R9
- grpStep .req R10
-
- pT1 .req R7
- pOut1 .req R11
- pTw2 .req R12
- TwdStep .req R14
- pTmp .req R7
-
- LSR SubFFTNum,fftSize,#2
- MOV SubFFTSize,#4
- MOV pT1,pSrc
- LSR grpCount,SubFFTNum,#2
- MOV pOut1,pDst
- LSL fftSize,#1
-
-fftGrpLoop:
- VLD2 {qInp1,qInp2},[pT1],fftSize @/*Load Input Values*/
- VLD2 {qInp3,qInp4},[pT1],fftSize
- VLD2 {qInp5,qInp6},[pT1],fftSize
- VLD2 {qInp7,qInp8},[pT1],fftSize
-
- @/*pSrc[0] + pSrc[2]*/
- VADD qRe1,qInp1,qInp5
- VADD qIm1,qInp2,qInp6
- @/*pSrc[0] - pSrc[2]*/
- VSUB qRe2,qInp1,qInp5
- VSUB qIm2,qInp2,qInp6
- @/*pSrc[1] + pSrc[3]*/
- VADD qRe3,qInp3,qInp7
- VADD qIm3,qInp4,qInp8
- @/*pSrc[1] - pSrc[3]*/
- VSUB qRe4,qInp3,qInp7
- VSUB qIm4,qInp4,qInp8
-
- @/*Radix-4 Butterfly calculation*/
- @/*Third Result*/
- VSUB qInp5,qRe1,qRe3
- VSUB qInp6,qIm1,qIm3
- @/*First Result*/
- VADD qInp1,qRe1,qRe3
- VADD qInp2,qIm1,qIm3
- @/*Second result*/
- VADD qInp3,qRe2,qIm4
- VSUB qInp4,qIm2,qRe4
- @/*Fourth Result*/
- VSUB qInp7,qRe2,qIm4
- VADD qInp8,qIm2,qRe4
-
- @/*Get Result in correct order for storing*/
- @/*4Re2,4Re0,3Re2,3Re0 2Re2,2Re0,1Re2,1Re0*/
- VZIP qInp1,qInp5
- @/*4Re3,4Re1,3Re3,3Re1 2Re3,2Re1,1Re3,1Re1*/
- VZIP qInp3,qInp7
-
- @/*4Im2,4Im0,3Im2,3Im0 2Im2,2Im0,1Im2,1Im0*/
- VZIP qInp2,qInp6
- @/*4Im3,4Im1,3Im2,3Im1 2Im3,2Im1,1Im3,1Im1*/
- VZIP qInp4,qInp8
-
- SUB pT1,pT1,fftSize, LSL #2
-
- VST4.F32 {d0,d2,d4,d6},[pOut1]!
- VST4.F32 {d1,d3,d5,d7},[pOut1]!
- SUBS grpCount,#1
- ADD pT1,pT1,#32
- VST4.F32 {d8,d10,d12,d14},[pOut1]!
- VST4.F32 {d9,d11,d13,d15},[pOut1]!
-
- BGT fftGrpLoop
-
- @/* Swap Input and Output*/
- MOV pTmp,pDst
- MOV pDst,pSrc
- MOV pSrc,pTmp
-
- @/*Remaining FFT Stages: Second Stage to Last Stage*/
- @/* Update the Grp count and size for the next stage */
- LSR SubFFTNum,#2
- LSL SubFFTSize,#2
-
-fftStageLoop:
- MOV grpCount,SubFFTNum
- MOV grpStep,#0
- ADD pT1,pSrc,fftSize
- LSL TwdStep,SubFFTSize,#1
-
-fftGrpLoop1:
- LSR setCount,SubFFTSize,#2
- ADD pOut1,pDst,grpStep,LSL #3
- MOV pTw2,pCoef
-
- LSL SubFFTSize,#1
-
-fftSetLoop:
- VLD2 {qTwd2Re,qTwd2Im},[pTw2],TwdStep
- VLD2 {qInp3,qInp4},[pT1],fftSize
- @/*CPLX_MUL (pTmpT2, pTw2, pT2);*/
- VMUL qReTmpT2,qTwd2Re,qInp3
- VMUL qImTmpT2,qTwd2Re,qInp4
- VLD2 {qTwd3Re,qTwd3Im},[pTw2],TwdStep
- VLD2 {qInp5,qInp6},[pT1],fftSize
- VMLA qReTmpT2,qTwd2Im,qInp4
- VMLS qImTmpT2,qTwd2Im,qInp3
-
-
- @/*CPLX_MUL (pTmpT3, pTw3, pT3);*/
- VMUL qReTmpT3,qTwd3Re,qInp5
- VMUL qImTmpT3,qTwd3Re,qInp6
- VLD2 {qTwd4Re,qTwd4Im},[pTw2]
- VLD2 {qInp7,qInp8},[pT1],fftSize
- VMLA qReTmpT3,qTwd3Im,qInp6
- VMLS qImTmpT3,qTwd3Im,qInp5
-
- SUB pT1,pT1,fftSize, LSL #2
-
-
- @/*CPLX_MUL (pTmpT4, pTw4, pT4);*/
- VMUL qReTmpT4,qTwd4Re,qInp7
- VMUL qImTmpT4,qTwd4Re,qInp8
- VLD2 {qInp1,qInp2},[pT1],fftSize
- VMLA qReTmpT4,qTwd4Im,qInp8
- VMLS qImTmpT4,qTwd4Im,qInp7
-
-
- @/*CPLX_ADD (pTmp1, pT1, pTmpT3);*/
- VADD qRe1,qInp1,qReTmpT3
- VADD qIm1,qInp2,qImTmpT3
- @/*CPLX_SUB (pTmp2, pT1, pTmpT3);*/
- VSUB qRe2,qInp1,qReTmpT3
- VSUB qIm2,qInp2,qImTmpT3
- @/*CPLX_ADD (pTmp3, pTmpT2, pTmpT4);*/
- VADD qRe3,qReTmpT2,qReTmpT4
- VADD qIm3,qImTmpT2,qImTmpT4
- @/*CPLX_SUB (pTmp4, pTmpT2, pTmpT4);*/
- VSUB qRe4,qReTmpT2,qReTmpT4
- VSUB qIm4,qImTmpT2,qImTmpT4
-
- @/*CPLX_ADD (pT1, pTmp1, pTmp3);*/
- VADD qInp1,qRe1,qRe3
- VADD qInp2,qIm1,qIm3
-
- @/*CPLX_ADD_SUB_X (pT2, pTmp2, pTmp4);*/
- VADD qInp3,qRe2,qIm4
- VSUB qInp4,qIm2,qRe4
-
- @/*CPLX_SUB (pT3, pTmp1, pTmp3);*/
- VSUB qInp5,qRe1,qRe3
- VSUB qInp6,qIm1,qIm3
- @/*CPLX_SUB_ADD_X (pT4, pTmp2, pTmp4);*/
- VSUB qInp7,qRe2,qIm4
- VADD qInp8,qIm2,qRe4
-
- SUBS setCount,#4
- @/* Store the Result*/
-
- VST2 {qInp1,qInp2},[pOut1],SubFFTSize
- VST2 {qInp3,qInp4},[pOut1],SubFFTSize
-
- VST2 {qInp5,qInp6},[pOut1],SubFFTSize
- VST2 {qInp7,qInp8},[pOut1],SubFFTSize
-
- SUB pTw2,pTw2,TwdStep, LSL #1
- SUB pOut1,pOut1,SubFFTSize, LSL #2
-
- ADD pT1,pT1,#32
- ADD pTw2,pTw2,#32
- ADD pOut1,pOut1,#32
-
- BGT fftSetLoop
- LSR SubFFTSize,#1
- SUBS grpCount,grpCount,#1
- ADD grpStep,grpStep,SubFFTSize
-
- BGT fftGrpLoop1
- @/* Update the Grp count and size for the next stage */
- ADD twidStep,SubFFTSize,SubFFTSize, LSL #1
- LSRS SubFFTNum,SubFFTNum,#2
-
- @/* Swap Input and Output*/
- MOV pTmp,pDst
- MOV pDst,pSrc
- MOV pSrc,pTmp
-
- ADD pCoef,pCoef,twidStep,LSL #1
-
- LSL SubFFTSize,SubFFTSize,#2
-
- BGT fftStageLoop
-
- @/* if the N is even power of 4, copy the output to dst buffer */
- ASR fftSize,fftSize,#1
- CLZ SubFFTNum,fftSize
- MOV setCount, #32
- SUB SubFFTNum, setCount, SubFFTNum
- ASR SubFFTNum,SubFFTNum,#1
- ANDS SubFFTNum, SubFFTNum, #1
-
- BNE fftEnd
-
- ASR grpCount, fftSize, #4
-fftCopyLoop:
- VLD1.F32 {d0,d1,d2,d3},[pSrc]!
- VLD1.F32 {d4,d5,d6,d7},[pSrc]!
- VLD1.F32 {d8,d9,d10,d11},[pSrc]!
- VLD1.F32 {d12,d13,d14,d15},[pSrc]!
-
- SUBS grpCount,#1
- VST1.F32 {d0,d1,d2,d3},[pDst]!
- VST1.F32 {d4,d5,d6,d7},[pDst]!
- VST1.F32 {d8,d9,d10,d11},[pDst]!
- VST1.F32 {d12,d13,d14,d15},[pDst]!
-
- BGT fftCopyLoop
-
-fftEnd:
- @/* Return From Function*/
- VPOP {d8-d15}
- POP {r4-r12,pc}
-
- @/*
- @ * @brief Core radix-4 IFFT of floating-point data. Do not call this function directly.
- @ * @param[out] *pDst points to the output buffer
- @ * @param[in] *pSrc points to the input buffer; it is also written to, because
- @ *                 every stage writes to pDst and then swaps pSrc and pDst
- @ * @param[in] N length of FFT
- @ * @param[in] *pCoef points to the twiddle factors
- @ * @param[in] onebyN reciprocal of FFT length
- @ * @return none.
- @ * The function implements a radix-4 complex inverse FFT on float32 data.
- @ * The first stage uses no twiddle multiplies, the following stages multiply by
- @ * twiddles read from pCoef, and the last stage also scales every result by onebyN.
- @ */
-
- .align 4
- .global ne10_radix4_butterfly_inverse_float_neon
- .thumb
- .thumb_func
-
-ne10_radix4_butterfly_inverse_float_neon:
-
- PUSH {r4-r12,lr} @push r12: to keep stack 8 bytes aligned
- VPUSH {d8-d15} @ 10 core registers (40 bytes) + 8 D registers (64 bytes) = 104 bytes saved so far
-#if defined (NE10_ENABLE_HF)
- VPUSH {s0,s1} @ spill s0/s1 so the last stage can reload s0 from [sp,#0]; presumably s0 carries onebyN with hard-float calls - TODO confirm
-#endif
- @ Deinterleaved inputs/results: odd-numbered names hold real parts, even-numbered hold imaginary parts
- qInp1 .qn Q0.F32
- qInp2 .qn Q1.F32
- qInp3 .qn Q2.F32
- qInp4 .qn Q3.F32
- qInp5 .qn Q4.F32
- qInp6 .qn Q5.F32
- qInp7 .qn Q6.F32
- qInp8 .qn Q7.F32
-
- @ Twiddle factors for the second, third and fourth butterfly inputs
- qTwd2Re .qn Q8.F32
- qTwd2Im .qn Q9.F32
- qTwd3Re .qn Q10.F32
- qTwd3Im .qn Q11.F32
- qTwd4Re .qn Q12.F32
- qTwd4Im .qn Q13.F32
-
- qReTmpT2 .qn Q14.F32
- qImTmpT2 .qn Q15.F32
-
- @ The twiddled products below reuse input registers: TmpT3 shares Q2/Q3 with qInp3/qInp4
- qReTmpT3 .qn Q2.F32
- qImTmpT3 .qn Q3.F32
-
- @ TmpT4 shares Q4/Q5 with qInp5/qInp6
- qReTmpT4 .qn Q4.F32
- qImTmpT4 .qn Q5.F32
-
- @ Butterfly sums/differences share Q8-Q15 with the twiddle and TmpT2 registers
- qRe1 .qn Q8.F32
- qIm1 .qn Q9.F32
- qRe2 .qn Q10.F32
- qIm2 .qn Q11.F32
- qRe3 .qn Q12.F32
- qIm3 .qn Q13.F32
- qRe4 .qn Q14.F32
- qIm4 .qn Q15.F32
-
- @ Incoming arguments
- pDst .req R0
- pSrc .req R1
- fftSize .req R2
- pCoef .req R3
-
-
- SubFFTSize .req R4
- SubFFTNum .req R5
- grpCount .req R6
- twidStep .req R8
- setCount .req R9
- grpStep .req R10
-
- pT1 .req R7
- pOut1 .req R11
- pTw2 .req R12
- TwdStep .req R14 @ R14 is lr; it was saved by the PUSH above and is restored into pc by the final POP
- pTmp .req R7 @ same register as pT1, so pTmp is only usable where pT1 is not live
-
- LSR SubFFTNum,fftSize,#2 @ SubFFTNum = N/4
- MOV SubFFTSize,#4
- MOV pT1,pSrc
- LSR grpCount,SubFFTNum,#2 @ N/16 first-stage iterations; each iteration stores 16 complex values
- MOV pOut1,pDst
- LSL fftSize,#1 @ fftSize = 2*N, used below as the byte stride between the four butterfly inputs
-
- @ First stage: radix-4 butterflies without twiddle multiplication
-ifftGrpLoop:
- VLD2 {qInp1,qInp2},[pT1],fftSize @/*Load Input Values*/
- VLD2 {qInp3,qInp4},[pT1],fftSize
- VLD2 {qInp5,qInp6},[pT1],fftSize
- VLD2 {qInp7,qInp8},[pT1],fftSize
-
- @/*pSrc[0] + pSrc[2]*/
- VADD qRe1,qInp1,qInp5
- VADD qIm1,qInp2,qInp6
- @/*pSrc[0] - pSrc[2]*/
- VSUB qRe2,qInp1,qInp5
- VSUB qIm2,qInp2,qInp6
- @/*pSrc[1] + pSrc[3]*/
- VADD qRe3,qInp3,qInp7
- VADD qIm3,qInp4,qInp8
- @/*pSrc[1] - pSrc[3]*/
- VSUB qRe4,qInp3,qInp7
- VSUB qIm4,qInp4,qInp8
-
- @/*Radix-4 Butterfly calculation*/
- @/*Third Result*/
- VSUB qInp5,qRe1,qRe3
- VSUB qInp6,qIm1,qIm3
- @/*First Result*/
- VADD qInp1,qRe1,qRe3
- VADD qInp2,qIm1,qIm3
- @/*Second result*/
- VSUB qInp3,qRe2,qIm4
- VADD qInp4,qIm2,qRe4
- @/*Fourth Result*/
- VADD qInp7,qRe2,qIm4
- VSUB qInp8,qIm2,qRe4
-
- @/*Get Result in correct order for storing*/
- @/*4Re2,4Re0,3Re2,3Re0 2Re2,2Re0,1Re2,1Re0*/
- VZIP qInp1,qInp5
- @/*4Re3,4Re1,3Re3,3Re1 2Re3,2Re1,1Re3,1Re1*/
- VZIP qInp3,qInp7
-
- @/*4Im2,4Im0,3Im2,3Im0 2Im2,2Im0,1Im2,1Im0*/
- VZIP qInp2,qInp6
- @/*4Im3,4Im1,3Im3,3Im1 2Im3,2Im1,1Im3,1Im1*/
- VZIP qInp4,qInp8
-
-
- SUB pT1,pT1,fftSize, LSL #2 @ undo the four strided post-increments of the loads
-
-
- VST4.F32 {d0,d2,d4,d6},[pOut1]!
- VST4.F32 {d1,d3,d5,d7},[pOut1]!
- SUBS grpCount,#1
- ADD pT1,pT1,#32 @ step the input pointer by 32 bytes (four complex float32 values)
- VST4.F32 {d8,d10,d12,d14},[pOut1]!
- VST4.F32 {d9,d11,d13,d15},[pOut1]!
-
-
- BGT ifftGrpLoop @ NOTE(review): relies on the flags from SUBS grpCount surviving the ADD and VST4 above
-
- @/* Swap Input and Output*/
- MOV pTmp,pDst
- MOV pDst,pSrc
- MOV pSrc,pTmp
-
- @/*Intermediate FFT Stages: Second Stage to Last but one Stage*/
- @/* Update the Grp count and size for the next stage */
-
- LSR SubFFTNum,#2
- LSL SubFFTSize,#2
- SUBS pTmp, SubFFTNum, #1 @ pTmp is only a scratch destination; the instruction is here for its flags
- BEQ ifftLastStageLoop @ SubFFTNum == 1: go directly to the last-stage code, which has no group loop
-
-ifftStageLoop:
- MOV grpCount,SubFFTNum
- MOV grpStep,#0
- ADD pT1,pSrc,fftSize @ start at the second butterfly input; the first one is loaded last in the set loop
- LSL TwdStep,SubFFTSize,#1 @ byte stride between the three twiddle rows of this stage
-
-ifftGrpLoop1:
- LSR setCount,SubFFTSize,#2
- ADD pOut1,pDst,grpStep,LSL #3 @ output of this group starts grpStep complex values (8 bytes each) into pDst
- MOV pTw2,pCoef
-
- LSL SubFFTSize,#1 @ temporarily doubled to serve as the store stride; restored after the set loop
-
-ifftSetLoop:
- VLD2 {qTwd2Re,qTwd2Im},[pTw2],TwdStep
- VLD2 {qInp3,qInp4},[pT1],fftSize
- @/*CPLX_MUL (pTmpT2, pTw2, pT2);*/
- VMUL qReTmpT2,qTwd2Re,qInp3
- VMUL qImTmpT2,qTwd2Re,qInp4
- VLD2 {qTwd3Re,qTwd3Im},[pTw2],TwdStep
- VLD2 {qInp5,qInp6},[pT1],fftSize
- VMLS qReTmpT2,qTwd2Im,qInp4
- VMLA qImTmpT2,qTwd2Im,qInp3
-
-
- @/*CPLX_MUL (pTmpT3, pTw3, pT3);*/
- VMUL qReTmpT3,qTwd3Re,qInp5
- VMUL qImTmpT3,qTwd3Re,qInp6
- VLD2 {qTwd4Re,qTwd4Im},[pTw2]
- VLD2 {qInp7,qInp8},[pT1],fftSize
- VMLS qReTmpT3,qTwd3Im,qInp6
- VMLA qImTmpT3,qTwd3Im,qInp5
-
- SUB pT1,pT1,fftSize, LSL #2 @ step back four strides so the next load reads the first butterfly input
-
-
- @/*CPLX_MUL (pTmpT4, pTw4, pT4);*/
- VMUL qReTmpT4,qTwd4Re,qInp7
- VMUL qImTmpT4,qTwd4Re,qInp8
- VLD2 {qInp1,qInp2},[pT1],fftSize
- VMLS qReTmpT4,qTwd4Im,qInp8
- VMLA qImTmpT4,qTwd4Im,qInp7
-
-
- @/*CPLX_ADD (pTmp1, pT1, pTmpT3);*/
- VADD qRe1,qInp1,qReTmpT3
- VADD qIm1,qInp2,qImTmpT3
- @/*CPLX_SUB (pTmp2, pT1, pTmpT3);*/
- VSUB qRe2,qInp1,qReTmpT3
- VSUB qIm2,qInp2,qImTmpT3
- @/*CPLX_ADD (pTmp3, pTmpT2, pTmpT4);*/
- VADD qRe3,qReTmpT2,qReTmpT4
- VADD qIm3,qImTmpT2,qImTmpT4
- @/*CPLX_SUB (pTmp4, pTmpT2, pTmpT4);*/
- VSUB qRe4,qReTmpT2,qReTmpT4
- VSUB qIm4,qImTmpT2,qImTmpT4
-
- @/*CPLX_ADD (pT1, pTmp1, pTmp3);*/
- VADD qInp1,qRe1,qRe3
- VADD qInp2,qIm1,qIm3
-
- @/*CPLX_SUB_ADD_X (pT2, pTmp2, pTmp4);*/
- VSUB qInp3,qRe2,qIm4
- VADD qInp4,qIm2,qRe4
-
- @/*CPLX_SUB (pT3, pTmp1, pTmp3);*/
- VSUB qInp5,qRe1,qRe3
- VSUB qInp6,qIm1,qIm3
- @/*CPLX_ADD_SUB_X (pT4, pTmp2, pTmp4);*/
- VADD qInp7,qRe2,qIm4
- VSUB qInp8,qIm2,qRe4
-
- SUBS setCount,#4 @ four butterflies are processed per iteration
- @/* Store the Result*/
-
- VST2 {qInp1,qInp2},[pOut1],SubFFTSize
- VST2 {qInp3,qInp4},[pOut1],SubFFTSize
-
- VST2 {qInp5,qInp6},[pOut1],SubFFTSize
- VST2 {qInp7,qInp8},[pOut1],SubFFTSize
-
- SUB pTw2,pTw2,TwdStep, LSL #1 @ undo the two twiddle-row strides
- SUB pOut1,pOut1,SubFFTSize, LSL #2 @ undo the four store strides
-
- ADD pT1,pT1,#32
- ADD pTw2,pTw2,#32
- ADD pOut1,pOut1,#32
-
-
- BGT ifftSetLoop @ NOTE(review): relies on the flags from SUBS setCount surviving the VST2/SUB/ADD above
- LSR SubFFTSize,#1 @ restore SubFFTSize after the temporary doubling
- SUBS grpCount,grpCount,#1
- ADD grpStep,grpStep,SubFFTSize
-
- BGT ifftGrpLoop1
- @/* Update the Grp count and size for the next stage */
- ADD twidStep,SubFFTSize,SubFFTSize, LSL #1 @ twidStep = 3 * SubFFTSize
- LSR SubFFTNum,SubFFTNum,#2
- SUBS pTmp, SubFFTNum, #1 @ sets the flags consumed by the BGT below; the pTmp result is overwritten by the swap
-
- @/* Swap Input and Output*/
- MOV pTmp,pDst
- MOV pDst,pSrc
- MOV pSrc,pTmp
-
- ADD pCoef,pCoef,twidStep,LSL #1 @ advance by 6 * SubFFTSize bytes, i.e. past the three twiddle rows of TwdStep bytes each
-
- LSL SubFFTSize,SubFFTSize,#2
-
- BGT ifftStageLoop @ loop while SubFFTNum > 1; NOTE(review): relies on the MOV/ADD/LSL above leaving the flags untouched
-
- @/* last stage */
-ifftLastStageLoop:
- MOV grpStep,#0
- ADD pT1,pSrc,fftSize
- LSL TwdStep,SubFFTSize,#1
-
-@ifftLastStageGrpLoop1:
- LSR setCount,SubFFTSize,#2
- ADD pOut1,pDst,grpStep,LSL #3 @ grpStep is 0 here, so pOut1 = pDst
- MOV pTw2,pCoef
-
- LSL SubFFTSize,#1 @ doubled to serve as the store stride; not restored because no further stage follows
-
-ifftLastStageSetLoop:
- VLD2 {qTwd2Re,qTwd2Im},[pTw2],TwdStep
- VLD2 {qInp3,qInp4},[pT1],fftSize
- @/*CPLX_MUL (pTmpT2, pTw2, pT2);*/
- VMUL qReTmpT2,qTwd2Re,qInp3
- VMUL qImTmpT2,qTwd2Re,qInp4
- VLD2 {qTwd3Re,qTwd3Im},[pTw2],TwdStep
- VLD2 {qInp5,qInp6},[pT1],fftSize
- VMLS qReTmpT2,qTwd2Im,qInp4
- VMLA qImTmpT2,qTwd2Im,qInp3
-
-
- @/*CPLX_MUL (pTmpT3, pTw3, pT3);*/
- VMUL qReTmpT3,qTwd3Re,qInp5
- VMUL qImTmpT3,qTwd3Re,qInp6
- VLD2 {qTwd4Re,qTwd4Im},[pTw2]
- VLD2 {qInp7,qInp8},[pT1],fftSize
- VMLS qReTmpT3,qTwd3Im,qInp6
- VMLA qImTmpT3,qTwd3Im,qInp5
-
- SUB pT1,pT1,fftSize, LSL #2
-
-
- @/*CPLX_MUL (pTmpT4, pTw4, pT4);*/
- VMUL qReTmpT4,qTwd4Re,qInp7
- VMUL qImTmpT4,qTwd4Re,qInp8
- VLD2 {qInp1,qInp2},[pT1],fftSize
- VMLS qReTmpT4,qTwd4Im,qInp8
- VMLA qImTmpT4,qTwd4Im,qInp7
-
-
- @/*CPLX_ADD (pTmp1, pT1, pTmpT3);*/
- VADD qRe1,qInp1,qReTmpT3
- VADD qIm1,qInp2,qImTmpT3
- @/*CPLX_SUB (pTmp2, pT1, pTmpT3);*/
- VSUB qRe2,qInp1,qReTmpT3
- VSUB qIm2,qInp2,qImTmpT3
- @/*CPLX_ADD (pTmp3, pTmpT2, pTmpT4);*/
- VADD qRe3,qReTmpT2,qReTmpT4
- VADD qIm3,qImTmpT2,qImTmpT4
- @/*CPLX_SUB (pTmp4, pTmpT2, pTmpT4);*/
- VSUB qRe4,qReTmpT2,qReTmpT4
- VSUB qIm4,qImTmpT2,qImTmpT4
-
- @/*CPLX_ADD (pT1, pTmp1, pTmp3);*/
- VADD qInp1,qRe1,qRe3
- VADD qInp2,qIm1,qIm3
-
- @/*CPLX_SUB_ADD_X (pT2, pTmp2, pTmp4);*/
- VSUB qInp3,qRe2,qIm4
- VADD qInp4,qIm2,qRe4
-
- @/*CPLX_SUB (pT3, pTmp1, pTmp3);*/
- VSUB qInp5,qRe1,qRe3
- VSUB qInp6,qIm1,qIm3
- @/*CPLX_ADD_SUB_X (pT4, pTmp2, pTmp4);*/
- VADD qInp7,qRe2,qIm4
- VSUB qInp8,qIm2,qRe4
-
- @/* multiply onebyN */
- @ grpCount is reused as a scratch register for the scale factor; the load is repeated on every iteration
-#if defined (NE10_ENABLE_HF)
- LDR grpCount,[sp,#0] @ reload the s0 value spilled by VPUSH {s0,s1} in the prologue
-#else
- LDR grpCount,[sp,#104] @ first word above the 104 bytes of saved registers; presumably the stack-passed onebyN - TODO confirm
-#endif
- VDUP.f32 q8,grpCount @ q8 is qRe1: broadcast the loaded 32-bit value to all four lanes
-
- VMUL qInp1,qInp1,qRe1
- VMUL qInp2,qInp2,qRe1
- VMUL qInp3,qInp3,qRe1
- VMUL qInp4,qInp4,qRe1
- VMUL qInp5,qInp5,qRe1
- VMUL qInp6,qInp6,qRe1
- VMUL qInp7,qInp7,qRe1
- VMUL qInp8,qInp8,qRe1
-
- SUBS setCount,#4
- @/* Store the Result*/
-
- VST2 {qInp1,qInp2},[pOut1],SubFFTSize
- VST2 {qInp3,qInp4},[pOut1],SubFFTSize
-
- VST2 {qInp5,qInp6},[pOut1],SubFFTSize
- VST2 {qInp7,qInp8},[pOut1],SubFFTSize
-
- SUB pTw2,pTw2,TwdStep, LSL #1
- SUB pOut1,pOut1,SubFFTSize, LSL #2
-
- ADD pT1,pT1,#32
- ADD pTw2,pTw2,#32
- ADD pOut1,pOut1,#32
-
- BGT ifftLastStageSetLoop
-
- @/* Swap Input and Output*/
- MOV pTmp,pDst
- MOV pDst,pSrc
- MOV pSrc,pTmp
-
- @/* if the N is even power of 4, copy the output to dst buffer */
- ASR fftSize,fftSize,#1 @ undo the doubling done at function entry
- CLZ SubFFTNum,fftSize
- MOV setCount, #32
- SUB SubFFTNum, setCount, SubFFTNum @ 32 - clz = bit length of fftSize
- ASR SubFFTNum,SubFFTNum,#1
- ANDS SubFFTNum, SubFFTNum, #1
-
- BNE ifftEnd @ skip the copy when bit 0 of (bit length / 2) is set
-
- ASR grpCount, fftSize, #4 @ N/16 iterations; each one copies 128 bytes (16 complex float32 values)
-
-ifftCopyLoop:
- VLD1.F32 {d0,d1,d2,d3},[pSrc]!
- VLD1.F32 {d4,d5,d6,d7},[pSrc]!
- VLD1.F32 {d8,d9,d10,d11},[pSrc]!
- VLD1.F32 {d12,d13,d14,d15},[pSrc]!
-
- SUBS grpCount,#1
- VST1.F32 {d0,d1,d2,d3},[pDst]!
- VST1.F32 {d4,d5,d6,d7},[pDst]!
- VST1.F32 {d8,d9,d10,d11},[pDst]!
- VST1.F32 {d12,d13,d14,d15},[pDst]!
-
- BGT ifftCopyLoop
-
-ifftEnd:
- @/* Return From Function*/
-#if defined (NE10_ENABLE_HF)
- VPOP {s0,s1} @ drop the spilled s0/s1 pushed in the prologue
-#endif
- VPOP {d8-d15}
- POP {r4-r12,pc} @ the saved lr is popped straight into pc, which returns to the caller
-
-
- .end
-
+++ /dev/null
-/*
- * Copyright 2012-14 ARM Limited
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of ARM Limited nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "NE10_types.h"
-
-/*
-* @brief Twiddle factors Table
-*/
-/** Pseudo code for Twiddle factor Tables Generation:
-
-for i=1 to N
- cfft_twiddle_coef(2*i) = cos((i-1) * 2*PI/(float)N)
- cfft_twiddle_coef(2*i + 1) = sin((i-1) * 2*PI/(float)N)
-end
-
-where N = 1024 and PI = 3.14159265358979
-
-N is the maximum FFT size supported, and
-the cos and sin values are stored in interleaved fashion
-*/
-
-/*Twiddles below are generated for each FFT-DIT stage separately*/
-
-static ne10_float32_t cfft_twiddle_coef[2040]={
-1.000000,0.000000,0.923880,0.382683,0.707107,0.707107,0.382683,0.923880,
-1.000000,0.000000,0.707107,0.707107,0.000000,1.000000,-0.707107,0.707107,
-1.000000,0.000000,0.382683,0.923880,-0.707107,0.707107,-0.923880,-0.382683,
-
-1.000000,0.000000,0.995185,0.098017,0.980785,0.195090,0.956940,0.290285,0.923880
-,0.382683,0.881921,0.471397,0.831470,0.555570,0.773010,0.634393,0.707107,
-0.707107,0.634393,0.773010,0.555570,0.831470,0.471397,0.881921,0.382683,0.923880,
-0.290285,0.956940,0.195090,0.980785,0.098017,0.995185,
-
-1.000000,0.000000,0.980785,0.195090,0.923880,0.382683,0.831470,0.555570,0.707107
-,0.707107,0.555570,0.831470,0.382683,0.923880,0.195090,0.980785,0.000000,
-1.000000,-0.195090,0.980785,-0.382683,0.923880,-0.555570,0.831470,-0.707107,0.707107,
--0.831470,0.555570,-0.923880,0.382683,-0.980785,0.195090,
-
-1.000000,0.000000,0.956940,0.290285,0.831470,0.555570,0.634393,0.773010,0.382683
-,0.923880,0.098017,0.995185,-0.195090,0.980785,-0.471397,0.881921,-0.707107,
-0.707107,-0.881921,0.471397,-0.980785,0.195090,-0.995185,-0.098017,-0.923880,
--0.382683,-0.773010,-0.634393,-0.555570,-0.831470,-0.290285,-0.956940,
-
-1.000000,0.000000,0.999699,0.024541,0.998795,0.049068,0.997290,0.073565,0.995185
-,0.098017,0.992480,0.122411,0.989177,0.146730,0.985278,0.170962,0.980785,
-0.195090,0.975702,0.219101,0.970031,0.242980,0.963776,0.266713,0.956940,0.290285,
-0.949528,0.313682,0.941544,0.336890,0.932993,0.359895,0.923880,0.382683,0.914210,
-0.405241,0.903989,0.427555,0.893224,0.449611,0.881921,0.471397,0.870087,0.492898,
-0.857729,0.514103,0.844854,0.534998,0.831470,0.555570,0.817585,0.575808,0.803208,
-0.595699,0.788346,0.615232,0.773010,0.634393,0.757209,0.653173,0.740951,0.671559,
-0.724247,0.689541,0.707107,0.707107,0.689541,0.724247,0.671559,0.740951,0.653173,
-0.757209,0.634393,0.773010,0.615232,0.788346,0.595699,0.803208,0.575808,0.817585,
-0.555570,0.831470,0.534998,0.844854,0.514103,0.857729,0.492898,0.870087,0.471397
-,0.881921,0.449611,0.893224,0.427555,0.903989,0.405241,0.914210,0.382683,
-0.923880,0.359895,0.932993,0.336890,0.941544,0.313682,0.949528,0.290285,0.956940,
-0.266713,0.963776,0.242980,0.970031,0.219101,0.975702,0.195090,0.980785,0.170962,
-0.985278,0.146730,0.989177,0.122411,0.992480,0.098017,0.995185,0.073565,0.997290,
-0.049068,0.998795,0.024541,0.999699,
-
-1.000000,0.000000,0.998795,0.049068,0.995185,0.098017,0.989177,0.146730,0.980785
-,0.195090,0.970031,0.242980,0.956940,0.290285,0.941544,0.336890,0.923880,
-0.382683,0.903989,0.427555,0.881921,0.471397,0.857729,0.514103,0.831470,0.555570,
-0.803208,0.595699,0.773010,0.634393,0.740951,0.671559,0.707107,0.707107,0.671559,
-0.740951,0.634393,0.773010,0.595699,0.803208,0.555570,0.831470,0.514103,0.857729,
-0.471397,0.881921,0.427555,0.903989,0.382683,0.923880,0.336890,0.941544,0.290285,
-0.956940,0.242980,0.970031,0.195090,0.980785,0.146730,0.989177,0.098017,0.995185,
-0.049068,0.998795,0.000000,1.000000,-0.049068,0.998795,-0.098017,0.995185,
--0.146730,0.989177,-0.195090,0.980785,-0.242980,0.970031,-0.290285,0.956940,-0.336890,
-0.941544,-0.382683,0.923880,-0.427555,0.903989,-0.471397,0.881921,-0.514103,
-0.857729,-0.555570,0.831470,-0.595699,0.803208,-0.634393,0.773010,-0.671559,0.740951,
--0.707107,0.707107,-0.740951,0.671559,-0.773010,0.634393,-0.803208,0.595699,
--0.831470,0.555570,-0.857729,0.514103,-0.881921,0.471397,-0.903989,0.427555,
--0.923880,0.382683,-0.941544,0.336890,-0.956940,0.290285,-0.970031,0.242980,-0.980785,
-0.195090,-0.989177,0.146730,-0.995185,0.098017,-0.998795,0.049068,
-
-1.000000,0.000000,0.997290,0.073565,0.989177,0.146730,0.975702,0.219101,0.956940
-,0.290285,0.932993,0.359895,0.903989,0.427555,0.870087,0.492898,0.831470,
-0.555570,0.788346,0.615232,0.740951,0.671559,0.689541,0.724247,0.634393,0.773010,
-0.575808,0.817585,0.514103,0.857729,0.449611,0.893224,0.382683,0.923880,0.313682,
-0.949528,0.242980,0.970031,0.170962,0.985278,0.098017,0.995185,0.024541,0.999699,
--0.049068,0.998795,-0.122411,0.992480,-0.195090,0.980785,-0.266713,0.963776,
--0.336890,0.941544,-0.405241,0.914210,-0.471397,0.881921,-0.534998,0.844854,-0.595699,
-0.803208,-0.653173,0.757209,-0.707107,0.707107,-0.757209,0.653173,-0.803208,
-0.595699,-0.844854,0.534998,-0.881921,0.471397,-0.914210,0.405241,-0.941544,0.336890,
--0.963776,0.266713,-0.980785,0.195090,-0.992480,0.122411,-0.998795,0.049068,
--0.999699,-0.024541,-0.995185,-0.098017,-0.985278,-0.170962,-0.970031,-0.242980,
--0.949528,-0.313682,-0.923880,-0.382683,-0.893224,-0.449611,-0.857729,-0.514103,
--0.817585,-0.575808,-0.773010,-0.634393,-0.724247,-0.689541,-0.671559,-0.740951,
--0.615232,-0.788346,-0.555570,-0.831470,-0.492898,-0.870087,-0.427555,-0.903989,
--0.359895,-0.932993,-0.290285,-0.956940,-0.219101,-0.975702,-0.146730,-0.989177,
--0.073565,-0.997290,
-
-
-1.000000,0.000000,0.999981,0.006136,0.999925,0.012272,0.999831,0.018407,0.999699
-,0.024541,0.999529,0.030675,0.999322,0.036807,0.999078,0.042938,0.998795,
-0.049068,0.998476,0.055195,0.998118,0.061321,0.997723,0.067444,0.997290,0.073565,
-0.996820,0.079682,0.996313,0.085797,0.995767,0.091909,0.995185,0.098017,0.994565,
-0.104122,0.993907,0.110222,0.993212,0.116319,0.992480,0.122411,0.991710,0.128498,
-0.990903,0.134581,0.990058,0.140658,0.989177,0.146730,0.988258,0.152797,0.987301,
-0.158858,0.986308,0.164913,0.985278,0.170962,0.984210,0.177004,0.983105,0.183040,
-0.981964,0.189069,0.980785,0.195090,0.979570,0.201105,0.978317,0.207111,0.977028,
-0.213110,0.975702,0.219101,0.974339,0.225084,0.972940,0.231058,0.971504,0.237024,
-0.970031,0.242980,0.968522,0.248928,0.966976,0.254866,0.965394,0.260794,0.963776
-,0.266713,0.962121,0.272621,0.960431,0.278520,0.958703,0.284408,0.956940,
-0.290285,0.955141,0.296151,0.953306,0.302006,0.951435,0.307850,0.949528,0.313682,
-0.947586,0.319502,0.945607,0.325310,0.943593,0.331106,0.941544,0.336890,0.939459,
-0.342661,0.937339,0.348419,0.935184,0.354164,0.932993,0.359895,0.930767,0.365613,
-0.928506,0.371317,0.926210,0.377007,0.923880,0.382683,0.921514,0.388345,0.919114,
-0.393992,0.916679,0.399624,0.914210,0.405241,0.911706,0.410843,0.909168,0.416430,
-0.906596,0.422000,0.903989,0.427555,0.901349,0.433094,0.898674,0.438616,0.895966,
-0.444122,0.893224,0.449611,0.890449,0.455084,0.887640,0.460539,0.884797,0.465977,
-0.881921,0.471397,0.879012,0.476799,0.876070,0.482184,0.873095,0.487550,0.870087
-,0.492898,0.867046,0.498228,0.863973,0.503538,0.860867,0.508830,0.857729,
-0.514103,0.854558,0.519356,0.851355,0.524590,0.848120,0.529804,0.844854,0.534998,
-0.841555,0.540171,0.838225,0.545325,0.834863,0.550458,0.831470,0.555570,0.828045,
-0.560662,0.824589,0.565732,0.821102,0.570781,0.817585,0.575808,0.814036,0.580814,
-0.810457,0.585798,0.806848,0.590760,0.803208,0.595699,0.799537,0.600616,0.795837,
-0.605511,0.792107,0.610383,0.788346,0.615232,0.784557,0.620057,0.780737,0.624860,
-0.776888,0.629638,0.773010,0.634393,0.769103,0.639124,0.765167,0.643832,0.761202,
-0.648514,0.757209,0.653173,0.753187,0.657807,0.749136,0.662416,0.745058,0.667000,
-0.740951,0.671559,0.736817,0.676093,0.732654,0.680601,0.728464,0.685084,0.724247
-,0.689541,0.720003,0.693971,0.715731,0.698376,0.711432,0.702755,0.707107,
-0.707107,0.702755,0.711432,0.698376,0.715731,0.693971,0.720003,0.689541,0.724247,
-0.685084,0.728464,0.680601,0.732654,0.676093,0.736817,0.671559,0.740951,0.667000,
-0.745058,0.662416,0.749136,0.657807,0.753187,0.653173,0.757209,0.648514,0.761202,
-0.643832,0.765167,0.639124,0.769103,0.634393,0.773010,0.629638,0.776888,0.624860,
-0.780737,0.620057,0.784557,0.615232,0.788346,0.610383,0.792107,0.605511,0.795837,
-0.600616,0.799537,0.595699,0.803208,0.590760,0.806848,0.585798,0.810457,0.580814,
-0.814036,0.575808,0.817585,0.570781,0.821102,0.565732,0.824589,0.560662,0.828045,
-0.555570,0.831470,0.550458,0.834863,0.545325,0.838225,0.540171,0.841555,0.534998
-,0.844854,0.529804,0.848120,0.524590,0.851355,0.519356,0.854558,0.514103,
-0.857729,0.508830,0.860867,0.503538,0.863973,0.498228,0.867046,0.492898,0.870087,
-0.487550,0.873095,0.482184,0.876070,0.476799,0.879012,0.471397,0.881921,0.465977,
-0.884797,0.460539,0.887640,0.455084,0.890449,0.449611,0.893224,0.444122,0.895966,
-0.438616,0.898674,0.433094,0.901349,0.427555,0.903989,0.422000,0.906596,0.416430,
-0.909168,0.410843,0.911706,0.405241,0.914210,0.399624,0.916679,0.393992,0.919114,
-0.388345,0.921514,0.382683,0.923880,0.377007,0.926210,0.371317,0.928506,0.365613,
-0.930767,0.359895,0.932993,0.354164,0.935184,0.348419,0.937339,0.342661,0.939459,
-0.336890,0.941544,0.331106,0.943593,0.325310,0.945607,0.319502,0.947586,0.313682
-,0.949528,0.307850,0.951435,0.302006,0.953306,0.296151,0.955141,0.290285,
-0.956940,0.284408,0.958703,0.278520,0.960431,0.272621,0.962121,0.266713,0.963776,
-0.260794,0.965394,0.254866,0.966976,0.248928,0.968522,0.242980,0.970031,0.237024,
-0.971504,0.231058,0.972940,0.225084,0.974339,0.219101,0.975702,0.213110,0.977028,
-0.207111,0.978317,0.201105,0.979570,0.195090,0.980785,0.189069,0.981964,0.183040,
-0.983105,0.177004,0.984210,0.170962,0.985278,0.164913,0.986308,0.158858,0.987301,
-0.152797,0.988258,0.146730,0.989177,0.140658,0.990058,0.134581,0.990903,0.128498,
-0.991710,0.122411,0.992480,0.116319,0.993212,0.110222,0.993907,0.104122,0.994565,
-0.098017,0.995185,0.091909,0.995767,0.085797,0.996313,0.079682,0.996820,0.073565
-,0.997290,0.067444,0.997723,0.061321,0.998118,0.055195,0.998476,0.049068,
-0.998795,0.042938,0.999078,0.036807,0.999322,0.030675,0.999529,0.024541,0.999699,
-0.018407,0.999831,0.012272,0.999925,0.006136,0.999981,
-
-
-1.000000,0.000000,0.999925,0.012272,0.999699,0.024541,0.999322,0.036807,0.998795
-,0.049068,0.998118,0.061321,0.997290,0.073565,0.996313,0.085797,0.995185,
-0.098017,0.993907,0.110222,0.992480,0.122411,0.990903,0.134581,0.989177,0.146730,
-0.987301,0.158858,0.985278,0.170962,0.983105,0.183040,0.980785,0.195090,0.978317,
-0.207111,0.975702,0.219101,0.972940,0.231058,0.970031,0.242980,0.966976,0.254866,
-0.963776,0.266713,0.960431,0.278520,0.956940,0.290285,0.953306,0.302006,0.949528,
-0.313682,0.945607,0.325310,0.941544,0.336890,0.937339,0.348419,0.932993,0.359895,
-0.928506,0.371317,0.923880,0.382683,0.919114,0.393992,0.914210,0.405241,0.909168,
-0.416430,0.903989,0.427555,0.898674,0.438616,0.893224,0.449611,0.887640,0.460539,
-0.881921,0.471397,0.876070,0.482184,0.870087,0.492898,0.863973,0.503538,0.857729
-,0.514103,0.851355,0.524590,0.844854,0.534998,0.838225,0.545325,0.831470,
-0.555570,0.824589,0.565732,0.817585,0.575808,0.810457,0.585798,0.803208,0.595699,
-0.795837,0.605511,0.788346,0.615232,0.780737,0.624860,0.773010,0.634393,0.765167,
-0.643832,0.757209,0.653173,0.749136,0.662416,0.740951,0.671559,0.732654,0.680601,
-0.724247,0.689541,0.715731,0.698376,0.707107,0.707107,0.698376,0.715731,0.689541,
-0.724247,0.680601,0.732654,0.671559,0.740951,0.662416,0.749136,0.653173,0.757209,
-0.643832,0.765167,0.634393,0.773010,0.624860,0.780737,0.615232,0.788346,0.605511,
-0.795837,0.595699,0.803208,0.585798,0.810457,0.575808,0.817585,0.565732,0.824589,
-0.555570,0.831470,0.545325,0.838225,0.534998,0.844854,0.524590,0.851355,0.514103
-,0.857729,0.503538,0.863973,0.492898,0.870087,0.482184,0.876070,0.471397,
-0.881921,0.460539,0.887640,0.449611,0.893224,0.438616,0.898674,0.427555,0.903989,
-0.416430,0.909168,0.405241,0.914210,0.393992,0.919114,0.382683,0.923880,0.371317,
-0.928506,0.359895,0.932993,0.348419,0.937339,0.336890,0.941544,0.325310,0.945607,
-0.313682,0.949528,0.302006,0.953306,0.290285,0.956940,0.278520,0.960431,0.266713,
-0.963776,0.254866,0.966976,0.242980,0.970031,0.231058,0.972940,0.219101,0.975702,
-0.207111,0.978317,0.195090,0.980785,0.183040,0.983105,0.170962,0.985278,0.158858,
-0.987301,0.146730,0.989177,0.134581,0.990903,0.122411,0.992480,0.110222,0.993907,
-0.098017,0.995185,0.085797,0.996313,0.073565,0.997290,0.061321,0.998118,0.049068
-,0.998795,0.036807,0.999322,0.024541,0.999699,0.012272,0.999925,0.000000,
-1.000000,-0.012272,0.999925,-0.024541,0.999699,-0.036807,0.999322,-0.049068,0.998795,
--0.061321,0.998118,-0.073565,0.997290,-0.085797,0.996313,-0.098017,0.995185,
--0.110222,0.993907,-0.122411,0.992480,-0.134581,0.990903,-0.146730,0.989177,-0.158858,
-0.987301,-0.170962,0.985278,-0.183040,0.983105,-0.195090,0.980785,-0.207111,
-0.978317,-0.219101,0.975702,-0.231058,0.972940,-0.242980,0.970031,-0.254866,
-0.966976,-0.266713,0.963776,-0.278520,0.960431,-0.290285,0.956940,-0.302006,0.953306,
--0.313682,0.949528,-0.325310,0.945607,-0.336890,0.941544,-0.348419,0.937339,
--0.359895,0.932993,-0.371317,0.928506,-0.382683,0.923880,-0.393992,0.919114,-0.405241,
-0.914210,-0.416430,0.909168,-0.427555,0.903989,-0.438616,0.898674,-0.449611,
-0.893224,-0.460539,0.887640,-0.471397,0.881921,-0.482184,0.876070,-0.492898,0.870087,
--0.503538,0.863973,-0.514103,0.857729,-0.524590,0.851355,-0.534998,0.844854,
--0.545325,0.838225,-0.555570,0.831470,-0.565732,0.824589,-0.575808,0.817585,
--0.585798,0.810457,-0.595699,0.803208,-0.605511,0.795837,-0.615232,0.788346,-0.624860,
-0.780737,-0.634393,0.773010,-0.643832,0.765167,-0.653173,0.757209,-0.662416,
-0.749136,-0.671559,0.740951,-0.680601,0.732654,-0.689541,0.724247,-0.698376,0.715731,
--0.707107,0.707107,-0.715731,0.698376,-0.724247,0.689541,-0.732654,0.680601,
--0.740951,0.671559,-0.749136,0.662416,-0.757209,0.653173,-0.765167,0.643832,
--0.773010,0.634393,-0.780737,0.624860,-0.788346,0.615232,-0.795837,0.605511,-0.803208,
-0.595699,-0.810457,0.585798,-0.817585,0.575808,-0.824589,0.565732,-0.831470,
-0.555570,-0.838225,0.545325,-0.844854,0.534998,-0.851355,0.524590,-0.857729,0.514103,
--0.863973,0.503538,-0.870087,0.492898,-0.876070,0.482184,-0.881921,0.471397,
--0.887640,0.460539,-0.893224,0.449611,-0.898674,0.438616,-0.903989,0.427555,-0.909168,
-0.416430,-0.914210,0.405241,-0.919114,0.393992,-0.923880,0.382683,-0.928506,
-0.371317,-0.932993,0.359895,-0.937339,0.348419,-0.941544,0.336890,-0.945607,
-0.325310,-0.949528,0.313682,-0.953306,0.302006,-0.956940,0.290285,-0.960431,0.278520,
--0.963776,0.266713,-0.966976,0.254866,-0.970031,0.242980,-0.972940,0.231058,
--0.975702,0.219101,-0.978317,0.207111,-0.980785,0.195090,-0.983105,0.183040,-0.985278,
-0.170962,-0.987301,0.158858,-0.989177,0.146730,-0.990903,0.134581,-0.992480,
-0.122411,-0.993907,0.110222,-0.995185,0.098017,-0.996313,0.085797,-0.997290,0.073565,
--0.998118,0.061321,-0.998795,0.049068,-0.999322,0.036807,-0.999699,0.024541,
--0.999925,0.012272,
-
-
-
-1.000000,0.000000,0.999831,0.018407,0.999322,0.036807,0.998476,0.055195,0.997290
-,0.073565,0.995767,0.091909,0.993907,0.110222,0.991710,0.128498,0.989177,
-0.146730,0.986308,0.164913,0.983105,0.183040,0.979570,0.201105,0.975702,0.219101,
-0.971504,0.237024,0.966976,0.254866,0.962121,0.272621,0.956940,0.290285,0.951435,
-0.307850,0.945607,0.325310,0.939459,0.342661,0.932993,0.359895,0.926210,0.377007,
-0.919114,0.393992,0.911706,0.410843,0.903989,0.427555,0.895966,0.444122,0.887640,
-0.460539,0.879012,0.476799,0.870087,0.492898,0.860867,0.508830,0.851355,0.524590,
-0.841555,0.540171,0.831470,0.555570,0.821102,0.570781,0.810457,0.585798,0.799537,
-0.600616,0.788346,0.615232,0.776888,0.629638,0.765167,0.643832,0.753187,0.657807,
-0.740951,0.671559,0.728464,0.685084,0.715731,0.698376,0.702755,0.711432,0.689541
-,0.724247,0.676093,0.736817,0.662416,0.749136,0.648514,0.761202,0.634393,
-0.773010,0.620057,0.784557,0.605511,0.795837,0.590760,0.806848,0.575808,0.817585,
-0.560662,0.828045,0.545325,0.838225,0.529804,0.848120,0.514103,0.857729,0.498228,
-0.867046,0.482184,0.876070,0.465977,0.884797,0.449611,0.893224,0.433094,0.901349,
-0.416430,0.909168,0.399624,0.916679,0.382683,0.923880,0.365613,0.930767,0.348419,
-0.937339,0.331106,0.943593,0.313682,0.949528,0.296151,0.955141,0.278520,0.960431,
-0.260794,0.965394,0.242980,0.970031,0.225084,0.974339,0.207111,0.978317,0.189069,
-0.981964,0.170962,0.985278,0.152797,0.988258,0.134581,0.990903,0.116319,0.993212,
-0.098017,0.995185,0.079682,0.996820,0.061321,0.998118,0.042938,0.999078,0.024541
-,0.999699,0.006136,0.999981,-0.012272,0.999925,-0.030675,0.999529,-0.049068,
-0.998795,-0.067444,0.997723,-0.085797,0.996313,-0.104122,0.994565,-0.122411,
-0.992480,-0.140658,0.990058,-0.158858,0.987301,-0.177004,0.984210,-0.195090,0.980785,
--0.213110,0.977028,-0.231058,0.972940,-0.248928,0.968522,-0.266713,0.963776,
--0.284408,0.958703,-0.302006,0.953306,-0.319502,0.947586,-0.336890,0.941544,-0.354164,
-0.935184,-0.371317,0.928506,-0.388345,0.921514,-0.405241,0.914210,-0.422000,
-0.906596,-0.438616,0.898674,-0.455084,0.890449,-0.471397,0.881921,-0.487550,0.873095,
--0.503538,0.863973,-0.519356,0.854558,-0.534998,0.844854,-0.550458,0.834863,
--0.565732,0.824589,-0.580814,0.814036,-0.595699,0.803208,-0.610383,0.792107,
--0.624860,0.780737,-0.639124,0.769103,-0.653173,0.757209,-0.667000,0.745058,-0.680601,
-0.732654,-0.693971,0.720003,-0.707107,0.707107,-0.720003,0.693971,-0.732654,
-0.680601,-0.745058,0.667000,-0.757209,0.653173,-0.769103,0.639124,-0.780737,0.624860,
--0.792107,0.610383,-0.803208,0.595699,-0.814036,0.580814,-0.824589,0.565732,
--0.834863,0.550458,-0.844854,0.534998,-0.854558,0.519356,-0.863973,0.503538,-0.873095
-,0.487550,-0.881921,0.471397,-0.890449,0.455084,-0.898674,0.438616,-0.906596,
-0.422000,-0.914210,0.405241,-0.921514,0.388345,-0.928506,0.371317,-0.935184,
-0.354164,-0.941544,0.336890,-0.947586,0.319502,-0.953306,0.302006,-0.958703,0.284408,
--0.963776,0.266713,-0.968522,0.248928,-0.972940,0.231058,-0.977028,0.213110,
--0.980785,0.195090,-0.984210,0.177004,-0.987301,0.158858,-0.990058,0.140658,-0.992480,
-0.122411,-0.994565,0.104122,-0.996313,0.085797,-0.997723,0.067444,-0.998795,
-0.049068,-0.999529,0.030675,-0.999925,0.012272,-0.999981,-0.006136,-0.999699,
--0.024541,-0.999078,-0.042938,-0.998118,-0.061321,-0.996820,-0.079682,-0.995185,
--0.098017,-0.993212,-0.116319,-0.990903,-0.134581,-0.988258,-0.152797,-0.985278,
--0.170962,-0.981964,-0.189069,-0.978317,-0.207111,-0.974339,-0.225084,-0.970031,
--0.242980,-0.965394,-0.260794,-0.960431,-0.278520,-0.955141,-0.296151,-0.949528,
--0.313682,-0.943593,-0.331106,-0.937339,-0.348419,-0.930767,-0.365613,-0.923880,
--0.382683,-0.916679,-0.399624,-0.909168,-0.416430,-0.901349,-0.433094,-0.893224,
--0.449611,-0.884797,-0.465977,-0.876070,-0.482184,-0.867046,-0.498228,-0.857729,
--0.514103,-0.848120,-0.529804,-0.838225,-0.545325,-0.828045,-0.560662,-0.817585,
--0.575808,-0.806848,-0.590760,-0.795837,-0.605511,-0.784557,-0.620057,-0.773010,
--0.634393,-0.761202,-0.648514,-0.749136,-0.662416,-0.736817,-0.676093,-0.724247,
--0.689541,-0.711432,-0.702755,-0.698376,-0.715731,-0.685084,-0.728464,-0.671559,
--0.740951,-0.657807,-0.753187,-0.643832,-0.765167,-0.629638,-0.776888,-0.615232,
--0.788346,-0.600616,-0.799537,-0.585798,-0.810457,-0.570781,-0.821102,-0.555570,
--0.831470,-0.540171,-0.841555,-0.524590,-0.851355,-0.508830,-0.860867,-0.492898,
--0.870087,-0.476799,-0.879012,-0.460539,-0.887640,-0.444122,-0.895966,-0.427555,
--0.903989,-0.410843,-0.911706,-0.393992,-0.919114,-0.377007,-0.926210,-0.359895,
--0.932993,-0.342661,-0.939459,-0.325310,-0.945607,-0.307850,-0.951435,-0.290285,
--0.956940,-0.272621,-0.962121,-0.254866,-0.966976,-0.237024,-0.971504,-0.219101,
--0.975702,-0.201105,-0.979570,-0.183040,-0.983105,-0.164913,-0.986308,-0.146730,
--0.989177,-0.128498,-0.991710,-0.110222,-0.993907,-0.091909,-0.995767,-0.073565,
--0.997290,-0.055195,-0.998476,-0.036807,-0.999322,-0.018407,-0.999831
-
-};
-
-/*
-* @brief Initialization function for the floating point CFFT/CIFFT function.
-*
-* @param[in,out] *S points to an instance of the floating point CFFT/CIFFT function structure.
-* @param[in] fftLen length of the CFFT/CIFFT; supported values are 16, 64, 256 and 1024.
-* @param[in] ifftFlag flag for the selection of CFFT or CIFFT; stored unchanged in S->ifft_flag.
-* @return The function returns NE10_OK if initialization was successful or NE10_ERR if
-* <code>fftLen</code> is not a supported value.
-*
-* The function initialises the twiddle factor table pointer, the bit reversal factor and
-* the 1/N value of the instance. Every supported length uses the same table,
-* cfft_twiddle_coef. Note that fft_len, twid_coef_modifier and ifft_flag are written
-* even when NE10_ERR is returned; the remaining fields are left untouched in that case.
-*/
-
-ne10_result_t ne10_cfft_radix4_init_float(
- ne10_cfft_radix4_instance_f32_t * S,
- ne10_uint16_t fftLen,
- ne10_uint8_t ifftFlag)
-{
- /* Assume success; only the default branch below reports an error */
- ne10_result_t status = NE10_OK;
-
- /* Fields that do not depend on the FFT length */
- S->fft_len = fftLen;
- S->twid_coef_modifier = 1u;
- S->ifft_flag = ifftFlag;
-
- /* Length-dependent fields: bit reversal factor and 1/N */
- switch (S->fft_len)
- {
- case 1024u:
- S->p_twiddle = (ne10_float32_t *) cfft_twiddle_coef;
- S->bit_rev_factor = 1u;
- S->one_by_fft_len = 0.0009765625f; /* 1/1024 */
- break;
-
- case 256u:
- S->p_twiddle = (ne10_float32_t *) cfft_twiddle_coef;
- S->bit_rev_factor = 4u;
- S->one_by_fft_len = 0.00390625f; /* 1/256 */
- break;
-
- case 64u:
- S->p_twiddle = (ne10_float32_t *) cfft_twiddle_coef;
- S->bit_rev_factor = 16u;
- S->one_by_fft_len = 0.015625f; /* 1/64 */
- break;
-
- case 16u:
- S->p_twiddle = (ne10_float32_t *) cfft_twiddle_coef;
- S->bit_rev_factor = 64u;
- S->one_by_fft_len = 0.0625f; /* 1/16 */
- break;
-
- default:
- /* Reporting argument error if fftLen is not a valid value */
- status = NE10_ERR;
- break;
- }
- return status;
-}
-
/*common fft functions */
/*common functions for float fft */
- extern void ne10_data_bitreversal_float32 (ne10_fft_cpx_float32_t * Fout,
- const ne10_fft_cpx_float32_t * f,
- ne10_int32_t fstride,
- ne10_int32_t * factors);
extern void ne10_fft_split_r2c_1d_float32 (ne10_fft_cpx_float32_t *dst,
const ne10_fft_cpx_float32_t *src,
ne10_fft_cpx_float32_t *twiddles,
const ne10_fft_cpx_float32_t *src,
ne10_fft_cpx_float32_t *twiddles,
ne10_int32_t ncfft);
- extern void ne10_radix4_butterfly_forward_float32_neon (ne10_fft_cpx_float32_t * Fout,
+ extern void ne10_mixed_radix_fft_forward_float32_neon (ne10_fft_cpx_float32_t * Fout,
+ ne10_fft_cpx_float32_t * Fin,
ne10_int32_t * factors,
ne10_fft_cpx_float32_t * twiddles)
- asm ("ne10_radix4_butterfly_forward_float32_neon");
- extern void ne10_radix4_butterfly_backward_float32_neon (ne10_fft_cpx_float32_t * Fout,
- ne10_int32_t * factors,
- ne10_fft_cpx_float32_t * twiddles)
- asm ("ne10_radix4_butterfly_backward_float32_neon");
+ asm ("ne10_mixed_radix_fft_forward_float32_neon");
- extern void ne10_radix2_butterfly_forward_float32_neon (ne10_fft_cpx_float32_t * Fout,
+ extern void ne10_mixed_radix_fft_backward_float32_neon (ne10_fft_cpx_float32_t * Fout,
+ ne10_fft_cpx_float32_t * Fin,
ne10_int32_t * factors,
ne10_fft_cpx_float32_t * twiddles)
- asm ("ne10_radix2_butterfly_forward_float32_neon");
- extern void ne10_radix2_butterfly_backward_float32_neon (ne10_fft_cpx_float32_t * Fout,
- ne10_int32_t * factors,
- ne10_fft_cpx_float32_t * twiddles)
- asm ("ne10_radix2_butterfly_backward_float32_neon");
-
- extern void ne10_mixed_radix_butterfly_length_even_power2_float32_neon (ne10_fft_cpx_float32_t * Fout,
- ne10_int32_t * factors,
- ne10_fft_cpx_float32_t * twiddles)
- asm ("ne10_mixed_radix_butterfly_length_even_power2_float32_neon");
- extern void ne10_mixed_radix_butterfly_length_odd_power2_float32_neon (ne10_fft_cpx_float32_t * Fout,
- ne10_int32_t * factors,
- ne10_fft_cpx_float32_t * twiddles)
- asm ("ne10_mixed_radix_butterfly_length_odd_power2_float32_neon");
-
- extern void ne10_mixed_radix_butterfly_inverse_length_even_power2_float32_neon (ne10_fft_cpx_float32_t * Fout,
- ne10_int32_t * factors,
- ne10_fft_cpx_float32_t * twiddles)
- asm ("ne10_mixed_radix_butterfly_inverse_length_even_power2_float32_neon");
- extern void ne10_mixed_radix_butterfly_inverse_length_odd_power2_float32_neon (ne10_fft_cpx_float32_t * Fout,
- ne10_int32_t * factors,
- ne10_fft_cpx_float32_t * twiddles)
- asm ("ne10_mixed_radix_butterfly_inverse_length_odd_power2_float32_neon");
+ asm ("ne10_mixed_radix_fft_backward_float32_neon");
/* common functions for fixed point fft */
/* bit reversal for int 16 */
ne10_fft_cpx_int32_t * twiddles)
asm ("ne10_radix2_butterfly_backward_int32_scaled_neon");
+
#ifdef __cplusplus
}
#endif
#include "NE10_fft.h"
static void ne10_mixed_radix_butterfly_float32_c (ne10_fft_cpx_float32_t * Fout,
+ ne10_fft_cpx_float32_t * Fin,
ne10_int32_t * factors,
ne10_fft_cpx_float32_t * twiddles)
{
- ne10_int32_t i, j, mstride;
+ ne10_int32_t fstride, mstride, N;
+ ne10_int32_t fstride1;
+ ne10_int32_t f_count, m_count;
ne10_int32_t stage_count;
- ne10_int32_t fstride;
- ne10_fft_cpx_float32_t tmp;
- ne10_fft_cpx_float32_t scratch[6];
- ne10_fft_cpx_float32_t *tw, *tw1, *tw2, *tw3;
- ne10_fft_cpx_float32_t * F;
+ ne10_fft_cpx_float32_t scratch_in[8];
+ ne10_fft_cpx_float32_t scratch_out[8];
+ ne10_fft_cpx_float32_t scratch[16];
+ ne10_fft_cpx_float32_t scratch_tw[6];
+ ne10_fft_cpx_float32_t *Fin1, *Fin2, *Fout1, *Fout2;
+ ne10_fft_cpx_float32_t *Fout_ls = Fout;
+ ne10_fft_cpx_float32_t *Ftmp;
+ ne10_fft_cpx_float32_t *tw, *tw1, *tw2;
+ const ne10_float32_t TW_81 = 0.70710678;
+ const ne10_float32_t TW_81N = -0.70710678;
- // the first stage
+ // init fstride, mstride, N
stage_count = factors[0];
fstride = factors[1];
- if (factors[2 * stage_count] == 2) // length of FFT is 2^n (n is odd)
+ mstride = factors[ (stage_count << 1) - 1 ];
+ N = factors[ stage_count << 1 ]; // radix
+
+ // the first stage
+ Fin1 = Fin;
+ Fout1 = Fout;
+ if (N == 2) // length of FFT is 2^n (n is odd)
{
- //fstride is nfft>>1
- for (i = 0; i < fstride; i++)
+ // radix 8
+ N = fstride >> 1; // 1/4 of length of FFT
+ tw = twiddles;
+ fstride1 = fstride >> 2;
+
+ Fin1 = Fin;
+ for (f_count = 0; f_count < fstride1; f_count ++)
{
- tmp.r = Fout[2 * i + 1].r;
- tmp.i = Fout[2 * i + 1].i;
- Fout[2 * i + 1].r = Fout[2 * i].r - tmp.r;
- Fout[2 * i + 1].i = Fout[2 * i].i - tmp.i;
- Fout[2 * i].r = Fout[2 * i].r + tmp.r;
- Fout[2 * i].i = Fout[2 * i].i + tmp.i;
- }
+ Fout1 = & Fout[ f_count * 8 ];
+ // load
+ scratch_tw[0] = tw[0];
+ scratch_tw[2] = tw[2];
+ scratch_tw[4] = tw[4];
+ scratch_tw[1] = tw[1];
+ scratch_tw[3] = tw[3];
+ scratch_tw[5] = tw[5];
+
+ scratch_in[0].r = Fin1[0].r + Fin1[0 + fstride].r;
+ scratch_in[0].i = Fin1[0].i + Fin1[0 + fstride].i;
+ scratch_in[1].r = Fin1[0].r - Fin1[0 + fstride].r;
+ scratch_in[1].i = Fin1[0].i - Fin1[0 + fstride].i;
+ scratch_in[2].r = Fin1[fstride1].r + Fin1[fstride1 + fstride].r;
+ scratch_in[2].i = Fin1[fstride1].i + Fin1[fstride1 + fstride].i;
+ scratch_in[3].r = Fin1[fstride1].r - Fin1[fstride1 + fstride].r;
+ scratch_in[3].i = Fin1[fstride1].i - Fin1[fstride1 + fstride].i;
+ scratch_in[4].r = Fin1[fstride1 * 2].r + Fin1[fstride1 * 2 + fstride].r;
+ scratch_in[4].i = Fin1[fstride1 * 2].i + Fin1[fstride1 * 2 + fstride].i;
+ scratch_in[5].r = Fin1[fstride1 * 2].r - Fin1[fstride1 * 2 + fstride].r;
+ scratch_in[5].i = Fin1[fstride1 * 2].i - Fin1[fstride1 * 2 + fstride].i;
+ scratch_in[6].r = Fin1[fstride1 * 3].r + Fin1[fstride1 * 3 + fstride].r;
+ scratch_in[6].i = Fin1[fstride1 * 3].i + Fin1[fstride1 * 3 + fstride].i;
+ scratch_in[7].r = Fin1[fstride1 * 3].r - Fin1[fstride1 * 3 + fstride].r;
+ scratch_in[7].i = Fin1[fstride1 * 3].i - Fin1[fstride1 * 3 + fstride].i;
+
+ // radix 4 butterfly without twiddles
+ scratch[0] = scratch_in[0];
+ scratch[1] = scratch_in[1];
+
+ scratch[2] = scratch_in[2];
+ scratch[3].r = (scratch_in[3].r + scratch_in[3].i) * TW_81;
+ scratch[3].i = (scratch_in[3].i - scratch_in[3].r) * TW_81;
+
+ scratch[4] = scratch_in[4];
+ scratch[5].r = scratch_in[5].i;
+ scratch[5].i = -scratch_in[5].r;
+
+ scratch[6].r = scratch_in[6].r;
+ scratch[6].i = scratch_in[6].i;
+ scratch[7].r = (scratch_in[7].r - scratch_in[7].i) * TW_81N;
+ scratch[7].i = (scratch_in[7].i + scratch_in[7].r) * TW_81N;
+
+ // radix 2 butterfly
+ scratch[8].r = scratch[0].r + scratch[4].r;
+ scratch[8].i = scratch[0].i + scratch[4].i;
+ scratch[9].r = scratch[1].r + scratch[5].r;
+ scratch[9].i = scratch[1].i + scratch[5].i;
+
+ scratch[10].r = scratch[0].r - scratch[4].r;
+ scratch[10].i = scratch[0].i - scratch[4].i;
+ scratch[11].r = scratch[1].r - scratch[5].r;
+ scratch[11].i = scratch[1].i - scratch[5].i;
+
+ // radix 2 butterfly
+ scratch[12].r = scratch[2].r + scratch[6].r;
+ scratch[12].i = scratch[2].i + scratch[6].i;
+ scratch[13].r = scratch[3].r + scratch[7].r;
+ scratch[13].i = scratch[3].i + scratch[7].i;
+
+ scratch[14].r = scratch[2].r - scratch[6].r;
+ scratch[14].i = scratch[2].i - scratch[6].i;
+ scratch[15].r = scratch[3].r - scratch[7].r;
+ scratch[15].i = scratch[3].i - scratch[7].i;
+
+ // third result
+ scratch_out[4].r = scratch[8].r - scratch[12].r;
+ scratch_out[4].i = scratch[8].i - scratch[12].i;
+ scratch_out[5].r = scratch[9].r - scratch[13].r;
+ scratch_out[5].i = scratch[9].i - scratch[13].i;
+
+ // first result
+ scratch_out[0].r = scratch[8].r + scratch[12].r;
+ scratch_out[0].i = scratch[8].i + scratch[12].i;
+ scratch_out[1].r = scratch[9].r + scratch[13].r;
+ scratch_out[1].i = scratch[9].i + scratch[13].i;
+
+ // second result
+ scratch_out[2].r = scratch[10].r + scratch[14].i;
+ scratch_out[2].i = scratch[10].i - scratch[14].r;
+ scratch_out[3].r = scratch[11].r + scratch[15].i;
+ scratch_out[3].i = scratch[11].i - scratch[15].r;
+
+ // fourth result
+ scratch_out[6].r = scratch[10].r - scratch[14].i;
+ scratch_out[6].i = scratch[10].i + scratch[14].r;
+ scratch_out[7].r = scratch[11].r - scratch[15].i;
+ scratch_out[7].i = scratch[11].i + scratch[15].r;
+
+ // store
+ Fout1[0] = scratch_out[0];
+ Fout1[1] = scratch_out[1];
+ Fout1[2] = scratch_out[2];
+ Fout1[3] = scratch_out[3];
+ Fout1[4] = scratch_out[4];
+ Fout1[5] = scratch_out[5];
+ Fout1[6] = scratch_out[6];
+ Fout1[7] = scratch_out[7];
+
+ Fin1 += 1;
+ } // f_count
+ tw += 6;
+ mstride <<= 2;
+ fstride >>= 4;
+ stage_count -= 2;
+
+ // swap
+ Ftmp = Fin;
+ Fin = Fout;
+ Fout = Ftmp;
}
- else if (factors[2 * stage_count] == 4) // length of FFT is 2^n (n is even)
+ else if (N == 4) // length of FFT is 2^n (n is even)
{
//fstride is nfft>>2
- for (i = 0; i < fstride; i++)
+ for (f_count = fstride; f_count ; f_count --)
{
- scratch[2].r = Fout[4 * i].r - Fout[4 * i + 2].r;
- scratch[2].i = Fout[4 * i].i - Fout[4 * i + 2].i;
-
- Fout[4 * i].r += Fout[4 * i + 2].r;
- Fout[4 * i].i += Fout[4 * i + 2].i;
-
- scratch[0].r = Fout[4 * i + 1].r + Fout[4 * i + 3].r;
- scratch[0].i = Fout[4 * i + 1].i + Fout[4 * i + 3].i;
-
- scratch[1].r = Fout[4 * i + 1].r - Fout[4 * i + 3].r;
- scratch[1].i = Fout[4 * i + 1].i - Fout[4 * i + 3].i;
- Fout[4 * i + 2].r = Fout[4 * i].r - scratch[0].r;
- Fout[4 * i + 2].i = Fout[4 * i].i - scratch[0].i;
-
- Fout[4 * i].r += scratch[0].r;
- Fout[4 * i].i += scratch[0].i;
-
- Fout[4 * i + 1].r = scratch[2].r + scratch[1].i;
- Fout[4 * i + 1].i = scratch[2].i - scratch[1].r;
- Fout[4 * i + 3].r = scratch[2].r - scratch[1].i;
- Fout[4 * i + 3].i = scratch[2].i + scratch[1].r;
- }
+ // load
+ scratch_in[0] = *Fin1;
+ Fin2 = Fin1 + fstride;
+ scratch_in[1] = *Fin2;
+ Fin2 = Fin2 + fstride;
+ scratch_in[2] = *Fin2;
+ Fin2 = Fin2 + fstride;
+ scratch_in[3] = *Fin2;
+
+ // radix 4 butterfly without twiddles
+
+ // radix 2 butterfly
+ scratch[0].r = scratch_in[0].r + scratch_in[2].r;
+ scratch[0].i = scratch_in[0].i + scratch_in[2].i;
+
+ scratch[1].r = scratch_in[0].r - scratch_in[2].r;
+ scratch[1].i = scratch_in[0].i - scratch_in[2].i;
+
+ // radix 2 butterfly
+ scratch[2].r = scratch_in[1].r + scratch_in[3].r;
+ scratch[2].i = scratch_in[1].i + scratch_in[3].i;
+
+ scratch[3].r = scratch_in[1].r - scratch_in[3].r;
+ scratch[3].i = scratch_in[1].i - scratch_in[3].i;
+
+ // third result
+ scratch_out[2].r = scratch[0].r - scratch[2].r;
+ scratch_out[2].i = scratch[0].i - scratch[2].i;
+
+ // first result
+ scratch_out[0].r = scratch[0].r + scratch[2].r;
+ scratch_out[0].i = scratch[0].i + scratch[2].i;
+
+ // second result
+ scratch_out[1].r = scratch[1].r + scratch[3].i;
+ scratch_out[1].i = scratch[1].i - scratch[3].r;
+
+ // fourth result
+ scratch_out[3].r = scratch[1].r - scratch[3].i;
+ scratch_out[3].i = scratch[1].i + scratch[3].r;
+
+ // store
+ * Fout1 ++ = scratch_out[0];
+ * Fout1 ++ = scratch_out[1];
+ * Fout1 ++ = scratch_out[2];
+ * Fout1 ++ = scratch_out[3];
+
+ Fin1++;
+ } // f_count
+
+ N = fstride; // 1/4 of length of FFT
+
+ // swap
+ Ftmp = Fin;
+ Fin = Fout;
+ Fout = Ftmp;
+
+ // update address for other stages
+ stage_count--;
+ tw = twiddles;
+ fstride >>= 2;
+ // end of first stage
}
- stage_count--;
- // other stages
- mstride = factors[2 * stage_count + 1];
- tw = twiddles;
- for (; stage_count > 0; stage_count--)
+
+ // others but the last one
+ for (; stage_count > 1 ; stage_count--)
{
- fstride = fstride >> 2;
- for (i = 0; i < fstride; i++)
+ Fin1 = Fin;
+ for (f_count = 0; f_count < fstride; f_count ++)
{
- F = &Fout[i * mstride * 4];
+ Fout1 = & Fout[ f_count * mstride << 2 ];
tw1 = tw;
- tw2 = tw + mstride;
- tw3 = tw + mstride * 2;
- for (j = 0; j < mstride; j++)
+ for (m_count = mstride; m_count ; m_count --)
{
- scratch[0].r = F[mstride].r * tw1->r - F[mstride].i * tw1->i;
- scratch[0].i = F[mstride].r * tw1->i + F[mstride].i * tw1->r;
- scratch[1].r = F[mstride * 2].r * tw2->r - F[mstride * 2].i * tw2->i;
- scratch[1].i = F[mstride * 2].r * tw2->i + F[mstride * 2].i * tw2->r;
- scratch[2].r = F[mstride * 3].r * tw3->r - F[mstride * 3].i * tw3->i;
- scratch[2].i = F[mstride * 3].r * tw3->i + F[mstride * 3].i * tw3->r;
-
- scratch[5].r = F->r - scratch[1].r;
- scratch[5].i = F->i - scratch[1].i;
- F->r += scratch[1].r;
- F->i += scratch[1].i;
-
- scratch[3].r = scratch[0].r + scratch[2].r;
- scratch[3].i = scratch[0].i + scratch[2].i;
- scratch[4].r = scratch[0].r - scratch[2].r;
- scratch[4].i = scratch[0].i - scratch[2].i;
-
- F[mstride * 2].r = F->r - scratch[3].r;
- F[mstride * 2].i = F->i - scratch[3].i;
- F->r += scratch[3].r;
- F->i += scratch[3].i;
-
- F[mstride].r = scratch[5].r + scratch[4].i;
- F[mstride].i = scratch[5].i - scratch[4].r;
- F[mstride * 3].r = scratch[5].r - scratch[4].i;
- F[mstride * 3].i = scratch[5].i + scratch[4].r;
+ // load
+ scratch_tw[0] = *tw1;
+ tw2 = tw1 + mstride;
+ scratch_tw[1] = *tw2;
+ tw2 += mstride;
+ scratch_tw[2] = *tw2;
+ scratch_in[0] = * Fin1;
+ Fin2 = Fin1 + N;
+ scratch_in[1] = * Fin2;
+ Fin2 += N;
+ scratch_in[2] = * Fin2;
+ Fin2 += N;
+ scratch_in[3] = * Fin2;
+
+ // radix 4 butterfly with twiddles
+
+ scratch[0] = scratch_in[0];
+ scratch[1].r = scratch_in[1].r * scratch_tw[0].r - scratch_in[1].i * scratch_tw[0].i;
+ scratch[1].i = scratch_in[1].i * scratch_tw[0].r + scratch_in[1].r * scratch_tw[0].i;
+
+ scratch[2].r = scratch_in[2].r * scratch_tw[1].r - scratch_in[2].i * scratch_tw[1].i;
+ scratch[2].i = scratch_in[2].i * scratch_tw[1].r + scratch_in[2].r * scratch_tw[1].i;
+
+ scratch[3].r = scratch_in[3].r * scratch_tw[2].r - scratch_in[3].i * scratch_tw[2].i;
+ scratch[3].i = scratch_in[3].i * scratch_tw[2].r + scratch_in[3].r * scratch_tw[2].i;
+
+ // radix 2 butterfly
+ scratch[4].r = scratch[0].r + scratch[2].r;
+ scratch[4].i = scratch[0].i + scratch[2].i;
+
+ scratch[5].r = scratch[0].r - scratch[2].r;
+ scratch[5].i = scratch[0].i - scratch[2].i;
+
+ // radix 2 butterfly
+ scratch[6].r = scratch[1].r + scratch[3].r;
+ scratch[6].i = scratch[1].i + scratch[3].i;
+
+ scratch[7].r = scratch[1].r - scratch[3].r;
+ scratch[7].i = scratch[1].i - scratch[3].i;
+
+ // third result
+ scratch_out[2].r = scratch[4].r - scratch[6].r;
+ scratch_out[2].i = scratch[4].i - scratch[6].i;
+
+ // first result
+ scratch_out[0].r = scratch[4].r + scratch[6].r;
+ scratch_out[0].i = scratch[4].i + scratch[6].i;
+
+ // second result
+ scratch_out[1].r = scratch[5].r + scratch[7].i;
+ scratch_out[1].i = scratch[5].i - scratch[7].r;
+
+ // fourth result
+ scratch_out[3].r = scratch[5].r - scratch[7].i;
+ scratch_out[3].i = scratch[5].i + scratch[7].r;
+
+ // store
+ *Fout1 = scratch_out[0];
+ Fout2 = Fout1 + mstride;
+ *Fout2 = scratch_out[1];
+ Fout2 += mstride;
+ *Fout2 = scratch_out[2];
+ Fout2 += mstride;
+ *Fout2 = scratch_out[3];
tw1++;
- tw2++;
- tw3++;
- F++;
- }
- }
+ Fin1 ++;
+ Fout1 ++;
+ } // m_count
+ } // f_count
tw += mstride * 3;
mstride <<= 2;
- }
+ // swap
+ Ftmp = Fin;
+ Fin = Fout;
+ Fout = Ftmp;
+ fstride >>= 2;
+ } // stage_count
+
+ // the last one
+ if (stage_count)
+ {
+ Fin1 = Fin;
+ // the Fin/Fout swaps above may leave Fout pointing at the input array, so the last stage always writes to the original output buffer (Fout_ls)
+ Fout1 = Fout_ls;
+ for (f_count = 0; f_count < fstride; f_count ++)
+ {
+ tw1 = tw;
+ for (m_count = mstride; m_count ; m_count --)
+ {
+ // load
+ scratch_tw[0] = *tw1;
+ tw2 = tw1 + mstride;
+ scratch_tw[1] = *tw2;
+ tw2 += mstride;
+ scratch_tw[2] = *tw2;
+ scratch_in[0] = * Fin1;
+ Fin2 = Fin1 + N;
+ scratch_in[1] = * Fin2;
+ Fin2 += N;
+ scratch_in[2] = * Fin2;
+ Fin2 += N;
+ scratch_in[3] = * Fin2;
+
+ // radix 4 butterfly with twiddles
+
+ scratch[0] = scratch_in[0];
+ scratch[1].r = scratch_in[1].r * scratch_tw[0].r - scratch_in[1].i * scratch_tw[0].i;
+ scratch[1].i = scratch_in[1].i * scratch_tw[0].r + scratch_in[1].r * scratch_tw[0].i;
+
+ scratch[2].r = scratch_in[2].r * scratch_tw[1].r - scratch_in[2].i * scratch_tw[1].i;
+ scratch[2].i = scratch_in[2].i * scratch_tw[1].r + scratch_in[2].r * scratch_tw[1].i;
+
+ scratch[3].r = scratch_in[3].r * scratch_tw[2].r - scratch_in[3].i * scratch_tw[2].i;
+ scratch[3].i = scratch_in[3].i * scratch_tw[2].r + scratch_in[3].r * scratch_tw[2].i;
+
+ // radix 2 butterfly
+ scratch[4].r = scratch[0].r + scratch[2].r;
+ scratch[4].i = scratch[0].i + scratch[2].i;
+
+ scratch[5].r = scratch[0].r - scratch[2].r;
+ scratch[5].i = scratch[0].i - scratch[2].i;
+
+ // radix 2 butterfly
+ scratch[6].r = scratch[1].r + scratch[3].r;
+ scratch[6].i = scratch[1].i + scratch[3].i;
+
+ scratch[7].r = scratch[1].r - scratch[3].r;
+ scratch[7].i = scratch[1].i - scratch[3].i;
+
+ // third result
+ scratch_out[2].r = scratch[4].r - scratch[6].r;
+ scratch_out[2].i = scratch[4].i - scratch[6].i;
+
+ // first result
+ scratch_out[0].r = scratch[4].r + scratch[6].r;
+ scratch_out[0].i = scratch[4].i + scratch[6].i;
+
+ // second result
+ scratch_out[1].r = scratch[5].r + scratch[7].i;
+ scratch_out[1].i = scratch[5].i - scratch[7].r;
+
+ // fourth result
+ scratch_out[3].r = scratch[5].r - scratch[7].i;
+ scratch_out[3].i = scratch[5].i + scratch[7].r;
+
+ // store
+ *Fout1 = scratch_out[0];
+ Fout2 = Fout1 + N;
+ *Fout2 = scratch_out[1];
+ Fout2 += N;
+ *Fout2 = scratch_out[2];
+ Fout2 += N;
+ *Fout2 = scratch_out[3];
+
+ tw1 ++;
+ Fin1 ++;
+ Fout1 ++;
+ } // m_count
+ } // f_count
+ } // last stage
}
static void ne10_mixed_radix_butterfly_inverse_float32_c (ne10_fft_cpx_float32_t * Fout,
+ ne10_fft_cpx_float32_t * Fin,
ne10_int32_t * factors,
ne10_fft_cpx_float32_t * twiddles)
-
{
- ne10_int32_t i, j, mstride;
+ ne10_int32_t fstride, mstride, N;
+ ne10_int32_t fstride1;
+ ne10_int32_t f_count, m_count;
ne10_int32_t stage_count;
- ne10_int32_t fstride;
- ne10_fft_cpx_float32_t tmp;
- ne10_fft_cpx_float32_t scratch[6];
- ne10_fft_cpx_float32_t *tw, *tw1, *tw2, *tw3;
- ne10_fft_cpx_float32_t * F;
+ ne10_fft_cpx_float32_t scratch_in[8];
+ ne10_fft_cpx_float32_t scratch_out[8];
+ ne10_fft_cpx_float32_t scratch[16];
+ ne10_fft_cpx_float32_t scratch_tw[6];
+ ne10_fft_cpx_float32_t *Fin1, *Fin2, *Fout1, *Fout2;
+ ne10_fft_cpx_float32_t *Fout_ls = Fout;
+ ne10_fft_cpx_float32_t *Ftmp;
+ ne10_fft_cpx_float32_t *tw, *tw1, *tw2;
+ const ne10_float32_t TW_81 = 0.70710678;
+ const ne10_float32_t TW_81N = -0.70710678;
- // the first stage
+ // init fstride, mstride, N
stage_count = factors[0];
fstride = factors[1];
- if (factors[2 * stage_count] == 2) // length of FFT is 2^n (n is odd)
+ mstride = factors[ (stage_count << 1) - 1 ];
+ N = factors[ stage_count << 1 ]; // radix
+
+ // the first stage
+ Fin1 = Fin;
+ Fout1 = Fout;
+ if (N == 2) // length of FFT is 2^n (n is odd)
{
- //fstride is nfft>>1;
- for (i = 0; i < fstride; i++)
+ // radix 8
+ N = fstride >> 1; // 1/4 of length of FFT
+ tw = twiddles;
+ fstride1 = fstride >> 2;
+
+ Fin1 = Fin;
+ for (f_count = 0; f_count < fstride1; f_count ++)
{
- tmp.r = Fout[2 * i + 1].r;
- tmp.i = Fout[2 * i + 1].i;
- Fout[2 * i + 1].r = Fout[2 * i].r - tmp.r;
- Fout[2 * i + 1].i = Fout[2 * i].i - tmp.i;
- Fout[2 * i].r = Fout[2 * i].r + tmp.r;
- Fout[2 * i].i = Fout[2 * i].i + tmp.i;
- }
+ Fout1 = & Fout[ f_count * 8 ];
+ // load
+ scratch_tw[0] = tw[0];
+ scratch_tw[2] = tw[2];
+ scratch_tw[4] = tw[4];
+ scratch_tw[1] = tw[1];
+ scratch_tw[3] = tw[3];
+ scratch_tw[5] = tw[5];
+
+ scratch_in[0].r = Fin1[0].r + Fin1[0 + fstride].r;
+ scratch_in[0].i = Fin1[0].i + Fin1[0 + fstride].i;
+ scratch_in[1].r = Fin1[0].r - Fin1[0 + fstride].r;
+ scratch_in[1].i = Fin1[0].i - Fin1[0 + fstride].i;
+ scratch_in[2].r = Fin1[fstride1].r + Fin1[fstride1 + fstride].r;
+ scratch_in[2].i = Fin1[fstride1].i + Fin1[fstride1 + fstride].i;
+ scratch_in[3].r = Fin1[fstride1].r - Fin1[fstride1 + fstride].r;
+ scratch_in[3].i = Fin1[fstride1].i - Fin1[fstride1 + fstride].i;
+ scratch_in[4].r = Fin1[fstride1 * 2].r + Fin1[fstride1 * 2 + fstride].r;
+ scratch_in[4].i = Fin1[fstride1 * 2].i + Fin1[fstride1 * 2 + fstride].i;
+ scratch_in[5].r = Fin1[fstride1 * 2].r - Fin1[fstride1 * 2 + fstride].r;
+ scratch_in[5].i = Fin1[fstride1 * 2].i - Fin1[fstride1 * 2 + fstride].i;
+ scratch_in[6].r = Fin1[fstride1 * 3].r + Fin1[fstride1 * 3 + fstride].r;
+ scratch_in[6].i = Fin1[fstride1 * 3].i + Fin1[fstride1 * 3 + fstride].i;
+ scratch_in[7].r = Fin1[fstride1 * 3].r - Fin1[fstride1 * 3 + fstride].r;
+ scratch_in[7].i = Fin1[fstride1 * 3].i - Fin1[fstride1 * 3 + fstride].i;
+
+ // radix 4 butterfly without twiddles (only the fixed TW_81 / TW_81N constants are applied)
+
+ scratch[0] = scratch_in[0];
+ scratch[1] = scratch_in[1];
+
+ scratch[2] = scratch_in[2];
+ scratch[3].r = (scratch_in[3].r - scratch_in[3].i) * TW_81;
+ scratch[3].i = (scratch_in[3].i + scratch_in[3].r) * TW_81;
+
+ scratch[4] = scratch_in[4];
+ scratch[5].r = -scratch_in[5].i;
+ scratch[5].i = scratch_in[5].r;
+
+ scratch[6].r = scratch_in[6].r;
+ scratch[6].i = scratch_in[6].i;
+ scratch[7].r = (scratch_in[7].r + scratch_in[7].i) * TW_81N;
+ scratch[7].i = (scratch_in[7].i - scratch_in[7].r) * TW_81N;
+
+ // radix 2 butterfly
+ scratch[8].r = scratch[0].r + scratch[4].r;
+ scratch[8].i = scratch[0].i + scratch[4].i;
+ scratch[9].r = scratch[1].r + scratch[5].r;
+ scratch[9].i = scratch[1].i + scratch[5].i;
+
+ scratch[10].r = scratch[0].r - scratch[4].r;
+ scratch[10].i = scratch[0].i - scratch[4].i;
+ scratch[11].r = scratch[1].r - scratch[5].r;
+ scratch[11].i = scratch[1].i - scratch[5].i;
+
+ // radix 2 butterfly
+ scratch[12].r = scratch[2].r + scratch[6].r;
+ scratch[12].i = scratch[2].i + scratch[6].i;
+ scratch[13].r = scratch[3].r + scratch[7].r;
+ scratch[13].i = scratch[3].i + scratch[7].i;
+
+ scratch[14].r = scratch[2].r - scratch[6].r;
+ scratch[14].i = scratch[2].i - scratch[6].i;
+ scratch[15].r = scratch[3].r - scratch[7].r;
+ scratch[15].i = scratch[3].i - scratch[7].i;
+
+ // third result
+ scratch_out[4].r = scratch[8].r - scratch[12].r;
+ scratch_out[4].i = scratch[8].i - scratch[12].i;
+ scratch_out[5].r = scratch[9].r - scratch[13].r;
+ scratch_out[5].i = scratch[9].i - scratch[13].i;
+
+ // first result
+ scratch_out[0].r = scratch[8].r + scratch[12].r;
+ scratch_out[0].i = scratch[8].i + scratch[12].i;
+ scratch_out[1].r = scratch[9].r + scratch[13].r;
+ scratch_out[1].i = scratch[9].i + scratch[13].i;
+
+ // second result
+ scratch_out[2].r = scratch[10].r - scratch[14].i;
+ scratch_out[2].i = scratch[10].i + scratch[14].r;
+ scratch_out[3].r = scratch[11].r - scratch[15].i;
+ scratch_out[3].i = scratch[11].i + scratch[15].r;
+
+ // fourth result
+ scratch_out[6].r = scratch[10].r + scratch[14].i;
+ scratch_out[6].i = scratch[10].i - scratch[14].r;
+ scratch_out[7].r = scratch[11].r + scratch[15].i;
+ scratch_out[7].i = scratch[11].i - scratch[15].r;
+
+ // store
+ Fout1[0] = scratch_out[0];
+ Fout1[1] = scratch_out[1];
+ Fout1[2] = scratch_out[2];
+ Fout1[3] = scratch_out[3];
+ Fout1[4] = scratch_out[4];
+ Fout1[5] = scratch_out[5];
+ Fout1[6] = scratch_out[6];
+ Fout1[7] = scratch_out[7];
+
+ Fin1 += 1;
+ } // f_count
+ tw += 6;
+ mstride <<= 2;
+ fstride >>= 4;
+ stage_count -= 2;
+
+ // swap
+ Ftmp = Fin;
+ Fin = Fout;
+ Fout = Ftmp;
}
- else if (factors[2 * stage_count] == 4) // length of FFT is 2^n (n is even)
+ else if (N == 4) // length of FFT is 2^n (n is even)
{
//fstride is nfft>>2
- for (i = 0; i < fstride; i++)
+ for (f_count = fstride; f_count ; f_count --)
{
- scratch[2].r = Fout[4 * i].r - Fout[4 * i + 2].r;
- scratch[2].i = Fout[4 * i].i - Fout[4 * i + 2].i;
-
- Fout[4 * i].r += Fout[4 * i + 2].r;
- Fout[4 * i].i += Fout[4 * i + 2].i;
-
- scratch[0].r = Fout[4 * i + 1].r + Fout[4 * i + 3].r;
- scratch[0].i = Fout[4 * i + 1].i + Fout[4 * i + 3].i;
-
- scratch[1].r = Fout[4 * i + 1].r - Fout[4 * i + 3].r;
- scratch[1].i = Fout[4 * i + 1].i - Fout[4 * i + 3].i;
- Fout[4 * i + 2].r = Fout[4 * i].r - scratch[0].r;
- Fout[4 * i + 2].i = Fout[4 * i].i - scratch[0].i;
-
- Fout[4 * i].r += scratch[0].r;
- Fout[4 * i].i += scratch[0].i;
-
- Fout[4 * i + 1].r = scratch[2].r - scratch[1].i;
- Fout[4 * i + 1].i = scratch[2].i + scratch[1].r;
- Fout[4 * i + 3].r = scratch[2].r + scratch[1].i;
- Fout[4 * i + 3].i = scratch[2].i - scratch[1].r;
- }
+ // load
+ scratch_in[0] = *Fin1;
+ Fin2 = Fin1 + fstride;
+ scratch_in[1] = *Fin2;
+ Fin2 = Fin2 + fstride;
+ scratch_in[2] = *Fin2;
+ Fin2 = Fin2 + fstride;
+ scratch_in[3] = *Fin2;
+
+ // radix 4 butterfly without twiddles
+
+ // radix 2 butterfly
+ scratch[0].r = scratch_in[0].r + scratch_in[2].r;
+ scratch[0].i = scratch_in[0].i + scratch_in[2].i;
+
+ scratch[1].r = scratch_in[0].r - scratch_in[2].r;
+ scratch[1].i = scratch_in[0].i - scratch_in[2].i;
+
+ // radix 2 butterfly
+ scratch[2].r = scratch_in[1].r + scratch_in[3].r;
+ scratch[2].i = scratch_in[1].i + scratch_in[3].i;
+
+ scratch[3].r = scratch_in[1].r - scratch_in[3].r;
+ scratch[3].i = scratch_in[1].i - scratch_in[3].i;
+
+ // third result
+ scratch_out[2].r = scratch[0].r - scratch[2].r;
+ scratch_out[2].i = scratch[0].i - scratch[2].i;
+
+ // first result
+ scratch_out[0].r = scratch[0].r + scratch[2].r;
+ scratch_out[0].i = scratch[0].i + scratch[2].i;
+
+ // second result
+ scratch_out[1].r = scratch[1].r - scratch[3].i;
+ scratch_out[1].i = scratch[1].i + scratch[3].r;
+
+ // fourth result
+ scratch_out[3].r = scratch[1].r + scratch[3].i;
+ scratch_out[3].i = scratch[1].i - scratch[3].r;
+
+ // store
+ * Fout1 ++ = scratch_out[0];
+ * Fout1 ++ = scratch_out[1];
+ * Fout1 ++ = scratch_out[2];
+ * Fout1 ++ = scratch_out[3];
+
+ Fin1++;
+ } // f_count
+
+ N = fstride; // 1/4 of length of FFT
+
+ // swap
+ Ftmp = Fin;
+ Fin = Fout;
+ Fout = Ftmp;
+
+ // update address for other stages
+ stage_count--;
+ tw = twiddles;
+ fstride >>= 2;
+ // end of first stage
}
- stage_count--;
- // other stages
- mstride = factors[2 * stage_count + 1];
- tw = twiddles;
- for (; stage_count > 0; stage_count--)
+
+ // others but the last one
+ for (; stage_count > 1 ; stage_count--)
{
- fstride = fstride >> 2;
- for (i = 0; i < fstride; i++)
+ Fin1 = Fin;
+ for (f_count = 0; f_count < fstride; f_count ++)
{
- F = &Fout[i * mstride * 4];
+ Fout1 = & Fout[ f_count * mstride << 2 ];
tw1 = tw;
- tw2 = tw + mstride;
- tw3 = tw + mstride * 2;
- for (j = 0; j < mstride; j++)
+ for (m_count = mstride; m_count ; m_count --)
{
- scratch[0].r = F[mstride].r * tw1->r + F[mstride].i * tw1->i;
- scratch[0].i = F[mstride].i * tw1->r - F[mstride].r * tw1->i;
- scratch[1].r = F[mstride * 2].r * tw2->r + F[mstride * 2].i * tw2->i;
- scratch[1].i = F[mstride * 2].i * tw2->r - F[mstride * 2].r * tw2->i;
- scratch[2].r = F[mstride * 3].r * tw3->r + F[mstride * 3].i * tw3->i;
- scratch[2].i = F[mstride * 3].i * tw3->r - F[mstride * 3].r * tw3->i;
-
- scratch[5].r = F->r - scratch[1].r;
- scratch[5].i = F->i - scratch[1].i;
- F->r += scratch[1].r;
- F->i += scratch[1].i;
-
- scratch[3].r = scratch[0].r + scratch[2].r;
- scratch[3].i = scratch[0].i + scratch[2].i;
- scratch[4].r = scratch[0].r - scratch[2].r;
- scratch[4].i = scratch[0].i - scratch[2].i;
-
- F[mstride * 2].r = F->r - scratch[3].r;
- F[mstride * 2].i = F->i - scratch[3].i;
- F->r += scratch[3].r;
- F->i += scratch[3].i;
-
- F[mstride].r = scratch[5].r - scratch[4].i;
- F[mstride].i = scratch[5].i + scratch[4].r;
- F[mstride * 3].r = scratch[5].r + scratch[4].i;
- F[mstride * 3].i = scratch[5].i - scratch[4].r;
+ // load
+ scratch_tw[0] = *tw1;
+ tw2 = tw1 + mstride;
+ scratch_tw[1] = *tw2;
+ tw2 += mstride;
+ scratch_tw[2] = *tw2;
+ scratch_in[0] = * Fin1;
+ Fin2 = Fin1 + N;
+ scratch_in[1] = * Fin2;
+ Fin2 += N;
+ scratch_in[2] = * Fin2;
+ Fin2 += N;
+ scratch_in[3] = * Fin2;
+
+ // radix 4 butterfly with twiddles
+
+ scratch[0] = scratch_in[0];
+ scratch[1].r = scratch_in[1].r * scratch_tw[0].r + scratch_in[1].i * scratch_tw[0].i;
+ scratch[1].i = scratch_in[1].i * scratch_tw[0].r - scratch_in[1].r * scratch_tw[0].i;
+
+ scratch[2].r = scratch_in[2].r * scratch_tw[1].r + scratch_in[2].i * scratch_tw[1].i;
+ scratch[2].i = scratch_in[2].i * scratch_tw[1].r - scratch_in[2].r * scratch_tw[1].i;
+
+ scratch[3].r = scratch_in[3].r * scratch_tw[2].r + scratch_in[3].i * scratch_tw[2].i;
+ scratch[3].i = scratch_in[3].i * scratch_tw[2].r - scratch_in[3].r * scratch_tw[2].i;
+
+ // radix 2 butterfly
+ scratch[4].r = scratch[0].r + scratch[2].r;
+ scratch[4].i = scratch[0].i + scratch[2].i;
+
+ scratch[5].r = scratch[0].r - scratch[2].r;
+ scratch[5].i = scratch[0].i - scratch[2].i;
+
+ // radix 2 butterfly
+ scratch[6].r = scratch[1].r + scratch[3].r;
+ scratch[6].i = scratch[1].i + scratch[3].i;
+
+ scratch[7].r = scratch[1].r - scratch[3].r;
+ scratch[7].i = scratch[1].i - scratch[3].i;
+
+ // third result
+ scratch_out[2].r = scratch[4].r - scratch[6].r;
+ scratch_out[2].i = scratch[4].i - scratch[6].i;
+
+ // first result
+ scratch_out[0].r = scratch[4].r + scratch[6].r;
+ scratch_out[0].i = scratch[4].i + scratch[6].i;
+
+ // second result
+ scratch_out[1].r = scratch[5].r - scratch[7].i;
+ scratch_out[1].i = scratch[5].i + scratch[7].r;
+
+ // fourth result
+ scratch_out[3].r = scratch[5].r + scratch[7].i;
+ scratch_out[3].i = scratch[5].i - scratch[7].r;
+
+ // store
+ *Fout1 = scratch_out[0];
+ Fout2 = Fout1 + mstride;
+ *Fout2 = scratch_out[1];
+ Fout2 += mstride;
+ *Fout2 = scratch_out[2];
+ Fout2 += mstride;
+ *Fout2 = scratch_out[3];
tw1++;
- tw2++;
- tw3++;
- F++;
- }
- }
+ Fin1 ++;
+ Fout1 ++;
+ } // m_count
+ } // f_count
tw += mstride * 3;
mstride <<= 2;
- }
+ // swap
+ Ftmp = Fin;
+ Fin = Fout;
+ Fout = Ftmp;
+ fstride >>= 2;
+ } // stage_count
+
+ // the last one
+ if (stage_count)
+ {
+ Fin1 = Fin;
+ // the Fin/Fout swaps above may leave Fout pointing at the input array, so the last stage always writes to the original output buffer (Fout_ls)
+ Fout1 = Fout_ls;
+
+ for (f_count = 0; f_count < fstride; f_count ++)
+ {
+ tw1 = tw;
+ for (m_count = mstride; m_count ; m_count --)
+ {
+ // load
+ scratch_tw[0] = *tw1;
+ tw2 = tw1 + mstride;
+ scratch_tw[1] = *tw2;
+ tw2 += mstride;
+ scratch_tw[2] = *tw2;
+ scratch_in[0] = * Fin1;
+ Fin2 = Fin1 + N;
+ scratch_in[1] = * Fin2;
+ Fin2 += N;
+ scratch_in[2] = * Fin2;
+ Fin2 += N;
+ scratch_in[3] = * Fin2;
+
+ // radix 4 butterfly with twiddles
+
+ scratch[0] = scratch_in[0];
+ scratch[1].r = scratch_in[1].r * scratch_tw[0].r + scratch_in[1].i * scratch_tw[0].i;
+ scratch[1].i = scratch_in[1].i * scratch_tw[0].r - scratch_in[1].r * scratch_tw[0].i;
+
+ scratch[2].r = scratch_in[2].r * scratch_tw[1].r + scratch_in[2].i * scratch_tw[1].i;
+ scratch[2].i = scratch_in[2].i * scratch_tw[1].r - scratch_in[2].r * scratch_tw[1].i;
+
+ scratch[3].r = scratch_in[3].r * scratch_tw[2].r + scratch_in[3].i * scratch_tw[2].i;
+ scratch[3].i = scratch_in[3].i * scratch_tw[2].r - scratch_in[3].r * scratch_tw[2].i;
+
+ // radix 2 butterfly
+ scratch[4].r = scratch[0].r + scratch[2].r;
+ scratch[4].i = scratch[0].i + scratch[2].i;
+
+ scratch[5].r = scratch[0].r - scratch[2].r;
+ scratch[5].i = scratch[0].i - scratch[2].i;
+
+ // radix 2 butterfly
+ scratch[6].r = scratch[1].r + scratch[3].r;
+ scratch[6].i = scratch[1].i + scratch[3].i;
+
+ scratch[7].r = scratch[1].r - scratch[3].r;
+ scratch[7].i = scratch[1].i - scratch[3].i;
+
+ // third result
+ scratch_out[2].r = scratch[4].r - scratch[6].r;
+ scratch_out[2].i = scratch[4].i - scratch[6].i;
+
+ // first result
+ scratch_out[0].r = scratch[4].r + scratch[6].r;
+ scratch_out[0].i = scratch[4].i + scratch[6].i;
+
+ // second result
+ scratch_out[1].r = scratch[5].r - scratch[7].i;
+ scratch_out[1].i = scratch[5].i + scratch[7].r;
+
+ // fourth result
+ scratch_out[3].r = scratch[5].r + scratch[7].i;
+ scratch_out[3].i = scratch[5].i - scratch[7].r;
+
+ // store
+ *Fout1 = scratch_out[0];
+ Fout2 = Fout1 + N;
+ *Fout2 = scratch_out[1];
+ Fout2 += N;
+ *Fout2 = scratch_out[2];
+ Fout2 += N;
+ *Fout2 = scratch_out[3];
+
+ tw1 ++;
+ Fin1 ++;
+ Fout1 ++;
+ } // m_count
+ } // f_count
+ } // last stage
}
/* factors buffer:
return NE10_OK;
}
-void ne10_data_bitreversal_float32 (ne10_fft_cpx_float32_t * Fout,
- const ne10_fft_cpx_float32_t * f,
- ne10_int32_t fstride,
- ne10_int32_t * factors)
-{
- const ne10_int32_t p = *factors++; /* the radix */
- const ne10_int32_t m = *factors++; /* stage's fft length/p */
- const ne10_fft_cpx_float32_t * Fout_end = Fout + p * m;
- if (m == 1)
- {
- do
- {
- *Fout = *f;
- f += fstride;
- }
- while (++Fout != Fout_end);
- }
- else
- {
- do
- {
- ne10_data_bitreversal_float32 (Fout, f, fstride * p, factors);
- f += fstride;
- }
- while ( (Fout += m) != Fout_end);
- }
-
-}
void ne10_fft_split_r2c_1d_float32 (ne10_fft_cpx_float32_t *dst,
const ne10_fft_cpx_float32_t *src,
ne10_int32_t nfft,
ne10_int32_t inverse_fft)
{
- // copy the data from input to output and bit reversal
- ne10_data_bitreversal_float32 (fout, fin, 1, &factors[2]);
if (inverse_fft)
- ne10_mixed_radix_butterfly_inverse_float32_c (fout, factors, twiddles);
+ ne10_mixed_radix_butterfly_inverse_float32_c (fout, fin, factors, twiddles);
else
- ne10_mixed_radix_butterfly_float32_c (fout, factors, twiddles);
+ ne10_mixed_radix_butterfly_float32_c (fout, fin, factors, twiddles);
}
+
/**
* @}
*/ //end of C2C_FFT_IFFT group
Fout[6].r = t0_r - t3_i;
Fout[6].i = t0_i + t3_r;
- t4_r = (s3_r + s3_i) * TW_81;
- t4_i = -(s3_r - s3_i) * TW_81;
- t5_r = (s7_r - s7_i) * TW_81;
- t5_i = (s7_r + s7_i) * TW_81;
+ t4_r = (s3_r + s3_i) * TW_81;
+ t4_i = - (s3_r - s3_i) * TW_81;
+ t5_r = (s7_r - s7_i) * TW_81;
+ t5_i = (s7_r + s7_i) * TW_81;
t0_r = s1_r - s5_i;
t0_i = s1_i + s5_r;
t4_r = (s3_r - s3_i) * TW_81;
t4_i = (s3_r + s3_i) * TW_81;
t5_r = (s7_r + s7_i) * TW_81;
- t5_i = -(s7_r - s7_i) * TW_81;
+ t5_i = - (s7_r - s7_i) * TW_81;
t0_r = s1_r + s5_i;
t0_i = s1_i - s5_r;
Fout[7].i = t0_i - t3_r;
}
-static inline ne10_data_bitreversal_butterfly4_forward_float32 (ne10_fft_cpx_float32_t * out,
- ne10_fft_cpx_float32_t * in,
- ne10_int32_t fstride,
- ne10_int32_t stride1)
-{
- ne10_float32_t t0_r, t0_i, t1_r, t1_i, t2_r, t2_i, t3_r, t3_i, t4_r, t4_i, t5_r, t5_i;
- t2_r = in[0].r - in[fstride * 2].r;
- t2_i = in[0].i - in[fstride * 2].i;
- t3_r = in[0].r + in[fstride * 2].r;
- t3_i = in[0].i + in[fstride * 2].i;
- t0_r = in[fstride].r + in[fstride * 3].r;
- t0_i = in[fstride].i + in[fstride * 3].i;
- t1_r = in[fstride].r - in[fstride * 3].r;
- t1_i = in[fstride].i - in[fstride * 3].i;
- out[2].r = t3_r - t0_r;
- out[2].i = t3_i - t0_i;
- out[0].r = t3_r + t0_r;
- out[0].i = t3_i + t0_i;
- out[1].r = t2_r + t1_i;
- out[1].i = t2_i - t1_r;
- out[3].r = t2_r - t1_i;
- out[3].i = t2_i + t1_r;
-}
-
-static inline ne10_data_bitreversal_butterfly4_backward_float32 (ne10_fft_cpx_float32_t * out,
- ne10_fft_cpx_float32_t * in,
- ne10_int32_t fstride,
- ne10_int32_t stride1)
-{
- ne10_float32_t t0_r, t0_i, t1_r, t1_i, t2_r, t2_i, t3_r, t3_i, t4_r, t4_i, t5_r, t5_i;
- t2_r = in[0].r - in[fstride * 2].r;
- t2_i = in[0].i - in[fstride * 2].i;
- t3_r = in[0].r + in[fstride * 2].r;
- t3_i = in[0].i + in[fstride * 2].i;
- t0_r = in[fstride].r + in[fstride * 3].r;
- t0_i = in[fstride].i + in[fstride * 3].i;
- t1_r = in[fstride].r - in[fstride * 3].r;
- t1_i = in[fstride].i - in[fstride * 3].i;
- out[2].r = t3_r - t0_r;
- out[2].i = t3_i - t0_i;
- out[0].r = t3_r + t0_r;
- out[0].i = t3_i + t0_i;
- out[1].r = t2_r - t1_i;
- out[1].i = t2_i + t1_r;
- out[3].r = t2_r + t1_i;
- out[3].i = t2_i - t1_r;
-}
-
-#define ne10_data_bitreversal64_butterfly4_float32(inverse) \
-static void ne10_data_bitreversal64_butterfly4_##inverse##_float32 (ne10_fft_cpx_float32_t * Fout, \
- ne10_fft_cpx_float32_t * Fin) \
-{ \
- ne10_int32_t i, p; \
- ne10_int32_t fstride; \
- ne10_int32_t stride1; \
- ne10_int32_t t0_r, t0_i, t1_r, t1_i, t2_r, t2_i, t3_r, t3_i, t4_r, t4_i, t5_r, t5_i; \
- ne10_fft_cpx_float32_t * F; \
- ne10_fft_cpx_float32_t * in; \
- fstride = 16; \
- F = Fout; \
- in = Fin; \
- stride1 = fstride >> 2; \
- for (i = 0; i < 4; i++) \
- { \
- in = &Fin[i]; \
- for (p = 0; p < 4; p++) \
- { \
- ne10_data_bitreversal_butterfly4_##inverse##_float32 (F, in, fstride, stride1); \
- in += stride1; \
- F += 4; \
- } \
- } \
-}
-
-#define ne10_data_bitreversal256_butterfly4_float32(inverse) \
-static void ne10_data_bitreversal256_butterfly4_##inverse##_float32 (ne10_fft_cpx_float32_t * Fout, \
- ne10_fft_cpx_float32_t * Fin) \
-{ \
- ne10_int32_t i, j, p; \
- ne10_int32_t fstride; \
- ne10_int32_t stride1; \
- ne10_int32_t stride2; \
- ne10_int32_t t0_r, t0_i, t1_r, t1_i, t2_r, t2_i, t3_r, t3_i, t4_r, t4_i, t5_r, t5_i; \
- ne10_fft_cpx_float32_t * F; \
- ne10_fft_cpx_float32_t * in; \
- fstride = 64; \
- F = Fout; \
- in = Fin; \
- stride1 = fstride >> 2; \
- stride2 = stride1 >> 2; \
- for (j = 0; j < 4; j++) \
- { \
- for (i = 0; i < 4; i++) \
- { \
- in = &Fin[j + i * stride2]; \
- for (p = 0; p < 4; p++) \
- { \
- ne10_data_bitreversal_butterfly4_##inverse##_float32 (F, in, fstride, stride1); \
- in += stride1; \
- F += 4; \
- } \
- } \
- } \
-}
-
-#define ne10_data_bitreversal1024_butterfly4_float32(inverse) \
-static void ne10_data_bitreversal1024_butterfly4_##inverse##_float32 (ne10_fft_cpx_float32_t * Fout, \
- ne10_fft_cpx_float32_t * Fin) \
-{ \
- ne10_int32_t i, j, k, p; \
- ne10_int32_t fstride; \
- ne10_int32_t stride1; \
- ne10_int32_t stride2; \
- ne10_int32_t stride3; \
- ne10_int32_t t0_r, t0_i, t1_r, t1_i, t2_r, t2_i, t3_r, t3_i, t4_r, t4_i, t5_r, t5_i; \
- ne10_fft_cpx_float32_t * F; \
- ne10_fft_cpx_float32_t * in; \
- fstride = 256; \
- F = Fout; \
- in = Fin; \
- stride1 = fstride >> 2; \
- stride2 = stride1 >> 2; \
- stride3 = stride2 >> 2; \
- for (k = 0; k < 4; k++) \
- { \
- for (j = 0; j < 4; j++) \
- { \
- for (i = 0; i < 4; i++) \
- { \
- in = &Fin[k + j * stride3 + i * stride2]; \
- for (p = 0; p < 4; p++) \
- { \
- ne10_data_bitreversal_butterfly4_##inverse##_float32 (F, in, fstride, stride1); \
- in += stride1; \
- F += 4; \
- } \
- } \
- } \
- } \
-}
-
-#define ne10_data_bitreversal4096_butterfly4_float32(inverse) \
-static void ne10_data_bitreversal4096_butterfly4_##inverse##_float32 (ne10_fft_cpx_float32_t * Fout, \
- ne10_fft_cpx_float32_t * Fin) \
-{ \
- ne10_int32_t i, j, k, l, p; \
- ne10_int32_t fstride; \
- ne10_int32_t stride1; \
- ne10_int32_t stride2; \
- ne10_int32_t stride3; \
- ne10_int32_t stride4; \
- ne10_int32_t t0_r, t0_i, t1_r, t1_i, t2_r, t2_i, t3_r, t3_i, t4_r, t4_i, t5_r, t5_i; \
- ne10_fft_cpx_float32_t * F; \
- ne10_fft_cpx_float32_t * in; \
- fstride = 1024; \
- F = Fout; \
- in = Fin; \
- stride1 = fstride >> 2; \
- stride2 = stride1 >> 2; \
- stride3 = stride2 >> 2; \
- stride4 = stride3 >> 2; \
- for (l = 0; l < 4; l++) \
- { \
- for (k = 0; k < 4; k++) \
- { \
- for (j = 0; j < 4; j++) \
- { \
- for (i = 0; i < 4; i++) \
- { \
- in = &Fin[l + k*stride4 + j * stride3 + i * stride2]; \
- for (p = 0; p < 4; p++) \
- { \
- ne10_data_bitreversal_butterfly4_##inverse##_float32 (F, in, fstride, stride1); \
- in += stride1; \
- F += 4; \
- } \
- } \
- } \
- } \
- } \
-}
-
-#define ne10_butterfly_length_even_power2_float32_neon(inverse) \
-static void ne10_butterfly_##inverse##_length_even_power2_float32_neon (ne10_fft_cpx_float32_t * Fout, \
- ne10_fft_cpx_float32_t * Fin, \
- ne10_int32_t * factors, \
- ne10_fft_cpx_float32_t * twiddles) \
-{ \
- ne10_int32_t fstride = factors[1]; \
- if (fstride == 16) \
- ne10_data_bitreversal64_butterfly4_##inverse##_float32 (Fout, Fin); \
- else if (fstride == 64) \
- ne10_data_bitreversal256_butterfly4_##inverse##_float32 (Fout, Fin); \
- else if (fstride == 256) \
- ne10_data_bitreversal1024_butterfly4_##inverse##_float32 (Fout, Fin); \
- else if (fstride == 1024) \
- ne10_data_bitreversal4096_butterfly4_##inverse##_float32 (Fout, Fin); \
- ne10_radix4_butterfly_##inverse##_float32_neon (Fout, factors, twiddles);\
-}
-
-ne10_data_bitreversal64_butterfly4_float32 (forward)
-ne10_data_bitreversal64_butterfly4_float32 (backward)
-ne10_data_bitreversal256_butterfly4_float32 (forward)
-ne10_data_bitreversal256_butterfly4_float32 (backward)
-ne10_data_bitreversal1024_butterfly4_float32 (forward)
-ne10_data_bitreversal1024_butterfly4_float32 (backward)
-ne10_data_bitreversal4096_butterfly4_float32 (forward)
-ne10_data_bitreversal4096_butterfly4_float32 (backward)
-
-ne10_butterfly_length_even_power2_float32_neon (forward)
-ne10_butterfly_length_even_power2_float32_neon (backward)
-
-static inline ne10_data_bitreversal_butterfly2_float32_neon (ne10_fft_cpx_float32_t * out,
- ne10_fft_cpx_float32_t * in,
- ne10_int32_t fstride,
- ne10_int32_t stride1)
-{
- float32x2_t d_in0_0, d_in0_1;
- float32x2_t d_in1_0, d_in1_1;
- float32x2_t d_in2_0, d_in2_1;
- float32x2_t d_in3_0, d_in3_1;
- float32x4_t q_in01_0, q_in01_1, q_in23_0, q_in23_1;
- float32x4_t q_out01_0, q_out01_1, q_out23_0, q_out23_1;
- /* load loop */
- d_in0_0 = vld1_f32 ( (float32_t*) (&in[0]));
- d_in0_1 = vld1_f32 ( (float32_t*) (&in[fstride]));
- d_in1_0 = vld1_f32 ( (float32_t*) (&in[stride1]));
- d_in1_1 = vld1_f32 ( (float32_t*) (&in[stride1 + fstride]));
- d_in2_0 = vld1_f32 ( (float32_t*) (&in[stride1 * 2]));
- d_in2_1 = vld1_f32 ( (float32_t*) (&in[stride1 * 2 + fstride]));
- d_in3_0 = vld1_f32 ( (float32_t*) (&in[stride1 * 3]));
- d_in3_1 = vld1_f32 ( (float32_t*) (&in[stride1 * 3 + fstride]));
- /* calculate loop */
- q_in01_0 = vcombine_f32 (d_in0_0, d_in1_0);
- q_in01_1 = vcombine_f32 (d_in0_1, d_in1_1);
- q_in23_0 = vcombine_f32 (d_in2_0, d_in3_0);
- q_in23_1 = vcombine_f32 (d_in2_1, d_in3_1);
- q_out01_0 = vaddq_f32 (q_in01_0, q_in01_1);
- q_out01_1 = vsubq_f32 (q_in01_0, q_in01_1);
- q_out23_0 = vaddq_f32 (q_in23_0, q_in23_1);
- q_out23_1 = vsubq_f32 (q_in23_0, q_in23_1);
- /* store loop */
- vst1q_f32 ( (float32_t*) (&out[0]), vcombine_f32 (vget_low_f32 (q_out01_0), vget_low_f32 (q_out01_1)));
- vst1q_f32 ( (float32_t*) (&out[2]), vcombine_f32 (vget_high_f32 (q_out01_0), vget_high_f32 (q_out01_1)));
- vst1q_f32 ( (float32_t*) (&out[4]), vcombine_f32 (vget_low_f32 (q_out23_0), vget_low_f32 (q_out23_1)));
- vst1q_f32 ( (float32_t*) (&out[6]), vcombine_f32 (vget_high_f32 (q_out23_0), vget_high_f32 (q_out23_1)));
-}
-
-static void ne10_data_bitreversal32_float32_neon (ne10_fft_cpx_float32_t * Fout,
- ne10_fft_cpx_float32_t * Fin)
-{
- ne10_int32_t i;
- ne10_int32_t fstride;
-
- ne10_fft_cpx_float32_t * F;
- ne10_fft_cpx_float32_t * in;
- ne10_int32_t stride1;
- ne10_int32_t stride2;
-
- // get the input, resort, calculate the first stage
- fstride = 16;
-
- F = Fout;
- in = Fin;
- stride1 = fstride >> 2;
- stride2 = stride1 >> 2;
- for (i = 0; i < 4; i++)
- {
- ne10_data_bitreversal_butterfly2_float32_neon (F, in, fstride, stride1);
- F += 8;
- in += stride2;
- }
-}
-
-static void ne10_data_bitreversal128_float32_neon (ne10_fft_cpx_float32_t * Fout,
- ne10_fft_cpx_float32_t * Fin)
-{
- ne10_int32_t i, j;
- ne10_int32_t fstride;
- ne10_int32_t stride1;
- ne10_int32_t stride2;
-
- ne10_fft_cpx_float32_t * F;
- ne10_fft_cpx_float32_t * in;
-
- // get the input, resort, calculate the first stage
- fstride = 64;
- F = Fout;
- stride1 = fstride >> 2;
- stride2 = stride1 >> 2;
- for (j = 0; j < 4; j++)
- {
- in = &Fin[j];
- for (i = 0; i < 4; i++)
- {
- ne10_data_bitreversal_butterfly2_float32_neon (F, in, fstride, stride1);
- F += 8;
- in += stride2;
- }
- }
-}
-
-
-static void ne10_data_bitreversal512_float32_neon (ne10_fft_cpx_float32_t * Fout,
- ne10_fft_cpx_float32_t * Fin)
-{
- ne10_int32_t i, j, k;
- ne10_int32_t fstride;
- ne10_int32_t stride1;
- ne10_int32_t stride2;
- ne10_int32_t stride3;
-
- ne10_fft_cpx_float32_t * F;
- ne10_fft_cpx_float32_t * in;
-
- // get the input, resort, calculate the first stage
- fstride = 256;
-
- F = Fout;
- stride1 = fstride >> 2;
- stride2 = stride1 >> 2;
- stride3 = stride2 >> 2;
- for (k = 0; k < 4; k++)
- {
- for (j = 0; j < 4; j++)
- {
- in = &Fin[k + j * stride3];
- for (i = 0; i < 4; i++)
- {
- ne10_data_bitreversal_butterfly2_float32_neon (F, in, fstride, stride1);
- F += 8;
- in += stride2;
- }
- }
- }
-}
-static void ne10_data_bitreversal2048_float32_neon (ne10_fft_cpx_float32_t * Fout,
- ne10_fft_cpx_float32_t * Fin)
-{
- ne10_int32_t i, j, k, l;
- ne10_int32_t fstride;
- ne10_int32_t stride1;
- ne10_int32_t stride2;
- ne10_int32_t stride3;
- ne10_int32_t stride4;
-
- ne10_fft_cpx_float32_t * F;
- ne10_fft_cpx_float32_t * in;
-
- // get the input, resort, calculate the first stage
- fstride = 1024;
-
- F = Fout;
- stride1 = fstride >> 2;
- stride2 = stride1 >> 2;
- stride3 = stride2 >> 2;
- stride4 = stride3 >> 2;
- for (l = 0; l < 4; l++)
- {
- for (k = 0; k < 4; k++)
- {
- for (j = 0; j < 4; j++)
- {
- in = &Fin[l + k * stride4 + j * stride3];
- for (i = 0; i < 4; i++)
- {
- ne10_data_bitreversal_butterfly2_float32_neon (F, in, fstride, stride1);
- F += 8;
- in += stride2;
- }
- }
- }
- }
-}
-
-
-#define ne10_butterfly_length_odd_power2_float32_neon(inverse) \
-static void ne10_butterfly_##inverse##_length_odd_power2_float32_neon (ne10_fft_cpx_float32_t * Fout, \
- ne10_fft_cpx_float32_t * Fin, \
- ne10_int32_t * factors, \
- ne10_fft_cpx_float32_t * twiddles) \
-{ \
- ne10_int32_t fstride = factors[1]; \
- ne10_int32_t i; \
- if (fstride == 16) \
- ne10_data_bitreversal32_float32_neon (Fout, Fin); \
- else if (fstride == 64) \
- ne10_data_bitreversal128_float32_neon (Fout, Fin); \
- else if (fstride == 256) \
- ne10_data_bitreversal512_float32_neon (Fout, Fin); \
- else if (fstride == 1024) \
- ne10_data_bitreversal2048_float32_neon (Fout, Fin); \
- ne10_radix2_butterfly_##inverse##_float32_neon (Fout, factors, twiddles); \
-}
-
-ne10_butterfly_length_odd_power2_float32_neon (forward)
-ne10_butterfly_length_odd_power2_float32_neon (backward)
-
static void ne10_fft16_forward_float32_neon (ne10_fft_cpx_float32_t * Fout,
ne10_fft_cpx_float32_t * Fin,
ne10_fft_cpx_float32_t * twiddles)
vst2q_f32 (p_dst3, q2_out_cdef);
}
-static void ne10_mixed_radix_butterfly_forward_float32_neon (ne10_fft_cpx_float32_t * Fout,
- ne10_int32_t * factors,
- ne10_fft_cpx_float32_t * twiddles)
-{
- ne10_int32_t stage_count;
-
- // the first stage
- stage_count = factors[0];
- if (factors[2 * stage_count] == 2)
- {
- //radix 2/4, FFT length is 2^n (n is odd)
- ne10_mixed_radix_butterfly_length_odd_power2_float32_neon (Fout, factors, twiddles);
- }
- else if (factors[2 * stage_count] == 4)
- {
- //radix 4, FFT length is 2^n (n is even)
- ne10_mixed_radix_butterfly_length_even_power2_float32_neon (Fout, factors, twiddles);
- }
-}
-
-static void ne10_mixed_radix_butterfly_backward_float32_neon (ne10_fft_cpx_float32_t * Fout,
- ne10_int32_t * factors,
- ne10_fft_cpx_float32_t * twiddles)
-{
- ne10_int32_t stage_count;
-
- stage_count = factors[0];
- if (factors[2 * stage_count] == 2)
- {
- //radix 2/4, FFT length is 2^n (n is odd)
- ne10_mixed_radix_butterfly_inverse_length_odd_power2_float32_neon (Fout, factors, twiddles);
- }
- else if (factors[2 * stage_count] == 4)
- {
- //radix 4, FFT length is 2^n (n is even)
- ne10_mixed_radix_butterfly_inverse_length_even_power2_float32_neon (Fout, factors, twiddles);
- }
-}
-
-
void ne10_fft_split_r2c_1d_float32_neon (ne10_fft_cpx_float32_t *dst,
const ne10_fft_cpx_float32_t *src,
ne10_fft_cpx_float32_t *twiddles,
* Otherwise, this FFT is an out-of-place algorithm. When you want to get an in-place FFT, it creates a temp buffer as
* output buffer and then copies the temp buffer back to input buffer. For the usage of this function, please check test/test_suite_fft_float32.c
*/
+
void ne10_fft_c2c_1d_float32_neon (ne10_fft_cpx_float32_t *fout,
ne10_fft_cpx_float32_t *fin,
ne10_fft_cpx_float32_t *twiddles,
case 16:
ne10_fft16_backward_float32_neon (fout, fin, twiddles);
break;
- case 32:
- case 128:
- case 512:
- case 2048:
- ne10_butterfly_backward_length_odd_power2_float32_neon (fout, fin, factors, twiddles);
- break;
- case 64:
- case 256:
- case 1024:
- case 4096:
- ne10_butterfly_backward_length_even_power2_float32_neon (fout, fin, factors, twiddles);
- break;
default:
- ne10_data_bitreversal_float32 (fout, fin, 1, &factors[2]);
- ne10_mixed_radix_butterfly_backward_float32_neon (fout, factors, twiddles);
+ ne10_mixed_radix_fft_backward_float32_neon (fout, fin, factors, twiddles);
break;
}
}
case 16:
ne10_fft16_forward_float32_neon (fout, fin, twiddles);
break;
- case 32:
- case 128:
- case 512:
- case 2048:
- ne10_butterfly_forward_length_odd_power2_float32_neon (fout, fin, factors, twiddles);
- break;
- case 64:
- case 256:
- case 1024:
- case 4096:
- ne10_butterfly_forward_length_even_power2_float32_neon (fout, fin, factors, twiddles);
- break;
default:
- ne10_data_bitreversal_float32 (fout, fin, 1, &factors[2]);
- ne10_mixed_radix_butterfly_forward_float32_neon (fout, factors, twiddles);
+ ne10_mixed_radix_fft_forward_float32_neon (fout, fin, factors, twiddles);
break;
}
}
/* Registers define*/
/*ARM Registers*/
p_fout .req r0
- p_factors .req r1
- p_twiddles .req r2
- p_fin .req r3
- p_fout0 .req r4
- p_fout1 .req r5
- p_fout2 .req r6
- p_fout3 .req r7
- stage_count .req r8
- fstride .req r9
- mstride .req r10
- count .req r1
- count_f .req r1
- count_m .req r12
- p_tw1 .req r3
- p_tw2 .req r11
- p_tw3 .req r14
- radix .req r5
- tmp0 .req r12
+ p_fin .req r1
+ p_factors .req r2
+ p_twiddles .req r3
+ stage_count .req r4
+ fstride .req r5
+ mstride .req r6
+
+ radix .req r12
+ p_fin0 .req r7
+ p_fin1 .req r8
+ p_fin2 .req r9
+ p_fin3 .req r10
+ p_tmp .req r11
+ count .req r2
+ fstride1 .req r2
+ fstep .req r7
+
+ p_out_ls .req r14
+ nstep .req r2
+ mstep .req r7
+ count_f .req r8
+ count_m .req r9
+ p_tw1 .req r10
+ p_in1 .req r11
+ p_out1 .req r12
+ tmp0 .req r9
/*NEON variale Declaration for the first stage*/
- d_in0_r01 .dn d0
- d_in0_i01 .dn d2
- d_in1_r01 .dn d4
- d_in1_i01 .dn d6
- d_in0_r23 .dn d1
- d_in0_i23 .dn d3
- d_in1_r23 .dn d5
- d_in1_i23 .dn d7
- q_in0_r0123 .qn q0
- q_in0_i0123 .qn q1
- q_in1_r0123 .qn q2
- q_in1_i0123 .qn q3
- d_out0_r01 .dn d16
- d_out0_i01 .dn d18
- d_out1_r01 .dn d20
- d_out1_i01 .dn d22
- d_out0_r23 .dn d17
- d_out0_i23 .dn d19
- d_out1_r23 .dn d21
- d_out1_i23 .dn d23
- q_out0_r0123 .qn q8
- q_out0_i0123 .qn q9
- q_out1_r0123 .qn q10
- q_out1_i0123 .qn q11
-
- d_in0_0 .dn d0
- d_in1_0 .dn d1
- d_in2_0 .dn d2
- d_in3_0 .dn d3
- d_in0_1 .dn d4
- d_in1_1 .dn d5
- d_in2_1 .dn d6
- d_in3_1 .dn d7
q_in0_01 .qn q0
- q_in1_01 .qn q2
- q_in2_01 .qn q1
+ q_in1_01 .qn q1
+ q_in2_01 .qn q2
q_in3_01 .qn q3
- d_out0_0 .dn d16
- d_out1_0 .dn d17
- d_out2_0 .dn d18
- d_out3_0 .dn d19
- d_out0_1 .dn d20
- d_out1_1 .dn d21
- d_out2_1 .dn d22
- d_out3_1 .dn d23
- q_out0_01 .qn q8
- q_out1_01 .qn q10
- q_out2_01 .qn q9
- q_out3_01 .qn q11
- d_s0 .dn d24
- q_s0_01 .qn q12
- d_s1 .dn d26
- q_s1_01 .qn q13
- d_s2 .dn d28
- q_s2_01 .qn q14
+ q_s0_2 .qn q4
+ q_s1_2 .qn q5
+ q_s2_2 .qn q6
+ q_s3_2 .qn q7
+ d_s1_r2 .dn d10
+ d_s1_i2 .dn d11
+ d_s3_r2 .dn d14
+ d_s3_i2 .dn d15
+ q_out0_2 .qn q8
+ q_out1_2 .qn q9
+ q_out2_2 .qn q10
+ q_out3_2 .qn q11
+ d_out1_r15 .dn d18
+ d_out1_i15 .dn d19
+ d_out3_r37 .dn d22
+ d_out3_i37 .dn d23
+
+ d_in0_r .dn d0
+ d_in0_i .dn d1
+ d_in1_r .dn d2
+ d_in1_i .dn d3
+ d_in2_r .dn d4
+ d_in2_i .dn d5
+ d_in3_r .dn d6
+ d_in3_i .dn d7
+ d_in4_r .dn d8
+ d_in4_i .dn d9
+ d_in5_r .dn d10
+ d_in5_i .dn d11
+ d_in6_r .dn d12
+ d_in6_i .dn d13
+ d_in7_r .dn d14
+ d_in7_i .dn d15
+ q_in0 .qn q0
+ q_in1 .qn q1
+ q_in2 .qn q2
+ q_in3 .qn q3
+ q_in4 .qn q4
+ q_in5 .qn q5
+ q_in6 .qn q6
+ q_in7 .qn q7
+ q_sin0 .qn q8
+ q_sin1 .qn q9
+ q_sin2 .qn q10
+ q_sin3 .qn q11
+ q_sin4 .qn q12
+ q_sin5 .qn q13
+ q_sin6 .qn q14
+ q_sin7 .qn q15
+ d_sin3_r .dn d22
+ d_sin3_i .dn d23
+ d_sin5_r .dn d26
+ d_sin5_i .dn d27
+ d_sin7_r .dn d30
+ d_sin7_i .dn d31
+
+ d_tw_twn .dn d0
+ d_s3_r .dn d2
+ d_s3_i .dn d3
+ d_s7_r .dn d4
+ d_s7_i .dn d5
+ q_s3 .qn q1
+ q_s7 .qn q2
+ q_s8 .qn q11
+ q_s9 .qn q15
+ q_s10 .qn q3
+ q_s11 .qn q4
+ q_s12 .qn q5
+ q_s13 .qn q6
+ q_s14 .qn q7
+ q_s15 .qn q0
+ q_out0 .qn q1
+ q_out1 .qn q2
+ q_out2 .qn q8
+ q_out3 .qn q9
+ q_out4 .qn q10
+ q_out5 .qn q12
+ q_out6 .qn q13
+ q_out7 .qn q14
+ d_s10_r .dn d6
+ d_s10_i .dn d7
+ d_s11_r .dn d8
+ d_s11_i .dn d9
+ d_s14_r .dn d14
+ d_s14_i .dn d15
+ d_s15_r .dn d0
+ d_s15_i .dn d1
+ d_out2_r .dn d16
+ d_out2_i .dn d17
+ d_out3_r .dn d18
+ d_out3_i .dn d19
+ d_out6_r .dn d26
+ d_out6_i .dn d27
+ d_out7_r .dn d28
+ d_out7_i .dn d29
- /*NEON variale Declaration for mstride loop */
- q_fin0_r .qn q0
- q_fin0_i .qn q1
- q_fin1_r .qn q0
- q_fin1_i .qn q1
- q_tw1_r .qn q2
- q_tw1_i .qn q3
- q_fin2_r .qn q8
- q_fin2_i .qn q9
- q_tw2_r .qn q10
- q_tw2_i .qn q11
- q_fin3_r .qn q4
- q_fin3_i .qn q5
- q_tw3_r .qn q6
- q_tw3_i .qn q7
- q_s0_r .qn q12
- q_s0_i .qn q13
- q_s1_r .qn q14
- q_s1_i .qn q15
- q_s2_r .qn q2
- q_s2_i .qn q10
- q_s5_r .qn q4
- q_s5_i .qn q5
- q_s4_r .qn q6
- q_s4_i .qn q7
- q_s3_r .qn q8
- q_s3_i .qn q9
- q_fout0_r .qn q0
- q_fout0_i .qn q1
- q_fout2_r .qn q2
- q_fout2_i .qn q3
- q_fout1_r .qn q12
- q_fout1_i .qn q13
- q_fout3_r .qn q14
- q_fout3_i .qn q15
-
- /*NEON variale Declaration for mstride 2 loop */
- d_tw1_r01 .dn d16
- d_tw2_r01 .dn d17
- d_tw1_i01 .dn d18
- d_tw2_i01 .dn d19
- d_tw3_r01 .dn d20
- d_tw3_i01 .dn d21
- q_fin0_r0123 .qn q0
- q_fin0_i0123 .qn q1
- d_fin0_r01 .dn d0
- d_fin1_r01 .dn d1
- d_fin0_i01 .dn d2
- d_fin1_i01 .dn d3
- d_fin2_r01 .dn d4
- d_fin3_r01 .dn d5
- d_fin2_i01 .dn d6
- d_fin3_i01 .dn d7
- d_fin0_r23 .dn d22
- d_fin1_r23 .dn d23
- d_fin0_i23 .dn d24
- d_fin1_i23 .dn d25
- d_fin2_r23 .dn d26
- d_fin3_r23 .dn d27
- d_fin2_i23 .dn d28
- d_fin3_i23 .dn d29
- q_s0_r0123 .qn q13
- q_s0_i0123 .qn q14
- d_s0_r01 .dn d26
- d_s0_r23 .dn d27
- d_s0_i01 .dn d28
- d_s0_i23 .dn d29
- q_s1_r0123 .qn q5
- q_s1_i0123 .qn q6
- d_s1_r01 .dn d10
- d_s1_r23 .dn d11
- d_s1_i01 .dn d12
- d_s1_i23 .dn d13
- q_s2_r0123 .qn q15
- q_s2_i0123 .qn q4
- d_s2_r01 .dn d30
- d_s2_r23 .dn d31
- d_s2_i01 .dn d8
- d_s2_i23 .dn d9
- q_s5_r0123 .qn q11
- q_s5_i0123 .qn q12
- q_s4_r0123 .qn q5
- q_s4_i0123 .qn q10
- q_s3_r0123 .qn q6
- q_s3_i0123 .qn q7
- q_fout0_r0123 .qn q0
- q_fout0_i0123 .qn q1
- q_fout2_r0123 .qn q2
- q_fout2_i0123 .qn q3
- q_fout1_r0123 .qn q13
- q_fout1_i0123 .qn q14
- q_fout3_r0123 .qn q6
- q_fout3_i0123 .qn q7
- d_fout0_r01 .dn d0
- d_fout1_r01 .dn d1
- d_fout0_i01 .dn d2
- d_fout1_i01 .dn d3
- d_fout2_r01 .dn d4
- d_fout3_r01 .dn d5
- d_fout2_i01 .dn d6
- d_fout3_i01 .dn d7
- d_fout0_r23 .dn d26
- d_fout1_r23 .dn d27
- d_fout0_i23 .dn d28
- d_fout1_i23 .dn d29
- d_fout2_r23 .dn d12
- d_fout3_r23 .dn d13
- d_fout2_i23 .dn d14
- d_fout3_i23 .dn d15
-
- d_tmp0 .dn d30
- d_tmp1 .dn d31
- q_tmp .qn q15
- d_tmp2_0 .dn d28
- d_tmp2_1 .dn d29
- q_tmp2 .qn q14
-
- .macro RADIX4_BUTTERFLY_P4
- vld2.32 {q_fin1_r, q_fin1_i}, [p_fout1]
- vld2.32 {q_tw1_r, q_tw1_i}, [p_tw1]!
- vld2.32 {q_fin2_r, q_fin2_i}, [p_fout2]
- vld2.32 {q_tw2_r, q_tw2_i}, [p_tw2]!
- vld2.32 {q_fin3_r, q_fin3_i}, [p_fout3]
- vld2.32 {q_tw3_r, q_tw3_i}, [p_tw3]!
-
- vmul.f32 q_s0_r, q_fin1_r, q_tw1_r
- vmul.f32 q_s0_i, q_fin1_i, q_tw1_r
- vmul.f32 q_s1_r, q_fin2_r, q_tw2_r
- vmul.f32 q_s1_i, q_fin2_i, q_tw2_r
- vmul.f32 q_s2_r, q_fin3_r, q_tw3_r
- vmul.f32 q_s2_i, q_fin3_i, q_tw3_r
- vmls.f32 q_s0_r, q_fin1_i, q_tw1_i
- vmla.f32 q_s0_i, q_fin1_r, q_tw1_i
- vmls.f32 q_s1_r, q_fin2_i, q_tw2_i
- vmla.f32 q_s1_i, q_fin2_r, q_tw2_i
- vld2.32 {q_fin0_r, q_fin0_i}, [p_fout0]
- vmls.f32 q_s2_r, q_fin3_i, q_tw3_i
- vmla.f32 q_s2_i, q_fin3_r, q_tw3_i
-
- vsub.f32 q_s5_r, q_fin0_r, q_s1_r
- vsub.f32 q_s5_i, q_fin0_i, q_s1_i
- vadd.f32 q_fout0_r, q_fin0_r, q_s1_r
- vadd.f32 q_fout0_i, q_fin0_i, q_s1_i
-
- vadd.f32 q_s3_r, q_s0_r, q_s2_r
- vadd.f32 q_s3_i, q_s0_i, q_s2_i
- vsub.f32 q_s4_r, q_s0_r, q_s2_r
- vsub.f32 q_s4_i, q_s0_i, q_s2_i
-
- vsub.f32 q_fout2_r, q_fout0_r, q_s3_r
- vsub.f32 q_fout2_i, q_fout0_i, q_s3_i
- vadd.f32 q_fout0_r, q_fout0_r, q_s3_r
- vadd.f32 q_fout0_i, q_fout0_i, q_s3_i
-
- vadd.f32 q_fout1_r, q_s5_r, q_s4_i
- vsub.f32 q_fout1_i, q_s5_i, q_s4_r
- vsub.f32 q_fout3_r, q_s5_r, q_s4_i
- vadd.f32 q_fout3_i, q_s5_i, q_s4_r
-
- vst2.32 {q_fout2_r, q_fout2_i}, [p_fout2]!
- vst2.32 {q_fout0_r, q_fout0_i}, [p_fout0]!
- vst2.32 {q_fout1_r, q_fout1_i}, [p_fout1]!
- vst2.32 {q_fout3_r, q_fout3_i}, [p_fout3]!
- .endm
- .macro RADIX4_BUTTERFLY_INVERSE_P4
- vld2.32 {q_fin1_r, q_fin1_i}, [p_fout1]
- vld2.32 {q_tw1_r, q_tw1_i}, [p_tw1]!
- vld2.32 {q_fin2_r, q_fin2_i}, [p_fout2]
- vld2.32 {q_tw2_r, q_tw2_i}, [p_tw2]!
- vmul.f32 q_s0_r, q_fin1_r, q_tw1_r
- vmul.f32 q_s0_i, q_fin1_i, q_tw1_r
- vmla.f32 q_s0_r, q_fin1_i, q_tw1_i
- vmls.f32 q_s0_i, q_fin1_r, q_tw1_i
-
- vld2.32 {q_fin3_r, q_fin3_i}, [p_fout3]
- vld2.32 {q_tw3_r, q_tw3_i}, [p_tw3]!
- vmul.f32 q_s1_r, q_fin2_r, q_tw2_r
- vmul.f32 q_s1_i, q_fin2_i, q_tw2_r
- vmla.f32 q_s1_r, q_fin2_i, q_tw2_i
- vmls.f32 q_s1_i, q_fin2_r, q_tw2_i
-
- vld2.32 {q_fin0_r, q_fin0_i}, [p_fout0]
- vmul.f32 q_s2_r, q_fin3_r, q_tw3_r
- vmul.f32 q_s2_i, q_fin3_i, q_tw3_r
- vmla.f32 q_s2_r, q_fin3_i, q_tw3_i
- vmls.f32 q_s2_i, q_fin3_r, q_tw3_i
-
- vsub.f32 q_s5_r, q_fin0_r, q_s1_r
- vsub.f32 q_s5_i, q_fin0_i, q_s1_i
- vadd.f32 q_fout0_r, q_fin0_r, q_s1_r
- vadd.f32 q_fout0_i, q_fin0_i, q_s1_i
-
- vadd.f32 q_s3_r, q_s0_r, q_s2_r
- vadd.f32 q_s3_i, q_s0_i, q_s2_i
- vsub.f32 q_s4_r, q_s0_r, q_s2_r
- vsub.f32 q_s4_i, q_s0_i, q_s2_i
-
- vsub.f32 q_fout2_r, q_fout0_r, q_s3_r
- vsub.f32 q_fout2_i, q_fout0_i, q_s3_i
- vadd.f32 q_fout0_r, q_fout0_r, q_s3_r
- vadd.f32 q_fout0_i, q_fout0_i, q_s3_i
- vst2.32 {q_fout2_r, q_fout2_i}, [p_fout2]!
-
- vsub.f32 q_fout1_r, q_s5_r, q_s4_i
- vadd.f32 q_fout1_i, q_s5_i, q_s4_r
- vadd.f32 q_fout3_r, q_s5_r, q_s4_i
- vsub.f32 q_fout3_i, q_s5_i, q_s4_r
- vst2.32 {q_fout0_r, q_fout0_i}, [p_fout0]!
- vst2.32 {q_fout1_r, q_fout1_i}, [p_fout1]!
- vst2.32 {q_fout3_r, q_fout3_i}, [p_fout3]!
+ /*NEON variable Declaration for mstride loop */
+ d_fin0_r .dn d0
+ d_fin0_i .dn d1
+ d_fin1_r .dn d2
+ d_fin1_i .dn d3
+ d_fin2_r .dn d4
+ d_fin2_i .dn d5
+ d_fin3_r .dn d6
+ d_fin3_i .dn d7
+ d_tw0_r .dn d8
+ d_tw0_i .dn d9
+ d_tw1_r .dn d10
+ d_tw1_i .dn d11
+ d_tw2_r .dn d12
+ d_tw2_i .dn d13
+ q_fin0 .qn q0
+ q_scr0 .qn q15
+ q_scr1 .qn q7
+ q_scr2 .qn q8
+ q_scr3 .qn q9
+ q_scr4 .qn q10
+ q_scr5 .qn q11
+ q_scr6 .qn q12
+ q_scr7 .qn q13
+ d_scr1_r .dn d14
+ d_scr1_i .dn d15
+ d_scr2_r .dn d16
+ d_scr2_i .dn d17
+ d_scr3_r .dn d18
+ d_scr3_i .dn d19
+ d_scr5_r .dn d22
+ d_scr5_i .dn d23
+ d_scr7_r .dn d26
+ d_scr7_i .dn d27
+ q_fout0 .qn q7
+ q_fout2 .qn q8
+ d_fout0_r .dn d14
+ d_fout0_i .dn d15
+ d_fout1_r .dn d28
+ d_fout1_i .dn d29
+ d_fout2_r .dn d16
+ d_fout2_i .dn d17
+ d_fout3_r .dn d30
+ d_fout3_i .dn d31
+
+ .macro BUTTERFLY4X2_WITHOUT_TWIDDLES inverse
+
+ /* radix 4 butterfly without twiddles
+  *
+  * Argument:
+  *   inverse - the string "TRUE" selects the inverse-transform signs for
+  *             out1/out3; any other value selects the forward signs.
+  *
+  * The arithmetic consumes whatever is already in q_in0_01..q_in3_01 when
+  * the macro is entered (presumably preloaded by the caller - confirm at
+  * the call site). Each input register is reloaded from p_fin0..p_fin3,
+  * with post-increment, immediately after its last use, so the loads for
+  * the next invocation overlap with this invocation's arithmetic. Do not
+  * reorder the instructions below without re-checking those dependencies.
+  *
+  * Results are written through p_tmp (post-incremented by every store) in
+  * the register order out0, out2, out1, out3 after the vtrn.32 exchanges.
+  */
+ vadd.f32 q_s0_2, q_in0_01, q_in2_01 /* s0 = in0 + in2 */
+ vsub.f32 q_s1_2, q_in0_01, q_in2_01 /* s1 = in0 - in2 */
+ vld2.32 {q_in0_01}, [p_fin0:64]! /* in0 no longer needed: fetch next in0 */
+ vld2.32 {q_in2_01}, [p_fin2:64]! /* in2 no longer needed: fetch next in2 */
+ vadd.f32 q_s2_2, q_in1_01, q_in3_01 /* s2 = in1 + in3 */
+ vsub.f32 q_s3_2, q_in1_01, q_in3_01 /* s3 = in1 - in3 */
+ vld2.32 {q_in1_01}, [p_fin1:64]! /* in1 no longer needed: fetch next in1 */
+ vld2.32 {q_in3_01}, [p_fin3:64]! /* in3 no longer needed: fetch next in3 */
+
+ vsub.f32 q_out2_2, q_s0_2, q_s2_2 /* out2 = s0 - s2 */
+ vadd.f32 q_out0_2, q_s0_2, q_s2_2 /* out0 = s0 + s2 */
+
+ .ifeqs "\inverse", "TRUE"
+ vsub.f32 d_out1_r15, d_s1_r2, d_s3_i2 /* inverse: out1_r = s1_r - s3_i */
+ vadd.f32 d_out1_i15, d_s1_i2, d_s3_r2 /* inverse: out1_i = s1_i + s3_r */
+ vadd.f32 d_out3_r37, d_s1_r2, d_s3_i2 /* inverse: out3_r = s1_r + s3_i */
+ vsub.f32 d_out3_i37, d_s1_i2, d_s3_r2 /* inverse: out3_i = s1_i - s3_r */
+ .else
+ vadd.f32 d_out1_r15, d_s1_r2, d_s3_i2 /* forward: out1_r = s1_r + s3_i */
+ vsub.f32 d_out1_i15, d_s1_i2, d_s3_r2 /* forward: out1_i = s1_i - s3_r */
+ vsub.f32 d_out3_r37, d_s1_r2, d_s3_i2 /* forward: out3_r = s1_r - s3_i */
+ vadd.f32 d_out3_i37, d_s1_i2, d_s3_r2 /* forward: out3_i = s1_i + s3_r */
+ .endif
+
+ vtrn.32 q_out0_2, q_out1_2 /* exchange lanes between out0 and out1 before the store */
+ vtrn.32 q_out2_2, q_out3_2 /* exchange lanes between out2 and out3 before the store */
+ vst2.32 {q_out0_2}, [p_tmp]! /* stores go out in the order out0, out2, out1, out3 */
+ vst2.32 {q_out2_2}, [p_tmp]!
+ vst2.32 {q_out1_2}, [p_tmp]!
+ vst2.32 {q_out3_2}, [p_tmp]!
 .endm
- .macro RADIX24_BUTTERFLY_P4
- vld2.32 {d_tw3_r01, d_tw3_i01}, [p_tw1]
- vld2.32 {d_fin0_r01, d_fin1_r01, d_fin0_i01, d_fin1_i01}, [p_fout0]!
- vld2.32 {d_fin2_r01, d_fin3_r01, d_fin2_i01, d_fin3_i01}, [p_fout0], tmp0
- vld2.32 {d_fin0_r23, d_fin1_r23, d_fin0_i23, d_fin1_i23}, [p_fout1]!
- vld2.32 {d_fin2_r23, d_fin3_r23, d_fin2_i23, d_fin3_i23}, [p_fout1], tmp0
-
- vmul.f32 d_s2_r01, d_fin3_r01, d_tw3_r01
- vmul.f32 d_s2_i01, d_fin3_r01, d_tw3_i01
- vmul.f32 d_s2_r23, d_fin3_r23, d_tw3_r01
- vmul.f32 d_s2_i23, d_fin3_r23, d_tw3_i01
- vmls.f32 d_s2_r01, d_fin3_i01, d_tw3_i01
- vmla.f32 d_s2_i01, d_fin3_i01, d_tw3_r01
- vmls.f32 d_s2_r23, d_fin3_i23, d_tw3_i01
- vmla.f32 d_s2_i23, d_fin3_i23, d_tw3_r01
-
- vmul.f32 d_s1_r01, d_fin2_r01, d_tw2_r01
- vmul.f32 d_s1_r23, d_fin2_r23, d_tw2_r01
- vmul.f32 d_s1_i01, d_fin2_r01, d_tw2_i01
- vmul.f32 d_s1_i23, d_fin2_r23, d_tw2_i01
- vmls.f32 d_s1_r01, d_fin2_i01, d_tw2_i01
- vmls.f32 d_s1_r23, d_fin2_i23, d_tw2_i01
- vmla.f32 d_s1_i01, d_fin2_i01, d_tw2_r01
- vmla.f32 d_s1_i23, d_fin2_i23, d_tw2_r01
-
- vmul.f32 d_s0_r01, d_fin1_r01, d_tw1_r01
- vmul.f32 d_s0_r23, d_fin1_r23, d_tw1_r01
- vmul.f32 d_s0_i01, d_fin1_r01, d_tw1_i01
- vmul.f32 d_s0_i23, d_fin1_r23, d_tw1_i01
- vmls.f32 d_s0_r01, d_fin1_i01, d_tw1_i01
- vmls.f32 d_s0_r23, d_fin1_i23, d_tw1_i01
- vmla.f32 d_s0_i01, d_fin1_i01, d_tw1_r01
- vmla.f32 d_s0_i23, d_fin1_i23, d_tw1_r01
-
- vmov d_fin1_r01, d_fin0_r23
- vmov d_fin1_i01, d_fin0_i23
-
- vsub.f32 q_s5_r0123, q_fin0_r0123, q_s1_r0123
- vsub.f32 q_s5_i0123, q_fin0_i0123, q_s1_i0123
- vadd.f32 q_fout0_r0123, q_fin0_r0123, q_s1_r0123
- vadd.f32 q_fout0_i0123, q_fin0_i0123, q_s1_i0123
-
- vadd.f32 q_s3_r0123, q_s0_r0123, q_s2_r0123
- vadd.f32 q_s3_i0123, q_s0_i0123, q_s2_i0123
- vsub.f32 q_s4_r0123, q_s0_r0123, q_s2_r0123
- vsub.f32 q_s4_i0123, q_s0_i0123, q_s2_i0123
- vsub.f32 q_fout2_r0123, q_fout0_r0123, q_s3_r0123
- vsub.f32 q_fout2_i0123, q_fout0_i0123, q_s3_i0123
- vadd.f32 q_fout0_r0123, q_fout0_r0123, q_s3_r0123
- vadd.f32 q_fout0_i0123, q_fout0_i0123, q_s3_i0123
-
- vadd.f32 q_fout1_r0123, q_s5_r0123, q_s4_i0123
- vsub.f32 q_fout1_i0123, q_s5_i0123, q_s4_r0123
- vsub.f32 q_fout3_r0123, q_s5_r0123, q_s4_i0123
- vadd.f32 q_fout3_i0123, q_s5_i0123, q_s4_r0123
-
- vswp d_fout1_r01, d_fout0_r23
- vswp d_fout1_i01, d_fout0_i23
- vswp d_fout3_r01, d_fout2_r23
- vswp d_fout3_i01, d_fout2_i23
-
- vst2.32 {d_fout0_r01, d_fout1_r01, d_fout0_i01, d_fout1_i01}, [p_fout2]!
- vst2.32 {d_fout0_r23, d_fout1_r23, d_fout0_i23, d_fout1_i23}, [p_fout3]!
- vst2.32 {d_fout2_r01, d_fout3_r01, d_fout2_i01, d_fout3_i01}, [p_fout2], tmp0
- vst2.32 {d_fout2_r23, d_fout3_r23, d_fout2_i23, d_fout3_i23}, [p_fout3], tmp0
- .endm
+ .macro BUTTERFLY4X2_WITH_TWIDDLES inverse
+
+ sub p_in1, p_in1, nstep, lsl #2
+ add p_in1, p_in1, #16
+ sub p_tw1, p_tw1, mstep, lsl #1
+ add p_tw1, p_tw1, #16
+ vmov q_scr0, q_fin0
+ vmul.f32 d_scr1_r, d_fin1_r, d_tw0_r
+ vmul.f32 d_scr1_i, d_fin1_i, d_tw0_r
+ vmul.f32 d_scr2_r, d_fin2_r, d_tw1_r
+ vmul.f32 d_scr2_i, d_fin2_i, d_tw1_r
+ vmul.f32 d_scr3_r, d_fin3_r, d_tw2_r
+ vmul.f32 d_scr3_i, d_fin3_i, d_tw2_r
+ vld2.32 {d_fin0_r, d_fin0_i}, [p_in1:64], nstep
+
+ .ifeqs "\inverse", "TRUE"
+ vmla.f32 d_scr1_r, d_fin1_i, d_tw0_i
+ vmls.f32 d_scr1_i, d_fin1_r, d_tw0_i
+ vmla.f32 d_scr2_r, d_fin2_i, d_tw1_i
+ vmls.f32 d_scr2_i, d_fin2_r, d_tw1_i
+ vmla.f32 d_scr3_r, d_fin3_i, d_tw2_i
+ vmls.f32 d_scr3_i, d_fin3_r, d_tw2_i
+ .else
+ vmls.f32 d_scr1_r, d_fin1_i, d_tw0_i
+ vmla.f32 d_scr1_i, d_fin1_r, d_tw0_i
+ vmls.f32 d_scr2_r, d_fin2_i, d_tw1_i
+ vmla.f32 d_scr2_i, d_fin2_r, d_tw1_i
+ vmls.f32 d_scr3_r, d_fin3_i, d_tw2_i
+ vmla.f32 d_scr3_i, d_fin3_r, d_tw2_i
+ .endif
+
+ vld2.32 {d_fin1_r, d_fin1_i}, [p_in1:64], nstep
+ vld2.32 {d_tw0_r, d_tw0_i}, [p_tw1:64], mstep
+ vld2.32 {d_fin2_r, d_fin2_i}, [p_in1:64], nstep
+ vld2.32 {d_fin3_r, d_fin3_i}, [p_in1:64], nstep
+ vld2.32 {d_tw1_r, d_tw1_i}, [p_tw1:64], mstep
+ vld2.32 {d_tw2_r, d_tw2_i}, [p_tw1:64]
+
+ vadd.f32 q_scr4, q_scr0, q_scr2
+ vsub.f32 q_scr5, q_scr0, q_scr2
+ vadd.f32 q_scr6, q_scr1, q_scr3
+ vsub.f32 q_scr7, q_scr1, q_scr3
+
+ vadd.f32 q_fout0, q_scr4, q_scr6
+ vsub.f32 q_fout2, q_scr4, q_scr6
+
+ .ifeqs "\inverse", "TRUE"
+ vsub.f32 d_fout1_r, d_scr5_r, d_scr7_i
+ vadd.f32 d_fout1_i, d_scr5_i, d_scr7_r
+ vadd.f32 d_fout3_r, d_scr5_r, d_scr7_i
+ vsub.f32 d_fout3_i, d_scr5_i, d_scr7_r
+ .else
+ vadd.f32 d_fout1_r, d_scr5_r, d_scr7_i
+ vsub.f32 d_fout1_i, d_scr5_i, d_scr7_r
+ vsub.f32 d_fout3_r, d_scr5_r, d_scr7_i
+ vadd.f32 d_fout3_i, d_scr5_i, d_scr7_r
+ .endif
+
+ vst2.32 {d_fout0_r, d_fout0_i}, [p_out1], mstep
+ vst2.32 {d_fout1_r, d_fout1_i}, [p_out1], mstep
+ vst2.32 {d_fout2_r, d_fout2_i}, [p_out1], mstep
+ vst2.32 {d_fout3_r, d_fout3_i}, [p_out1], mstep
+ sub p_out1, p_out1, mstep, lsl #2
+ add p_out1, p_out1, #16
- .macro RADIX24_BUTTERFLY_INVERSE_P4
- vld2.32 {d_tw3_r01, d_tw3_i01}, [p_tw1]
- vld2.32 {d_fin0_r01, d_fin1_r01, d_fin0_i01, d_fin1_i01}, [p_fout0]!
- vld2.32 {d_fin2_r01, d_fin3_r01, d_fin2_i01, d_fin3_i01}, [p_fout0], tmp0
- vld2.32 {d_fin0_r23, d_fin1_r23, d_fin0_i23, d_fin1_i23}, [p_fout1]!
- vld2.32 {d_fin2_r23, d_fin3_r23, d_fin2_i23, d_fin3_i23}, [p_fout1], tmp0
-
- vmul.f32 d_s2_r01, d_fin3_r01, d_tw3_r01
- vmul.f32 d_s2_i01, d_fin3_i01, d_tw3_r01
- vmul.f32 d_s2_r23, d_fin3_r23, d_tw3_r01
- vmul.f32 d_s2_i23, d_fin3_i23, d_tw3_r01
- vmla.f32 d_s2_r01, d_fin3_i01, d_tw3_i01
- vmls.f32 d_s2_i01, d_fin3_r01, d_tw3_i01
- vmla.f32 d_s2_r23, d_fin3_i23, d_tw3_i01
- vmls.f32 d_s2_i23, d_fin3_r23, d_tw3_i01
-
- vmul.f32 d_s1_r01, d_fin2_r01, d_tw2_r01
- vmul.f32 d_s1_r23, d_fin2_r23, d_tw2_r01
- vmul.f32 d_s1_i01, d_fin2_i01, d_tw2_r01
- vmul.f32 d_s1_i23, d_fin2_i23, d_tw2_r01
- vmla.f32 d_s1_r01, d_fin2_i01, d_tw2_i01
- vmla.f32 d_s1_r23, d_fin2_i23, d_tw2_i01
- vmls.f32 d_s1_i01, d_fin2_r01, d_tw2_i01
- vmls.f32 d_s1_i23, d_fin2_r23, d_tw2_i01
-
- vmul.f32 d_s0_r01, d_fin1_r01, d_tw1_r01
- vmul.f32 d_s0_r23, d_fin1_r23, d_tw1_r01
- vmul.f32 d_s0_i01, d_fin1_i01, d_tw1_r01
- vmul.f32 d_s0_i23, d_fin1_i23, d_tw1_r01
- vmla.f32 d_s0_r01, d_fin1_i01, d_tw1_i01
- vmla.f32 d_s0_r23, d_fin1_i23, d_tw1_i01
- vmls.f32 d_s0_i01, d_fin1_r01, d_tw1_i01
- vmls.f32 d_s0_i23, d_fin1_r23, d_tw1_i01
-
- vmov d_fin1_r01, d_fin0_r23
- vmov d_fin1_i01, d_fin0_i23
-
- vsub.f32 q_s5_r0123, q_fin0_r0123, q_s1_r0123
- vsub.f32 q_s5_i0123, q_fin0_i0123, q_s1_i0123
- vadd.f32 q_fout0_r0123, q_fin0_r0123, q_s1_r0123
- vadd.f32 q_fout0_i0123, q_fin0_i0123, q_s1_i0123
-
- vadd.f32 q_s3_r0123, q_s0_r0123, q_s2_r0123
- vadd.f32 q_s3_i0123, q_s0_i0123, q_s2_i0123
- vsub.f32 q_s4_r0123, q_s0_r0123, q_s2_r0123
- vsub.f32 q_s4_i0123, q_s0_i0123, q_s2_i0123
- vsub.f32 q_fout2_r0123, q_fout0_r0123, q_s3_r0123
- vsub.f32 q_fout2_i0123, q_fout0_i0123, q_s3_i0123
- vadd.f32 q_fout0_r0123, q_fout0_r0123, q_s3_r0123
- vadd.f32 q_fout0_i0123, q_fout0_i0123, q_s3_i0123
-
- vsub.f32 q_fout1_r0123, q_s5_r0123, q_s4_i0123
- vadd.f32 q_fout1_i0123, q_s5_i0123, q_s4_r0123
- vadd.f32 q_fout3_r0123, q_s5_r0123, q_s4_i0123
- vsub.f32 q_fout3_i0123, q_s5_i0123, q_s4_r0123
-
- vswp d_fout1_r01, d_fout0_r23
- vswp d_fout1_i01, d_fout0_i23
- vswp d_fout3_r01, d_fout2_r23
- vswp d_fout3_i01, d_fout2_i23
-
- vst2.32 {d_fout0_r01, d_fout1_r01, d_fout0_i01, d_fout1_i01}, [p_fout2]!
- vst2.32 {d_fout0_r23, d_fout1_r23, d_fout0_i23, d_fout1_i23}, [p_fout3]!
- vst2.32 {d_fout2_r01, d_fout3_r01, d_fout2_i01, d_fout3_i01}, [p_fout2], tmp0
- vst2.32 {d_fout2_r23, d_fout3_r23, d_fout2_i23, d_fout3_i23}, [p_fout3], tmp0
.endm
- .align 4
- .global ne10_radix4_butterfly_forward_float32_neon
- .thumb
- .thumb_func
-
-ne10_radix4_butterfly_forward_float32_neon:
-
- push {r4-r12,lr}
-
- ldr stage_count, [p_factors] /* get factors[0]---stage_count */
- ldr fstride, [p_factors, #4] /* get factors[1]---fstride */
- add p_factors, p_factors, stage_count, lsl #3 /* get the address of factors[2*stage_count] */
- ldr mstride, [p_factors, #-4] /* get factors[2*stage_count-1]--- mstride */
- sub stage_count, stage_count, #1
-
- /* loop of the stages */
-.L_ne10_radix4_butterfly_forward_stages:
- lsr fstride, fstride, #2
-
- /* loop of fstride */
- mov count_f, fstride
-.L_ne10_radix4_butterfly_forward_stages_fstride:
- sub tmp0, fstride, count_f
- mul tmp0, tmp0, mstride
- add p_fout0, p_fout, tmp0, lsl #5
- add p_fout2, p_fout0, mstride, lsl #4 /* get the address of F[mstride*2] */
- add p_fout1, p_fout0, mstride, lsl #3 /* get the address of F[mstride] */
- add p_fout3, p_fout2, mstride, lsl #3 /* get the address of F[mstride*3] */
- mov p_tw1, p_twiddles
- add p_tw2, p_tw1, mstride, lsl #3 /* get the address of tw2 */
- add p_tw3, p_tw1, mstride, lsl #4 /* get the address of tw3 */
-
- /* loop of mstride */
- mov count_m, mstride
-
-.L_ne10_radix4_butterfly_forward_stages_mstride:
-
- RADIX4_BUTTERFLY_P4
-
- subs count_m, count_m, #4
- bgt .L_ne10_radix4_butterfly_forward_stages_mstride
-
- /* end of mstride_loop */
+ .macro BUTTERFLY8X2_WITHOUT_TWIDDLES inverse
+ /**
+ * Radix-8 butterfly with no per-element twiddle multiplication; each
+ * vld2.32 below de-interleaves two complex float32 points into a
+ * real/imaginary D-register pair, so two columns are processed at once.
+ * Passing "TRUE" as \inverse selects the inverse-transform signs; any
+ * other value selects the forward ones.
+ * Reads eight inputs from p_in1 (stepping by fstep bytes), stores eight
+ * results through p_out1 with writeback, clobbers tmp0, and leaves p_in1
+ * 16 bytes past its value on entry.
+ * Inputs as loaded:
+ * q_in0: Fin1[0]
+ * q_in1: Fin1[0 + fstride]
+ * q_in2: Fin1[fstride1]
+ * q_in3: Fin1[fstride1 + fstride]
+ * q_in4: Fin1[fstride1*2]
+ * q_in5: Fin1[fstride1*2 + fstride]
+ * q_in6: Fin1[fstride1*3]
+ * q_in7: Fin1[fstride1*3 + fstride]
+ *
+ */
- subs count_f, count_f, #1
- bgt .L_ne10_radix4_butterfly_forward_stages_fstride
+ ldr tmp0, =TW_81 /* address of the two TW_81 scale constants */
+ vld2.32 {d_in0_r, d_in0_i}, [p_in1:64], fstep
+ vld2.32 {d_in2_r, d_in2_i}, [p_in1:64], fstep
+ vld2.32 {d_in4_r, d_in4_i}, [p_in1:64], fstep
+ vld2.32 {d_in6_r, d_in6_i}, [p_in1:64], fstep
+ vld2.32 {d_in1_r, d_in1_i}, [p_in1:64], fstep
+ vld2.32 {d_in3_r, d_in3_i}, [p_in1:64], fstep
+ vld2.32 {d_in5_r, d_in5_i}, [p_in1:64], fstep
+ vld2.32 {d_in7_r, d_in7_i}, [p_in1:64], fstep
+
+ // first pass: radix-2 sum/difference on the pairs (in0,in1) (in2,in3) (in4,in5) (in6,in7)
+ vadd.f32 q_sin0, q_in0, q_in1
+ vsub.f32 q_sin1, q_in0, q_in1
+ vld1.32 {d_tw_twn}, [tmp0] /* lane 0 = first TW_81 float, lane 1 = second */
+ vadd.f32 q_sin2, q_in2, q_in3
+ vsub.f32 q_sin3, q_in2, q_in3
+ vadd.f32 q_sin4, q_in4, q_in5
+ vsub.f32 q_sin5, q_in4, q_in5
+ vadd.f32 q_sin6, q_in6, q_in7
+ vsub.f32 q_sin7, q_in6, q_in7
+
+ .ifeqs "\inverse", "TRUE"
+ vneg.f32 d_sin5_i, d_sin5_i /* with the vswp below: sin5 = sin5 * (+j) */
+ vsub.f32 d_s3_r, d_sin3_r, d_sin3_i /* s3 = sin3 * (1 + j), unscaled */
+ vadd.f32 d_s3_i, d_sin3_i, d_sin3_r
+ vadd.f32 d_s7_r, d_sin7_r, d_sin7_i /* s7 = sin7 * (1 - j), unscaled */
+ vsub.f32 d_s7_i, d_sin7_i, d_sin7_r
+ .else
+ vneg.f32 d_sin5_r, d_sin5_r /* with the vswp below: sin5 = sin5 * (-j) */
+ vadd.f32 d_s3_r, d_sin3_r, d_sin3_i /* s3 = sin3 * (1 - j), unscaled */
+ vsub.f32 d_s3_i, d_sin3_i, d_sin3_r
+ vsub.f32 d_s7_r, d_sin7_r, d_sin7_i /* s7 = sin7 * (1 + j), unscaled */
+ vadd.f32 d_s7_i, d_sin7_i, d_sin7_r
+ .endif
+ vswp d_sin5_r, d_sin5_i /* completes the multiply by +/-j started by the vneg above */
+
+ vmul.f32 q_s3, q_s3, d_tw_twn[0] /* scale s3 by lane 0 of d_tw_twn */
+ vmul.f32 q_s7, q_s7, d_tw_twn[1] /* scale s7 by lane 1 of d_tw_twn */
+
+ // radix 2 butterfly
+ vadd.f32 q_s8, q_sin0, q_sin4
+ vadd.f32 q_s9, q_sin1, q_sin5
+ vsub.f32 q_s10, q_sin0, q_sin4
+ vsub.f32 q_s11, q_sin1, q_sin5
+
+ // radix 2 butterfly
+ vadd.f32 q_s12, q_sin2, q_sin6
+ vadd.f32 q_s13, q_s3, q_s7
+ vsub.f32 q_s14, q_sin2, q_sin6
+ vsub.f32 q_s15, q_s3, q_s7
+
+ vsub.f32 q_out4, q_s8, q_s12
+ vsub.f32 q_out5, q_s9, q_s13
+ vadd.f32 q_out0, q_s8, q_s12
+ vadd.f32 q_out1, q_s9, q_s13
+
+ .ifeqs "\inverse", "TRUE"
+ vsub.f32 d_out2_r, d_s10_r, d_s14_i /* out2/out3 = s10/s11 + j * s14/s15 */
+ vadd.f32 d_out2_i, d_s10_i, d_s14_r
+ vsub.f32 d_out3_r, d_s11_r, d_s15_i
+ vadd.f32 d_out3_i, d_s11_i, d_s15_r
+ vadd.f32 d_out6_r, d_s10_r, d_s14_i /* out6/out7 = s10/s11 - j * s14/s15 */
+ vsub.f32 d_out6_i, d_s10_i, d_s14_r
+ vadd.f32 d_out7_r, d_s11_r, d_s15_i
+ vsub.f32 d_out7_i, d_s11_i, d_s15_r
+ .else
+ vadd.f32 d_out2_r, d_s10_r, d_s14_i /* out2/out3 = s10/s11 - j * s14/s15 */
+ vsub.f32 d_out2_i, d_s10_i, d_s14_r
+ vadd.f32 d_out3_r, d_s11_r, d_s15_i
+ vsub.f32 d_out3_i, d_s11_i, d_s15_r
+ vsub.f32 d_out6_r, d_s10_r, d_s14_i /* out6/out7 = s10/s11 + j * s14/s15 */
+ vadd.f32 d_out6_i, d_s10_i, d_s14_r
+ vsub.f32 d_out7_r, d_s11_r, d_s15_i
+ vadd.f32 d_out7_i, d_s11_i, d_s15_r
+ .endif
+
+ vtrn.32 q_out0, q_out1 /* each vtrn gathers the even 32-bit lanes of a register pair in the first register, the odd lanes in the second */
+ vtrn.32 q_out2, q_out3
+ vtrn.32 q_out4, q_out5
+ vtrn.32 q_out6, q_out7
+
+
+ vst2.32 {q_out0}, [p_out1]! /* even-numbered registers are stored first ... */
+ vst2.32 {q_out2}, [p_out1]!
+ vst2.32 {q_out4}, [p_out1]!
+ vst2.32 {q_out6}, [p_out1]!
+ vst2.32 {q_out1}, [p_out1]! /* ... then the odd-numbered ones */
+ vst2.32 {q_out3}, [p_out1]!
+ vst2.32 {q_out5}, [p_out1]!
+ vst2.32 {q_out7}, [p_out1]!
+
+ sub p_in1, p_in1, fstep, lsl #3 /* undo the eight post-increments by fstep */
+ add p_in1, p_in1, #16 /* step 16 bytes forward for the next invocation */
- add p_twiddles, p_twiddles, mstride, lsl #4
- add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */
- lsl mstride, mstride, #2
+ .endm
- subs stage_count, stage_count, #1
- bgt .L_ne10_radix4_butterfly_forward_stages
+ .global TW_81 /* scale constants loaded by BUTTERFLY8X2_WITHOUT_TWIDDLES via "ldr tmp0, =TW_81" */
+TW_81:
+.float 0.70710678 /* approx. sqrt(2)/2; used as d_tw_twn[0] to scale s3 */
+.float -0.70710678 /* same magnitude, negated; used as d_tw_twn[1] to scale s7 */
-.L_ne10_radix4_butterfly_forward_end:
- /*Return From Function*/
- pop {r4-r12,pc}
+ /**
+ * @details
+ * This function implements the radix4/8 forward FFT.
+ * The first stage is a twiddle-free radix-8 butterfly when the first
+ * radix read from factors is 2, and a twiddle-free radix-4 butterfly
+ * otherwise. Every later stage uses BUTTERFLY4X2_WITH_TWIDDLES.
+ * After each stage p_fin and p_fout are swapped; the last stage writes
+ * to the p_fout value that was saved in p_out_ls on entry.
+ * NOTE(review): p_fin is read by this function but is not listed in the
+ * parameters below - confirm the list against the C prototype.
+ *
+ * @param[in/out] *Fout points to input/output pointers
+ * @param[in] *factors factors pointer:
+ * 0: stage count
+ * 1: stride for the first stage
+ * others: factor out powers of 4, powers of 2
+ * @param[in] *twiddles twiddles coeffs of FFT
+ */
.align 4
- .global ne10_radix2_butterfly_forward_float32_neon
+ .global ne10_mixed_radix_fft_forward_float32_neon
.thumb
.thumb_func
-ne10_radix2_butterfly_forward_float32_neon:
-
+ne10_mixed_radix_fft_forward_float32_neon:
push {r4-r12,lr}
vpush {q4-q7}
ldr stage_count, [p_factors] /* get factors[0]---stage_count */
ldr fstride, [p_factors, #4] /* get factors[1]---fstride */
add p_factors, p_factors, stage_count, lsl #3 /* get the address of factors[2*stage_count] */
+ ldr radix, [p_factors] /* get factors[2*stage_count]--- the first radix */
ldr mstride, [p_factors, #-4] /* get factors[2*stage_count-1]--- mstride */
- sub stage_count, stage_count, #2
+ /* save the output buffer for the last stage */
+ mov p_out_ls, p_fout
- /* loop of the second stages */
-.L_ne10_radix2_butterfly_forwards_second_stage:
- lsr fstride, fstride, #2
+ /* ---------------the first stage--------------- */
+ /* a first radix of 2 selects the radix-8 first stage; anything else falls through to radix 4 */
+ cmp radix, #2
+ beq .L_ne10_radix8_butterfly_first_stage
- /* loop of fstride */
- mov count_f, fstride
- mov p_tw1, p_twiddles
- mov p_fout0, p_fout
- add p_fout1, p_fout, mstride, lsl #5
- mov p_fout2, p_fout
- mov p_fout3, p_fout1
- mov tmp0, #96
- vld2.32 {d_tw1_r01, d_tw2_r01, d_tw1_i01, d_tw2_i01}, [p_tw1]!
-
-.L_ne10_radix2_butterfly_forwards_second_stage_fstride:
- @RADIX24_BUTTERFLY_P4
- vld2.32 {d_tw3_r01, d_tw3_i01}, [p_tw1]
- vld2.32 {d_fin0_r01, d_fin1_r01, d_fin0_i01, d_fin1_i01}, [p_fout0]!
- vld2.32 {d_fin2_r01, d_fin3_r01, d_fin2_i01, d_fin3_i01}, [p_fout0], tmp0
- vld2.32 {d_fin0_r23, d_fin1_r23, d_fin0_i23, d_fin1_i23}, [p_fout1]!
- vld2.32 {d_fin2_r23, d_fin3_r23, d_fin2_i23, d_fin3_i23}, [p_fout1], tmp0
-
- vmul.f32 d_s2_r01, d_fin3_r01, d_tw3_r01
- vmul.f32 d_s2_i01, d_fin3_r01, d_tw3_i01
- vmul.f32 d_s2_r23, d_fin3_r23, d_tw3_r01
- vmul.f32 d_s2_i23, d_fin3_r23, d_tw3_i01
- vmls.f32 d_s2_r01, d_fin3_i01, d_tw3_i01
- vmla.f32 d_s2_i01, d_fin3_i01, d_tw3_r01
- vmls.f32 d_s2_r23, d_fin3_i23, d_tw3_i01
- vmla.f32 d_s2_i23, d_fin3_i23, d_tw3_r01
-
- vmul.f32 d_s1_r01, d_fin2_r01, d_tw2_r01
- vmul.f32 d_s1_r23, d_fin2_r23, d_tw2_r01
- vmul.f32 d_s1_i01, d_fin2_r01, d_tw2_i01
- vmul.f32 d_s1_i23, d_fin2_r23, d_tw2_i01
- vmls.f32 d_s1_r01, d_fin2_i01, d_tw2_i01
- vmls.f32 d_s1_r23, d_fin2_i23, d_tw2_i01
- vmla.f32 d_s1_i01, d_fin2_i01, d_tw2_r01
- vmla.f32 d_s1_i23, d_fin2_i23, d_tw2_r01
-
- vmul.f32 d_s0_r01, d_fin1_r01, d_tw1_r01
- vmul.f32 d_s0_r23, d_fin1_r23, d_tw1_r01
- vmul.f32 d_s0_i01, d_fin1_r01, d_tw1_i01
- vmul.f32 d_s0_i23, d_fin1_r23, d_tw1_i01
- vmls.f32 d_s0_r01, d_fin1_i01, d_tw1_i01
- vmls.f32 d_s0_r23, d_fin1_i23, d_tw1_i01
- vmla.f32 d_s0_i01, d_fin1_i01, d_tw1_r01
- vmla.f32 d_s0_i23, d_fin1_i23, d_tw1_r01
-
- vmov d_fin1_r01, d_fin0_r23
- vmov d_fin1_i01, d_fin0_i23
-
- vsub.f32 q_s5_r0123, q_fin0_r0123, q_s1_r0123
- vsub.f32 q_s5_i0123, q_fin0_i0123, q_s1_i0123
- vadd.f32 q_fout0_r0123, q_fin0_r0123, q_s1_r0123
- vadd.f32 q_fout0_i0123, q_fin0_i0123, q_s1_i0123
-
- vadd.f32 q_s3_r0123, q_s0_r0123, q_s2_r0123
- vadd.f32 q_s3_i0123, q_s0_i0123, q_s2_i0123
- vsub.f32 q_s4_r0123, q_s0_r0123, q_s2_r0123
- vsub.f32 q_s4_i0123, q_s0_i0123, q_s2_i0123
- vsub.f32 q_fout2_r0123, q_fout0_r0123, q_s3_r0123
- vsub.f32 q_fout2_i0123, q_fout0_i0123, q_s3_i0123
- vadd.f32 q_fout0_r0123, q_fout0_r0123, q_s3_r0123
- vadd.f32 q_fout0_i0123, q_fout0_i0123, q_s3_i0123
-
- vadd.f32 q_fout1_r0123, q_s5_r0123, q_s4_i0123
- vsub.f32 q_fout1_i0123, q_s5_i0123, q_s4_r0123
- vsub.f32 q_fout3_r0123, q_s5_r0123, q_s4_i0123
- vadd.f32 q_fout3_i0123, q_s5_i0123, q_s4_r0123
-
- vswp d_fout1_r01, d_fout0_r23
- vswp d_fout1_i01, d_fout0_i23
- vswp d_fout3_r01, d_fout2_r23
- vswp d_fout3_i01, d_fout2_i23
-
- vst2.32 {d_fout0_r01, d_fout1_r01, d_fout0_i01, d_fout1_i01}, [p_fout2]!
- vst2.32 {d_fout0_r23, d_fout1_r23, d_fout0_i23, d_fout1_i23}, [p_fout3]!
- vst2.32 {d_fout2_r01, d_fout3_r01, d_fout2_i01, d_fout3_i01}, [p_fout2], tmp0
- vst2.32 {d_fout2_r23, d_fout3_r23, d_fout2_i23, d_fout3_i23}, [p_fout3], tmp0
- subs count_f, count_f, #2
- bgt .L_ne10_radix2_butterfly_forwards_second_stage_fstride
+ /* ---------------first stage: radix 4 */
+ mov count, fstride /* loop counter; reduced by 2 per macro invocation below */
+ mov p_fin0, p_fin
+ mov p_tmp, p_fout /* presumably the store pointer used by BUTTERFLY4X2_WITHOUT_TWIDDLES - macro body not shown here, confirm */
+ add p_fin2, p_fin0, fstride, lsl #4 /* get the address of F[fstride*2] */
+ add p_fin1, p_fin0, fstride, lsl #3 /* get the address of F[fstride] */
+ add p_fin3, p_fin2, fstride, lsl #3 /* get the address of F[fstride*3] */
+ vld2.32 {q_in0_01}, [p_fin0:64]! /* load the first inputs before entering the loop */
+ vld2.32 {q_in2_01}, [p_fin2:64]!
+ vld2.32 {q_in1_01}, [p_fin1:64]!
+ vld2.32 {q_in3_01}, [p_fin3:64]!
+
+.L_ne10_radix4_butterfly_first_stage_fstride:
+ BUTTERFLY4X2_WITHOUT_TWIDDLES "FALSE"
- add p_twiddles, p_twiddles, mstride, lsl #4
- add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */
- lsl mstride, mstride, #2
+ subs count, count, #2
+ bgt .L_ne10_radix4_butterfly_first_stage_fstride
+ /* swap input/output buffer */
+ mov tmp0, p_fout
+ mov p_fout, p_fin
+ mov p_fin, tmp0
- /* loop of the other stages */
-.L_ne10_radix2_butterfly_forwards_stages:
+ /* stage_count -= 2: discount this first stage and the last stage, which has its own loop below */
+ sub stage_count, stage_count, #2
+ lsl nstep, fstride, #3 /* nstep = fstride << 3, the byte step between the four inputs of later stages */
lsr fstride, fstride, #2
- /* loop of fstride */
- mov count_f, fstride
-.L_ne10_radix2_butterfly_forwards_stages_fstride:
- sub tmp0, fstride, count_f
- mul tmp0, tmp0, mstride
- add p_fout0, p_fout, tmp0, lsl #5
- add p_fout2, p_fout0, mstride, lsl #4 /* get the address of F[mstride*2] */
- add p_fout1, p_fout0, mstride, lsl #3 /* get the address of F[mstride] */
- add p_fout3, p_fout2, mstride, lsl #3 /* get the address of F[mstride*3] */
- mov p_tw1, p_twiddles
- add p_tw2, p_tw1, mstride, lsl #3 /* get the address of tw2 */
- add p_tw3, p_tw1, mstride, lsl #4 /* get the address of tw3 */
+ b .L_ne10_butterfly_other_stages /* NOTE(review): unconditional, so at least one middle stage always runs on this path; presumably callers guarantee stage_count >= 3 here - confirm */
+ /* ---------------end of first stage: radix 4 */
- /* loop of mstride */
- mov count_m, mstride
-.L_ne10_radix2_butterfly_forwards_stages_mstride:
- RADIX4_BUTTERFLY_P4
- subs count_m, count_m, #4
- bgt .L_ne10_radix2_butterfly_forwards_stages_mstride
+ /* ---------------first stage: radix 8 */
+.L_ne10_radix8_butterfly_first_stage:
+ lsr fstride1, fstride, #2 /* fstride1 = fstride / 4; also the loop counter below */
+ mov p_in1, p_fin
+ mov p_out1, p_fout
+ lsl fstep, fstride, #1 /* fstep = fstride * 2, the byte step used by the macro's loads */
- /* end of mstride_loop */
+.L_ne10_radix8_butterfly_first_stage_fstride1:
+ BUTTERFLY8X2_WITHOUT_TWIDDLES "FALSE"
- subs count_f, count_f, #1
- bgt .L_ne10_radix2_butterfly_forwards_stages_fstride
+ subs fstride1, fstride1, #2 /* the macro consumes two columns per invocation */
+ bgt .L_ne10_radix8_butterfly_first_stage_fstride1
- add p_twiddles, p_twiddles, mstride, lsl #4
- add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */
+ lsl nstep, fstride, #2 /* nstep = fstride << 2, the byte step between the four inputs of later stages */
+ sub stage_count, stage_count, #2 /* presumably the radix-8 stage stands for two entries of factors - confirm */
+ lsr fstride, fstride, #4
lsl mstride, mstride, #2
+ add p_twiddles, p_twiddles, #48 /* twiddles += 6 entries (48 bytes) */
- subs stage_count, stage_count, #1
- bgt .L_ne10_radix2_butterfly_forwards_stages
+ /* swap input/output buffer */
+ mov tmp0, p_fout
+ mov p_fout, p_fin
+ mov p_fin, tmp0
-.L_ne10_radix2_butterfly_forwards_end:
- /*Return From Function*/
- vpop {q4-q7}
- pop {r4-r12,pc}
+ /* only one stage left: skip the middle-stage loop */
+ cmp stage_count, #1
+ beq .L_ne10_butterfly_last_stages
- .align 4
- .global ne10_radix2_butterfly_backward_float32_neon
- .thumb
- .thumb_func
+ /* (stage_count-1): reduce the counter for the last stage */
+ sub stage_count, stage_count, #1
+ /*--------------- end of first stage: radix 8 */
+ /* ---------------end of first stage--------------- */
-ne10_radix2_butterfly_backward_float32_neon:
- push {r4-r12,lr}
- vpush {q4-q7}
-
- ldr stage_count, [p_factors] /* get factors[0]---stage_count */
- ldr fstride, [p_factors, #4] /* get factors[1]---fstride */
- add p_factors, p_factors, stage_count, lsl #3 /* get the address of factors[2*stage_count] */
- ldr mstride, [p_factors, #-4] /* get factors[2*stage_count-1]--- mstride */
- sub stage_count, stage_count, #2
-
-
- /* loop of the second stages */
-.L_ne10_radix2_butterfly_backward_second_stage:
- lsr fstride, fstride, #2
+ /* ---------------other stages except last stage--------------- */
+ /* loop of other stages */
+.L_ne10_butterfly_other_stages:
+ lsl mstep, mstride, #3 /* mstep = mstride << 3, the byte step for twiddle loads and output stores */
+ mov p_in1, p_fin
+ vld2.32 {d_fin0_r, d_fin0_i}, [p_in1:64], nstep /* load the first four inputs before entering the loops */
+ vld2.32 {d_fin1_r, d_fin1_i}, [p_in1:64], nstep
+ vld2.32 {d_fin2_r, d_fin2_i}, [p_in1:64], nstep
+ vld2.32 {d_fin3_r, d_fin3_i}, [p_in1:64], nstep
/* loop of fstride */
mov count_f, fstride
+.L_ne10_butterfly_other_stages_fstride:
mov p_tw1, p_twiddles
- mov p_fout0, p_fout
- add p_fout1, p_fout, mstride, lsl #5
- mov p_fout2, p_fout
- mov p_fout3, p_fout1
- mov tmp0, #96
- vld2.32 {d_tw1_r01, d_tw2_r01, d_tw1_i01, d_tw2_i01}, [p_tw1]!
-
-.L_ne10_radix2_butterfly_backward_second_stage_fstride:
- RADIX24_BUTTERFLY_INVERSE_P4
-
- subs count_f, count_f, #2
- bgt .L_ne10_radix2_butterfly_backward_second_stage_fstride
-
- add p_twiddles, p_twiddles, mstride, lsl #4
- add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */
- lsl mstride, mstride, #2
-
-
- /* loop of the other stages */
-.L_ne10_radix2_butterfly_backward_stages:
- lsr fstride, fstride, #2
-
- /* loop of fstride */
- mov count_f, fstride
-.L_ne10_radix2_butterfly_backward_stages_fstride:
sub tmp0, fstride, count_f
mul tmp0, tmp0, mstride
- add p_fout0, p_fout, tmp0, lsl #5
- add p_fout2, p_fout0, mstride, lsl #4 /* get the address of F[mstride*2] */
- add p_fout1, p_fout0, mstride, lsl #3 /* get the address of F[mstride] */
- add p_fout3, p_fout2, mstride, lsl #3 /* get the address of F[mstride*3] */
- mov p_tw1, p_twiddles
- add p_tw2, p_tw1, mstride, lsl #3 /* get the address of tw2 */
- add p_tw3, p_tw1, mstride, lsl #4 /* get the address of tw3 */
+ add p_out1, p_fout, tmp0, lsl #5 /* p_out1 = p_fout + (fstride - count_f) * mstride * 32 bytes */
+ vld2.32 {d_tw0_r, d_tw0_i}, [p_tw1:64], mstep /* load the first three twiddle pairs before the inner loop */
+ vld2.32 {d_tw1_r, d_tw1_i}, [p_tw1:64], mstep
+ vld2.32 {d_tw2_r, d_tw2_i}, [p_tw1:64]
/* loop of mstride */
mov count_m, mstride
-.L_ne10_radix2_butterfly_backward_stages_mstride:
- RADIX4_BUTTERFLY_INVERSE_P4
+.L_ne10_butterfly_other_stages_mstride:
+ BUTTERFLY4X2_WITH_TWIDDLES "FALSE"
- subs count_m, count_m, #4
- bgt .L_ne10_radix2_butterfly_backward_stages_mstride
-
- /* end of mstride_loop */
+ subs count_m, count_m, #2
+ bgt .L_ne10_butterfly_other_stages_mstride
+ /* end of mstride loop */
subs count_f, count_f, #1
- bgt .L_ne10_radix2_butterfly_backward_stages_fstride
+ bgt .L_ne10_butterfly_other_stages_fstride
add p_twiddles, p_twiddles, mstride, lsl #4
add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */
lsl mstride, mstride, #2
+ lsr fstride, fstride, #2 /* the next stage has a quarter as many fstride iterations */
- subs stage_count, stage_count, #1
- bgt .L_ne10_radix2_butterfly_backward_stages
-
-
-.L_ne10_radix2_butterfly_backward_end:
- /*Return From Function*/
- vpop {q4-q7}
- pop {r4-r12,pc}
-
-
- .align 4
- .global ne10_radix4_butterfly_backward_float32_neon
- .thumb
- .thumb_func
-
-ne10_radix4_butterfly_backward_float32_neon:
-
- push {r4-r12,lr}
- vpush {q4-q7}
+ /* swap input/output buffer */
+ mov tmp0, p_fout
+ mov p_fout, p_fin
+ mov p_fin, tmp0
- ldr stage_count, [p_factors] /* get factors[0]---stage_count */
- ldr fstride, [p_factors, #4] /* get factors[1]---fstride */
- add p_factors, p_factors, stage_count, lsl #3 /* get the address of factors[2*stage_count] */
- ldr mstride, [p_factors, #-4] /* get factors[2*stage_count-1]--- mstride */
- sub stage_count, stage_count, #1
+ subs stage_count, stage_count, #1
+ bgt .L_ne10_butterfly_other_stages
+ /* ---------------end other stages except last stage--------------- */
- /* loop of the stages */
-.L_ne10_radix4_butterfly_backward_stages:
- lsr fstride, fstride, #2
- /* loop of fstride */
- mov count_f, fstride
-.L_ne10_radix4_butterfly_backward_stages_fstride:
- sub tmp0, fstride, count_f
- mul tmp0, tmp0, mstride
- add p_fout0, p_fout, tmp0, lsl #5
- add p_fout2, p_fout0, mstride, lsl #4 /* get the address of F[mstride*2] */
- add p_fout1, p_fout0, mstride, lsl #3 /* get the address of F[mstride] */
- add p_fout3, p_fout2, mstride, lsl #3 /* get the address of F[mstride*3] */
+ /* ---------------last stage--------------- */
+.L_ne10_butterfly_last_stages:
+ mov p_in1, p_fin
+ mov p_out1, p_out_ls /* the last stage writes to the p_fout value saved on entry */
mov p_tw1, p_twiddles
- add p_tw2, p_tw1, mstride, lsl #3 /* get the address of tw2 */
- add p_tw3, p_tw1, mstride, lsl #4 /* get the address of tw3 */
+ mov mstep, nstep /* in the last stage the twiddle/output step equals the input step */
+ vld2.32 {d_fin0_r, d_fin0_i}, [p_in1:64], nstep
+ vld2.32 {d_fin1_r, d_fin1_i}, [p_in1:64], nstep
+ vld2.32 {d_fin2_r, d_fin2_i}, [p_in1:64], nstep
+ vld2.32 {d_fin3_r, d_fin3_i}, [p_in1:64], nstep
+ vld2.32 {d_tw0_r, d_tw0_i}, [p_tw1:64], mstep
+ vld2.32 {d_tw1_r, d_tw1_i}, [p_tw1:64], mstep
+ vld2.32 {d_tw2_r, d_tw2_i}, [p_tw1:64]
/* loop of mstride */
mov count_m, mstride
+.L_ne10_butterfly_last_stages_mstride:
+ BUTTERFLY4X2_WITH_TWIDDLES "FALSE"
-.L_ne10_radix4_butterfly_backward_stages_mstride:
- RADIX4_BUTTERFLY_INVERSE_P4
- subs count_m, count_m, #4
- bgt .L_ne10_radix4_butterfly_backward_stages_mstride
-
- /* end of mstride_loop */
-
- subs count_f, count_f, #1
- bgt .L_ne10_radix4_butterfly_backward_stages_fstride
-
- add p_twiddles, p_twiddles, mstride, lsl #4
- add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */
- lsl mstride, mstride, #2
-
- subs stage_count, stage_count, #1
- bgt .L_ne10_radix4_butterfly_backward_stages
-
+ subs count_m, count_m, #2
+ bgt .L_ne10_butterfly_last_stages_mstride
+ /* end of mstride loop */
+ /* ---------------end of last stage--------------- */
-.L_ne10_radix4_inverse_butterfly_backward_end:
+.L_ne10_butterfly_end:
/*Return From Function*/
vpop {q4-q7}
pop {r4-r12,pc}
-
+ /* end of ne10_mixed_radix_fft_forward_float32_neon */
/**
* @details
- * This function implements the 4 butterfly
+ * This function implements the radix4/8 backward FFT
*
* @param[in/out] *Fout points to input/output pointers
* @param[in] *factors factors pointer:
*/
.align 4
- .global ne10_mixed_radix_butterfly_length_odd_power2_float32_neon
+ .global ne10_mixed_radix_fft_backward_float32_neon
.thumb
.thumb_func
-ne10_mixed_radix_butterfly_length_odd_power2_float32_neon:
+ne10_mixed_radix_fft_backward_float32_neon:
push {r4-r12,lr}
vpush {q4-q7}
ldr radix, [p_factors] /* get factors[2*stage_count]--- the first radix */
ldr mstride, [p_factors, #-4] /* get factors[2*stage_count-1]--- mstride */
- mov p_fin, p_fout
- mov p_fout0, p_fout
- mov count, fstride
-
- /* the first stage */
-.L_ne10_butterfly_length_odd_power2_first_stage:
- vld4.32 {d_in0_r01, d_in0_i01, d_in1_r01, d_in1_i01}, [p_fin]!
- vld4.32 {d_in0_r23, d_in0_i23, d_in1_r23, d_in1_i23}, [p_fin]!
- vsub.f32 q_out1_r0123, q_in0_r0123, q_in1_r0123
- vsub.f32 q_out1_i0123, q_in0_i0123, q_in1_i0123
- vadd.f32 q_out0_r0123, q_in0_r0123, q_in1_r0123
- vadd.f32 q_out0_i0123, q_in0_i0123, q_in1_i0123
- subs count, count, #4
- vst4.32 {d_out0_r01, d_out0_i01, d_out1_r01, d_out1_i01}, [p_fout0]!
- vst4.32 {d_out0_r23, d_out0_i23, d_out1_r23, d_out1_i23}, [p_fout0]!
-
- bgt .L_ne10_butterfly_length_odd_power2_first_stage
-
- /* the second stages */
- subs stage_count, stage_count, #1
- lsr fstride, fstride, #2
-
- /* loop of fstride */
- mov count_f, fstride
- mov p_tw1, p_twiddles
- mov p_fout0, p_fout
- add p_fout1, p_fout, mstride, lsl #5
- mov p_fout2, p_fout
- mov p_fout3, p_fout1
- mov tmp0, #96
- vld2.32 {d_tw1_r01, d_tw2_r01, d_tw1_i01, d_tw2_i01}, [p_tw1]!
+ /* save the output buffer for the last stage */
+ mov p_out_ls, p_fout
-.L_ne10_butterfly_length_odd_power2_second_stage:
- RADIX24_BUTTERFLY_P4
+ /* ---------------the first stage--------------- */
+ /* a first radix of 2 selects the radix-8 first stage; anything else falls through to radix 4 */
+ cmp radix, #2
+ beq .L_ne10_radix8_butterfly_inverse_first_stage
- subs count_f, count_f, #2
- bgt .L_ne10_butterfly_length_odd_power2_second_stage
+ /* ---------------first stage: radix 4 */
+ mov count, fstride
+ mov p_fin0, p_fin
+ mov p_tmp, p_fout
+ add p_fin2, p_fin0, fstride, lsl #4 /* get the address of F[fstride*2] */
+ add p_fin1, p_fin0, fstride, lsl #3 /* get the address of F[fstride] */
+ add p_fin3, p_fin2, fstride, lsl #3 /* get the address of F[fstride*3] */
+ vld2.32 {q_in0_01}, [p_fin0:64]!
+ vld2.32 {q_in2_01}, [p_fin2:64]!
+ vld2.32 {q_in1_01}, [p_fin1:64]!
+ vld2.32 {q_in3_01}, [p_fin3:64]!
+
+.L_ne10_radix4_butterfly_inverse_first_stage_fstride:
+ BUTTERFLY4X2_WITHOUT_TWIDDLES "TRUE"
- add p_twiddles, p_twiddles, mstride, lsl #4
- add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */
- lsl mstride, mstride, #2
+ subs count, count, #2
+ bgt .L_ne10_radix4_butterfly_inverse_first_stage_fstride
- /* other stages */
- subs stage_count, stage_count, #1
+ /* swap input/output buffer */
+ mov tmp0, p_fout
+ mov p_fout, p_fin
+ mov p_fin, tmp0
- /* loop of other stages */
-.L_ne10_butterfly_length_odd_power2_other_stages:
+ /* (stage_count-2): reduce the counter for the last stage */
+ sub stage_count, stage_count, #2
+ lsl nstep, fstride, #3
lsr fstride, fstride, #2
- /* loop of fstride */
- mov count_f, fstride
-.L_ne10_butterfly_length_odd_power2_other_stages_fstride:
- sub tmp0, fstride, count_f
- mul tmp0, tmp0, mstride
- add p_fout0, p_fout, tmp0, lsl #5
- add p_fout2, p_fout0, mstride, lsl #4 /* get the address of F[mstride*2] */
- add p_fout1, p_fout0, mstride, lsl #3 /* get the address of F[mstride] */
- add p_fout3, p_fout2, mstride, lsl #3 /* get the address of F[mstride*3] */
- mov p_tw1, p_twiddles
- add p_tw2, p_tw1, mstride, lsl #3 /* get the address of tw2 */
- add p_tw3, p_tw1, mstride, lsl #4 /* get the address of tw3 */
+ b .L_ne10_butterfly_inverse_other_stages
+ /* ---------------end of first stage: radix 4 */
- /* loop of mstride */
- mov count_m, mstride
-.L_ne10_butterfly_length_odd_power2_other_stages_mstride:
- RADIX4_BUTTERFLY_P4
- subs count_m, count_m, #4
- bgt .L_ne10_butterfly_length_odd_power2_other_stages_mstride
- /* end of mstride loop */
+ /* ---------------first stage: radix 8 */
+.L_ne10_radix8_butterfly_inverse_first_stage:
+ lsr fstride1, fstride, #2
+ mov p_in1, p_fin
+ mov p_out1, p_fout
+ lsl fstep, fstride, #1
- subs count_f, count_f, #1
- bgt .L_ne10_butterfly_length_odd_power2_other_stages_fstride
+.L_ne10_radix8_butterfly_inverse_first_stage_fstride1:
+ BUTTERFLY8X2_WITHOUT_TWIDDLES "TRUE"
- add p_twiddles, p_twiddles, mstride, lsl #4
- add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */
- lsl mstride, mstride, #2
+ subs fstride1, fstride1, #2
+ bgt .L_ne10_radix8_butterfly_inverse_first_stage_fstride1
- subs stage_count, stage_count, #1
- bgt .L_ne10_butterfly_length_odd_power2_other_stages
-
-.L_ne10_butterfly_length_odd_power2_end:
- /*Return From Function*/
- vpop {q4-q7}
- pop {r4-r12,pc}
-
- /* end of ne10_butterfly_length_odd_power2_float32_neon */
-
-
- /**
- * @details
- * This function implements the 4 butterfly
- *
- * @param[in/out] *Fout points to input/output pointers
- * @param[in] *factors factors pointer:
- * 0: stage number
- * 1: stride for the first stage
- * others: factor out powers of 4, powers of 2
- * @param[in] *twiddles twiddles coeffs of FFT
- */
-
- .align 4
- .global ne10_mixed_radix_butterfly_length_even_power2_float32_neon
- .thumb
- .thumb_func
-
-ne10_mixed_radix_butterfly_length_even_power2_float32_neon:
- push {r4-r12,lr}
- vpush {q4-q7}
+ lsl nstep, fstride, #2
+ sub stage_count, stage_count, #2
+ lsr fstride, fstride, #4
+ lsl mstride, mstride, #2
+ add p_twiddles, p_twiddles, #48 /* get the address of twiddles += 6 */
- ldr stage_count, [p_factors] /* get factors[0]---stage_count */
- ldr fstride, [p_factors, #4] /* get factors[1]---fstride */
- add p_factors, p_factors, stage_count, lsl #3 /* get the address of factors[2*stage_count] */
- ldr radix, [p_factors] /* get factors[2*stage_count]--- the first radix */
- ldr mstride, [p_factors, #-4] /* get factors[2*stage_count-1]--- mstride */
+ /* swap input/output buffer */
+ mov tmp0, p_fout
+ mov p_fout, p_fin
+ mov p_fin, tmp0
- mov p_fin, p_fout
- mov p_fout0, p_fout
- mov count, fstride
+ /* if the last stage */
+ cmp stage_count, #1
+ beq .L_ne10_butterfly_inverse_last_stages
- /* the first stage */
-.L_ne10_butterfly_length_even_power2_first_stage:
- vld1.32 {d_in0_0, d_in1_0, d_in2_0, d_in3_0}, [p_fin]!
- vld1.32 {d_in0_1, d_in1_1, d_in2_1, d_in3_1}, [p_fin]!
- vswp d_in1_0, d_in0_1
- vswp d_in3_0, d_in2_1
- vsub.f32 q_s2_01, q_in0_01, q_in2_01
- vadd.f32 q_out0_01, q_in0_01, q_in2_01
- vadd.f32 q_s0_01, q_in1_01, q_in3_01
- vsub.f32 q_s1_01, q_in1_01, q_in3_01
- vsub.f32 q_out2_01, q_out0_01, q_s0_01
- vrev64.32 q_s1_01, q_s1_01
- vadd.f32 q_out0_01, q_out0_01, q_s0_01
- vadd.f32 q_out1_01, q_s2_01, q_s1_01
- vsub.f32 q_out3_01, q_s2_01, q_s1_01
- vrev64.32 q_tmp, q_out1_01
- vrev64.32 q_tmp2, q_out3_01
- vtrn.32 q_out3_01, q_tmp
- vtrn.32 q_out1_01, q_tmp2
- vswp d_out1_0, d_out0_1
- vswp d_out3_0, d_out2_1
- subs count, count, #2
- vst1.32 {d_out0_0, d_out1_0, d_out2_0, d_out3_0}, [p_fout0]!
- vst1.32 {d_out0_1, d_out1_1, d_out2_1, d_out3_1}, [p_fout0]!
- bgt .L_ne10_butterfly_length_even_power2_first_stage
+ /* (stage_count-1): reduce the counter for the last stage */
+ sub stage_count, stage_count, #1
+ /*--------------- end of first stage: radix 8 */
+ /* ---------------end of first stage--------------- */
- /* other stages */
- subs stage_count, stage_count, #1
+ /* ---------------other stages except last stage--------------- */
/* loop of other stages */
-.L_ne10_butterfly_length_even_power2_other_stages:
- lsr fstride, fstride, #2
+.L_ne10_butterfly_inverse_other_stages:
+ lsl mstep, mstride, #3
+ mov p_in1, p_fin
+ vld2.32 {d_fin0_r, d_fin0_i}, [p_in1:64], nstep
+ vld2.32 {d_fin1_r, d_fin1_i}, [p_in1:64], nstep
+ vld2.32 {d_fin2_r, d_fin2_i}, [p_in1:64], nstep
+ vld2.32 {d_fin3_r, d_fin3_i}, [p_in1:64], nstep
/* loop of fstride */
mov count_f, fstride
-.L_ne10_butterfly_length_even_power2_other_stages_fstride:
+.L_ne10_butterfly_inverse_other_stages_fstride:
+ mov p_tw1, p_twiddles
sub tmp0, fstride, count_f
mul tmp0, tmp0, mstride
- add p_fout0, p_fout, tmp0, lsl #5
- add p_fout2, p_fout0, mstride, lsl #4 /* get the address of F[mstride*2] */
- add p_fout1, p_fout0, mstride, lsl #3 /* get the address of F[mstride] */
- add p_fout3, p_fout2, mstride, lsl #3 /* get the address of F[mstride*3] */
- mov p_tw1, p_twiddles
- add p_tw2, p_tw1, mstride, lsl #3 /* get the address of tw2 */
- add p_tw3, p_tw1, mstride, lsl #4 /* get the address of tw3 */
+ add p_out1, p_fout, tmp0, lsl #5
+ vld2.32 {d_tw0_r, d_tw0_i}, [p_tw1:64], mstep
+ vld2.32 {d_tw1_r, d_tw1_i}, [p_tw1:64], mstep
+ vld2.32 {d_tw2_r, d_tw2_i}, [p_tw1:64]
/* loop of mstride */
mov count_m, mstride
-.L_ne10_butterfly_length_even_power2_other_stages_mstride:
- RADIX4_BUTTERFLY_P4
+.L_ne10_butterfly_inverse_other_stages_mstride:
+ BUTTERFLY4X2_WITH_TWIDDLES "TRUE"
- subs count_m, count_m, #4
- bgt .L_ne10_butterfly_length_even_power2_other_stages_mstride
+ subs count_m, count_m, #2
+ bgt .L_ne10_butterfly_inverse_other_stages_mstride
/* end of mstride loop */
subs count_f, count_f, #1
- bgt .L_ne10_butterfly_length_even_power2_other_stages_fstride
+ bgt .L_ne10_butterfly_inverse_other_stages_fstride
add p_twiddles, p_twiddles, mstride, lsl #4
add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */
lsl mstride, mstride, #2
-
- subs stage_count, stage_count, #1
- bgt .L_ne10_butterfly_length_even_power2_other_stages
-
-.L_ne10_butterfly_length_even_power2_end:
- /*Return From Function*/
- vpop {q4-q7}
- pop {r4-r12,pc}
-
- /* end of ne10_butterfly_length_even_power2_float32_neon */
-
-
- /**
- * @details
- * This function implements the 4 butterfly
- *
- * @param[in/out] *Fout points to input/output pointers
- * @param[in] *factors factors pointer:
- * 0: stage number
- * 1: stride for the first stage
- * others: factor out powers of 4, powers of 2
- * @param[in] *twiddles twiddles coeffs of FFT
- */
-
- .align 4
- .global ne10_mixed_radix_butterfly_inverse_length_odd_power2_float32_neon
- .thumb
- .thumb_func
-
-ne10_mixed_radix_butterfly_inverse_length_odd_power2_float32_neon:
- push {r4-r12,lr}
- vpush {q4-q7}
-
- ldr stage_count, [p_factors] /* get factors[0]---stage_count */
- ldr fstride, [p_factors, #4] /* get factors[1]---fstride */
- add p_factors, p_factors, stage_count, lsl #3 /* get the address of factors[2*stage_count] */
- ldr radix, [p_factors] /* get factors[2*stage_count]--- the first radix */
- ldr mstride, [p_factors, #-4] /* get factors[2*stage_count-1]--- mstride */
-
- mov p_fin, p_fout
- mov p_fout0, p_fout
- mov count, fstride
-
- /* the first stage */
-.L_ne10_butterfly_inverse_length_odd_power2_first_stage:
- vld4.32 {d_in0_r01, d_in0_i01, d_in1_r01, d_in1_i01}, [p_fin]!
- vld4.32 {d_in0_r23, d_in0_i23, d_in1_r23, d_in1_i23}, [p_fin]!
- vsub.f32 q_out1_r0123, q_in0_r0123, q_in1_r0123
- vsub.f32 q_out1_i0123, q_in0_i0123, q_in1_i0123
- vadd.f32 q_out0_r0123, q_in0_r0123, q_in1_r0123
- vadd.f32 q_out0_i0123, q_in0_i0123, q_in1_i0123
- subs count, count, #4
- vst4.32 {d_out0_r01, d_out0_i01, d_out1_r01, d_out1_i01}, [p_fout0]!
- vst4.32 {d_out0_r23, d_out0_i23, d_out1_r23, d_out1_i23}, [p_fout0]!
-
- bgt .L_ne10_butterfly_inverse_length_odd_power2_first_stage
-
- /* the second stages */
- subs stage_count, stage_count, #1
lsr fstride, fstride, #2
- /* loop of fstride */
- mov count_f, fstride
- mov p_tw1, p_twiddles
- mov p_fout0, p_fout
- add p_fout1, p_fout, mstride, lsl #5
- mov p_fout2, p_fout
- mov p_fout3, p_fout1
- mov tmp0, #96
- vld2.32 {d_tw1_r01, d_tw2_r01, d_tw1_i01, d_tw2_i01}, [p_tw1]!
-
-.L_ne10_butterfly_inverse_length_odd_power2_second_stage:
- RADIX24_BUTTERFLY_INVERSE_P4
-
- subs count_f, count_f, #2
- bgt .L_ne10_butterfly_inverse_length_odd_power2_second_stage
-
- add p_twiddles, p_twiddles, mstride, lsl #4
- add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */
- lsl mstride, mstride, #2
+ /* swap input/output buffer */
+ mov tmp0, p_fout
+ mov p_fout, p_fin
+ mov p_fin, tmp0
- /* other stages */
subs stage_count, stage_count, #1
+ bgt .L_ne10_butterfly_inverse_other_stages
+ /* ---------------end other stages except last stage--------------- */
- /* loop of other stages */
-.L_ne10_butterfly_inverse_length_odd_power2_other_stages:
- lsr fstride, fstride, #2
- /* loop of fstride */
- mov count_f, fstride
-.L_ne10_butterfly_inverse_length_odd_power2_other_stages_fstride:
- sub tmp0, fstride, count_f
- mul tmp0, tmp0, mstride
- add p_fout0, p_fout, tmp0, lsl #5
- add p_fout2, p_fout0, mstride, lsl #4 /* get the address of F[mstride*2] */
- add p_fout1, p_fout0, mstride, lsl #3 /* get the address of F[mstride] */
- add p_fout3, p_fout2, mstride, lsl #3 /* get the address of F[mstride*3] */
+ /* ---------------last stage--------------- */
+.L_ne10_butterfly_inverse_last_stages:
+ mov p_in1, p_fin
+ mov p_out1, p_out_ls
mov p_tw1, p_twiddles
- add p_tw2, p_tw1, mstride, lsl #3 /* get the address of tw2 */
- add p_tw3, p_tw1, mstride, lsl #4 /* get the address of tw3 */
+ mov mstep, nstep
+ vld2.32 {d_fin0_r, d_fin0_i}, [p_in1:64], nstep
+ vld2.32 {d_fin1_r, d_fin1_i}, [p_in1:64], nstep
+ vld2.32 {d_fin2_r, d_fin2_i}, [p_in1:64], nstep
+ vld2.32 {d_fin3_r, d_fin3_i}, [p_in1:64], nstep
+ vld2.32 {d_tw0_r, d_tw0_i}, [p_tw1:64], mstep
+ vld2.32 {d_tw1_r, d_tw1_i}, [p_tw1:64], mstep
+ vld2.32 {d_tw2_r, d_tw2_i}, [p_tw1:64]
/* loop of mstride */
mov count_m, mstride
+.L_ne10_butterfly_inverse_last_stages_mstride:
+ BUTTERFLY4X2_WITH_TWIDDLES "TRUE"
-.L_ne10_butterfly_inverse_length_odd_power2_other_stages_mstride:
- RADIX4_BUTTERFLY_INVERSE_P4
-
- subs count_m, count_m, #4
- bgt .L_ne10_butterfly_inverse_length_odd_power2_other_stages_mstride
+ subs count_m, count_m, #2
+ bgt .L_ne10_butterfly_inverse_last_stages_mstride
/* end of mstride loop */
+ /* ---------------end of last stage--------------- */
- subs count_f, count_f, #1
- bgt .L_ne10_butterfly_inverse_length_odd_power2_other_stages_fstride
-
- add p_twiddles, p_twiddles, mstride, lsl #4
- add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */
- lsl mstride, mstride, #2
-
- subs stage_count, stage_count, #1
- bgt .L_ne10_butterfly_inverse_length_odd_power2_other_stages
-
-.L_ne10_butterfly_inverse_length_odd_power2_end:
+.L_ne10_butterfly_inverse_end:
/*Return From Function*/
vpop {q4-q7}
pop {r4-r12,pc}
- /* end of ne10_butterfly_inverse_length_odd_power2_float32_neon */
-
-
- /**
- * @details
- * This function implements the 4 butterfly
- *
- * @param[in/out] *Fout points to input/output pointers
- * @param[in] *factors factors pointer:
- * 0: stage number
- * 1: stride for the first stage
- * others: factor out powers of 4, powers of 2
- * @param[in] *twiddles twiddles coeffs of FFT
- */
-
- .align 4
- .global ne10_mixed_radix_butterfly_inverse_length_even_power2_float32_neon
- .thumb
- .thumb_func
-
-ne10_mixed_radix_butterfly_inverse_length_even_power2_float32_neon:
- push {r4-r12,lr}
- vpush {q4-q7}
+ /* end of ne10_mixed_radix_fft_backward_float32_neon */
- ldr stage_count, [p_factors] /* get factors[0]---stage_count */
- ldr fstride, [p_factors, #4] /* get factors[1]---fstride */
- add p_factors, p_factors, stage_count, lsl #3 /* get the address of factors[2*stage_count] */
- ldr radix, [p_factors] /* get factors[2*stage_count]--- the first radix */
- ldr mstride, [p_factors, #-4] /* get factors[2*stage_count-1]--- mstride */
-
- mov p_fin, p_fout
- mov p_fout0, p_fout
- mov count, fstride
- /* the first stage */
-.L_ne10_butterfly_inverse_length_even_power2_first_stage:
- vld1.32 {d_in0_0, d_in1_0, d_in2_0, d_in3_0}, [p_fin]!
- vld1.32 {d_in0_1, d_in1_1, d_in2_1, d_in3_1}, [p_fin]!
- vswp d_in1_0, d_in0_1
- vswp d_in3_0, d_in2_1
- vsub.f32 q_s2_01, q_in0_01, q_in2_01
- vadd.f32 q_out0_01, q_in0_01, q_in2_01
- vadd.f32 q_s0_01, q_in1_01, q_in3_01
- vsub.f32 q_s1_01, q_in1_01, q_in3_01
- vsub.f32 q_out2_01, q_out0_01, q_s0_01
- vrev64.32 q_s1_01, q_s1_01
- vadd.f32 q_out0_01, q_out0_01, q_s0_01
- vsub.f32 q_out1_01, q_s2_01, q_s1_01
- vadd.f32 q_out3_01, q_s2_01, q_s1_01
- vrev64.32 q_tmp, q_out1_01
- vrev64.32 q_tmp2, q_out3_01
- vtrn.32 q_out3_01, q_tmp
- vtrn.32 q_out1_01, q_tmp2
- vswp d_out1_0, d_out0_1
- vswp d_out3_0, d_out2_1
- subs count, count, #2
- vst1.32 {d_out0_0, d_out1_0, d_out2_0, d_out3_0}, [p_fout0]!
- vst1.32 {d_out0_1, d_out1_1, d_out2_1, d_out3_1}, [p_fout0]!
- bgt .L_ne10_butterfly_inverse_length_even_power2_first_stage
-
- /* other stages */
- subs stage_count, stage_count, #1
-
- /* loop of other stages */
-.L_ne10_butterfly_inverse_length_even_power2_other_stages:
- lsr fstride, fstride, #2
-
- /* loop of fstride */
- mov count_f, fstride
-.L_ne10_butterfly_inverse_length_even_power2_other_stages_fstride:
- sub tmp0, fstride, count_f
- mul tmp0, tmp0, mstride
- add p_fout0, p_fout, tmp0, lsl #5
- add p_fout2, p_fout0, mstride, lsl #4 /* get the address of F[mstride*2] */
- add p_fout1, p_fout0, mstride, lsl #3 /* get the address of F[mstride] */
- add p_fout3, p_fout2, mstride, lsl #3 /* get the address of F[mstride*3] */
- mov p_tw1, p_twiddles
- add p_tw2, p_tw1, mstride, lsl #3 /* get the address of tw2 */
- add p_tw3, p_tw1, mstride, lsl #4 /* get the address of tw3 */
-
- /* loop of mstride */
- mov count_m, mstride
-
-.L_ne10_butterfly_inverse_length_even_power2_other_stages_mstride:
- RADIX4_BUTTERFLY_INVERSE_P4
-
- subs count_m, count_m, #4
- bgt .L_ne10_butterfly_inverse_length_even_power2_other_stages_mstride
- /* end of mstride loop */
-
- subs count_f, count_f, #1
- bgt .L_ne10_butterfly_inverse_length_even_power2_other_stages_fstride
-
- add p_twiddles, p_twiddles, mstride, lsl #4
- add p_twiddles, p_twiddles, mstride, lsl #3 /* get the address of twiddles += mstride*3 */
- lsl mstride, mstride, #2
-
- subs stage_count, stage_count, #1
- bgt .L_ne10_butterfly_inverse_length_even_power2_other_stages
-
-.L_ne10_butterfly_inverse_length_even_power2_end:
- /*Return From Function*/
- vpop {q4-q7}
- pop {r4-r12,pc}
- /* end of ne10_butterfly_inverse_length_even_power2_float32_neon */
/* end of the file */
{
if (NE10_OK == is_NEON_available)
{
- ne10_radix4_butterfly_float = ne10_radix4_butterfly_float_neon;
- ne10_radix4_butterfly_inverse_float = ne10_radix4_butterfly_inverse_float_neon;
- ne10_rfft_float = ne10_rfft_float_neon;
-
ne10_fft_c2c_1d_float32 = ne10_fft_c2c_1d_float32_neon;
ne10_fft_c2c_1d_int32_unscaled = ne10_fft_c2c_1d_int32_unscaled_neon;
ne10_fft_c2c_1d_int32_scaled = ne10_fft_c2c_1d_int32_scaled_neon;
}
else
{
- ne10_radix4_butterfly_float = ne10_radix4_butterfly_float_c;
- ne10_radix4_butterfly_inverse_float = ne10_radix4_butterfly_inverse_float_c;
- ne10_rfft_float = ne10_rfft_float_c;
-
ne10_fft_c2c_1d_float32 = ne10_fft_c2c_1d_float32_c;
ne10_fft_c2c_1d_int32_unscaled = ne10_fft_c2c_1d_int32_unscaled_c;
ne10_fft_c2c_1d_int32_scaled = ne10_fft_c2c_1d_int32_scaled_c;
}
// These are actual definitions of our function pointers that are declared in inc/NE10_dsp.h
-void (*ne10_radix4_butterfly_float) (ne10_float32_t *pDst,
- ne10_float32_t *pSrc,
- ne10_uint16_t N,
- ne10_float32_t *pCoef);
-
-void (*ne10_radix4_butterfly_inverse_float) (ne10_float32_t *pDst,
- ne10_float32_t *pSrc,
- ne10_uint16_t N,
- ne10_float32_t *pCoef,
- ne10_float32_t onebyN);
-
-void (*ne10_rfft_float) (const ne10_rfft_instance_f32_t * S,
- ne10_float32_t * pSrc,
- ne10_float32_t * pDst,
- ne10_float32_t * pTemp);
-
void (*ne10_fft_c2c_1d_float32) (ne10_fft_cpx_float32_t *fout,
ne10_fft_cpx_float32_t *fin,
ne10_fft_cpx_float32_t *twiddles,
+++ /dev/null
-/*
- * Copyright 2012-14 ARM Limited
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of ARM Limited nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * NE10 Library : dsp/NE10_rfft.c
- */
-
-#include "NE10_types.h"
-#include "NE10_dsp.h"
-
-/**
- * @ingroup groupDSPs
- */
-
-/**
- * @defgroup RFFT_RIFFT Real FFT
- *
- * \par
- * Complex FFT/IFFT typically assumes complex input and output. However many applications use real valued data in time domain.
- * Real FFT/IFFT efficiently process real valued sequences with the advantage of requirement of low memory and with less complexity.
- *
- * \par
- * This set of functions implements Real Fast Fourier Transforms(RFFT) and Real Inverse Fast Fourier Transform(RIFFT)
- * for floating-point data types.
- *
- *
- * \par Algorithm:
- *
- * <b>Real Fast Fourier Transform:</b>
- * \par
- * Real FFT of N-point is calculated using CFFT of N/2-point and Split RFFT process as shown below figure.
- * \par
- * \image html RFFT.gif "Real Fast Fourier Transform"
- * \par
- * The RFFT functions operate on blocks of input and output data and each call to the function processes
- * <code>fftLenR</code> samples through the transform. <code>pSrc</code> points to input array containing <code>fftLenR</code> values.
- * <code>pDst</code> points to output array containing <code>2*fftLenR</code> values. \n
- * Input for real FFT is in the order of
- * <pre>{real[0], real[1], real[2], real[3], ..}</pre>
- * Output for real FFT is complex and are in the order of
- * <pre>{real(0), imag(0), real(1), imag(1), ...}</pre>
- *
- * <b>Real Inverse Fast Fourier Transform:</b>
- * \par
- * Real IFFT of N-point is calculated using Split RIFFT process and CFFT of N/2-point as shown below figure.
- * \par
- * \image html RIFFT.gif "Real Inverse Fast Fourier Transform"
- * \par
- * The RIFFT functions operate on blocks of input and output data and each call to the function processes
- * <code>2*fftLenR</code> samples through the transform. <code>pSrc</code> points to input array containing <code>2*fftLenR</code> values.
- * <code>pDst</code> points to output array containing <code>fftLenR</code> values. \n
- * Input for real IFFT is complex and are in the order of
- * <pre>{real(0), imag(0), real(1), imag(1), ...}</pre>
- * Output for real IFFT is real and in the order of
- * <pre>{real[0], real[1], real[2], real[3], ..}</pre>
- *
- * \par Lengths supported by the transform:
- * \par
- * Real FFT/IFFT supports the lengths [128, 512, 2048], as it internally uses CFFT/CIFFT.
- *
- * \par Instance Structure
- * A separate instance structure must be defined for each Instance but the twiddle factors can be reused.
- * There are separate instance structure declarations for each of the 3 supported data types.
- *
- * \par Initialization Functions
- * There is also an associated initialization function for each data type.
- * The initialization function performs the following operations:
- * - Sets the values of the internal structure fields.
- * - Initializes twiddle factor tables.
- * - Initializes CFFT data structure fields.
- * \par
- * Use of the initialization function is optional.
- * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
- * To place an instance structure into a const data section, the instance structure must be manually initialized.
- * Manually initialize the instance structure as follows:
- * <pre>
- *ne10_rfft_instance_f32_t S = {fft_len_real, fft_len_by2, ifft_flag_r, bit_reverse_flag_r, twid_coef_r_modifier, p_twiddle_A_real, p_twiddle_B_real, p_cfft};
- * </pre>
- * where <code>fft_len_real</code> length of RFFT/RIFFT; <code>fft_len_by2</code> length of CFFT/CIFFT.
- * <code>ifft_flag_r</code> Flag for selection of RFFT or RIFFT(Set ifftFlagR to calculate RIFFT otherwise calculates RFFT);
- * <code>bit_reverse_flag_r</code> Flag for selection of output order(Set bitReverseFlagR to output in normal order otherwise output in bit reversed order);
- * <code>twid_coef_r_modifier</code> modifier for twiddle factor table which supports 128, 512, 2048 RFFT lengths with same table;
- * <code>p_twiddle_A_real</code>points to A array of twiddle coefficients; <code>p_twiddle_B_real</code>points to B array of twiddle coefficients;
- * <code>p_cfft</code> points to the CFFT Instance structure. The CFFT structure also needs to be initialized, refer to arm_cfft_radix4_f32() for details regarding
- * static initialization of cfft structure.
- *
- */
-
-/**
- * @brief Core Real FFT process
- * @param[in] *pSrc points to the Input buffer
- * @param[in] N length of Real FFT
- * @param[in] *pATable points to the twiddle Coef A buffer
- * @param[in] *pBTable points to the twiddle Coef B buffer
- * @param[out] *pDst points to the Output buffer
- * @return none.
- * The function implements a Real FFT
- */
-
-static void ne10_split_rfft_float_c(
- ne10_float32_t * pSrc,
- ne10_uint32_t N,
- ne10_float32_t * pReTable,
- ne10_float32_t * pImTable,
- ne10_float32_t * pDst)
-{
- ne10_uint32_t k; /* Loop Counter */
- ne10_float32_t uRe,vRe,uIm,vIm; /* Temporary variables for output */
- ne10_float32_t reTwd,imTwd,reTmp,imTmp;
- ne10_float32_t *pCoefRe,*pCoefIm; /* Temporary pointers for twiddle factors */
- ne10_uint32_t NBy2 = N>>1;
-
- pCoefRe = pReTable;
- pCoefIm = pImTable;
-
- /*First Result*/
- pDst[0] = pSrc[0] + pSrc[1];
- pDst[1] = 0;
- /*N/2 th Result*/
- pDst[N] = pSrc[0] - pSrc[1];
- pDst[N+1] = 0;
-
- /*for k=1 to N/4-1 and k=N/4+1 to K=N/2-1*/
- for(k=1;k<(N>>2);k++)
- {
- /*uRe = (a[k]+a[N/2-k])/2*/
- uRe = (pSrc[2*k]+pSrc[2*(NBy2-k)])*0.5;
- /*uIm = (b[k]-b[N/2-k])/2*/
- uIm = (pSrc[2*k+1]-pSrc[2*(NBy2-k)+1])*0.5;
- /*VRe = (b[k]+b[N/2-k])/2*/
- vRe = (pSrc[2*k+1]+pSrc[2*(NBy2-k)+1])*0.5;
- /*Vim = -(a[k]-a[N/2-k])/2*/
- vIm = (pSrc[2*(NBy2-k)]-pSrc[2*k])*0.5;
- reTwd = pCoefRe[k];
- imTwd = pCoefIm[k];
- reTmp = vRe*reTwd + vIm*imTwd;
- imTmp = vIm*reTwd - vRe*imTwd;
- pDst[2*k] = uRe + reTmp;
- pDst[2*k+1] = uIm + imTmp;
- pDst[2*(NBy2-k)] = uRe-reTmp;
- pDst[2*(NBy2-k)+1] = imTmp-uIm;
-
- /*Out Put from K=N/2+1 till k=N-1*/
- /*y[N-k] = conjugate(y[k] k=0 to k<N/2)*/
- pDst[2*(N-k)] = uRe + reTmp;
- pDst[2*(N-k)+1] = -(uIm + imTmp);
- pDst[2*(NBy2+k)] = uRe-reTmp;
- pDst[2*(NBy2+k)+1] = uIm-imTmp;
- }
- /*y[N/4] = a[N/4]-jb[N/4]; y[3*N/4] = a[N/4] + jb[N/4]*/
- pDst[NBy2] = pSrc[NBy2];
- pDst[NBy2+1] = -pSrc[NBy2+1];
- pDst[N+NBy2] = pSrc[NBy2];
- pDst[N+NBy2+1] = pSrc[NBy2+1];
-
-}
-
-
-/**
- * @brief Core Real IFFT process
- * @param[in] *pSrc points to the Input buffer
- * @param[in] N length of Real FFT
- * @param[in] *pATable points to the twiddle Coef A buffer
- * @param[in] *pBTable points to the twiddle Coef B buffer
- * @param[out] *pDst points to the Output buffer
- * @return none.
- * The function implements a Real FFT
- */
-
-
-static void ne10_split_rifft_float_c(
- ne10_float32_t * pSrc,
- ne10_uint32_t N,
- ne10_float32_t * pReTable,
- ne10_float32_t * pImTable,
- ne10_float32_t * pDst)
-{
- ne10_uint32_t k; /* Loop Counter */
- ne10_float32_t uRe,vRe,uIm,vIm; /* Temporary variables for output */
- ne10_float32_t reTwd,imTwd,reTmp,imTmp;
- ne10_float32_t *pCoefRe,*pCoefIm; /* Temporary pointers for twiddle factors */
- ne10_uint32_t NBy2 = N>>1;
-
- pCoefRe = pReTable;
- pCoefIm = pImTable;
-
- /*First Result*/
- pDst[0] = (pSrc[0] + pSrc[N])*0.5;
- pDst[1] = (pSrc[0] - pSrc[N])*0.5;
-
- /*for k=1 to N/4-1 and k=N/4+1 to K=N/2-1*/
- for(k=1;k< (N>>2);k++)
- {
- /*uRe = (a[k]+a[N/2-k])/2*/
- uRe = (pSrc[2*k] + pSrc[2*(NBy2-k)])*0.5;
- /*uIm = (b[k]-b[N/2-k])/2*/
- uIm = (pSrc[2*k+1] - pSrc[2*(NBy2-k)+1])*0.5;
-
- reTmp = (pSrc[2*k] - pSrc[2*(NBy2-k)])*0.5;
- imTmp = (pSrc[2*k+1] + pSrc[2*(NBy2-k)+1])*0.5;
-
- reTwd = pCoefRe[k];
- imTwd = pCoefIm[k];
-
- /*VRe = (b[k]+b[N/2-k])/2*/
- vRe = reTmp*reTwd - imTmp*imTwd;
- /*Vim = -(a[k]-a[N/2-k])/2*/
- vIm = imTmp*reTwd + reTmp*imTwd;
- pDst[2*k] = (uRe-vIm);
- pDst[2*k+1] = (uIm+vRe);
-
- pDst[2*(NBy2-k)] = (uRe+vIm);
- pDst[2*(NBy2-k)+1] = (vRe-uIm);
- }
- /*y[N/4] = a[N/4]-jb[N/4]*/
- pDst[NBy2] = pSrc[NBy2];
- pDst[NBy2+1] = -pSrc[NBy2+1];
-
-}
-
-/**
- * @addtogroup RFFT_RIFFT
- * @{
- */
-
-/**
- * @brief Real FFT process
- * @param[in] *S is an instance for the structure
- * @param[in] *pSrc point to the input buffer (out-of-place: it's also a tmp buffer, so the input buffer is destroyed)
- * @param[out] *pDst point to the output buffer (out-of-place)
- * @param[in] *pTemp point to the temp buffer (used for intermedia buffer)
- * @return none.
- * The function implements a Real FFT/ Real IFFT depending
- * on the direction flag
- * Can support FFT lengths of 128, 512, 2048
- *
- */
-void ne10_rfft_float_c(
- const ne10_rfft_instance_f32_t * S,
- ne10_float32_t * pSrc,
- ne10_float32_t * pDst,
- ne10_float32_t * pTemp)
-{
- const ne10_cfft_radix4_instance_f32_t *S_CFFT = S->p_cfft;
-
- /* Caluclation of Real IFFT of input */
- if(S->ifft_flag_r == 1u)
- {
- /* Real IFFT core process */
- ne10_split_rifft_float_c(pSrc, S->fft_len_real, S->p_twiddle_A_real,
- S->p_twiddle_B_real, pTemp);
- /* Complex radix-4 IFFT process */
- ne10_radix4_butterfly_inverse_float_c(pDst, pTemp, S_CFFT->fft_len, S_CFFT->p_twiddle, S_CFFT->one_by_fft_len);
- }
- else
- {
- /* Complex radix-4 FFT process */
- ne10_radix4_butterfly_float_c(pTemp, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle);
- /* Real FFT core process */
- ne10_split_rfft_float_c(pTemp, S->fft_len_real, S->p_twiddle_A_real,
- S->p_twiddle_B_real, pDst);
- }
-
-}
-
-/**
- * @} end of RFFT_RIFFT group
- */
+++ /dev/null
-/*
- * Copyright 2012-14 ARM Limited
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of ARM Limited nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * NE10 Library : dsp/NE10_rfft.neon.c
- */
-#include <arm_neon.h>
-
-#include "NE10_types.h"
-#include "NE10_mask_table.h"
-#include "NE10_dsp.h"
-/**
- * @brief Core Real FFT process
- * @param[in] *pSrc points to the Input buffer
- * @param[in] N length of Real FFT
- * @param[in] *pATable points to the twiddle Coef A buffer
- * @param[in] *pBTable points to the twiddle Coef B buffer
- * @param[out] *pDst points to the Output buffer
- * @return none.
- * The function implements a Real FFT
- */
-
-static void ne10_split_rfft_float_neon(
- ne10_float32_t * pSrc,
- ne10_uint32_t N,
- ne10_float32_t * pReTable,
- ne10_float32_t * pImTable,
- ne10_float32_t * pDst)
-{
- ne10_uint32_t k,Cnt; /* Loop Counter */
- ne10_float32_t *pCoefRe,*pCoefIm,*pOut1,*pIn1,*pOut2,*pIn2; /* Temporary pointers for twiddle factors */
- ne10_uint32_t NBy2 = N>>1;
- /*NEON Variable Declarations*/
- float32x4x2_t vin1q2_f32,vin2q2_f32,vtmpq2_f32;
- float32x4_t vtmp1q_f32,vtmp2q_f32;
- float32x4_t vureq_f32,vuimq_f32,vvreq_f32,vvimq_f32;
- float32x4_t vretwdq_f32,vimtwdq_f32;
- float32x4_t vhalfq_f32;
- uint32x4_t vmaskq_u32,vmask1q_u32;
-
- /*Mask value to select three entries*/
- vmaskq_u32 = vld1q_u32(ne10_qMaskTable32+12);
- vmask1q_u32 = vld1q_u32(ne10_qMaskTable32+4);
-
-
-
- pCoefRe = pReTable+1;
- pCoefIm = pImTable+1;
-
- /*First Result*/
- pDst[0] = pSrc[0] + pSrc[1];
- pDst[1] = 0;
- /*N/2 th Result*/
- pDst[N] = pSrc[0] - pSrc[1];
- pDst[N+1] = 0;
-
- pOut1=pDst+2;
- pOut2=pDst+N-8;
- pIn1 = pSrc+2;
- pIn2 = pSrc +N -8;
-
- Cnt = ((N>>2)-1)>>2;
- vhalfq_f32 = vdupq_n_f32(0.5);
-
- /*for k=1 to N/4-1 and k=N/4+1 to K=N/2-1*/
- for(k=0;k<Cnt;k++)
- {
- /*b[4] b[3] b[2] b[1] a[4] a[3] a[2] a[1]*/
- vin1q2_f32 = vld2q_f32(pIn1);
- pIn1+=8;
- /*b[N/2-1] b[N/2-2] b[N/2-3] b[N/2-4] a[N/2-1] a[N/2-2] a[N/2-3] a[N/2-4]*/
- vin2q2_f32 = vld2q_f32(pIn2);
- pIn2-=8;
- /* a[N/2-2] a[N/2-1] a[N/2-4] a[N/2-3]*/
- vtmp1q_f32 =vrev64q_f32(vin2q2_f32.val[0]);
- /* b[N/2-2] b[N/2-1] b[N/2-4] b[N/2-3]*/
- vtmp2q_f32 =vrev64q_f32(vin2q2_f32.val[1]);
- /*a[N/2-4] a[N/2-3] a[N/2-2] a[N/2-1]*/
- vtmp1q_f32 = vcombine_f32(vget_high_f32(vtmp1q_f32),vget_low_f32(vtmp1q_f32));
- /* b[N/2-4] b[N/2-3] b[N/2-2] b[N/2-1]*/
- vtmp2q_f32 = vcombine_f32(vget_high_f32(vtmp2q_f32),vget_low_f32(vtmp2q_f32));
-
- /*uRe = (a[k]+a[N/2-k])/2*/
- vureq_f32 = vaddq_f32(vin1q2_f32.val[0],vtmp1q_f32);
- /*uIm = (b[k]-b[N/2-k])/2*/
- vuimq_f32 = vsubq_f32(vin1q2_f32.val[1],vtmp2q_f32);
- /*VRe = (b[k]+b[N/2-k])/2*/
- vvreq_f32 = vaddq_f32(vin1q2_f32.val[1],vtmp2q_f32);
- /*Vim = -(a[k]-a[N/2-k])/2*/
- vvimq_f32 = vsubq_f32(vtmp1q_f32,vin1q2_f32.val[0]);
-
- vureq_f32 = vmulq_f32(vureq_f32,vhalfq_f32);
- vuimq_f32 = vmulq_f32(vuimq_f32,vhalfq_f32);
- vvreq_f32 = vmulq_f32(vvreq_f32,vhalfq_f32);
- vvimq_f32 = vmulq_f32(vvimq_f32,vhalfq_f32);
-
- vretwdq_f32 = vld1q_f32(pCoefRe);
- vimtwdq_f32 = vld1q_f32(pCoefIm);
- pCoefRe+=4;
- pCoefIm+=4;
-
- /*reTmp = vRe*reTwd + vIm*imTwd */
- vtmp1q_f32 = vmulq_f32(vvreq_f32,vretwdq_f32);
- vtmp1q_f32 = vmlaq_f32(vtmp1q_f32,vvimq_f32,vimtwdq_f32);
- /*imTmp = vIm*reTwd - vRe*imTwd */
- vtmp2q_f32 = vmulq_f32(vvimq_f32,vretwdq_f32);
- vtmp2q_f32 = vmlsq_f32(vtmp2q_f32,vvreq_f32,vimtwdq_f32);
- //pDst[2*k] = uRe + reTmp;
- //pDst[2*k+1] = uIm + imTmp;
- vin1q2_f32.val[0] = vaddq_f32(vureq_f32,vtmp1q_f32);
- vin1q2_f32.val[1] = vaddq_f32(vuimq_f32,vtmp2q_f32);
- //pDst[2*(NBy2+k)] = uRe-reTmp;
- //pDst[2*(NBy2+k)+1] = uIm-imTmp;
- vin2q2_f32.val[0] = vsubq_f32(vureq_f32,vtmp1q_f32);
- vin2q2_f32.val[1] = vsubq_f32(vuimq_f32,vtmp2q_f32);
- vst2q_f32(pOut1,vin1q2_f32);
- vst2q_f32(pOut1+N,vin2q2_f32);
- pOut1+=8;
-
- //pDst[2*(NBy2-k)] = uRe-reTmp;
- //pDst[2*(NBy2-k)+1] = imTmp-uIm;
- vtmp2q_f32 = vsubq_f32(vtmp2q_f32,vuimq_f32);
- vtmp1q_f32 = vrev64q_f32(vin2q2_f32.val[0]);
- vtmp2q_f32 = vrev64q_f32(vtmp2q_f32);
-
- vin2q2_f32.val[0] = vcombine_f32(vget_high_f32(vtmp1q_f32),vget_low_f32(vtmp1q_f32));
- vin2q2_f32.val[1] = vcombine_f32(vget_high_f32(vtmp2q_f32),vget_low_f32(vtmp2q_f32));
-
- //pDst[2*(N-k)] = uRe + reTmp;
- //pDst[2*(N-k)+1] = -(uIm + imTmp);
- vtmp2q_f32 = vnegq_f32(vin1q2_f32.val[1]);
- vtmp1q_f32 = vrev64q_f32(vin1q2_f32.val[0]);
- vtmp2q_f32 = vrev64q_f32(vtmp2q_f32);
-
- vin1q2_f32.val[0] = vcombine_f32(vget_high_f32(vtmp1q_f32),vget_low_f32(vtmp1q_f32));
- vin1q2_f32.val[1] = vcombine_f32(vget_high_f32(vtmp2q_f32),vget_low_f32(vtmp2q_f32));
-
- vst2q_f32(pOut2,vin2q2_f32);
- vst2q_f32(pOut2+N,vin1q2_f32);
- pOut2-=8;
- }
- /*Lst Three VAlues*/
- /*b[4] b[3] b[2] b[1] a[4] a[3] a[2] a[1]*/
- vin1q2_f32 = vld2q_f32(pIn1);
- pIn1+=8;
- /*b[N/2-1] b[N/2-2] b[N/2-3] b[N/2-4] a[N/2-1] a[N/2-2] a[N/2-3] a[N/2-4]*/
- vin2q2_f32 = vld2q_f32(pIn2);
- pIn2-=8;
- /* a[N/2-2] a[N/2-1] a[N/2-4] a[N/2-3]*/
- vtmp1q_f32 =vrev64q_f32(vin2q2_f32.val[0]);
- /* b[N/2-2] b[N/2-1] b[N/2-4] b[N/2-3]*/
- vtmp2q_f32 =vrev64q_f32(vin2q2_f32.val[1]);
- /*a[N/2-4] a[N/2-3] a[N/2-2] a[N/2-1]*/
- vtmp1q_f32 = vcombine_f32(vget_high_f32(vtmp1q_f32),vget_low_f32(vtmp1q_f32));
- /* b[N/2-4] b[N/2-3] b[N/2-2] b[N/2-1]*/
- vtmp2q_f32 = vcombine_f32(vget_high_f32(vtmp2q_f32),vget_low_f32(vtmp2q_f32));
-
- /*uRe = (a[k]+a[N/2-k])/2*/
- vureq_f32 = vaddq_f32(vin1q2_f32.val[0],vtmp1q_f32);
- /*uIm = (b[k]-b[N/2-k])/2*/
- vuimq_f32 = vsubq_f32(vin1q2_f32.val[1],vtmp2q_f32);
- /*VRe = (b[k]+b[N/2-k])/2*/
- vvreq_f32 = vaddq_f32(vin1q2_f32.val[1],vtmp2q_f32);
- /*Vim = -(a[k]-a[N/2-k])/2*/
- vvimq_f32 = vsubq_f32(vtmp1q_f32,vin1q2_f32.val[0]);
-
- vureq_f32 = vmulq_f32(vureq_f32,vhalfq_f32);
- vuimq_f32 = vmulq_f32(vuimq_f32,vhalfq_f32);
- vvreq_f32 = vmulq_f32(vvreq_f32,vhalfq_f32);
- vvimq_f32 = vmulq_f32(vvimq_f32,vhalfq_f32);
-
- vretwdq_f32 = vld1q_f32(pCoefRe);
- vimtwdq_f32 = vld1q_f32(pCoefIm);
- pCoefRe+=4;
- pCoefIm+=4;
-
- /*reTmp = vRe*reTwd + vIm*imTwd */
- vtmp1q_f32 = vmulq_f32(vvreq_f32,vretwdq_f32);
- vtmp1q_f32 = vmlaq_f32(vtmp1q_f32,vvimq_f32,vimtwdq_f32);
- /*imTmp = vIm*reTwd - vRe*imTwd */
- vtmp2q_f32 = vmulq_f32(vvimq_f32,vretwdq_f32);
- vtmp2q_f32 = vmlsq_f32(vtmp2q_f32,vvreq_f32,vimtwdq_f32);
- //pDst[2*k] = uRe + reTmp;
- //pDst[2*k+1] = uIm + imTmp;
- vin1q2_f32.val[0] = vaddq_f32(vureq_f32,vtmp1q_f32);
- vin1q2_f32.val[1] = vaddq_f32(vuimq_f32,vtmp2q_f32);
-
- vtmpq2_f32 = vld2q_f32(pOut1);
- vin1q2_f32.val[0] = vbslq_f32(vmaskq_u32,vin1q2_f32.val[0],vtmpq2_f32.val[0]);
- vin1q2_f32.val[1] = vbslq_f32(vmaskq_u32,vin1q2_f32.val[1],vtmpq2_f32.val[1]);
- //pDst[2*(NBy2+k)] = uRe-reTmp;
- //pDst[2*(NBy2+k)+1] = uIm-imTmp;
- vin2q2_f32.val[0] = vsubq_f32(vureq_f32,vtmp1q_f32);
- vin2q2_f32.val[1] = vsubq_f32(vuimq_f32,vtmp2q_f32);
-
- vtmpq2_f32 = vld2q_f32(pOut1+N);
- vin2q2_f32.val[0] = vbslq_f32(vmaskq_u32,vin2q2_f32.val[0],vtmpq2_f32.val[0]);
- vin2q2_f32.val[1] = vbslq_f32(vmaskq_u32,vin2q2_f32.val[1],vtmpq2_f32.val[1]);
-
- vst2q_f32(pOut1,vin1q2_f32);
- vst2q_f32(pOut1+N,vin2q2_f32);
- pOut1+=8;
-
-
- //pDst[2*(NBy2-k)] = uRe-reTmp;
- //pDst[2*(NBy2-k)+1] = imTmp-uIm;
- vtmp2q_f32 = vnegq_f32(vin2q2_f32.val[1]);
- vtmp1q_f32 = vrev64q_f32(vin2q2_f32.val[0]);
- vtmp2q_f32 = vrev64q_f32(vtmp2q_f32);
-
- vin2q2_f32.val[0] = vcombine_f32(vget_high_f32(vtmp1q_f32),vget_low_f32(vtmp1q_f32));
- vin2q2_f32.val[1] = vcombine_f32(vget_high_f32(vtmp2q_f32),vget_low_f32(vtmp2q_f32));
-
- vtmpq2_f32 = vld2q_f32(pOut2);
- vin2q2_f32.val[0] = vbslq_f32(vmask1q_u32,vtmpq2_f32.val[0],vin2q2_f32.val[0]);
- vin2q2_f32.val[1] = vbslq_f32(vmask1q_u32,vtmpq2_f32.val[1],vin2q2_f32.val[1]);
-
-
- //pDst[2*(N-k)] = uRe + reTmp;
- //pDst[2*(N-k)+1] = -(uIm + imTmp);
- vtmp2q_f32 = vnegq_f32(vin1q2_f32.val[1]);
- vtmp1q_f32 = vrev64q_f32(vin1q2_f32.val[0]);
- vtmp2q_f32 = vrev64q_f32(vtmp2q_f32);
-
- vin1q2_f32.val[0] = vcombine_f32(vget_high_f32(vtmp1q_f32),vget_low_f32(vtmp1q_f32));
- vin1q2_f32.val[1] = vcombine_f32(vget_high_f32(vtmp2q_f32),vget_low_f32(vtmp2q_f32));
-
- vtmpq2_f32 = vld2q_f32(pOut2+N);
- vin1q2_f32.val[0] = vbslq_f32(vmask1q_u32,vtmpq2_f32.val[0],vin1q2_f32.val[0]);
- vin1q2_f32.val[1] = vbslq_f32(vmask1q_u32,vtmpq2_f32.val[1],vin1q2_f32.val[1]);
-
- vst2q_f32(pOut2,vin2q2_f32);
- vst2q_f32(pOut2+N,vin1q2_f32);
- pOut2-=8;
-
-
-
- /*y[N/4] = a[N/4]-jb[N/4]; y[3*N/4] = a[N/4] + jb[N/4]*/
- pDst[NBy2] = pSrc[NBy2];
- pDst[NBy2+1] = -pSrc[NBy2+1];
- pDst[N+NBy2] = pSrc[NBy2];
- pDst[N+NBy2+1] = pSrc[NBy2+1];
-
-
-}
-
-
-/**
- * @brief Core Real IFFT process
- * @param[in] *pSrc points to the Input buffer
- * @param[in] N length of Real FFT
- * @param[in] *pATable points to the twiddle Coef A buffer
- * @param[in] *pBTable points to the twiddle Coef B buffer
- * @param[out] *pDst points to the Output buffer
- * @return none.
- * The function implements a Real FFT
- */
-
-
-static void ne10_split_rifft_float_neon(
- ne10_float32_t * pSrc,
- ne10_uint32_t N,
- ne10_float32_t * pReTable,
- ne10_float32_t * pImTable,
- ne10_float32_t * pDst)
-{
- ne10_uint32_t k,Cnt; /* k: loop index; Cnt: number of full vector iterations of the main loop */
- ne10_float32_t *pCoefRe,*pCoefIm,*pOut1,*pOut2,*pIn1,*pIn2; /* Running pointers into the twiddle tables, the output buffer and the input buffer */
- ne10_uint32_t NBy2 = N>>1;
-
- /*NEON Variable Declarations*/
- float32x4x2_t vin1q2_f32,vin2q2_f32,vtmpq2_f32;
- float32x4_t vtmp1q_f32,vtmp2q_f32;
- float32x4_t vureq_f32,vuimq_f32,vvreq_f32,vvimq_f32;
- float32x4_t vretwdq_f32,vimtwdq_f32;
- float32x4_t vhalfq_f32;
- uint32x4_t vmaskq_u32,vmask1q_u32;
-
- /*Mask value to select three entries (per the original note; ne10_qMaskTable32
-  * is defined elsewhere, so the mask contents are not visible here)*/
- vmaskq_u32 = vld1q_u32(ne10_qMaskTable32+12);
- vmask1q_u32 = vld1q_u32(ne10_qMaskTable32+4);
-
- pCoefRe = pReTable+1; /* entry 0 of each table is skipped */
- pCoefIm = pImTable+1;
-
- /* First result: half the sum and half the difference of pSrc[0] and pSrc[N] */
- pDst[0] = (pSrc[0] + pSrc[N])*0.5;
- pDst[1] = (pSrc[0] - pSrc[N])*0.5;
-
- pOut1=pDst+2; /* forward-moving output pointer */
- pOut2=pDst+N-8; /* backward-moving output pointer, starts at floats N-8..N-1 */
- pIn1 = pSrc+2; /* forward-moving input pointer */
- pIn2 = pSrc +N -8; /* backward-moving input pointer */
-
- Cnt = ((N>>2)-1)>>2;
- vhalfq_f32 = vdupq_n_f32(0.5);
-
- /*for k=1 to N/4-1 and k=N/4+1 to K=N/2-1*/
- for(k=0;k<Cnt;k++)
- {
- /*b[4] b[3] b[2] b[1] a[4] a[3] a[2] a[1]*/
- vin1q2_f32 = vld2q_f32(pIn1);
- pIn1+=8;
- /*b[N/2-1] b[N/2-2] b[N/2-3] b[N/2-4] a[N/2-1] a[N/2-2] a[N/2-3] a[N/2-4]*/
- vin2q2_f32 = vld2q_f32(pIn2);
- pIn2-=8;
- /* a[N/2-2] a[N/2-1] a[N/2-4] a[N/2-3]*/
- vtmp1q_f32 =vrev64q_f32(vin2q2_f32.val[0]);
- /* b[N/2-2] b[N/2-1] b[N/2-4] b[N/2-3]*/
- vtmp2q_f32 =vrev64q_f32(vin2q2_f32.val[1]);
- /*a[N/2-4] a[N/2-3] a[N/2-2] a[N/2-1]*/
- vtmp1q_f32 = vcombine_f32(vget_high_f32(vtmp1q_f32),vget_low_f32(vtmp1q_f32));
- /* b[N/2-4] b[N/2-3] b[N/2-2] b[N/2-1]*/
- vtmp2q_f32 = vcombine_f32(vget_high_f32(vtmp2q_f32),vget_low_f32(vtmp2q_f32));
-
- /*uRe = (a[k]+a[N/2-k])/2*/
- vureq_f32 = vaddq_f32(vin1q2_f32.val[0],vtmp1q_f32);
- /*uIm = (b[k]-b[N/2-k])/2*/
- vuimq_f32 = vsubq_f32(vin1q2_f32.val[1],vtmp2q_f32);
-
- /*VRe = (a[k]-a[N/2-k])/2*/
- vvreq_f32 = vsubq_f32(vin1q2_f32.val[0],vtmp1q_f32);
- /*Vim = (b[k]+b[N/2-k])/2*/
- vvimq_f32 = vaddq_f32(vin1q2_f32.val[1],vtmp2q_f32);
-
- vureq_f32 = vmulq_f32(vureq_f32,vhalfq_f32); /* the four multiplies apply the /2 from the formulas above */
- vuimq_f32 = vmulq_f32(vuimq_f32,vhalfq_f32);
- vvreq_f32 = vmulq_f32(vvreq_f32,vhalfq_f32);
- vvimq_f32 = vmulq_f32(vvimq_f32,vhalfq_f32);
-
- vretwdq_f32 = vld1q_f32(pCoefRe);
- vimtwdq_f32 = vld1q_f32(pCoefIm);
- pCoefRe+=4;
- pCoefIm+=4;
-
- /*reTmp = vRe*reTwd - vIm*imTwd */
- vtmp1q_f32 = vmulq_f32(vvreq_f32,vretwdq_f32);
- vtmp1q_f32 = vmlsq_f32(vtmp1q_f32,vvimq_f32,vimtwdq_f32);
- /*imTmp = vIm*reTwd + vRe*imTwd */
- vtmp2q_f32 = vmulq_f32(vvimq_f32,vretwdq_f32);
- vtmp2q_f32 = vmlaq_f32(vtmp2q_f32,vvreq_f32,vimtwdq_f32);
- //pDst[2*k] = uRe - imTmp;
- //pDst[2*k+1] = uIm + reTmp;
- vin1q2_f32.val[0] = vsubq_f32(vureq_f32,vtmp2q_f32);
- vin1q2_f32.val[1] = vaddq_f32(vuimq_f32,vtmp1q_f32);
-
- //pDst[2*(NBy2-k)] = uRe+imTmp;
- //pDst[2*(NBy2-k)+1] = reTmp-uIm;
- vtmp2q_f32 = vaddq_f32(vtmp2q_f32,vureq_f32);
- vtmp1q_f32 = vsubq_f32(vtmp1q_f32,vuimq_f32);
- vtmp2q_f32 = vrev64q_f32(vtmp2q_f32); /* reverse lane order for the mirrored store through pOut2 */
- vtmp1q_f32 = vrev64q_f32(vtmp1q_f32);
-
- vin2q2_f32.val[1] = vcombine_f32(vget_high_f32(vtmp1q_f32),vget_low_f32(vtmp1q_f32));
- vin2q2_f32.val[0] = vcombine_f32(vget_high_f32(vtmp2q_f32),vget_low_f32(vtmp2q_f32));
-
-
- vst2q_f32(pOut1,vin1q2_f32);
- vst2q_f32(pOut2,vin2q2_f32);
- pOut1+=8;
- pOut2-=8;
- }
- /* Final pass after the loop: same computation as the loop body, but each
-  * result vector is merged with the values already stored at pOut1 / pOut2
-  * (vbslq_f32 with the two masks) before it is written back.
-  * b[4] b[3] b[2] b[1] a[4] a[3] a[2] a[1]*/
- vin1q2_f32 = vld2q_f32(pIn1);
- pIn1+=8;
- /*b[N/2-1] b[N/2-2] b[N/2-3] b[N/2-4] a[N/2-1] a[N/2-2] a[N/2-3] a[N/2-4]*/
- vin2q2_f32 = vld2q_f32(pIn2);
- pIn2-=8;
- /* a[N/2-2] a[N/2-1] a[N/2-4] a[N/2-3]*/
- vtmp1q_f32 =vrev64q_f32(vin2q2_f32.val[0]);
- /* b[N/2-2] b[N/2-1] b[N/2-4] b[N/2-3]*/
- vtmp2q_f32 =vrev64q_f32(vin2q2_f32.val[1]);
- /*a[N/2-4] a[N/2-3] a[N/2-2] a[N/2-1]*/
- vtmp1q_f32 = vcombine_f32(vget_high_f32(vtmp1q_f32),vget_low_f32(vtmp1q_f32));
- /* b[N/2-4] b[N/2-3] b[N/2-2] b[N/2-1]*/
- vtmp2q_f32 = vcombine_f32(vget_high_f32(vtmp2q_f32),vget_low_f32(vtmp2q_f32));
-
- /*uRe = (a[k]+a[N/2-k])/2*/
- vureq_f32 = vaddq_f32(vin1q2_f32.val[0],vtmp1q_f32);
- /*uIm = (b[k]-b[N/2-k])/2*/
- vuimq_f32 = vsubq_f32(vin1q2_f32.val[1],vtmp2q_f32);
-
- /*VRe = (a[k]-a[N/2-k])/2*/
- vvreq_f32 = vsubq_f32(vin1q2_f32.val[0],vtmp1q_f32);
- /*Vim = (b[k]+b[N/2-k])/2*/
- vvimq_f32 = vaddq_f32(vin1q2_f32.val[1],vtmp2q_f32);
-
- vureq_f32 = vmulq_f32(vureq_f32,vhalfq_f32);
- vuimq_f32 = vmulq_f32(vuimq_f32,vhalfq_f32);
- vvreq_f32 = vmulq_f32(vvreq_f32,vhalfq_f32);
- vvimq_f32 = vmulq_f32(vvimq_f32,vhalfq_f32);
-
- vretwdq_f32 = vld1q_f32(pCoefRe);
- vimtwdq_f32 = vld1q_f32(pCoefIm);
- pCoefRe+=4;
- pCoefIm+=4;
-
- /*reTmp = vRe*reTwd - vIm*imTwd */
- vtmp1q_f32 = vmulq_f32(vvreq_f32,vretwdq_f32);
- vtmp1q_f32 = vmlsq_f32(vtmp1q_f32,vvimq_f32,vimtwdq_f32);
- /*imTmp = vIm*reTwd + vRe*imTwd */
- vtmp2q_f32 = vmulq_f32(vvimq_f32,vretwdq_f32);
- vtmp2q_f32 = vmlaq_f32(vtmp2q_f32,vvreq_f32,vimtwdq_f32);
- //pDst[2*k] = uRe - imTmp;
- //pDst[2*k+1] = uIm + reTmp;
- vin1q2_f32.val[0] = vsubq_f32(vureq_f32,vtmp2q_f32);
- vin1q2_f32.val[1] = vaddq_f32(vuimq_f32,vtmp1q_f32);
-
- vtmpq2_f32 = vld2q_f32(pOut1); /* values currently stored at pOut1 */
- vin1q2_f32.val[0] = vbslq_f32(vmaskq_u32,vin1q2_f32.val[0],vtmpq2_f32.val[0]); /* mask bits set: new result; clear: keep stored value */
- vin1q2_f32.val[1] = vbslq_f32(vmaskq_u32,vin1q2_f32.val[1],vtmpq2_f32.val[1]);
-
- //pDst[2*(NBy2-k)] = uRe+imTmp;
- //pDst[2*(NBy2-k)+1] = reTmp-uIm;
- vtmp2q_f32 = vaddq_f32(vtmp2q_f32,vureq_f32);
- vtmp1q_f32 = vsubq_f32(vtmp1q_f32,vuimq_f32);
- vtmp2q_f32 = vrev64q_f32(vtmp2q_f32);
- vtmp1q_f32 = vrev64q_f32(vtmp1q_f32);
-
- vin2q2_f32.val[1] = vcombine_f32(vget_high_f32(vtmp1q_f32),vget_low_f32(vtmp1q_f32));
- vin2q2_f32.val[0] = vcombine_f32(vget_high_f32(vtmp2q_f32),vget_low_f32(vtmp2q_f32));
-
- vtmpq2_f32 = vld2q_f32(pOut2); /* values currently stored at pOut2 */
- vin2q2_f32.val[0] = vbslq_f32(vmask1q_u32,vtmpq2_f32.val[0],vin2q2_f32.val[0]); /* operands swapped: mask bits set keep the stored value */
- vin2q2_f32.val[1] = vbslq_f32(vmask1q_u32,vtmpq2_f32.val[1],vin2q2_f32.val[1]);
-
- vst2q_f32(pOut1,vin1q2_f32);
- vst2q_f32(pOut2,vin2q2_f32);
- pOut1+=6; /* pOut1 and pOut2 are not read again after these updates */
- pOut2-=6;
- /*y[N/4] = a[N/4]-jb[N/4]: the middle entry is copied with its imaginary part negated*/
- pDst[NBy2] = pSrc[NBy2];
- pDst[NBy2+1] = -pSrc[NBy2+1];
-
-}
-
-/**
- * @addtogroup RFFT_RIFFT
- * @{
- */
-
-/**
- * @brief Real FFT / Real IFFT process (NEON version)
- * @param[in] *S is an instance for the structure; this function reads its
- *            ifft_flag_r, fft_len_real, p_twiddle_A_real, p_twiddle_B_real
- *            and p_cfft members
- * @param[in] *pSrc point to the input buffer (out-of-place; per the original
- *            note it is also used as a tmp buffer, so the input buffer is
- *            destroyed -- not verifiable from this function alone)
- * @param[out] *pDst point to the output buffer (out-of-place)
- * @param[in] *pTemp point to the temp buffer (holds the intermediate result
- *            passed from the first stage to the second stage)
- * @return none.
- * The function implements a Real FFT/ Real IFFT depending
- * on the direction flag S->ifft_flag_r:
- *  - flag == 1: real IFFT split step (pSrc -> pTemp), then the complex
- *    radix-4 inverse butterfly (pTemp -> pDst);
- *  - otherwise: complex radix-4 butterfly (pSrc -> pTemp), then the real
- *    FFT split step (pTemp -> pDst).
- * Can support FFT lengths of 128, 512, 2048 (per the original note)
- *
- */
-void ne10_rfft_float_neon(
- const ne10_rfft_instance_f32_t * S,
- ne10_float32_t * pSrc,
- ne10_float32_t * pDst,
- ne10_float32_t * pTemp)
-{
- const ne10_cfft_radix4_instance_f32_t *S_CFFT = S->p_cfft;
-
- /* Calculation of Real IFFT of input */
- if(S->ifft_flag_r == 1u)
- {
- /* Real IFFT core process */
- ne10_split_rifft_float_neon(pSrc, S->fft_len_real, S->p_twiddle_A_real,
- S->p_twiddle_B_real, pTemp);
- /* Complex radix-4 IFFT process */
- ne10_radix4_butterfly_inverse_float_neon(pDst, pTemp, S_CFFT->fft_len, S_CFFT->p_twiddle, S_CFFT->one_by_fft_len);
- }
- else
- {
- /* Complex radix-4 FFT process */
- ne10_radix4_butterfly_float_neon(pTemp, pSrc, S_CFFT->fft_len, S_CFFT->p_twiddle);
- /* Real FFT core process */
- ne10_split_rfft_float_neon(pTemp, S->fft_len_real, S->p_twiddle_A_real,
- S->p_twiddle_B_real, pDst);
- }
-
-}
-/**
- * @} end of RFFT_RIFFT group
- */
-
+++ /dev/null
-/*
- * Copyright 2012-14 ARM Limited
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of ARM Limited nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "NE10_types.h"
-#include "NE10_dsp.h"
-
-/*
-* @brief Twiddle factors Table
-*/
-
-/** Pseudo code for Twiddle factor Tables Generation:
-
-for i=1 to N
- rfft_twiddle_coef(2*(i-1)) = cos((i-1) * 2*PI/(float)N)
- rfft_twiddle_coef(2*(i-1) + 1) = sin((i-1) * 2*PI/(float)N)
-end
-
-where N = 1024 and PI = 3.14159265358979
-
-N is the maximum FFT size supported, and the
-cos and sin values are stored in interleaved fashion
-*/
-
-static const ne10_float32_t rfft_twiddle_coef[2048] = {
- 1.000000000000000000f, 0.000000000000000000f,
- 0.999981175282601110f, 0.006135884649154475f,
- 0.999924701839144500f, 0.012271538285719925f,
- 0.999830581795823400f, 0.018406729905804820f,
- 0.999698818696204250f, 0.024541228522912288f,
- 0.999529417501093140f, 0.030674803176636626f,
- 0.999322384588349540f, 0.036807222941358832f,
- 0.999077727752645360f, 0.042938256934940820f,
- 0.998795456205172410f, 0.049067674327418015f,
- 0.998475580573294770f, 0.055195244349689934f,
- 0.998118112900149180f, 0.061320736302208578f,
- 0.997723066644191640f, 0.067443919563664051f,
- 0.997290456678690210f, 0.073564563599667426f,
- 0.996820299291165670f, 0.079682437971430126f,
- 0.996312612182778000f, 0.085797312344439894f,
- 0.995767414467659820f, 0.091908956497132724f,
- 0.995184726672196930f, 0.098017140329560604f,
- 0.994564570734255420f, 0.104121633872054590f,
- 0.993906970002356060f, 0.110222207293883060f,
- 0.993211949234794500f, 0.116318630911904750f,
- 0.992479534598709970f, 0.122410675199216200f,
- 0.991709753669099530f, 0.128498110793793170f,
- 0.990902635427780010f, 0.134580708507126170f,
- 0.990058210262297120f, 0.140658239332849210f,
- 0.989176509964781010f, 0.146730474455361750f,
- 0.988257567730749460f, 0.152797185258443440f,
- 0.987301418157858430f, 0.158858143333861450f,
- 0.986308097244598670f, 0.164913120489969890f,
- 0.985277642388941220f, 0.170961888760301220f,
- 0.984210092386929030f, 0.177004220412148750f,
- 0.983105487431216290f, 0.183039887955140950f,
- 0.981963869109555240f, 0.189068664149806190f,
- 0.980785280403230430f, 0.195090322016128250f,
- 0.979569765685440520f, 0.201104634842091900f,
- 0.978317370719627650f, 0.207111376192218560f,
- 0.977028142657754390f, 0.213110319916091360f,
- 0.975702130038528570f, 0.219101240156869800f,
- 0.974339382785575860f, 0.225083911359792830f,
- 0.972939952205560180f, 0.231058108280671110f,
- 0.971503890986251780f, 0.237023605994367200f,
- 0.970031253194543970f, 0.242980179903263870f,
- 0.968522094274417380f, 0.248927605745720150f,
- 0.966976471044852070f, 0.254865659604514570f,
- 0.965394441697689400f, 0.260794117915275510f,
- 0.963776065795439840f, 0.266712757474898370f,
- 0.962121404269041580f, 0.272621355449948980f,
- 0.960430519415565790f, 0.278519689385053060f,
- 0.958703474895871600f, 0.284407537211271880f,
- 0.956940335732208820f, 0.290284677254462330f,
- 0.955141168305770780f, 0.296150888243623790f,
- 0.953306040354193860f, 0.302005949319228080f,
- 0.951435020969008340f, 0.307849640041534870f,
- 0.949528180593036670f, 0.313681740398891520f,
- 0.947585591017741090f, 0.319502030816015690f,
- 0.945607325380521280f, 0.325310292162262930f,
- 0.943593458161960390f, 0.331106305759876430f,
- 0.941544065183020810f, 0.336889853392220050f,
- 0.939459223602189920f, 0.342660717311994380f,
- 0.937339011912574960f, 0.348418680249434560f,
- 0.935183509938947610f, 0.354163525420490340f,
- 0.932992798834738960f, 0.359895036534988110f,
- 0.930766961078983710f, 0.365612997804773850f,
- 0.928506080473215590f, 0.371317193951837540f,
- 0.926210242138311380f, 0.377007410216418260f,
- 0.923879532511286740f, 0.382683432365089780f,
- 0.921514039342042010f, 0.388345046698826250f,
- 0.919113851690057770f, 0.393992040061048100f,
- 0.916679059921042700f, 0.399624199845646790f,
- 0.914209755703530690f, 0.405241314004989860f,
- 0.911706032005429880f, 0.410843171057903910f,
- 0.909167983090522380f, 0.416429560097637150f,
- 0.906595704514915330f, 0.422000270799799680f,
- 0.903989293123443340f, 0.427555093430282080f,
- 0.901348847046022030f, 0.433093818853151960f,
- 0.898674465693953820f, 0.438616238538527660f,
- 0.895966249756185220f, 0.444122144570429200f,
- 0.893224301195515320f, 0.449611329654606540f,
- 0.890448723244757880f, 0.455083587126343840f,
- 0.887639620402853930f, 0.460538710958240010f,
- 0.884797098430937790f, 0.465976495767966180f,
- 0.881921264348355050f, 0.471396736825997640f,
- 0.879012226428633530f, 0.476799230063322090f,
- 0.876070094195406600f, 0.482183772079122720f,
- 0.873094978418290090f, 0.487550160148436000f,
- 0.870086991108711460f, 0.492898192229784040f,
- 0.867046245515692650f, 0.498227666972781870f,
- 0.863972856121586810f, 0.503538383725717580f,
- 0.860866938637767310f, 0.508830142543106990f,
- 0.857728610000272120f, 0.514102744193221660f,
- 0.854557988365400530f, 0.519355990165589640f,
- 0.851355193105265200f, 0.524589682678468950f,
- 0.848120344803297230f, 0.529803624686294610f,
- 0.844853565249707120f, 0.534997619887097150f,
- 0.841554977436898440f, 0.540171472729892850f,
- 0.838224705554838080f, 0.545324988422046460f,
- 0.834862874986380010f, 0.550457972936604810f,
- 0.831469612302545240f, 0.555570233019602180f,
- 0.828045045257755800f, 0.560661576197336030f,
- 0.824589302785025290f, 0.565731810783613120f,
- 0.821102514991104650f, 0.570780745886967260f,
- 0.817584813151583710f, 0.575808191417845340f,
- 0.814036329705948410f, 0.580813958095764530f,
- 0.810457198252594770f, 0.585797857456438860f,
- 0.806847553543799330f, 0.590759701858874160f,
- 0.803207531480644940f, 0.595699304492433360f,
- 0.799537269107905010f, 0.600616479383868970f,
- 0.795836904608883570f, 0.605511041404325550f,
- 0.792106577300212390f, 0.610382806276309480f,
- 0.788346427626606340f, 0.615231590580626820f,
- 0.784556597155575240f, 0.620057211763289100f,
- 0.780737228572094490f, 0.624859488142386340f,
- 0.776888465673232440f, 0.629638238914926980f,
- 0.773010453362736990f, 0.634393284163645490f,
- 0.769103337645579700f, 0.639124444863775730f,
- 0.765167265622458960f, 0.643831542889791390f,
- 0.761202385484261780f, 0.648514401022112440f,
- 0.757208846506484570f, 0.653172842953776760f,
- 0.753186799043612520f, 0.657806693297078640f,
- 0.749136394523459370f, 0.662415777590171780f,
- 0.745057785441466060f, 0.666999922303637470f,
- 0.740951125354959110f, 0.671558954847018330f,
- 0.736816568877369900f, 0.676092703575315920f,
- 0.732654271672412820f, 0.680600997795453020f,
- 0.728464390448225200f, 0.685083667772700360f,
- 0.724247082951467000f, 0.689540544737066830f,
- 0.720002507961381650f, 0.693971460889654000f,
- 0.715730825283818590f, 0.698376249408972920f,
- 0.711432195745216430f, 0.702754744457225300f,
- 0.707106781186547570f, 0.707106781186547460f,
- 0.702754744457225300f, 0.711432195745216430f,
- 0.698376249408972920f, 0.715730825283818590f,
- 0.693971460889654000f, 0.720002507961381650f,
- 0.689540544737066940f, 0.724247082951466890f,
- 0.685083667772700360f, 0.728464390448225200f,
- 0.680600997795453130f, 0.732654271672412820f,
- 0.676092703575316030f, 0.736816568877369790f,
- 0.671558954847018330f, 0.740951125354959110f,
- 0.666999922303637470f, 0.745057785441465950f,
- 0.662415777590171780f, 0.749136394523459260f,
- 0.657806693297078640f, 0.753186799043612410f,
- 0.653172842953776760f, 0.757208846506484460f,
- 0.648514401022112550f, 0.761202385484261780f,
- 0.643831542889791500f, 0.765167265622458960f,
- 0.639124444863775730f, 0.769103337645579590f,
- 0.634393284163645490f, 0.773010453362736990f,
- 0.629638238914927100f, 0.776888465673232440f,
- 0.624859488142386450f, 0.780737228572094380f,
- 0.620057211763289210f, 0.784556597155575240f,
- 0.615231590580626820f, 0.788346427626606230f,
- 0.610382806276309480f, 0.792106577300212390f,
- 0.605511041404325550f, 0.795836904608883460f,
- 0.600616479383868970f, 0.799537269107905010f,
- 0.595699304492433470f, 0.803207531480644830f,
- 0.590759701858874280f, 0.806847553543799220f,
- 0.585797857456438860f, 0.810457198252594770f,
- 0.580813958095764530f, 0.814036329705948300f,
- 0.575808191417845340f, 0.817584813151583710f,
- 0.570780745886967370f, 0.821102514991104650f,
- 0.565731810783613230f, 0.824589302785025290f,
- 0.560661576197336030f, 0.828045045257755800f,
- 0.555570233019602290f, 0.831469612302545240f,
- 0.550457972936604810f, 0.834862874986380010f,
- 0.545324988422046460f, 0.838224705554837970f,
- 0.540171472729892970f, 0.841554977436898330f,
- 0.534997619887097260f, 0.844853565249707010f,
- 0.529803624686294830f, 0.848120344803297120f,
- 0.524589682678468840f, 0.851355193105265200f,
- 0.519355990165589530f, 0.854557988365400530f,
- 0.514102744193221660f, 0.857728610000272120f,
- 0.508830142543106990f, 0.860866938637767310f,
- 0.503538383725717580f, 0.863972856121586700f,
- 0.498227666972781870f, 0.867046245515692650f,
- 0.492898192229784090f, 0.870086991108711350f,
- 0.487550160148436050f, 0.873094978418290090f,
- 0.482183772079122830f, 0.876070094195406600f,
- 0.476799230063322250f, 0.879012226428633410f,
- 0.471396736825997810f, 0.881921264348354940f,
- 0.465976495767966130f, 0.884797098430937790f,
- 0.460538710958240010f, 0.887639620402853930f,
- 0.455083587126343840f, 0.890448723244757880f,
- 0.449611329654606600f, 0.893224301195515320f,
- 0.444122144570429260f, 0.895966249756185110f,
- 0.438616238538527710f, 0.898674465693953820f,
- 0.433093818853152010f, 0.901348847046022030f,
- 0.427555093430282200f, 0.903989293123443340f,
- 0.422000270799799790f, 0.906595704514915330f,
- 0.416429560097637320f, 0.909167983090522270f,
- 0.410843171057903910f, 0.911706032005429880f,
- 0.405241314004989860f, 0.914209755703530690f,
- 0.399624199845646790f, 0.916679059921042700f,
- 0.393992040061048100f, 0.919113851690057770f,
- 0.388345046698826300f, 0.921514039342041900f,
- 0.382683432365089840f, 0.923879532511286740f,
- 0.377007410216418310f, 0.926210242138311270f,
- 0.371317193951837600f, 0.928506080473215480f,
- 0.365612997804773960f, 0.930766961078983710f,
- 0.359895036534988280f, 0.932992798834738850f,
- 0.354163525420490510f, 0.935183509938947500f,
- 0.348418680249434510f, 0.937339011912574960f,
- 0.342660717311994380f, 0.939459223602189920f,
- 0.336889853392220050f, 0.941544065183020810f,
- 0.331106305759876430f, 0.943593458161960390f,
- 0.325310292162262980f, 0.945607325380521280f,
- 0.319502030816015750f, 0.947585591017741090f,
- 0.313681740398891570f, 0.949528180593036670f,
- 0.307849640041534980f, 0.951435020969008340f,
- 0.302005949319228200f, 0.953306040354193750f,
- 0.296150888243623960f, 0.955141168305770670f,
- 0.290284677254462330f, 0.956940335732208940f,
- 0.284407537211271820f, 0.958703474895871600f,
- 0.278519689385053060f, 0.960430519415565790f,
- 0.272621355449948980f, 0.962121404269041580f,
- 0.266712757474898420f, 0.963776065795439840f,
- 0.260794117915275570f, 0.965394441697689400f,
- 0.254865659604514630f, 0.966976471044852070f,
- 0.248927605745720260f, 0.968522094274417270f,
- 0.242980179903263980f, 0.970031253194543970f,
- 0.237023605994367340f, 0.971503890986251780f,
- 0.231058108280671280f, 0.972939952205560070f,
- 0.225083911359792780f, 0.974339382785575860f,
- 0.219101240156869770f, 0.975702130038528570f,
- 0.213110319916091360f, 0.977028142657754390f,
- 0.207111376192218560f, 0.978317370719627650f,
- 0.201104634842091960f, 0.979569765685440520f,
- 0.195090322016128330f, 0.980785280403230430f,
- 0.189068664149806280f, 0.981963869109555240f,
- 0.183039887955141060f, 0.983105487431216290f,
- 0.177004220412148860f, 0.984210092386929030f,
- 0.170961888760301360f, 0.985277642388941220f,
- 0.164913120489970090f, 0.986308097244598670f,
- 0.158858143333861390f, 0.987301418157858430f,
- 0.152797185258443410f, 0.988257567730749460f,
- 0.146730474455361750f, 0.989176509964781010f,
- 0.140658239332849240f, 0.990058210262297120f,
- 0.134580708507126220f, 0.990902635427780010f,
- 0.128498110793793220f, 0.991709753669099530f,
- 0.122410675199216280f, 0.992479534598709970f,
- 0.116318630911904880f, 0.993211949234794500f,
- 0.110222207293883180f, 0.993906970002356060f,
- 0.104121633872054730f, 0.994564570734255420f,
- 0.098017140329560770f, 0.995184726672196820f,
- 0.091908956497132696f, 0.995767414467659820f,
- 0.085797312344439880f, 0.996312612182778000f,
- 0.079682437971430126f, 0.996820299291165670f,
- 0.073564563599667454f, 0.997290456678690210f,
- 0.067443919563664106f, 0.997723066644191640f,
- 0.061320736302208648f, 0.998118112900149180f,
- 0.055195244349690031f, 0.998475580573294770f,
- 0.049067674327418126f, 0.998795456205172410f,
- 0.042938256934940959f, 0.999077727752645360f,
- 0.036807222941358991f, 0.999322384588349540f,
- 0.030674803176636581f, 0.999529417501093140f,
- 0.024541228522912264f, 0.999698818696204250f,
- 0.018406729905804820f, 0.999830581795823400f,
- 0.012271538285719944f, 0.999924701839144500f,
- 0.006135884649154515f, 0.999981175282601110f,
- 0.000000000000000061f, 1.000000000000000000f,
- -0.006135884649154393f, 0.999981175282601110f,
- -0.012271538285719823f, 0.999924701839144500f,
- -0.018406729905804695f, 0.999830581795823400f,
- -0.024541228522912142f, 0.999698818696204250f,
- -0.030674803176636459f, 0.999529417501093140f,
- -0.036807222941358866f, 0.999322384588349540f,
- -0.042938256934940834f, 0.999077727752645360f,
- -0.049067674327418008f, 0.998795456205172410f,
- -0.055195244349689913f, 0.998475580573294770f,
- -0.061320736302208530f, 0.998118112900149180f,
- -0.067443919563663982f, 0.997723066644191640f,
- -0.073564563599667329f, 0.997290456678690210f,
- -0.079682437971430015f, 0.996820299291165780f,
- -0.085797312344439755f, 0.996312612182778000f,
- -0.091908956497132571f, 0.995767414467659820f,
- -0.098017140329560645f, 0.995184726672196930f,
- -0.104121633872054600f, 0.994564570734255420f,
- -0.110222207293883060f, 0.993906970002356060f,
- -0.116318630911904750f, 0.993211949234794500f,
- -0.122410675199216150f, 0.992479534598709970f,
- -0.128498110793793110f, 0.991709753669099530f,
- -0.134580708507126110f, 0.990902635427780010f,
- -0.140658239332849130f, 0.990058210262297120f,
- -0.146730474455361640f, 0.989176509964781010f,
- -0.152797185258443300f, 0.988257567730749460f,
- -0.158858143333861280f, 0.987301418157858430f,
- -0.164913120489969950f, 0.986308097244598670f,
- -0.170961888760301240f, 0.985277642388941220f,
- -0.177004220412148750f, 0.984210092386929030f,
- -0.183039887955140920f, 0.983105487431216290f,
- -0.189068664149806160f, 0.981963869109555240f,
- -0.195090322016128190f, 0.980785280403230430f,
- -0.201104634842091820f, 0.979569765685440520f,
- -0.207111376192218450f, 0.978317370719627650f,
- -0.213110319916091250f, 0.977028142657754390f,
- -0.219101240156869660f, 0.975702130038528570f,
- -0.225083911359792670f, 0.974339382785575860f,
- -0.231058108280671140f, 0.972939952205560180f,
- -0.237023605994367230f, 0.971503890986251780f,
- -0.242980179903263870f, 0.970031253194543970f,
- -0.248927605745720120f, 0.968522094274417380f,
- -0.254865659604514520f, 0.966976471044852070f,
- -0.260794117915275460f, 0.965394441697689400f,
- -0.266712757474898310f, 0.963776065795439840f,
- -0.272621355449948870f, 0.962121404269041580f,
- -0.278519689385052950f, 0.960430519415565900f,
- -0.284407537211271710f, 0.958703474895871600f,
- -0.290284677254462160f, 0.956940335732208940f,
- -0.296150888243623840f, 0.955141168305770670f,
- -0.302005949319228080f, 0.953306040354193860f,
- -0.307849640041534870f, 0.951435020969008340f,
- -0.313681740398891410f, 0.949528180593036670f,
- -0.319502030816015640f, 0.947585591017741200f,
- -0.325310292162262870f, 0.945607325380521390f,
- -0.331106305759876320f, 0.943593458161960390f,
- -0.336889853392219940f, 0.941544065183020810f,
- -0.342660717311994270f, 0.939459223602189920f,
- -0.348418680249434400f, 0.937339011912574960f,
- -0.354163525420490400f, 0.935183509938947610f,
- -0.359895036534988170f, 0.932992798834738850f,
- -0.365612997804773850f, 0.930766961078983710f,
- -0.371317193951837490f, 0.928506080473215590f,
- -0.377007410216418200f, 0.926210242138311380f,
- -0.382683432365089730f, 0.923879532511286740f,
- -0.388345046698826190f, 0.921514039342042010f,
- -0.393992040061047990f, 0.919113851690057770f,
- -0.399624199845646680f, 0.916679059921042700f,
- -0.405241314004989750f, 0.914209755703530690f,
- -0.410843171057903800f, 0.911706032005429880f,
- -0.416429560097636990f, 0.909167983090522490f,
- -0.422000270799799680f, 0.906595704514915330f,
- -0.427555093430281860f, 0.903989293123443450f,
- -0.433093818853151900f, 0.901348847046022030f,
- -0.438616238538527380f, 0.898674465693953930f,
- -0.444122144570429140f, 0.895966249756185220f,
- -0.449611329654606710f, 0.893224301195515210f,
- -0.455083587126343720f, 0.890448723244757990f,
- -0.460538710958240060f, 0.887639620402853930f,
- -0.465976495767966010f, 0.884797098430937900f,
- -0.471396736825997700f, 0.881921264348355050f,
- -0.476799230063321920f, 0.879012226428633530f,
- -0.482183772079122720f, 0.876070094195406600f,
- -0.487550160148435720f, 0.873094978418290200f,
- -0.492898192229783980f, 0.870086991108711460f,
- -0.498227666972781590f, 0.867046245515692760f,
- -0.503538383725717460f, 0.863972856121586810f,
- -0.508830142543107100f, 0.860866938637767200f,
- -0.514102744193221660f, 0.857728610000272120f,
- -0.519355990165589640f, 0.854557988365400530f,
- -0.524589682678468730f, 0.851355193105265200f,
- -0.529803624686294720f, 0.848120344803297230f,
- -0.534997619887097040f, 0.844853565249707230f,
- -0.540171472729892850f, 0.841554977436898440f,
- -0.545324988422046240f, 0.838224705554838190f,
- -0.550457972936604700f, 0.834862874986380120f,
- -0.555570233019601960f, 0.831469612302545460f,
- -0.560661576197335920f, 0.828045045257755800f,
- -0.565731810783613230f, 0.824589302785025180f,
- -0.570780745886967140f, 0.821102514991104760f,
- -0.575808191417845340f, 0.817584813151583710f,
- -0.580813958095764420f, 0.814036329705948520f,
- -0.585797857456438860f, 0.810457198252594770f,
- -0.590759701858874050f, 0.806847553543799450f,
- -0.595699304492433360f, 0.803207531480644940f,
- -0.600616479383868750f, 0.799537269107905240f,
- -0.605511041404325430f, 0.795836904608883570f,
- -0.610382806276309590f, 0.792106577300212280f,
- -0.615231590580626710f, 0.788346427626606340f,
- -0.620057211763289210f, 0.784556597155575130f,
- -0.624859488142386230f, 0.780737228572094600f,
- -0.629638238914927100f, 0.776888465673232440f,
- -0.634393284163645380f, 0.773010453362737100f,
- -0.639124444863775730f, 0.769103337645579590f,
- -0.643831542889791280f, 0.765167265622459070f,
- -0.648514401022112440f, 0.761202385484261890f,
- -0.653172842953776530f, 0.757208846506484680f,
- -0.657806693297078640f, 0.753186799043612520f,
- -0.662415777590171890f, 0.749136394523459260f,
- -0.666999922303637360f, 0.745057785441466060f,
- -0.671558954847018440f, 0.740951125354958990f,
- -0.676092703575315810f, 0.736816568877370020f,
- -0.680600997795453020f, 0.732654271672412820f,
- -0.685083667772700240f, 0.728464390448225310f,
- -0.689540544737066940f, 0.724247082951466890f,
- -0.693971460889653780f, 0.720002507961381770f,
- -0.698376249408972800f, 0.715730825283818710f,
- -0.702754744457225080f, 0.711432195745216660f,
- -0.707106781186547460f, 0.707106781186547570f,
- -0.711432195745216540f, 0.702754744457225190f,
- -0.715730825283818590f, 0.698376249408972920f,
- -0.720002507961381650f, 0.693971460889654000f,
- -0.724247082951466780f, 0.689540544737067050f,
- -0.728464390448225200f, 0.685083667772700360f,
- -0.732654271672412700f, 0.680600997795453240f,
- -0.736816568877369900f, 0.676092703575315920f,
- -0.740951125354958880f, 0.671558954847018550f,
- -0.745057785441465950f, 0.666999922303637580f,
- -0.749136394523459150f, 0.662415777590172010f,
- -0.753186799043612410f, 0.657806693297078750f,
- -0.757208846506484570f, 0.653172842953776640f,
- -0.761202385484261670f, 0.648514401022112550f,
- -0.765167265622458960f, 0.643831542889791390f,
- -0.769103337645579480f, 0.639124444863775840f,
- -0.773010453362736990f, 0.634393284163645490f,
- -0.776888465673232330f, 0.629638238914927210f,
- -0.780737228572094490f, 0.624859488142386340f,
- -0.784556597155575020f, 0.620057211763289430f,
- -0.788346427626606230f, 0.615231590580626930f,
- -0.792106577300212170f, 0.610382806276309700f,
- -0.795836904608883460f, 0.605511041404325660f,
- -0.799537269107905120f, 0.600616479383868860f,
- -0.803207531480644830f, 0.595699304492433470f,
- -0.806847553543799330f, 0.590759701858874160f,
- -0.810457198252594660f, 0.585797857456438980f,
- -0.814036329705948410f, 0.580813958095764530f,
- -0.817584813151583600f, 0.575808191417845450f,
- -0.821102514991104650f, 0.570780745886967260f,
- -0.824589302785025070f, 0.565731810783613450f,
- -0.828045045257755690f, 0.560661576197336140f,
- -0.831469612302545350f, 0.555570233019602180f,
- -0.834862874986380010f, 0.550457972936604920f,
- -0.838224705554838080f, 0.545324988422046350f,
- -0.841554977436898330f, 0.540171472729892970f,
- -0.844853565249707120f, 0.534997619887097150f,
- -0.848120344803297120f, 0.529803624686294830f,
- -0.851355193105265200f, 0.524589682678468950f,
- -0.854557988365400420f, 0.519355990165589750f,
- -0.857728610000272010f, 0.514102744193221770f,
- -0.860866938637767090f, 0.508830142543107320f,
- -0.863972856121586700f, 0.503538383725717690f,
- -0.867046245515692760f, 0.498227666972781760f,
- -0.870086991108711350f, 0.492898192229784150f,
- -0.873094978418290090f, 0.487550160148435880f,
- -0.876070094195406490f, 0.482183772079122890f,
- -0.879012226428633530f, 0.476799230063322090f,
- -0.881921264348354940f, 0.471396736825997860f,
- -0.884797098430937790f, 0.465976495767966180f,
- -0.887639620402853820f, 0.460538710958240230f,
- -0.890448723244757880f, 0.455083587126343890f,
- -0.893224301195515210f, 0.449611329654606870f,
- -0.895966249756185110f, 0.444122144570429310f,
- -0.898674465693953930f, 0.438616238538527550f,
- -0.901348847046021920f, 0.433093818853152070f,
- -0.903989293123443340f, 0.427555093430282030f,
- -0.906595704514915330f, 0.422000270799799850f,
- -0.909167983090522380f, 0.416429560097637150f,
- -0.911706032005429770f, 0.410843171057904130f,
- -0.914209755703530690f, 0.405241314004989920f,
- -0.916679059921042590f, 0.399624199845647070f,
- -0.919113851690057770f, 0.393992040061048150f,
- -0.921514039342041790f, 0.388345046698826580f,
- -0.923879532511286740f, 0.382683432365089890f,
- -0.926210242138311380f, 0.377007410216418150f,
- -0.928506080473215480f, 0.371317193951837710f,
- -0.930766961078983710f, 0.365612997804773800f,
- -0.932992798834738850f, 0.359895036534988330f,
- -0.935183509938947610f, 0.354163525420490400f,
- -0.937339011912574850f, 0.348418680249434790f,
- -0.939459223602189920f, 0.342660717311994430f,
- -0.941544065183020700f, 0.336889853392220330f,
- -0.943593458161960390f, 0.331106305759876480f,
- -0.945607325380521170f, 0.325310292162263260f,
- -0.947585591017741090f, 0.319502030816015800f,
- -0.949528180593036670f, 0.313681740398891410f,
- -0.951435020969008340f, 0.307849640041535030f,
- -0.953306040354193860f, 0.302005949319228030f,
- -0.955141168305770670f, 0.296150888243624010f,
- -0.956940335732208820f, 0.290284677254462390f,
- -0.958703474895871490f, 0.284407537211272100f,
- -0.960430519415565790f, 0.278519689385053170f,
- -0.962121404269041470f, 0.272621355449949250f,
- -0.963776065795439840f, 0.266712757474898480f,
- -0.965394441697689290f, 0.260794117915275850f,
- -0.966976471044852070f, 0.254865659604514680f,
- -0.968522094274417380f, 0.248927605745720090f,
- -0.970031253194543970f, 0.242980179903264070f,
- -0.971503890986251780f, 0.237023605994367170f,
- -0.972939952205560070f, 0.231058108280671330f,
- -0.974339382785575860f, 0.225083911359792830f,
- -0.975702130038528460f, 0.219101240156870050f,
- -0.977028142657754390f, 0.213110319916091420f,
- -0.978317370719627540f, 0.207111376192218840f,
- -0.979569765685440520f, 0.201104634842092010f,
- -0.980785280403230430f, 0.195090322016128610f,
- -0.981963869109555240f, 0.189068664149806360f,
- -0.983105487431216290f, 0.183039887955140900f,
- -0.984210092386929030f, 0.177004220412148940f,
- -0.985277642388941220f, 0.170961888760301220f,
- -0.986308097244598560f, 0.164913120489970140f,
- -0.987301418157858430f, 0.158858143333861470f,
- -0.988257567730749460f, 0.152797185258443690f,
- -0.989176509964781010f, 0.146730474455361800f,
- -0.990058210262297010f, 0.140658239332849540f,
- -0.990902635427780010f, 0.134580708507126280f,
- -0.991709753669099530f, 0.128498110793793090f,
- -0.992479534598709970f, 0.122410675199216350f,
- -0.993211949234794500f, 0.116318630911904710f,
- -0.993906970002356060f, 0.110222207293883240f,
- -0.994564570734255420f, 0.104121633872054570f,
- -0.995184726672196820f, 0.098017140329560826f,
- -0.995767414467659820f, 0.091908956497132752f,
- -0.996312612182778000f, 0.085797312344440158f,
- -0.996820299291165670f, 0.079682437971430195f,
- -0.997290456678690210f, 0.073564563599667732f,
- -0.997723066644191640f, 0.067443919563664176f,
- -0.998118112900149180f, 0.061320736302208488f,
- -0.998475580573294770f, 0.055195244349690094f,
- -0.998795456205172410f, 0.049067674327417966f,
- -0.999077727752645360f, 0.042938256934941021f,
- -0.999322384588349540f, 0.036807222941358832f,
- -0.999529417501093140f, 0.030674803176636865f,
- -0.999698818696204250f, 0.024541228522912326f,
- -0.999830581795823400f, 0.018406729905805101f,
- -0.999924701839144500f, 0.012271538285720007f,
- -0.999981175282601110f, 0.006135884649154799f,
- -1.000000000000000000f, 0.000000000000000122f,
- -0.999981175282601110f, -0.006135884649154554f,
- -0.999924701839144500f, -0.012271538285719762f,
- -0.999830581795823400f, -0.018406729905804858f,
- -0.999698818696204250f, -0.024541228522912080f,
- -0.999529417501093140f, -0.030674803176636619f,
- -0.999322384588349540f, -0.036807222941358582f,
- -0.999077727752645360f, -0.042938256934940779f,
- -0.998795456205172410f, -0.049067674327417724f,
- -0.998475580573294770f, -0.055195244349689851f,
- -0.998118112900149180f, -0.061320736302208245f,
- -0.997723066644191640f, -0.067443919563663926f,
- -0.997290456678690210f, -0.073564563599667496f,
- -0.996820299291165780f, -0.079682437971429945f,
- -0.996312612182778000f, -0.085797312344439922f,
- -0.995767414467659820f, -0.091908956497132516f,
- -0.995184726672196930f, -0.098017140329560590f,
- -0.994564570734255530f, -0.104121633872054320f,
- -0.993906970002356060f, -0.110222207293883000f,
- -0.993211949234794610f, -0.116318630911904470f,
- -0.992479534598709970f, -0.122410675199216100f,
- -0.991709753669099530f, -0.128498110793792840f,
- -0.990902635427780010f, -0.134580708507126060f,
- -0.990058210262297120f, -0.140658239332849290f,
- -0.989176509964781010f, -0.146730474455361580f,
- -0.988257567730749460f, -0.152797185258443440f,
- -0.987301418157858430f, -0.158858143333861220f,
- -0.986308097244598670f, -0.164913120489969890f,
- -0.985277642388941330f, -0.170961888760300970f,
- -0.984210092386929140f, -0.177004220412148690f,
- -0.983105487431216400f, -0.183039887955140650f,
- -0.981963869109555240f, -0.189068664149806110f,
- -0.980785280403230430f, -0.195090322016128360f,
- -0.979569765685440520f, -0.201104634842091760f,
- -0.978317370719627650f, -0.207111376192218590f,
- -0.977028142657754390f, -0.213110319916091200f,
- -0.975702130038528570f, -0.219101240156869800f,
- -0.974339382785575860f, -0.225083911359792610f,
- -0.972939952205560180f, -0.231058108280671080f,
- -0.971503890986251890f, -0.237023605994366950f,
- -0.970031253194543970f, -0.242980179903263820f,
- -0.968522094274417380f, -0.248927605745719870f,
- -0.966976471044852180f, -0.254865659604514460f,
- -0.965394441697689400f, -0.260794117915275630f,
- -0.963776065795439950f, -0.266712757474898250f,
- -0.962121404269041580f, -0.272621355449949030f,
- -0.960430519415565900f, -0.278519689385052890f,
- -0.958703474895871600f, -0.284407537211271820f,
- -0.956940335732208940f, -0.290284677254462110f,
- -0.955141168305770780f, -0.296150888243623790f,
- -0.953306040354193970f, -0.302005949319227810f,
- -0.951435020969008450f, -0.307849640041534810f,
- -0.949528180593036790f, -0.313681740398891180f,
- -0.947585591017741200f, -0.319502030816015580f,
- -0.945607325380521280f, -0.325310292162262980f,
- -0.943593458161960390f, -0.331106305759876260f,
- -0.941544065183020810f, -0.336889853392220110f,
- -0.939459223602190030f, -0.342660717311994210f,
- -0.937339011912574960f, -0.348418680249434560f,
- -0.935183509938947720f, -0.354163525420490120f,
- -0.932992798834738960f, -0.359895036534988110f,
- -0.930766961078983820f, -0.365612997804773580f,
- -0.928506080473215590f, -0.371317193951837430f,
- -0.926210242138311490f, -0.377007410216417930f,
- -0.923879532511286850f, -0.382683432365089670f,
- -0.921514039342041900f, -0.388345046698826360f,
- -0.919113851690057770f, -0.393992040061047930f,
- -0.916679059921042700f, -0.399624199845646840f,
- -0.914209755703530690f, -0.405241314004989690f,
- -0.911706032005429880f, -0.410843171057903910f,
- -0.909167983090522490f, -0.416429560097636930f,
- -0.906595704514915450f, -0.422000270799799630f,
- -0.903989293123443450f, -0.427555093430281810f,
- -0.901348847046022030f, -0.433093818853151850f,
- -0.898674465693954040f, -0.438616238538527330f,
- -0.895966249756185220f, -0.444122144570429090f,
- -0.893224301195515320f, -0.449611329654606650f,
- -0.890448723244757990f, -0.455083587126343670f,
- -0.887639620402853930f, -0.460538710958240060f,
- -0.884797098430937900f, -0.465976495767965960f,
- -0.881921264348355050f, -0.471396736825997640f,
- -0.879012226428633640f, -0.476799230063321870f,
- -0.876070094195406600f, -0.482183772079122660f,
- -0.873094978418290200f, -0.487550160148435660f,
- -0.870086991108711460f, -0.492898192229783930f,
- -0.867046245515692870f, -0.498227666972781540f,
- -0.863972856121586810f, -0.503538383725717460f,
- -0.860866938637767310f, -0.508830142543107100f,
- -0.857728610000272120f, -0.514102744193221550f,
- -0.854557988365400530f, -0.519355990165589640f,
- -0.851355193105265310f, -0.524589682678468730f,
- -0.848120344803297230f, -0.529803624686294610f,
- -0.844853565249707230f, -0.534997619887096930f,
- -0.841554977436898440f, -0.540171472729892850f,
- -0.838224705554838190f, -0.545324988422046130f,
- -0.834862874986380120f, -0.550457972936604700f,
- -0.831469612302545460f, -0.555570233019601960f,
- -0.828045045257755800f, -0.560661576197335920f,
- -0.824589302785025290f, -0.565731810783613230f,
- -0.821102514991104760f, -0.570780745886967140f,
- -0.817584813151583710f, -0.575808191417845340f,
- -0.814036329705948520f, -0.580813958095764300f,
- -0.810457198252594770f, -0.585797857456438860f,
- -0.806847553543799450f, -0.590759701858873940f,
- -0.803207531480644940f, -0.595699304492433250f,
- -0.799537269107905240f, -0.600616479383868640f,
- -0.795836904608883570f, -0.605511041404325430f,
- -0.792106577300212280f, -0.610382806276309480f,
- -0.788346427626606340f, -0.615231590580626710f,
- -0.784556597155575240f, -0.620057211763289210f,
- -0.780737228572094600f, -0.624859488142386230f,
- -0.776888465673232440f, -0.629638238914926980f,
- -0.773010453362737100f, -0.634393284163645270f,
- -0.769103337645579700f, -0.639124444863775730f,
- -0.765167265622459070f, -0.643831542889791280f,
- -0.761202385484261890f, -0.648514401022112330f,
- -0.757208846506484790f, -0.653172842953776530f,
- -0.753186799043612630f, -0.657806693297078530f,
- -0.749136394523459260f, -0.662415777590171780f,
- -0.745057785441466060f, -0.666999922303637360f,
- -0.740951125354959110f, -0.671558954847018440f,
- -0.736816568877370020f, -0.676092703575315810f,
- -0.732654271672412820f, -0.680600997795453020f,
- -0.728464390448225420f, -0.685083667772700130f,
- -0.724247082951467000f, -0.689540544737066830f,
- -0.720002507961381880f, -0.693971460889653780f,
- -0.715730825283818710f, -0.698376249408972800f,
- -0.711432195745216660f, -0.702754744457225080f,
- -0.707106781186547680f, -0.707106781186547460f,
- -0.702754744457225300f, -0.711432195745216430f,
- -0.698376249408973030f, -0.715730825283818480f,
- -0.693971460889654000f, -0.720002507961381650f,
- -0.689540544737067050f, -0.724247082951466780f,
- -0.685083667772700360f, -0.728464390448225200f,
- -0.680600997795453240f, -0.732654271672412590f,
- -0.676092703575316030f, -0.736816568877369790f,
- -0.671558954847018660f, -0.740951125354958880f,
- -0.666999922303637580f, -0.745057785441465840f,
- -0.662415777590172010f, -0.749136394523459040f,
- -0.657806693297078750f, -0.753186799043612410f,
- -0.653172842953777090f, -0.757208846506484230f,
- -0.648514401022112220f, -0.761202385484262000f,
- -0.643831542889791500f, -0.765167265622458960f,
- -0.639124444863775950f, -0.769103337645579480f,
- -0.634393284163645930f, -0.773010453362736660f,
- -0.629638238914926870f, -0.776888465673232550f,
- -0.624859488142386450f, -0.780737228572094380f,
- -0.620057211763289430f, -0.784556597155575020f,
- -0.615231590580627260f, -0.788346427626605890f,
- -0.610382806276309360f, -0.792106577300212390f,
- -0.605511041404325660f, -0.795836904608883460f,
- -0.600616479383869310f, -0.799537269107904790f,
- -0.595699304492433130f, -0.803207531480645050f,
- -0.590759701858874280f, -0.806847553543799220f,
- -0.585797857456439090f, -0.810457198252594660f,
- -0.580813958095764970f, -0.814036329705948080f,
- -0.575808191417845230f, -0.817584813151583820f,
- -0.570780745886967370f, -0.821102514991104650f,
- -0.565731810783613450f, -0.824589302785025070f,
- -0.560661576197336480f, -0.828045045257755460f,
- -0.555570233019602180f, -0.831469612302545240f,
- -0.550457972936604920f, -0.834862874986380010f,
- -0.545324988422046800f, -0.838224705554837860f,
- -0.540171472729892740f, -0.841554977436898550f,
- -0.534997619887097260f, -0.844853565249707010f,
- -0.529803624686294940f, -0.848120344803297120f,
- -0.524589682678469390f, -0.851355193105264860f,
- -0.519355990165589420f, -0.854557988365400640f,
- -0.514102744193221770f, -0.857728610000272010f,
- -0.508830142543107320f, -0.860866938637767090f,
- -0.503538383725718020f, -0.863972856121586470f,
- -0.498227666972781810f, -0.867046245515692650f,
- -0.492898192229784200f, -0.870086991108711350f,
- -0.487550160148436330f, -0.873094978418289870f,
- -0.482183772079122550f, -0.876070094195406710f,
- -0.476799230063322140f, -0.879012226428633410f,
- -0.471396736825997860f, -0.881921264348354940f,
- -0.465976495767966630f, -0.884797098430937570f,
- -0.460538710958239890f, -0.887639620402854050f,
- -0.455083587126343950f, -0.890448723244757880f,
- -0.449611329654606930f, -0.893224301195515210f,
- -0.444122144570429760f, -0.895966249756184880f,
- -0.438616238538527600f, -0.898674465693953820f,
- -0.433093818853152120f, -0.901348847046021920f,
- -0.427555093430282470f, -0.903989293123443120f,
- -0.422000270799799520f, -0.906595704514915450f,
- -0.416429560097637210f, -0.909167983090522380f,
- -0.410843171057904190f, -0.911706032005429770f,
- -0.405241314004990360f, -0.914209755703530470f,
- -0.399624199845646730f, -0.916679059921042700f,
- -0.393992040061048210f, -0.919113851690057660f,
- -0.388345046698826630f, -0.921514039342041790f,
- -0.382683432365090340f, -0.923879532511286520f,
- -0.377007410216418200f, -0.926210242138311380f,
- -0.371317193951837770f, -0.928506080473215480f,
- -0.365612997804774300f, -0.930766961078983600f,
- -0.359895036534987940f, -0.932992798834738960f,
- -0.354163525420490450f, -0.935183509938947610f,
- -0.348418680249434840f, -0.937339011912574850f,
- -0.342660717311994880f, -0.939459223602189700f,
- -0.336889853392219940f, -0.941544065183020810f,
- -0.331106305759876540f, -0.943593458161960270f,
- -0.325310292162263310f, -0.945607325380521170f,
- -0.319502030816015410f, -0.947585591017741200f,
- -0.313681740398891460f, -0.949528180593036670f,
- -0.307849640041535090f, -0.951435020969008340f,
- -0.302005949319228530f, -0.953306040354193750f,
- -0.296150888243623680f, -0.955141168305770780f,
- -0.290284677254462440f, -0.956940335732208820f,
- -0.284407537211272150f, -0.958703474895871490f,
- -0.278519689385053610f, -0.960430519415565680f,
- -0.272621355449948870f, -0.962121404269041580f,
- -0.266712757474898530f, -0.963776065795439840f,
- -0.260794117915275900f, -0.965394441697689290f,
- -0.254865659604514350f, -0.966976471044852180f,
- -0.248927605745720150f, -0.968522094274417270f,
- -0.242980179903264120f, -0.970031253194543970f,
- -0.237023605994367670f, -0.971503890986251670f,
- -0.231058108280670940f, -0.972939952205560180f,
- -0.225083911359792920f, -0.974339382785575860f,
- -0.219101240156870100f, -0.975702130038528460f,
- -0.213110319916091920f, -0.977028142657754280f,
- -0.207111376192218480f, -0.978317370719627650f,
- -0.201104634842092070f, -0.979569765685440520f,
- -0.195090322016128660f, -0.980785280403230320f,
- -0.189068664149805970f, -0.981963869109555350f,
- -0.183039887955140950f, -0.983105487431216290f,
- -0.177004220412149000f, -0.984210092386929030f,
- -0.170961888760301690f, -0.985277642388941110f,
- -0.164913120489969760f, -0.986308097244598670f,
- -0.158858143333861530f, -0.987301418157858320f,
- -0.152797185258443740f, -0.988257567730749460f,
- -0.146730474455362300f, -0.989176509964780900f,
- -0.140658239332849160f, -0.990058210262297120f,
- -0.134580708507126360f, -0.990902635427780010f,
- -0.128498110793793590f, -0.991709753669099530f,
- -0.122410675199215960f, -0.992479534598710080f,
- -0.116318630911904770f, -0.993211949234794500f,
- -0.110222207293883310f, -0.993906970002356060f,
- -0.104121633872055070f, -0.994564570734255420f,
- -0.098017140329560451f, -0.995184726672196930f,
- -0.091908956497132821f, -0.995767414467659820f,
- -0.085797312344440227f, -0.996312612182778000f,
- -0.079682437971430695f, -0.996820299291165670f,
- -0.073564563599667357f, -0.997290456678690210f,
- -0.067443919563664231f, -0.997723066644191640f,
- -0.061320736302208995f, -0.998118112900149180f,
- -0.055195244349689712f, -0.998475580573294770f,
- -0.049067674327418029f, -0.998795456205172410f,
- -0.042938256934941084f, -0.999077727752645360f,
- -0.036807222941359331f, -0.999322384588349430f,
- -0.030674803176636484f, -0.999529417501093140f,
- -0.024541228522912389f, -0.999698818696204250f,
- -0.018406729905805164f, -0.999830581795823400f,
- -0.012271538285720512f, -0.999924701839144500f,
- -0.006135884649154416f, -0.999981175282601110f,
- -0.000000000000000184f, -1.000000000000000000f,
- 0.006135884649154049f, -0.999981175282601110f,
- 0.012271538285720144f, -0.999924701839144500f,
- 0.018406729905804796f, -0.999830581795823400f,
- 0.024541228522912021f, -0.999698818696204250f,
- 0.030674803176636116f, -0.999529417501093140f,
- 0.036807222941358964f, -0.999322384588349540f,
- 0.042938256934940716f, -0.999077727752645360f,
- 0.049067674327417661f, -0.998795456205172410f,
- 0.055195244349689344f, -0.998475580573294770f,
- 0.061320736302208627f, -0.998118112900149180f,
- 0.067443919563663871f, -0.997723066644191640f,
- 0.073564563599666982f, -0.997290456678690210f,
- 0.079682437971430334f, -0.996820299291165670f,
- 0.085797312344439852f, -0.996312612182778000f,
- 0.091908956497132446f, -0.995767414467659820f,
- 0.098017140329560090f, -0.995184726672196930f,
- 0.104121633872054700f, -0.994564570734255420f,
- 0.110222207293882930f, -0.993906970002356060f,
- 0.116318630911904410f, -0.993211949234794610f,
- 0.122410675199215600f, -0.992479534598710080f,
- 0.128498110793793220f, -0.991709753669099530f,
- 0.134580708507125970f, -0.990902635427780010f,
- 0.140658239332848790f, -0.990058210262297120f,
- 0.146730474455361940f, -0.989176509964780900f,
- 0.152797185258443380f, -0.988257567730749460f,
- 0.158858143333861170f, -0.987301418157858430f,
- 0.164913120489969390f, -0.986308097244598780f,
- 0.170961888760301330f, -0.985277642388941220f,
- 0.177004220412148640f, -0.984210092386929140f,
- 0.183039887955140590f, -0.983105487431216400f,
- 0.189068664149805610f, -0.981963869109555350f,
- 0.195090322016128300f, -0.980785280403230430f,
- 0.201104634842091710f, -0.979569765685440630f,
- 0.207111376192218120f, -0.978317370719627770f,
- 0.213110319916091560f, -0.977028142657754280f,
- 0.219101240156869740f, -0.975702130038528570f,
- 0.225083911359792550f, -0.974339382785575970f,
- 0.231058108280670580f, -0.972939952205560290f,
- 0.237023605994367310f, -0.971503890986251780f,
- 0.242980179903263760f, -0.970031253194543970f,
- 0.248927605745719790f, -0.968522094274417380f,
- 0.254865659604513960f, -0.966976471044852290f,
- 0.260794117915275510f, -0.965394441697689400f,
- 0.266712757474898200f, -0.963776065795439950f,
- 0.272621355449948530f, -0.962121404269041690f,
- 0.278519689385053280f, -0.960430519415565790f,
- 0.284407537211271770f, -0.958703474895871600f,
- 0.290284677254462050f, -0.956940335732208940f,
- 0.296150888243623290f, -0.955141168305770890f,
- 0.302005949319228140f, -0.953306040354193860f,
- 0.307849640041534760f, -0.951435020969008450f,
- 0.313681740398891130f, -0.949528180593036790f,
- 0.319502030816015080f, -0.947585591017741310f,
- 0.325310292162262930f, -0.945607325380521280f,
- 0.331106305759876210f, -0.943593458161960390f,
- 0.336889853392219610f, -0.941544065183020920f,
- 0.342660717311994540f, -0.939459223602189810f,
- 0.348418680249434510f, -0.937339011912574960f,
- 0.354163525420490070f, -0.935183509938947720f,
- 0.359895036534987610f, -0.932992798834739070f,
- 0.365612997804773960f, -0.930766961078983710f,
- 0.371317193951837380f, -0.928506080473215590f,
- 0.377007410216417870f, -0.926210242138311490f,
- 0.382683432365090000f, -0.923879532511286630f,
- 0.388345046698826300f, -0.921514039342041900f,
- 0.393992040061047880f, -0.919113851690057880f,
- 0.399624199845646400f, -0.916679059921042820f,
- 0.405241314004990030f, -0.914209755703530580f,
- 0.410843171057903860f, -0.911706032005429880f,
- 0.416429560097636870f, -0.909167983090522490f,
- 0.422000270799799180f, -0.906595704514915560f,
- 0.427555093430282140f, -0.903989293123443340f,
- 0.433093818853151790f, -0.901348847046022140f,
- 0.438616238538527270f, -0.898674465693954040f,
- 0.444122144570429420f, -0.895966249756185000f,
- 0.449611329654606600f, -0.893224301195515320f,
- 0.455083587126343610f, -0.890448723244757990f,
- 0.460538710958239560f, -0.887639620402854160f,
- 0.465976495767966290f, -0.884797098430937680f,
- 0.471396736825997590f, -0.881921264348355050f,
- 0.476799230063321870f, -0.879012226428633640f,
- 0.482183772079122220f, -0.876070094195406930f,
- 0.487550160148436000f, -0.873094978418290090f,
- 0.492898192229783870f, -0.870086991108711460f,
- 0.498227666972781480f, -0.867046245515692870f,
- 0.503538383725717800f, -0.863972856121586590f,
- 0.508830142543106990f, -0.860866938637767310f,
- 0.514102744193221550f, -0.857728610000272230f,
- 0.519355990165589200f, -0.854557988365400760f,
- 0.524589682678469060f, -0.851355193105265080f,
- 0.529803624686294610f, -0.848120344803297340f,
- 0.534997619887096930f, -0.844853565249707230f,
- 0.540171472729892410f, -0.841554977436898780f,
- 0.545324988422046460f, -0.838224705554837970f,
- 0.550457972936604700f, -0.834862874986380120f,
- 0.555570233019601840f, -0.831469612302545460f,
- 0.560661576197336250f, -0.828045045257755690f,
- 0.565731810783613120f, -0.824589302785025290f,
- 0.570780745886967030f, -0.821102514991104870f,
- 0.575808191417844890f, -0.817584813151584040f,
- 0.580813958095764640f, -0.814036329705948300f,
- 0.585797857456438750f, -0.810457198252594880f,
- 0.590759701858873940f, -0.806847553543799450f,
- 0.595699304492432910f, -0.803207531480645280f,
- 0.600616479383868970f, -0.799537269107905010f,
- 0.605511041404325320f, -0.795836904608883680f,
- 0.610382806276309140f, -0.792106577300212610f,
- 0.615231590580627040f, -0.788346427626606120f,
- 0.620057211763289100f, -0.784556597155575240f,
- 0.624859488142386120f, -0.780737228572094600f,
- 0.629638238914926650f, -0.776888465673232780f,
- 0.634393284163645600f, -0.773010453362736880f,
- 0.639124444863775620f, -0.769103337645579700f,
- 0.643831542889791160f, -0.765167265622459180f,
- 0.648514401022112000f, -0.761202385484262220f,
- 0.653172842953776760f, -0.757208846506484570f,
- 0.657806693297078530f, -0.753186799043612630f,
- 0.662415777590171450f, -0.749136394523459590f,
- 0.666999922303637690f, -0.745057785441465840f,
- 0.671558954847018330f, -0.740951125354959110f,
- 0.676092703575315700f, -0.736816568877370020f,
- 0.680600997795452690f, -0.732654271672413150f,
- 0.685083667772700470f, -0.728464390448225090f,
- 0.689540544737066830f, -0.724247082951467000f,
- 0.693971460889653780f, -0.720002507961381880f,
- 0.698376249408972360f, -0.715730825283819040f,
- 0.702754744457225300f, -0.711432195745216430f,
- 0.707106781186547350f, -0.707106781186547680f,
- 0.711432195745216100f, -0.702754744457225630f,
- 0.715730825283818820f, -0.698376249408972690f,
- 0.720002507961381540f, -0.693971460889654000f,
- 0.724247082951466670f, -0.689540544737067160f,
- 0.728464390448224860f, -0.685083667772700800f,
- 0.732654271672412930f, -0.680600997795453020f,
- 0.736816568877369790f, -0.676092703575316030f,
- 0.740951125354958880f, -0.671558954847018660f,
- 0.745057785441465500f, -0.666999922303638030f,
- 0.749136394523459370f, -0.662415777590171780f,
- 0.753186799043612300f, -0.657806693297078860f,
- 0.757208846506484230f, -0.653172842953777090f,
- 0.761202385484261890f, -0.648514401022112330f,
- 0.765167265622458850f, -0.643831542889791500f,
- 0.769103337645579480f, -0.639124444863775950f,
- 0.773010453362736660f, -0.634393284163645930f,
- 0.776888465673232550f, -0.629638238914926980f,
- 0.780737228572094380f, -0.624859488142386450f,
- 0.784556597155575020f, -0.620057211763289540f,
- 0.788346427626605890f, -0.615231590580627370f,
- 0.792106577300212390f, -0.610382806276309480f,
- 0.795836904608883340f, -0.605511041404325660f,
- 0.799537269107904790f, -0.600616479383869310f,
- 0.803207531480645050f, -0.595699304492433250f,
- 0.806847553543799220f, -0.590759701858874280f,
- 0.810457198252594660f, -0.585797857456439090f,
- 0.814036329705948080f, -0.580813958095764970f,
- 0.817584813151583710f, -0.575808191417845230f,
- 0.821102514991104540f, -0.570780745886967370f,
- 0.824589302785025070f, -0.565731810783613560f,
- 0.828045045257755350f, -0.560661576197336590f,
- 0.831469612302545240f, -0.555570233019602180f,
- 0.834862874986379900f, -0.550457972936605030f,
- 0.838224705554837750f, -0.545324988422046800f,
- 0.841554977436898440f, -0.540171472729892740f,
- 0.844853565249707010f, -0.534997619887097260f,
- 0.848120344803297120f, -0.529803624686294940f,
- 0.851355193105264860f, -0.524589682678469390f,
- 0.854557988365400530f, -0.519355990165589530f,
- 0.857728610000272010f, -0.514102744193221880f,
- 0.860866938637767090f, -0.508830142543107430f,
- 0.863972856121586360f, -0.503538383725718130f,
- 0.867046245515692650f, -0.498227666972781870f,
- 0.870086991108711350f, -0.492898192229784260f,
- 0.873094978418289870f, -0.487550160148436380f,
- 0.876070094195406710f, -0.482183772079122610f,
- 0.879012226428633410f, -0.476799230063322200f,
- 0.881921264348354830f, -0.471396736825997920f,
- 0.884797098430937460f, -0.465976495767966680f,
- 0.887639620402853930f, -0.460538710958239950f,
- 0.890448723244757770f, -0.455083587126344000f,
- 0.893224301195515100f, -0.449611329654606980f,
- 0.895966249756184880f, -0.444122144570429810f,
- 0.898674465693953820f, -0.438616238538527660f,
- 0.901348847046021920f, -0.433093818853152180f,
- 0.903989293123443120f, -0.427555093430282530f,
- 0.906595704514915450f, -0.422000270799799570f,
- 0.909167983090522380f, -0.416429560097637260f,
- 0.911706032005429660f, -0.410843171057904240f,
- 0.914209755703530470f, -0.405241314004990420f,
- 0.916679059921042700f, -0.399624199845646790f,
- 0.919113851690057660f, -0.393992040061048270f,
- 0.921514039342041790f, -0.388345046698826690f,
- 0.923879532511286520f, -0.382683432365090390f,
- 0.926210242138311380f, -0.377007410216418260f,
- 0.928506080473215480f, -0.371317193951837820f,
- 0.930766961078983490f, -0.365612997804774350f,
- 0.932992798834738960f, -0.359895036534988000f,
- 0.935183509938947500f, -0.354163525420490510f,
- 0.937339011912574850f, -0.348418680249434900f,
- 0.939459223602189700f, -0.342660717311994930f,
- 0.941544065183020810f, -0.336889853392220000f,
- 0.943593458161960270f, -0.331106305759876600f,
- 0.945607325380521170f, -0.325310292162263370f,
- 0.947585591017741200f, -0.319502030816015470f,
- 0.949528180593036670f, -0.313681740398891520f,
- 0.951435020969008340f, -0.307849640041535140f,
- 0.953306040354193640f, -0.302005949319228580f,
- 0.955141168305770780f, -0.296150888243623730f,
- 0.956940335732208820f, -0.290284677254462500f,
- 0.958703474895871490f, -0.284407537211272210f,
- 0.960430519415565680f, -0.278519689385053670f,
- 0.962121404269041580f, -0.272621355449948980f,
- 0.963776065795439840f, -0.266712757474898590f,
- 0.965394441697689290f, -0.260794117915275960f,
- 0.966976471044852180f, -0.254865659604514410f,
- 0.968522094274417270f, -0.248927605745720200f,
- 0.970031253194543970f, -0.242980179903264180f,
- 0.971503890986251670f, -0.237023605994367730f,
- 0.972939952205560180f, -0.231058108280671000f,
- 0.974339382785575860f, -0.225083911359792970f,
- 0.975702130038528460f, -0.219101240156870160f,
- 0.977028142657754170f, -0.213110319916091970f,
- 0.978317370719627650f, -0.207111376192218530f,
- 0.979569765685440520f, -0.201104634842092120f,
- 0.980785280403230320f, -0.195090322016128720f,
- 0.981963869109555350f, -0.189068664149806030f,
- 0.983105487431216290f, -0.183039887955141010f,
- 0.984210092386929030f, -0.177004220412149050f,
- 0.985277642388941110f, -0.170961888760301770f,
- 0.986308097244598670f, -0.164913120489969810f,
- 0.987301418157858320f, -0.158858143333861580f,
- 0.988257567730749460f, -0.152797185258443800f,
- 0.989176509964780900f, -0.146730474455362390f,
- 0.990058210262297120f, -0.140658239332849210f,
- 0.990902635427780010f, -0.134580708507126420f,
- 0.991709753669099410f, -0.128498110793793640f,
- 0.992479534598709970f, -0.122410675199216030f,
- 0.993211949234794500f, -0.116318630911904840f,
- 0.993906970002356060f, -0.110222207293883360f,
- 0.994564570734255420f, -0.104121633872055130f,
- 0.995184726672196930f, -0.098017140329560506f,
- 0.995767414467659820f, -0.091908956497132877f,
- 0.996312612182778000f, -0.085797312344440282f,
- 0.996820299291165670f, -0.079682437971430750f,
- 0.997290456678690210f, -0.073564563599667412f,
- 0.997723066644191640f, -0.067443919563664287f,
- 0.998118112900149180f, -0.061320736302209057f,
- 0.998475580573294770f, -0.055195244349689775f,
- 0.998795456205172410f, -0.049067674327418091f,
- 0.999077727752645360f, -0.042938256934941139f,
- 0.999322384588349430f, -0.036807222941359394f,
- 0.999529417501093140f, -0.030674803176636543f,
- 0.999698818696204250f, -0.024541228522912448f,
- 0.999830581795823400f, -0.018406729905805226f,
- 0.999924701839144500f, -0.012271538285720572f,
- 0.999981175282601110f, -0.006135884649154477f
-};
-
-static ne10_float32_t rfft_twiddle_coef_re[1024];
-static ne10_float32_t rfft_twiddle_coef_im[1024];
-
-
-/**
-* @brief Initializations for Real FFT module
-* @param[in] *S Instance pointer of Real FFT data structure.
-* @param[in] *S_CFFT Instance pointer of Complex FFT data structure.
-* @param[in] fftLen FFT length.
-* @param[in] ifftFlagR 0 = forward Real FFT. 1 = inverse Real FFT
-* @param[in] bitReverseFlag 0 = Result will be in bit-reversed order. 1 = Result will be in normal order
-* @return none.
-* The function initializes the Twiddle factors table and bit reverse table
-*/
-
-ne10_result_t ne10_rfft_init_float(
- ne10_rfft_instance_f32_t * S,
- ne10_cfft_radix4_instance_f32_t * S_CFFT,
- ne10_uint32_t fftLen,
- ne10_uint32_t ifftFlagR)
-{
- ne10_uint32_t i,j;
-
- /* Initialise the default arm status */
- ne10_result_t status = NE10_OK;
-
- /* Initialize the Real FFT length */
- S->fft_len_real = (ne10_uint16_t) fftLen;
-
- /* Initialize the Complex FFT length */
- S->fft_len_by2 = (ne10_uint16_t) fftLen / 2u;
-
- /* Initialize the Flag for selection of RFFT or RIFFT */
- S->ifft_flag_r = (ne10_uint8_t) ifftFlagR;
-
- /* Initialize the Flag for calculation Bit reversal or not */
- //S->bit_reverse_flag_r = (ne10_uint8_t) bitReverseFlag;
-
- S->twid_coef_r_modifier = 1u;
-
- /* Initializations of structure parameters depending on the FFT length */
- switch (S->fft_len_real)
- {
- /* Init table modifier value */
- case 2048u:
- for(i=0,j=0;i<1024;i++)
- {
- rfft_twiddle_coef_re[i] = rfft_twiddle_coef[2*j];
- rfft_twiddle_coef_im[i] = rfft_twiddle_coef[2*j+1];
- j= j+ 1;
- }
- /* Initialize the Twiddle coefficientA pointer */
- S->p_twiddle_A_real = (ne10_float32_t *) rfft_twiddle_coef_re;
- /* Initialize the Twiddle coefficientB pointer */
- S->p_twiddle_B_real = (ne10_float32_t *) rfft_twiddle_coef_im;
- break;
- case 512u:
- for(i=0,j=0;i<512;i++)
- {
- rfft_twiddle_coef_re[i] = rfft_twiddle_coef[2*j];
- rfft_twiddle_coef_im[i] = rfft_twiddle_coef[2*j+1];
- j= j+ 2;
- }
- /* Initialize the Twiddle coefficientA pointer */
- S->p_twiddle_A_real = (ne10_float32_t *) rfft_twiddle_coef_re;
- /* Initialize the Twiddle coefficientB pointer */
- S->p_twiddle_B_real = (ne10_float32_t *) rfft_twiddle_coef_im;
- break;
- case 128u:
- for(i=0,j=0;i<128;i++)
- {
- rfft_twiddle_coef_re[i] = rfft_twiddle_coef[2*j];
- rfft_twiddle_coef_im[i] = rfft_twiddle_coef[2*j+1];
- j= j+ 8;
- }
- /* Initialize the Twiddle coefficientA pointer */
- S->p_twiddle_A_real = (ne10_float32_t *) rfft_twiddle_coef_re;
- /* Initialize the Twiddle coefficientB pointer */
- S->p_twiddle_B_real = (ne10_float32_t *) rfft_twiddle_coef_im;
- break;
- default:
- /* Reporting argument error if rfftSize is not valid value */
- status = NE10_ERR;
- break;
- }
-
- /* Init Complex FFT Instance */
- S->p_cfft = S_CFFT;
-
- if(S->ifft_flag_r)
- {
- /* Initializes the CIFFT Module for Nreal/2 length */
- ne10_cfft_radix4_init_float(S->p_cfft, S->fft_len_by2, 1u);
- }
- else
- {
- /* Initializes the CFFT Module for Nreal/2 length */
- ne10_cfft_radix4_init_float(S->p_cfft, S->fft_len_by2, 0u);
- }
-
- /* return the status of RFFT Init function */
- return (status);
-
-}
-
-
#include "seatest.h"
-void test_fixture_cfft (void);
-void test_fixture_rfft (void);
-void test_fixture_fft_c2c_1d_float32(void);
-void test_fixture_fft_c2c_1d_int32(void);
-void test_fixture_fft_c2c_1d_int16(void);
-void test_fixture_fft_r2c_1d_float32(void);
-void test_fixture_fft_r2c_1d_int32(void);
-void test_fixture_fft_r2c_1d_int16(void);
+void test_fixture_fft_c2c_1d_float32 (void);
+void test_fixture_fft_c2c_1d_int32 (void);
+void test_fixture_fft_c2c_1d_int16 (void);
+void test_fixture_fft_r2c_1d_float32 (void);
+void test_fixture_fft_r2c_1d_int32 (void);
+void test_fixture_fft_r2c_1d_int16 (void);
void test_fixture_fir (void);
void test_fixture_fir_decimate (void);
void test_fixture_fir_interpolate (void);
void all_tests (void)
{
- test_fixture_cfft();
- test_fixture_rfft();
test_fixture_fft_c2c_1d_float32();
test_fixture_fft_c2c_1d_int32();
test_fixture_fft_c2c_1d_int16();
+++ /dev/null
-/*
- * Copyright 2012-14 ARM Limited
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of ARM Limited nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * NE10 Library : test_suite_cfft.c
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-
-#include "NE10_dsp.h"
-#include "seatest.h"
-#include "unit_test_common.h"
-
-/* ----------------------------------------------------------------------
-** Global defines
-** ------------------------------------------------------------------- */
-
-/* Max FFT Length 1024 and double buffer for real and imag */
-#define TEST_LENGTH_SAMPLES (1024 * 2)
-
-#define TEST_COUNT 5000
-
-/* ----------------------------------------------------------------------
-** Test input data for F32
-** Generated by the MATLAB rand() function
-** ------------------------------------------------------------------- */
-
-static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] =
-{
- -0.432565, 0.864397, -1.665584, 0.094203, 0.125332, -0.851909, 0.287676, 0.873504,
- -1.146471, -0.438039, 1.190915, -0.429661, 1.189164, -1.102729, -0.037633, 0.396247,
- 0.327292, -0.964925, 0.174639, 0.168449, -0.186709, -1.965359, 0.725791, -0.744302,
- -0.588317, -0.552307, 2.183186, -0.819726, -0.136396, 1.109142, 0.113931, -0.614946,
- 1.066768, -0.254635, 0.059281, -0.269830, -0.095648, -1.671994, -0.832349, -1.876045,
- 0.294411, 0.575006, -1.336182, -0.866133, 0.714325, -2.116523, 1.623562, -0.964466,
- -0.691776, 0.212729, 0.857997, 0.477917, 1.254001, 0.100658, -1.593730, 0.297433,
- -1.440964, 0.570148, 0.571148, -1.624496, -0.399886, 0.643443, 0.689997, 0.681861,
- 0.815622, 0.014655, 0.711908, -1.301541, 1.290250, -1.284587, 0.668601, 0.812213,
- 1.190838, 0.838548, -1.202457, 1.420321, -0.019790, -0.989752, -0.156717, -1.183229,
- -1.604086, -0.466259, 0.257304, -0.365943, -1.056473, 1.118333, 1.415141, -0.465615,
- -0.805090, -1.560800, 0.528743, -0.283103, 0.219321, -1.322941, -0.921902, -0.196238,
- -2.170674, 0.419039, -0.059188, 0.742318, -1.010634, -0.143032, 0.614463, -2.161943,
- 0.507741, -0.644226, 1.692430, 1.439590, 0.591283, -0.846917, -0.643595, 0.057340,
- 0.380337, 0.643408, -1.009116, -0.670431, -0.019511, -0.003142, -0.048221, 0.352931,
- 0.000043, 1.179502, -0.317859, -0.685902, 1.095004, 1.676789, -1.873990, -0.255309,
- 0.428183, -0.647548, 0.895638, -0.182214, 0.730957, 0.851800, 0.577857, -0.306550,
- 0.040314, -0.440529, 0.677089, -0.611472, 0.568900, -0.485207, -0.255645, 1.197019,
- -0.377469, 1.394788, -0.295887, 0.165368, -1.475135, -0.509967, -0.234004, 1.377717,
- 0.118445, 1.298518, 0.314809, -0.130117, 1.443508, 0.740249, -0.350975, 1.332017,
- 0.623234, -0.278071, 0.799049, -0.327993, 0.940890, -0.012527, -0.992092, 0.903179,
- 0.212035, -1.112463, 0.237882, -0.839211, -1.007763, 0.035534, -0.742045, -1.246529,
- 1.082295, 0.884505, -0.131500, 2.538334, 0.389880, 1.316795, 0.087987, 1.442213,
- -0.635465, 1.466919, -0.559573, -1.107052, 0.443653, -0.460936, -0.949904, -0.020296,
- 0.781182, -0.045998, 0.568961, -0.544487, -0.821714, 0.917035, -0.265607, -0.019418,
- -1.187777, 0.774630, -2.202321, -0.594053, 0.986337, 1.820276, -0.518635, 0.524719,
- 0.327368, 0.685938, 0.234057, -0.901304, 0.021466, 2.136023, -1.003944, 0.320126,
- -0.947146, -1.584119, -0.374429, -0.502514, -1.185886, 0.737926, -1.055903, -0.525392,
- 1.472480, -1.532115, 0.055744, -0.153786, -1.217317, -0.646732, -0.041227, -1.341450,
- -1.128344, 0.271534, -1.349278, 0.339541, -0.261102, 1.674580, 0.953465, 0.335636,
- 0.128644, -0.550556, 0.656468, -0.286507, -1.167819, -0.814791, -0.460605, 0.053508,
- -0.262440, -0.427841, -1.213152, 0.463860, -1.319437, 0.416588, 0.931218, 0.191634,
- 0.011245, -1.284328, -0.645146, -1.006709, 0.805729, 0.041786, 0.231626, -0.757276,
- -0.989760, 2.278871, 1.339586, -1.800414, 0.289502, 0.176299, 1.478917, -0.263794,
- 1.138028, -0.833888, -0.684139, 0.220767, -1.291936, -0.882230, -0.072926, 0.856510,
- -0.330599, -0.925690, -0.843628, -0.914070, 0.497770, -1.327629, 1.488490, 1.611727,
- -0.546476, -0.561827, -0.846758, 0.276041, -0.246337, -0.227653, 0.663024, 0.184183,
- -0.854197, 0.082830, -1.201315, 0.452035, -0.119869, 0.101411, -0.065294, -0.365760,
- 0.485296, -0.091035, -0.595491, 0.739457, -0.149668, 0.940328, -0.434752, -0.028961,
- -0.079330, -0.928710, 1.535152, 0.745038, -0.606483, 2.488098, -1.347363, 0.691925,
- 0.469383, -0.941710, -0.903567, 0.384997, 0.035880, -0.278887, -0.627531, -0.982944,
- 0.535398, 1.620751, 0.552884, -3.051825, -0.203690, -0.048454, -2.054325, 0.318202,
- 0.132561, -0.635514, 1.592941, -1.028736, 1.018412, 1.641380, -1.580402, 0.019495,
- -0.078662, -2.047269, -0.681657, -1.129305, -1.024553, -2.355586, -1.234353, -0.561249,
- 0.288807, -0.087973, -0.429303, 1.073777, 0.055801, -0.311909, -0.367874, -1.478774,
- -0.464973, -0.043979, 0.370961, -0.799868, 0.728283, -0.865158, 2.112160, -0.119007,
- -1.357298, -0.214830, -1.022610, 0.007315, 1.037834, -1.039472, -0.389800, 0.832836,
- -1.381266, -0.746695, 0.315543, 0.349276, 1.553243, 0.484013, 0.707894, -1.007859,
- 1.957385, 1.003469, 0.504542, -2.676089, 1.864529, 0.016822, -0.339812, -1.443245,
- -1.139779, 0.106502, -0.211123, -0.523471, 1.190245, 0.968581, -1.116209, -0.675762,
- 0.635274, -1.086512, -0.601412, 0.792917, 0.551185, 1.607967, -1.099840, -1.386200,
- 0.085991, 0.858656, -2.004563, 0.207575, -0.493088, 1.048865, 0.462048, -0.784071,
- -0.321005, -0.326146, 1.236556, -0.415365, -0.631280, -0.340785, -2.325211, 0.565016,
- -1.231637, 0.441829, 1.055648, -0.109207, -0.113224, 0.430549, 0.379224, 0.693041,
- 0.944200, -0.547589, -2.120427, 0.944736, -0.644679, -0.792557, -0.704302, 0.280168,
- -1.018137, -1.642974, -0.182082, 0.314746, 1.521013, 1.030286, -0.038439, 1.751701,
- 1.227448, -0.251608, -0.696205, 1.819214, 0.007524, 1.234399, -0.782893, -2.339612,
- 0.586939, -0.038625, -0.251207, 0.007293, 0.480136, -0.565029, 0.668155, 1.108257,
- -0.078321, 0.520474, 0.889173, -0.497671, 2.309287, -0.177898, 0.524639, 1.091016,
- -0.011787, 1.159731, 0.913141, 0.675004, 0.055941, 2.291756, -1.107070, -1.398845,
- 0.485498, -1.532820, -0.005005, 0.403012, -0.276218, -0.466509, 1.276452, 0.428272,
- 1.863401, -1.390515, -0.522559, -0.613866, 0.103424, -0.995531, -0.807649, -1.106047,
- 0.680439, 0.345156, -2.364590, 1.638406, 0.990115, -0.550912, 0.218899, 1.664607,
- 0.261662, -0.048037, 1.213444, 0.662008, -0.274667, -0.296988, -0.133134, 1.660689,
- -1.270500, 0.057131, -1.663606, -2.227418, -0.703554, 1.245199, 0.280880, -1.158628,
- -0.541209, 0.867397, -1.333531, -0.801315, 1.072686, -0.263610, -0.712085, 0.751058,
- -0.011286, 1.795228, -0.000817, 0.984351, -0.249436, 0.046669, 0.396575, 0.323443,
- -0.264013, 0.522442, -1.664011, -0.788527, -1.028975, 0.734071, 0.243095, 0.080416,
- -1.256590, -0.543988, -0.347183, 0.316257, -0.941372, -1.408710, -1.174560, 0.186814,
- -1.021142, -2.262433, -0.401667, 0.500375, 0.173666, -0.224826, -0.116118, -1.455474,
- 1.064119, -0.015503, -0.245386, -0.437796, -1.517539, 0.907150, 0.009734, 1.284133,
- 0.071373, -0.730091, 0.316536, -1.472669, 0.499826, -1.594354, 1.278084, 0.497586,
- -0.547816, 0.741050, 0.260808, -0.355039, -0.013177, -0.810574, -0.580264, 0.238212,
- 2.136308, 1.505073, -0.257617, -1.189561, -1.409528, -0.194823, 1.770101, 0.624787,
- 0.325546, -1.278067, -1.119040, 0.100259, 0.620350, -0.342182, 1.269782, -0.002015,
- -0.896043, -0.498406, 0.135175, 1.049755, -0.139040, -1.670559, -1.163395, -2.014370,
- 1.183720, 0.986616, -0.015430, -0.060483, 0.536219, 1.192941, -0.716429, 2.685580,
- -0.655559, 0.853734, 0.314363, 1.005549, 0.106814, -0.000982, 1.848216, -0.560458,
- -0.275106, -0.191396, 2.212554, -0.048913, 1.508526, 0.600460, -1.945079, -1.994642,
- -1.680543, -0.965134, -0.573534, -0.943199, -0.185817, -0.200671, 0.008934, 0.556167,
- 0.836950, 2.018381, -0.722271, 1.813736, -0.721490, -0.112448, -0.201181, -0.889976,
- -0.020464, -0.726843, 0.278890, 0.763502, 1.058295, -0.598514, 0.621673, 0.723730,
- -1.750615, -0.867938, 0.697348, 0.841673, 0.811486, -0.850938, 0.636345, 0.933427,
- 1.310080, 0.485960, 0.327098, -0.216203, -0.672993, -0.381497, -0.149327, -1.427041,
- -2.449018, -1.487669, 0.473286, -2.515103, 0.116946, -1.306210, -0.591104, -0.376950,
- -0.654708, -1.107504, -1.080662, 0.312778, -0.047731, -0.845240, 0.379345, 0.237598,
- -0.330361, -0.918767, -0.499898, 2.441691, -0.035979, 0.083121, -0.174760, 0.266263,
- -0.957265, -0.762727, 1.292548, -2.492805, 0.440910, -0.163872, 1.280941, 0.701879,
- -0.497730, -0.855063, -1.118717, 0.373834, 0.807650, -0.504156, 0.041200, -1.074581,
- -0.756209, -0.632952, -0.089129, 1.854859, -2.008850, 0.467423, 1.083918, 1.316068,
- -0.981191, 1.779038, -0.688489, -0.384638, 1.339479, 0.895129, -0.909243, 0.473642,
- -0.412858, -0.023571, -0.506163, 1.612449, 1.619748, 0.839672, 0.080901, 0.247906,
- -1.081056, -0.540454, -1.124518, -1.808434, 1.735676, -0.266203, 1.937459, 0.769024,
- 1.635068, 0.076724, -1.255940, 0.078595, -0.213538, 1.063096, -0.198932, 0.349197,
- 0.307499, 0.755430, -0.572325, -0.624003, -0.977648, -0.421374, -0.446809, 0.596029,
- 1.082092, -1.389987, 2.372648, -2.634668, 0.229288, -0.806934, -0.266623, 0.091930,
- 0.701672, 2.371014, -0.487590, -0.008736, 1.862480, 2.122155, 1.106851, -0.684231,
- -1.227566, -0.413033, -0.669885, -0.857683, 1.340929, 0.970899, 0.388083, -1.064209,
- 0.393059, 1.500750, -1.707334, -0.470707, 0.227859, 1.549526, 0.685633, 0.089955,
- -0.636790, -1.859541, -1.002606, -1.408604, -0.185621, 0.115434, -1.054033, -0.480661,
- -0.071539, 1.236739, 0.279198, -2.015435, 1.373275, 0.563520, 0.179841, -0.043520,
- -0.542017, 0.460448, 1.634191, 0.282654, 0.825215, 1.060032, 0.230761, 0.547056,
- 0.671634, 0.220117, -0.508078, -1.909701, 0.856352, 1.117189, 0.268503, -1.607931,
- 0.624975, -1.443700, -1.047338, -0.314551, 1.535670, 0.766433, 0.434426, 0.174865,
- -1.917136, 1.316849, 0.469940, 0.958586, 1.274351, 0.647691, 0.638542, 0.092485,
- 1.380782, -0.411274, 1.319843, 0.346629, -0.909429, -0.348980, -2.305605, -0.200402,
- 1.788730, 0.393261, 0.390798, -1.852647, 0.020324, 0.996919, -0.405977, -0.481047,
- -1.534895, -0.295456, 0.221373, -0.309043, -1.374479, -0.383007, -0.839286, 1.023837,
- -0.208643, 1.360480, 0.755913, -0.705832, 0.375734, -0.609368, -1.345413, -0.112009,
- 1.481876, 0.905851, 0.032736, -0.592901, 1.870453, 2.144165, -1.208991, 0.748569,
- -0.782632, -1.654092, -0.767299, -0.977911, -0.107200, -0.347368, -0.977057, -0.107734,
- -0.963988, -0.402626, -2.379172, -1.065617, -0.838188, 0.878523, 0.257346, 0.460551,
- -0.183834, -1.078622, -0.167615, 0.644741, -0.116989, 0.605399, 0.168488, 0.055073,
- -0.501206, -0.005505, -0.705076, -0.099485, 0.508165, -0.225578, -0.420922, -1.026005,
- 0.229133, -0.732352, -0.959497, -1.405453, -0.146043, -1.119476, 0.744538, 0.186157,
- -0.890496, -0.314564, 0.139062, -0.088767, -0.236144, -0.160919, -0.075459, -1.936278,
- -0.358572, 2.751755, -2.077635, 1.292404, -0.143546, -0.233895, 1.393341, -0.193140,
- 0.651804, -0.104019, -0.377134, -0.814926, -0.661443, -0.108576, 0.248958, -1.569143,
- -0.383516, 0.212114, -0.528480, 1.678775, 0.055388, 0.379010, 1.253769, -0.668419,
- -2.520004, 1.727974, 0.584856, 1.693388, -1.008064, -0.787045, 0.944285, -1.874471,
- -2.423957, 0.023853, -0.223831, 1.518454, 0.058070, 0.534477, -0.424614, -1.355467,
- -0.202918, 0.280923, -1.513077, 0.182100, -1.126352, -0.256567, -0.815002, 0.858411,
- 0.366614, 0.057070, -0.586107, -1.462498, 1.537409, -2.326166, 0.140072, 2.562645,
- -1.862767, -0.639321, -0.454193, 0.706010, -0.652074, 0.627374, 0.103318, -1.465271,
- -0.220632, 0.548954, -0.279043, 1.894620, -0.733662, 0.901939, -0.064534, -0.684842,
- -1.444004, -0.410065, 0.612340, -1.834344, -1.323503, -0.357176, -0.661577, -0.081545,
- -0.146115, -0.557160, 0.248085, -1.778299, -0.076633, 0.038674, 1.738170, 1.603402,
- 1.621972, 0.428308, 0.626436, -0.321679, 0.091814, 0.158667, -0.807607, -1.831225,
- -0.461337, 1.083138, -1.405969, -0.442318, -0.374530, 0.213002, -0.470911, -0.429068,
- 1.751296, 1.112692, 0.753225, 1.054038, 0.064989, 0.192183, -0.292764, -0.175647,
- 0.082823, 0.561421, 0.766191, 1.251021, 2.236850, -0.419377, 0.326887, -1.464906,
- 0.863304, -0.953308, 0.679387, 1.384259, 0.554758, -0.966553, 1.001630, -0.002071,
- 1.259365, 0.508627, 0.044151, 0.346342, -0.314138, -1.396941, 0.226708, 0.520130,
- 0.996692, -0.349830, 1.215912, 0.530292, -0.542702, -0.256369, 0.912228, -1.617286,
- -0.172141, 1.556859, -0.335955, 0.821068, 0.541487, 0.206095, 0.932111, -1.697353,
- -0.570253, -0.168337, -1.498605, 0.828194, -0.050346, 0.047643, 0.553025, -0.815924,
- 0.083498, 0.927294, 1.577524, 1.072150, -0.330774, 0.775039, 0.795155, -1.018418,
- -0.784800, -1.575652, -1.263121, 1.943766, 0.666655, 1.479345, -1.392632, 1.581105,
- -1.300562, -0.514692, -0.605022, -0.907108, -1.488565, 2.258803, 0.558543, 0.040773,
- -0.277354, 0.242866, -1.293685, -0.346606, -0.888435, 1.047313, -0.986520, -0.267101,
- -0.071618, -0.821778, -2.414591, 0.035640, -0.694349, 1.483087, -1.391389, 0.361272,
- 0.329648, 0.623759, 0.598544, -0.910249, 0.147175, -2.556832, -0.101439, 1.665057,
- -2.634981, -0.959581, 0.028053, -0.516870, -0.876310, -0.004631, -0.265477, -0.435447,
- -0.327578, 0.881754, -1.158247, 0.497467, 0.580053, -0.853947, 0.239756, 0.541670,
- -0.350885, 0.551414, 0.892098, -0.137816, 1.578299, -0.643850, -1.108174, -1.300456,
- -0.025931, -1.254519, -1.110628, 1.840194, 0.750834, -0.658852, 0.500167, -0.275497,
- -0.517261, 1.482824, -0.559209, -0.008348, -0.753371, 0.090242, 0.925813, -1.871995,
- -0.248520, -2.196485, -0.149835, -1.042585, -1.258415, 0.545135, 0.312620, -1.164465,
- 2.690277, 0.796787, 0.289696, -0.250295, -1.422803, -1.112213, 0.246786, -0.273161,
- -1.435773, -1.013451, 0.148573, 0.872165, -1.693073, -1.055581, 0.719188, 0.848015,
- 1.141773, 0.301299, 1.551936, -0.682287, 1.383630, -0.507902, -0.758092, -1.029466,
- 0.442663, -0.285836, 0.911098, -1.676208, -1.074086, -0.497489, 0.201762, -0.386898,
- 0.762863, 0.043459, -1.288187, -0.655169, -0.952962, -0.146682, 0.778175, 0.085724,
- -0.006331, -0.961628, 0.524487, 0.459634, 1.364272, -0.516323, 0.482039, -0.735290,
- -0.787066, 1.470784, 0.751999, 0.997273, -0.166888, 1.306983, -0.816228, 0.101254,
- 2.094065, 1.577574, 0.080153, 2.966203, -0.937295, -0.293681, 0.635739, 1.343905,
- 1.682028, -0.749792, 0.593634, -0.698793, 0.790153, -1.302117, 0.105254, -0.171760,
- -0.158579, 0.711281, 0.870907, -0.161837, -0.194759, 0.203779, 0.075474, 0.314225,
- -0.526635, 0.216177, -0.685484, 0.249631, -0.268388, -1.610941, -1.188346, -0.451156,
- 0.248579, -1.600001, 0.102452, -0.145813, -0.041007, 1.192038, -2.247582, 0.285689,
- -0.510776, 0.951135, 0.249243, -0.965380, 0.369197, -1.109424, 0.179197, -0.616816,
- -0.037283, -1.160418, -1.603310, 0.271828, 0.339372, -1.964992, -0.131135, -0.199710,
- 0.485190, 1.792235, 0.598751, -0.079401, -0.086031, 0.764729, 0.325292, 0.660399,
- -0.335143, -1.688575, -0.322449, -0.429974, -0.382374, 0.072841, -0.953371, 1.479787,
- 0.233576, -0.178427, 1.235245, -1.206583, -0.578532, 0.391987, -0.501537, -0.046549,
- 0.722864, 0.952528, 0.039498, 0.492656, 1.541279, 0.307890, -1.701053, -1.667987,
- -1.033741, 0.978541, -0.763708, -0.857147, 2.176426, -0.442284, 0.431612, 0.503775,
- -0.443765, -0.188553, 0.029996, -0.521717, -0.315671, 0.211892, 0.977846, -0.686392,
- 0.018295, -0.884268, 0.817963, -0.059569, 0.702341, -2.475835, -0.231271, 0.565874,
- -0.113690, -0.925429, 0.127941, -0.941007, -0.799410, -0.190420, -0.238612, 0.128090,
- -0.089463, -0.067882, -1.023264, 1.471262, 0.937538, 1.067682, -1.131719, 0.229875,
- -0.710702, -0.005993, -1.169501, -1.168195, 1.065437, -0.901779, -0.680394, 0.323208,
- -1.725773, -0.012327, 0.813200, 0.554138, 1.441867, 0.062695, 0.672272, -0.642997,
- 0.138665, -0.331304, -0.859534, -0.267175, -0.752251, -0.247761, 1.229615, 0.777400,
- 1.150754, 0.343907, -0.608025, 0.863760, 0.806158, 0.858534, 0.217133, 0.687307,
- -0.373461, -1.299311, -0.832030, 0.603825, 0.286866, -1.623527, -1.818892, -0.620491,
- -1.573051, 0.643601, 2.015666, -1.145666, -0.071982, 0.844191, 2.628909, -0.042906,
- -0.243317, -0.504335, 0.173276, -0.443272, 0.923207, 2.083052, -0.178553, 1.858875,
- -0.521705, 0.926594, 1.431962, 0.295415, -0.870117, -0.266329, 0.807542, 0.742388,
- -0.510635, -0.080934, 0.743514, 0.935612, 0.847898, -0.835204, -0.829901, -0.745189,
- 0.532994, 1.361685, 1.032848, -0.306150, -1.052024, 0.878438, 0.362114, -1.100646,
- -0.036787, -0.489116, -1.227636, -1.350240, -0.275099, 0.787780, -0.160435, 0.823409,
- -1.083575, -0.679319, -1.954213, 0.597177, -0.909487, -1.171166, -0.005579, 2.037004,
- -1.723490, -0.440698, 1.263077, -0.278440, -0.600433, 0.270728, -2.063925, 0.400994,
- 0.110911, 0.073894, 1.487614, -1.040991, 0.053002, -1.453535, 0.161981, 0.234838,
- -0.026878, 1.049677, 0.173576, 0.341401, 0.882168, -0.992679, 0.182294, -1.617417,
- 0.755295, -0.444344, 0.508035, -1.055734, 0.131880, -1.498971, 0.280104, 0.178499,
- -0.982848, -0.957286, -0.944087, 1.314400, -0.013058, 0.030501, 0.354345, 0.072074,
- -0.894709, 0.555023, 0.812111, -0.729819, 0.109537, 1.096371, 2.731644, 1.335793,
- 0.411079, 0.411439, -1.306862, 1.632891, 0.383806, 0.243401, 0.499504, -0.003108,
- -0.510786, -0.738833, 0.234922, -1.767899, -0.597825, 1.794224, 0.020771, 1.281544,
- 0.419443, 0.128371, 1.191104, -0.214895, 0.771214, -0.370359, -2.644222, -1.158590,
- 0.285430, -1.478329, 0.826093, -1.475635, -0.008122, 0.651251, 0.858438, -0.092348,
- 0.774788, -0.367252, 1.305945, 0.817150, 1.231503, 1.235605, 0.958564, 0.336264,
- -1.654548, 0.231398, -0.990396, 0.046288, 0.685236, -0.313591, -0.974870, -1.073320,
- -0.606726, -0.063315, 0.686794, 0.915108, 0.020049, -1.675039, 1.063801, 0.918174,
- -1.341050, 1.023589, 0.479510, -0.904933, -1.633974, -1.921451, -1.442665, -0.136733,
- 0.293781, 1.363955, -0.140364, 0.783375, -1.130341, 0.527358, -0.292538, -0.746975,
- -0.582536, 1.711351, -0.896348, -0.151251, 0.248601, 1.519014, -1.489663, -0.399837,
- 0.313509, -2.012764, -2.025084, 0.714259, 0.528990, -1.927481, 0.343471, -0.873411,
- 0.758193, -0.361042, -0.691940, -1.607898, 0.680179, -0.776993, -1.072541, -0.320873,
- 0.899772, -1.313487, -2.123092, -0.108506, 0.284712, -1.017612, -0.733323, 1.300697,
- -0.773376, 1.216150, 0.151842, -1.046754, -0.336843, 0.123953, 0.970761, -1.106525,
- -0.107236, 0.490938, 1.013492, -1.681596, -0.475347, -0.171544, 0.068948, 0.723101,
- 0.398592, -0.777245, 1.116326, -0.093156, 0.620451, 0.167638, -0.287674, -0.637968,
- -1.371773, -0.104036, -0.685868, 0.631968, 0.331685, -1.687695, -0.997722, -0.517832,
- 0.291418, 0.086520, 1.107078, 2.199959, 0.244959, 0.760919, 0.164976, -1.456448,
- 0.406231, -1.774895, 1.215981, 0.295850, 1.448424, 1.018757, -1.025137, -0.643993,
- 0.205418, -1.111593, 0.588882, 1.458524, -0.264024, 0.103186, 2.495318, -0.638423,
- 0.855948, -0.025377, -0.850954, -1.301284, 0.811879, 0.344693, 0.700242, -1.360544,
- 0.759938, 0.235772, -1.712909, 2.432551, 1.537021, -0.352882, -1.609847, -0.253408,
- 1.109526, -0.078679, -1.109704, -1.203886, 0.385469, 0.454205, 0.965231, 0.669661,
- 0.818297, -0.402472, 0.037049, 0.759026, -0.926012, 1.281841, -0.111919, 0.803598,
- -0.803030, -1.204083, -1.665006, -0.826183, -0.901401, -0.711036, 0.588350, 0.436303,
- 0.554159, 1.021926, -0.415173, -0.362657, 0.061795, -0.298298, 0.457432, 0.733463,
- 0.199014, 0.340668, 0.257558, -1.106307, 2.080730, -2.043328, -2.277237, -0.358905,
- 0.339022, 0.595400, 0.289894, 0.375452, 0.662261, 1.202134, -0.580860, 0.543575,
- 0.887752, 0.288461, 0.171871, -0.665957, 0.848821, -0.151442, 0.963769, -0.659762,
- 1.321918, -1.980876, -0.064345, -1.824813, 1.317053, -0.255301, 0.228017, -0.826776,
- -1.429637, 1.532493, -0.149701, 1.704903, -0.504968, -0.214990, -1.729141, 1.705440,
- -0.417472, 0.371870, -0.614969, -0.264290, 0.720777, 2.503227, 0.339364, 0.735706,
- 0.882845, -1.099957, 0.284245, -1.292489, -0.145541, 1.249176, -0.089646, 0.198285,
- 0.289161, -0.704900, 1.164831, 0.384689, 0.805729, -0.744461, -1.355643, -0.085510,
- 0.120893, -0.760827, -0.222178, 0.588159, 0.571732, -0.488786, -0.300140, -0.790720,
- 1.134277, 0.186925, -0.179356, 1.323236, -1.467067, -0.252240, 1.395346, 0.394448,
- 0.440836, 1.221421, 0.565384, -0.630894, -0.693623, -0.172785, 0.833869, 0.590400,
- -2.237378, 0.485708, 1.097644, -0.345472, -0.001617, 0.387311, -1.614573, 0.004570,
- -1.228727, 0.384520, 0.207405, -1.412140, 0.220942, -1.196011, -1.006073, 0.047957,
- -0.453067, 0.422308, 1.399453, 1.080871, -0.461964, -0.072034, 0.032716, -0.752875,
- 0.798783, -0.555757, 0.896816, -1.304965, 0.137892, -0.112053, -1.619146, 0.367034,
- -1.646606, -0.327046, 0.428707, -0.336445, -0.737231, -0.388655, 0.564926, 1.680910,
- -1.384167, 0.707246, 0.460268, 1.030518, 0.629384, 0.305059, 0.379847, -1.121984,
- -1.013330, -0.122902, -0.347243, -0.693724, 0.441912, 0.875911, -1.590240, -1.094234,
- -0.701417, 0.925002, -1.077601, -0.229572, 1.002220, 0.225260, 1.729481, -0.335907,
- 0.709032, 1.218315, -0.747897, -0.096137, 0.228862, 0.120568, -0.223497, 1.004884,
- -0.853275, -0.657371, 0.345627, 0.405173, 0.109764, 0.890271, -1.133039, 1.449045,
- -0.683124, 1.382923, -0.277856, 1.176089, 0.654790, -1.729798, -1.248394, 0.104649,
- -0.597539, -1.487626, -0.481813, -1.743067, 0.983372, -0.510919, 1.762121, -0.067293,
- 1.427402, -0.063941, 0.911763, -2.196356, 0.326823, 1.106144, 0.069619, 1.526127,
- -1.499763, -0.687166, -0.418223, 1.160927, -0.021037, -0.425076, 0.228425, -0.060661,
- -1.008196, -1.899981, -0.664622, 1.219038, 0.558177, 0.901112, -1.188542, 0.823237,
- -0.775481, 1.882210, 0.271042, 0.238406, 1.534976, -0.429217, -1.052283, -1.797562,
- 0.625559, 1.467291, -0.797626, 1.030351, -0.313522, 0.892838, -0.602210, 1.395587,
- 1.259060, 0.416488, 0.858484, 1.545120, -2.105292, 0.664929, -0.360937, 0.706299,
- 0.553557, 2.759293, -1.556384, -0.051700, -0.206666, -0.839668, -0.425568, 1.555326,
- 0.493778, 0.149258, -0.870908, -1.684651, 0.079828, -0.569951, -0.521619, 0.488593,
- -1.413861, -0.029233, -0.384293, -2.238255, -0.457922, -2.117238, -0.291471, 0.152666,
- -0.301224, -1.353589, -1.588594, -0.206453, 1.094287, -1.204119, 1.324167, -0.436854,
- -0.126480, 0.047149, -0.737164, 2.478964, 0.213719, -1.288683, -0.400529, 0.565879,
- 0.064938, -0.489134, -1.757996, 0.571975, 1.686748, -0.533281, 0.327400, 0.764733,
- 0.715967, -1.748576, 1.598648, -0.729925, -2.064741, -0.004472, -0.743632, 0.535993,
- 0.176185, -0.021122, 0.527839, -0.669683, -0.553153, -0.056435, 0.298280, -0.213079,
- -1.226607, 0.432893, -0.189676, -0.065721, -0.301713, -2.272297, 0.956956, -1.046249,
- -0.533366, -0.478385, -0.901082, -0.765758, -0.892552, -0.093739, 0.278717, -1.139068,
- -0.745807, -0.691504, 1.603464, -3.596550, 0.574270, 0.463068, 0.320655, -1.966329,
- -0.151383, 1.222704, 0.315762, 0.237313, 1.343703, -1.015985, -2.237832, 0.640365
-};
-
-/* ----------------------------------------------------------------------
-** Defines each of the tests performed
-** ------------------------------------------------------------------- */
-
-typedef struct
-{
- ne10_uint32_t fftSize;
- ne10_uint32_t ifftFlag;
- ne10_uint32_t doBitReverse;
- ne10_float32_t *inputF32;
-} test_config_cfft;
-
-static test_config_cfft CONFIG_CFFT[] =
-{
- {1024, 0, 1, &testInput_f32[0]},
- {256, 0, 1, &testInput_f32[0]},
- {64, 0, 1, &testInput_f32[0]},
- {16, 0, 1, &testInput_f32[0]},
-};
-static test_config_cfft CONFIG_CFFT_PERF[] =
-{
- {1024, 0, 1, &testInput_f32[0]},
- {256, 0, 1, &testInput_f32[0]},
- {64, 0, 1, &testInput_f32[0]},
- {16, 0, 1, &testInput_f32[0]},
-};
-
-#define CFFT_NUM_TESTS (sizeof(CONFIG_CFFT) / sizeof(CONFIG_CFFT[0]) )
-#define CFFT_NUM_PERF_TESTS (sizeof(CONFIG_CFFT_PERF) / sizeof(CONFIG_CFFT_PERF[0]) )
-
-//input and output
-static ne10_float32_t * guarded_in_c = NULL;
-static ne10_float32_t * guarded_in_neon = NULL;
-static ne10_float32_t * in_c = NULL;
-static ne10_float32_t * in_neon = NULL;
-
-static ne10_float32_t * guarded_out_c = NULL;
-static ne10_float32_t * guarded_out_neon = NULL;
-static ne10_float32_t * out_c = NULL;
-static ne10_float32_t * out_neon = NULL;
-
-static ne10_float32_t snr = 0.0f;
-
-#ifdef PERFORMANCE_TEST
-static ne10_int64_t time_c = 0;
-static ne10_int64_t time_neon = 0;
-static ne10_int64_t time_overhead_c = 0;
-static ne10_int64_t time_overhead_neon = 0;
-static ne10_float32_t time_speedup = 0.0f;
-static ne10_float32_t time_savings = 0.0f;
-#endif
-
-void test_cfft_case0()
-{
- ne10_float32_t *p_src = testInput_f32;
- ne10_cfft_radix4_instance_f32_t S;
-
- ne10_uint16_t loop = 0;
- ne10_uint16_t k = 0;
- ne10_uint16_t i = 0;
- ne10_uint16_t pos = 0;
-
- test_config_cfft *config;
- ne10_result_t status = NE10_OK;
-
- fprintf (stdout, "----------%30s start\n", __FUNCTION__);
-
- /* init input memory */
- NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
- NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
-
- /* init dst memory */
- NE10_DST_ALLOC (out_c, guarded_out_c, TEST_LENGTH_SAMPLES);
- NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES);
-
-#if defined (SMOKE_TEST)||(REGRESSION_TEST)
- for (loop = 0; loop < CFFT_NUM_TESTS; loop++)
- {
- config = &CONFIG_CFFT[loop];
-
- /* Initialize the CFFT/CIFFT module */
- status = ne10_cfft_radix4_init_float (&S, config->fftSize, config->ifftFlag);
-
- if (status == NE10_ERR)
- {
- printf ("fft init error!\n");
- }
-
- /* copy input to input buffer and clear the output buffer */
- for (i = 0; i < 2 * config->fftSize; i++)
- {
- in_c[i] = testInput_f32[i];
- in_neon[i] = testInput_f32[i];
- }
-
- /* FFT test */
- GUARD_ARRAY (out_c, config->fftSize * 2);
- GUARD_ARRAY (out_neon, config->fftSize * 2);
-
- ne10_radix4_butterfly_float_c (out_c, in_c, S.fft_len, S.p_twiddle);
- ne10_radix4_butterfly_float_neon (out_neon, in_neon, S.fft_len, S.p_twiddle);
-
- CHECK_ARRAY_GUARD (out_c, config->fftSize * 2);
- CHECK_ARRAY_GUARD (out_neon, config->fftSize * 2);
-
- //conformance test 1: compare snr
- snr = CAL_SNR_FLOAT32 (out_c, out_neon, 2 * config->fftSize);
- assert_false ( (snr < SNR_THRESHOLD));
-
- //conformance test 2: compare output of C and neon
-#if defined (DEBUG_TRACE)
- printf ("--------------------config %d\n", loop);
- printf ("fftSize: %d ifftFlag: %d\n", config->fftSize, config->ifftFlag);
-#endif
- for (pos = 0; pos < config->fftSize * 2; pos++)
- {
-#if defined (DEBUG_TRACE)
- printf ("pos %d \n", pos);
- printf ("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], * (ne10_uint32_t*) &out_c[pos], out_neon[pos], * (ne10_uint32_t*) &out_neon[pos]);
-#endif
- assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1);
- }
-
- /* IFFT test */
- /* copy input to input buffer and clear the output buffer */
- for (i = 0; i < 2 * config->fftSize; i++)
- {
- in_c[i] = out_c[i];
- in_neon[i] = out_neon[i];
- }
-
- GUARD_ARRAY (out_c, config->fftSize * 2);
- GUARD_ARRAY (out_neon, config->fftSize * 2);
-
- ne10_radix4_butterfly_inverse_float_c (out_c, in_c, S.fft_len, S.p_twiddle, S.one_by_fft_len);
- ne10_radix4_butterfly_inverse_float_neon (out_neon, in_neon, S.fft_len, S.p_twiddle, S.one_by_fft_len);
-
- CHECK_ARRAY_GUARD (out_c, config->fftSize * 2);
- CHECK_ARRAY_GUARD (out_neon, config->fftSize * 2);
-
- //conformance test 1: compare snr
- snr = CAL_SNR_FLOAT32 (out_c, out_neon, 2 * config->fftSize);
- assert_false ( (snr < SNR_THRESHOLD));
-
- //conformance test 2: compare output of C and neon
-#if defined (DEBUG_TRACE)
- printf ("--------------------config %d\n", loop);
- printf ("fftSize: %d ifftFlag: %d\n", config->fftSize, config->ifftFlag);
- printf ("snr: %f\n", snr);
-#endif
- for (pos = 0; pos < config->fftSize * 2; pos++)
- {
-#if defined (DEBUG_TRACE)
- printf ("pos %d \n", pos);
- printf ("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], * (ne10_uint32_t*) &out_c[pos], out_neon[pos], * (ne10_uint32_t*) &out_neon[pos]);
-#endif
- assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1);
- }
- }
-#endif
-
-#ifdef PERFORMANCE_TEST
- fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
- for (loop = 0; loop < CFFT_NUM_PERF_TESTS; loop++)
- {
- config = &CONFIG_CFFT_PERF[loop];
-
- /* Initialize the CFFT/CIFFT module */
- status = ne10_cfft_radix4_init_float (&S, config->fftSize, config->ifftFlag);
-
- if (status == NE10_ERR)
- {
- printf ("fft init error!\n");
- }
-
- /* FFT test */
- GET_TIME
- (
- time_overhead_c,
- {
- for (k = 0; k < TEST_COUNT; k++)
- {
- for (i = 0; i < 2 * config->fftSize; i++)
- {
- in_c[i] = testInput_f32[i];
- }
- }
- }
- );
-
- GET_TIME
- (
- time_c,
- {
- for (k = 0; k < TEST_COUNT; k++)
- {
- for (i = 0; i < 2 * config->fftSize; i++)
- {
- in_c[i] = testInput_f32[i];
- }
- ne10_radix4_butterfly_float_c (out_c, in_c, S.fft_len, S.p_twiddle);
- }
- }
- );
-
- GET_TIME
- (
- time_overhead_neon,
- {
- for (k = 0; k < TEST_COUNT; k++)
- {
- for (i = 0; i < 2 * config->fftSize; i++)
- {
- in_neon[i] = testInput_f32[i];
- }
- }
- }
- );
-
- GET_TIME
- (
- time_neon,
- {
- for (k = 0; k < TEST_COUNT; k++)
- {
- for (i = 0; i < 2 * config->fftSize; i++)
- {
- in_neon[i] = testInput_f32[i];
- }
- ne10_radix4_butterfly_float_neon (out_neon, in_neon, S.fft_len, S.p_twiddle);
- }
- }
- );
-
- time_c = time_c - time_overhead_c;
- time_neon = time_neon - time_overhead_neon;
- time_speedup = (ne10_float32_t) time_c / time_neon;
- time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
- ne10_log (__FUNCTION__, "CFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", S.fft_len, time_c, time_neon, time_savings, time_speedup);
-
- /* IFFT test */
- GET_TIME
- (
- time_overhead_c,
- {
- for (k = 0; k < TEST_COUNT; k++)
- {
- for (i = 0; i < 2 * config->fftSize; i++)
- {
- in_c[i] = out_c[i];
- }
- }
- }
- );
-
- GET_TIME
- (
- time_c,
- {
- for (k = 0; k < TEST_COUNT; k++)
- {
- for (i = 0; i < 2 * config->fftSize; i++)
- {
- in_c[i] = out_c[i];
- }
- ne10_radix4_butterfly_inverse_float_c (out_c, in_c, S.fft_len, S.p_twiddle, S.one_by_fft_len);
- }
- }
- );
-
- GET_TIME
- (
- time_overhead_neon,
- {
- for (k = 0; k < TEST_COUNT; k++)
- {
- for (i = 0; i < 2 * config->fftSize; i++)
- {
- in_neon[i] = out_neon[i];
- }
- }
- }
- );
-
- GET_TIME
- (
- time_neon,
- {
- for (k = 0; k < TEST_COUNT; k++)
- {
- for (i = 0; i < 2 * config->fftSize; i++)
- {
- in_neon[i] = out_neon[i];
- }
- ne10_radix4_butterfly_inverse_float_neon (out_neon, in_neon, S.fft_len, S.p_twiddle, S.one_by_fft_len);
- }
- }
- );
-
- time_c = time_c - time_overhead_c;
- time_neon = time_neon - time_overhead_neon;
- time_speedup = (ne10_float32_t) time_c / time_neon;
- time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
- ne10_log (__FUNCTION__, "CIFFT%20d%20lld%20lld%19.2f%%%18.2f:1\n", S.fft_len, time_c, time_neon, time_savings, time_speedup);
- }
-#endif
-
- free (guarded_in_c);
- free (guarded_in_neon);
- free (guarded_out_c);
- free (guarded_out_neon);
- fprintf (stdout, "----------%30s end\n", __FUNCTION__);
-}
-
-void test_cfft()
-{
- test_cfft_case0();
-}
-
-static void my_test_setup (void)
-{
- ne10_log_buffer_ptr = ne10_log_buffer;
-}
-
-void test_fixture_cfft (void)
-{
- test_fixture_start(); // starts a fixture
-
- fixture_setup (my_test_setup);
-
- run_test (test_cfft); // run tests
-
- test_fixture_end(); // ends a fixture
-}
** ------------------------------------------------------------------- */
/* Max FFT Length and double buffer for real and imag */
-#define TEST_LENGTH_SAMPLES (16384)
+#define TEST_LENGTH_SAMPLES (32768)
#define MIN_LENGTH_SAMPLES_CPX (4)
#define MIN_LENGTH_SAMPLES_REAL (MIN_LENGTH_SAMPLES_CPX*2)
-#define TEST_COUNT 250000
+#define TEST_COUNT 10000000
/* ----------------------------------------------------------------------
** Test input data for F32
** ------------------------------------------------------------------- */
//input and output
-static ne10_float32_t * in = NULL;
static ne10_float32_t * guarded_in_c = NULL;
static ne10_float32_t * guarded_in_neon = NULL;
static ne10_float32_t * in_c = NULL;
static ne10_float32_t * in_neon = NULL;
+static ne10_float32_t * in_c2 = NULL;
+static ne10_float32_t * in_neon2 = NULL;
+static ne10_float32_t * guarded_in_c2 = NULL;
+static ne10_float32_t * guarded_in_neon2 = NULL;
static ne10_float32_t * guarded_out_c = NULL;
static ne10_float32_t * guarded_out_neon = NULL;
static ne10_float32_t * out_c = NULL;
static ne10_float32_t * out_neon = NULL;
+static ne10_float32_t * guarded_out_c2 = NULL;
+static ne10_float32_t * guarded_out_neon2 = NULL;
+static ne10_float32_t * out_c2 = NULL;
+static ne10_float32_t * out_neon2 = NULL;
static ne10_float32_t snr = 0.0f;
+static ne10_float32_t snr2 = 0.0f;
static ne10_int64_t time_c = 0;
static ne10_int64_t time_neon = 0;
static ne10_int64_t time_overhead_neon = 0;
static ne10_float32_t time_speedup = 0.0f;
static ne10_float32_t time_savings = 0.0f;
+static ne10_int64_t time_c2 = 0;
+static ne10_int64_t time_neon2 = 0;
void test_fft_c2c_1d_float32_conformance()
{
memcpy (in_c, testInput_f32, 2 * fftSize * sizeof (ne10_float32_t));
memcpy (in_neon, testInput_f32, 2 * fftSize * sizeof (ne10_float32_t));
cfg = ne10_fft_alloc_c2c_float32 (fftSize);
- test_loop = TEST_COUNT/fftSize;
+ test_loop = TEST_COUNT / fftSize;
GET_TIME
(
memcpy (in_c, testInput_f32, fftSize * sizeof (ne10_float32_t));
memcpy (in_neon, testInput_f32, fftSize * sizeof (ne10_float32_t));
cfg = ne10_fft_alloc_r2c_float32 (fftSize);
- test_loop = TEST_COUNT/fftSize;
+ test_loop = TEST_COUNT / fftSize;
GET_TIME
(
GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t));
ne10_fft_c2c_1d_int16_scaled_c ( (ne10_fft_cpx_int16_t*) out_c, (ne10_fft_cpx_int16_t*) in_c,
- cfg->twiddles, cfg->factors, fftSize, 0);
+ cfg->twiddles, cfg->factors, fftSize, 0);
ne10_fft_c2c_1d_int16_scaled_neon ( (ne10_fft_cpx_int16_t*) out_neon, (ne10_fft_cpx_int16_t*) in_neon,
- cfg->twiddles, cfg->factors, fftSize, 0);
+ cfg->twiddles, cfg->factors, fftSize, 0);
CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int16_t));
CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t));
GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t));
ne10_fft_c2c_1d_int16_scaled_c ( (ne10_fft_cpx_int16_t*) out_c, (ne10_fft_cpx_int16_t*) in_c,
- cfg->twiddles, cfg->factors, fftSize, 1);
+ cfg->twiddles, cfg->factors, fftSize, 1);
ne10_fft_c2c_1d_int16_scaled_neon ( (ne10_fft_cpx_int16_t*) out_neon, (ne10_fft_cpx_int16_t*) in_neon,
- cfg->twiddles, cfg->factors, fftSize, 1);
+ cfg->twiddles, cfg->factors, fftSize, 1);
CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int16_t));
CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t));
memcpy (in_c, testInput_i16, 2 * fftSize * sizeof (ne10_int16_t));
memcpy (in_neon, testInput_i16, 2 * fftSize * sizeof (ne10_int16_t));
cfg = ne10_fft_alloc_c2c_int16 (fftSize);
- test_loop = TEST_COUNT/fftSize;
+ test_loop = TEST_COUNT / fftSize;
GET_TIME
(
memcpy (in_c, testInput_i16, fftSize * sizeof (ne10_int16_t));
memcpy (in_neon, testInput_i16, fftSize * sizeof (ne10_int16_t));
cfg = ne10_fft_alloc_r2c_int16 (fftSize);
- test_loop = TEST_COUNT/fftSize;
+ test_loop = TEST_COUNT / fftSize;
GET_TIME
(
for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
{
testInput_i32[i] = (ne10_int32_t) (drand48() * 8192) - 4096;
- testInput2_i32[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX/2;
+ testInput2_i32[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX / 2;
}
for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
{
GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
ne10_fft_c2c_1d_int32_unscaled_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c,
- cfg->twiddles, cfg->factors, fftSize, 0);
+ cfg->twiddles, cfg->factors, fftSize, 0);
ne10_fft_c2c_1d_int32_unscaled_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon,
- cfg->twiddles, cfg->factors, fftSize, 0);
+ cfg->twiddles, cfg->factors, fftSize, 0);
CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
ne10_fft_c2c_1d_int32_unscaled_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c,
- cfg->twiddles, cfg->factors, fftSize, 1);
+ cfg->twiddles, cfg->factors, fftSize, 1);
ne10_fft_c2c_1d_int32_unscaled_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon,
- cfg->twiddles, cfg->factors, fftSize, 1);
+ cfg->twiddles, cfg->factors, fftSize, 1);
CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
ne10_fft_c2c_1d_int32_scaled_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c,
- cfg->twiddles, cfg->factors, fftSize, 0);
+ cfg->twiddles, cfg->factors, fftSize, 0);
ne10_fft_c2c_1d_int32_scaled_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon,
- cfg->twiddles, cfg->factors, fftSize, 0);
+ cfg->twiddles, cfg->factors, fftSize, 0);
CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
ne10_fft_c2c_1d_int32_scaled_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c,
- cfg->twiddles, cfg->factors, fftSize, 1);
+ cfg->twiddles, cfg->factors, fftSize, 1);
ne10_fft_c2c_1d_int32_scaled_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon,
- cfg->twiddles, cfg->factors, fftSize, 1);
+ cfg->twiddles, cfg->factors, fftSize, 1);
CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
{
testInput_i32[i] = (ne10_int32_t) (drand48() * 8192) - 4096;
- testInput2_i32[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX/2;
+ testInput2_i32[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX / 2;
}
for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
{
memcpy (in_c, testInput_i32, 2 * fftSize * sizeof (ne10_int32_t));
memcpy (in_neon, testInput_i32, 2 * fftSize * sizeof (ne10_int32_t));
cfg = ne10_fft_alloc_c2c_int32 (fftSize);
- test_loop = TEST_COUNT/fftSize;
+ test_loop = TEST_COUNT / fftSize;
GET_TIME
(
memcpy (in_c, testInput_i32, fftSize * sizeof (ne10_int32_t));
memcpy (in_neon, testInput_i32, fftSize * sizeof (ne10_int32_t));
cfg = ne10_fft_alloc_r2c_int32 (fftSize);
- test_loop = TEST_COUNT/fftSize;
+ test_loop = TEST_COUNT / fftSize;
GET_TIME
(
+++ /dev/null
-/*
- * Copyright 2012-14 ARM Limited
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of ARM Limited nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * NE10 Library : test_suite_rfft.c
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-
-#include "NE10_dsp.h"
-#include "seatest.h"
-
-
-/* ----------------------------------------------------------------------
-** Global defines
-** ------------------------------------------------------------------- */
-
-/* Max FFT Length 1024 and double buffer for real and imag */
-#define TEST_LENGTH_SAMPLES (1024 * 2)
-
-#define TEST_COUNT 5000
-
-/* ----------------------------------------------------------------------
-** Test input data for F32
-** Generated by the MATLAB rand() function
-** ------------------------------------------------------------------- */
-
-static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] =
-{
- -0.432565, -1.665584, 0.125332, 0.287676, -1.146471, 1.190915, 1.189164, -0.037633,
- 0.327292, 0.174639, -0.186709, 0.725791, -0.588317, 2.183186, -0.136396, 0.113931,
- 1.066768, 0.059281, -0.095648, -0.832349, 0.294411, -1.336182, 0.714325, 1.623562,
- -0.691776, 0.857997, 1.254001, -1.593730, -1.440964, 0.571148, -0.399886, 0.689997,
- 0.815622, 0.711908, 1.290250, 0.668601, 1.190838, -1.202457, -0.019790, -0.156717,
- -1.604086, 0.257304, -1.056473, 1.415141, -0.805090, 0.528743, 0.219321, -0.921902,
- -2.170674, -0.059188, -1.010634, 0.614463, 0.507741, 1.692430, 0.591283, -0.643595,
- 0.380337, -1.009116, -0.019511, -0.048221, 0.000043, -0.317859, 1.095004, -1.873990,
- 0.428183, 0.895638, 0.730957, 0.577857, 0.040314, 0.677089, 0.568900, -0.255645,
- -0.377469, -0.295887, -1.475135, -0.234004, 0.118445, 0.314809, 1.443508, -0.350975,
- 0.623234, 0.799049, 0.940890, -0.992092, 0.212035, 0.237882, -1.007763, -0.742045,
- 1.082295, -0.131500, 0.389880, 0.087987, -0.635465, -0.559573, 0.443653, -0.949904,
- 0.781182, 0.568961, -0.821714, -0.265607, -1.187777, -2.202321, 0.986337, -0.518635,
- 0.327368, 0.234057, 0.021466, -1.003944, -0.947146, -0.374429, -1.185886, -1.055903,
- 1.472480, 0.055744, -1.217317, -0.041227, -1.128344, -1.349278, -0.261102, 0.953465,
- 0.128644, 0.656468, -1.167819, -0.460605, -0.262440, -1.213152, -1.319437, 0.931218,
- 0.011245, -0.645146, 0.805729, 0.231626, -0.989760, 1.339586, 0.289502, 1.478917,
- 1.138028, -0.684139, -1.291936, -0.072926, -0.330599, -0.843628, 0.497770, 1.488490,
- -0.546476, -0.846758, -0.246337, 0.663024, -0.854197, -1.201315, -0.119869, -0.065294,
- 0.485296, -0.595491, -0.149668, -0.434752, -0.079330, 1.535152, -0.606483, -1.347363,
- 0.469383, -0.903567, 0.035880, -0.627531, 0.535398, 0.552884, -0.203690, -2.054325,
- 0.132561, 1.592941, 1.018412, -1.580402, -0.078662, -0.681657, -1.024553, -1.234353,
- 0.288807, -0.429303, 0.055801, -0.367874, -0.464973, 0.370961, 0.728283, 2.112160,
- -1.357298, -1.022610, 1.037834, -0.389800, -1.381266, 0.315543, 1.553243, 0.707894,
- 1.957385, 0.504542, 1.864529, -0.339812, -1.139779, -0.211123, 1.190245, -1.116209,
- 0.635274, -0.601412, 0.551185, -1.099840, 0.085991, -2.004563, -0.493088, 0.462048,
- -0.321005, 1.236556, -0.631280, -2.325211, -1.231637, 1.055648, -0.113224, 0.379224,
- 0.944200, -2.120427, -0.644679, -0.704302, -1.018137, -0.182082, 1.521013, -0.038439,
- 1.227448, -0.696205, 0.007524, -0.782893, 0.586939, -0.251207, 0.480136, 0.668155,
- -0.078321, 0.889173, 2.309287, 0.524639, -0.011787, 0.913141, 0.055941, -1.107070,
- 0.485498, -0.005005, -0.276218, 1.276452, 1.863401, -0.522559, 0.103424, -0.807649,
- 0.680439, -2.364590, 0.990115, 0.218899, 0.261662, 1.213444, -0.274667, -0.133134,
- -1.270500, -1.663606, -0.703554, 0.280880, -0.541209, -1.333531, 1.072686, -0.712085,
- -0.011286, -0.000817, -0.249436, 0.396575, -0.264013, -1.664011, -1.028975, 0.243095,
- -1.256590, -0.347183, -0.941372, -1.174560, -1.021142, -0.401667, 0.173666, -0.116118,
- 1.064119, -0.245386, -1.517539, 0.009734, 0.071373, 0.316536, 0.499826, 1.278084,
- -0.547816, 0.260808, -0.013177, -0.580264, 2.136308, -0.257617, -1.409528, 1.770101,
- 0.325546, -1.119040, 0.620350, 1.269782, -0.896043, 0.135175, -0.139040, -1.163395,
- 1.183720, -0.015430, 0.536219, -0.716429, -0.655559, 0.314363, 0.106814, 1.848216,
- -0.275106, 2.212554, 1.508526, -1.945079, -1.680543, -0.573534, -0.185817, 0.008934,
- 0.836950, -0.722271, -0.721490, -0.201181, -0.020464, 0.278890, 1.058295, 0.621673,
- -1.750615, 0.697348, 0.811486, 0.636345, 1.310080, 0.327098, -0.672993, -0.149327,
- -2.449018, 0.473286, 0.116946, -0.591104, -0.654708, -1.080662, -0.047731, 0.379345,
- -0.330361, -0.499898, -0.035979, -0.174760, -0.957265, 1.292548, 0.440910, 1.280941,
- -0.497730, -1.118717, 0.807650, 0.041200, -0.756209, -0.089129, -2.008850, 1.083918,
- -0.981191, -0.688489, 1.339479, -0.909243, -0.412858, -0.506163, 1.619748, 0.080901,
- -1.081056, -1.124518, 1.735676, 1.937459, 1.635068, -1.255940, -0.213538, -0.198932,
- 0.307499, -0.572325, -0.977648, -0.446809, 1.082092, 2.372648, 0.229288, -0.266623,
- 0.701672, -0.487590, 1.862480, 1.106851, -1.227566, -0.669885, 1.340929, 0.388083,
- 0.393059, -1.707334, 0.227859, 0.685633, -0.636790, -1.002606, -0.185621, -1.054033,
- -0.071539, 0.279198, 1.373275, 0.179841, -0.542017, 1.634191, 0.825215, 0.230761,
- 0.671634, -0.508078, 0.856352, 0.268503, 0.624975, -1.047338, 1.535670, 0.434426,
- -1.917136, 0.469940, 1.274351, 0.638542, 1.380782, 1.319843, -0.909429, -2.305605,
- 1.788730, 0.390798, 0.020324, -0.405977, -1.534895, 0.221373, -1.374479, -0.839286,
- -0.208643, 0.755913, 0.375734, -1.345413, 1.481876, 0.032736, 1.870453, -1.208991,
- -0.782632, -0.767299, -0.107200, -0.977057, -0.963988, -2.379172, -0.838188, 0.257346,
- -0.183834, -0.167615, -0.116989, 0.168488, -0.501206, -0.705076, 0.508165, -0.420922,
- 0.229133, -0.959497, -0.146043, 0.744538, -0.890496, 0.139062, -0.236144, -0.075459,
- -0.358572, -2.077635, -0.143546, 1.393341, 0.651804, -0.377134, -0.661443, 0.248958,
- -0.383516, -0.528480, 0.055388, 1.253769, -2.520004, 0.584856, -1.008064, 0.944285,
- -2.423957, -0.223831, 0.058070, -0.424614, -0.202918, -1.513077, -1.126352, -0.815002,
- 0.366614, -0.586107, 1.537409, 0.140072, -1.862767, -0.454193, -0.652074, 0.103318,
- -0.220632, -0.279043, -0.733662, -0.064534, -1.444004, 0.612340, -1.323503, -0.661577,
- -0.146115, 0.248085, -0.076633, 1.738170, 1.621972, 0.626436, 0.091814, -0.807607,
- -0.461337, -1.405969, -0.374530, -0.470911, 1.751296, 0.753225, 0.064989, -0.292764,
- 0.082823, 0.766191, 2.236850, 0.326887, 0.863304, 0.679387, 0.554758, 1.001630,
- 1.259365, 0.044151, -0.314138, 0.226708, 0.996692, 1.215912, -0.542702, 0.912228,
- -0.172141, -0.335955, 0.541487, 0.932111, -0.570253, -1.498605, -0.050346, 0.553025,
- 0.083498, 1.577524, -0.330774, 0.795155, -0.784800, -1.263121, 0.666655, -1.392632,
- -1.300562, -0.605022, -1.488565, 0.558543, -0.277354, -1.293685, -0.888435, -0.986520,
- -0.071618, -2.414591, -0.694349, -1.391389, 0.329648, 0.598544, 0.147175, -0.101439,
- -2.634981, 0.028053, -0.876310, -0.265477, -0.327578, -1.158247, 0.580053, 0.239756,
- -0.350885, 0.892098, 1.578299, -1.108174, -0.025931, -1.110628, 0.750834, 0.500167,
- -0.517261, -0.559209, -0.753371, 0.925813, -0.248520, -0.149835, -1.258415, 0.312620,
- 2.690277, 0.289696, -1.422803, 0.246786, -1.435773, 0.148573, -1.693073, 0.719188,
- 1.141773, 1.551936, 1.383630, -0.758092, 0.442663, 0.911098, -1.074086, 0.201762,
- 0.762863, -1.288187, -0.952962, 0.778175, -0.006331, 0.524487, 1.364272, 0.482039,
- -0.787066, 0.751999, -0.166888, -0.816228, 2.094065, 0.080153, -0.937295, 0.635739,
- 1.682028, 0.593634, 0.790153, 0.105254, -0.158579, 0.870907, -0.194759, 0.075474,
- -0.526635, -0.685484, -0.268388, -1.188346, 0.248579, 0.102452, -0.041007, -2.247582,
- -0.510776, 0.249243, 0.369197, 0.179197, -0.037283, -1.603310, 0.339372, -0.131135,
- 0.485190, 0.598751, -0.086031, 0.325292, -0.335143, -0.322449, -0.382374, -0.953371,
- 0.233576, 1.235245, -0.578532, -0.501537, 0.722864, 0.039498, 1.541279, -1.701053,
- -1.033741, -0.763708, 2.176426, 0.431612, -0.443765, 0.029996, -0.315671, 0.977846,
- 0.018295, 0.817963, 0.702341, -0.231271, -0.113690, 0.127941, -0.799410, -0.238612,
- -0.089463, -1.023264, 0.937538, -1.131719, -0.710702, -1.169501, 1.065437, -0.680394,
- -1.725773, 0.813200, 1.441867, 0.672272, 0.138665, -0.859534, -0.752251, 1.229615,
- 1.150754, -0.608025, 0.806158, 0.217133, -0.373461, -0.832030, 0.286866, -1.818892,
- -1.573051, 2.015666, -0.071982, 2.628909, -0.243317, 0.173276, 0.923207, -0.178553,
- -0.521705, 1.431962, -0.870117, 0.807542, -0.510635, 0.743514, 0.847898, -0.829901,
- 0.532994, 1.032848, -1.052024, 0.362114, -0.036787, -1.227636, -0.275099, -0.160435,
- -1.083575, -1.954213, -0.909487, -0.005579, -1.723490, 1.263077, -0.600433, -2.063925,
- 0.110911, 1.487614, 0.053002, 0.161981, -0.026878, 0.173576, 0.882168, 0.182294,
- 0.755295, 0.508035, 0.131880, 0.280104, -0.982848, -0.944087, -0.013058, 0.354345,
- -0.894709, 0.812111, 0.109537, 2.731644, 0.411079, -1.306862, 0.383806, 0.499504,
- -0.510786, 0.234922, -0.597825, 0.020771, 0.419443, 1.191104, 0.771214, -2.644222,
- 0.285430, 0.826093, -0.008122, 0.858438, 0.774788, 1.305945, 1.231503, 0.958564,
- -1.654548, -0.990396, 0.685236, -0.974870, -0.606726, 0.686794, 0.020049, 1.063801,
- -1.341050, 0.479510, -1.633974, -1.442665, 0.293781, -0.140364, -1.130341, -0.292538,
- -0.582536, -0.896348, 0.248601, -1.489663, 0.313509, -2.025084, 0.528990, 0.343471,
- 0.758193, -0.691940, 0.680179, -1.072541, 0.899772, -2.123092, 0.284712, -0.733323,
- -0.773376, 0.151842, -0.336843, 0.970761, -0.107236, 1.013492, -0.475347, 0.068948,
- 0.398592, 1.116326, 0.620451, -0.287674, -1.371773, -0.685868, 0.331685, -0.997722,
- 0.291418, 1.107078, 0.244959, 0.164976, 0.406231, 1.215981, 1.448424, -1.025137,
- 0.205418, 0.588882, -0.264024, 2.495318, 0.855948, -0.850954, 0.811879, 0.700242,
- 0.759938, -1.712909, 1.537021, -1.609847, 1.109526, -1.109704, 0.385469, 0.965231,
- 0.818297, 0.037049, -0.926012, -0.111919, -0.803030, -1.665006, -0.901401, 0.588350,
- 0.554159, -0.415173, 0.061795, 0.457432, 0.199014, 0.257558, 2.080730, -2.277237,
- 0.339022, 0.289894, 0.662261, -0.580860, 0.887752, 0.171871, 0.848821, 0.963769,
- 1.321918, -0.064345, 1.317053, 0.228017, -1.429637, -0.149701, -0.504968, -1.729141,
- -0.417472, -0.614969, 0.720777, 0.339364, 0.882845, 0.284245, -0.145541, -0.089646,
- 0.289161, 1.164831, 0.805729, -1.355643, 0.120893, -0.222178, 0.571732, -0.300140,
- 1.134277, -0.179356, -1.467067, 1.395346, 0.440836, 0.565384, -0.693623, 0.833869,
- -2.237378, 1.097644, -0.001617, -1.614573, -1.228727, 0.207405, 0.220942, -1.006073,
- -0.453067, 1.399453, -0.461964, 0.032716, 0.798783, 0.896816, 0.137892, -1.619146,
- -1.646606, 0.428707, -0.737231, 0.564926, -1.384167, 0.460268, 0.629384, 0.379847,
- -1.013330, -0.347243, 0.441912, -1.590240, -0.701417, -1.077601, 1.002220, 1.729481,
- 0.709032, -0.747897, 0.228862, -0.223497, -0.853275, 0.345627, 0.109764, -1.133039,
- -0.683124, -0.277856, 0.654790, -1.248394, -0.597539, -0.481813, 0.983372, 1.762121,
- 1.427402, 0.911763, 0.326823, 0.069619, -1.499763, -0.418223, -0.021037, 0.228425,
- -1.008196, -0.664622, 0.558177, -1.188542, -0.775481, 0.271042, 1.534976, -1.052283,
- 0.625559, -0.797626, -0.313522, -0.602210, 1.259060, 0.858484, -2.105292, -0.360937,
- 0.553557, -1.556384, -0.206666, -0.425568, 0.493778, -0.870908, 0.079828, -0.521619,
- -1.413861, -0.384293, -0.457922, -0.291471, -0.301224, -1.588594, 1.094287, 1.324167,
- -0.126480, -0.737164, 0.213719, -0.400529, 0.064938, -1.757996, 1.686748, 0.327400,
- 0.715967, 1.598648, -2.064741, -0.743632, 0.176185, 0.527839, -0.553153, 0.298280,
- -1.226607, -0.189676, -0.301713, 0.956956, -0.533366, -0.901082, -0.892552, 0.278717,
- -0.745807, 1.603464, 0.574270, 0.320655, -0.151383, 0.315762, 1.343703, -2.237832,
- 1.292906, -0.378459, 0.002521, 0.884641, 0.582450, -1.614244, -1.503666, 0.573586,
- -0.910537, -1.631277, -0.359138, -0.397616, -1.161307, -1.109838, 0.290672, -1.910239,
- 1.314768, 0.665319, -0.275115, -0.023022, -0.907976, -1.043657, 0.373516, 0.901532,
- 1.278539, -0.128456, 0.612821, 1.956518, 2.266326, -0.373959, 2.238039, -0.159580,
- -0.703281, 0.563477, -0.050296, 1.163593, 0.658808, -1.550089, -3.029118, 0.540578,
- -1.008998, 0.908047, 1.582303, -0.979088, 1.007902, 0.158491, -0.586927, 1.574082,
- -0.516649, 1.227800, 1.583876, -2.088950, 2.949545, 1.356125, 1.050068, -0.767170,
- -0.257653, -1.371845, -1.267656, -0.894948, 0.589089, 1.842629, 1.347967, -0.491253,
- -2.177568, 0.237000, -0.735411, -1.779419, 0.448030, 0.581214, 0.856607, -0.266263,
- -0.417470, -0.205806, -0.174323, 0.217577, 1.684295, 0.119528, 0.650667, 2.080061,
- -0.339225, 0.730113, 0.293969, -0.849109, -2.533858, -2.378941, -0.346276, -0.610937,
- -0.408192, -1.415611, 0.227122, 0.207974, -0.719718, 0.757762, -1.643135, -1.056813,
- -0.251662, -1.298441, 1.233255, 1.494625, 0.235938, -1.404359, 0.658791, -2.556613,
- -0.534945, 3.202525, 0.439198, -1.149901, 0.886765, -0.283386, 1.035336, -0.364878,
- 1.341987, 1.008872, 0.213874, -0.299264, 0.255849, -0.190826, -0.079060, 0.699851,
- -0.796540, -0.801284, -0.007599, -0.726810, -1.490902, 0.870335, -0.265675, -1.566695,
- -0.394636, -0.143855, -2.334247, -1.357539, -1.815689, 1.108422, -0.142115, 1.112757,
- 0.559264, 0.478370, -0.679385, 0.284967, -1.332935, -0.723980, -0.663600, 0.198443,
- -1.794868, -1.387673, 0.197768, 1.469328, 0.366493, -0.442775, -0.048563, 0.077709,
- 1.957910, -0.072848, 0.938810, -0.079608, -0.800959, 0.309424, 1.051826, -1.664211,
- -1.090792, -0.191731, 0.463401, -0.924147, -0.649657, 0.622893, -1.335107, 1.047689,
- 0.863327, -0.642411, 0.660010, 1.294116, 0.314579, 0.859573, 0.128670, 0.016568,
- -0.072801, -0.994310, -0.747358, -0.030814, 0.988355, -0.599017, 1.476644, -0.813801,
- 0.645040, -1.309919, -0.867425, -0.474233, 0.222417, 1.871323, 0.110001, -0.411341,
- 0.511242, -1.199117, -0.096361, 0.445817, -0.295825, -0.167996, 0.179543, 0.421118,
- 1.677678, 1.996949, 0.696964, -1.366382, 0.363045, -0.567044, -1.044154, 0.697139,
- 0.484026, -0.193751, -0.378095, -0.886374, -1.840197, -1.628195, -1.173789, -0.415411,
- 0.175088, 0.229433, -1.240889, 0.700004, 0.426877, 1.454803, -0.510186, -0.006657,
- -0.525496, 0.717698, 1.088374, 0.500552, 2.771790, -0.160309, 0.429489, -1.966817,
- -0.546019, -1.888395, -0.107952, -1.316144, -0.672632, -0.902365, -0.154798, 0.947242,
- 1.550375, 0.429040, -0.560795, 0.179304, -0.771509, -0.943390, -1.407569, -1.906131,
- -0.065293, 0.672149, 0.206147, -0.008124, 0.020042, -0.558447, 1.886079, -0.219975,
- -1.414395, -0.302811, -0.569574, -0.121495, -0.390171, -0.844287, -1.737757, -0.449520,
- -1.547933, -0.095776, 0.907714, 2.369602, 0.519768, 0.410525, 1.052585, 0.428784,
- 1.295088, -0.186053, 0.130733, -0.657627, -0.759267, -0.595170, 0.812400, 0.069541,
- -1.833687, 1.827363, 0.654075, -1.544769, -0.375109, 0.207688, -0.765615, -0.106355,
- 0.338769, 1.033461, -1.404822, -1.030570, -0.643372, 0.170787, 1.344839, 1.936273,
- 0.741336, 0.811980, -0.142808, -0.099858, -0.800131, 0.493249, 1.237574, 1.295951,
- -0.278196, 0.217127, 0.630728, -0.548549, 0.229632, 0.355311, 0.521284, -0.615971,
- 1.345803, 0.974922, -2.377934, -1.092319, -0.325710, -2.012228, 1.567660, 0.233337,
- 0.646420, -1.129412, 0.197038, 1.696870, 0.726034, 0.792526, 0.603357, -0.058405,
- -1.108666, 2.144229, -1.352821, 0.457021, 0.391175, 2.073013, -0.323318, 1.468132,
- -0.502399, 0.209593, 0.754800, -0.948189, 0.613157, 1.760503, 0.088762, 2.595570,
- -0.675470, 2.786804, -0.016827, 0.271651, -0.914102, -1.951371, -0.317418, 0.588333,
- 0.828996, -1.674851, -1.922293, -0.436662, 0.044974, 2.416609, -0.309892, 0.187583,
- 0.947699, -0.525703, -1.115605, -1.592320, 1.174844, 0.485144, 1.645480, -0.454233,
- 1.008768, 2.049403, 0.602020, 0.017860, -1.610426, 1.238752, 0.683587, -0.780716,
- 0.530979, 2.134498, 0.354361, 0.231700, 1.287980, -0.013488, -1.333345, -0.556343,
- 0.755597, -0.911854, 1.371684, 0.245580, 0.118845, 0.384690, -0.070152, -0.578309,
- 0.469308, 1.299687, 1.634798, -0.702809, 0.807253, -1.027451, 1.294496, 0.014930,
- 0.218705, 1.713188, -2.078805, 0.112917, -1.086491, -1.558311, 0.637406, -0.404576,
- -0.403325, 0.084076, -0.435349, -0.562623, 0.878062, -0.814650, -0.258363, 0.493299,
- -0.802694, -0.008329, 0.627571, 0.154382, 2.580735, -1.306246, 1.023526, 0.777795,
- -0.833884, -0.586663, 0.065664, -0.012342, -0.076987, -1.558587, 1.702607, -0.468984,
- 0.094619, 0.287071, 0.919354, 0.510136, 0.245440, -1.400519, 0.969571, 1.593698,
- -1.437917, -1.534230, -0.074710, 0.081459, -0.843240, -0.564640, -0.028207, -1.243702,
- 0.733039, 0.059580, 0.149144, 1.595857, -0.777250, 1.550277, 1.055002, -0.166654,
- 0.314484, 1.419571, 0.327348, 0.475653, 0.398754, -0.072770, 1.314784, 0.978279,
- 1.722114, -0.412302, 0.565133, 0.739851, 0.220138, 1.312807, 0.629152, -1.107987,
- -0.447001, -0.725993, 0.354045, -0.506772, -2.103747, -0.664684, 1.450110, -0.329805,
- 2.701872, -1.634939, -0.536325, 0.547223, 1.492603, -0.455243, -0.496416, 1.235260,
- 0.040926, 0.748467, 1.230764, 0.304903, 1.077771, 0.765151, -1.319580, -0.509191,
- 0.555116, -1.957625, -0.760453, -2.443886, -0.659366, -0.114779, 0.300079, -0.583996,
- -3.073745, 1.551042, -0.407369, 1.428095, -1.353242, 0.903970, 0.541671, -0.465020,
- 2.430415, 2.020479, 0.797287, 0.030996, 0.540738, 0.683921, -0.590052, -0.261084,
- 1.517068, 1.007259, 0.303421, -0.817081, -0.491192, 0.867467, 0.360790, -0.080371,
- 0.749301, -1.791968, 1.213226, -0.060524, -0.392520, 0.609547, 0.643580, 1.019521,
- 0.934437, 1.228582, -0.249486, -0.707583, -0.593824, -0.262310, 1.242847, -1.548902,
- -0.386760, 0.275098, 0.826154, -0.979279, -0.104297, 0.127849, 0.062544, 0.371624,
- -0.103963, -0.696775, -0.386823, 0.016134, 1.369212, 0.416877, 0.068741, 0.294187,
- 0.472633, 1.782735, 0.260577, 1.510728, 0.316968, 0.803473, 0.580874, 1.778584,
- -0.938075, -0.916672, 0.376006, 0.909780, 0.154250, -0.202264, 1.488708, -0.621639,
- 0.809537, 1.928793, 0.396057, -0.861399, 2.431936, -0.840518, 0.280451, 0.820416,
- 1.227828, -0.063565, 0.645265, -1.771318, 0.059612, -0.760177, -1.690901, 1.103672,
- 1.462500, 0.236213, -1.097691, 2.415233, -0.402112, 0.914131, -0.135959, 1.314193,
- 0.322361, -0.476496, 0.076162, -0.105147, 1.417013, 0.707911, 0.367918, -0.602844,
- -0.852110, 0.655122, 1.470184, -0.810403, -1.276157, 1.722268, 0.101878, -0.801997,
- -1.250837, 1.237717, 1.528165, 1.776923, 0.631168, 0.083259, 2.140043, 1.263469,
- -1.750645, -0.014432, 2.468102, -0.669158, 0.259927, -0.372328, 1.318554, -0.653081,
- 0.062179, -0.735873, -0.179324, 1.084675, 0.136915, -0.015608, -0.938491, -1.478085,
- 0.361931, 0.477791, 0.321742, -1.877574, 0.680526, 0.233398, 1.239492, 0.125661,
- 0.179721, -0.605061, -1.036850, -0.295278, 1.456114, 1.802525, -1.333614, 0.387257,
- -0.022809, 0.110596, 0.812811, -1.009099, -1.004572, 0.282958, 0.289750, -0.247297,
- -0.218864, 0.898687, -0.642213, -0.180445, 0.717913, 0.301386, 1.548895, -0.044242,
- -0.029651, -0.382110, -0.553929, 0.932358, -1.315840, -0.301519, -2.599588, 0.780078,
- 0.602941, 0.942799, -1.023913, -0.067830, 0.081760, -1.767027, -1.781264, -0.660354,
- 1.351417, 2.136370, 0.166783, -1.705227, 0.276528, 0.394512, -0.098555, 0.176450,
- -1.837854, -1.502291, 0.819197, -0.234568, -1.631598, -0.317939, -0.796289, 0.690800,
- -0.042010, 0.324041, 0.506456, -1.028590, 0.099426, -0.116351, 0.689239, 1.883291,
- 0.325435, -0.095213, 0.031172, -0.613800, -1.731258, 0.478775, -0.447835, 0.386815,
- 0.052959, -0.486085, 0.244473, 0.718309, 0.153485, 0.133783, -1.006194, 1.306469,
- 1.199137, -2.577336, -2.086270, 0.386132, -0.861031, -1.230808, 2.641554, -0.904404,
- -1.223338, 0.303205, -0.730097, -1.143570, -1.413193, -0.591818, 0.518888, -1.492811,
- -0.086684, -0.012620, -0.345858, 0.986311, 0.643256, 2.919944, -1.248585, 0.157115,
- 0.788733, -0.577083, 0.527634, 1.671694, 0.800079, 0.883787, -0.224185, 0.296991,
- -0.521008, -0.155359, -0.098498, 0.997170, 0.434470, -0.025721, -0.379934, -0.242396,
- -1.165114, 0.756605, 1.164162, -1.023455, 1.701589, -0.494172, 0.172714, 0.354061,
- -0.246258, -0.145741, -1.169008, -0.022011, 0.618278, 1.865865, 0.081875, 1.607995,
- -0.380666, -1.299588, -0.723958, -0.564984, 0.621664, -1.335471, -0.123108, -1.102815,
- -2.753176, 0.252017, -0.858148, 1.135363, -0.297908, 1.154331, 1.046076, 2.126874,
- -0.655774, -1.142368, 0.949039, -0.404608, -0.384329, 0.482020, 0.443774, 0.381100,
- 1.102348, 0.856447, -1.178509, 0.401970, -0.584228, -0.979486, 0.115106, 0.068471,
- -0.529900, 0.541112, 0.681720, 0.538565, -0.510035, -1.322111, -0.610659, -0.565309,
- 0.086175, 0.691501, 2.133751, -0.002864, -0.089523, -0.254982, -0.874212, 0.422928,
- -0.133399, 0.539578, 0.875171, -1.250776, 0.868311, -0.804806, -0.752693, -0.745812,
- -0.309654, -1.521891, 0.826531, -0.612987, 0.959728, 1.972988, 0.294958, -0.392651,
- 0.575927, -1.141419, 0.061069, 0.012318, -0.168118, -0.687349, -0.990650, -0.049762,
- 0.719301, -0.283063, -1.424966, 0.461549, 1.091484, -1.044295, -2.842784, 0.996824,
- 0.076534, -1.866737, -0.613614, 1.169354, -0.575013, -0.264795, 0.004722, -0.039410,
- -0.505393, -1.157832, 0.710427, 0.728172, 0.866884, 2.431569, 0.110204, 0.026449,
- 0.970324, -0.005260, 1.409542, 1.757851, 0.885011, 1.140862, 0.403216, 0.191009,
- -0.693627, 0.011036, -1.105586, 1.907973, -0.165412, -0.732430, -0.990741, 0.894305,
- 0.448227, 0.889219, 1.073337, -0.104734, 1.547319, 0.169834, 0.804048, -1.724029,
- 0.174133, -0.484085, -0.731627, -2.131905, -1.810366, -0.052338, -0.086212, -1.189738,
- -0.754141, 0.947278, -0.182628, -0.066268, 0.905018, 1.458216, -1.117984, 1.813295,
- 0.150753, -0.282994, 1.650122, 0.666378, -0.346362, -0.264042, -0.644349, -0.905540,
- 0.716679, -0.007336, -2.814799, -0.149546, 0.577495, 0.753117, -0.166985, -0.581816,
- 0.365758, -0.548919, 0.578737, -1.955799, 0.522006, 1.601135, 0.732559, 0.555747,
- -0.813346, -0.538975, 1.307876, -0.482579, -1.752447, -0.926570, 0.922440, 0.041001,
- 0.413647, 0.597244, 1.924270, 0.714119, -2.312337, 1.380715, 1.390703, -0.453904,
- -0.628305, 1.023225, -0.489111, -0.402405, 1.399683, 0.280561, 1.880872, -0.799673,
- -0.560699, 1.708875, -0.644810, -1.422496, -0.755937, 0.157520, 0.378346, 0.178665,
- -0.602775, -0.993406, 1.188948, 2.388009, 2.265523, 2.301073, -0.270076, 0.502837,
- -0.119191, -0.001889, -0.432649, -0.194822, 0.985351, 0.468596, -1.364901, 0.273689,
- 2.646683, -0.053754, 0.472511, -2.080034, -0.802494, -0.456793, 0.193857, 0.889525,
- -1.591669, -0.321976, -0.703798, -0.744287, 0.371287, 1.437276, 0.459913, 0.660738,
- 1.124368, 0.979412, -1.316431, -0.023211, 0.134547, 2.408125, 0.901705, 0.076185,
- 0.361743, -2.058669, -2.332033, -0.370905, 1.285684, 0.557046, -0.180229, -0.035676
-};
-ne10_float32_t tmp_buffer[TEST_LENGTH_SAMPLES]; /* scratch array handed as the fourth argument to ne10_rfft_float_c / ne10_rfft_float_neon */
-/* ----------------------------------------------------------------------
-** Defines each of the tests performed
-**
-** One test_config_rfft entry describes a single RFFT run; the entries are
-** collected in the CONFIG_RFFT and CONFIG_RFFT_PERF tables.
-** ------------------------------------------------------------------- */
-
-typedef struct
-{
- ne10_uint32_t fftSize; /* passed to ne10_rfft_init_float and used as the bound of the input copy loops */
- ne10_uint32_t ifftFlag; /* passed to ne10_rfft_init_float for the first pass; the inverse pass hard-codes 1 instead */
- ne10_uint32_t doBitReverse; /* NOTE(review): not read by test_rfft_case0 */
- ne10_float32_t *inputF32; /* NOTE(review): not read by test_rfft_case0, which indexes testInput_f32 directly */
-} test_config_rfft;
-
-static test_config_rfft CONFIG_RFFT[] = /* configurations walked by the conformance loop of test_rfft_case0 */
-{
- {128, 0, 1, &testInput_f32[0]}, /* field order: fftSize, ifftFlag, doBitReverse, inputF32 */
- {512, 0, 1, &testInput_f32[0]},
-};
-
-static test_config_rfft CONFIG_RFFT_PERF[] = /* configurations walked by the PERFORMANCE_TEST loop of test_rfft_case0 */
-{
- {128, 0, 1, &testInput_f32[0]}, /* field order: fftSize, ifftFlag, doBitReverse, inputF32 */
- {512, 0, 1, &testInput_f32[0]},
-};
-
-#define RFFT_NUM_TESTS (sizeof(CONFIG_RFFT) / sizeof(CONFIG_RFFT[0]) ) /* number of entries in CONFIG_RFFT */
-#define RFFT_NUM_PERF_TESTS (sizeof(CONFIG_RFFT_PERF) / sizeof(CONFIG_RFFT_PERF[0]) ) /* number of entries in CONFIG_RFFT_PERF */
-
-// input and output buffers shared by the test; each in_*/out_* pointer is set up together with its guarded_* partner by NE10_SRC_ALLOC / NE10_DST_ALLOC, and the guarded_* pointers are the ones passed to free()
-static ne10_float32_t * guarded_in_c = NULL;
-static ne10_float32_t * guarded_in_neon = NULL;
-static ne10_float32_t * in_c = NULL; // input handed to ne10_rfft_float_c
-static ne10_float32_t * in_neon = NULL; // input handed to ne10_rfft_float_neon
-
-static ne10_float32_t * guarded_out_c = NULL;
-static ne10_float32_t * guarded_out_neon = NULL;
-static ne10_float32_t * out_c = NULL; // output written by ne10_rfft_float_c
-static ne10_float32_t * out_neon = NULL; // output written by ne10_rfft_float_neon
-
-static ne10_float32_t snr = 0.0f; // latest CAL_SNR_FLOAT32 result comparing out_c with out_neon
-
-#ifdef PERFORMANCE_TEST
-static ne10_int64_t time_c = 0; // GET_TIME result for the C run, later reduced by time_overhead_c
-static ne10_int64_t time_neon = 0; // GET_TIME result for the NEON run, later reduced by time_overhead_neon
-static ne10_int64_t time_overhead_c = 0; // GET_TIME result for the input copy loop alone (C buffers)
-static ne10_int64_t time_overhead_neon = 0; // GET_TIME result for the input copy loop alone (NEON buffers)
-static ne10_float32_t time_speedup = 0.0f; // time_c / time_neon
-static ne10_float32_t time_savings = 0.0f; // (time_c - time_neon) / time_c, as a percentage
-#endif
-
-/*
-** Conformance and performance test of the real FFT: compares
-** ne10_rfft_float_c against ne10_rfft_float_neon on identical input.
-**
-** Conformance part (built when SMOKE_TEST or REGRESSION_TEST is defined):
-** for every CONFIG_RFFT entry the forward transform is run with both
-** implementations, the output guards are checked, the SNR between the two
-** outputs is compared with SNR_THRESHOLD and every output element is
-** compared with assert_float_vec_equal. The forward outputs are then fed
-** back through an instance initialised with ifftFlag == 1 and the same
-** checks are repeated on the first fftSize output elements.
-**
-** Performance part (built when PERFORMANCE_TEST is defined): for every
-** CONFIG_RFFT_PERF entry both implementations are timed over TEST_COUNT
-** iterations, the time of the bare input-copy loop is subtracted, and the
-** result is reported through ne10_log.
-**
-** A configuration whose ne10_rfft_init_float call returns NE10_ERR is
-** reported and skipped, because the instance must not be used afterwards.
-*/
-void test_rfft_case0 (void)
-{
-    ne10_rfft_instance_f32_t S;
-    ne10_cfft_radix4_instance_f32_t S_CFFT;
-
-    ne10_uint16_t loop = 0;
-    ne10_uint16_t k = 0;
-    ne10_uint16_t i = 0;
-    ne10_uint16_t pos = 0;
-
-    test_config_rfft *config;
-    ne10_result_t status = NE10_OK;
-
-    fprintf (stdout, "----------%30s start\n", __FUNCTION__);
-
-    /* init input memory */
-    NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
-    NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the beginning and 16 extra bytes at the end
-
-    /* init dst memory */
-    NE10_DST_ALLOC (out_c, guarded_out_c, TEST_LENGTH_SAMPLES);
-    NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES);
-
-    /* Both macros are tested with defined(): a bare (REGRESSION_TEST) would
-    ** evaluate the macro's value and break when it is defined empty. */
-#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
-    for (loop = 0; loop < RFFT_NUM_TESTS; loop++)
-    {
-        config = &CONFIG_RFFT[loop];
-
-        /* Initialize the RFFT/RIFFT module */
-        status = ne10_rfft_init_float (&S, &S_CFFT, config->fftSize, config->ifftFlag);
-
-        if (status == NE10_ERR)
-        {
-            /* S is unusable after a failed init: record the failure and skip this configuration */
-            printf ("fft init error!\n");
-            assert_false ( (status == NE10_ERR));
-            continue;
-        }
-
-        /* copy input to input buffer and clear the output buffer */
-        for (i = 0; i < config->fftSize; i++)
-        {
-            in_c[i] = testInput_f32[i];
-            in_neon[i] = testInput_f32[i];
-        }
-
-        /* FFT test */
-        GUARD_ARRAY (out_c, config->fftSize * 2);
-        GUARD_ARRAY (out_neon, config->fftSize * 2);
-
-        ne10_rfft_float_c (&S, in_c, out_c, tmp_buffer);
-        ne10_rfft_float_neon (&S, in_neon, out_neon, tmp_buffer);
-
-        CHECK_ARRAY_GUARD (out_c, config->fftSize * 2);
-        CHECK_ARRAY_GUARD (out_neon, config->fftSize * 2);
-
-        //conformance test 1: compare snr
-        snr = CAL_SNR_FLOAT32 (out_c, out_neon, config->fftSize * 2);
-        assert_false ( (snr < SNR_THRESHOLD));
-
-        //conformance test 2: compare output of C and neon
-#if defined (DEBUG_TRACE)
-        printf ("-----------RFFT------------\n");
-        printf ("--------------------config %d\n", loop);
-        printf ("fftSize: %d\n", config->fftSize);
-        printf ("snr: %f\n", snr);
-#endif
-        for (pos = 0; pos < config->fftSize * 2; pos++)
-        {
-#if defined (DEBUG_TRACE)
-            printf ("pos %d \n", pos);
-            printf ("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], * (ne10_uint32_t*) &out_c[pos], out_neon[pos], * (ne10_uint32_t*) &out_neon[pos]);
-#endif
-            assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1);
-        }
-
-        /* IFFT test */
-        /* Initialize the RFFT/RIFFT module */
-        status = ne10_rfft_init_float (&S, &S_CFFT, config->fftSize, 1);
-
-        if (status == NE10_ERR)
-        {
-            /* same policy as for the forward instance */
-            printf ("fft init error!\n");
-            assert_false ( (status == NE10_ERR));
-            continue;
-        }
-
-        /* the forward outputs become the inverse inputs */
-        for (i = 0; i < config->fftSize * 2; i++)
-        {
-            in_c[i] = out_c[i];
-            in_neon[i] = out_neon[i];
-        }
-
-        GUARD_ARRAY (out_c, config->fftSize * 2);
-        GUARD_ARRAY (out_neon, config->fftSize * 2);
-
-        ne10_rfft_float_c (&S, in_c, out_c, tmp_buffer);
-        ne10_rfft_float_neon (&S, in_neon, out_neon, tmp_buffer);
-
-        CHECK_ARRAY_GUARD (out_c, config->fftSize * 2);
-        CHECK_ARRAY_GUARD (out_neon, config->fftSize * 2);
-
-        //conformance test 1: compare snr
-        snr = CAL_SNR_FLOAT32 (out_c, out_neon, config->fftSize);
-        assert_false ( (snr < SNR_THRESHOLD));
-
-        //conformance test 2: compare output of C and neon
-#if defined (DEBUG_TRACE)
-        printf ("-----------RIFFT------------\n");
-        printf ("--------------------config %d\n", loop);
-        printf ("fftSize: %d\n", config->fftSize);
-        printf ("snr: %f\n", snr);
-#endif
-        for (pos = 0; pos < config->fftSize; pos++)
-        {
-#if defined (DEBUG_TRACE)
-            printf ("pos %d \n", pos);
-            printf ("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], * (ne10_uint32_t*) &out_c[pos], out_neon[pos], * (ne10_uint32_t*) &out_neon[pos]);
-#endif
-            assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1);
-        }
-    }
-#endif
-
-#ifdef PERFORMANCE_TEST
-    fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
-    for (loop = 0; loop < RFFT_NUM_PERF_TESTS; loop++)
-    {
-        config = &CONFIG_RFFT_PERF[loop];
-
-        /* FFT test */
-        /* Initialize the RFFT/RIFFT module (once; the status is checked before any timing) */
-        status = ne10_rfft_init_float (&S, &S_CFFT, config->fftSize, config->ifftFlag);
-
-        if (status == NE10_ERR)
-        {
-            /* nothing meaningful can be timed with an unusable instance */
-            printf ("fft init error!\n");
-            continue;
-        }
-
-        /* cost of the input copy alone, subtracted from the timed run below */
-        GET_TIME
-        (
-            time_overhead_c,
-        {
-            for (k = 0; k < TEST_COUNT; k++)
-            {
-                for (i = 0; i < config->fftSize; i++)
-                {
-                    in_c[i] = testInput_f32[i];
-                }
-            }
-        }
-        );
-
-        GET_TIME
-        (
-            time_c,
-        {
-            for (k = 0; k < TEST_COUNT; k++)
-            {
-                for (i = 0; i < config->fftSize; i++)
-                {
-                    in_c[i] = testInput_f32[i];
-                }
-                ne10_rfft_float_c (&S, in_c, out_c, tmp_buffer);
-            }
-        }
-        );
-
-        GET_TIME
-        (
-            time_overhead_neon,
-        {
-            for (k = 0; k < TEST_COUNT; k++)
-            {
-                for (i = 0; i < config->fftSize; i++)
-                {
-                    in_neon[i] = testInput_f32[i];
-                }
-            }
-        }
-        );
-
-        GET_TIME
-        (
-            time_neon,
-        {
-            for (k = 0; k < TEST_COUNT; k++)
-            {
-                for (i = 0; i < config->fftSize; i++)
-                {
-                    in_neon[i] = testInput_f32[i];
-                }
-                ne10_rfft_float_neon (&S, in_neon, out_neon, tmp_buffer);
-            }
-        }
-        );
-
-        time_c = time_c - time_overhead_c;
-        time_neon = time_neon - time_overhead_neon;
-        time_speedup = (ne10_float32_t) time_c / time_neon;
-        time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
-        ne10_log (__FUNCTION__, "RFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", config->fftSize, time_c, time_neon, time_savings, time_speedup);
-
-        /* IFFT test */
-        /* Initialize the RFFT/RIFFT module */
-        status = ne10_rfft_init_float (&S, &S_CFFT, config->fftSize, 1);
-
-        if (status == NE10_ERR)
-        {
-            printf ("fft init error!\n");
-            continue;
-        }
-
-        GET_TIME
-        (
-            time_overhead_c,
-        {
-            for (k = 0; k < TEST_COUNT; k++)
-            {
-                for (i = 0; i < 2 * config->fftSize; i++)
-                {
-                    in_c[i] = out_c[i];
-                }
-            }
-        }
-        );
-
-        GET_TIME
-        (
-            time_c,
-        {
-            for (k = 0; k < TEST_COUNT; k++)
-            {
-                for (i = 0; i < 2 * config->fftSize; i++)
-                {
-                    in_c[i] = out_c[i];
-                }
-                ne10_rfft_float_c (&S, in_c, out_c, tmp_buffer);
-            }
-        }
-        );
-
-        GET_TIME
-        (
-            time_overhead_neon,
-        {
-            for (k = 0; k < TEST_COUNT; k++)
-            {
-                for (i = 0; i < 2 * config->fftSize; i++)
-                {
-                    in_neon[i] = out_neon[i];
-                }
-            }
-        }
-        );
-
-        GET_TIME
-        (
-            time_neon,
-        {
-            for (k = 0; k < TEST_COUNT; k++)
-            {
-                for (i = 0; i < 2 * config->fftSize; i++)
-                {
-                    in_neon[i] = out_neon[i];
-                }
-                ne10_rfft_float_neon (&S, in_neon, out_neon, tmp_buffer);
-            }
-        }
-        );
-
-        time_c = time_c - time_overhead_c;
-        time_neon = time_neon - time_overhead_neon;
-        time_speedup = (ne10_float32_t) time_c / time_neon;
-        time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
-        ne10_log (__FUNCTION__, "RIFFT%20d%20lld%20lld%19.2f%%%18.2f:1\n", config->fftSize, time_c, time_neon, time_savings, time_speedup);
-    }
-#endif
-
-    /* the guarded_* pointers are released here, not the in_* / out_* working pointers */
-    free (guarded_in_c);
-    free (guarded_in_neon);
-    free (guarded_out_c);
-    free (guarded_out_neon);
-    fprintf (stdout, "----------%30s end\n", __FUNCTION__);
-}
-
-void test_rfft() /* test function handed to run_test; runs the single RFFT case, test_rfft_case0 */
-{
- test_rfft_case0();
-}
-
-static void my_test_setup (void) /* setup hook registered through fixture_setup */
-{
- ne10_log_buffer_ptr = ne10_log_buffer; /* point the log write pointer back at ne10_log_buffer */
-}
-
-void test_fixture_rfft (void) /* RFFT test fixture: registers the setup hook and runs test_rfft between the fixture start/end calls */
-{
- test_fixture_start(); // starts a fixture
-
- fixture_setup (my_test_setup); // registers my_test_setup as this fixture's setup function
-
- run_test (test_rfft); // runs the RFFT test function
-
- test_fixture_end(); // ends a fixture
-}
# Define dsp test files.
set(NE10_TEST_DSP_SRCS
${PROJECT_SOURCE_DIR}/modules/dsp/test/test_main.c
- ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_cfft.c
- ${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_rfft.c
${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_fft_float32.c
${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_fft_int32.c
${PROJECT_SOURCE_DIR}/modules/dsp/test/test_suite_fft_int16.c