2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
4 @// Use of this source code is governed by a BSD-style license
5 @// that can be found in the LICENSE file in the root of the source
6 @// tree. An additional intellectual property rights grant can be found
7 @// in the file PATENTS. All contributing project authors may
8 @// be found in the AUTHORS file in the root of the source tree.
10 @// This file was originally licensed as follows. It has been
11 @// relicensed with permission from the copyright holders.
15 @// File Name: armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.s
16 @// OpenMAX DL: v1.0.2
17 @// Last Modified Revision: 7766
18 @// Last Modified Date: Thu, 27 Sep 2007
20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
25 @// Compute the first-stage radix-8 FFT for an N-point complex signal
30 @// Include standard headers
32 #include "dl/api/arm/armCOMM_s.h"
33 #include "dl/api/arm/omxtypes_s.h"
36 @// Import symbols required from other files
37 @// (For example tables)
40 @// Set debugging level
41 @//DEBUG_ON SETL {TRUE}
45 @// Guarding implementation by the processor name
50 @// Guarding implementation by the processor name
60 @// dest buffer for the next stage (not pSrc for first stage)
@// Held in r5; FFTSTAGE copies it into pDst once the first stage has been written.
61 #define pPingPongBuf r5
67 @//Local Scratch Registers
70 @// Reuse grpSize as setCount
@// Held in r4; per the note inside FFTSTAGE, outPointStep equals pointStep in the first stage.
73 #define outPointStep r4
121 @// reuse dXr7 and dXi7
178 @// reuse dYr4 and dYi4
@// 0x5A82 = 23170, i.e. 1/sqrt(2) scaled by 2^15 and rounded; FFTSTAGE loads it into t0.
196 .set ONEBYSQRT2, 0x00005A82 @// Q15 format
199 .MACRO FFTSTAGE scaled, inverse , name
@// FFTSTAGE: body of the first-stage radix-8 FFT pass; expanded once per entry point.
@//   scaled  - the string "TRUE" selects the first copy of the butterfly/store
@//             sequence below, any other value the second copy (the entry points
@//             with Sfs in their name pass "TRUE")
@//   inverse - the string "TRUE" stores each output from the register pair named
@//             after it (dYr1/dYi1 -> y1, ...); otherwise the forward ordering is
@//             used, where y1/y7, y3/y5 and y2/y6 come from the swapped pairs
@//   name    - suffix appended to the grpZeroSetLoop branch label so that every
@//             expansion refers to its own loop label
@// On exit pSrc points at the start of the data just written and pDst holds
@// pPingPongBuf, ready for the next stage.
201 @// Define stack arguments
203 @// Update pSubFFTSize and pSubFFTNum regs
204 MOV subFFTSize,#8 @// subFFTSize = 8 after this radix-8 first stage
205 LDR t0,=ONEBYSQRT2 @// t0=(1/sqrt(2)) as Q15 format
207 @// Note: setCount = subFFTNum/8 (reuse the grpSize reg for setCount)
208 LSR grpSize,subFFTNum,#3 @// grpSize (= setCount) = subFFTNum/8
209 MOV subFFTNum,grpSize @// subFFTNum is reduced to subFFTNum/8 as well
212 @// pT0+1 increments pT0 by 4 bytes
213 @// pointStep is held in bytes: grpSize points * 4 bytes per point
214 @// Note: outPointStep = pointStep for firststage
216 MOV pointStep,grpSize,LSL #2 @// pointStep = 4*grpSize bytes
219 @// Calculate the step of input data for the next set
220 @//MOV step1,pointStep,LSL #1 @// step1 = 2*pointStep
@// data[0]..data[6] of the first set are loaded here, interleaved with the step
@// setup; inside the loop the same registers are reloaded for the next iteration.
221 VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0]
222 MOV step1,grpSize,LSL #3 @// step1 = 8*grpSize = 2*pointStep (output stride)
224 MOV step2,pointStep,LSL #3 @// step2 = 8*pointStep for now
225 VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
226 SUB step2,step2,pointStep @// step2 = 7*pointStep
227 RSB setStep,step2,#16 @// setStep = - 7*pointStep+16
231 VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
232 VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
233 VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
234 VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
235 VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
236 @// grp = 0 a special case since all the twiddle factors are 1
237 @// Loop on the sets : 4 sets at a time
240 VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7] & update pSrc for the next set
241 @// setStep = -7*pointStep + 16
@// i.e. pSrc goes back to data[0] and forward by the 16 bytes (4 points) just consumed
243 @// Decrement setcount
244 SUBS setCount,setCount,#4 @// decrement the set loop counter; the BGT at the loop bottom tests these flags
@// First copy of the butterfly/store sequence: assembled when scaled is "TRUE"
247 .ifeqs "\scaled", "TRUE"
248 @// finish first stage of 8 point FFT
255 @// finish second stage of 8 point FFT
262 @// finish third stage of 8 point FFT
@// Even outputs first: consecutive stores are step1 = 2*pointStep bytes apart
266 VST2 {dYr0,dYi0},[pDst :128],step1 @// store y0
268 .ifeqs "\inverse", "TRUE"
274 VST2 {dYr2,dYi2},[pDst :128],step1 @// store y2
278 VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
282 VST2 {dYr6,dYi6},[pDst :128],step1 @// store y6
@// Forward ordering: y2 is held in dYr6/dYi6 and y6 in dYr2/dYi2
290 VST2 {dYr6,dYi6},[pDst :128],step1 @// store y2
295 VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
298 VST2 {dYr2,dYi2},[pDst :128],step1 @// store y6
303 @// finish first stage of 8 point FFT
308 @// finish second stage of 8 point FFT
311 VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0] for next iteration
314 VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
318 VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
321 VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
324 @// finish third stage of 8 point FFT
326 .ifeqs "\inverse", "TRUE"
329 VQRDMULH dT1,dVr5,dT0[0] @// use dVi0 for dT1
330 VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
331 VQRDMULH dVi5,dVi5,dT0[0]
333 VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
334 VSUB dVr5,dT1,dVi5 @// a * V5
337 VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
340 VQRDMULH dT1,dVr7,dT0[0]
341 VQRDMULH dVi7,dVi7,dT0[0]
347 VADD dVr7,dT1,dVi7 @// b * V7
349 SUB pDst, pDst, step2 @// set pDst to y1: y0 + 4*step1 - 7*pointStep = y0 + pointStep
353 VST2 {dYr1,dYi1},[pDst :128],step1 @// store y1
358 VST2 {dYr3,dYi3},[pDst :128],step1 @// store y3
359 VST2 {dYr5,dYi5},[pDst :128],step1 @// store y5
@// NOTE(review): two spellings of the y7 store follow (explicit #16 and writeback !);
@// the pDst arithmetic at the loop bottom assumes a single store that advances pDst
@// by 16 bytes - confirm that only one of them is assembled. The same pair recurs
@// at each later y7 store in this macro.
361 VST2 {dYr7,dYi7},[pDst :128],#16 @// store y7
363 VST2 {dYr7,dYi7},[pDst :128]! @// store y7
368 VQRDMULH dT1,dVr7,dT0[0]
369 VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
370 VQRDMULH dVi7,dVi7,dT0[0]
372 VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
373 VADD dVr7,dT1,dVi7 @// b * V7
376 VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
379 VQRDMULH dT1,dVr5,dT0[0] @// use dVi0 for dT1
380 VQRDMULH dVi5,dVi5,dT0[0]
384 SUB pDst, pDst, step2 @// set pDst to y1: y0 + 4*step1 - 7*pointStep = y0 + pointStep
386 VSUB dVr5,dT1,dVi5 @// a * V5
@// Forward ordering: the odd outputs come from the swapped register pairs
392 VST2 {dYr7,dYi7},[pDst :128],step1 @// store y1
397 VST2 {dYr5,dYi5},[pDst :128],step1 @// store y3
398 VST2 {dYr3,dYi3},[pDst :128],step1 @// store y5
400 VST2 {dYr1,dYi1},[pDst :128],#16 @// store y7
402 VST2 {dYr1,dYi1},[pDst :128]! @// store y7
@// Second copy of the butterfly/store sequence (presumably the side taken when
@// scaled is not "TRUE" - confirm against the conditional directives)
410 @// finish first stage of 8 point FFT
417 @// finish second stage of 8 point FFT
424 @// finish third stage of 8 point FFT
@// Even outputs first: consecutive stores are step1 = 2*pointStep bytes apart
428 VST2 {dYr0,dYi0},[pDst :128],step1 @// store y0
430 .ifeqs "\inverse", "TRUE"
436 VST2 {dYr2,dYi2},[pDst :128],step1 @// store y2
440 VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
444 VST2 {dYr6,dYi6},[pDst :128],step1 @// store y6
@// Forward ordering: y2 is held in dYr6/dYi6 and y6 in dYr2/dYi2
452 VST2 {dYr6,dYi6},[pDst :128],step1 @// store y2
457 VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
460 VST2 {dYr2,dYi2},[pDst :128],step1 @// store y6
465 @// finish first stage of 8 point FFT
470 @// finish second stage of 8 point FFT
473 VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0] for next iteration
476 VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
480 VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
483 VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
486 @// finish third stage of 8 point FFT
488 .ifeqs "\inverse", "TRUE"
491 VQRDMULH dT1,dVr5,dT0[0] @// use dVi0 for dT1
492 VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
493 VQRDMULH dVi5,dVi5,dT0[0]
495 VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
496 VSUB dVr5,dT1,dVi5 @// a * V5
499 VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
502 VQRDMULH dT1,dVr7,dT0[0]
503 VQRDMULH dVi7,dVi7,dT0[0]
509 VADD dVr7,dT1,dVi7 @// b * V7
511 SUB pDst, pDst, step2 @// set pDst to y1: y0 + 4*step1 - 7*pointStep = y0 + pointStep
515 VST2 {dYr1,dYi1},[pDst :128],step1 @// store y1
520 VST2 {dYr3,dYi3},[pDst :128],step1 @// store y3
521 VST2 {dYr5,dYi5},[pDst :128],step1 @// store y5
523 VST2 {dYr7,dYi7},[pDst :128],#16 @// store y7
525 VST2 {dYr7,dYi7},[pDst :128]! @// store y7
530 VQRDMULH dT1,dVr7,dT0[0]
531 VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
532 VQRDMULH dVi7,dVi7,dT0[0]
534 VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
535 VADD dVr7,dT1,dVi7 @// b * V7
538 VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
541 VQRDMULH dT1,dVr5,dT0[0] @// use dVi0 for dT1
542 VQRDMULH dVi5,dVi5,dT0[0]
546 SUB pDst, pDst, step2 @// set pDst to y1: y0 + 4*step1 - 7*pointStep = y0 + pointStep
548 VSUB dVr5,dT1,dVi5 @// a * V5
@// Forward ordering: the odd outputs come from the swapped register pairs
554 VST2 {dYr7,dYi7},[pDst :128],step1 @// store y1
559 VST2 {dYr5,dYi5},[pDst :128],step1 @// store y3
560 VST2 {dYr3,dYi3},[pDst :128],step1 @// store y5
562 VST2 {dYr1,dYi1},[pDst :128],#16 @// store y7
564 VST2 {dYr1,dYi1},[pDst :128]! @// store y7
572 SUB pDst, pDst, step2 @// update pDst for the next set: back to y0, 16 bytes further on
573 BGT grpZeroSetLoop\name @// repeat while setCount > 0 (flags from the SUBS above)
576 @// reset pSrc to pDst for the next stage
@// The loop advanced pDst by 16 bytes per pass, pointStep bytes in total
@// (assuming setCount was a multiple of 4), so this rewinds to the first output.
577 SUB pSrc,pDst,pointStep @// pSrc = pDst - pointStep (pointStep = 4*grpSize bytes)
578 MOV pDst,pPingPongBuf @// the next stage writes into the ping-pong buffer
585 @// Allocate stack memory required by the function
@// Forward transform entry point: expands FFTSTAGE with scaled = "FALSE" and
@// inverse = "FALSE"; FWD is the suffix for the loop label of this expansion.
@// NOTE(review): the r4 argument is interpreted by M_START (armCOMM_s.h), presumably
@// the highest core register to preserve - confirm against that header.
588 M_START armSP_FFTFwd_CToC_SC16_Radix8_fs_OutOfPlace_unsafe,r4
589 FFTSTAGE "FALSE","FALSE",FWD
@// Inverse transform entry point: expands FFTSTAGE with scaled = "FALSE" and
@// inverse = "TRUE"; INV is the suffix for the loop label of this expansion.
@// NOTE(review): the r4 argument is interpreted by M_START (armCOMM_s.h), presumably
@// the highest core register to preserve - confirm against that header.
593 M_START armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe,r4
594 FFTSTAGE "FALSE","TRUE",INV
@// Forward Sfs entry point: expands FFTSTAGE with scaled = "TRUE" and
@// inverse = "FALSE"; FWDSFS is the suffix for the loop label of this expansion.
@// NOTE(review): the r4 argument is interpreted by M_START (armCOMM_s.h), presumably
@// the highest core register to preserve - confirm against that header.
598 M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe,r4
599 FFTSTAGE "TRUE","FALSE",FWDSFS
@// Inverse Sfs entry point: expands FFTSTAGE with scaled = "TRUE" and
@// inverse = "TRUE"; INVSFS is the suffix for the loop label of this expansion.
@// NOTE(review): the r4 argument is interpreted by M_START (armCOMM_s.h), presumably
@// the highest core register to preserve - confirm against that header.
603 M_START armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe,r4
604 FFTSTAGE "TRUE","TRUE",INVSFS