1 # BCM2835 "GPU_FFT" release 3.0
3 # Copyright (c) 2015, Andrew Holme.
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are met:
8 # * Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer.
10 # * Redistributions in binary form must reproduce the above copyright
11 # notice, this list of conditions and the following disclaimer in the
12 # documentation and/or other materials provided with the distribution.
13 # * Neither the name of the copyright holder nor the
14 # names of its contributors may be used to endorse or promote products
15 # derived from this software without specific prior written permission.
17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
21 # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 .include "gpu_fft_ex.qinc"
32 ##############################################################################
# Twiddle source indices, passed as the last argument of load_tw.
# The trailing comment on each line names the pointer register the index is
# paired with at the load_tw call sites: TW32_BASE with rx_tw_shared (pass 1)
# and TW16_P4_BASE with rx_tw_unique (pass 4). Both are 0.
# NOTE(review): presumably each value is an index relative to its own pointer,
# which is why two different symbols can both be 0 - confirm against load_tw.
# NOTE(review): TW16_BASE and TW16_P2_STEP / TW16_P3_STEP / TW16_P4_STEP are
# used by load_tw further down but are not defined in the lines visible here.
35 .set TW32_BASE, 0 # rx_tw_shared
41 .set TW16_P4_BASE, 0 # rx_tw_unique
43 ##############################################################################
51 ##############################################################################
# Symbolic register names.
# Prefix convention, as far as these lines show: ra_* names are bound to raN,
# rb_* names to rbN, and rx_* names to either file (rx_tw_shared is ra11 while
# rx_tw_unique is rb11).
#
# Registers named by the slave `proc` lines below. The top-level code copies
# the two save registers into ra_save_16 / ra_save_32 and derives ra_sync from
# rx_sync_slave.
63 .set rx_save_slave_16, rb4
67 .set rx_sync_slave, rb6
75 .set rx_save_slave_32, rb10
# Twiddle pointers; both are loaded from unif during initialisation.
77 .set rx_tw_shared, ra11
78 .set rx_tw_unique, rb11
# Base registers for twiddle real / imaginary parts (going by the names).
# NOTE(review): the trailing "9" presumably is the number of consecutive
# registers reserved starting at this base - confirm.
80 .set ra_tw_re, ra12 # 9
81 .set rb_tw_im, rb12 # 9
# Registers that hold the 32-bit mask constants spelled out in their names;
# they are loaded with those values in the constants section.
88 .set rx_0x55555555, ra29
89 .set rx_0x33333333, ra30
90 .set rx_0x0F0F0F0F, ra31
91 .set rx_0x00FF00FF, rb24
92 .set rx_0x0000FFFF, rb25
101 ##############################################################################
# Load run-time constants into registers.
#
# rb_STAGES receives the assembly-time symbol STAGES (defined outside the
# visible lines). It is used as the shift count in the
# `shr.setf -, ra_points, rb_STAGES` test of every pass.
104 mov rb_STAGES, STAGES
# Five alternating-bit masks with group widths 1, 2, 4, 8 and 16 bits.
# NOTE(review): this is the usual mask set for reversing the bits of a 32-bit
# word by successive swaps; the consumer is not visible here (presumably the
# LOAD_REVERSED path of pass 1) - confirm.
113 mov rx_0x55555555, 0x55555555
114 mov rx_0x33333333, 0x33333333
115 mov rx_0x0F0F0F0F, 0x0F0F0F0F
116 mov rx_0x00FF00FF, 0x00FF00FF
117 mov rx_0x0000FFFF, 0x0000FFFF
# Precomputed vdw_setup_0 words. The _16 and _32 variants differ in the first
# argument (16 vs 32); the ra_ and rb_ variants differ in the first argument
# of dma_h32 (0 vs 32).
# NOTE(review): presumably these are VPM-to-memory DMA store setup values
# (row count, row length, horizontal 32-bit start address) - confirm against
# the assembler's definition of vdw_setup_0 / dma_h32.
119 mov ra_vdw_16, vdw_setup_0(16, 16, dma_h32( 0,0))
120 mov rb_vdw_16, vdw_setup_0(16, 16, dma_h32(32,0))
121 mov ra_vdw_32, vdw_setup_0(32, 16, dma_h32( 0,0))
122 mov rb_vdw_32, vdw_setup_0(32, 16, dma_h32(32,0))
124 ##############################################################################
# Fetch the two twiddle pointers from the uniform stream: first the one kept
# in rx_tw_shared, then the one kept in rx_tw_unique.
# NOTE(review): successive reads of unif are expected to return successive
# uniforms, so the order of these two lines must match the order in which the
# host supplies the values - confirm against the host-side setup code.
127 mov rx_tw_shared, unif
128 mov rx_tw_unique, unif
130 ##############################################################################
# Invoke the inst_vpm macro (defined outside this excerpt) with rb_inst and
# the four VPM registers ra_vpm_lo / ra_vpm_hi / rb_vpm_lo / rb_vpm_hi.
# NOTE(review): presumably this derives per-instance VPM setup values from the
# instance number in rb_inst; the instruction that loads rb_inst is not
# visible here - confirm.
134 inst_vpm rb_inst, ra_vpm_lo, ra_vpm_hi, rb_vpm_lo, rb_vpm_hi
136 ##############################################################################
137 # Master/slave procedures
# Each `proc REG, LABEL` line is followed by a body_* macro that emits the
# procedure's code. The registers named here are later used as code addresses:
# the passes execute `bra ra_link_1, ra_sync`, and the top-level code copies
# rx_save_slave_16 / rx_save_slave_32 into ra_save_16 / ra_save_32.
# NOTE(review): `proc` is defined outside this excerpt; presumably it stores
# the address of the following body in REG and branches to LABEL so the body
# is skipped during initialisation. The `:1` labels targeted by r:1f are not
# visible here - confirm.
#
# 16-row save procedure; its body macro takes ra_vpm_lo and ra_vdw_16.
139 proc ra_save_16, r:1f
140 body_ra_save_16 ra_vpm_lo, ra_vdw_16
# Slave variant of the 16-row save; its body macro takes ra_vpm_lo only.
143 proc rx_save_slave_16, r:1f
144 body_rx_save_slave_16 ra_vpm_lo
# 32-row save procedure.
# NOTE(review): no body macro follows this proc in the visible lines.
147 proc ra_save_32, r:1f
# Slave variant of the 32-row save.
151 proc rx_save_slave_32, r:1f
152 body_rx_save_slave_32
# Slave sync procedure. Unlike the procs above, this one targets the label
# `main` rather than a local `1:` label.
159 proc rx_sync_slave, r:main
162 ##############################################################################
# Bodies of the pass subroutines that the per-pass code branches to
# (r:pass_1 .. r:pass_4).
# NOTE(review): the labels themselves are not visible here. Presumably pass_1
# labels the 32-point body and pass_2 / pass_3 / pass_4 share the 16-point
# body - this matches pass 1 stepping ra_points by rb_0x100 and the other
# passes by rb_0x80 - confirm.
#
# 32-point pass body, instantiated with the LOAD_REVERSED option.
169 body_pass_32 LOAD_REVERSED
# 16-point pass body, instantiated with the LOAD_STRAIGHT option.
174 body_pass_16 LOAD_STRAIGHT
176 ##############################################################################
# Top level: pick master or slave procedures, then fetch the buffer pointers.
#
# The three .ifnz instructions below take effect only where the flags set by
# an earlier instruction (not visible here) are non-zero. In that case:
#   ra_sync    = rx_sync_slave + r0
#   ra_save_16 = rx_save_slave_16
#   ra_save_32 = rx_save_slave_32
# NOTE(review): r0 and the flags are produced by lines missing from this
# excerpt; presumably the flags come from the instance number in rb_inst and
# r0 is a per-instance code offset - confirm.
183 add.ifnz ra_sync, rx_sync_slave, r0
184 mov.ifnz ra_save_16, rx_save_slave_16
185 mov.ifnz ra_save_32, rx_save_slave_32
# Read the next two uniforms. The first read also sets the flags (.setf), so
# a following conditional instruction can detect the "null" case named in the
# comment; that conditional is not visible in this excerpt.
188 mov.setf ra_addr_x, unif # Ping buffer or null
189 mov rb_addr_y, unif # Pong buffer or IRQ enable
193 ##############################################################################
# Pass 1: branches to pass_1 and steps ra_points by rb_0x100.
#
# Load twiddles via the shared pointer: source TW16_BASE into destination
# TW16+3, and source TW32_BASE into destination TW32+0.
# NOTE(review): load_tw is defined outside this excerpt; the argument roles
# (pointer, destination index, source index) are inferred from the names in
# this file - confirm.
196 load_tw rx_tw_shared, TW16+3, TW16_BASE
197 load_tw rx_tw_shared, TW32+0, TW32_BASE
# Branch to pass_1, leaving the return address in ra_link_1.
201 brr ra_link_1, r:pass_1
# Advance the point counter by rb_0x100.
# NOTE(review): QPU branches take effect only after the following
# instructions have executed, so this add presumably sits in a delay slot of
# the brr above; the intervening lines are not visible - confirm.
204 add ra_points, ra_points, rb_0x100
# Flags only (destination "-"): the result is non-zero once ra_points has a
# bit set at or above bit position STAGES. The conditional branch that
# consumes these flags is not visible in this excerpt.
206 shr.setf -, ra_points, rb_STAGES
# Second advance of the point counter by rb_0x100.
# NOTE(review): presumably in a delay slot of the hidden loop-back branch.
211 add ra_points, ra_points, rb_0x100
# Branch to the address held in ra_sync, with the return address again placed
# in ra_link_1.
213 bra ra_link_1, ra_sync
218 ##############################################################################
# Pass 2: branches to pass_2 and steps ra_points by rb_0x80.
#
# Load twiddles via the shared pointer: source TW16_BASE into destination
# TW16+3, and source TW16_P2_STEP into destination TW16_STEP.
# NOTE(review): load_tw argument roles are inferred from the names - confirm.
222 load_tw rx_tw_shared, TW16+3, TW16_BASE
223 load_tw rx_tw_shared, TW16_STEP, TW16_P2_STEP
# Branch to pass_2, leaving the return address in ra_link_1.
227 brr ra_link_1, r:pass_2
# Advance the point counter by rb_0x80.
# NOTE(review): presumably in a delay slot of the brr above - confirm.
230 add ra_points, ra_points, rb_0x80
# Inner loop test. Flags only: ra_points AND rb_0xFFF. The initialisation of
# rb_0xFFF is not visible here; its name indicates the value 0xFFF, i.e. the
# low 12 bits of ra_points.
232 and.setf -, ra_points, rb_0xFFF
# Loop back to pass_2, without recording a link (destination "-"), while the
# masked value is non-zero in all elements.
234 brr.allnz -, r:pass_2
# Advance the point counter only where the masked value was non-zero, so the
# add applies when the loop-back branch is taken.
237 add.ifnz ra_points, ra_points, rb_0x80
# Outer loop test. Flags only: the result is non-zero once ra_points has a
# bit set at or above bit position STAGES. The branch that consumes these
# flags is not visible in this excerpt.
241 shr.setf -, ra_points, rb_STAGES
# Second unconditional advance of the point counter by rb_0x80.
# NOTE(review): presumably in a delay slot of the hidden outer loop-back.
246 add ra_points, ra_points, rb_0x80
# Branch to the address held in ra_sync, return address in ra_link_1.
248 bra ra_link_1, ra_sync
253 ##############################################################################
# Pass 3: branches to pass_3 and steps ra_points by rb_0x80.
#
# Load twiddles via the shared pointer: source TW16_BASE into destination
# TW16+3, and source TW16_P3_STEP into destination TW16_STEP.
# NOTE(review): load_tw argument roles are inferred from the names - confirm.
257 load_tw rx_tw_shared, TW16+3, TW16_BASE
258 load_tw rx_tw_shared, TW16_STEP, TW16_P3_STEP
# Branch to pass_3, leaving the return address in ra_link_1.
263 brr ra_link_1, r:pass_3
# Advance the point counter by rb_0x80.
# NOTE(review): presumably in a delay slot of the brr above - confirm.
266 add ra_points, ra_points, rb_0x80
# Flags only: the result is non-zero once ra_points has a bit set at or above
# bit position STAGES. The branch that consumes these flags is not visible.
271 shr.setf -, ra_points, rb_STAGES
# Move the saved return address back by r0, so the next return through
# ra_link_1 lands r0 bytes earlier than the address recorded by the branch.
# NOTE(review): r0 is set by a line not visible here; presumably this
# re-enters the loop at an earlier instruction - confirm the offset.
275 sub ra_link_1, ra_link_1, r0
# Second advance of the point counter by rb_0x80.
276 add ra_points, ra_points, rb_0x80
# Branch to the address held in ra_sync, return address in ra_link_1.
278 bra ra_link_1, ra_sync
283 ##############################################################################
# Pass 4: branches to pass_4 and steps ra_points by rb_0x80.
#
# This is the only pass whose base twiddles come through rx_tw_unique (source
# TW16_P4_BASE into destination TW16+3). The step twiddles still come through
# rx_tw_shared (source TW16_P4_STEP into destination TW16_STEP).
# NOTE(review): load_tw argument roles are inferred from the names - confirm.
287 load_tw rx_tw_unique, TW16+3, TW16_P4_BASE
288 load_tw rx_tw_shared, TW16_STEP, TW16_P4_STEP
# Branch to pass_4, leaving the return address in ra_link_1.
292 brr ra_link_1, r:pass_4
# Advance the point counter by rb_0x80.
# NOTE(review): presumably in a delay slot of the brr above - confirm.
295 add ra_points, ra_points, rb_0x80
# Flags only: the result is non-zero once ra_points has a bit set at or above
# bit position STAGES. The branch that consumes these flags is not visible.
299 shr.setf -, ra_points, rb_STAGES
# Second advance of the point counter by rb_0x80.
# NOTE(review): presumably in a delay slot of the hidden loop-back branch.
304 add ra_points, ra_points, rb_0x80
# Branch to the address held in ra_sync, return address in ra_link_1.
306 bra ra_link_1, ra_sync
311 ##############################################################################