2 @ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
4 @ Use of this source code is governed by a BSD-style license
5 @ that can be found in the LICENSE file in the root of the source
6 @ tree. An additional intellectual property rights grant can be found
7 @ in the file PATENTS. All contributing project authors may
8 @ be found in the AUTHORS file in the root of the source tree.
11 @ This file contains some minimum and maximum functions, optimized for
12 @ ARM Neon platform. The description header can be found in
13 @ signal_processing_library.h
15 @ The reference C code is in min_max_operations.c. The code here is essentially
16 @ that code with its loops unrolled by 8 using Neon instructions. The results are bit-exact.
18 #include "webrtc/system_wrappers/interface/asm_defines.h"
@ Declare the six entry points implemented in this file, one per function.
@ GLOBAL_FUNCTION is a macro that comes from the asm_defines.h include above;
@ presumably it exports each symbol for the linker -- confirm in that header.
20 GLOBAL_FUNCTION WebRtcSpl_MaxAbsValueW16Neon
21 GLOBAL_FUNCTION WebRtcSpl_MaxAbsValueW32Neon
22 GLOBAL_FUNCTION WebRtcSpl_MaxValueW16Neon
23 GLOBAL_FUNCTION WebRtcSpl_MaxValueW32Neon
24 GLOBAL_FUNCTION WebRtcSpl_MinValueW16Neon
25 GLOBAL_FUNCTION WebRtcSpl_MinValueW32Neon
28 @ int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, int length);
@
@ Computes the maximum absolute value of the int16_t elements of vector.
@ Registers as used below: r1 = element counter, r2 = running result,
@ r3 = current element and r12 = its absolute value in the scalar loop,
@ q12 = per-lane running maximum, q13 = current block of absolute values.
@ NOTE(review): the compares and loads that feed the conditional branches are
@ not visible in this view, so the branch conditions described here are
@ inferred from the labels and neighbouring comments -- confirm.
29 DEFINE_FUNCTION WebRtcSpl_MaxAbsValueW16Neon
30 mov r2, #-1 @ Initialize the return value; the early exits below leave r2 at -1.
32 beq END_MAX_ABS_VALUE_W16 @ Early exit; presumably the NULL vector check.
34 ble END_MAX_ABS_VALUE_W16 @ Early exit; presumably the length <= 0 check.
37 blt LOOP_MAX_ABS_VALUE_W16 @ Bypass the Neon loop; presumably fewer than 8 elements.
40 sub r1, #8 @ Counter for loops: pre-biased by one block of 8.

@ Vector loop: one q register holds eight 16-bit lanes, so each pass folds
@ eight elements into q12 as q12 := max(q12, |q13|), lane by lane.
42 LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W16:
45 vabs.s16 q13, q13 @ Note vabs leaves -32768 (0x8000) unchanged.
46 vmax.u16 q12, q13 @ Compare as u16 so 0x8000 ranks as 32768 and is not lost.
47 bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W16

49 @ Find the maximum value in the Neon registers and move it to r2.
@ Each vpmax takes the maximum of adjacent lane pairs, so two passes fold the
@ four u16 lanes of d24 (the low half of q12) into lane 0.
51 vpmax.u16 d24, d24, d24
52 vpmax.u16 d24, d24, d24
55 beq END_MAX_ABS_VALUE_W16 @ Presumably taken when no leftover elements remain.

@ Scalar loop for the elements the vector loop did not cover.
57 LOOP_MAX_ABS_VALUE_W16:
@ Branchless abs: (r3 asr 31) is 0 for r3 >= 0 and -1 for r3 < 0, so the
@ eor/sub pair computes (r3 ^ mask) - mask, which is the absolute value of r3.
59 eor r12, r3, r3, asr #31 @ eor and then sub, to get absolute value.
60 sub r12, r12, r3, asr #31
64 bne LOOP_MAX_ABS_VALUE_W16

66 END_MAX_ABS_VALUE_W16:
67 cmp r2, #0x8000 @ Guard against -32768: its magnitude 32768 does not fit in int16_t.
74 @ int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, int length);
@
@ Computes the maximum absolute value of the int32_t elements of vector.
@ Registers as used below: r0 = read pointer (post-incremented by the loads),
@ r1 = element counter, r2 = running result, r3 = current element and
@ r12 = its absolute value in the scalar loop, q11 = per-lane running maximum,
@ q13/q14 = the eight words loaded per vector pass.
@ NOTE(review): several instructions, including the EXIT label and the return,
@ are not visible in this view; statements about them are inferred -- confirm.
75 DEFINE_FUNCTION WebRtcSpl_MaxAbsValueW32Neon
78 beq EXIT @ Return -1 for a NULL pointer.
81 ble EXIT @ Return -1 if length <= 0.
86 blt LOOP_MAX_ABS_VALUE_W32 @ Bypass the Neon loop; presumably fewer than 8 elements.

88 sub r1, #8 @ Counter for loops: pre-biased by one block of 8.

@ Vector loop: each pass loads eight 32-bit words (two q registers) and
@ advances r0 by 32 bytes.
90 LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32:
91 vld1.32 {q13, q14}, [r0]!
92 subs r1, #8 @ Counter for loops; sets the flags tested by the bge below.
93 vabs.s32 q13, q13 @ vabs leaves 0x80000000 unchanged.
95 vmax.u32 q11, q13 @ Compare as u32 so 0x80000000 ranks highest and is not lost.
97 bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32 @ Neon ops leave the flags alone, so this tests the subs result.

99 @ Find the maximum value in the Neon registers and move it to r2.
@ Pairwise max of d24 with itself: lane 0 becomes the larger of the two u32 lanes.
102 vpmax.u32 d24, d24, d24
105 beq END_MAX_ABS_VALUE_W32 @ Presumably taken when no leftover elements remain.

@ Scalar loop for the elements the vector loop did not cover.
107 LOOP_MAX_ABS_VALUE_W32:
@ Branchless abs: (r3 asr 31) is 0 for r3 >= 0 and -1 for r3 < 0, so the
@ eor/sub pair computes (r3 ^ mask) - mask. For r3 = 0x80000000 the result
@ stays 0x80000000.
109 eor r12, r3, r3, asr #31 @ eor and then sub, to get absolute value.
110 sub r12, r12, r3, asr #31
114 bne LOOP_MAX_ABS_VALUE_W32

116 END_MAX_ABS_VALUE_W32:
@ mvn writes the bitwise NOT of its operand, so r0 = 0x7FFFFFFF, the largest
@ int32_t. Presumably used to cap a result of 0x80000000 -- confirm.
117 mvn r0, #0x80000000 @ Guard against the case for 0x80000000.
124 @ int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, int length);
@
@ Computes the maximum (signed) value of the int16_t elements of vector.
@ Registers as used below: r1 = element counter, r2 = running result,
@ q12 = per-lane running maximum.
@ NOTE(review): the compares, loads, the scalar loop body and the return are
@ not visible in this view; branch conditions are inferred from the labels.
125 DEFINE_FUNCTION WebRtcSpl_MaxValueW16Neon
@ NOTE(review): this writes 0x00008000 (+32768) into the 32-bit register, not
@ the sign-extended -32768 (0xFFFF8000). If the scalar loop compares r2 as a
@ signed 32-bit value against sign-extended elements, no element can exceed
@ it -- confirm against the loop body and the return sequence.
126 mov r2, #0x8000 @ Initialize the return value.
128 beq END_MAX_VALUE_W16 @ Early exit; presumably the NULL vector check.
130 ble END_MAX_VALUE_W16 @ Early exit; presumably the length <= 0 check.
@ Seed every 16-bit lane with 0x8000, which is -32768 when read as s16 and
@ therefore the identity element for a signed maximum.
132 vmov.i16 q12, #0x8000
134 blt LOOP_MAX_VALUE_W16 @ Bypass the Neon loop; presumably fewer than 8 elements.
136 sub r1, #8 @ Counter for loops: pre-biased by one block of 8.

138 LOOP_UNROLLED_BY_8_MAX_VALUE_W16:
142 bge LOOP_UNROLLED_BY_8_MAX_VALUE_W16

144 @ Find the maximum value in the Neon registers and move it to r2.
@ Each vpmax takes the signed maximum of adjacent lane pairs, so two passes
@ fold the four s16 lanes of d24 (the low half of q12) into lane 0.
146 vpmax.s16 d24, d24, d24
147 vpmax.s16 d24, d24, d24
150 beq END_MAX_VALUE_W16 @ Presumably taken when no leftover elements remain.
157 bne LOOP_MAX_VALUE_W16 @ Back edge of the scalar loop over leftover elements.
163 @ int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, int length);
@
@ Computes the maximum (signed) value of the int32_t elements of vector.
@ Registers as used below: r0 = read pointer (post-incremented by the load),
@ r1 = element counter, r2 = running result, q11/q12 = per-lane running
@ maxima, q13/q14 = the eight words loaded per vector pass.
@ NOTE(review): the compares, the vector max instructions, the scalar loop
@ body and the return are not visible in this view; branch conditions are
@ inferred from the labels -- confirm.
164 DEFINE_FUNCTION WebRtcSpl_MaxValueW32Neon
165 mov r2, #0x80000000 @ Initialize the return value to the smallest int32_t.
167 beq END_MAX_VALUE_W32 @ Early exit; presumably the NULL vector check.
169 ble END_MAX_VALUE_W32 @ Early exit; presumably the length <= 0 check.
@ Seed both accumulators with 0x80000000 in every 32-bit lane: the smallest
@ int32_t, which is the identity element for a signed maximum.
171 vmov.i32 q11, #0x80000000
172 vmov.i32 q12, #0x80000000
174 blt LOOP_MAX_VALUE_W32 @ Bypass the Neon loop; presumably fewer than 8 elements.
176 sub r1, #8 @ Counter for loops: pre-biased by one block of 8.

178 LOOP_UNROLLED_BY_8_MAX_VALUE_W32:
@ Load eight 32-bit words into q13 and q14 and advance r0 by 32 bytes.
179 vld1.32 {q13, q14}, [r0]!
183 bge LOOP_UNROLLED_BY_8_MAX_VALUE_W32

185 @ Find the maximum value in the Neon registers and move it to r2.
@ First pass: d24 lane 0 = max of the two lanes of d24, lane 1 = max of the
@ two lanes of d25. Second pass: lane 0 = max of all four lanes of q12.
187 vpmax.s32 d24, d24, d25
188 vpmax.s32 d24, d24, d24
191 beq END_MAX_VALUE_W32 @ Presumably taken when no leftover elements remain.
198 bne LOOP_MAX_VALUE_W32 @ Back edge of the scalar loop over leftover elements.
204 @ int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, int length);
@
@ Computes the minimum (signed) value of the int16_t elements of vector.
@ Registers as used below: r1 = element counter, r2 = running result.
@ NOTE(review): the compares, loads, the vector and scalar loop bodies and the
@ return are not visible in this view; branch conditions are inferred from the
@ labels -- confirm.
205 DEFINE_FUNCTION WebRtcSpl_MinValueW16Neon
206 movw r2, #0x7FFF @ Initialize the return value to 32767, the largest int16_t.
208 beq END_MIN_VALUE_W16 @ Early exit; presumably the NULL vector check.
210 ble END_MIN_VALUE_W16 @ Early exit; presumably the length <= 0 check.
214 blt LOOP_MIN_VALUE_W16 @ Bypass the Neon loop; presumably fewer than 8 elements.
216 sub r1, #8 @ Counter for loops: pre-biased by one block of 8.

218 LOOP_UNROLLED_BY_8_MIN_VALUE_W16:
222 bge LOOP_UNROLLED_BY_8_MIN_VALUE_W16

224 @ Find the minimum value in the Neon registers and move it to r2.
@ Each vpmin takes the signed minimum of adjacent lane pairs, so two passes
@ fold the four s16 lanes of d24 into lane 0.
226 vpmin.s16 d24, d24, d24
227 vpmin.s16 d24, d24, d24
231 beq END_MIN_VALUE_W16 @ Presumably taken when no leftover elements remain.
238 bne LOOP_MIN_VALUE_W16 @ Back edge of the scalar loop over leftover elements.
244 @ int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, int length);
@
@ Computes the minimum (signed) value of the int32_t elements of vector.
@ Registers as used below: r0 = read pointer (post-incremented by the load),
@ r1 = element counter, r2 = running result, q13/q14 = the eight words loaded
@ per vector pass.
@ NOTE(review): the compares, the vector min instructions, the scalar loop
@ body and the return are not visible in this view; branch conditions are
@ inferred from the labels -- confirm.
245 DEFINE_FUNCTION WebRtcSpl_MinValueW32Neon
246 mov r2, #0x7FFFFFFF @ Initialize the return value to the largest int32_t.
248 beq END_MIN_VALUE_W32 @ Early exit; presumably the NULL vector check.
250 ble END_MIN_VALUE_W32 @ Early exit; presumably the length <= 0 check.
255 blt LOOP_MIN_VALUE_W32 @ Bypass the Neon loop; presumably fewer than 8 elements.
257 sub r1, #8 @ Counter for loops: pre-biased by one block of 8.

259 LOOP_UNROLLED_BY_8_MIN_VALUE_W32:
@ Load eight 32-bit words into q13 and q14 and advance r0 by 32 bytes.
260 vld1.32 {q13, q14}, [r0]!
264 bge LOOP_UNROLLED_BY_8_MIN_VALUE_W32

266 @ Find the minimum value in the Neon registers and move it to r2.
@ First pass: d24 lane 0 = min of the two lanes of d24, lane 1 = min of the
@ two lanes of d25. Second pass: lane 0 = min of all four lanes of q12.
268 vpmin.s32 d24, d24, d25
269 vpmin.s32 d24, d24, d24
272 beq END_MIN_VALUE_W32 @ Presumably taken when no leftover elements remain.
279 bne LOOP_MIN_VALUE_W32 @ Back edge of the scalar loop over leftover elements.