src/third_party/webrtc/common_audio/signal_processing/min_max_operations_neon.S

   1 @
   2 @ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
   3 @
   4 @ Use of this source code is governed by a BSD-style license
   5 @ that can be found in the LICENSE file in the root of the source
   6 @ tree. An additional intellectual property rights grant can be found
   7 @ in the file PATENTS.  All contributing project authors may
   8 @ be found in the AUTHORS file in the root of the source tree.
   9 @
  10
  11 @ This file contains some minimum and maximum functions, optimized for
  12 @ ARM Neon platform. The description header can be found in
  13 @ signal_processing_library.h
  14 @
  15 @ The reference C code is in file min_max_operations.c. Code here is basically
  16 @ a loop unrolling by 8 with Neon instructions. Bit-exact.
  17
  18 #include "webrtc/system_wrappers/interface/asm_defines.h"
  19
  20 GLOBAL_FUNCTION WebRtcSpl_MaxAbsValueW16Neon
  21 GLOBAL_FUNCTION WebRtcSpl_MaxAbsValueW32Neon
  22 GLOBAL_FUNCTION WebRtcSpl_MaxValueW16Neon
  23 GLOBAL_FUNCTION WebRtcSpl_MaxValueW32Neon
  24 GLOBAL_FUNCTION WebRtcSpl_MinValueW16Neon
  25 GLOBAL_FUNCTION WebRtcSpl_MinValueW32Neon
  26
  27 .align  2
  28 @ int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, int length);
  29 DEFINE_FUNCTION WebRtcSpl_MaxAbsValueW16Neon
  30   mov r2, #-1                 @ Initialize the return value.
  31   cmp r0, #0
  32   beq END_MAX_ABS_VALUE_W16
  33   cmp r1, #0
  34   ble END_MAX_ABS_VALUE_W16
  35
  36   cmp r1, #8
  37   blt LOOP_MAX_ABS_VALUE_W16
  38
  39   vmov.i16 q12, #0
  40   sub r1, #8                  @ Counter for loops
  41
  42 LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W16:
  43   vld1.16 {q13}, [r0]!
  44   subs r1, #8
  45   vabs.s16 q13, q13           @ Note vabs doesn't change the value of -32768.
  46   vmax.u16 q12, q13           @ Use u16 so we don't lose the value -32768.
  47   bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W16
  48
  49   @ Find the maximum value in the Neon registers and move it to r2.
  50   vmax.u16 d24, d25
  51   vpmax.u16 d24, d24, d24
  52   vpmax.u16 d24, d24, d24
  53   adds r1, #8
  54   vmov.u16 r2, d24[0]
  55   beq END_MAX_ABS_VALUE_W16
  56
  57 LOOP_MAX_ABS_VALUE_W16:
  58   ldrsh r3, [r0], #2
  59   eor r12, r3, r3, asr #31    @ eor and then sub, to get absolute value.
  60   sub r12, r12, r3, asr #31
  61   cmp r2, r12
  62   movlt r2, r12
  63   subs r1, #1
  64   bne LOOP_MAX_ABS_VALUE_W16
  65
  66 END_MAX_ABS_VALUE_W16:
  67   cmp r2, #0x8000             @ Guard against the case for -32768.
  68   subeq r2, #1
  69   mov r0, r2
  70   bx  lr
  71
  72
  73
  74 @ int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, int length);
  75 DEFINE_FUNCTION WebRtcSpl_MaxAbsValueW32Neon
  76   cmp r0, #0
  77   moveq r0, #-1
  78   beq EXIT                    @ Return -1 for a NULL pointer.
  79   cmp r1, #0                  @ length
  80   movle r0, #-1
  81   ble EXIT                    @ Return -1 if length <= 0.
  82
  83   vmov.i32 q11, #0
  84   vmov.i32 q12, #0
  85   cmp r1, #8
  86   blt LOOP_MAX_ABS_VALUE_W32
  87
  88   sub r1, #8                  @ Counter for loops
  89
  90 LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32:
  91   vld1.32 {q13, q14}, [r0]!
  92   subs r1, #8                 @ Counter for loops
  93   vabs.s32 q13, q13           @ vabs doesn't change the value of 0x80000000.
  94   vabs.s32 q14, q14
  95   vmax.u32 q11, q13           @ Use u32 so we don't lose the value 0x80000000.
  96   vmax.u32 q12, q14
  97   bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32
  98
  99   @ Find the maximum value in the Neon registers and move it to r2.
 100   vmax.u32 q12, q11
 101   vmax.u32 d24, d25
 102   vpmax.u32 d24, d24, d24
 103   adds r1, #8
 104   vmov.u32 r2, d24[0]
 105   beq END_MAX_ABS_VALUE_W32
 106
 107 LOOP_MAX_ABS_VALUE_W32:
 108   ldr r3, [r0], #4
 109   eor r12, r3, r3, asr #31    @ eor and then sub, to get absolute value.
 110   sub r12, r12, r3, asr #31
 111   cmp r2, r12
 112   movcc r2, r12
 113   subs r1, #1
 114   bne LOOP_MAX_ABS_VALUE_W32
 115
 116 END_MAX_ABS_VALUE_W32:
 117   mvn r0, #0x80000000         @ Guard against the case for 0x80000000.
 118   cmp r2, r0
 119   movcc r0, r2
 120
 121 EXIT:
 122   bx  lr
 123
 124 @ int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, int length);
 125 DEFINE_FUNCTION WebRtcSpl_MaxValueW16Neon
 126   mov r2, #0x8000             @ Initialize the return value.
 127   cmp r0, #0
 128   beq END_MAX_VALUE_W16
 129   cmp r1, #0
 130   ble END_MAX_VALUE_W16
 131
 132   vmov.i16 q12, #0x8000
 133   cmp r1, #8
 134   blt LOOP_MAX_VALUE_W16
 135
 136   sub r1, #8                  @ Counter for loops
 137
 138 LOOP_UNROLLED_BY_8_MAX_VALUE_W16:
 139   vld1.16 {q13}, [r0]!
 140   subs r1, #8
 141   vmax.s16 q12, q13
 142   bge LOOP_UNROLLED_BY_8_MAX_VALUE_W16
 143
 144   @ Find the maximum value in the Neon registers and move it to r2.
 145   vmax.s16 d24, d25
 146   vpmax.s16 d24, d24, d24
 147   vpmax.s16 d24, d24, d24
 148   adds r1, #8
 149   vmov.u16 r2, d24[0]
 150   beq END_MAX_VALUE_W16
 151
 152 LOOP_MAX_VALUE_W16:
 153   ldrsh r3, [r0], #2
 154   cmp r2, r3
 155   movlt r2, r3
 156   subs r1, #1
 157   bne LOOP_MAX_VALUE_W16
 158
 159 END_MAX_VALUE_W16:
 160   mov r0, r2
 161   bx  lr
 162
 163 @ int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, int length);
 164 DEFINE_FUNCTION WebRtcSpl_MaxValueW32Neon
 165   mov r2, #0x80000000         @ Initialize the return value.
 166   cmp r0, #0
 167   beq END_MAX_VALUE_W32
 168   cmp r1, #0
 169   ble END_MAX_VALUE_W32
 170
 171   vmov.i32 q11, #0x80000000
 172   vmov.i32 q12, #0x80000000
 173   cmp r1, #8
 174   blt LOOP_MAX_VALUE_W32
 175
 176   sub r1, #8                  @ Counter for loops
 177
 178 LOOP_UNROLLED_BY_8_MAX_VALUE_W32:
 179   vld1.32 {q13, q14}, [r0]!
 180   subs r1, #8
 181   vmax.s32 q11, q13
 182   vmax.s32 q12, q14
 183   bge LOOP_UNROLLED_BY_8_MAX_VALUE_W32
 184
 185   @ Find the maximum value in the Neon registers and move it to r2.
 186   vmax.s32 q12, q11
 187   vpmax.s32 d24, d24, d25
 188   vpmax.s32 d24, d24, d24
 189   adds r1, #8
 190   vmov.s32 r2, d24[0]
 191   beq END_MAX_VALUE_W32
 192
 193 LOOP_MAX_VALUE_W32:
 194   ldr r3, [r0], #4
 195   cmp r2, r3
 196   movlt r2, r3
 197   subs r1, #1
 198   bne LOOP_MAX_VALUE_W32
 199
 200 END_MAX_VALUE_W32:
 201   mov r0, r2
 202   bx  lr
 203
 204 @ int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, int length);
 205 DEFINE_FUNCTION WebRtcSpl_MinValueW16Neon
 206   movw r2, #0x7FFF            @ Initialize the return value.
 207   cmp r0, #0
 208   beq END_MIN_VALUE_W16
 209   cmp r1, #0
 210   ble END_MIN_VALUE_W16
 211
 212   vdup.16 q12, r2
 213   cmp r1, #8
 214   blt LOOP_MIN_VALUE_W16
 215
 216   sub r1, #8                  @ Counter for loops
 217
 218 LOOP_UNROLLED_BY_8_MIN_VALUE_W16:
 219   vld1.16 {q13}, [r0]!
 220   subs r1, #8
 221   vmin.s16 q12, q13
 222   bge LOOP_UNROLLED_BY_8_MIN_VALUE_W16
 223
 224   @ Find the maximum value in the Neon registers and move it to r2.
 225   vmin.s16 d24, d25
 226   vpmin.s16 d24, d24, d24
 227   vpmin.s16 d24, d24, d24
 228   adds r1, #8
 229   vmov.s16 r2, d24[0]
 230   sxth  r2, r2
 231   beq END_MIN_VALUE_W16
 232
 233 LOOP_MIN_VALUE_W16:
 234   ldrsh r3, [r0], #2
 235   cmp r2, r3
 236   movge r2, r3
 237   subs r1, #1
 238   bne LOOP_MIN_VALUE_W16
 239
 240 END_MIN_VALUE_W16:
 241   mov r0, r2
 242   bx  lr
 243
 244 @ int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, int length);
 245 DEFINE_FUNCTION WebRtcSpl_MinValueW32Neon
 246   mov r2, #0x7FFFFFFF         @ Initialize the return value.
 247   cmp r0, #0
 248   beq END_MIN_VALUE_W32
 249   cmp r1, #0
 250   ble END_MIN_VALUE_W32
 251
 252   vdup.32 q11, r2
 253   vdup.32 q12, r2
 254   cmp r1, #8
 255   blt LOOP_MIN_VALUE_W32
 256
 257   sub r1, #8                  @ Counter for loops
 258
 259 LOOP_UNROLLED_BY_8_MIN_VALUE_W32:
 260   vld1.32 {q13, q14}, [r0]!
 261   subs r1, #8
 262   vmin.s32 q11, q13
 263   vmin.s32 q12, q14
 264   bge LOOP_UNROLLED_BY_8_MIN_VALUE_W32
 265
 266   @ Find the maximum value in the Neon registers and move it to r2.
 267   vmin.s32 q12, q11
 268   vpmin.s32 d24, d24, d25
 269   vpmin.s32 d24, d24, d24
 270   adds r1, #8
 271   vmov.s32 r2, d24[0]
 272   beq END_MIN_VALUE_W32
 273
 274 LOOP_MIN_VALUE_W32:
 275   ldr r3, [r0], #4
 276   cmp r2, r3
 277   movge r2, r3
 278   subs r1, #1
 279   bne LOOP_MIN_VALUE_W32
 280
 281 END_MIN_VALUE_W32:
 282   mov r0, r2
 283   bx  lr