.end:
ret
- align 16
+ ALIGN 16
cident FLAC__lpc_compute_autocorrelation_asm_ia32_3dnow
;[ebp + 32] autoc
;[ebp + 28] lag
cmp esi, ecx
mov eax, esi
ja short .loop2_pre
- align 16 ;4 nops
+ ALIGN 16 ;4 nops
.loop1_i:
movd mm0, [eax]
movd mm2, [eax + 4]
punpckldq mm2, mm2
punpckldq mm4, mm4
punpckldq mm6, mm6
- align 16 ;3 nops
+ ALIGN 16 ;3 nops
.loop1_j:
sub ebx, byte 2
movd mm1, [eax + 4 * ebx]
ret
; WATCHOUT: this routine works on 16 bit data which means bits-per-sample for
-; the channel must be <= 16. Especially note that this routine cannot be used
-; for side-channel coded 16bps channels since the effective bps is 17.
+; the channel must be <= 16, and the qlp_coeffs must likewise fit in 16 bits.
+; Especially note that this routine cannot be used for side-channel coded
+; 16bps channels since the effective bps is 17.
ALIGN 16
cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx
;[esp + 40] residual[]
cmp eax, byte 4
jnbe short .mmx_4more
- align 16
+ ALIGN 16
.mmx_4_loop_i:
movd mm1, [esi]
movq mm3, mm4
neg eax
add eax, byte 16
- align 16
+ ALIGN 16
.mmx_4more_loop_i:
movd mm1, [esi]
punpckldq mm1, [esi + 4]
add ecx, eax
mov edx, esp
- align 16
+ ALIGN 16
.mmx_4more_loop_j:
movd mm0, [ecx - 16]
movd mm7, [ecx - 8]
ret
; WATCHOUT: this routine works on 16 bit data which means bits-per-sample for
-; the channel must be <= 16. Especially note that this routine cannot be used
-; for side-channel coded 16bps channels since the effective bps is 17.
+; the channel must be <= 16, and the qlp_coeffs must likewise fit in 16 bits.
+; Especially note that this routine cannot be used for side-channel coded
+; 16bps channels since the effective bps is 17.
; WATCHOUT: this routine requires that each data array have a buffer of up to
; 3 zeroes in front (at negative indices) for alignment purposes, i.e. for each
; channel n, data[n][-1] through data[n][-3] should be accessible and zero.
cmp eax, byte 4
jnbe short .mmx_4more
- align 16
+ ALIGN 16
.mmx_4_loop_i:
movq mm7, mm4
pmaddwd mm7, mm5
shl eax, 2
neg eax
add eax, byte 16
- align 16
+ ALIGN 16
.mmx_4more_loop_i:
mov ecx, edi
add ecx, eax
movq mm7, mm4
pmaddwd mm7, mm5
- align 16
+ ALIGN 16
.mmx_4more_loop_j:
movd mm0, [ecx - 16]
punpckldq mm0, [ecx - 12]