Commit b6a3849 changed some calculations to use mmsize. That is incorrect
for the AVX version, which uses INIT_YMM and therefore has mmsize == 32,
so mmsize/4 evaluates to 8 instead of the intended 4.
Fixes Bug 341.
Signed-off-by: Justin Ruggles <justin.ruggles@gmail.com>
push rrevtab
%endif
- sub r3, mmsize/4
+%if mmsize == 8
+ sub r3, 2
+%else
+ sub r3, 4
+%endif
%if ARCH_X86_64 || mmsize == 8
xor r4, r4
sub r4, r3
mova [r1+r5*8], m0
mova [r1+r6*8], m2
add r4, 2
-%elif ARCH_X86_64
+ sub r4, 2
+%else
+%if ARCH_X86_64
movzx r5, word [rrevtab+r4-4]
movzx r6, word [rrevtab+r4-2]
movzx r10, word [rrevtab+r3]
movlps [r1+r5*8], xmm1
movhps [r1+r4*8], xmm1
%endif
- sub r3, mmsize/4
+ sub r3, 4
+%endif
jns .pre
mov r5, r0