/*
 * MOVLPS selects the instruction used for the low-64-bit XMM moves in the
 * code below:
 *   - OPTERON not defined: MOVLPS expands to movsd
 *   - OPTERON defined:     MOVLPS expands to movlps
 *
 * In this diff the removed ('-') line redefined the lowercase mnemonic
 * movlps itself as movsd when OPTERON was not defined. The added ('+')
 * lines introduce a separate uppercase name instead, defined in both
 * branches, so the real movlps mnemonic is no longer overridden.
 *
 * The leading #undef removes any macro named movsd that may already be
 * defined -- presumably so the expansion yields the actual instruction;
 * TODO confirm against the headers included by this file.
 *
 * NOTE(review): per the x86 ISA, a movsd load from memory clears the upper
 * 64 bits of the destination while movlps leaves them unchanged. Every
 * MOVLPS load visible in this chunk is immediately followed by a movhps
 * into the same register, so the upper half is overwritten either way --
 * confirm for use sites outside this view.
 */
#undef movsd
#ifndef OPTERON
-#define movlps movsd
+#define MOVLPS movsd
+#else
+#define MOVLPS movlps
#endif
PROLOGUE
sarl $3, %eax
jle .L25
- movlps -16 * SIZE(X), %xmm4
+ MOVLPS -16 * SIZE(X), %xmm4
movhps -15 * SIZE(X), %xmm4
movaps -16 * SIZE(Y), %xmm6
- movlps -14 * SIZE(X), %xmm5
+ MOVLPS -14 * SIZE(X), %xmm5
movhps -13 * SIZE(X), %xmm5
movaps -14 * SIZE(Y), %xmm7
addpd %xmm6, %xmm0
movaps -12 * SIZE(Y), %xmm6
mulpd %xmm4, %xmm3
- movlps -12 * SIZE(X), %xmm4
+ MOVLPS -12 * SIZE(X), %xmm4
movhps -11 * SIZE(X), %xmm4
addpd %xmm3, %xmm1
addpd %xmm7, %xmm0
movaps -10 * SIZE(Y), %xmm7
mulpd %xmm5, %xmm3
- movlps -10 * SIZE(X), %xmm5
+ MOVLPS -10 * SIZE(X), %xmm5
movhps -9 * SIZE(X), %xmm5
addpd %xmm3, %xmm1
addpd %xmm6, %xmm0
movaps -8 * SIZE(Y), %xmm6
mulpd %xmm4, %xmm3
- movlps -8 * SIZE(X), %xmm4
+ MOVLPS -8 * SIZE(X), %xmm4
movhps -7 * SIZE(X), %xmm4
addpd %xmm3, %xmm1
addpd %xmm7, %xmm0
movaps -6 * SIZE(Y), %xmm7
mulpd %xmm5, %xmm3
- movlps -6 * SIZE(X), %xmm5
+ MOVLPS -6 * SIZE(X), %xmm5
movhps -5 * SIZE(X), %xmm5
addpd %xmm3, %xmm1
addpd %xmm6, %xmm0
movaps -4 * SIZE(Y), %xmm6
mulpd %xmm4, %xmm3
- movlps -4 * SIZE(X), %xmm4
+ MOVLPS -4 * SIZE(X), %xmm4
movhps -3 * SIZE(X), %xmm4
addpd %xmm3, %xmm1
addpd %xmm7, %xmm0
movaps -2 * SIZE(Y), %xmm7
mulpd %xmm5, %xmm3
- movlps -2 * SIZE(X), %xmm5
+ MOVLPS -2 * SIZE(X), %xmm5
movhps -1 * SIZE(X), %xmm5
addpd %xmm3, %xmm1
addpd %xmm6, %xmm0
movaps 0 * SIZE(Y), %xmm6
mulpd %xmm4, %xmm3
- movlps 0 * SIZE(X), %xmm4
+ MOVLPS 0 * SIZE(X), %xmm4
movhps 1 * SIZE(X), %xmm4
addpd %xmm3, %xmm1
addpd %xmm7, %xmm0
movaps 2 * SIZE(Y), %xmm7
mulpd %xmm5, %xmm3
- movlps 2 * SIZE(X), %xmm5
+ MOVLPS 2 * SIZE(X), %xmm5
movhps 3 * SIZE(X), %xmm5
addpd %xmm3, %xmm1
addpd %xmm6, %xmm0
movaps -12 * SIZE(Y), %xmm6
mulpd %xmm4, %xmm3
- movlps -12 * SIZE(X), %xmm4
+ MOVLPS -12 * SIZE(X), %xmm4
movhps -11 * SIZE(X), %xmm4
addpd %xmm3, %xmm1
addpd %xmm7, %xmm0
movaps -10 * SIZE(Y), %xmm7
mulpd %xmm5, %xmm3
- movlps -10 * SIZE(X), %xmm5
+ MOVLPS -10 * SIZE(X), %xmm5
movhps -9 * SIZE(X), %xmm5
addpd %xmm3, %xmm1
addpd %xmm6, %xmm0
movaps -8 * SIZE(Y), %xmm6
mulpd %xmm4, %xmm3
- movlps -8 * SIZE(X), %xmm4
+ MOVLPS -8 * SIZE(X), %xmm4
movhps -7 * SIZE(X), %xmm4
addpd %xmm3, %xmm1
addpd %xmm7, %xmm0
movaps -6 * SIZE(Y), %xmm7
mulpd %xmm5, %xmm3
- movlps -6 * SIZE(X), %xmm5
+ MOVLPS -6 * SIZE(X), %xmm5
movhps -5 * SIZE(X), %xmm5
addpd %xmm3, %xmm1
addpd %xmm6, %xmm0
movaps -4 * SIZE(Y), %xmm6
mulpd %xmm4, %xmm3
- movlps -4 * SIZE(X), %xmm4
+ MOVLPS -4 * SIZE(X), %xmm4
movhps -3 * SIZE(X), %xmm4
addpd %xmm3, %xmm1
addpd %xmm7, %xmm0
movaps -2 * SIZE(Y), %xmm7
mulpd %xmm5, %xmm3
- movlps -2 * SIZE(X), %xmm5
+ MOVLPS -2 * SIZE(X), %xmm5
movhps -1 * SIZE(X), %xmm5
addpd %xmm3, %xmm1
testl $4, N
jle .L26
- movlps -16 * SIZE(X), %xmm4
+ MOVLPS -16 * SIZE(X), %xmm4
movhps -15 * SIZE(X), %xmm4
movaps -16 * SIZE(Y), %xmm6
- movlps -14 * SIZE(X), %xmm5
+ MOVLPS -14 * SIZE(X), %xmm5
movhps -13 * SIZE(X), %xmm5
movaps -14 * SIZE(Y), %xmm7
addpd %xmm6, %xmm0
movaps -12 * SIZE(Y), %xmm6
mulpd %xmm4, %xmm3
- movlps -12 * SIZE(X), %xmm4
+ MOVLPS -12 * SIZE(X), %xmm4
movhps -11 * SIZE(X), %xmm4
addpd %xmm3, %xmm1
addpd %xmm7, %xmm0
movaps -10 * SIZE(Y), %xmm7
mulpd %xmm5, %xmm3
- movlps -10 * SIZE(X), %xmm5
+ MOVLPS -10 * SIZE(X), %xmm5
movhps -9 * SIZE(X), %xmm5
addpd %xmm3, %xmm1
testl $2, N
jle .L27
- movlps -16 * SIZE(X), %xmm4
+ MOVLPS -16 * SIZE(X), %xmm4
movhps -15 * SIZE(X), %xmm4
movaps -16 * SIZE(Y), %xmm6
mulpd %xmm4, %xmm3
addpd %xmm3, %xmm1
- movlps -14 * SIZE(X), %xmm5
+ MOVLPS -14 * SIZE(X), %xmm5
movhps -13 * SIZE(X), %xmm5
movaps -14 * SIZE(Y), %xmm7
testl $1, N
jle .L98
- movlps -16 * SIZE(X), %xmm4
+ MOVLPS -16 * SIZE(X), %xmm4
movhps -15 * SIZE(X), %xmm4
movaps -16 * SIZE(Y), %xmm6
sarl $3, %eax
jle .L35
- movlps -16 * SIZE(Y), %xmm4
+ MOVLPS -16 * SIZE(Y), %xmm4
movhps -15 * SIZE(Y), %xmm4
movaps -16 * SIZE(X), %xmm6
- movlps -14 * SIZE(Y), %xmm5
+ MOVLPS -14 * SIZE(Y), %xmm5
movhps -13 * SIZE(Y), %xmm5
movaps -14 * SIZE(X), %xmm7
addpd %xmm6, %xmm0
movaps -12 * SIZE(X), %xmm6
mulpd %xmm4, %xmm3
- movlps -12 * SIZE(Y), %xmm4
+ MOVLPS -12 * SIZE(Y), %xmm4
movhps -11 * SIZE(Y), %xmm4
addpd %xmm3, %xmm1
addpd %xmm7, %xmm0
movaps -10 * SIZE(X), %xmm7
mulpd %xmm5, %xmm3
- movlps -10 * SIZE(Y), %xmm5
+ MOVLPS -10 * SIZE(Y), %xmm5
movhps -9 * SIZE(Y), %xmm5
addpd %xmm3, %xmm1
addpd %xmm6, %xmm0
movaps -8 * SIZE(X), %xmm6
mulpd %xmm4, %xmm3
- movlps -8 * SIZE(Y), %xmm4
+ MOVLPS -8 * SIZE(Y), %xmm4
movhps -7 * SIZE(Y), %xmm4
addpd %xmm3, %xmm1
addpd %xmm7, %xmm0
movaps -6 * SIZE(X), %xmm7
mulpd %xmm5, %xmm3
- movlps -6 * SIZE(Y), %xmm5
+ MOVLPS -6 * SIZE(Y), %xmm5
movhps -5 * SIZE(Y), %xmm5
addpd %xmm3, %xmm1
addpd %xmm6, %xmm0
movaps -4 * SIZE(X), %xmm6
mulpd %xmm4, %xmm3
- movlps -4 * SIZE(Y), %xmm4
+ MOVLPS -4 * SIZE(Y), %xmm4
movhps -3 * SIZE(Y), %xmm4
addpd %xmm3, %xmm1
addpd %xmm7, %xmm0
movaps -2 * SIZE(X), %xmm7
mulpd %xmm5, %xmm3
- movlps -2 * SIZE(Y), %xmm5
+ MOVLPS -2 * SIZE(Y), %xmm5
movhps -1 * SIZE(Y), %xmm5
addpd %xmm3, %xmm1
addpd %xmm6, %xmm0
movaps 0 * SIZE(X), %xmm6
mulpd %xmm4, %xmm3
- movlps 0 * SIZE(Y), %xmm4
+ MOVLPS 0 * SIZE(Y), %xmm4
movhps 1 * SIZE(Y), %xmm4
addpd %xmm3, %xmm1
addpd %xmm7, %xmm0
movaps 2 * SIZE(X), %xmm7
mulpd %xmm5, %xmm3
- movlps 2 * SIZE(Y), %xmm5
+ MOVLPS 2 * SIZE(Y), %xmm5
movhps 3 * SIZE(Y), %xmm5
addpd %xmm3, %xmm1
addpd %xmm6, %xmm0
movaps -12 * SIZE(X), %xmm6
mulpd %xmm4, %xmm3
- movlps -12 * SIZE(Y), %xmm4
+ MOVLPS -12 * SIZE(Y), %xmm4
movhps -11 * SIZE(Y), %xmm4
addpd %xmm3, %xmm1
addpd %xmm7, %xmm0
movaps -10 * SIZE(X), %xmm7
mulpd %xmm5, %xmm3
- movlps -10 * SIZE(Y), %xmm5
+ MOVLPS -10 * SIZE(Y), %xmm5
movhps -9 * SIZE(Y), %xmm5
addpd %xmm3, %xmm1
addpd %xmm6, %xmm0
movaps -8 * SIZE(X), %xmm6
mulpd %xmm4, %xmm3
- movlps -8 * SIZE(Y), %xmm4
+ MOVLPS -8 * SIZE(Y), %xmm4
movhps -7 * SIZE(Y), %xmm4
addpd %xmm3, %xmm1
addpd %xmm7, %xmm0
movaps -6 * SIZE(X), %xmm7
mulpd %xmm5, %xmm3
- movlps -6 * SIZE(Y), %xmm5
+ MOVLPS -6 * SIZE(Y), %xmm5
movhps -5 * SIZE(Y), %xmm5
addpd %xmm3, %xmm1
addpd %xmm6, %xmm0
movaps -4 * SIZE(X), %xmm6
mulpd %xmm4, %xmm3
- movlps -4 * SIZE(Y), %xmm4
+ MOVLPS -4 * SIZE(Y), %xmm4
movhps -3 * SIZE(Y), %xmm4
addpd %xmm3, %xmm1
addpd %xmm7, %xmm0
movaps -2 * SIZE(X), %xmm7
mulpd %xmm5, %xmm3
- movlps -2 * SIZE(Y), %xmm5
+ MOVLPS -2 * SIZE(Y), %xmm5
movhps -1 * SIZE(Y), %xmm5
addpd %xmm3, %xmm1
testl $4, N
jle .L36
- movlps -16 * SIZE(Y), %xmm4
+ MOVLPS -16 * SIZE(Y), %xmm4
movhps -15 * SIZE(Y), %xmm4
movaps -16 * SIZE(X), %xmm6
- movlps -14 * SIZE(Y), %xmm5
+ MOVLPS -14 * SIZE(Y), %xmm5
movhps -13 * SIZE(Y), %xmm5
movaps -14 * SIZE(X), %xmm7
addpd %xmm6, %xmm0
movaps -12 * SIZE(X), %xmm6
mulpd %xmm4, %xmm3
- movlps -12 * SIZE(Y), %xmm4
+ MOVLPS -12 * SIZE(Y), %xmm4
movhps -11 * SIZE(Y), %xmm4
addpd %xmm3, %xmm1
addpd %xmm7, %xmm0
movaps -10 * SIZE(X), %xmm7
mulpd %xmm5, %xmm3
- movlps -10 * SIZE(Y), %xmm5
+ MOVLPS -10 * SIZE(Y), %xmm5
movhps -9 * SIZE(Y), %xmm5
addpd %xmm3, %xmm1
testl $2, N
jle .L37
- movlps -16 * SIZE(Y), %xmm4
+ MOVLPS -16 * SIZE(Y), %xmm4
movhps -15 * SIZE(Y), %xmm4
movaps -16 * SIZE(X), %xmm6
mulpd %xmm4, %xmm3
addpd %xmm3, %xmm1
- movlps -14 * SIZE(Y), %xmm5
+ MOVLPS -14 * SIZE(Y), %xmm5
movhps -13 * SIZE(Y), %xmm5
movaps -14 * SIZE(X), %xmm7
testl $1, N
jle .L98
- movlps -16 * SIZE(Y), %xmm4
+ MOVLPS -16 * SIZE(Y), %xmm4
movhps -15 * SIZE(Y), %xmm4
movaps -16 * SIZE(X), %xmm6
sarl $3, %eax
jle .L55
- movlps 0 * SIZE(X), %xmm4
+ MOVLPS 0 * SIZE(X), %xmm4
movhps 1 * SIZE(X), %xmm4
addl INCX, X
- movlps 0 * SIZE(Y), %xmm6
+ MOVLPS 0 * SIZE(Y), %xmm6
movhps 1 * SIZE(Y), %xmm6
addl INCY, Y
- movlps 0 * SIZE(X), %xmm5
+ MOVLPS 0 * SIZE(X), %xmm5
movhps 1 * SIZE(X), %xmm5
addl INCX, X
- movlps 0 * SIZE(Y), %xmm7
+ MOVLPS 0 * SIZE(Y), %xmm7
movhps 1 * SIZE(Y), %xmm7
addl INCY, Y
pshufd $0x4e, %xmm6, %xmm3
mulpd %xmm4, %xmm6
addpd %xmm6, %xmm0
- movlps 0 * SIZE(Y), %xmm6
+ MOVLPS 0 * SIZE(Y), %xmm6
movhps 1 * SIZE(Y), %xmm6
addl INCY, Y
mulpd %xmm4, %xmm3
- movlps 0 * SIZE(X), %xmm4
+ MOVLPS 0 * SIZE(X), %xmm4
movhps 1 * SIZE(X), %xmm4
addl INCX, X
addpd %xmm3, %xmm1
pshufd $0x4e, %xmm7, %xmm3
mulpd %xmm5, %xmm7
addpd %xmm7, %xmm0
- movlps 0 * SIZE(Y), %xmm7
+ MOVLPS 0 * SIZE(Y), %xmm7
movhps 1 * SIZE(Y), %xmm7
addl INCY, Y
mulpd %xmm5, %xmm3
- movlps 0 * SIZE(X), %xmm5
+ MOVLPS 0 * SIZE(X), %xmm5
movhps 1 * SIZE(X), %xmm5
addl INCX, X
addpd %xmm3, %xmm1
pshufd $0x4e, %xmm6, %xmm3
mulpd %xmm4, %xmm6
addpd %xmm6, %xmm0
- movlps 0 * SIZE(Y), %xmm6
+ MOVLPS 0 * SIZE(Y), %xmm6
movhps 1 * SIZE(Y), %xmm6
addl INCY, Y
mulpd %xmm4, %xmm3
- movlps 0 * SIZE(X), %xmm4
+ MOVLPS 0 * SIZE(X), %xmm4
movhps 1 * SIZE(X), %xmm4
addl INCX, X
addpd %xmm3, %xmm1
pshufd $0x4e, %xmm7, %xmm3
mulpd %xmm5, %xmm7
addpd %xmm7, %xmm0
- movlps 0 * SIZE(Y), %xmm7
+ MOVLPS 0 * SIZE(Y), %xmm7
movhps 1 * SIZE(Y), %xmm7
addl INCY, Y
mulpd %xmm5, %xmm3
- movlps 0 * SIZE(X), %xmm5
+ MOVLPS 0 * SIZE(X), %xmm5
movhps 1 * SIZE(X), %xmm5
addl INCX, X
addpd %xmm3, %xmm1
pshufd $0x4e, %xmm6, %xmm3
mulpd %xmm4, %xmm6
addpd %xmm6, %xmm0
- movlps 0 * SIZE(Y), %xmm6
+ MOVLPS 0 * SIZE(Y), %xmm6
movhps 1 * SIZE(Y), %xmm6
addl INCY, Y
mulpd %xmm4, %xmm3
- movlps 0 * SIZE(X), %xmm4
+ MOVLPS 0 * SIZE(X), %xmm4
movhps 1 * SIZE(X), %xmm4
addl INCX, X
addpd %xmm3, %xmm1
pshufd $0x4e, %xmm7, %xmm3
mulpd %xmm5, %xmm7
addpd %xmm7, %xmm0
- movlps 0 * SIZE(Y), %xmm7
+ MOVLPS 0 * SIZE(Y), %xmm7
movhps 1 * SIZE(Y), %xmm7
addl INCY, Y
mulpd %xmm5, %xmm3
- movlps 0 * SIZE(X), %xmm5
+ MOVLPS 0 * SIZE(X), %xmm5
movhps 1 * SIZE(X), %xmm5
addl INCX, X
addpd %xmm3, %xmm1
pshufd $0x4e, %xmm6, %xmm3
mulpd %xmm4, %xmm6
addpd %xmm6, %xmm0
- movlps 0 * SIZE(Y), %xmm6
+ MOVLPS 0 * SIZE(Y), %xmm6
movhps 1 * SIZE(Y), %xmm6
addl INCY, Y
mulpd %xmm4, %xmm3
- movlps 0 * SIZE(X), %xmm4
+ MOVLPS 0 * SIZE(X), %xmm4
movhps 1 * SIZE(X), %xmm4
addl INCX, X
addpd %xmm3, %xmm1
pshufd $0x4e, %xmm7, %xmm3
mulpd %xmm5, %xmm7
addpd %xmm7, %xmm0
- movlps 0 * SIZE(Y), %xmm7
+ MOVLPS 0 * SIZE(Y), %xmm7
movhps 1 * SIZE(Y), %xmm7
addl INCY, Y
mulpd %xmm5, %xmm3
- movlps 0 * SIZE(X), %xmm5
+ MOVLPS 0 * SIZE(X), %xmm5
movhps 1 * SIZE(X), %xmm5
addl INCX, X
addpd %xmm3, %xmm1
pshufd $0x4e, %xmm6, %xmm3
mulpd %xmm4, %xmm6
addpd %xmm6, %xmm0
- movlps 0 * SIZE(Y), %xmm6
+ MOVLPS 0 * SIZE(Y), %xmm6
movhps 1 * SIZE(Y), %xmm6
addl INCY, Y
mulpd %xmm4, %xmm3
- movlps 0 * SIZE(X), %xmm4
+ MOVLPS 0 * SIZE(X), %xmm4
movhps 1 * SIZE(X), %xmm4
addl INCX, X
addpd %xmm3, %xmm1
pshufd $0x4e, %xmm7, %xmm3
mulpd %xmm5, %xmm7
addpd %xmm7, %xmm0
- movlps 0 * SIZE(Y), %xmm7
+ MOVLPS 0 * SIZE(Y), %xmm7
movhps 1 * SIZE(Y), %xmm7
addl INCY, Y
mulpd %xmm5, %xmm3
- movlps 0 * SIZE(X), %xmm5
+ MOVLPS 0 * SIZE(X), %xmm5
movhps 1 * SIZE(X), %xmm5
addl INCX, X
addpd %xmm3, %xmm1
pshufd $0x4e, %xmm6, %xmm3
mulpd %xmm4, %xmm6
addpd %xmm6, %xmm0
- movlps 0 * SIZE(Y), %xmm6
+ MOVLPS 0 * SIZE(Y), %xmm6
movhps 1 * SIZE(Y), %xmm6
addl INCY, Y
mulpd %xmm4, %xmm3
- movlps 0 * SIZE(X), %xmm4
+ MOVLPS 0 * SIZE(X), %xmm4
movhps 1 * SIZE(X), %xmm4
addl INCX, X
addpd %xmm3, %xmm1
pshufd $0x4e, %xmm7, %xmm3
mulpd %xmm5, %xmm7
addpd %xmm7, %xmm0
- movlps 0 * SIZE(Y), %xmm7
+ MOVLPS 0 * SIZE(Y), %xmm7
movhps 1 * SIZE(Y), %xmm7
addl INCY, Y
mulpd %xmm5, %xmm3
- movlps 0 * SIZE(X), %xmm5
+ MOVLPS 0 * SIZE(X), %xmm5
movhps 1 * SIZE(X), %xmm5
addl INCX, X
addpd %xmm3, %xmm1
pshufd $0x4e, %xmm6, %xmm3
mulpd %xmm4, %xmm6
addpd %xmm6, %xmm0
- movlps 0 * SIZE(Y), %xmm6
+ MOVLPS 0 * SIZE(Y), %xmm6
movhps 1 * SIZE(Y), %xmm6
addl INCY, Y
mulpd %xmm4, %xmm3
- movlps 0 * SIZE(X), %xmm4
+ MOVLPS 0 * SIZE(X), %xmm4
movhps 1 * SIZE(X), %xmm4
addl INCX, X
addpd %xmm3, %xmm1
pshufd $0x4e, %xmm7, %xmm3
mulpd %xmm5, %xmm7
addpd %xmm7, %xmm0
- movlps 0 * SIZE(Y), %xmm7
+ MOVLPS 0 * SIZE(Y), %xmm7
movhps 1 * SIZE(Y), %xmm7
addl INCY, Y
mulpd %xmm5, %xmm3
- movlps 0 * SIZE(X), %xmm5
+ MOVLPS 0 * SIZE(X), %xmm5
movhps 1 * SIZE(X), %xmm5
addl INCX, X
addpd %xmm3, %xmm1
testl $4, N
jle .L56
- movlps 0 * SIZE(X), %xmm4
+ MOVLPS 0 * SIZE(X), %xmm4
movhps 1 * SIZE(X), %xmm4
addl INCX, X
- movlps 0 * SIZE(Y), %xmm6
+ MOVLPS 0 * SIZE(Y), %xmm6
movhps 1 * SIZE(Y), %xmm6
addl INCY, Y
- movlps 0 * SIZE(X), %xmm5
+ MOVLPS 0 * SIZE(X), %xmm5
movhps 1 * SIZE(X), %xmm5
addl INCX, X
- movlps 0 * SIZE(Y), %xmm7
+ MOVLPS 0 * SIZE(Y), %xmm7
movhps 1 * SIZE(Y), %xmm7
addl INCY, Y
pshufd $0x4e, %xmm6, %xmm3
mulpd %xmm4, %xmm6
addpd %xmm6, %xmm0
- movlps 0 * SIZE(Y), %xmm6
+ MOVLPS 0 * SIZE(Y), %xmm6
movhps 1 * SIZE(Y), %xmm6
addl INCY, Y
mulpd %xmm4, %xmm3
- movlps 0 * SIZE(X), %xmm4
+ MOVLPS 0 * SIZE(X), %xmm4
movhps 1 * SIZE(X), %xmm4
addl INCX, X
addpd %xmm3, %xmm1
pshufd $0x4e, %xmm7, %xmm3
mulpd %xmm5, %xmm7
addpd %xmm7, %xmm0
- movlps 0 * SIZE(Y), %xmm7
+ MOVLPS 0 * SIZE(Y), %xmm7
movhps 1 * SIZE(Y), %xmm7
addl INCY, Y
mulpd %xmm5, %xmm3
- movlps 0 * SIZE(X), %xmm5
+ MOVLPS 0 * SIZE(X), %xmm5
movhps 1 * SIZE(X), %xmm5
addl INCX, X
addpd %xmm3, %xmm1
testl $2, N
jle .L57
- movlps 0 * SIZE(X), %xmm4
+ MOVLPS 0 * SIZE(X), %xmm4
movhps 1 * SIZE(X), %xmm4
addl INCX, X
- movlps 0 * SIZE(Y), %xmm6
+ MOVLPS 0 * SIZE(Y), %xmm6
movhps 1 * SIZE(Y), %xmm6
addl INCY, Y
mulpd %xmm4, %xmm3
addpd %xmm3, %xmm1
- movlps 0 * SIZE(X), %xmm5
+ MOVLPS 0 * SIZE(X), %xmm5
movhps 1 * SIZE(X), %xmm5
addl INCX, X
- movlps 0 * SIZE(Y), %xmm7
+ MOVLPS 0 * SIZE(Y), %xmm7
movhps 1 * SIZE(Y), %xmm7
addl INCY, Y
testl $1, N
jle .L98
- movlps 0 * SIZE(X), %xmm4
+ MOVLPS 0 * SIZE(X), %xmm4
movhps 1 * SIZE(X), %xmm4
- movlps 0 * SIZE(Y), %xmm6
+ MOVLPS 0 * SIZE(Y), %xmm6
movhps 1 * SIZE(Y), %xmm6
pshufd $0x4e, %xmm6, %xmm3
.L999:
movl RESULT, %eax
- movlps %xmm0, 0 * SIZE(%eax)
- movlps %xmm1, 1 * SIZE(%eax)
+ MOVLPS %xmm0, 0 * SIZE(%eax)
+ MOVLPS %xmm1, 1 * SIZE(%eax)
popl %ebx
popl %esi