#ifndef WINDOWS_ABI
-#define STACKSIZE 64
+#define STACKSIZE 128
#define OLD_INCX 8 + STACKSIZE(%rsp)
#define OLD_Y 16 + STACKSIZE(%rsp)
#define OLD_INCY 24 + STACKSIZE(%rsp)
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
#define ALPHA 48 (%rsp)
-
+
+#define MMM 64(%rsp)
+#define NN 72(%rsp)
+#define AA 80(%rsp)
+#define XX 88(%rsp)
+#define LDAX 96(%rsp)
+#define ALPHAR 104(%rsp)
+#define ALPHAI 112(%rsp)
+
#define M %rdi
#define N %rsi
#define A %rcx
#else
-#define STACKSIZE 256
+#define STACKSIZE 304
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
#define OLD_A 48 + STACKSIZE(%rsp)
#define OLD_BUFFER 96 + STACKSIZE(%rsp)
#define ALPHA 224 (%rsp)
+#define MMM 240(%rsp)
+#define NN 248(%rsp)
+#define AA 256(%rsp)
+#define XX 264(%rsp)
+#define LDAX 272(%rsp)
+#define ALPHAR 280(%rsp)
+#define ALPHAI 288(%rsp)
+
#define M %rcx
#define N %rdx
#define A %r8
movaps %xmm3, %xmm0
movss OLD_ALPHA_I, %xmm1
#endif
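+	# Save the incoming arguments and alpha; M is processed below in
+	# blocks of at most 2^20 rows, restoring A, N, LDA and X from these
+	# copies for every block. Y is loaded once here; its per-block
+	# reload below is commented out.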
+ movq A, AA
+ movq N, NN
+ movq M, MMM
+ movq LDA, LDAX
+ movq X, XX
+ movq OLD_Y, Y
+ movss %xmm0,ALPHAR
+ movss %xmm1,ALPHAI
+
+.L0t:
+ xorq I,I
+ addq $1,I
+ salq $20,I
+ subq I,MMM
+ movq I,M
+ movss ALPHAR,%xmm0
+ movss ALPHAI,%xmm1
+ jge .L00t
+
+ movq MMM,M
+ addq I,M
+ jle .L999x
+
+.L00t:
+ movq AA, A
+ movq NN, N
+ movq LDAX, LDA
+ movq XX, X
movq OLD_INCX, INCX
- movq OLD_Y, Y
+# movq OLD_Y, Y
movq OLD_INCY, INCY
movq OLD_BUFFER, BUFFER
ALIGN_3
.L999:
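+	# Advance AA past the M rows just processed (scaled by ZBASE_SHIFT,
+	# the complex element size) and start the next block; .L999x below
+	# is the real epilogue.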
+ movq M, I
+ salq $ZBASE_SHIFT,I
+ addq I,AA
+ jmp .L0t
+.L999x:
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12
#ifndef WINDOWS_ABI
-#define STACKSIZE 64
+#define STACKSIZE 128
#define OLD_INCX 8 + STACKSIZE(%rsp)
#define OLD_Y 16 + STACKSIZE(%rsp)
#define OLD_INCY 24 + STACKSIZE(%rsp)
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
#define ALPHA 48 (%rsp)
+#define MMM 64(%rsp)
+#define NN 72(%rsp)
+#define AA 80(%rsp)
+#define LDAX 88(%rsp)
+#define ALPHAR 96(%rsp)
+#define ALPHAI 104(%rsp)
#define M %rdi
#define N %rsi
#else
-#define STACKSIZE 256
+#define STACKSIZE 288
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
#define OLD_A 48 + STACKSIZE(%rsp)
#define OLD_BUFFER 96 + STACKSIZE(%rsp)
#define ALPHA 224 (%rsp)
+#define MMM 240(%rsp)
+#define NN 248(%rsp)
+#define AA 256(%rsp)
+#define LDAX 264(%rsp)
+#define ALPHAR 272(%rsp)
+#define ALPHAI 280(%rsp)
+
#define M %rcx
#define N %rdx
#define A %r8
movss OLD_ALPHA_I, %xmm1
#endif
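+	# Save A, N, M, LDA and alpha; M is then processed in blocks of at
+	# most 2^20 rows, with A, N and LDA restored from these copies and
+	# alpha reloaded into %xmm0/%xmm1 for every block.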
+ movq A, AA
+ movq N, NN
+ movq M, MMM
+ movq LDA, LDAX
+ movss %xmm0,ALPHAR
+ movss %xmm1,ALPHAI
+
+.L0t:
+ xorq I,I
+ addq $1,I
+ salq $20,I
+ subq I,MMM
+ movq I,M
+ movss ALPHAR,%xmm0
+ movss ALPHAI,%xmm1
+ jge .L00t
+
+ movq MMM,M
+ addq I,M
+ jle .L999x
+
+.L00t:
+ movq AA, A
+ movq NN, N
+ movq LDAX, LDA
+
movq OLD_INCX, INCX
movq OLD_Y, Y
movq OLD_INCY, INCY
ALIGN_3
.L999:
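+	# Advance AA past the M rows just processed (scaled by ZBASE_SHIFT)
+	# and loop back for the next block; .L999x is the real epilogue.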
+ movq M, I
+ salq $ZBASE_SHIFT,I
+ addq I,AA
+ jmp .L0t
+.L999x:
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12
#ifndef WINDOWS_ABI
-#define STACKSIZE 64
+#define STACKSIZE 128
#define OLD_M %rdi
#define OLD_N %rsi
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
#define ALPHA 48 (%rsp)
+#define MMM 56(%rsp)
+#define NN 64(%rsp)
+#define AA 72(%rsp)
+#define LDAX 80(%rsp)
+#define XX 88(%rsp)
#else
#define STACKSIZE 256
movq OLD_LDA, LDA
#endif
- movq STACK_INCX, INCX
- movq STACK_Y, Y
- movq STACK_INCY, INCY
- movq STACK_BUFFER, BUFFER
-
#ifndef WINDOWS_ABI
movsd %xmm0, ALPHA
#else
movsd %xmm3, ALPHA
#endif
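+	# Load Y once and save A, N, M, LDA and X; M is then processed in
+	# blocks of at most 2^21 rows, each block restoring these copies
+	# before entering the kernel body.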
+ movq STACK_Y, Y
+ movq A,AA
+ movq N,NN
+ movq M,MMM
+ movq LDA,LDAX
+ movq X,XX
+
+.L0t:
+ xorq I,I
+ addq $1,I
+ salq $21,I
+ subq I,MMM
+ movq I,M
+ jge .L00t
+
+ movq MMM,M
+ addq I,M
+ jle .L999x
+
+.L00t:
+ movq XX,X
+ movq AA,A
+ movq NN,N
+ movq LDAX,LDA
+
+ movq STACK_INCX, INCX
+ movq STACK_INCY, INCY
+ movq STACK_BUFFER, BUFFER
+
+
leaq -1(INCY), %rax
leaq (,INCX, SIZE), INCX
ALIGN_3
.L999:
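+	# Advance AA past the M rows just processed (M * SIZE bytes) and
+	# loop back for the next block; .L999x is the real exit path.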
+ leaq (, M, SIZE), %rax
+ addq %rax,AA
+ jmp .L0t
+ ALIGN_4
+
+.L999x:
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12
#ifndef WINDOWS_ABI
-#define STACKSIZE 64
+#define STACKSIZE 128
#define OLD_M %rdi
#define OLD_N %rsi
#define STACK_INCY 24 + STACKSIZE(%rsp)
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
#define ALPHA 48 (%rsp)
-
+#define MMM 56(%rsp)
+#define NN 64(%rsp)
+#define AA 72(%rsp)
+#define LDAX 80(%rsp)
+#define XX 96(%rsp)
#else
-#define STACKSIZE 256
+#define STACKSIZE 288
#define OLD_M %rcx
#define OLD_N %rdx
#define STACK_BUFFER 88 + STACKSIZE(%rsp)
#define ALPHA 224 (%rsp)
+#define MMM 232(%rsp)
+#define NN 240(%rsp)
+#define AA 248(%rsp)
+#define LDAX 256(%rsp)
+#define XX 264(%rsp)
#endif
#define LDA %r8
movq OLD_LDA, LDA
#endif
- movq STACK_INCX, INCX
- movq STACK_Y, Y
- movq STACK_INCY, INCY
- movq STACK_BUFFER, BUFFER
-
#ifndef WINDOWS_ABI
movss %xmm0, ALPHA
#else
movss %xmm3, ALPHA
#endif
+
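+	# Save the arguments and load Y once; M is processed below in blocks
+	# of at most 2^22 rows, with A, N, LDA and X restored from these
+	# copies for every block.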
+ movq M,MMM
+ movq A,AA
+ movq N,NN
+ movq LDA,LDAX
+ movq X,XX
+ movq STACK_Y, Y
+.L0t:
+ xorq I,I
+ addq $1,I
+ salq $22,I
+ subq I,MMM
+ movq I,M
+ jge .L00t
+
+ movq MMM,M
+ addq I,M
+ jle .L999x
+
+.L00t:
+ movq AA,A
+ movq NN,N
+ movq LDAX,LDA
+ movq XX,X
+
+ movq STACK_INCX, INCX
+ movq STACK_INCY, INCY
+ movq STACK_BUFFER, BUFFER
+
leaq (,INCX, SIZE), INCX
leaq (,INCY, SIZE), INCY
leaq (,LDA, SIZE), LDA
ALIGN_3
.L999:
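+	# Advance AA past the M rows just processed (M * SIZE bytes) and
+	# continue with the next block; .L999x is the real exit path.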
+ leaq (,M,SIZE),%rax
+ addq %rax,AA
+ jmp .L0t
+ ALIGN_4
+
+.L999x:
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12
#else
-#define STACKSIZE 256
+#define STACKSIZE 288
#define OLD_M %rcx
#define OLD_N %rdx
#define STACK_Y 72 + STACKSIZE(%rsp)
#define STACK_INCY 80 + STACKSIZE(%rsp)
#define STACK_BUFFER 88 + STACKSIZE(%rsp)
-#define MMM 216(%rsp)
-#define NN 224(%rsp)
-#define AA 232(%rsp)
-#define LDAX 240(%rsp)
+#define MMM 232(%rsp)
+#define NN 240(%rsp)
+#define AA 248(%rsp)
+#define LDAX 256(%rsp)
#endif
#ifndef WINDOWS_ABI
-#define STACKSIZE 64
+#define STACKSIZE 128
#define OLD_INCX 8 + STACKSIZE(%rsp)
#define OLD_Y 16 + STACKSIZE(%rsp)
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
#define ALPHA_R 48 (%rsp)
#define ALPHA_I 56 (%rsp)
-
+
+#define MMM 64(%rsp)
+#define NN 72(%rsp)
+#define AA 80(%rsp)
+#define XX 88(%rsp)
+#define LDAX 96(%rsp)
+#define ALPHAR 104(%rsp)
+#define ALPHAI 112(%rsp)
+
#define M %rdi
#define N %rsi
#define A %rcx
#else
-#define STACKSIZE 256
+#define STACKSIZE 304
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
#define OLD_A 48 + STACKSIZE(%rsp)
#define ALPHA_R 224 (%rsp)
#define ALPHA_I 232 (%rsp)
+#define MMM 240(%rsp)
+#define NN 248(%rsp)
+#define AA 256(%rsp)
+#define XX 264(%rsp)
+#define LDAX 272(%rsp)
+#define ALPHAR 280(%rsp)
+#define ALPHAI 288(%rsp)
+
#define M %rcx
#define N %rdx
#define A %r8
movsd OLD_ALPHA_I, %xmm1
#endif
- movq OLD_INCX, INCX
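+	# Save the arguments and both halves of alpha; M is processed below
+	# in blocks of at most 2^18 rows, reloading alpha into %xmm0/%xmm1
+	# and restoring A, N, LDA and X for every block. Y is loaded once
+	# before the loop; its per-block reload is commented out.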
+ movq A, AA
+ movq N, NN
+ movq M, MMM
+ movq LDA, LDAX
+ movq X, XX
movq OLD_Y, Y
+ movsd %xmm0,ALPHAR
+ movsd %xmm1,ALPHAI
+
+.L0t:
+ xorq I,I
+ addq $1,I
+ salq $18,I
+ subq I,MMM
+ movq I,M
+ movsd ALPHAR,%xmm0
+ movsd ALPHAI,%xmm1
+ jge .L00t
+
+ movq MMM,M
+ addq I,M
+ jle .L999x
+
+.L00t:
+ movq AA, A
+ movq NN, N
+ movq LDAX, LDA
+ movq XX, X
+
+ movq OLD_INCX, INCX
+# movq OLD_Y, Y
movq OLD_INCY, INCY
movq OLD_BUFFER, BUFFER
ALIGN_3
.L999:
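+	# Advance AA past the M rows just processed (scaled by ZBASE_SHIFT,
+	# the complex element size) and start the next block; .L999x is the
+	# real epilogue.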
+ movq M, I
+ salq $ZBASE_SHIFT,I
+ addq I,AA
+ jmp .L0t
+.L999x:
+
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12
#ifndef WINDOWS_ABI
-#define STACKSIZE 64
+#define STACKSIZE 128
#define OLD_INCX 8 + STACKSIZE(%rsp)
#define OLD_Y 16 + STACKSIZE(%rsp)
#define OLD_INCY 24 + STACKSIZE(%rsp)
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
+#define MMM 64(%rsp)
+#define NN 72(%rsp)
+#define AA 80(%rsp)
+#define LDAX 88(%rsp)
+#define ALPHAR 96(%rsp)
+#define ALPHAI 104(%rsp)
+
#define M %rdi
#define N %rsi
#define A %rcx
#else
-#define STACKSIZE 256
+#define STACKSIZE 288
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
#define OLD_A 48 + STACKSIZE(%rsp)
#define OLD_INCY 88 + STACKSIZE(%rsp)
#define OLD_BUFFER 96 + STACKSIZE(%rsp)
+#define MMM 240(%rsp)
+#define NN 248(%rsp)
+#define AA 256(%rsp)
+#define LDAX 264(%rsp)
+#define ALPHAR 272(%rsp)
+#define ALPHAI 280(%rsp)
+
#define M %rcx
#define N %rdx
#define A %r8
movsd OLD_ALPHA_I, %xmm1
#endif
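+	# Save A, N, M, LDA and both halves of alpha; M is processed in
+	# blocks of at most 2^19 rows, reloading alpha and restoring A, N
+	# and LDA from these copies for every block.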
+ movq A, AA
+ movq N, NN
+ movq M, MMM
+ movq LDA, LDAX
+ movsd %xmm0,ALPHAR
+ movsd %xmm1,ALPHAI
+
+.L0t:
+ xorq I,I
+ addq $1,I
+ salq $19,I
+ subq I,MMM
+ movq I,M
+ movsd ALPHAR,%xmm0
+ movsd ALPHAI,%xmm1
+ jge .L00t
+
+ movq MMM,M
+ addq I,M
+ jle .L999x
+
+.L00t:
+ movq AA, A
+ movq NN, N
+ movq LDAX, LDA
+
movq OLD_INCX, INCX
movq OLD_Y, Y
movq OLD_INCY, INCY
ALIGN_3
.L999:
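+	# Advance AA past the M rows just processed (scaled by ZBASE_SHIFT)
+	# and loop back for the next block; .L999x is the real epilogue.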
+ movq M, I
+ salq $ZBASE_SHIFT,I
+ addq I,AA
+ jmp .L0t
+.L999x:
+
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12