From: Zhang Xianyi Date: Mon, 19 Nov 2012 14:32:27 +0000 (+0800) Subject: Refs #154. Fixed a SEGFAULT bug of dgemv_t when m is very large. X-Git-Tag: v0.2.9.rc1~142 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=5f0117385e1d4f986ad75fa66b873b014a7792c2;p=platform%2Fupstream%2Fopenblas.git Refs #154. Fixed a SEGFAULT bug of dgemv_t when m is very large. It overflowed the internal buffer. Thus, we split vector x into blocks when m is very large. Thanks to @wangqian for this patch. --- diff --git a/kernel/x86_64/dgemv_t.S b/kernel/x86_64/dgemv_t.S index 0719207..02601be 100644 --- a/kernel/x86_64/dgemv_t.S +++ b/kernel/x86_64/dgemv_t.S @@ -47,7 +47,7 @@ #ifndef WINDOWS_ABI -#define STACKSIZE 64 +#define STACKSIZE 128 #define OLD_M %rdi #define OLD_N %rsi @@ -57,7 +57,10 @@ #define STACK_Y 16 + STACKSIZE(%rsp) #define STACK_INCY 24 + STACKSIZE(%rsp) #define STACK_BUFFER 32 + STACKSIZE(%rsp) - +#define MMM 56(%rsp) +#define NN 64(%rsp) +#define AA 72(%rsp) +#define LDAX 80(%rsp) #else #define STACKSIZE 256 @@ -132,12 +135,44 @@ movq OLD_LDA, LDA movq OLD_X, X #else - movq OLD_M, M - movq OLD_N, N - movq OLD_A, A - movq OLD_LDA, LDA + movq OLD_M, MMM + movq OLD_N, NN + movq OLD_A, AA + movq OLD_LDA, LDAX +#endif +#ifdef HAVE_SSE3 +#ifndef WINDOWS_ABI + movddup %xmm0, ALPHA +#else + movddup %xmm3, ALPHA #endif +#else +#ifndef WINDOWS_ABI + movapd %xmm0, ALPHA +#else + movapd %xmm3, ALPHA +#endif + unpcklpd ALPHA, ALPHA +#endif + + +.L0x: + xorq M,M + addq $1,M + salq $22,M + subq M,MMM + jge .L00 + + movq MMM,%rax + addq M,%rax + jle .L999x + movq %rax,M + +.L00: + movq LDAX,LDA + movq NN,N + movq AA,A movq STACK_INCX, INCX movq STACK_Y, Y movq STACK_INCY, INCY @@ -153,21 +188,6 @@ subq $-16 * SIZE, A -#ifdef HAVE_SSE3 -#ifndef WINDOWS_ABI - movddup %xmm0, ALPHA -#else - movddup %xmm3, ALPHA -#endif -#else -#ifndef WINDOWS_ABI - movapd %xmm0, ALPHA -#else - movapd %xmm3, ALPHA -#endif - unpcklpd ALPHA, ALPHA -#endif - testq M, M jle .L999 testq N, N @@ -854,7 
+874,6 @@ .L21: #endif - subq $4, N leaq 16 * SIZE(BUFFER), X1 @@ -2461,6 +2480,12 @@ ALIGN_4 .L999: + leaq (, M, SIZE), %rax + addq %rax,AA + jmp .L0x; + ALIGN_4 + +.L999x: movq 0(%rsp), %rbx movq 8(%rsp), %rbp movq 16(%rsp), %r12