From 23e182ca7c7cbf3dae151d3d084c074078b075fa Mon Sep 17 00:00:00 2001 From: traz Date: Fri, 2 Sep 2011 15:28:01 +0000 Subject: [PATCH] Fix stack-pointer bug for strmm. --- kernel/mips64/sgemm_kernel_8x4_ps.S | 122 +++++++++++++++++------------------- 1 file changed, 58 insertions(+), 64 deletions(-) diff --git a/kernel/mips64/sgemm_kernel_8x4_ps.S b/kernel/mips64/sgemm_kernel_8x4_ps.S index 1b4dae8..9300254 100644 --- a/kernel/mips64/sgemm_kernel_8x4_ps.S +++ b/kernel/mips64/sgemm_kernel_8x4_ps.S @@ -3,7 +3,7 @@ #include "common.h" #define FETCH ld -#define STACKSIZE 192 +#define STACKSIZE 160 #define gsLQC1(base,fq,ft,offset) .word(0x32<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq) #define gsSQC1(base,fq,ft,offset) .word(0x3A<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq) @@ -127,7 +127,7 @@ # .ent gemm # .type gemm, @function #gemm: -# .frame $fp,STACKSIZE,$31 # vars= 48, regs= 1/0, args= 0, gp= 0 +# .frame $sp,STACKSIZE,$31 # vars= 48, regs= 1/0, args= 0, gp= 0 # .mask 0x40000000,-8 # .fmask 0x00000000,0 # .set noreorder @@ -137,34 +137,34 @@ PROLOGUE daddiu $sp,$sp,-STACKSIZE - sd $fp,184($sp) - move $fp,$sp - - sd $16, 0($fp) - sd $17, 8($fp) - sd $18, 16($fp) - sd $19, 24($fp) - sd $20, 32($fp) - sd $21, 40($fp) - sd $22, 48($fp) - - ST $f24, 56($fp) - ST $f25, 64($fp) - ST $f26, 72($fp) - ST $f27, 80($fp) - ST $f28, 88($fp) + + sd $16, 0($sp) + sd $17, 8($sp) + sd $18, 16($sp) + sd $19, 24($sp) + sd $20, 32($sp) + sd $21, 40($sp) + sd $22, 48($sp) + + ST $f24, 56($sp) + ST $f25, 64($sp) + ST $f26, 72($sp) + ST $f27, 80($sp) + ST $f28, 88($sp) #if defined(TRMMKERNEL) - sd $23, 96($fp) - sd $24, 104($fp) - sd $25, 112($fp) + sd $23, 96($sp) + sd $24, 104($sp) + sd $25, 112($sp) + + LDARG OFFSET, 160($sp) #endif #ifndef __64BIT__ - ST $f20,120($fp) - ST $f21,128($fp) - ST $f22,136($fp) - ST $f23,144($fp) + ST $f20,120($sp) + ST $f21,128($sp) + ST $f22,136($sp) + ST $f23,144($sp) #endif .align 4 @@ -172,16 +172,12 @@ dsra J, N, 2 # NR=4 dsll LDC, LDC, BASE_SHIFT# LDC*SIZE -#if defined(TRMMKERNEL) - LD OFFSET, 192($fp) -#endif - #if defined(TRMMKERNEL) && !defined(LEFT) neg KK, OFFSET #endif blez J, .L2 - ST ALPHA, 152($fp) + ST ALPHA, 152($sp) .L48: dsra I, M, 3 # MR=8 @@ -4670,7 +4666,7 @@ andi L, TEMP, 1 #endif blez L, .L480 - LD ALPHA, 152($fp) + LD ALPHA, 152($sp) MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 @@ -5273,7 +5269,7 @@ andi L, TEMP, 1 #endif blez L, .L440 - LD ALPHA, 152($fp) + LD ALPHA, 152($sp) MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 @@ -5653,7 +5649,7 @@ andi L, TEMP, 1 #endif blez L, .L420 - LD ALPHA, 152($fp) + LD ALPHA, 152($sp) MADPS C11, C11, A1, B1 MADPS C12, C12, A1, B2 @@ -5968,7 +5964,7 @@ andi L, TEMP, 1 #endif blez L, .L410 - LD ALPHA, 152($fp) + LD ALPHA, 152($sp) MADD C11, C11, A1, B1 MADD C12, C12, A1, B2 @@ -6258,7 +6254,7 @@ andi L, TEMP, 1 #endif blez L, .L280 - LD ALPHA, 152($fp) + LD ALPHA, 152($sp) MADD C13, C13, A5, B1 MADD C23, C23, A6, B1 @@ -6574,7 +6570,7 @@ andi L, TEMP, 1 #endif blez L, .L240 - LD ALPHA, 152($fp) + LD ALPHA, 152($sp) MADD C11, C11, A1, B1 MADD C21, C21, A2, B1 @@ -6784,7 +6780,7 @@ andi L, TEMP, 1 #endif blez L, .L220 - LD ALPHA, 152($fp) + LD ALPHA, 152($sp) MADD C11, C11, A1, B1 MADD C21, C21, A2, B1 @@ -6953,7 +6949,7 @@ andi L, TEMP, 1 #endif blez L, .L210 - LD ALPHA, 152($fp) + LD ALPHA, 152($sp) MADD C11, C11, A1, B1 MADD C12, C12, A1, B2 @@ -7204,7 +7200,7 @@ andi L, TEMP, 1 #endif blez L, .L180 - LD ALPHA, 152($fp) + LD ALPHA, 152($sp) MADD C13, C13, A5, B1 MADD C23, C23, A6, B1 @@ -7435,7 +7431,7 @@ andi L, TEMP, 1 #endif blez L, .L140 - LD ALPHA, 152($fp) + LD ALPHA, 152($sp) MADD C11, C11, A1, B1 MADD C21, C21, A2, B1 @@ -7597,7 +7593,7 @@ andi L, TEMP, 1 #endif blez L, .L120 - LD ALPHA, 152($fp) + LD ALPHA, 152($sp) MADD C11, C11, A1, B1 MADD C21, C21, A2, B1 @@ -7730,7 +7726,7 @@ andi L, TEMP, 1 #endif blez L, .L110 - LD ALPHA, 152($fp) + LD ALPHA, 152($sp) MADD C11, C11, A1, B1 daddiu AO, AO, 1 * SIZE @@ -7762,35 +7758,33 @@ NOP .L999: - ld $16, 0($fp) - ld $17, 8($fp) - ld $18, 16($fp) - ld $19, 24($fp) - ld $20, 32($fp) - ld $21, 40($fp) - ld $22, 48($fp) - - LD $f24, 56($fp) - LD $f25, 64($fp) - LD $f26, 72($fp) - LD $f27, 80($fp) - LD $f28, 88($fp) + ld $16, 0($sp) + ld $17, 8($sp) + ld $18, 16($sp) + ld $19, 24($sp) + ld $20, 32($sp) + ld $21, 40($sp) + ld $22, 48($sp) + + LD $f24, 56($sp) + LD $f25, 64($sp) + LD $f26, 72($sp) + LD $f27, 80($sp) + LD $f28, 88($sp) #if defined(TRMMKERNEL) - ld $23, 96($fp) - ld $24, 104($fp) - ld $25, 112($fp) + ld $23, 96($sp) + ld $24, 104($sp) + ld $25, 112($sp) #endif #ifndef __64BIT__ - LD $f20,120($fp) - LD $f21,128($fp) - LD $f22,136($fp) - LD $f23,144($fp) + LD $f20,120($sp) + LD $f21,128($sp) + LD $f22,136($sp) + LD $f23,144($sp) #endif - move $sp,$fp - ld $fp,184($sp) daddiu $sp,$sp,STACKSIZE j $31 nop -- 2.7.4