#include "common.h"

#define FETCH ld
/* Frame holds only the callee-saved area (offsets 0..152 used below).
   No frame pointer is kept: everything is addressed off $sp directly. */
#define STACKSIZE 160
#define gsLQC1(base,fq,ft,offset) .word(0x32<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq)
#define gsSQC1(base,fq,ft,offset) .word(0x3A<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq)

# .ent gemm
# .type gemm, @function
#gemm:
# .frame $sp,STACKSIZE,$31 # vars= 48, regs= 1/0, args= 0, gp= 0
# .mask 0x40000000,-8
# .fmask 0x00000000,0
# .set noreorder

PROLOGUE

        daddiu  $sp, $sp, -STACKSIZE

        # Save callee-saved integer registers $16-$22.
        sd      $16,    0($sp)
        sd      $17,    8($sp)
        sd      $18,   16($sp)
        sd      $19,   24($sp)
        sd      $20,   32($sp)
        sd      $21,   40($sp)
        sd      $22,   48($sp)

        # Save callee-saved FP registers used by the kernel.
        ST      $f24,  56($sp)
        ST      $f25,  64($sp)
        ST      $f26,  72($sp)
        ST      $f27,  80($sp)
        ST      $f28,  88($sp)

#if defined(TRMMKERNEL)
        sd      $23,   96($sp)
        sd      $24,  104($sp)
        sd      $25,  112($sp)

        # Stack-passed OFFSET argument sits just above our frame, in the
        # caller's outgoing-argument area (STACKSIZE == 160).  LDARG picks
        # the correct integer load for the 64/32-bit ABI.
        LDARG   OFFSET, 160($sp)
#endif

#ifndef __64BIT__
        # 32-bit ABI: $f20-$f23 are callee-saved as well.
        ST      $f20, 120($sp)
        ST      $f21, 128($sp)
        ST      $f22, 136($sp)
        ST      $f23, 144($sp)
#endif

        .align 4
        dsra    J, N, 2                 # NR=4
        dsll    LDC, LDC, BASE_SHIFT    # LDC*SIZE

#if defined(TRMMKERNEL) && !defined(LEFT)
        neg     KK, OFFSET
#endif
        blez    J, .L2
        ST      ALPHA, 152($sp)         # spill ALPHA (branch delay slot)
.L48:
        dsra    I, M, 3                 # MR=8
        andi    L, TEMP, 1
#endif
        blez    L, .L480
        LD      ALPHA, 152($sp)         # delay slot: reload spilled ALPHA
        MADPS   C11, C11, A1, B1
        MADPS   C21, C21, A2, B1
        andi    L, TEMP, 1
#endif
        blez    L, .L440
        LD      ALPHA, 152($sp)
        MADPS   C11, C11, A1, B1
        MADPS   C21, C21, A2, B1
        andi    L, TEMP, 1
#endif
        blez    L, .L420
        LD      ALPHA, 152($sp)
        MADPS   C11, C11, A1, B1
        MADPS   C12, C12, A1, B2
        andi    L, TEMP, 1
#endif
        blez    L, .L410
        LD      ALPHA, 152($sp)
        MADD    C11, C11, A1, B1
        MADD    C12, C12, A1, B2
        andi    L, TEMP, 1
#endif
        blez    L, .L280
        LD      ALPHA, 152($sp)
        MADD    C13, C13, A5, B1
        MADD    C23, C23, A6, B1
        andi    L, TEMP, 1
#endif
        blez    L, .L240
        LD      ALPHA, 152($sp)
        MADD    C11, C11, A1, B1
        MADD    C21, C21, A2, B1
        andi    L, TEMP, 1
#endif
        blez    L, .L220
        LD      ALPHA, 152($sp)
        MADD    C11, C11, A1, B1
        MADD    C21, C21, A2, B1
        andi    L, TEMP, 1
#endif
        blez    L, .L210
        LD      ALPHA, 152($sp)
        MADD    C11, C11, A1, B1
        MADD    C12, C12, A1, B2
        andi    L, TEMP, 1
#endif
        blez    L, .L180
        LD      ALPHA, 152($sp)
        MADD    C13, C13, A5, B1
        MADD    C23, C23, A6, B1
        andi    L, TEMP, 1
#endif
        blez    L, .L140
        LD      ALPHA, 152($sp)
        MADD    C11, C11, A1, B1
        MADD    C21, C21, A2, B1
        andi    L, TEMP, 1
#endif
        blez    L, .L120
        LD      ALPHA, 152($sp)
        MADD    C11, C11, A1, B1
        MADD    C21, C21, A2, B1
        andi    L, TEMP, 1
#endif
        blez    L, .L110
        LD      ALPHA, 152($sp)
        MADD    C11, C11, A1, B1
        daddiu  AO, AO, 1 * SIZE
        NOP

.L999:
        # Epilogue: restore callee-saved state saved in the prologue,
        # release the frame, and return.
        ld      $16,    0($sp)
        ld      $17,    8($sp)
        ld      $18,   16($sp)
        ld      $19,   24($sp)
        ld      $20,   32($sp)
        ld      $21,   40($sp)
        ld      $22,   48($sp)

        LD      $f24,  56($sp)
        LD      $f25,  64($sp)
        LD      $f26,  72($sp)
        LD      $f27,  80($sp)
        LD      $f28,  88($sp)
#if defined(TRMMKERNEL)
        ld      $23,   96($sp)
        ld      $24,  104($sp)
        ld      $25,  112($sp)
#endif
#ifndef __64BIT__
        LD      $f20, 120($sp)
        LD      $f21, 128($sp)
        LD      $f22, 136($sp)
        LD      $f23, 144($sp)
#endif
        daddiu  $sp, $sp, STACKSIZE
        j       $31
        nop                             # branch delay slot