#define M $4
#define N $5
#define K $6
-#define A $8
-#define B $9
-#define C $10
-#define LDC $11
+#define A $9
+#define B $10
+#define C $11
+#define LDC $8
#### Pointer A, B, C ####
#define AO $12
PROLOGUE
+ LDARG LDC, 0($sp)
daddiu $sp,$sp,-STACKSIZE
sd $16, 0($sp)
sd $24, 104($sp)
sd $25, 112($sp)
- LDARG OFFSET, 160($sp)
+ LDARG OFFSET, STACKSIZE($sp)
#endif
#ifndef __64BIT__
/* (a + bi) * (c + di) */
SUB C11, C11, A1 # ac'+'bd
SUB C21, C21, A2
- LD A1, 152($sp) # load alpha_r
# LD A1, 0 * SIZE(A) # load alpha_r
SUB C31, C31, A3
+ LD A1, 152($sp) # load alpha_r
+ SUB C41, C41, A4
LD A2, 160($sp) # load alpha_i
# LD A2, 0 * SIZE(A) # load alpha_i
-
- SUB C41, C41, A4
ADD C13, A5, C13 # ad'+'cb
ADD C23, A6, C23
ADD C33, A7, C33
ADD C11, A1, C11 # ac'+'bd
ADD C21, A2, C21
# LD A1, 0 * SIZE(A) # load alpha_r
- LD A1, 152($sp) # load alpha_r
-
ADD C31, A3, C31
+ LD A1, 152($sp) # load alpha_r
+ ADD C41, A4, C41
LD A2, 160($sp) # load alpha_i
# LD A2, 0 * SIZE(A) # load alpha_i
+ SUB C13, A5, C13 # ad'+'cb
+ SUB C23, A6, C23
+ SUB C33, A7, C33
+ SUB C43, A8, C43
+ ADD C12, B1, C12
+ ADD C22, B2, C22
+ ADD C32, B3, C32
+ ADD C42, B4, C42
+ SUB C14, B5, C14
+ SUB C24, B6, C24
+ SUB C34, B7, C34
+ SUB C44, B8, C44
- ADD C41, A4, C41
LD B1, 0 * SIZE(CO1)
-
- SUB C13, A5, C13 # ad'+'cb
LD B3, 2 * SIZE(CO1)
-
- SUB C23, A6, C23
LD B5, 4 * SIZE(CO1)
-
- SUB C33, A7, C33
LD B7, 6 * SIZE(CO1)
-
- SUB C43, A8, C43
LD B2, 1 * SIZE(CO1)
-
- ADD C12, B1, C12
LD B4, 3 * SIZE(CO1)
-
- ADD C22, B2, C22
LD B6, 5 * SIZE(CO1)
-
- ADD C32, B3, C32
LD B8, 7 * SIZE(CO1)
- ADD C42, B4, C42
MADD B1, B1, C11, A1 # A1 = alpha_r
-
- SUB C14, B5, C14
MADD B3, B3, C21, A1
-
- SUB C24, B6, C24
MADD B5, B5, C31, A1
-
- SUB C34, B7, C34
MADD B7, B7, C41, A1
-
- SUB C44, B8, C44
MADD B2, B2, C13, A1
-
MADD B4, B4, C23, A1
MADD B6, B6, C33, A1
-
MADD B8, B8, C43, A1
NMSUB B1, B1, C13, A2 # A2 = alpha_i
-
NMSUB B3, B3, C23, A2
NMSUB B5, B5, C33, A2
- LD C13, 0 * SIZE(CO2)
-
NMSUB B7, B7, C43, A2
MADD B2, B2, C11, A2
- LD C23, 2 * SIZE(CO2)
-
MADD B4, B4, C12, A2
MADD B6, B6, C13, A2
- LD C33, 4 * SIZE(CO2)
-
MADD B8, B8, C14, A2
- LD C43, 6 * SIZE(CO2)
+ LD C13, 0 * SIZE(CO2)
+ LD C23, 2 * SIZE(CO2)
+ LD C33, 4 * SIZE(CO2)
+ LD C43, 6 * SIZE(CO2)
LD C11, 1 * SIZE(CO2)
LD C21, 3 * SIZE(CO2)
LD C31, 5 * SIZE(CO2)
- MADD C13, C13, C12, A1
-
LD C41, 7 * SIZE(CO2)
+
+ MADD C13, C13, C12, A1
MADD C23, C23, C22, A1
MADD C33, C33, C32, A1
ADD C11, A1, C11 # ac'+'bd
ADD C21, A2, C21
# LD A1, 0 * SIZE(A) # load alpha_r
- LD A1, 152($sp) # load alpha_r
-
ADD C31, A3, C31
+ LD A1, 152($sp) # load alpha_r
# LD A2, 0 * SIZE(A) # load alpha_i
+ ADD C41, A4, C41
LD A2, 160($sp) # load alpha_i
+ SUB C13, C13, A5 # ad'+'cb
+ SUB C23, C23, A6
+ SUB C33, C33, A7
+ SUB C43, C43, A8
+ ADD C12, B1, C12
+ ADD C22, B2, C22
+ ADD C32, B3, C32
+ ADD C42, B4, C42
+ SUB C14, C14, B5
+ SUB C24, C24, B6
+ SUB C34, C34, B7
+ SUB C44, C44, B8
- ADD C41, A4, C41
LD B1, 0 * SIZE(CO1)
-
- SUB C13, C13, A5 # ad'+'cb
LD B3, 2 * SIZE(CO1)
-
- SUB C23, C23, A6
LD B5, 4 * SIZE(CO1)
-
- SUB C33, C33, A7
LD B7, 6 * SIZE(CO1)
-
- SUB C43, C43, A8
LD B2, 1 * SIZE(CO1)
-
- ADD C12, B1, C12
LD B4, 3 * SIZE(CO1)
-
- ADD C22, B2, C22
LD B6, 5 * SIZE(CO1)
-
- ADD C32, B3, C32
LD B8, 7 * SIZE(CO1)
- ADD C42, B4, C42
MADD B1, B1, C11, A1 # A1 = alpha_r
-
- SUB C14, C14, B5
MADD B3, B3, C21, A1
-
- SUB C24, C24, B6
MADD B5, B5, C31, A1
-
- SUB C34, C34, B7
MADD B7, B7, C41, A1
-
- SUB C44, C44, B8
MADD B2, B2, C13, A1
-
MADD B4, B4, C23, A1
MADD B6, B6, C33, A1
-
MADD B8, B8, C43, A1
NMSUB B1, B1, C13, A2 # A2 = alpha_i
-
NMSUB B3, B3, C23, A2
NMSUB B5, B5, C33, A2
- LD C13, 0 * SIZE(CO2)
-
NMSUB B7, B7, C43, A2
MADD B2, B2, C11, A2
- LD C23, 2 * SIZE(CO2)
-
MADD B4, B4, C12, A2
MADD B6, B6, C13, A2
- LD C33, 4 * SIZE(CO2)
-
MADD B8, B8, C14, A2
- LD C43, 6 * SIZE(CO2)
+ LD C13, 0 * SIZE(CO2)
+ LD C23, 2 * SIZE(CO2)
+ LD C33, 4 * SIZE(CO2)
+ LD C43, 6 * SIZE(CO2)
LD C11, 1 * SIZE(CO2)
LD C21, 3 * SIZE(CO2)
LD C31, 5 * SIZE(CO2)
- MADD C13, C13, C12, A1
-
LD C41, 7 * SIZE(CO2)
+
+ MADD C13, C13, C12, A1
MADD C23, C23, C22, A1
MADD C33, C33, C32, A1
#if defined(RR) || defined(RC) || defined(CR) || defined(CC)
/* (a - bi) * (c - di) */
- SUB C11, A1, C11 # ac'+'bd
- SUB C21, A2, C21
+ SUB C11, C11, A1 # ac'+'bd
+ SUB C21, C21, A2
+ SUB C31, C31, A3
LD A1, 152($sp) # load alpha_r
# LD A1, 0 * SIZE(A) # load alpha_r
-
- SUB C31, A3, C31
-# LD A2, 0 * SIZE(A) # load alpha_i
+ SUB C41, C41, A4
LD A2, 160($sp)
-
- SUB C41, A4, C41
- LD B1, 0 * SIZE(CO1)
+# LD A2, 0 * SIZE(A) # load alpha_i
ADD C13, A5, C13 # ad'+'cb
- LD B3, 2 * SIZE(CO1)
-
ADD C23, A6, C23
- LD B5, 4 * SIZE(CO1)
-
ADD C33, A7, C33
- LD B7, 6 * SIZE(CO1)
-
ADD C43, A8, C43
- LD B2, 1 * SIZE(CO1)
+ SUB C12, C12, B1
+ SUB C22, C22, B2
+ SUB C32, C32, B3
+ SUB C42, C42, B4
+ ADD C14, B5, C14
+ ADD C24, B6, C24
+ ADD C34, B7, C34
+ ADD C44, B8, C44
- SUB C12, B1, C12
+ LD B1, 0 * SIZE(CO1)
+ LD B3, 2 * SIZE(CO1)
+ LD B5, 4 * SIZE(CO1)
+ LD B7, 6 * SIZE(CO1)
+ LD B2, 1 * SIZE(CO1)
LD B4, 3 * SIZE(CO1)
-
- SUB C22, B2, C22
LD B6, 5 * SIZE(CO1)
-
- SUB C32, B3, C32
LD B8, 7 * SIZE(CO1)
- SUB C42, B4, C42
MADD B1, B1, C11, A1 # A1 = alpha_r
-
- ADD C14, B5, C14
MADD B3, B3, C21, A1
-
- ADD C24, B6, C24
MADD B5, B5, C31, A1
-
- ADD C34, B7, C34
MADD B7, B7, C41, A1
-
- ADD C44, B8, C44
NMSUB B2, B2, C13, A1
-
NMSUB B4, B4, C23, A1
NMSUB B6, B6, C33, A1
-
NMSUB B8, B8, C43, A1
NMSUB B1, B1, C13, A2 # A2 = alpha_i
-
NMSUB B3, B3, C23, A2
NMSUB B5, B5, C33, A2
- LD C13, 0 * SIZE(CO2)
-
NMSUB B7, B7, C43, A2
MADD B2, B2, C11, A2
- LD C23, 2 * SIZE(CO2)
-
MADD B4, B4, C12, A2
MADD B6, B6, C13, A2
- LD C33, 4 * SIZE(CO2)
-
MADD B8, B8, C14, A2
- LD C43, 6 * SIZE(CO2)
+ LD C13, 0 * SIZE(CO2)
+ LD C43, 6 * SIZE(CO2)
+ LD C23, 2 * SIZE(CO2)
+ LD C33, 4 * SIZE(CO2)
LD C11, 1 * SIZE(CO2)
LD C21, 3 * SIZE(CO2)
LD C31, 5 * SIZE(CO2)
- MADD C13, C13, C12, A1
-
LD C41, 7 * SIZE(CO2)
- MADD C23, C23, C22, A1
- MADD C33, C33, C32, A1
+ MADD C13, C13, C12, A1
ST B1, 0 * SIZE(CO1)
- MADD C43, C43, C42, A1
+ MADD C23, C23, C22, A1
ST B3, 2 * SIZE(CO1)
- NMSUB C11, C11, C14, A1
+ MADD C33, C33, C32, A1
ST B5, 4 * SIZE(CO1)
- NMSUB C21, C21, C24, A1
+ MADD C43, C43, C42, A1
ST B7, 6 * SIZE(CO1)
- NMSUB C31, C31, C34, A1
+ NMSUB C11, C11, C14, A1
ST B2, 1 * SIZE(CO1)
- NMSUB C41, C41, C44, A1
+ NMSUB C21, C21, C24, A1
ST B4, 3 * SIZE(CO1)
- NMSUB C13, C13, C14, A2
+ NMSUB C31, C31, C34, A1
ST B6, 5 * SIZE(CO1)
- NMSUB C23, C23, C24, A2
+ NMSUB C41, C41, C44, A1
ST B8, 7 * SIZE(CO1)
+ NMSUB C13, C13, C14, A2
+ NMSUB C23, C23, C24, A2
NMSUB C33, C33, C34, A2
NMSUB C43, C43, C44, A2
MADD C11, C11, C12, A2
MADD C21, C21, C22, A2
-
MADD C31, C31, C32, A2
MADD C41, C41, C42, A2