prfm PLDL1KEEP, [origPB]
prfm PLDL1KEEP, [origPA]
-
- ldr A_PRE_SIZE, =dgemm_prefetch_size_a
- ldr A_PRE_SIZE, [A_PRE_SIZE]
- ldr B_PRE_SIZE, =dgemm_prefetch_size_b
- ldr B_PRE_SIZE, [B_PRE_SIZE]
- ldr C_PRE_SIZE, =dgemm_prefetch_size_c
- ldr C_PRE_SIZE, [C_PRE_SIZE]
+ mov A_PRE_SIZE, #3584
+ mov B_PRE_SIZE, #512
+ mov C_PRE_SIZE, #128
add A_PRE_SIZE_64, A_PRE_SIZE, #64
add B_PRE_SIZE_64, B_PRE_SIZE, #64
#define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 4
-#define SGEMM_DEFAULT_P sgemm_p
-#define DGEMM_DEFAULT_P dgemm_p
-#define CGEMM_DEFAULT_P cgemm_p
-#define ZGEMM_DEFAULT_P zgemm_p
+#define SGEMM_DEFAULT_P 128
+#define DGEMM_DEFAULT_P 160
+#define CGEMM_DEFAULT_P 128
+#define ZGEMM_DEFAULT_P 128
-#define SGEMM_DEFAULT_Q sgemm_q
-#define DGEMM_DEFAULT_Q dgemm_q
-#define CGEMM_DEFAULT_Q cgemm_q
-#define ZGEMM_DEFAULT_Q zgemm_q
+#define SGEMM_DEFAULT_Q 352
+#define DGEMM_DEFAULT_Q 128
+#define CGEMM_DEFAULT_Q 224
+#define ZGEMM_DEFAULT_Q 112
-#define SGEMM_DEFAULT_R sgemm_r
-#define DGEMM_DEFAULT_R dgemm_r
-#define CGEMM_DEFAULT_R cgemm_r
-#define ZGEMM_DEFAULT_R zgemm_r
+#define SGEMM_DEFAULT_R 4096
+#define DGEMM_DEFAULT_R 4096
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 4096
#define SYMV_P 16
#endif
#define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 4
-#define SGEMM_DEFAULT_P sgemm_p
-#define DGEMM_DEFAULT_P dgemm_p
-#define CGEMM_DEFAULT_P cgemm_p
-#define ZGEMM_DEFAULT_P zgemm_p
+#define SGEMM_DEFAULT_P 128
+#define DGEMM_DEFAULT_P 160
+#define CGEMM_DEFAULT_P 128
+#define ZGEMM_DEFAULT_P 128
-#define SGEMM_DEFAULT_Q sgemm_q
-#define DGEMM_DEFAULT_Q dgemm_q
-#define CGEMM_DEFAULT_Q cgemm_q
-#define ZGEMM_DEFAULT_Q zgemm_q
+#define SGEMM_DEFAULT_Q 352
+#define DGEMM_DEFAULT_Q 128
+#define CGEMM_DEFAULT_Q 224
+#define ZGEMM_DEFAULT_Q 112
-#define SGEMM_DEFAULT_R sgemm_r
-#define DGEMM_DEFAULT_R dgemm_r
-#define CGEMM_DEFAULT_R cgemm_r
-#define ZGEMM_DEFAULT_R zgemm_r
+#define SGEMM_DEFAULT_R 4096
+#define DGEMM_DEFAULT_R 4096
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 4096
#define SYMV_P 16
#endif