Update dgemm_kernel_4x8_skylakex_2.c
author wjc404 <52632443+wjc404@users.noreply.github.com>
Thu, 28 Nov 2019 11:56:35 +0000 (19:56 +0800)
committer GitHub <noreply@github.com>
Thu, 28 Nov 2019 11:56:35 +0000 (19:56 +0800)
kernel/x86_64/dgemm_kernel_4x8_skylakex_2.c

index 51b0b94..90a4c2b 100644 (file)
 \r
 /* %10 for prefetch of C elements before storage; %4 = ldc(in bytes),%11 for prefetch of next B block */\r
 #define INNER_KERNELm8(nn) \\r
-    "movq %3,%10;cmpq $16,%2;jb "#nn"001f;"\\r
+    "movq %3,%10;cmpq $18,%2;jb "#nn"001f;"\\r
     #nn"008:\n\t"\\r
     INNER_KERNEL_k1m8n##nn "addq $64,%1;"\\r
     INNER_KERNEL_k1m8n##nn "addq $64,%1;"\\r
+    INNER_KERNEL_k1m8n##nn "addq $64,%1;"\\r
     "prefetcht1 (%10); prefetcht1 63(%10); addq %4,%10;"\\r
     INNER_KERNEL_k1m8n##nn "addq $64,%1;"\\r
     INNER_KERNEL_k1m8n##nn "addq $64,%1;"\\r
-    "prefetcht1 (%11); addq $16,%11;"\\r
-    "subq $4,%2;cmpq $16,%2;jnb "#nn"008b;"\\r
+    INNER_KERNEL_k1m8n##nn "addq $64,%1;"\\r
+    "prefetcht1 (%11); addq $32,%11;"\\r
+    "subq $6,%2;cmpq $18,%2;jnb "#nn"008b;"\\r
     "movq %3,%10;"\\r
     #nn"001:\n\t"\\r
     "cmpq $1,%2;jb "#nn"000f;"\\r